xref: /petsc/src/mat/impls/aij/mpi/mpiaij.c (revision 7d0a6c19129e7069c8a40e210b34ed62989173db)
1 
2 #include "../src/mat/impls/aij/mpi/mpiaij.h"   /*I "petscmat.h" I*/
3 #include "petscblaslapack.h"
4 
5 #undef __FUNCT__
6 #define __FUNCT__ "MatFindNonZeroRows_MPIAIJ"
7 PetscErrorCode MatFindNonZeroRows_MPIAIJ(Mat M,IS *keptrows)
8 {
9   PetscErrorCode  ierr;
10   Mat_MPIAIJ      *mat = (Mat_MPIAIJ*)M->data;
11   Mat_SeqAIJ      *a = (Mat_SeqAIJ*)mat->A->data;
12   Mat_SeqAIJ      *b = (Mat_SeqAIJ*)mat->B->data;
13   const PetscInt  *ia,*ib;
14   const MatScalar *aa,*bb;
15   PetscInt        na,nb,i,j,*rows,cnt=0,n0rows;
16   PetscInt        m = M->rmap->n,rstart = M->rmap->rstart;
17 
18   PetscFunctionBegin;
19   *keptrows = 0;
20   ia = a->i;
21   ib = b->i;
22   for (i=0; i<m; i++) {
23     na = ia[i+1] - ia[i];
24     nb = ib[i+1] - ib[i];
25     if (!na && !nb) {
26       cnt++;
27       goto ok1;
28     }
29     aa = a->a + ia[i];
30     for (j=0; j<na; j++) {
31       if (aa[j] != 0.0) goto ok1;
32     }
33     bb = b->a + ib[i];
34     for (j=0; j <nb; j++) {
35       if (bb[j] != 0.0) goto ok1;
36     }
37     cnt++;
38     ok1:;
39   }
40   ierr = MPI_Allreduce(&cnt,&n0rows,1,MPIU_INT,MPI_SUM,((PetscObject)M)->comm);CHKERRQ(ierr);
41   if (!n0rows) PetscFunctionReturn(0);
42   ierr = PetscMalloc((M->rmap->n-cnt)*sizeof(PetscInt),&rows);CHKERRQ(ierr);
43   cnt = 0;
44   for (i=0; i<m; i++) {
45     na = ia[i+1] - ia[i];
46     nb = ib[i+1] - ib[i];
47     if (!na && !nb) continue;
48     aa = a->a + ia[i];
49     for(j=0; j<na;j++) {
50       if (aa[j] != 0.0) {
51         rows[cnt++] = rstart + i;
52         goto ok2;
53       }
54     }
55     bb = b->a + ib[i];
56     for (j=0; j<nb; j++) {
57       if (bb[j] != 0.0) {
58         rows[cnt++] = rstart + i;
59         goto ok2;
60       }
61     }
62     ok2:;
63   }
64   ierr = ISCreateGeneral(((PetscObject)M)->comm,cnt,rows,PETSC_OWN_POINTER,keptrows);CHKERRQ(ierr);
65   PetscFunctionReturn(0);
66 }
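/*
    A minimal usage sketch (not part of the library): assuming A is an assembled MATMPIAIJ
    matrix, the routine returns an index set of the locally owned rows that contain at least
    one nonzero value; keptrows is left 0 when no process has an all-zero row.

       IS       keptrows;
       PetscInt nkept;
       ierr = MatFindNonZeroRows_MPIAIJ(A,&keptrows);CHKERRQ(ierr);
       if (keptrows) {
         ierr = ISGetLocalSize(keptrows,&nkept);CHKERRQ(ierr);
         ierr = ISDestroy(keptrows);CHKERRQ(ierr);
       }
*/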
67 
68 #undef __FUNCT__
69 #define __FUNCT__ "MatDistribute_MPIAIJ"
70 /*
71     Distributes a SeqAIJ matrix across a set of processes. Code stolen from
72     MatLoad_MPIAIJ(). Horrible lack of reuse. Should be a routine for each matrix type.
73 
74     Only for square matrices
75 */
76 PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat)
77 {
78   PetscMPIInt    rank,size;
79   PetscInt       *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz,*gmataj,cnt,row,*ld;
80   PetscErrorCode ierr;
81   Mat            mat;
82   Mat_SeqAIJ     *gmata;
83   PetscMPIInt    tag;
84   MPI_Status     status;
85   PetscBool      aij;
86   MatScalar      *gmataa,*ao,*ad,*gmataarestore=0;
87 
88   PetscFunctionBegin;
89   CHKMEMQ;
90   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
91   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
92   if (!rank) {
93     ierr = PetscTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);CHKERRQ(ierr);
94     if (!aij) SETERRQ1(((PetscObject)gmat)->comm,PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name);
95   }
96   if (reuse == MAT_INITIAL_MATRIX) {
97     ierr = MatCreate(comm,&mat);CHKERRQ(ierr);
98     ierr = MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
99     ierr = MatSetType(mat,MATAIJ);CHKERRQ(ierr);
100     ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr);
101     ierr = PetscMalloc2(m,PetscInt,&dlens,m,PetscInt,&olens);CHKERRQ(ierr);
102     ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
103     rowners[0] = 0;
104     for (i=2; i<=size; i++) {
105       rowners[i] += rowners[i-1];
106     }
107     rstart = rowners[rank];
108     rend   = rowners[rank+1];
109     ierr   = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
110     if (!rank) {
111       gmata = (Mat_SeqAIJ*) gmat->data;
112       /* send row lengths to all processors */
113       for (i=0; i<m; i++) dlens[i] = gmata->ilen[i];
114       for (i=1; i<size; i++) {
115         ierr = MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
116       }
117       /* determine the number of diagonal and off-diagonal entries in each row */
118       ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
119       ierr = PetscMalloc(m*sizeof(PetscInt),&ld);CHKERRQ(ierr);
120       ierr = PetscMemzero(ld,m*sizeof(PetscInt));CHKERRQ(ierr);
121       jj = 0;
122       for (i=0; i<m; i++) {
123         for (j=0; j<dlens[i]; j++) {
124           if (gmata->j[jj] < rstart) ld[i]++;
125           if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++;
126           jj++;
127         }
128       }
129       /* send column indices to other processes */
130       for (i=1; i<size; i++) {
131         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
132         ierr = MPI_Send(&nz,1,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
133         ierr = MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
134       }
135 
136       /* send numerical values to other processes */
137       for (i=1; i<size; i++) {
138         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
139         ierr = MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
140       }
141       gmataa = gmata->a;
142       gmataj = gmata->j;
143 
144     } else {
145       /* receive row lengths */
146       ierr = MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
147       /* receive column indices */
148       ierr = MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
149       ierr = PetscMalloc2(nz,PetscScalar,&gmataa,nz,PetscInt,&gmataj);CHKERRQ(ierr);
150       ierr = MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
151       /* determine the number of diagonal and off-diagonal entries in each row */
152       ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
153       ierr = PetscMalloc(m*sizeof(PetscInt),&ld);CHKERRQ(ierr);
154       ierr = PetscMemzero(ld,m*sizeof(PetscInt));CHKERRQ(ierr);
155       jj = 0;
156       for (i=0; i<m; i++) {
157         for (j=0; j<dlens[i]; j++) {
158           if (gmataj[jj] < rstart) ld[i]++;
159           if (gmataj[jj] < rstart || gmataj[jj] >= rend) olens[i]++;
160           jj++;
161         }
162       }
163       /* receive numerical values */
164       ierr = PetscMemzero(gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr);
165       ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
166     }
167     /* set preallocation */
168     for (i=0; i<m; i++) {
169       dlens[i] -= olens[i];
170     }
171     ierr = MatSeqAIJSetPreallocation(mat,0,dlens);CHKERRQ(ierr);
172     ierr = MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);CHKERRQ(ierr);
173 
174     for (i=0; i<m; i++) {
175       dlens[i] += olens[i];
176     }
177     cnt  = 0;
178     for (i=0; i<m; i++) {
179       row  = rstart + i;
180       ierr = MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);CHKERRQ(ierr);
181       cnt += dlens[i];
182     }
183     if (rank) {
184       ierr = PetscFree2(gmataa,gmataj);CHKERRQ(ierr);
185     }
186     ierr = PetscFree2(dlens,olens);CHKERRQ(ierr);
187     ierr = PetscFree(rowners);CHKERRQ(ierr);
188     ((Mat_MPIAIJ*)(mat->data))->ld = ld;
189     *inmat = mat;
190   } else {   /* column indices are already set; only need to move over numerical values from process 0 */
191     Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data;
192     Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data;
193     mat   = *inmat;
194     ierr  = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
195     if (!rank) {
196       /* send numerical values to other processes */
197       gmata = (Mat_SeqAIJ*) gmat->data;
198       ierr   = MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);CHKERRQ(ierr);
199       gmataa = gmata->a;
200       for (i=1; i<size; i++) {
201         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
202         ierr = MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
203       }
204       nz   = gmata->i[rowners[1]]-gmata->i[rowners[0]];
205     } else {
206       /* receive numerical values from process 0*/
207       nz   = Ad->nz + Ao->nz;
208       ierr = PetscMalloc(nz*sizeof(PetscScalar),&gmataa);CHKERRQ(ierr); gmataarestore = gmataa;
209       ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
210     }
211     /* transfer numerical values into the diagonal A and off diagonal B parts of mat */
212     ld = ((Mat_MPIAIJ*)(mat->data))->ld;
213     ad = Ad->a;
214     ao = Ao->a;
215     if (mat->rmap->n) {
216       i  = 0;
217       nz = ld[i];                                   ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
218       nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
219     }
220     for (i=1; i<mat->rmap->n; i++) {
221       nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
222       nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
223     }
224     i--;
225     if (mat->rmap->n) {
226       nz = Ao->i[i+1] - Ao->i[i] - ld[i];           ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
227     }
228     if (rank) {
229       ierr = PetscFree(gmataarestore);CHKERRQ(ierr);
230     }
231   }
232   ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
233   ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
234   CHKMEMQ;
235   PetscFunctionReturn(0);
236 }
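/*
    A hedged usage sketch (not part of the library): gseq is a square MATSEQAIJ matrix that,
    from the code above, only needs to be valid on rank 0 of comm, and mlocal is the number of
    rows this process is to own (the local sizes must sum to the global size of gseq).

       Mat Adist;
       ierr = MatDistribute_MPIAIJ(comm,gseq,mlocal,MAT_INITIAL_MATRIX,&Adist);CHKERRQ(ierr);
       ... change the numerical values (not the nonzero pattern) of gseq on rank 0 ...
       ierr = MatDistribute_MPIAIJ(comm,gseq,mlocal,MAT_REUSE_MATRIX,&Adist);CHKERRQ(ierr);
*/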
237 
238 /*
239   Local utility routine that creates a mapping from the global column
240 number to the local number in the off-diagonal part of the local
241 storage of the matrix.  When PETSC_USE_CTABLE is used this is scalable at
242 a slightly higher hash table cost; without it it is not scalable (each process
243 has an order-N integer array) but access is fast.
244 */
245 #undef __FUNCT__
246 #define __FUNCT__ "CreateColmap_MPIAIJ_Private"
247 PetscErrorCode CreateColmap_MPIAIJ_Private(Mat mat)
248 {
249   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
250   PetscErrorCode ierr;
251   PetscInt       n = aij->B->cmap->n,i;
252 
253   PetscFunctionBegin;
254 #if defined (PETSC_USE_CTABLE)
255   ierr = PetscTableCreate(n,&aij->colmap);CHKERRQ(ierr);
256   for (i=0; i<n; i++){
257     ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1);CHKERRQ(ierr);
258   }
259 #else
260   ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscInt),&aij->colmap);CHKERRQ(ierr);
261   ierr = PetscLogObjectMemory(mat,mat->cmap->N*sizeof(PetscInt));CHKERRQ(ierr);
262   ierr = PetscMemzero(aij->colmap,mat->cmap->N*sizeof(PetscInt));CHKERRQ(ierr);
263   for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
264 #endif
265   PetscFunctionReturn(0);
266 }
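/*
    Sketch of how the map is consulted elsewhere in this file (see MatSetValues_MPIAIJ() and
    MatGetValues_MPIAIJ() below): a global column number gcol is translated into the local
    column index col of the off-diagonal part B; col < 0 means gcol is not present in B.

#if defined (PETSC_USE_CTABLE)
       ierr = PetscTableFind(aij->colmap,gcol+1,&col);CHKERRQ(ierr);
       col--;
#else
       col = aij->colmap[gcol] - 1;
#endif
*/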
267 
268 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \
269 { \
270     if (col <= lastcol1) low1 = 0; else high1 = nrow1; \
271     lastcol1 = col;\
272     while (high1-low1 > 5) { \
273       t = (low1+high1)/2; \
274       if (rp1[t] > col) high1 = t; \
275       else             low1  = t; \
276     } \
277       for (_i=low1; _i<high1; _i++) { \
278         if (rp1[_i] > col) break; \
279         if (rp1[_i] == col) { \
280           if (addv == ADD_VALUES) ap1[_i] += value;   \
281           else                    ap1[_i] = value; \
282           goto a_noinsert; \
283         } \
284       }  \
285       if (value == 0.0 && ignorezeroentries) {low1 = 0; high1 = nrow1;goto a_noinsert;} \
286       if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;}		\
287       if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
288       MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \
289       N = nrow1++ - 1; a->nz++; high1++; \
290       /* shift up all the later entries in this row */ \
291       for (ii=N; ii>=_i; ii--) { \
292         rp1[ii+1] = rp1[ii]; \
293         ap1[ii+1] = ap1[ii]; \
294       } \
295       rp1[_i] = col;  \
296       ap1[_i] = value;  \
297       a_noinsert: ; \
298       ailen[row] = nrow1; \
299 }
300 
301 
302 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \
303 { \
304     if (col <= lastcol2) low2 = 0; else high2 = nrow2; \
305     lastcol2 = col;\
306     while (high2-low2 > 5) { \
307       t = (low2+high2)/2; \
308       if (rp2[t] > col) high2 = t; \
309       else             low2  = t; \
310     } \
311     for (_i=low2; _i<high2; _i++) {		\
312       if (rp2[_i] > col) break;			\
313       if (rp2[_i] == col) {			      \
314 	if (addv == ADD_VALUES) ap2[_i] += value;     \
315 	else                    ap2[_i] = value;      \
316 	goto b_noinsert;			      \
317       }						      \
318     }							      \
319     if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
320     if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;}		\
321     if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
322     MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \
323     N = nrow2++ - 1; b->nz++; high2++;					\
324     /* shift up all the later entries in this row */			\
325     for (ii=N; ii>=_i; ii--) {						\
326       rp2[ii+1] = rp2[ii];						\
327       ap2[ii+1] = ap2[ii];						\
328     }									\
329     rp2[_i] = col;							\
330     ap2[_i] = value;							\
331     b_noinsert: ;								\
332     bilen[row] = nrow2;							\
333 }
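/*
    Both macros above implement the same sorted-row insertion: a coarse binary search narrows
    the window to at most 5 entries, a linear scan locates the column, and if the column is new
    the tail of the row is shifted up by one slot.  A free-standing sketch of the same idea
    (illustrative only, hypothetical names, assumes space for one extra entry and no
    reallocation or error handling):

       static void RowInsert(PetscInt *cols,PetscScalar *vals,PetscInt *nrow,PetscInt col,PetscScalar value)
       {
         PetscInt low = 0,high = *nrow,t,i,ii;
         while (high-low > 5) {
           t = (low+high)/2;
           if (cols[t] > col) high = t;
           else               low  = t;
         }
         for (i=low; i<high; i++) {
           if (cols[i] >= col) break;
         }
         if (i < *nrow && cols[i] == col) {vals[i] = value; return;}
         for (ii=*nrow-1; ii>=i; ii--) {cols[ii+1] = cols[ii]; vals[ii+1] = vals[ii];}
         cols[i] = col; vals[i] = value; (*nrow)++;
       }
*/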
334 
335 #undef __FUNCT__
336 #define __FUNCT__ "MatSetValuesRow_MPIAIJ"
337 PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[])
338 {
339   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)A->data;
340   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data;
341   PetscErrorCode ierr;
342   PetscInt       l,*garray = mat->garray,diag;
343 
344   PetscFunctionBegin;
345   /* code only works for square matrices A */
346 
347   /* find size of row to the left of the diagonal part */
348   ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr);
349   row  = row - diag;
350   for (l=0; l<b->i[row+1]-b->i[row]; l++) {
351     if (garray[b->j[b->i[row]+l]] > diag) break;
352   }
353   ierr = PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));CHKERRQ(ierr);
354 
355   /* diagonal part */
356   ierr = PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));CHKERRQ(ierr);
357 
358   /* right of diagonal part */
359   ierr = PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));CHKERRQ(ierr);
360   PetscFunctionReturn(0);
361 }
362 
363 #undef __FUNCT__
364 #define __FUNCT__ "MatSetValues_MPIAIJ"
365 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
366 {
367   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
368   PetscScalar    value;
369   PetscErrorCode ierr;
370   PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
371   PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
372   PetscBool      roworiented = aij->roworiented;
373 
374   /* Some Variables required in the macro */
375   Mat            A = aij->A;
376   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data;
377   PetscInt       *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
378   MatScalar      *aa = a->a;
379   PetscBool      ignorezeroentries = a->ignorezeroentries;
380   Mat            B = aij->B;
381   Mat_SeqAIJ     *b = (Mat_SeqAIJ*)B->data;
382   PetscInt       *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
383   MatScalar      *ba = b->a;
384 
385   PetscInt       *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
386   PetscInt       nonew = a->nonew;
387   MatScalar      *ap1,*ap2;
388 
389   PetscFunctionBegin;
390   if (v) PetscValidScalarPointer(v,6);
391   for (i=0; i<m; i++) {
392     if (im[i] < 0) continue;
393 #if defined(PETSC_USE_DEBUG)
394     if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
395 #endif
396     if (im[i] >= rstart && im[i] < rend) {
397       row      = im[i] - rstart;
398       lastcol1 = -1;
399       rp1      = aj + ai[row];
400       ap1      = aa + ai[row];
401       rmax1    = aimax[row];
402       nrow1    = ailen[row];
403       low1     = 0;
404       high1    = nrow1;
405       lastcol2 = -1;
406       rp2      = bj + bi[row];
407       ap2      = ba + bi[row];
408       rmax2    = bimax[row];
409       nrow2    = bilen[row];
410       low2     = 0;
411       high2    = nrow2;
412 
413       for (j=0; j<n; j++) {
414         if (v) {if (roworiented) value = v[i*n+j]; else value = v[i+j*m];} else value = 0.0;
415         if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
416         if (in[j] >= cstart && in[j] < cend){
417           col = in[j] - cstart;
418           MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
419         } else if (in[j] < 0) continue;
420 #if defined(PETSC_USE_DEBUG)
421         else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
422 #endif
423         else {
424           if (mat->was_assembled) {
425             if (!aij->colmap) {
426               ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
427             }
428 #if defined (PETSC_USE_CTABLE)
429             ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
430             col--;
431 #else
432             col = aij->colmap[in[j]] - 1;
433 #endif
434             if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
435               ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
436               col =  in[j];
437               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
438               B = aij->B;
439               b = (Mat_SeqAIJ*)B->data;
440               bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a;
441               rp2      = bj + bi[row];
442               ap2      = ba + bi[row];
443               rmax2    = bimax[row];
444               nrow2    = bilen[row];
445               low2     = 0;
446               high2    = nrow2;
447               bm       = aij->B->rmap->n;
448               ba = b->a;
449             }
450           } else col = in[j];
451           MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
452         }
453       }
454     } else {
455       if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
456       if (!aij->donotstash) {
457         if (roworiented) {
458           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
459         } else {
460           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
461         }
462       }
463     }
464   }
465   PetscFunctionReturn(0);
466 }
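/*
    A hedged usage sketch (not part of the library): setting a locally owned entry and letting
    off-process entries (if any) travel through the stash during assembly.

       PetscInt    rstart,rend,col = 0;
       PetscScalar v = 1.0;
       ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
       ierr = MatSetValues(A,1,&rstart,1,&col,&v,ADD_VALUES);CHKERRQ(ierr);
       ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
       ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
*/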
467 
468 #undef __FUNCT__
469 #define __FUNCT__ "MatGetValues_MPIAIJ"
470 PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
471 {
472   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
473   PetscErrorCode ierr;
474   PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
475   PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
476 
477   PetscFunctionBegin;
478   for (i=0; i<m; i++) {
479     if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/
480     if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
481     if (idxm[i] >= rstart && idxm[i] < rend) {
482       row = idxm[i] - rstart;
483       for (j=0; j<n; j++) {
484         if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */
485         if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
486         if (idxn[j] >= cstart && idxn[j] < cend){
487           col = idxn[j] - cstart;
488           ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
489         } else {
490           if (!aij->colmap) {
491             ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
492           }
493 #if defined (PETSC_USE_CTABLE)
494           ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
495           col --;
496 #else
497           col = aij->colmap[idxn[j]] - 1;
498 #endif
499           if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
500           else {
501             ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
502           }
503         }
504       }
505     } else {
506       SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
507     }
508   }
509   PetscFunctionReturn(0);
510 }
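/*
    A hedged usage sketch (not part of the library): MatGetValues() on a MATMPIAIJ matrix can
    only retrieve entries from locally owned rows (see the error branch above); the columns may
    be anywhere in the matrix.

       PetscInt    rstart,rend,col = 0;
       PetscScalar v;
       ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
       ierr = MatGetValues(A,1,&rstart,1,&col,&v);CHKERRQ(ierr);
*/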
511 
512 extern PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat,Vec,Vec);
513 
514 #undef __FUNCT__
515 #define __FUNCT__ "MatAssemblyBegin_MPIAIJ"
516 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
517 {
518   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
519   PetscErrorCode ierr;
520   PetscInt       nstash,reallocs;
521   InsertMode     addv;
522 
523   PetscFunctionBegin;
524   if (aij->donotstash || mat->nooffprocentries) {
525     PetscFunctionReturn(0);
526   }
527 
528   /* make sure all processors are either in INSERTMODE or ADDMODE */
529   ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,((PetscObject)mat)->comm);CHKERRQ(ierr);
530   if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
531   mat->insertmode = addv; /* in case this processor had no cache */
532 
533   ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
534   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
535   ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
536   PetscFunctionReturn(0);
537 }
538 
539 #undef __FUNCT__
540 #define __FUNCT__ "MatAssemblyEnd_MPIAIJ"
541 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
542 {
543   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
544   Mat_SeqAIJ     *a=(Mat_SeqAIJ *)aij->A->data;
545   PetscErrorCode ierr;
546   PetscMPIInt    n;
547   PetscInt       i,j,rstart,ncols,flg;
548   PetscInt       *row,*col;
549   PetscBool      other_disassembled;
550   PetscScalar    *val;
551   InsertMode     addv = mat->insertmode;
552 
553   /* do not use 'b = (Mat_SeqAIJ *)aij->B->data' as B can be reset in disassembly */
554   PetscFunctionBegin;
555   if (!aij->donotstash && !mat->nooffprocentries) {
556     while (1) {
557       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
558       if (!flg) break;
559 
560       for (i=0; i<n;) {
561         /* Now identify the consecutive vals belonging to the same row */
562         for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
563         if (j < n) ncols = j-i;
564         else       ncols = n-i;
565         /* Now assemble all these values with a single function call */
566         ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
567         i = j;
568       }
569     }
570     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
571   }
572   ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
573   ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);
574 
575   /* determine if any processor has disassembled; if so we must
576      also disassemble ourselves, in order that we may reassemble. */
577   /*
578      if nonzero structure of submatrix B cannot change then we know that
579      no processor disassembled thus we can skip this stuff
580   */
581   if (!((Mat_SeqAIJ*)aij->B->data)->nonew)  {
582     ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,((PetscObject)mat)->comm);CHKERRQ(ierr);
583     if (mat->was_assembled && !other_disassembled) {
584       ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
585     }
586   }
587   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
588     ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
589   }
590   ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr);
591   ierr = MatSetOption(aij->B,MAT_CHECK_COMPRESSED_ROW,PETSC_FALSE);CHKERRQ(ierr);
592   ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
593   ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);
594 
595   ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
596   aij->rowvalues = 0;
597 
598   /* used by MatAXPY() */
599   a->xtoy = 0; ((Mat_SeqAIJ *)aij->B->data)->xtoy = 0;  /* b->xtoy = 0 */
600   a->XtoY = 0; ((Mat_SeqAIJ *)aij->B->data)->XtoY = 0;  /* b->XtoY = 0 */
601 
602   if (aij->diag) {ierr = VecDestroy(aij->diag);CHKERRQ(ierr);aij->diag = 0;}
603   if (a->inode.size) mat->ops->multdiagonalblock = MatMultDiagonalBlock_MPIAIJ;
604   PetscFunctionReturn(0);
605 }
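/*
    A hedged usage sketch (not part of the library): the Allreduce in MatAssemblyBegin_MPIAIJ()
    requires all processes to be in the same insert mode, so switching between INSERT_VALUES and
    ADD_VALUES requires a flush assembly in between.

       PetscInt    row = 0,col = 0;
       PetscScalar v = 1.0;
       ierr = MatSetValues(A,1,&row,1,&col,&v,INSERT_VALUES);CHKERRQ(ierr);
       ierr = MatAssemblyBegin(A,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr);
       ierr = MatAssemblyEnd(A,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr);
       ierr = MatSetValues(A,1,&row,1,&col,&v,ADD_VALUES);CHKERRQ(ierr);
       ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
       ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
*/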
606 
607 #undef __FUNCT__
608 #define __FUNCT__ "MatZeroEntries_MPIAIJ"
609 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
610 {
611   Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
612   PetscErrorCode ierr;
613 
614   PetscFunctionBegin;
615   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
616   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
617   PetscFunctionReturn(0);
618 }
619 
620 #undef __FUNCT__
621 #define __FUNCT__ "MatZeroRows_MPIAIJ"
622 PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
623 {
624   Mat_MPIAIJ        *l = (Mat_MPIAIJ*)A->data;
625   PetscErrorCode    ierr;
626   PetscMPIInt       size = l->size,imdex,n,rank = l->rank,tag = ((PetscObject)A)->tag,lastidx = -1;
627   PetscInt          i,*owners = A->rmap->range;
628   PetscInt          *nprocs,j,idx,nsends,row;
629   PetscInt          nmax,*svalues,*starts,*owner,nrecvs;
630   PetscInt          *rvalues,count,base,slen,*source;
631   PetscInt          *lens,*lrows,*values,rstart=A->rmap->rstart;
632   MPI_Comm          comm = ((PetscObject)A)->comm;
633   MPI_Request       *send_waits,*recv_waits;
634   MPI_Status        recv_status,*send_status;
635   const PetscScalar *xx;
636   PetscScalar       *bb;
637 #if defined(PETSC_DEBUG)
638   PetscBool      found = PETSC_FALSE;
639 #endif
640 
641   PetscFunctionBegin;
642   /*  first count number of contributors to each processor */
643   ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
644   ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
645   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/
646   j = 0;
647   for (i=0; i<N; i++) {
648     if (lastidx > (idx = rows[i])) j = 0;
649     lastidx = idx;
650     for (; j<size; j++) {
651       if (idx >= owners[j] && idx < owners[j+1]) {
652         nprocs[2*j]++;
653         nprocs[2*j+1] = 1;
654         owner[i] = j;
655 #if defined(PETSC_DEBUG)
656         found = PETSC_TRUE;
657 #endif
658         break;
659       }
660     }
661 #if defined(PETSC_DEBUG)
662     if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
663     found = PETSC_FALSE;
664 #endif
665   }
666   nsends = 0;  for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}
667 
668   if (A->nooffproczerorows) {
669     if (nsends > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"You called MatSetOption(,MAT_NO_OFF_PROC_ZERO_ROWS,PETSC_TRUE) but set an off process zero row");
670     nrecvs = nsends;
671     nmax   = N;
672   } else {
673     /* inform other processors of number of messages and max length*/
674     ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);
675   }
676 
677   /* post receives:   */
678   ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr);
679   ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
680   for (i=0; i<nrecvs; i++) {
681     ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
682   }
683 
684   /* do sends:
685       1) starts[i] gives the starting index in svalues for stuff going to
686          the ith processor
687   */
688   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr);
689   ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
690   ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr);
691   starts[0] = 0;
692   for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
693   for (i=0; i<N; i++) {
694     svalues[starts[owner[i]]++] = rows[i];
695   }
696 
697   starts[0] = 0;
698   for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
699   count = 0;
700   for (i=0; i<size; i++) {
701     if (nprocs[2*i+1]) {
702       ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
703     }
704   }
705   ierr = PetscFree(starts);CHKERRQ(ierr);
706 
707   base = owners[rank];
708 
709   /*  wait on receives */
710   ierr   = PetscMalloc2(nrecvs,PetscInt,&lens,nrecvs,PetscInt,&source);CHKERRQ(ierr);
711   count  = nrecvs; slen = 0;
712   while (count) {
713     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
714     /* unpack receives into our local space */
715     ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);
716     source[imdex]  = recv_status.MPI_SOURCE;
717     lens[imdex]    = n;
718     slen          += n;
719     count--;
720   }
721   ierr = PetscFree(recv_waits);CHKERRQ(ierr);
722 
723   /* move the data into the send scatter */
724   ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr);
725   count = 0;
726   for (i=0; i<nrecvs; i++) {
727     values = rvalues + i*nmax;
728     for (j=0; j<lens[i]; j++) {
729       lrows[count++] = values[j] - base;
730     }
731   }
732   ierr = PetscFree(rvalues);CHKERRQ(ierr);
733   ierr = PetscFree2(lens,source);CHKERRQ(ierr);
734   ierr = PetscFree(owner);CHKERRQ(ierr);
735   ierr = PetscFree(nprocs);CHKERRQ(ierr);
736 
737   /* fix right hand side if needed */
738   if (x && b) {
739     ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr);
740     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
741     for (i=0; i<slen; i++) {
742       bb[lrows[i]] = diag*xx[lrows[i]];
743     }
744     ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr);
745     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
746   }
747   /*
748      Zero the required rows. If the "diagonal block" of the matrix
749      is square and the user wishes to set the diagonal, we use separate
750      code so that MatSetValues() is not called for each diagonal entry,
751      which would allocate new memory with many mallocs, slowing things down.
752   */
753 
754   /* must zero l->B before l->A because the (diag) case below may put values into l->B */
755   ierr = MatZeroRows(l->B,slen,lrows,0.0,0,0);CHKERRQ(ierr);
756   if ((diag != 0.0) && (l->A->rmap->N == l->A->cmap->N)) {
757     ierr = MatZeroRows(l->A,slen,lrows,diag,0,0);CHKERRQ(ierr);
758   } else if (diag != 0.0) {
759     ierr = MatZeroRows(l->A,slen,lrows,0.0,0,0);CHKERRQ(ierr);
760     if (((Mat_SeqAIJ*)l->A->data)->nonew) {
761       SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\n\
762 MAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
763     }
764     for (i = 0; i < slen; i++) {
765       row  = lrows[i] + rstart;
766       ierr = MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr);
767     }
768     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
769     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
770   } else {
771     ierr = MatZeroRows(l->A,slen,lrows,0.0,0,0);CHKERRQ(ierr);
772   }
773   ierr = PetscFree(lrows);CHKERRQ(ierr);
774 
775   /* wait on sends */
776   if (nsends) {
777     ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
778     ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
779     ierr = PetscFree(send_status);CHKERRQ(ierr);
780   }
781   ierr = PetscFree(send_waits);CHKERRQ(ierr);
782   ierr = PetscFree(svalues);CHKERRQ(ierr);
783 
784   PetscFunctionReturn(0);
785 }
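/*
    A hedged usage sketch (not part of the library), using the six-argument MatZeroRows()
    calling sequence implemented above: zero the listed global rows of A, place 1.0 on their
    diagonal entries, and adjust the right hand side b so that the solution keeps the values of
    x in those rows.  Passing 0 for x and b (as done internally for l->A and l->B) skips the
    right hand side fix.

       PetscInt rows[2] = {0,1};
       ierr = MatZeroRows(A,2,rows,1.0,x,b);CHKERRQ(ierr);
*/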
786 
787 #undef __FUNCT__
788 #define __FUNCT__ "MatZeroRowsColumns_MPIAIJ"
789 PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
790 {
791   Mat_MPIAIJ        *l = (Mat_MPIAIJ*)A->data;
792   PetscErrorCode    ierr;
793   PetscMPIInt       size = l->size,imdex,n,rank = l->rank,tag = ((PetscObject)A)->tag,lastidx = -1;
794   PetscInt          i,*owners = A->rmap->range;
795   PetscInt          *nprocs,j,idx,nsends;
796   PetscInt          nmax,*svalues,*starts,*owner,nrecvs;
797   PetscInt          *rvalues,count,base,slen,*source;
798   PetscInt          *lens,*lrows,*values,m;
799   MPI_Comm          comm = ((PetscObject)A)->comm;
800   MPI_Request       *send_waits,*recv_waits;
801   MPI_Status        recv_status,*send_status;
802   const PetscScalar *xx;
803   PetscScalar       *bb,*mask;
804   Vec               xmask,lmask;
805   Mat_SeqAIJ        *aij = (Mat_SeqAIJ*)l->B->data;
806   const PetscInt    *aj, *ii,*ridx;
807   PetscScalar       *aa;
808 #if defined(PETSC_DEBUG)
809   PetscBool         found = PETSC_FALSE;
810 #endif
811 
812   PetscFunctionBegin;
813   /*  first count number of contributors to each processor */
814   ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
815   ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
816   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/
817   j = 0;
818   for (i=0; i<N; i++) {
819     if (lastidx > (idx = rows[i])) j = 0;
820     lastidx = idx;
821     for (; j<size; j++) {
822       if (idx >= owners[j] && idx < owners[j+1]) {
823         nprocs[2*j]++;
824         nprocs[2*j+1] = 1;
825         owner[i] = j;
826 #if defined(PETSC_DEBUG)
827         found = PETSC_TRUE;
828 #endif
829         break;
830       }
831     }
832 #if defined(PETSC_DEBUG)
833     if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
834     found = PETSC_FALSE;
835 #endif
836   }
837   nsends = 0;  for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}
838 
839   /* inform other processors of number of messages and max length*/
840   ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);
841 
842   /* post receives:   */
843   ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr);
844   ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
845   for (i=0; i<nrecvs; i++) {
846     ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
847   }
848 
849   /* do sends:
850       1) starts[i] gives the starting index in svalues for stuff going to
851          the ith processor
852   */
853   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr);
854   ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
855   ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr);
856   starts[0] = 0;
857   for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
858   for (i=0; i<N; i++) {
859     svalues[starts[owner[i]]++] = rows[i];
860   }
861 
862   starts[0] = 0;
863   for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
864   count = 0;
865   for (i=0; i<size; i++) {
866     if (nprocs[2*i+1]) {
867       ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
868     }
869   }
870   ierr = PetscFree(starts);CHKERRQ(ierr);
871 
872   base = owners[rank];
873 
874   /*  wait on receives */
875   ierr   = PetscMalloc2(nrecvs,PetscInt,&lens,nrecvs,PetscInt,&source);CHKERRQ(ierr);
876   count  = nrecvs; slen = 0;
877   while (count) {
878     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
879     /* unpack receives into our local space */
880     ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);
881     source[imdex]  = recv_status.MPI_SOURCE;
882     lens[imdex]    = n;
883     slen          += n;
884     count--;
885   }
886   ierr = PetscFree(recv_waits);CHKERRQ(ierr);
887 
888   /* move the data into the send scatter */
889   ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr);
890   count = 0;
891   for (i=0; i<nrecvs; i++) {
892     values = rvalues + i*nmax;
893     for (j=0; j<lens[i]; j++) {
894       lrows[count++] = values[j] - base;
895     }
896   }
897   ierr = PetscFree(rvalues);CHKERRQ(ierr);
898   ierr = PetscFree2(lens,source);CHKERRQ(ierr);
899   ierr = PetscFree(owner);CHKERRQ(ierr);
900   ierr = PetscFree(nprocs);CHKERRQ(ierr);
901   /* lrows are the local rows to be zeroed, slen is the number of local rows */
902 
903   /* zero diagonal part of matrix */
904   ierr = MatZeroRowsColumns(l->A,slen,lrows,diag,x,b);CHKERRQ(ierr);
905 
906   /* handle off diagonal part of matrix */
907   ierr = MatGetVecs(A,&xmask,PETSC_NULL);CHKERRQ(ierr);
908   ierr = VecDuplicate(l->lvec,&lmask);CHKERRQ(ierr);
909   ierr = VecGetArray(xmask,&bb);CHKERRQ(ierr);
910   for (i=0; i<slen; i++) {
911     bb[lrows[i]] = 1;
912   }
913   ierr = VecRestoreArray(xmask,&bb);CHKERRQ(ierr);
914   ierr = VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
915   ierr = VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
916   ierr = VecDestroy(xmask);CHKERRQ(ierr);
917   ierr = VecScatterBegin(l->Mvctx,x,l->lvec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
918   ierr = VecScatterEnd(l->Mvctx,x,l->lvec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
919   ierr = VecGetArrayRead(l->lvec,&xx);CHKERRQ(ierr);
920   ierr = VecGetArray(lmask,&mask);CHKERRQ(ierr);
921   ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
922 
923   /* remove zeroed rows of off diagonal matrix */
924   ii = aij->i;
925   for (i=0; i<slen; i++) {
926     ierr = PetscMemzero(aij->a + ii[lrows[i]],(ii[lrows[i]+1] - ii[lrows[i]])*sizeof(PetscScalar));CHKERRQ(ierr);
927   }
928 
929   /* loop over all elements of the off-process part of the matrix, zeroing the removed columns */
930   if (aij->compressedrow.use){
931     m    = aij->compressedrow.nrows;
932     ii   = aij->compressedrow.i;
933     ridx = aij->compressedrow.rindex;
934     for (i=0; i<m; i++){
935       n   = ii[i+1] - ii[i];
936       aj  = aij->j + ii[i];
937       aa  = aij->a + ii[i];
938 
939       for (j=0; j<n; j++) {
940         if (PetscAbsScalar(mask[*aj])) {
941           bb[*ridx] -= *aa*xx[*aj];
942           *aa        = 0.0;
943         }
944         aa++;
945         aj++;
946       }
947       ridx++;
948     }
949   } else { /* do not use compressed row format */
950     m = l->B->rmap->n;
951     for (i=0; i<m; i++) {
952       n   = ii[i+1] - ii[i];
953       aj  = aij->j + ii[i];
954       aa  = aij->a + ii[i];
955       for (j=0; j<n; j++) {
956         if (PetscAbsScalar(mask[*aj])) {
957           bb[i] -= *aa*xx[*aj];
958           *aa    = 0.0;
959         }
960         aa++;
961         aj++;
962       }
963     }
964   }
965 
966   ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
967   ierr = VecRestoreArray(lmask,&mask);CHKERRQ(ierr);
968   ierr = VecRestoreArrayRead(l->lvec,&xx);CHKERRQ(ierr);
969   ierr = VecDestroy(lmask);CHKERRQ(ierr);
970   ierr = PetscFree(lrows);CHKERRQ(ierr);
971 
972   /* wait on sends */
973   if (nsends) {
974     ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
975     ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
976     ierr = PetscFree(send_status);CHKERRQ(ierr);
977   }
978   ierr = PetscFree(send_waits);CHKERRQ(ierr);
979   ierr = PetscFree(svalues);CHKERRQ(ierr);
980 
981   PetscFunctionReturn(0);
982 }
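/*
    A hedged usage sketch (not part of the library): eliminate both the rows and the columns
    listed in rows[] while keeping diag on the diagonal; when x and b are supplied, the known
    values in x are folded into the right hand side b before the columns are zeroed.

       ierr = MatZeroRowsColumns(A,nrows,rows,1.0,x,b);CHKERRQ(ierr);
*/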
983 
984 #undef __FUNCT__
985 #define __FUNCT__ "MatMult_MPIAIJ"
986 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
987 {
988   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
989   PetscErrorCode ierr;
990   PetscInt       nt;
991 
992   PetscFunctionBegin;
993   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
994   if (nt != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt);
995   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
996   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
997   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
998   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
999   PetscFunctionReturn(0);
1000 }
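/*
    A hedged usage sketch (not part of the library): y = A*x with vectors whose parallel layouts
    match the matrix, obtained here from MatGetVecs().

       Vec x,y;
       ierr = MatGetVecs(A,&x,&y);CHKERRQ(ierr);
       ierr = VecSet(x,1.0);CHKERRQ(ierr);
       ierr = MatMult(A,x,y);CHKERRQ(ierr);
       ierr = VecDestroy(x);CHKERRQ(ierr);
       ierr = VecDestroy(y);CHKERRQ(ierr);
*/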
1001 
1002 #undef __FUNCT__
1003 #define __FUNCT__ "MatMultDiagonalBlock_MPIAIJ"
1004 PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx)
1005 {
1006   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1007   PetscErrorCode ierr;
1008 
1009   PetscFunctionBegin;
1010   ierr = MatMultDiagonalBlock(a->A,bb,xx);CHKERRQ(ierr);
1011   PetscFunctionReturn(0);
1012 }
1013 
1014 #undef __FUNCT__
1015 #define __FUNCT__ "MatMultAdd_MPIAIJ"
1016 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1017 {
1018   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1019   PetscErrorCode ierr;
1020 
1021   PetscFunctionBegin;
1022   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1023   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1024   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1025   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
1026   PetscFunctionReturn(0);
1027 }
1028 
1029 #undef __FUNCT__
1030 #define __FUNCT__ "MatMultTranspose_MPIAIJ"
1031 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
1032 {
1033   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1034   PetscErrorCode ierr;
1035   PetscBool      merged;
1036 
1037   PetscFunctionBegin;
1038   ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr);
1039   /* do nondiagonal part */
1040   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1041   if (!merged) {
1042     /* send it on its way */
1043     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1044     /* do local part */
1045     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
1046     /* receive remote parts: note this assumes the values are not actually */
1047     /* added in yy until the next line, */
1048     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1049   } else {
1050     /* do local part */
1051     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
1052     /* send it on its way */
1053     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1054     /* values actually were received in the Begin() but we need to call this nop */
1055     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1056   }
1057   PetscFunctionReturn(0);
1058 }
1059 
1060 EXTERN_C_BEGIN
1061 #undef __FUNCT__
1062 #define __FUNCT__ "MatIsTranspose_MPIAIJ"
1063 PetscErrorCode  MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscBool  *f)
1064 {
1065   MPI_Comm       comm;
1066   Mat_MPIAIJ     *Aij = (Mat_MPIAIJ *) Amat->data, *Bij;
1067   Mat            Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs;
1068   IS             Me,Notme;
1069   PetscErrorCode ierr;
1070   PetscInt       M,N,first,last,*notme,i;
1071   PetscMPIInt    size;
1072 
1073   PetscFunctionBegin;
1074 
1075   /* Easy test: symmetric diagonal block */
1076   Bij = (Mat_MPIAIJ *) Bmat->data; Bdia = Bij->A;
1077   ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr);
1078   if (!*f) PetscFunctionReturn(0);
1079   ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr);
1080   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1081   if (size == 1) PetscFunctionReturn(0);
1082 
1083   /* Hard test: off-diagonal block. This takes a MatGetSubMatrix. */
1084   ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr);
1085   ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr);
1086   ierr = PetscMalloc((N-last+first)*sizeof(PetscInt),&notme);CHKERRQ(ierr);
1087   for (i=0; i<first; i++) notme[i] = i;
1088   for (i=last; i<M; i++) notme[i-last+first] = i;
1089   ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,PETSC_COPY_VALUES,&Notme);CHKERRQ(ierr);
1090   ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr);
1091   ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr);
1092   Aoff = Aoffs[0];
1093   ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr);
1094   Boff = Boffs[0];
1095   ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr);
1096   ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr);
1097   ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr);
1098   ierr = ISDestroy(Me);CHKERRQ(ierr);
1099   ierr = ISDestroy(Notme);CHKERRQ(ierr);
1100 
1101   PetscFunctionReturn(0);
1102 }
1103 EXTERN_C_END
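/*
    A hedged usage sketch (not part of the library): test whether Bmat equals the transpose of
    Amat to within tol; the cheap diagonal-block test above is performed first and the
    off-diagonal blocks are compared only if it passes.

       PetscBool flg;
       ierr = MatIsTranspose(Amat,Bmat,1.e-10,&flg);CHKERRQ(ierr);
*/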
1104 
1105 #undef __FUNCT__
1106 #define __FUNCT__ "MatMultTransposeAdd_MPIAIJ"
1107 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1108 {
1109   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1110   PetscErrorCode ierr;
1111 
1112   PetscFunctionBegin;
1113   /* do nondiagonal part */
1114   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1115   /* send it on its way */
1116   ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1117   /* do local part */
1118   ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1119   /* receive remote parts */
1120   ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1121   PetscFunctionReturn(0);
1122 }
1123 
1124 /*
1125   This only works correctly for square matrices where the subblock A->A is the
1126    diagonal block
1127 */
1128 #undef __FUNCT__
1129 #define __FUNCT__ "MatGetDiagonal_MPIAIJ"
1130 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v)
1131 {
1132   PetscErrorCode ierr;
1133   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1134 
1135   PetscFunctionBegin;
1136   if (A->rmap->N != A->cmap->N) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
1137   if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
1138   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
1139   PetscFunctionReturn(0);
1140 }
1141 
1142 #undef __FUNCT__
1143 #define __FUNCT__ "MatScale_MPIAIJ"
1144 PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa)
1145 {
1146   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1147   PetscErrorCode ierr;
1148 
1149   PetscFunctionBegin;
1150   ierr = MatScale(a->A,aa);CHKERRQ(ierr);
1151   ierr = MatScale(a->B,aa);CHKERRQ(ierr);
1152   PetscFunctionReturn(0);
1153 }
1154 
1155 #undef __FUNCT__
1156 #define __FUNCT__ "MatDestroy_MPIAIJ"
1157 PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
1158 {
1159   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1160   PetscErrorCode ierr;
1161 
1162   PetscFunctionBegin;
1163 #if defined(PETSC_USE_LOG)
1164   PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N);
1165 #endif
1166   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
1167   if (aij->diag) {ierr = VecDestroy(aij->diag);CHKERRQ(ierr);}
1168   if (aij->A){ierr = MatDestroy(aij->A);CHKERRQ(ierr);}
1169   if (aij->B){ierr = MatDestroy(aij->B);CHKERRQ(ierr);}
1170 #if defined (PETSC_USE_CTABLE)
1171   if (aij->colmap) {ierr = PetscTableDestroy(aij->colmap);CHKERRQ(ierr);}
1172 #else
1173   ierr = PetscFree(aij->colmap);CHKERRQ(ierr);
1174 #endif
1175   ierr = PetscFree(aij->garray);CHKERRQ(ierr);
1176   if (aij->lvec)   {ierr = VecDestroy(aij->lvec);CHKERRQ(ierr);}
1177   if (aij->Mvctx)  {ierr = VecScatterDestroy(aij->Mvctx);CHKERRQ(ierr);}
1178   ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
1179   ierr = PetscFree(aij->ld);CHKERRQ(ierr);
1180   ierr = PetscFree(aij);CHKERRQ(ierr);
1181 
1182   ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
1183   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr);
1184   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr);
1185   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr);
1186   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C","",PETSC_NULL);CHKERRQ(ierr);
1187   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
1188   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr);
1189   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C","",PETSC_NULL);CHKERRQ(ierr);
1190   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C","",PETSC_NULL);CHKERRQ(ierr);
1191   PetscFunctionReturn(0);
1192 }
1193 
1194 #undef __FUNCT__
1195 #define __FUNCT__ "MatView_MPIAIJ_Binary"
1196 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
1197 {
1198   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
1199   Mat_SeqAIJ*       A = (Mat_SeqAIJ*)aij->A->data;
1200   Mat_SeqAIJ*       B = (Mat_SeqAIJ*)aij->B->data;
1201   PetscErrorCode    ierr;
1202   PetscMPIInt       rank,size,tag = ((PetscObject)viewer)->tag;
1203   int               fd;
1204   PetscInt          nz,header[4],*row_lengths,*range=0,rlen,i;
1205   PetscInt          nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap->rstart,rnz;
1206   PetscScalar       *column_values;
1207   PetscInt          message_count,flowcontrolcount;
1208 
1209   PetscFunctionBegin;
1210   ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr);
1211   ierr = MPI_Comm_size(((PetscObject)mat)->comm,&size);CHKERRQ(ierr);
1212   nz   = A->nz + B->nz;
1213   if (!rank) {
1214     header[0] = MAT_FILE_CLASSID;
1215     header[1] = mat->rmap->N;
1216     header[2] = mat->cmap->N;
1217     ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
1218     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1219     ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1220     /* get largest number of rows any processor has */
1221     rlen = mat->rmap->n;
1222     range = mat->rmap->range;
1223     for (i=1; i<size; i++) {
1224       rlen = PetscMax(rlen,range[i+1] - range[i]);
1225     }
1226   } else {
1227     ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
1228     rlen = mat->rmap->n;
1229   }
1230 
1231   /* load up the local row counts */
1232   ierr = PetscMalloc((rlen+1)*sizeof(PetscInt),&row_lengths);CHKERRQ(ierr);
1233   for (i=0; i<mat->rmap->n; i++) {
1234     row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
1235   }
1236 
1237   /* store the row lengths to the file */
1238   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1239   if (!rank) {
1240     MPI_Status status;
1241     ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap->n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1242     for (i=1; i<size; i++) {
1243       ierr = PetscViewerFlowControlStepMaster(viewer,i,message_count,flowcontrolcount);CHKERRQ(ierr);
1244       rlen = range[i+1] - range[i];
1245       ierr = MPI_Recv(row_lengths,rlen,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
1246       ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1247     }
1248     ierr = PetscViewerFlowControlEndMaster(viewer,message_count);CHKERRQ(ierr);
1249   } else {
1250     ierr = PetscViewerFlowControlStepWorker(viewer,rank,message_count);CHKERRQ(ierr);
1251     ierr = MPI_Send(row_lengths,mat->rmap->n,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1252     ierr = PetscViewerFlowControlEndWorker(viewer,message_count);CHKERRQ(ierr);
1253   }
1254   ierr = PetscFree(row_lengths);CHKERRQ(ierr);
1255 
1256   /* load up the local column indices */
1257   nzmax = nz; /* the 0th process needs as much space as the largest process needs */
1258   ierr = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
1259   ierr = PetscMalloc((nzmax+1)*sizeof(PetscInt),&column_indices);CHKERRQ(ierr);
1260   cnt  = 0;
1261   for (i=0; i<mat->rmap->n; i++) {
1262     for (j=B->i[i]; j<B->i[i+1]; j++) {
1263       if ( (col = garray[B->j[j]]) > cstart) break;
1264       column_indices[cnt++] = col;
1265     }
1266     for (k=A->i[i]; k<A->i[i+1]; k++) {
1267       column_indices[cnt++] = A->j[k] + cstart;
1268     }
1269     for (; j<B->i[i+1]; j++) {
1270       column_indices[cnt++] = garray[B->j[j]];
1271     }
1272   }
1273   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
1274 
1275   /* store the column indices to the file */
1276    ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1277   if (!rank) {
1278     MPI_Status status;
1279     ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1280     for (i=1; i<size; i++) {
1281       ierr = PetscViewerFlowControlStepMaster(viewer,i,message_count,flowcontrolcount);CHKERRQ(ierr);
1282       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
1283       if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: rnz = %D nzmax = %D",rnz,nzmax);
1284       ierr = MPI_Recv(column_indices,rnz,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
1285       ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1286     }
1287      ierr = PetscViewerFlowControlEndMaster(viewer,message_count);CHKERRQ(ierr);
1288   } else {
1289     ierr = PetscViewerFlowControlStepWorker(viewer,rank,message_count);CHKERRQ(ierr);
1290     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1291     ierr = MPI_Send(column_indices,nz,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1292     ierr = PetscViewerFlowControlEndWorker(viewer,message_count);CHKERRQ(ierr);
1293   }
1294   ierr = PetscFree(column_indices);CHKERRQ(ierr);
1295 
1296   /* load up the local column values */
1297   ierr = PetscMalloc((nzmax+1)*sizeof(PetscScalar),&column_values);CHKERRQ(ierr);
1298   cnt  = 0;
1299   for (i=0; i<mat->rmap->n; i++) {
1300     for (j=B->i[i]; j<B->i[i+1]; j++) {
1301       if ( garray[B->j[j]] > cstart) break;
1302       column_values[cnt++] = B->a[j];
1303     }
1304     for (k=A->i[i]; k<A->i[i+1]; k++) {
1305       column_values[cnt++] = A->a[k];
1306     }
1307     for (; j<B->i[i+1]; j++) {
1308       column_values[cnt++] = B->a[j];
1309     }
1310   }
1311   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
1312 
1313   /* store the column values to the file */
1314    ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1315   if (!rank) {
1316     MPI_Status status;
1317     ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1318     for (i=1; i<size; i++) {
1319        ierr = PetscViewerFlowControlStepMaster(viewer,i,message_count,flowcontrolcount);CHKERRQ(ierr);
1320       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
1321       if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: rnz = %D nzmax = %D",rnz,nzmax);
1322       ierr = MPI_Recv(column_values,rnz,MPIU_SCALAR,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
1323       ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1324     }
1325     ierr = PetscViewerFlowControlEndMaster(viewer,message_count);CHKERRQ(ierr);
1326   } else {
1327     ierr = PetscViewerFlowControlStepWorker(viewer,rank,message_count);CHKERRQ(ierr);
1328     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1329     ierr = MPI_Send(column_values,nz,MPIU_SCALAR,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1330     ierr = PetscViewerFlowControlEndWorker(viewer,message_count);CHKERRQ(ierr);
1331   }
1332   ierr = PetscFree(column_values);CHKERRQ(ierr);
1333   PetscFunctionReturn(0);
1334 }
1335 
1336 #undef __FUNCT__
1337 #define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket"
1338 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
1339 {
1340   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
1341   PetscErrorCode    ierr;
1342   PetscMPIInt       rank = aij->rank,size = aij->size;
1343   PetscBool         isdraw,iascii,isbinary;
1344   PetscViewer       sviewer;
1345   PetscViewerFormat format;
1346 
1347   PetscFunctionBegin;
1348   ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1349   ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1350   ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1351   if (iascii) {
1352     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1353     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1354       MatInfo    info;
1355       PetscBool  inodes;
1356 
1357       ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr);
1358       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
1359       ierr = MatInodeGetInodeSizes(aij->A,PETSC_NULL,(PetscInt **)&inodes,PETSC_NULL);CHKERRQ(ierr);
1360       if (!inodes) {
1361         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, not using I-node routines\n",
1362                                                    rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
1363       } else {
1364         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, using I-node routines\n",
1365                                                    rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
1366       }
1367       ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
1368       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1369       ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
1370       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1371       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1372       ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
1373       ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr);
1374       PetscFunctionReturn(0);
1375     } else if (format == PETSC_VIEWER_ASCII_INFO) {
1376       PetscInt   inodecount,inodelimit,*inodes;
1377       ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr);
1378       if (inodes) {
1379         ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr);
1380       } else {
1381         ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr);
1382       }
1383       PetscFunctionReturn(0);
1384     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1385       PetscFunctionReturn(0);
1386     }
1387   } else if (isbinary) {
1388     if (size == 1) {
1389       ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1390       ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
1391     } else {
1392       ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr);
1393     }
1394     PetscFunctionReturn(0);
1395   } else if (isdraw) {
1396     PetscDraw  draw;
1397     PetscBool  isnull;
1398     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
1399     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
1400   }
1401 
1402   if (size == 1) {
1403     ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1404     ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
1405   } else {
1406     /* assemble the entire matrix onto first processor. */
1407     Mat         A;
1408     Mat_SeqAIJ  *Aloc;
1409     PetscInt    M = mat->rmap->N,N = mat->cmap->N,m,*ai,*aj,row,*cols,i,*ct;
1410     MatScalar   *a;
1411 
1412     if (mat->rmap->N > 1024) {
1413       PetscBool  flg = PETSC_FALSE;
1414 
1415       ierr = PetscOptionsGetBool(((PetscObject) mat)->prefix, "-mat_ascii_output_large", &flg,PETSC_NULL);CHKERRQ(ierr);
1416       if (!flg) {
1417         SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_OUTOFRANGE,"ASCII matrix output not allowed for matrices with more than 1024 rows, use binary format instead.\nYou can override this restriction using -mat_ascii_output_large.");
1418       }
1419     }
1420 
1421     ierr = MatCreate(((PetscObject)mat)->comm,&A);CHKERRQ(ierr);
1422     if (!rank) {
1423       ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
1424     } else {
1425       ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
1426     }
1427     /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */
1428     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
1429     ierr = MatMPIAIJSetPreallocation(A,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
1430     ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr);
1431 
1432     /* copy over the A part */
1433     Aloc = (Mat_SeqAIJ*)aij->A->data;
1434     m = aij->A->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1435     row = mat->rmap->rstart;
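         /* temporarily shift the diagonal-block column indices to global numbering for
            MatSetValues(), and shift them back after all rows have been inserted */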
1436     for (i=0; i<ai[m]; i++) {aj[i] += mat->cmap->rstart;}
1437     for (i=0; i<m; i++) {
1438       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
1439       row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
1440     }
1441     aj = Aloc->j;
1442     for (i=0; i<ai[m]; i++) {aj[i] -= mat->cmap->rstart;}
1443 
1444     /* copy over the B part */
1445     Aloc = (Mat_SeqAIJ*)aij->B->data;
1446     m    = aij->B->rmap->n;  ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1447     row  = mat->rmap->rstart;
1448     ierr = PetscMalloc((ai[m]+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr);
1449     ct   = cols;
1450     for (i=0; i<ai[m]; i++) {cols[i] = aij->garray[aj[i]];}
1451     for (i=0; i<m; i++) {
1452       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
1453       row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
1454     }
1455     ierr = PetscFree(ct);CHKERRQ(ierr);
1456     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1457     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1458     /*
1459        Everyone has to call to draw the matrix since the graphics waits are
1460        synchronized across all processors that share the PetscDraw object
1461     */
1462     ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
1463     if (!rank) {
1464       ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1465       /* Set the type name to MATMPIAIJ so that the correct type can be printed out by PetscObjectPrintClassNamePrefixType() in MatView_SeqAIJ_ASCII()*/
1466       ierr = PetscStrcpy(((PetscObject)((Mat_MPIAIJ*)(A->data))->A)->type_name,MATMPIAIJ);CHKERRQ(ierr);
1467       ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
1468     }
1469     ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
1470     ierr = MatDestroy(A);CHKERRQ(ierr);
1471   }
1472   PetscFunctionReturn(0);
1473 }
1474 
1475 #undef __FUNCT__
1476 #define __FUNCT__ "MatView_MPIAIJ"
1477 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer)
1478 {
1479   PetscErrorCode ierr;
1480   PetscBool      iascii,isdraw,issocket,isbinary;
1481 
1482   PetscFunctionBegin;
1483   ierr  = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1484   ierr  = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1485   ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1486   ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
1487   if (iascii || isdraw || isbinary || issocket) {
1488     ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
1489   } else {
1490     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Viewer type %s not supported by MPIAIJ matrices",((PetscObject)viewer)->type_name);
1491   }
1492   PetscFunctionReturn(0);
1493 }
1494 
1495 #undef __FUNCT__
1496 #define __FUNCT__ "MatSOR_MPIAIJ"
1497 PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
1498 {
1499   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1500   PetscErrorCode ierr;
1501   Vec            bb1 = 0;
1502   PetscBool      hasop;
1503 
1504   PetscFunctionBegin;
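       /* bb1 is needed whenever the right-hand side must be modified locally: more than one
          iteration, a nonzero initial guess, or the Eisenstat variant */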
1505   if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) {
1506     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
1507   }
1508 
1509   if (flag == SOR_APPLY_UPPER) {
1510     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1511     PetscFunctionReturn(0);
1512   }
1513 
1514   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
1515     if (flag & SOR_ZERO_INITIAL_GUESS) {
1516       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1517       its--;
1518     }
1519 
1520     while (its--) {
1521       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1522       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1523 
1524       /* update rhs: bb1 = bb - B*x */
1525       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1526       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1527 
1528       /* local sweep */
1529       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1530     }
1531   } else if (flag & SOR_LOCAL_FORWARD_SWEEP){
1532     if (flag & SOR_ZERO_INITIAL_GUESS) {
1533       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1534       its--;
1535     }
1536     while (its--) {
1537       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1538       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1539 
1540       /* update rhs: bb1 = bb - B*x */
1541       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1542       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1543 
1544       /* local sweep */
1545       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1546     }
1547   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){
1548     if (flag & SOR_ZERO_INITIAL_GUESS) {
1549       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1550       its--;
1551     }
1552     while (its--) {
1553       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1554       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1555 
1556       /* update rhs: bb1 = bb - B*x */
1557       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1558       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1559 
1560       /* local sweep */
1561       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1562     }
1563   }  else if (flag & SOR_EISENSTAT) {
1564     Vec         xx1;
1565 
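         /* Eisenstat variant: a local backward sweep produces xx, the right-hand side is then
            modified using the diagonal and the off-process coupling, and a local forward sweep
            produces a correction xx1 that is added to xx */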
1566     ierr = VecDuplicate(bb,&xx1);CHKERRQ(ierr);
1567     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr);
1568 
1569     ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1570     ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1571     if (!mat->diag) {
1572       ierr = MatGetVecs(matin,&mat->diag,PETSC_NULL);CHKERRQ(ierr);
1573       ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr);
1574     }
1575     ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr);
1576     if (hasop) {
1577       ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr);
1578     } else {
1579       ierr = VecPointwiseMult(bb1,mat->diag,xx);CHKERRQ(ierr);
1580     }
1581     ierr = VecAYPX(bb1,(omega-2.0)/omega,bb);CHKERRQ(ierr);
1582 
1583     ierr = MatMultAdd(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr);
1584 
1585     /* local sweep */
1586     ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr);
1587     ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr);
1588     ierr = VecDestroy(xx1);CHKERRQ(ierr);
1589   } else {
1590     SETERRQ(((PetscObject)matin)->comm,PETSC_ERR_SUP,"Parallel SOR not supported");
1591   }
1592 
1593   if (bb1) {ierr = VecDestroy(bb1);CHKERRQ(ierr);}
1594   PetscFunctionReturn(0);
1595 }
1596 
1597 #undef __FUNCT__
1598 #define __FUNCT__ "MatPermute_MPIAIJ"
1599 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B)
1600 {
1601   MPI_Comm       comm,pcomm;
1602   PetscInt       first,local_size,nrows;
1603   const PetscInt *rows;
1604   PetscMPIInt    size;
1605   IS             crowp,growp,irowp,lrowp,lcolp,icolp;
1606   PetscErrorCode ierr;
1607 
1608   PetscFunctionBegin;
1609   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
1610   /* make a collective version of 'rowp' */
1611   ierr = PetscObjectGetComm((PetscObject)rowp,&pcomm);CHKERRQ(ierr);
1612   if (pcomm==comm) {
1613     crowp = rowp;
1614   } else {
1615     ierr = ISGetSize(rowp,&nrows);CHKERRQ(ierr);
1616     ierr = ISGetIndices(rowp,&rows);CHKERRQ(ierr);
1617     ierr = ISCreateGeneral(comm,nrows,rows,PETSC_COPY_VALUES,&crowp);CHKERRQ(ierr);
1618     ierr = ISRestoreIndices(rowp,&rows);CHKERRQ(ierr);
1619   }
1620   /* collect the global row permutation and invert it */
1621   ierr = ISAllGather(crowp,&growp);CHKERRQ(ierr);
1622   ierr = ISSetPermutation(growp);CHKERRQ(ierr);
1623   if (pcomm!=comm) {
1624     ierr = ISDestroy(crowp);CHKERRQ(ierr);
1625   }
1626   ierr = ISInvertPermutation(growp,PETSC_DECIDE,&irowp);CHKERRQ(ierr);
1627   /* get the local target indices */
1628   ierr = MatGetOwnershipRange(A,&first,PETSC_NULL);CHKERRQ(ierr);
1629   ierr = MatGetLocalSize(A,&local_size,PETSC_NULL);CHKERRQ(ierr);
1630   ierr = ISGetIndices(irowp,&rows);CHKERRQ(ierr);
1631   ierr = ISCreateGeneral(MPI_COMM_SELF,local_size,rows+first,PETSC_COPY_VALUES,&lrowp);CHKERRQ(ierr);
1632   ierr = ISRestoreIndices(irowp,&rows);CHKERRQ(ierr);
1633   ierr = ISDestroy(irowp);CHKERRQ(ierr);
1634   /* the column permutation is so much easier;
1635      make a local version of 'colp' and invert it */
1636   ierr = PetscObjectGetComm((PetscObject)colp,&pcomm);CHKERRQ(ierr);
1637   ierr = MPI_Comm_size(pcomm,&size);CHKERRQ(ierr);
1638   if (size==1) {
1639     lcolp = colp;
1640   } else {
1641     ierr = ISGetSize(colp,&nrows);CHKERRQ(ierr);
1642     ierr = ISGetIndices(colp,&rows);CHKERRQ(ierr);
1643     ierr = ISCreateGeneral(MPI_COMM_SELF,nrows,rows,PETSC_COPY_VALUES,&lcolp);CHKERRQ(ierr);
1644   }
1645   ierr = ISSetPermutation(lcolp);CHKERRQ(ierr);
1646   ierr = ISInvertPermutation(lcolp,PETSC_DECIDE,&icolp);CHKERRQ(ierr);
1647   ierr = ISSetPermutation(icolp);CHKERRQ(ierr);
1648   if (size>1) {
1649     ierr = ISRestoreIndices(colp,&rows);CHKERRQ(ierr);
1650     ierr = ISDestroy(lcolp);CHKERRQ(ierr);
1651   }
1652   /* now we just get the submatrix */
1653   ierr = MatGetSubMatrix_MPIAIJ_Private(A,lrowp,icolp,local_size,MAT_INITIAL_MATRIX,B);CHKERRQ(ierr);
1654   /* clean up */
1655   ierr = ISDestroy(lrowp);CHKERRQ(ierr);
1656   ierr = ISDestroy(icolp);CHKERRQ(ierr);
1657   PetscFunctionReturn(0);
1658 }
1659 
1660 #undef __FUNCT__
1661 #define __FUNCT__ "MatGetInfo_MPIAIJ"
1662 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1663 {
1664   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1665   Mat            A = mat->A,B = mat->B;
1666   PetscErrorCode ierr;
1667   PetscReal      isend[5],irecv[5];
1668 
1669   PetscFunctionBegin;
1670   info->block_size     = 1.0;
1671   ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1672   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1673   isend[3] = info->memory;  isend[4] = info->mallocs;
1674   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1675   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1676   isend[3] += info->memory;  isend[4] += info->mallocs;
1677   if (flag == MAT_LOCAL) {
1678     info->nz_used      = isend[0];
1679     info->nz_allocated = isend[1];
1680     info->nz_unneeded  = isend[2];
1681     info->memory       = isend[3];
1682     info->mallocs      = isend[4];
1683   } else if (flag == MAT_GLOBAL_MAX) {
1684     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,((PetscObject)matin)->comm);CHKERRQ(ierr);
1685     info->nz_used      = irecv[0];
1686     info->nz_allocated = irecv[1];
1687     info->nz_unneeded  = irecv[2];
1688     info->memory       = irecv[3];
1689     info->mallocs      = irecv[4];
1690   } else if (flag == MAT_GLOBAL_SUM) {
1691     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,((PetscObject)matin)->comm);CHKERRQ(ierr);
1692     info->nz_used      = irecv[0];
1693     info->nz_allocated = irecv[1];
1694     info->nz_unneeded  = irecv[2];
1695     info->memory       = irecv[3];
1696     info->mallocs      = irecv[4];
1697   }
1698   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1699   info->fill_ratio_needed = 0;
1700   info->factor_mallocs    = 0;
1701 
1702   PetscFunctionReturn(0);
1703 }
1704 
1705 #undef __FUNCT__
1706 #define __FUNCT__ "MatSetOption_MPIAIJ"
1707 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscBool  flg)
1708 {
1709   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1710   PetscErrorCode ierr;
1711 
1712   PetscFunctionBegin;
1713   switch (op) {
1714   case MAT_NEW_NONZERO_LOCATIONS:
1715   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1716   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1717   case MAT_KEEP_NONZERO_PATTERN:
1718   case MAT_NEW_NONZERO_LOCATION_ERR:
1719   case MAT_USE_INODES:
1720   case MAT_IGNORE_ZERO_ENTRIES:
1721     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1722     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1723     break;
1724   case MAT_ROW_ORIENTED:
1725     a->roworiented = flg;
1726     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1727     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1728     break;
1729   case MAT_NEW_DIAGONALS:
1730     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1731     break;
1732   case MAT_IGNORE_OFF_PROC_ENTRIES:
1733     a->donotstash = PETSC_TRUE;
1734     break;
1735   case MAT_SPD:
1736     A->spd_set                         = PETSC_TRUE;
1737     A->spd                             = flg;
1738     if (flg) {
1739       A->symmetric                     = PETSC_TRUE;
1740       A->structurally_symmetric        = PETSC_TRUE;
1741       A->symmetric_set                 = PETSC_TRUE;
1742       A->structurally_symmetric_set    = PETSC_TRUE;
1743     }
1744     break;
1745   case MAT_SYMMETRIC:
1746     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1747     break;
1748   case MAT_STRUCTURALLY_SYMMETRIC:
1749     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1750     break;
1751   case MAT_HERMITIAN:
1752     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1753     break;
1754   case MAT_SYMMETRY_ETERNAL:
1755     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1756     break;
1757   default:
1758     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op);
1759   }
1760   PetscFunctionReturn(0);
1761 }
1762 
1763 #undef __FUNCT__
1764 #define __FUNCT__ "MatGetRow_MPIAIJ"
1765 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1766 {
1767   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1768   PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
1769   PetscErrorCode ierr;
1770   PetscInt       i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart;
1771   PetscInt       nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend;
1772   PetscInt       *cmap,*idx_p;
1773 
1774   PetscFunctionBegin;
1775   if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
1776   mat->getrowactive = PETSC_TRUE;
1777 
1778   if (!mat->rowvalues && (idx || v)) {
1779     /*
1780         allocate enough space to hold information from the longest row.
1781     */
1782     Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data;
1783     PetscInt   max = 1,tmp;
1784     for (i=0; i<matin->rmap->n; i++) {
1785       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1786       if (max < tmp) { max = tmp; }
1787     }
1788     ierr = PetscMalloc2(max,PetscScalar,&mat->rowvalues,max,PetscInt,&mat->rowindices);CHKERRQ(ierr);
1789   }
1790 
1791   if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows");
1792   lrow = row - rstart;
1793 
1794   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1795   if (!v)   {pvA = 0; pvB = 0;}
1796   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1797   ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1798   ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1799   nztot = nzA + nzB;
1800 
1801   cmap  = mat->garray;
1802   if (v  || idx) {
1803     if (nztot) {
1804       /* Sort by increasing column numbers, assuming A and B already sorted */
1805       PetscInt imark = -1;
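           /* imark counts the leading B (off-diagonal) entries whose global column is below
              cstart; they precede the diagonal-block entries in the merged row */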
1806       if (v) {
1807         *v = v_p = mat->rowvalues;
1808         for (i=0; i<nzB; i++) {
1809           if (cmap[cworkB[i]] < cstart)   v_p[i] = vworkB[i];
1810           else break;
1811         }
1812         imark = i;
1813         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1814         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1815       }
1816       if (idx) {
1817         *idx = idx_p = mat->rowindices;
1818         if (imark > -1) {
1819           for (i=0; i<imark; i++) {
1820             idx_p[i] = cmap[cworkB[i]];
1821           }
1822         } else {
1823           for (i=0; i<nzB; i++) {
1824             if (cmap[cworkB[i]] < cstart)   idx_p[i] = cmap[cworkB[i]];
1825             else break;
1826           }
1827           imark = i;
1828         }
1829         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart + cworkA[i];
1830         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]];
1831       }
1832     } else {
1833       if (idx) *idx = 0;
1834       if (v)   *v   = 0;
1835     }
1836   }
1837   *nz = nztot;
1838   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1839   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1840   PetscFunctionReturn(0);
1841 }
1842 
1843 #undef __FUNCT__
1844 #define __FUNCT__ "MatRestoreRow_MPIAIJ"
1845 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1846 {
1847   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1848 
1849   PetscFunctionBegin;
1850   if (!aij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
1851   aij->getrowactive = PETSC_FALSE;
1852   PetscFunctionReturn(0);
1853 }
1854 
1855 #undef __FUNCT__
1856 #define __FUNCT__ "MatNorm_MPIAIJ"
1857 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm)
1858 {
1859   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1860   Mat_SeqAIJ     *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data;
1861   PetscErrorCode ierr;
1862   PetscInt       i,j,cstart = mat->cmap->rstart;
1863   PetscReal      sum = 0.0;
1864   MatScalar      *v;
1865 
1866   PetscFunctionBegin;
1867   if (aij->size == 1) {
1868     ierr =  MatNorm(aij->A,type,norm);CHKERRQ(ierr);
1869   } else {
1870     if (type == NORM_FROBENIUS) {
1871       v = amat->a;
1872       for (i=0; i<amat->nz; i++) {
1873 #if defined(PETSC_USE_COMPLEX)
1874         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1875 #else
1876         sum += (*v)*(*v); v++;
1877 #endif
1878       }
1879       v = bmat->a;
1880       for (i=0; i<bmat->nz; i++) {
1881 #if defined(PETSC_USE_COMPLEX)
1882         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1883 #else
1884         sum += (*v)*(*v); v++;
1885 #endif
1886       }
1887       ierr = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPI_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr);
1888       *norm = sqrt(*norm);
1889     } else if (type == NORM_1) { /* max column norm */
1890       PetscReal *tmp,*tmp2;
1891       PetscInt  *jj,*garray = aij->garray;
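           /* accumulate |a_ij| into a global-length array of column sums, sum it across all
             processes, and take the largest column sum */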
1892       ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr);
1893       ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscReal),&tmp2);CHKERRQ(ierr);
1894       ierr = PetscMemzero(tmp,mat->cmap->N*sizeof(PetscReal));CHKERRQ(ierr);
1895       *norm = 0.0;
1896       v = amat->a; jj = amat->j;
1897       for (j=0; j<amat->nz; j++) {
1898         tmp[cstart + *jj++ ] += PetscAbsScalar(*v);  v++;
1899       }
1900       v = bmat->a; jj = bmat->j;
1901       for (j=0; j<bmat->nz; j++) {
1902         tmp[garray[*jj++]] += PetscAbsScalar(*v); v++;
1903       }
1904       ierr = MPI_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPI_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr);
1905       for (j=0; j<mat->cmap->N; j++) {
1906         if (tmp2[j] > *norm) *norm = tmp2[j];
1907       }
1908       ierr = PetscFree(tmp);CHKERRQ(ierr);
1909       ierr = PetscFree(tmp2);CHKERRQ(ierr);
1910     } else if (type == NORM_INFINITY) { /* max row norm */
1911       PetscReal ntemp = 0.0;
1912       for (j=0; j<aij->A->rmap->n; j++) {
1913         v = amat->a + amat->i[j];
1914         sum = 0.0;
1915         for (i=0; i<amat->i[j+1]-amat->i[j]; i++) {
1916           sum += PetscAbsScalar(*v); v++;
1917         }
1918         v = bmat->a + bmat->i[j];
1919         for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) {
1920           sum += PetscAbsScalar(*v); v++;
1921         }
1922         if (sum > ntemp) ntemp = sum;
1923       }
1924       ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPI_MAX,((PetscObject)mat)->comm);CHKERRQ(ierr);
1925     } else {
1926       SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_SUP,"No support for two norm");
1927     }
1928   }
1929   PetscFunctionReturn(0);
1930 }
1931 
1932 #undef __FUNCT__
1933 #define __FUNCT__ "MatTranspose_MPIAIJ"
1934 PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout)
1935 {
1936   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1937   Mat_SeqAIJ     *Aloc=(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data;
1938   PetscErrorCode ierr;
1939   PetscInt       M = A->rmap->N,N = A->cmap->N,ma,na,mb,*ai,*aj,*bi,*bj,row,*cols,*cols_tmp,i,*d_nnz;
1940   PetscInt       cstart=A->cmap->rstart,ncol;
1941   Mat            B;
1942   MatScalar      *array;
1943 
1944   PetscFunctionBegin;
1945   if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_ARG_SIZ,"In-place transpose only supported for square matrices");
1946 
1947   ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n;
1948   ai = Aloc->i; aj = Aloc->j;
1949   bi = Bloc->i; bj = Bloc->j;
1950   if (reuse == MAT_INITIAL_MATRIX || *matout == A) {
1951     /* compute d_nnz for preallocation; o_nnz is approximated by d_nnz to avoid communication */
1952     ierr = PetscMalloc((1+na)*sizeof(PetscInt),&d_nnz);CHKERRQ(ierr);
1953     ierr = PetscMemzero(d_nnz,(1+na)*sizeof(PetscInt));CHKERRQ(ierr);
1954     for (i=0; i<ai[ma]; i++){
1955       d_nnz[aj[i]] ++;
1956       aj[i] += cstart; /* global col index to be used by MatSetValues() */
1957     }
1958 
1959     ierr = MatCreate(((PetscObject)A)->comm,&B);CHKERRQ(ierr);
1960     ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
1961     ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
1962     ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,d_nnz);CHKERRQ(ierr);
1963     ierr = PetscFree(d_nnz);CHKERRQ(ierr);
1964   } else {
1965     B = *matout;
1966   }
1967 
1968   /* copy over the A part */
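       /* each local row of A becomes a column of B: MatSetValues() is called with the row's
          column indices as the row indices of B and the (global) row number as the single column */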
1969   array = Aloc->a;
1970   row = A->rmap->rstart;
1971   for (i=0; i<ma; i++) {
1972     ncol = ai[i+1]-ai[i];
1973     ierr = MatSetValues(B,ncol,aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1974     row++; array += ncol; aj += ncol;
1975   }
1976   aj = Aloc->j;
1977   for (i=0; i<ai[ma]; i++) aj[i] -= cstart; /* resume local col index */
1978 
1979   /* copy over the B part */
1980   ierr = PetscMalloc(bi[mb]*sizeof(PetscInt),&cols);CHKERRQ(ierr);
1981   ierr = PetscMemzero(cols,bi[mb]*sizeof(PetscInt));CHKERRQ(ierr);
1982   array = Bloc->a;
1983   row = A->rmap->rstart;
1984   for (i=0; i<bi[mb]; i++) {cols[i] = a->garray[bj[i]];}
1985   cols_tmp = cols;
1986   for (i=0; i<mb; i++) {
1987     ncol = bi[i+1]-bi[i];
1988     ierr = MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1989     row++; array += ncol; cols_tmp += ncol;
1990   }
1991   ierr = PetscFree(cols);CHKERRQ(ierr);
1992 
1993   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1994   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1995   if (reuse == MAT_INITIAL_MATRIX || *matout != A) {
1996     *matout = B;
1997   } else {
1998     ierr = MatHeaderMerge(A,B);CHKERRQ(ierr);
1999   }
2000   PetscFunctionReturn(0);
2001 }
2002 
2003 #undef __FUNCT__
2004 #define __FUNCT__ "MatDiagonalScale_MPIAIJ"
2005 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
2006 {
2007   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2008   Mat            a = aij->A,b = aij->B;
2009   PetscErrorCode ierr;
2010   PetscInt       s1,s2,s3;
2011 
2012   PetscFunctionBegin;
2013   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
2014   if (rr) {
2015     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
2016     if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
2017     /* Overlap communication with computation. */
2018     ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2019   }
2020   if (ll) {
2021     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
2022     if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
2023     ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr);
2024   }
2025   /* scale  the diagonal block */
2026   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
2027 
2028   if (rr) {
2029     /* Do a scatter end and then right scale the off-diagonal block */
2030     ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2031     ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr);
2032   }
2033 
2034   PetscFunctionReturn(0);
2035 }
2036 
2037 #undef __FUNCT__
2038 #define __FUNCT__ "MatSetBlockSize_MPIAIJ"
2039 PetscErrorCode MatSetBlockSize_MPIAIJ(Mat A,PetscInt bs)
2040 {
2041   Mat_MPIAIJ     *a   = (Mat_MPIAIJ*)A->data;
2042   PetscErrorCode ierr;
2043 
2044   PetscFunctionBegin;
2045   ierr = MatSetBlockSize(a->A,bs);CHKERRQ(ierr);
2046   ierr = MatSetBlockSize(a->B,bs);CHKERRQ(ierr);
2047   ierr = PetscLayoutSetBlockSize(A->rmap,bs);CHKERRQ(ierr);
2048   ierr = PetscLayoutSetBlockSize(A->cmap,bs);CHKERRQ(ierr);
2049   PetscFunctionReturn(0);
2050 }
2051 #undef __FUNCT__
2052 #define __FUNCT__ "MatSetUnfactored_MPIAIJ"
2053 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A)
2054 {
2055   Mat_MPIAIJ     *a   = (Mat_MPIAIJ*)A->data;
2056   PetscErrorCode ierr;
2057 
2058   PetscFunctionBegin;
2059   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
2060   PetscFunctionReturn(0);
2061 }
2062 
2063 #undef __FUNCT__
2064 #define __FUNCT__ "MatEqual_MPIAIJ"
2065 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscBool  *flag)
2066 {
2067   Mat_MPIAIJ     *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data;
2068   Mat            a,b,c,d;
2069   PetscBool      flg;
2070   PetscErrorCode ierr;
2071 
2072   PetscFunctionBegin;
2073   a = matA->A; b = matA->B;
2074   c = matB->A; d = matB->B;
2075 
2076   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
2077   if (flg) {
2078     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
2079   }
2080   ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,((PetscObject)A)->comm);CHKERRQ(ierr);
2081   PetscFunctionReturn(0);
2082 }
2083 
2084 #undef __FUNCT__
2085 #define __FUNCT__ "MatCopy_MPIAIJ"
2086 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
2087 {
2088   PetscErrorCode ierr;
2089   Mat_MPIAIJ     *a = (Mat_MPIAIJ *)A->data;
2090   Mat_MPIAIJ     *b = (Mat_MPIAIJ *)B->data;
2091 
2092   PetscFunctionBegin;
2093   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
2094   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
2095     /* because of the column compression in the off-processor part of the matrix a->B,
2096        the number of columns in a->B and b->B may be different, hence we cannot call
2097        MatCopy() directly on the two parts. If need be, a copy more efficient than
2098        MatCopy_Basic() could be provided by first uncompressing the a->B matrices and
2099        then copying the submatrices */
2100     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
2101   } else {
2102     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
2103     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
2104   }
2105   PetscFunctionReturn(0);
2106 }
2107 
2108 #undef __FUNCT__
2109 #define __FUNCT__ "MatSetUpPreallocation_MPIAIJ"
2110 PetscErrorCode MatSetUpPreallocation_MPIAIJ(Mat A)
2111 {
2112   PetscErrorCode ierr;
2113 
2114   PetscFunctionBegin;
2115   ierr =  MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
2116   PetscFunctionReturn(0);
2117 }
2118 
2119 #undef __FUNCT__
2120 #define __FUNCT__ "MatAXPY_MPIAIJ"
2121 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
2122 {
2123   PetscErrorCode ierr;
2124   PetscInt       i;
2125   Mat_MPIAIJ     *xx = (Mat_MPIAIJ *)X->data,*yy = (Mat_MPIAIJ *)Y->data;
2126   PetscBLASInt   bnz,one=1;
2127   Mat_SeqAIJ     *x,*y;
2128 
2129   PetscFunctionBegin;
2130   if (str == SAME_NONZERO_PATTERN) {
2131     PetscScalar alpha = a;
2132     x = (Mat_SeqAIJ *)xx->A->data;
2133     y = (Mat_SeqAIJ *)yy->A->data;
2134     bnz = PetscBLASIntCast(x->nz);
2135     BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
2136     x = (Mat_SeqAIJ *)xx->B->data;
2137     y = (Mat_SeqAIJ *)yy->B->data;
2138     bnz = PetscBLASIntCast(x->nz);
2139     BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
2140   } else if (str == SUBSET_NONZERO_PATTERN) {
2141     ierr = MatAXPY_SeqAIJ(yy->A,a,xx->A,str);CHKERRQ(ierr);
2142 
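         /* for the off-diagonal parts, build (or reuse) the cached xtoy map that gives, for each
            stored entry of X's off-diagonal block, its position in Y's value array */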
2143     x = (Mat_SeqAIJ *)xx->B->data;
2144     y = (Mat_SeqAIJ *)yy->B->data;
2145     if (y->xtoy && y->XtoY != xx->B) {
2146       ierr = PetscFree(y->xtoy);CHKERRQ(ierr);
2147       ierr = MatDestroy(y->XtoY);CHKERRQ(ierr);
2148     }
2149     if (!y->xtoy) { /* get xtoy */
2150       ierr = MatAXPYGetxtoy_Private(xx->B->rmap->n,x->i,x->j,xx->garray,y->i,y->j,yy->garray,&y->xtoy);CHKERRQ(ierr);
2151       y->XtoY = xx->B;
2152       ierr = PetscObjectReference((PetscObject)xx->B);CHKERRQ(ierr);
2153     }
2154     for (i=0; i<x->nz; i++) y->a[y->xtoy[i]] += a*(x->a[i]);
2155   } else {
2156     Mat B;
2157     PetscInt *nnz_d,*nnz_o;
2158     ierr = PetscMalloc(yy->A->rmap->N*sizeof(PetscInt),&nnz_d);CHKERRQ(ierr);
2159     ierr = PetscMalloc(yy->B->rmap->N*sizeof(PetscInt),&nnz_o);CHKERRQ(ierr);
2160     ierr = MatCreate(((PetscObject)Y)->comm,&B);CHKERRQ(ierr);
2161     ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr);
2162     ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr);
2163     ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr);
2164     ierr = MatAXPYGetPreallocation_SeqAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr);
2165     ierr = MatAXPYGetPreallocation_SeqAIJ(yy->B,xx->B,nnz_o);CHKERRQ(ierr);
2166     ierr = MatMPIAIJSetPreallocation(B,PETSC_NULL,nnz_d,PETSC_NULL,nnz_o);CHKERRQ(ierr);
2167     ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr);
2168     ierr = MatHeaderReplace(Y,B);CHKERRQ(ierr);
2169     ierr = PetscFree(nnz_d);CHKERRQ(ierr);
2170     ierr = PetscFree(nnz_o);CHKERRQ(ierr);
2171   }
2172   PetscFunctionReturn(0);
2173 }
2174 
2175 extern PetscErrorCode  MatConjugate_SeqAIJ(Mat);
2176 
2177 #undef __FUNCT__
2178 #define __FUNCT__ "MatConjugate_MPIAIJ"
2179 PetscErrorCode  MatConjugate_MPIAIJ(Mat mat)
2180 {
2181 #if defined(PETSC_USE_COMPLEX)
2182   PetscErrorCode ierr;
2183   Mat_MPIAIJ     *aij = (Mat_MPIAIJ *)mat->data;
2184 
2185   PetscFunctionBegin;
2186   ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr);
2187   ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr);
2188 #else
2189   PetscFunctionBegin;
2190 #endif
2191   PetscFunctionReturn(0);
2192 }
2193 
2194 #undef __FUNCT__
2195 #define __FUNCT__ "MatRealPart_MPIAIJ"
2196 PetscErrorCode MatRealPart_MPIAIJ(Mat A)
2197 {
2198   Mat_MPIAIJ   *a = (Mat_MPIAIJ*)A->data;
2199   PetscErrorCode ierr;
2200 
2201   PetscFunctionBegin;
2202   ierr = MatRealPart(a->A);CHKERRQ(ierr);
2203   ierr = MatRealPart(a->B);CHKERRQ(ierr);
2204   PetscFunctionReturn(0);
2205 }
2206 
2207 #undef __FUNCT__
2208 #define __FUNCT__ "MatImaginaryPart_MPIAIJ"
2209 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A)
2210 {
2211   Mat_MPIAIJ   *a = (Mat_MPIAIJ*)A->data;
2212   PetscErrorCode ierr;
2213 
2214   PetscFunctionBegin;
2215   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
2216   ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
2217   PetscFunctionReturn(0);
2218 }
2219 
2220 #ifdef PETSC_HAVE_PBGL
2221 
2222 #include <boost/parallel/mpi/bsp_process_group.hpp>
2223 #include <boost/graph/distributed/ilu_default_graph.hpp>
2224 #include <boost/graph/distributed/ilu_0_block.hpp>
2225 #include <boost/graph/distributed/ilu_preconditioner.hpp>
2226 #include <boost/graph/distributed/petsc/interface.hpp>
2227 #include <boost/multi_array.hpp>
2228 #include <boost/parallel/distributed_property_map.hpp>
2229 
2230 #undef __FUNCT__
2231 #define __FUNCT__ "MatILUFactorSymbolic_MPIAIJ"
2232 /*
2233   This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu>
2234 */
2235 PetscErrorCode MatILUFactorSymbolic_MPIAIJ(Mat fact,Mat A, IS isrow, IS iscol, const MatFactorInfo *info)
2236 {
2237   namespace petsc = boost::distributed::petsc;
2238 
2239   namespace graph_dist = boost::graph::distributed;
2240   using boost::graph::distributed::ilu_default::process_group_type;
2241   using boost::graph::ilu_permuted;
2242 
2243   PetscBool       row_identity, col_identity;
2244   PetscContainer  c;
2245   PetscInt        m, n, M, N;
2246   PetscErrorCode  ierr;
2247 
2248   PetscFunctionBegin;
2249   if (info->levels != 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only levels = 0 supported for parallel ilu");
2250   ierr = ISIdentity(isrow, &row_identity);CHKERRQ(ierr);
2251   ierr = ISIdentity(iscol, &col_identity);CHKERRQ(ierr);
2252   if (!row_identity || !col_identity) {
2253     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for parallel ILU");
2254   }
2255 
2256   process_group_type pg;
2257   typedef graph_dist::ilu_default::ilu_level_graph_type  lgraph_type;
2258   lgraph_type*   lgraph_p = new lgraph_type(petsc::num_global_vertices(A), pg, petsc::matrix_distribution(A, pg));
2259   lgraph_type&   level_graph = *lgraph_p;
2260   graph_dist::ilu_default::graph_type&            graph(level_graph.graph);
2261 
2262   petsc::read_matrix(A, graph, get(boost::edge_weight, graph));
2263   ilu_permuted(level_graph);
2264 
2265   /* put together the new matrix */
2266   ierr = MatCreate(((PetscObject)A)->comm, fact);CHKERRQ(ierr);
2267   ierr = MatGetLocalSize(A, &m, &n);CHKERRQ(ierr);
2268   ierr = MatGetSize(A, &M, &N);CHKERRQ(ierr);
2269   ierr = MatSetSizes(fact, m, n, M, N);CHKERRQ(ierr);
2270   ierr = MatSetType(fact, ((PetscObject)A)->type_name);CHKERRQ(ierr);
2271   ierr = MatAssemblyBegin(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2272   ierr = MatAssemblyEnd(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2273 
2274   ierr = PetscContainerCreate(((PetscObject)A)->comm, &c);CHKERRQ(ierr);
2275   ierr = PetscContainerSetPointer(c, lgraph_p);CHKERRQ(ierr);
2276   ierr = PetscObjectCompose((PetscObject) (fact), "graph", (PetscObject) c);CHKERRQ(ierr);
2277   PetscFunctionReturn(0);
2278 }
2279 
2280 #undef __FUNCT__
2281 #define __FUNCT__ "MatLUFactorNumeric_MPIAIJ"
2282 PetscErrorCode MatLUFactorNumeric_MPIAIJ(Mat B,Mat A, const MatFactorInfo *info)
2283 {
2284   PetscFunctionBegin;
2285   PetscFunctionReturn(0);
2286 }
2287 
2288 #undef __FUNCT__
2289 #define __FUNCT__ "MatSolve_MPIAIJ"
2290 /*
2291   This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu>
2292 */
2293 PetscErrorCode MatSolve_MPIAIJ(Mat A, Vec b, Vec x)
2294 {
2295   namespace graph_dist = boost::graph::distributed;
2296 
2297   typedef graph_dist::ilu_default::ilu_level_graph_type  lgraph_type;
2298   lgraph_type*   lgraph_p;
2299   PetscContainer c;
2300   PetscErrorCode ierr;
2301 
2302   PetscFunctionBegin;
2303   ierr = PetscObjectQuery((PetscObject) A, "graph", (PetscObject *) &c);CHKERRQ(ierr);
2304   ierr = PetscContainerGetPointer(c, (void **) &lgraph_p);CHKERRQ(ierr);
2305   ierr = VecCopy(b, x);CHKERRQ(ierr);
2306 
2307   PetscScalar* array_x;
2308   ierr = VecGetArray(x, &array_x);CHKERRQ(ierr);
2309   PetscInt sx;
2310   ierr = VecGetSize(x, &sx);CHKERRQ(ierr);
2311 
2312   PetscScalar* array_b;
2313   ierr = VecGetArray(b, &array_b);CHKERRQ(ierr);
2314   PetscInt sb;
2315   ierr = VecGetSize(b, &sb);CHKERRQ(ierr);
2316 
2317   lgraph_type&   level_graph = *lgraph_p;
2318   graph_dist::ilu_default::graph_type&            graph(level_graph.graph);
2319 
2320   typedef boost::multi_array_ref<PetscScalar, 1> array_ref_type;
2321   array_ref_type                                 ref_b(array_b, boost::extents[num_vertices(graph)]),
2322                                                  ref_x(array_x, boost::extents[num_vertices(graph)]);
2323 
2324   typedef boost::iterator_property_map<array_ref_type::iterator,
2325                                 boost::property_map<graph_dist::ilu_default::graph_type, boost::vertex_index_t>::type>  gvector_type;
2326   gvector_type                                   vector_b(ref_b.begin(), get(boost::vertex_index, graph)),
2327                                                  vector_x(ref_x.begin(), get(boost::vertex_index, graph));
2328 
2329   ilu_set_solve(*lgraph_p, vector_b, vector_x);
2330 
2331   PetscFunctionReturn(0);
2332 }
2333 #endif
2334 
2335 typedef struct { /* used by MatGetRedundantMatrix() for reusing matredundant */
2336   PetscInt       nzlocal,nsends,nrecvs;
2337   PetscMPIInt    *send_rank,*recv_rank;
2338   PetscInt       *sbuf_nz,*rbuf_nz,*sbuf_j,**rbuf_j;
2339   PetscScalar    *sbuf_a,**rbuf_a;
2340   PetscErrorCode (*MatDestroy)(Mat);
2341 } Mat_Redundant;
2342 
2343 #undef __FUNCT__
2344 #define __FUNCT__ "PetscContainerDestroy_MatRedundant"
2345 PetscErrorCode PetscContainerDestroy_MatRedundant(void *ptr)
2346 {
2347   PetscErrorCode       ierr;
2348   Mat_Redundant        *redund=(Mat_Redundant*)ptr;
2349   PetscInt             i;
2350 
2351   PetscFunctionBegin;
2352   ierr = PetscFree2(redund->send_rank,redund->recv_rank);CHKERRQ(ierr);
2353   ierr = PetscFree(redund->sbuf_j);CHKERRQ(ierr);
2354   ierr = PetscFree(redund->sbuf_a);CHKERRQ(ierr);
2355   for (i=0; i<redund->nrecvs; i++){
2356     ierr = PetscFree(redund->rbuf_j[i]);CHKERRQ(ierr);
2357     ierr = PetscFree(redund->rbuf_a[i]);CHKERRQ(ierr);
2358   }
2359   ierr = PetscFree4(redund->sbuf_nz,redund->rbuf_nz,redund->rbuf_j,redund->rbuf_a);CHKERRQ(ierr);
2360   ierr = PetscFree(redund);CHKERRQ(ierr);
2361   PetscFunctionReturn(0);
2362 }
2363 
2364 #undef __FUNCT__
2365 #define __FUNCT__ "MatDestroy_MatRedundant"
2366 PetscErrorCode MatDestroy_MatRedundant(Mat A)
2367 {
2368   PetscErrorCode  ierr;
2369   PetscContainer  container;
2370   Mat_Redundant   *redund=PETSC_NULL;
2371 
2372   PetscFunctionBegin;
2373   ierr = PetscObjectQuery((PetscObject)A,"Mat_Redundant",(PetscObject *)&container);CHKERRQ(ierr);
2374   if (container) {
2375     ierr = PetscContainerGetPointer(container,(void **)&redund);CHKERRQ(ierr);
2376   } else {
2377     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Container does not exist");
2378   }
2379   A->ops->destroy = redund->MatDestroy;
2380   ierr = PetscObjectCompose((PetscObject)A,"Mat_Redundant",0);CHKERRQ(ierr);
2381   ierr = (*A->ops->destroy)(A);CHKERRQ(ierr);
2382   ierr = PetscContainerDestroy(container);CHKERRQ(ierr);
2383   PetscFunctionReturn(0);
2384 }
2385 
2386 #undef __FUNCT__
2387 #define __FUNCT__ "MatGetRedundantMatrix_MPIAIJ"
2388 PetscErrorCode MatGetRedundantMatrix_MPIAIJ(Mat mat,PetscInt nsubcomm,MPI_Comm subcomm,PetscInt mlocal_sub,MatReuse reuse,Mat *matredundant)
2389 {
2390   PetscMPIInt    rank,size;
2391   MPI_Comm       comm=((PetscObject)mat)->comm;
2392   PetscErrorCode ierr;
2393   PetscInt       nsends=0,nrecvs=0,i,rownz_max=0;
2394   PetscMPIInt    *send_rank=PETSC_NULL,*recv_rank=PETSC_NULL;
2395   PetscInt       *rowrange=mat->rmap->range;
2396   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2397   Mat            A=aij->A,B=aij->B,C=*matredundant;
2398   Mat_SeqAIJ     *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data;
2399   PetscScalar    *sbuf_a;
2400   PetscInt       nzlocal=a->nz+b->nz;
2401   PetscInt       j,cstart=mat->cmap->rstart,cend=mat->cmap->rend,row,nzA,nzB,ncols,*cworkA,*cworkB;
2402   PetscInt       rstart=mat->rmap->rstart,rend=mat->rmap->rend,*bmap=aij->garray,M,N;
2403   PetscInt       *cols,ctmp,lwrite,*rptr,l,*sbuf_j;
2404   MatScalar      *aworkA,*aworkB;
2405   PetscScalar    *vals;
2406   PetscMPIInt    tag1,tag2,tag3,imdex;
2407   MPI_Request    *s_waits1=PETSC_NULL,*s_waits2=PETSC_NULL,*s_waits3=PETSC_NULL,
2408                  *r_waits1=PETSC_NULL,*r_waits2=PETSC_NULL,*r_waits3=PETSC_NULL;
2409   MPI_Status     recv_status,*send_status;
2410   PetscInt       *sbuf_nz=PETSC_NULL,*rbuf_nz=PETSC_NULL,count;
2411   PetscInt       **rbuf_j=PETSC_NULL;
2412   PetscScalar    **rbuf_a=PETSC_NULL;
2413   Mat_Redundant  *redund=PETSC_NULL;
2414   PetscContainer container;
2415 
2416   PetscFunctionBegin;
2417   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2418   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2419 
2420   if (reuse == MAT_REUSE_MATRIX) {
2421     ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr);
2422     if (M != N || M != mat->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong global size");
2423     ierr = MatGetLocalSize(C,&M,&N);CHKERRQ(ierr);
2424     if (M != N || M != mlocal_sub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong local size");
2425     ierr = PetscObjectQuery((PetscObject)C,"Mat_Redundant",(PetscObject *)&container);CHKERRQ(ierr);
2426     if (container) {
2427       ierr = PetscContainerGetPointer(container,(void **)&redund);CHKERRQ(ierr);
2428     } else {
2429       SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Container does not exist");
2430     }
2431     if (nzlocal != redund->nzlocal) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong nzlocal");
2432 
2433     nsends    = redund->nsends;
2434     nrecvs    = redund->nrecvs;
2435     send_rank = redund->send_rank;
2436     recv_rank = redund->recv_rank;
2437     sbuf_nz   = redund->sbuf_nz;
2438     rbuf_nz   = redund->rbuf_nz;
2439     sbuf_j    = redund->sbuf_j;
2440     sbuf_a    = redund->sbuf_a;
2441     rbuf_j    = redund->rbuf_j;
2442     rbuf_a    = redund->rbuf_a;
2443   }
2444 
2445   if (reuse == MAT_INITIAL_MATRIX){
2446     PetscMPIInt  subrank,subsize;
2447     PetscInt     nleftover,np_subcomm;
2448     /* get the destination processors' id send_rank, nsends and nrecvs */
2449     ierr = MPI_Comm_rank(subcomm,&subrank);CHKERRQ(ierr);
2450     ierr = MPI_Comm_size(subcomm,&subsize);CHKERRQ(ierr);
2451     ierr = PetscMalloc2(size,PetscMPIInt,&send_rank,size,PetscMPIInt,&recv_rank);CHKERRQ(ierr);
2452     np_subcomm = size/nsubcomm;
2453     nleftover  = size - nsubcomm*np_subcomm;
2454     nsends = 0; nrecvs = 0;
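         /* pair up processes that hold the same subrank in the other subcommunicators (this
            assumes the interlaced rank layout); they exchange local rows so that every
            subcommunicator can assemble the full matrix */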
2455     for (i=0; i<size; i++){ /* i=rank*/
2456       if (subrank == i/nsubcomm && rank != i){ /* my_subrank == other's subrank */
2457         send_rank[nsends] = i; nsends++;
2458         recv_rank[nrecvs++] = i;
2459       }
2460     }
2461     if (rank >= size - nleftover){/* this proc is a leftover processor */
2462       i = size-nleftover-1;
2463       j = 0;
2464       while (j < nsubcomm - nleftover){
2465         send_rank[nsends++] = i;
2466         i--; j++;
2467       }
2468     }
2469 
2470     if (nleftover && subsize == size/nsubcomm && subrank==subsize-1){ /* this proc recvs from leftover processors */
2471       for (i=0; i<nleftover; i++){
2472         recv_rank[nrecvs++] = size-nleftover+i;
2473       }
2474     }
2475 
2476     /* allocate sbuf_j, sbuf_a */
2477     i = nzlocal + rowrange[rank+1] - rowrange[rank] + 2;
2478     ierr = PetscMalloc(i*sizeof(PetscInt),&sbuf_j);CHKERRQ(ierr);
2479     ierr = PetscMalloc((nzlocal+1)*sizeof(PetscScalar),&sbuf_a);CHKERRQ(ierr);
2480   } /* endof if (reuse == MAT_INITIAL_MATRIX) */
2481 
2482   /* copy mat's local entries into the buffers */
2483   if (reuse == MAT_INITIAL_MATRIX){
2484     rownz_max = 0;
2485     rptr = sbuf_j;
2486     cols = sbuf_j + rend-rstart + 1;
2487     vals = sbuf_a;
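         /* sbuf_j layout: row pointers for the local rows followed by the (global) column
            indices; sbuf_a holds the matching values in the same order */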
2488     rptr[0] = 0;
2489     for (i=0; i<rend-rstart; i++){
2490       row = i + rstart;
2491       nzA    = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i];
2492       ncols  = nzA + nzB;
2493       cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i];
2494       aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i];
2495       /* load the column indices for this row into cols */
2496       lwrite = 0;
2497       for (l=0; l<nzB; l++) {
2498         if ((ctmp = bmap[cworkB[l]]) < cstart){
2499           vals[lwrite]   = aworkB[l];
2500           cols[lwrite++] = ctmp;
2501         }
2502       }
2503       for (l=0; l<nzA; l++){
2504         vals[lwrite]   = aworkA[l];
2505         cols[lwrite++] = cstart + cworkA[l];
2506       }
2507       for (l=0; l<nzB; l++) {
2508         if ((ctmp = bmap[cworkB[l]]) >= cend){
2509           vals[lwrite]   = aworkB[l];
2510           cols[lwrite++] = ctmp;
2511         }
2512       }
2513       vals += ncols;
2514       cols += ncols;
2515       rptr[i+1] = rptr[i] + ncols;
2516       if (rownz_max < ncols) rownz_max = ncols;
2517     }
2518     if (rptr[rend-rstart] != a->nz + b->nz) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB,"rptr[%D] %D != %D + %D",rend-rstart,rptr[rend-rstart],a->nz,b->nz);
2519   } else { /* only copy matrix values into sbuf_a */
2520     rptr = sbuf_j;
2521     vals = sbuf_a;
2522     rptr[0] = 0;
2523     for (i=0; i<rend-rstart; i++){
2524       row = i + rstart;
2525       nzA    = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i];
2526       ncols  = nzA + nzB;
2527       cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i];
2528       aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i];
2529       lwrite = 0;
2530       for (l=0; l<nzB; l++) {
2531         if ((ctmp = bmap[cworkB[l]]) < cstart) vals[lwrite++] = aworkB[l];
2532       }
2533       for (l=0; l<nzA; l++) vals[lwrite++] = aworkA[l];
2534       for (l=0; l<nzB; l++) {
2535         if ((ctmp = bmap[cworkB[l]]) >= cend) vals[lwrite++] = aworkB[l];
2536       }
2537       vals += ncols;
2538       rptr[i+1] = rptr[i] + ncols;
2539     }
2540   } /* endof if (reuse == MAT_INITIAL_MATRIX) */
2541 
2542   /* send nzlocal to others, and recv other's nzlocal */
2543   /*--------------------------------------------------*/
2544   if (reuse == MAT_INITIAL_MATRIX){
2545     ierr = PetscMalloc2(3*(nsends + nrecvs)+1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr);
2546     s_waits2 = s_waits3 + nsends;
2547     s_waits1 = s_waits2 + nsends;
2548     r_waits1 = s_waits1 + nsends;
2549     r_waits2 = r_waits1 + nrecvs;
2550     r_waits3 = r_waits2 + nrecvs;
2551   } else {
2552     ierr = PetscMalloc2(nsends + nrecvs +1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr);
2553     r_waits3 = s_waits3 + nsends;
2554   }
2555 
2556   ierr = PetscObjectGetNewTag((PetscObject)mat,&tag3);CHKERRQ(ierr);
2557   if (reuse == MAT_INITIAL_MATRIX){
2558     /* get new tags to keep the communication clean */
2559     ierr = PetscObjectGetNewTag((PetscObject)mat,&tag1);CHKERRQ(ierr);
2560     ierr = PetscObjectGetNewTag((PetscObject)mat,&tag2);CHKERRQ(ierr);
2561     ierr = PetscMalloc4(nsends,PetscInt,&sbuf_nz,nrecvs,PetscInt,&rbuf_nz,nrecvs,PetscInt*,&rbuf_j,nrecvs,PetscScalar*,&rbuf_a);CHKERRQ(ierr);
2562 
2563     /* post receives of other's nzlocal */
2564     for (i=0; i<nrecvs; i++){
2565       ierr = MPI_Irecv(rbuf_nz+i,1,MPIU_INT,MPI_ANY_SOURCE,tag1,comm,r_waits1+i);CHKERRQ(ierr);
2566     }
2567     /* send nzlocal to others */
2568     for (i=0; i<nsends; i++){
2569       sbuf_nz[i] = nzlocal;
2570       ierr = MPI_Isend(sbuf_nz+i,1,MPIU_INT,send_rank[i],tag1,comm,s_waits1+i);CHKERRQ(ierr);
2571     }
2572     /* wait on receives of nzlocal; allocate space for rbuf_j, rbuf_a */
2573     count = nrecvs;
2574     while (count) {
2575       ierr = MPI_Waitany(nrecvs,r_waits1,&imdex,&recv_status);CHKERRQ(ierr);
2576       recv_rank[imdex] = recv_status.MPI_SOURCE;
2577       /* allocate rbuf_a and rbuf_j; then post receives of rbuf_j */
2578       ierr = PetscMalloc((rbuf_nz[imdex]+1)*sizeof(PetscScalar),&rbuf_a[imdex]);CHKERRQ(ierr);
2579 
2580       i = rowrange[recv_status.MPI_SOURCE+1] - rowrange[recv_status.MPI_SOURCE]; /* number of expected mat->i */
2581       rbuf_nz[imdex] += i + 2;
2582       ierr = PetscMalloc(rbuf_nz[imdex]*sizeof(PetscInt),&rbuf_j[imdex]);CHKERRQ(ierr);
2583       ierr = MPI_Irecv(rbuf_j[imdex],rbuf_nz[imdex],MPIU_INT,recv_status.MPI_SOURCE,tag2,comm,r_waits2+imdex);CHKERRQ(ierr);
2584       count--;
2585     }
2586     /* wait on sends of nzlocal */
2587     if (nsends) {ierr = MPI_Waitall(nsends,s_waits1,send_status);CHKERRQ(ierr);}
2588     /* send mat->i,j to others, and recv from other's */
2589     /*------------------------------------------------*/
2590     for (i=0; i<nsends; i++){
2591       j = nzlocal + rowrange[rank+1] - rowrange[rank] + 1;
2592       ierr = MPI_Isend(sbuf_j,j,MPIU_INT,send_rank[i],tag2,comm,s_waits2+i);CHKERRQ(ierr);
2593     }
2594     /* wait on receives of mat->i,j */
2595     /*------------------------------*/
2596     count = nrecvs;
2597     while (count) {
2598       ierr = MPI_Waitany(nrecvs,r_waits2,&imdex,&recv_status);CHKERRQ(ierr);
2599       if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE);
2600       count--;
2601     }
2602     /* wait on sends of mat->i,j */
2603     /*---------------------------*/
2604     if (nsends) {
2605       ierr = MPI_Waitall(nsends,s_waits2,send_status);CHKERRQ(ierr);
2606     }
2607   } /* endof if (reuse == MAT_INITIAL_MATRIX) */
2608 
2609   /* post receives, send and receive mat->a */
2610   /*----------------------------------------*/
2611   for (imdex=0; imdex<nrecvs; imdex++) {
2612     ierr = MPI_Irecv(rbuf_a[imdex],rbuf_nz[imdex],MPIU_SCALAR,recv_rank[imdex],tag3,comm,r_waits3+imdex);CHKERRQ(ierr);
2613   }
2614   for (i=0; i<nsends; i++){
2615     ierr = MPI_Isend(sbuf_a,nzlocal,MPIU_SCALAR,send_rank[i],tag3,comm,s_waits3+i);CHKERRQ(ierr);
2616   }
2617   count = nrecvs;
2618   while (count) {
2619     ierr = MPI_Waitany(nrecvs,r_waits3,&imdex,&recv_status);CHKERRQ(ierr);
2620     if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE);
2621     count--;
2622   }
2623   if (nsends) {
2624     ierr = MPI_Waitall(nsends,s_waits3,send_status);CHKERRQ(ierr);
2625   }
2626 
2627   ierr = PetscFree2(s_waits3,send_status);CHKERRQ(ierr);
2628 
2629   /* create redundant matrix */
2630   /*-------------------------*/
2631   if (reuse == MAT_INITIAL_MATRIX){
2632     /* compute rownz_max for preallocation */
2633     for (imdex=0; imdex<nrecvs; imdex++){
2634       j = rowrange[recv_rank[imdex]+1] - rowrange[recv_rank[imdex]];
2635       rptr = rbuf_j[imdex];
2636       for (i=0; i<j; i++){
2637         ncols = rptr[i+1] - rptr[i];
2638         if (rownz_max < ncols) rownz_max = ncols;
2639       }
2640     }
2641 
2642     ierr = MatCreate(subcomm,&C);CHKERRQ(ierr);
2643     ierr = MatSetSizes(C,mlocal_sub,mlocal_sub,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr);
2644     ierr = MatSetFromOptions(C);CHKERRQ(ierr);
2645     ierr = MatSeqAIJSetPreallocation(C,rownz_max,PETSC_NULL);CHKERRQ(ierr);
2646     ierr = MatMPIAIJSetPreallocation(C,rownz_max,PETSC_NULL,rownz_max,PETSC_NULL);CHKERRQ(ierr);
2647   } else {
2648     C = *matredundant;
2649   }
2650 
2651   /* insert local matrix entries */
2652   rptr = sbuf_j;
2653   cols = sbuf_j + rend-rstart + 1;
2654   vals = sbuf_a;
2655   for (i=0; i<rend-rstart; i++){
2656     row   = i + rstart;
2657     ncols = rptr[i+1] - rptr[i];
2658     ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
2659     vals += ncols;
2660     cols += ncols;
2661   }
2662   /* insert received matrix entries */
2663   for (imdex=0; imdex<nrecvs; imdex++){
2664     rstart = rowrange[recv_rank[imdex]];
2665     rend   = rowrange[recv_rank[imdex]+1];
2666     rptr = rbuf_j[imdex];
2667     cols = rbuf_j[imdex] + rend-rstart + 1;
2668     vals = rbuf_a[imdex];
2669     for (i=0; i<rend-rstart; i++){
2670       row   = i + rstart;
2671       ncols = rptr[i+1] - rptr[i];
2672       ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
2673       vals += ncols;
2674       cols += ncols;
2675     }
2676   }
2677   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2678   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2679   ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr);
2680   if (M != mat->rmap->N || N != mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"redundant mat size %d != input mat size %d",M,mat->rmap->N);
2681   if (reuse == MAT_INITIAL_MATRIX){
2682     PetscContainer container;
2683     *matredundant = C;
2684     /* create a supporting struct and attach it to C for reuse */
2685     ierr = PetscNewLog(C,Mat_Redundant,&redund);CHKERRQ(ierr);
2686     ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
2687     ierr = PetscContainerSetPointer(container,redund);CHKERRQ(ierr);
2688     ierr = PetscObjectCompose((PetscObject)C,"Mat_Redundant",(PetscObject)container);CHKERRQ(ierr);
2689     ierr = PetscContainerSetUserDestroy(container,PetscContainerDestroy_MatRedundant);CHKERRQ(ierr);
2690 
2691     redund->nzlocal = nzlocal;
2692     redund->nsends  = nsends;
2693     redund->nrecvs  = nrecvs;
2694     redund->send_rank = send_rank;
2695     redund->recv_rank = recv_rank;
2696     redund->sbuf_nz = sbuf_nz;
2697     redund->rbuf_nz = rbuf_nz;
2698     redund->sbuf_j  = sbuf_j;
2699     redund->sbuf_a  = sbuf_a;
2700     redund->rbuf_j  = rbuf_j;
2701     redund->rbuf_a  = rbuf_a;
2702 
2703     redund->MatDestroy = C->ops->destroy;
2704     C->ops->destroy    = MatDestroy_MatRedundant;
2705   }
2706   PetscFunctionReturn(0);
2707 }
2708 
2709 #undef __FUNCT__
2710 #define __FUNCT__ "MatGetRowMaxAbs_MPIAIJ"
2711 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2712 {
2713   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2714   PetscErrorCode ierr;
2715   PetscInt       i,*idxb = 0;
2716   PetscScalar    *va,*vb;
2717   Vec            vtmp;
2718 
2719   PetscFunctionBegin;
2720   ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr);
2721   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2722   if (idx) {
2723     for (i=0; i<A->rmap->n; i++) {
2724       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2725     }
2726   }
2727 
2728   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
2729   if (idx) {
2730     ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr);
2731   }
2732   ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
2733   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
2734 
2735   for (i=0; i<A->rmap->n; i++){
2736     if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
2737       va[i] = vb[i];
2738       if (idx) idx[i] = a->garray[idxb[i]];
2739     }
2740   }
2741 
2742   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2743   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
2744   ierr = PetscFree(idxb);CHKERRQ(ierr);
2745   ierr = VecDestroy(vtmp);CHKERRQ(ierr);
2746   PetscFunctionReturn(0);
2747 }
2748 
2749 #undef __FUNCT__
2750 #define __FUNCT__ "MatGetRowMinAbs_MPIAIJ"
2751 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2752 {
2753   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2754   PetscErrorCode ierr;
2755   PetscInt       i,*idxb = 0;
2756   PetscScalar    *va,*vb;
2757   Vec            vtmp;
2758 
2759   PetscFunctionBegin;
2760   ierr = MatGetRowMinAbs(a->A,v,idx);CHKERRQ(ierr);
2761   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2762   if (idx) {
2763     for (i=0; i<A->rmap->n; i++) {
2764       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2765     }
2766   }
2767 
2768   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
2769   if (idx) {
2770     ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr);
2771   }
2772   ierr = MatGetRowMinAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
2773   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
2774 
2775   for (i=0; i<A->rmap->n; i++){
2776     if (PetscAbsScalar(va[i]) > PetscAbsScalar(vb[i])) {
2777       va[i] = vb[i];
2778       if (idx) idx[i] = a->garray[idxb[i]];
2779     }
2780   }
2781 
2782   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2783   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
2784   ierr = PetscFree(idxb);CHKERRQ(ierr);
2785   ierr = VecDestroy(vtmp);CHKERRQ(ierr);
2786   PetscFunctionReturn(0);
2787 }
2788 
2789 #undef __FUNCT__
2790 #define __FUNCT__ "MatGetRowMin_MPIAIJ"
2791 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2792 {
2793   Mat_MPIAIJ    *mat    = (Mat_MPIAIJ *) A->data;
2794   PetscInt       n      = A->rmap->n;
2795   PetscInt       cstart = A->cmap->rstart;
2796   PetscInt      *cmap   = mat->garray;
2797   PetscInt      *diagIdx, *offdiagIdx;
2798   Vec            diagV, offdiagV;
2799   PetscScalar   *a, *diagA, *offdiagA;
2800   PetscInt       r;
2801   PetscErrorCode ierr;
2802 
2803   PetscFunctionBegin;
2804   ierr = PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);CHKERRQ(ierr);
2805   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr);
2806   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr);
2807   ierr = MatGetRowMin(mat->A, diagV,    diagIdx);CHKERRQ(ierr);
2808   ierr = MatGetRowMin(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
2809   ierr = VecGetArray(v,        &a);CHKERRQ(ierr);
2810   ierr = VecGetArray(diagV,    &diagA);CHKERRQ(ierr);
2811   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2812   for(r = 0; r < n; ++r) {
2813     if (PetscAbsScalar(diagA[r]) <= PetscAbsScalar(offdiagA[r])) {
2814       a[r]   = diagA[r];
2815       idx[r] = cstart + diagIdx[r];
2816     } else {
2817       a[r]   = offdiagA[r];
2818       idx[r] = cmap[offdiagIdx[r]];
2819     }
2820   }
2821   ierr = VecRestoreArray(v,        &a);CHKERRQ(ierr);
2822   ierr = VecRestoreArray(diagV,    &diagA);CHKERRQ(ierr);
2823   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2824   ierr = VecDestroy(diagV);CHKERRQ(ierr);
2825   ierr = VecDestroy(offdiagV);CHKERRQ(ierr);
2826   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
2827   PetscFunctionReturn(0);
2828 }
2829 
2830 #undef __FUNCT__
2831 #define __FUNCT__ "MatGetRowMax_MPIAIJ"
2832 PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2833 {
2834   Mat_MPIAIJ    *mat    = (Mat_MPIAIJ *) A->data;
2835   PetscInt       n      = A->rmap->n;
2836   PetscInt       cstart = A->cmap->rstart;
2837   PetscInt      *cmap   = mat->garray;
2838   PetscInt      *diagIdx, *offdiagIdx;
2839   Vec            diagV, offdiagV;
2840   PetscScalar   *a, *diagA, *offdiagA;
2841   PetscInt       r;
2842   PetscErrorCode ierr;
2843 
2844   PetscFunctionBegin;
2845   ierr = PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);CHKERRQ(ierr);
2846   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr);
2847   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr);
2848   ierr = MatGetRowMax(mat->A, diagV,    diagIdx);CHKERRQ(ierr);
2849   ierr = MatGetRowMax(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
2850   ierr = VecGetArray(v,        &a);CHKERRQ(ierr);
2851   ierr = VecGetArray(diagV,    &diagA);CHKERRQ(ierr);
2852   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2853   for(r = 0; r < n; ++r) {
2854     if (PetscAbsScalar(diagA[r]) >= PetscAbsScalar(offdiagA[r])) {
2855       a[r]   = diagA[r];
2856       idx[r] = cstart + diagIdx[r];
2857     } else {
2858       a[r]   = offdiagA[r];
2859       idx[r] = cmap[offdiagIdx[r]];
2860     }
2861   }
2862   ierr = VecRestoreArray(v,        &a);CHKERRQ(ierr);
2863   ierr = VecRestoreArray(diagV,    &diagA);CHKERRQ(ierr);
2864   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2865   ierr = VecDestroy(diagV);CHKERRQ(ierr);
2866   ierr = VecDestroy(offdiagV);CHKERRQ(ierr);
2867   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
2868   PetscFunctionReturn(0);
2869 }
2870 
2871 #undef __FUNCT__
2872 #define __FUNCT__ "MatGetSeqNonzerostructure_MPIAIJ"
2873 PetscErrorCode MatGetSeqNonzerostructure_MPIAIJ(Mat mat,Mat *newmat)
2874 {
2875   PetscErrorCode ierr;
2876   Mat            *dummy;
2877 
2878   PetscFunctionBegin;
2879   ierr = MatGetSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy);CHKERRQ(ierr);
2880   *newmat = *dummy;
2881   ierr = PetscFree(dummy);CHKERRQ(ierr);
2882   PetscFunctionReturn(0);
2883 }
2884 
2885 extern PetscErrorCode  MatFDColoringApply_AIJ(Mat,MatFDColoring,Vec,MatStructure*,void*);
2886 /* -------------------------------------------------------------------*/
2887 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
2888        MatGetRow_MPIAIJ,
2889        MatRestoreRow_MPIAIJ,
2890        MatMult_MPIAIJ,
2891 /* 4*/ MatMultAdd_MPIAIJ,
2892        MatMultTranspose_MPIAIJ,
2893        MatMultTransposeAdd_MPIAIJ,
2894 #ifdef PETSC_HAVE_PBGL
2895        MatSolve_MPIAIJ,
2896 #else
2897        0,
2898 #endif
2899        0,
2900        0,
2901 /*10*/ 0,
2902        0,
2903        0,
2904        MatSOR_MPIAIJ,
2905        MatTranspose_MPIAIJ,
2906 /*15*/ MatGetInfo_MPIAIJ,
2907        MatEqual_MPIAIJ,
2908        MatGetDiagonal_MPIAIJ,
2909        MatDiagonalScale_MPIAIJ,
2910        MatNorm_MPIAIJ,
2911 /*20*/ MatAssemblyBegin_MPIAIJ,
2912        MatAssemblyEnd_MPIAIJ,
2913        MatSetOption_MPIAIJ,
2914        MatZeroEntries_MPIAIJ,
2915 /*24*/ MatZeroRows_MPIAIJ,
2916        0,
2917 #ifdef PETSC_HAVE_PBGL
2918        0,
2919 #else
2920        0,
2921 #endif
2922        0,
2923        0,
2924 /*29*/ MatSetUpPreallocation_MPIAIJ,
2925 #ifdef PETSC_HAVE_PBGL
2926        0,
2927 #else
2928        0,
2929 #endif
2930        0,
2931        0,
2932        0,
2933 /*34*/ MatDuplicate_MPIAIJ,
2934        0,
2935        0,
2936        0,
2937        0,
2938 /*39*/ MatAXPY_MPIAIJ,
2939        MatGetSubMatrices_MPIAIJ,
2940        MatIncreaseOverlap_MPIAIJ,
2941        MatGetValues_MPIAIJ,
2942        MatCopy_MPIAIJ,
2943 /*44*/ MatGetRowMax_MPIAIJ,
2944        MatScale_MPIAIJ,
2945        0,
2946        0,
2947        MatZeroRowsColumns_MPIAIJ,
2948 /*49*/ MatSetBlockSize_MPIAIJ,
2949        0,
2950        0,
2951        0,
2952        0,
2953 /*54*/ MatFDColoringCreate_MPIAIJ,
2954        0,
2955        MatSetUnfactored_MPIAIJ,
2956        MatPermute_MPIAIJ,
2957        0,
2958 /*59*/ MatGetSubMatrix_MPIAIJ,
2959        MatDestroy_MPIAIJ,
2960        MatView_MPIAIJ,
2961        0,
2962        0,
2963 /*64*/ 0,
2964        0,
2965        0,
2966        0,
2967        0,
2968 /*69*/ MatGetRowMaxAbs_MPIAIJ,
2969        MatGetRowMinAbs_MPIAIJ,
2970        0,
2971        MatSetColoring_MPIAIJ,
2972 #if defined(PETSC_HAVE_ADIC)
2973        MatSetValuesAdic_MPIAIJ,
2974 #else
2975        0,
2976 #endif
2977        MatSetValuesAdifor_MPIAIJ,
2978 /*75*/ MatFDColoringApply_AIJ,
2979        0,
2980        0,
2981        0,
2982        0,
2983 /*80*/ 0,
2984        0,
2985        0,
2986 /*83*/ MatLoad_MPIAIJ,
2987        0,
2988        0,
2989        0,
2990        0,
2991        0,
2992 /*89*/ MatMatMult_MPIAIJ_MPIAIJ,
2993        MatMatMultSymbolic_MPIAIJ_MPIAIJ,
2994        MatMatMultNumeric_MPIAIJ_MPIAIJ,
2995        MatPtAP_Basic,
2996        MatPtAPSymbolic_MPIAIJ,
2997 /*94*/ MatPtAPNumeric_MPIAIJ,
2998        0,
2999        0,
3000        0,
3001        0,
3002 /*99*/ 0,
3003        MatPtAPSymbolic_MPIAIJ_MPIAIJ,
3004        MatPtAPNumeric_MPIAIJ_MPIAIJ,
3005        MatConjugate_MPIAIJ,
3006        0,
3007 /*104*/MatSetValuesRow_MPIAIJ,
3008        MatRealPart_MPIAIJ,
3009        MatImaginaryPart_MPIAIJ,
3010        0,
3011        0,
3012 /*109*/0,
3013        MatGetRedundantMatrix_MPIAIJ,
3014        MatGetRowMin_MPIAIJ,
3015        0,
3016        0,
3017 /*114*/MatGetSeqNonzerostructure_MPIAIJ,
3018        0,
3019        0,
3020        0,
3021        0,
3022 /*119*/0,
3023        0,
3024        0,
3025        0,
3026        MatGetMultiProcBlock_MPIAIJ,
3027 /*124*/MatFindNonZeroRows_MPIAIJ,
3028        0,
3029        0,
3030        0,
3031        MatGetSubMatricesParallel_MPIAIJ
3032 };
3033 
3034 /* ----------------------------------------------------------------------------------------*/
3035 
3036 EXTERN_C_BEGIN
3037 #undef __FUNCT__
3038 #define __FUNCT__ "MatStoreValues_MPIAIJ"
3039 PetscErrorCode  MatStoreValues_MPIAIJ(Mat mat)
3040 {
3041   Mat_MPIAIJ     *aij = (Mat_MPIAIJ *)mat->data;
3042   PetscErrorCode ierr;
3043 
3044   PetscFunctionBegin;
3045   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
3046   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
3047   PetscFunctionReturn(0);
3048 }
3049 EXTERN_C_END
3050 
3051 EXTERN_C_BEGIN
3052 #undef __FUNCT__
3053 #define __FUNCT__ "MatRetrieveValues_MPIAIJ"
3054 PetscErrorCode  MatRetrieveValues_MPIAIJ(Mat mat)
3055 {
3056   Mat_MPIAIJ     *aij = (Mat_MPIAIJ *)mat->data;
3057   PetscErrorCode ierr;
3058 
3059   PetscFunctionBegin;
3060   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
3061   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
3062   PetscFunctionReturn(0);
3063 }
3064 EXTERN_C_END
3065 
3066 EXTERN_C_BEGIN
3067 #undef __FUNCT__
3068 #define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ"
3069 PetscErrorCode  MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
3070 {
3071   Mat_MPIAIJ     *b;
3072   PetscErrorCode ierr;
3073   PetscInt       i;
3074 
3075   PetscFunctionBegin;
3076   if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5;
3077   if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2;
3078   if (d_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz);
3079   if (o_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz);
3080 
3081   ierr = PetscLayoutSetBlockSize(B->rmap,1);CHKERRQ(ierr);
3082   ierr = PetscLayoutSetBlockSize(B->cmap,1);CHKERRQ(ierr);
3083   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3084   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3085   if (d_nnz) {
3086     for (i=0; i<B->rmap->n; i++) {
3087       if (d_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %D value %D",i,d_nnz[i]);
3088     }
3089   }
3090   if (o_nnz) {
3091     for (i=0; i<B->rmap->n; i++) {
3092       if (o_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %D value %D",i,o_nnz[i]);
3093     }
3094   }
3095   b = (Mat_MPIAIJ*)B->data;
3096 
3097   if (!B->preallocated) {
3098     /* Explicitly create 2 MATSEQAIJ matrices. */
3099     ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
3100     ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr);
3101     ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr);
3102     ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr);
3103     ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
3104     ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr);
3105     ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr);
3106     ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr);
3107   }
3108 
3109   ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr);
3110   ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr);
3111   B->preallocated = PETSC_TRUE;
3112   PetscFunctionReturn(0);
3113 }
3114 EXTERN_C_END
3115 
3116 #undef __FUNCT__
3117 #define __FUNCT__ "MatDuplicate_MPIAIJ"
3118 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
3119 {
3120   Mat            mat;
3121   Mat_MPIAIJ     *a,*oldmat = (Mat_MPIAIJ*)matin->data;
3122   PetscErrorCode ierr;
3123 
3124   PetscFunctionBegin;
3125   *newmat       = 0;
3126   ierr = MatCreate(((PetscObject)matin)->comm,&mat);CHKERRQ(ierr);
3127   ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr);
3128   ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr);
3129   ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
3130   a    = (Mat_MPIAIJ*)mat->data;
3131 
3132   mat->factortype    = matin->factortype;
3133   mat->rmap->bs      = matin->rmap->bs;
3134   mat->assembled    = PETSC_TRUE;
3135   mat->insertmode   = NOT_SET_VALUES;
3136   mat->preallocated = PETSC_TRUE;
3137 
3138   a->size           = oldmat->size;
3139   a->rank           = oldmat->rank;
3140   a->donotstash     = oldmat->donotstash;
3141   a->roworiented    = oldmat->roworiented;
3142   a->rowindices     = 0;
3143   a->rowvalues      = 0;
3144   a->getrowactive   = PETSC_FALSE;
3145 
3146   ierr = PetscLayoutCopy(matin->rmap,&mat->rmap);CHKERRQ(ierr);
3147   ierr = PetscLayoutCopy(matin->cmap,&mat->cmap);CHKERRQ(ierr);
3148 
3149   if (oldmat->colmap) {
3150 #if defined (PETSC_USE_CTABLE)
3151     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
3152 #else
3153     ierr = PetscMalloc((mat->cmap->N)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr);
3154     ierr = PetscLogObjectMemory(mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
3155     ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
3156 #endif
3157   } else a->colmap = 0;
3158   if (oldmat->garray) {
3159     PetscInt len;
3160     len  = oldmat->B->cmap->n;
3161     ierr = PetscMalloc((len+1)*sizeof(PetscInt),&a->garray);CHKERRQ(ierr);
3162     ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr);
3163     if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); }
3164   } else a->garray = 0;
3165 
3166   ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
3167   ierr = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr);
3168   ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
3169   ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr);
3170   ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
3171   ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr);
3172   ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
3173   ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr);
3174   ierr = PetscFListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr);
3175   *newmat = mat;
3176   PetscFunctionReturn(0);
3177 }
3178 
3179 /*
3180     Allows sending/receiving messages larger than 2 gigabytes in a single call
3181 */
3182 static int MPILong_Send(void *mess,PetscInt cnt, MPI_Datatype type,int to, int tag, MPI_Comm comm)
3183 {
3184   int             ierr;
3185   static PetscInt CHUNKSIZE = 250000000; /* 250,000,000 */
3186   PetscInt        i,numchunks;
3187   PetscMPIInt     icnt;
3188 
3189   numchunks = cnt/CHUNKSIZE + 1;
3190   for (i=0; i<numchunks; i++) {
3191     icnt = PetscMPIIntCast((i < numchunks-1) ? CHUNKSIZE : cnt - (numchunks-1)*CHUNKSIZE);
3192     ierr = MPI_Send(mess,icnt,type,to,tag,comm);if (ierr) return ierr;
3193     if (type == MPIU_INT) {
3194       mess = (void*) (((PetscInt*)mess) + CHUNKSIZE);
3195     } else if (type == MPIU_SCALAR) {
3196       mess = (void*) (((PetscScalar*)mess) + CHUNKSIZE);
3197     } else SETERRQ(comm,PETSC_ERR_SUP,"No support for this datatype");
3198   }
3199   return 0;
3200 }
3201 static int MPILong_Recv(void *mess,PetscInt cnt, MPI_Datatype type,int from, int tag, MPI_Comm comm)
3202 {
3203   int             ierr;
3204   static PetscInt CHUNKSIZE = 250000000; /* 250,000,000 */
3205   MPI_Status      status;
3206   PetscInt        i,numchunks;
3207   PetscMPIInt     icnt;
3208 
3209   numchunks = cnt/CHUNKSIZE + 1;
3210   for (i=0; i<numchunks; i++) {
3211     icnt = PetscMPIIntCast((i < numchunks-1) ? CHUNKSIZE : cnt - (numchunks-1)*CHUNKSIZE);
3212     ierr = MPI_Recv(mess,icnt,type,from,tag,comm,&status);if (ierr) return ierr;
3213     if (type == MPIU_INT) {
3214       mess = (void*) (((PetscInt*)mess) + CHUNKSIZE);
3215     } else if (type == MPIU_SCALAR) {
3216       mess = (void*) (((PetscScalar*)mess) + CHUNKSIZE);
3217     } else SETERRQ(comm,PETSC_ERR_SUP,"No support for this datatype");
3218   }
3219   return 0;
3220 }
3221 
3222 #undef __FUNCT__
3223 #define __FUNCT__ "MatLoad_MPIAIJ"
3224 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer)
3225 {
3226   PetscScalar    *vals,*svals;
3227   MPI_Comm       comm = ((PetscObject)viewer)->comm;
3228   PetscErrorCode ierr;
3229   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
3230   PetscInt       i,nz,j,rstart,rend,mmax,maxnz = 0,grows,gcols;
3231   PetscInt       header[4],*rowlengths = 0,M,N,m,*cols;
3232   PetscInt       *ourlens = PETSC_NULL,*procsnz = PETSC_NULL,*offlens = PETSC_NULL,jj,*mycols,*smycols;
3233   PetscInt       cend,cstart,n,*rowners,sizesset=1;
3234   int            fd;
3235 
3236   PetscFunctionBegin;
3237   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3238   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3239   if (!rank) {
3240     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
3241     ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
3242     if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
3243   }
3244 
3245   if (newMat->rmap->n < 0 && newMat->rmap->N < 0 && newMat->cmap->n < 0 && newMat->cmap->N < 0) sizesset = 0;
3246 
3247   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
3248   M = header[1]; N = header[2];
3249   /* If global rows/cols are set to PETSC_DECIDE, set it to the sizes given in the file */
3250   if (sizesset && newMat->rmap->N < 0) newMat->rmap->N = M;
3251   if (sizesset && newMat->cmap->N < 0) newMat->cmap->N = N;
3252 
3253   /* If global sizes are set, check if they are consistent with that given in the file */
3254   if (sizesset) {
3255     ierr = MatGetSize(newMat,&grows,&gcols);CHKERRQ(ierr);
3256   }
3257   if (sizesset && newMat->rmap->N != grows) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows:Matrix in file has (%d) and input matrix has (%d)",M,grows);
3258   if (sizesset && newMat->cmap->N != gcols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols:Matrix in file has (%d) and input matrix has (%d)",N,gcols);
3259 
3260   /* determine ownership of all rows */
3261   if (newMat->rmap->n < 0 ) m    = M/size + ((M % size) > rank); /* PETSC_DECIDE */
3262   else m = newMat->rmap->n; /* Set by user */
3263 
3264   ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr);
3265   ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
3266 
3267   /* First process needs enough room for process with most rows */
3268   if (!rank) {
3269     mmax       = rowners[1];
3270     for (i=2; i<=size; i++) {
3271       mmax = PetscMax(mmax,rowners[i]);
3272     }
3273   } else mmax = m;
3274 
3275   rowners[0] = 0;
3276   for (i=2; i<=size; i++) {
3277     rowners[i] += rowners[i-1];
3278   }
3279   rstart = rowners[rank];
3280   rend   = rowners[rank+1];
3281 
3282   /* distribute row lengths to all processors */
3283   ierr    = PetscMalloc2(mmax,PetscInt,&ourlens,mmax,PetscInt,&offlens);CHKERRQ(ierr);
3284   if (!rank) {
3285     ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr);
3286     ierr = PetscMalloc(mmax*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
3287     ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr);
3288     ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr);
3289     for (j=0; j<m; j++) {
3290       procsnz[0] += ourlens[j];
3291     }
3292     for (i=1; i<size; i++) {
3293       ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr);
3294       /* calculate the number of nonzeros on each processor */
3295       for (j=0; j<rowners[i+1]-rowners[i]; j++) {
3296         procsnz[i] += rowlengths[j];
3297       }
3298       ierr = MPILong_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
3299     }
3300     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
3301   } else {
3302     ierr = MPILong_Recv(ourlens,m,MPIU_INT,0,tag,comm);CHKERRQ(ierr);
3303   }
3304 
3305   if (!rank) {
3306     /* determine max buffer needed and allocate it */
3307     maxnz = 0;
3308     for (i=0; i<size; i++) {
3309       maxnz = PetscMax(maxnz,procsnz[i]);
3310     }
3311     ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr);
3312 
3313     /* read in my part of the matrix column indices  */
3314     nz   = procsnz[0];
3315     ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr);
3316     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
3317 
3318     /* read in everyone else's and ship off */
3319     for (i=1; i<size; i++) {
3320       nz     = procsnz[i];
3321       ierr   = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
3322       ierr   = MPILong_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
3323     }
3324     ierr = PetscFree(cols);CHKERRQ(ierr);
3325   } else {
3326     /* determine buffer space needed for message */
3327     nz = 0;
3328     for (i=0; i<m; i++) {
3329       nz += ourlens[i];
3330     }
3331     ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr);
3332 
3333     /* receive message of column indices*/
3334     ierr = MPILong_Recv(mycols,nz,MPIU_INT,0,tag,comm);CHKERRQ(ierr);
3335   }
3336 
3337   /* determine column ownership if matrix is not square */
3338   if (N != M) {
3339     if (newMat->cmap->n < 0) n      = N/size + ((N % size) > rank);
3340     else n = newMat->cmap->n;
3341     ierr   = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3342     cstart = cend - n;
3343   } else {
3344     cstart = rstart;
3345     cend   = rend;
3346     n      = cend - cstart;
3347   }
3348 
3349   /* loop over local rows, determining number of off diagonal entries */
3350   ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr);
3351   jj = 0;
3352   for (i=0; i<m; i++) {
3353     for (j=0; j<ourlens[i]; j++) {
3354       if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++;
3355       jj++;
3356     }
3357   }
3358 
3359   for (i=0; i<m; i++) {
3360     ourlens[i] -= offlens[i];
3361   }
3362   if (!sizesset) {
3363     ierr = MatSetSizes(newMat,m,n,M,N);CHKERRQ(ierr);
3364   }
3365   ierr = MatMPIAIJSetPreallocation(newMat,0,ourlens,0,offlens);CHKERRQ(ierr);
3366 
3367   for (i=0; i<m; i++) {
3368     ourlens[i] += offlens[i];
3369   }
3370 
3371   if (!rank) {
3372     ierr = PetscMalloc((maxnz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
3373 
3374     /* read in my part of the matrix numerical values  */
3375     nz   = procsnz[0];
3376     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3377 
3378     /* insert into matrix */
3379     jj      = rstart;
3380     smycols = mycols;
3381     svals   = vals;
3382     for (i=0; i<m; i++) {
3383       ierr = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
3384       smycols += ourlens[i];
3385       svals   += ourlens[i];
3386       jj++;
3387     }
3388 
3389     /* read in other processors and ship out */
3390     for (i=1; i<size; i++) {
3391       nz     = procsnz[i];
3392       ierr   = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3393       ierr   = MPILong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr);
3394     }
3395     ierr = PetscFree(procsnz);CHKERRQ(ierr);
3396   } else {
3397     /* receive numeric values */
3398     ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
3399 
3400     /* receive message of values*/
3401     ierr   = MPILong_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr);
3402 
3403     /* insert into matrix */
3404     jj      = rstart;
3405     smycols = mycols;
3406     svals   = vals;
3407     for (i=0; i<m; i++) {
3408       ierr     = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
3409       smycols += ourlens[i];
3410       svals   += ourlens[i];
3411       jj++;
3412     }
3413   }
3414   ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr);
3415   ierr = PetscFree(vals);CHKERRQ(ierr);
3416   ierr = PetscFree(mycols);CHKERRQ(ierr);
3417   ierr = PetscFree(rowners);CHKERRQ(ierr);
3418 
3419   ierr = MatAssemblyBegin(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3420   ierr = MatAssemblyEnd(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3421   PetscFunctionReturn(0);
3422 }
3423 
3424 #undef __FUNCT__
3425 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ"
3426 PetscErrorCode MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat)
3427 {
3428   PetscErrorCode ierr;
3429   IS             iscol_local;
3430   PetscInt       csize;
3431 
3432   PetscFunctionBegin;
3433   ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
3434   if (call == MAT_REUSE_MATRIX) {
3435     ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr);
3436     if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3437   } else {
3438     ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
3439   }
3440   ierr = MatGetSubMatrix_MPIAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr);
3441   if (call == MAT_INITIAL_MATRIX) {
3442     ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
3443     ierr = ISDestroy(iscol_local);CHKERRQ(ierr);
3444   }
3445   PetscFunctionReturn(0);
3446 }
3447 
3448 #undef __FUNCT__
3449 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ_Private"
3450 /*
3451     Not great since it makes two copies of the submatrix: first a SeqAIJ
3452   on each process, and then the end result formed by concatenating the local matrices.
3453   Writing it directly would be much like MatGetSubMatrices_MPIAIJ().
3454 
3455   Note: This requires a sequential iscol with all indices.
3456 */
3457 PetscErrorCode MatGetSubMatrix_MPIAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat)
3458 {
3459   PetscErrorCode ierr;
3460   PetscMPIInt    rank,size;
3461   PetscInt       i,m,n,rstart,row,rend,nz,*cwork,j;
3462   PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal;
3463   Mat            *local,M,Mreuse;
3464   MatScalar      *vwork,*aa;
3465   MPI_Comm       comm = ((PetscObject)mat)->comm;
3466   Mat_SeqAIJ     *aij;
3467 
3468 
3469   PetscFunctionBegin;
3470   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3471   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3472 
3473   if (call ==  MAT_REUSE_MATRIX) {
3474     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr);
3475     if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3476     local = &Mreuse;
3477     ierr  = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);CHKERRQ(ierr);
3478   } else {
3479     ierr   = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr);
3480     Mreuse = *local;
3481     ierr   = PetscFree(local);CHKERRQ(ierr);
3482   }
3483 
3484   /*
3485       m - number of local rows
3486       n - number of columns (same on all processors)
3487       rstart - first row in new global matrix generated
3488   */
3489   ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
3490   if (call == MAT_INITIAL_MATRIX) {
3491     aij = (Mat_SeqAIJ*)(Mreuse)->data;
3492     ii  = aij->i;
3493     jj  = aij->j;
3494 
3495     /*
3496         Determine the number of non-zeros in the diagonal and off-diagonal
3497         portions of the matrix in order to do correct preallocation
3498     */
3499 
3500     /* first get start and end of "diagonal" columns */
3501     if (csize == PETSC_DECIDE) {
3502       ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
3503       if (mglobal == n) { /* square matrix */
3504         nlocal = m;
3505       } else {
3506         nlocal = n/size + ((n % size) > rank);
3507       }
3508     } else {
3509       nlocal = csize;
3510     }
3511     ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3512     rstart = rend - nlocal;
3513     if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n);
3514 
3515     /* next, compute all the lengths */
3516     ierr  = PetscMalloc((2*m+1)*sizeof(PetscInt),&dlens);CHKERRQ(ierr);
3517     olens = dlens + m;
3518     for (i=0; i<m; i++) {
3519       jend = ii[i+1] - ii[i];
3520       olen = 0;
3521       dlen = 0;
3522       for (j=0; j<jend; j++) {
3523         if (*jj < rstart || *jj >= rend) olen++;
3524         else dlen++;
3525         jj++;
3526       }
3527       olens[i] = olen;
3528       dlens[i] = dlen;
3529     }
3530     ierr = MatCreate(comm,&M);CHKERRQ(ierr);
3531     ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr);
3532     ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr);
3533     ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr);
3534     ierr = PetscFree(dlens);CHKERRQ(ierr);
3535   } else {
3536     PetscInt ml,nl;
3537 
3538     M = *newmat;
3539     ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
3540     if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
3541     ierr = MatZeroEntries(M);CHKERRQ(ierr);
3542     /*
3543          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
3544        rather than the slower MatSetValues().
3545     */
3546     M->was_assembled = PETSC_TRUE;
3547     M->assembled     = PETSC_FALSE;
3548   }
3549   ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
3550   aij = (Mat_SeqAIJ*)(Mreuse)->data;
3551   ii  = aij->i;
3552   jj  = aij->j;
3553   aa  = aij->a;
3554   for (i=0; i<m; i++) {
3555     row   = rstart + i;
3556     nz    = ii[i+1] - ii[i];
3557     cwork = jj;     jj += nz;
3558     vwork = aa;     aa += nz;
3559     ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
3560   }
3561 
3562   ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3563   ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3564   *newmat = M;
3565 
3566   /* save submatrix used in processor for next request */
3567   if (call ==  MAT_INITIAL_MATRIX) {
3568     ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
3569     ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr);
3570   }
3571 
3572   PetscFunctionReturn(0);
3573 }
3574 
3575 EXTERN_C_BEGIN
3576 #undef __FUNCT__
3577 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR_MPIAIJ"
3578 PetscErrorCode  MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[])
3579 {
3580   PetscInt       m,cstart, cend,j,nnz,i,d;
3581   PetscInt       *d_nnz,*o_nnz,nnz_max = 0,rstart,ii;
3582   const PetscInt *JJ;
3583   PetscScalar    *values;
3584   PetscErrorCode ierr;
3585 
3586   PetscFunctionBegin;
3587   if (Ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0 it is %D",Ii[0]);
3588 
3589   ierr = PetscLayoutSetBlockSize(B->rmap,1);CHKERRQ(ierr);
3590   ierr = PetscLayoutSetBlockSize(B->cmap,1);CHKERRQ(ierr);
3591   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3592   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3593   m      = B->rmap->n;
3594   cstart = B->cmap->rstart;
3595   cend   = B->cmap->rend;
3596   rstart = B->rmap->rstart;
3597 
3598   ierr  = PetscMalloc2(m,PetscInt,&d_nnz,m,PetscInt,&o_nnz);CHKERRQ(ierr);
3599 
3600 #if defined(PETSC_USE_DEBUG)
3601   for (i=0; i<m; i++) {
3602     nnz     = Ii[i+1]- Ii[i];
3603     JJ      = J + Ii[i];
3604     if (nnz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative %D number of columns",i,nnz);
3605     if (nnz && (JJ[0] < 0)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index %D",i,JJ[0]);
3606     if (nnz && (JJ[nnz-1] >= B->cmap->N)) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N);
3607   }
3608 #endif
3609 
3610   for (i=0; i<m; i++) {
3611     nnz     = Ii[i+1]- Ii[i];
3612     JJ      = J + Ii[i];
3613     nnz_max = PetscMax(nnz_max,nnz);
3614     d       = 0;
3615     for (j=0; j<nnz; j++) {
3616       if (cstart <= JJ[j] && JJ[j] < cend) d++;
3617     }
3618     d_nnz[i] = d;
3619     o_nnz[i] = nnz - d;
3620   }
3621   ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
3622   ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);
3623 
3624   if (v) values = (PetscScalar*)v;
3625   else {
3626     ierr = PetscMalloc((nnz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr);
3627     ierr = PetscMemzero(values,nnz_max*sizeof(PetscScalar));CHKERRQ(ierr);
3628   }
3629 
3630   for (i=0; i<m; i++) {
3631     ii   = i + rstart;
3632     nnz  = Ii[i+1]- Ii[i];
3633     ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ? Ii[i] : 0),INSERT_VALUES);CHKERRQ(ierr);
3634   }
3635   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3636   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3637 
3638   if (!v) {
3639     ierr = PetscFree(values);CHKERRQ(ierr);
3640   }
3641   PetscFunctionReturn(0);
3642 }
3643 EXTERN_C_END
3644 
3645 #undef __FUNCT__
3646 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR"
3647 /*@
3648    MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format
3649    (the default parallel PETSc format).
3650 
3651    Collective on MPI_Comm
3652 
3653    Input Parameters:
3654 +  B - the matrix
3655 .  i - the indices into j for the start of each local row (starts with zero)
3656 .  j - the column indices for each local row (starts with zero)
3657 -  v - optional values in the matrix
3658 
3659    Level: developer
3660 
3661    Notes:
3662        The i, j, and v arrays ARE copied by this routine into the internal format used by PETSc;
3663      thus you CANNOT change the matrix entries by changing the values of v[] after you have
3664      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3665 
3666        The i and j indices are 0 based, and the entries of i are offsets into the local j array.
3667 
3668        The format which is used for the sparse matrix input is equivalent to a
3669     row-major ordering, i.e., for the following matrix, the input data expected is
3670     as shown:
3671 
3672         1 0 0
3673         2 0 3     P0
3674        -------
3675         4 5 6     P1
3676 
3677      Process0 [P0]: rows_owned=[0,1]
3678         i =  {0,1,3}  [size = nrow+1  = 2+1]
3679         j =  {0,0,2}  [size = nz = 3]
3680         v =  {1,2,3}  [size = nz = 3]
3681 
3682      Process1 [P1]: rows_owned=[2]
3683         i =  {0,3}    [size = nrow+1  = 1+1]
3684         j =  {0,1,2}  [size = nz = 3]
3685         v =  {4,5,6}  [size = nz = 3]
3686 
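     A minimal calling sketch for P0 in the layout above (P1 would pass its own
     local i, j, v arrays; the communicator, sizes and error handling shown are
     illustrative assumptions):

.vb
     Mat         B;
     PetscInt    i[] = {0,1,3};          /* P0 row offsets      */
     PetscInt    j[] = {0,0,2};          /* P0 column indices   */
     PetscScalar v[] = {1.0,2.0,3.0};    /* P0 values           */

     ierr = MatCreate(PETSC_COMM_WORLD,&B);CHKERRQ(ierr);
     ierr = MatSetSizes(B,2,PETSC_DECIDE,3,3);CHKERRQ(ierr);  /* P0 owns rows 0 and 1 */
     ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr);
     ierr = MatMPIAIJSetPreallocationCSR(B,i,j,v);CHKERRQ(ierr);
.ve
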
3687 .keywords: matrix, aij, compressed row, sparse, parallel
3688 
3689 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateMPIAIJ(), MPIAIJ,
3690           MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays()
3691 @*/
3692 PetscErrorCode  MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
3693 {
3694   PetscErrorCode ierr;
3695 
3696   PetscFunctionBegin;
3697   ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocationCSR_C",(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,i,j,v));CHKERRQ(ierr);
3698   PetscFunctionReturn(0);
3699 }
3700 
3701 #undef __FUNCT__
3702 #define __FUNCT__ "MatMPIAIJSetPreallocation"
3703 /*@C
3704    MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format
3705    (the default parallel PETSc format).  For good matrix assembly performance
3706    the user should preallocate the matrix storage by setting the parameters
3707    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3708    performance can be increased by more than a factor of 50.
3709 
3710    Collective on MPI_Comm
3711 
3712    Input Parameters:
3713 +  A - the matrix
3714 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
3715            (same value is used for all local rows)
3716 .  d_nnz - array containing the number of nonzeros in the various rows of the
3717            DIAGONAL portion of the local submatrix (possibly different for each row)
3718            or PETSC_NULL, if d_nz is used to specify the nonzero structure.
3719            The size of this array is equal to the number of local rows, i.e 'm'.
3720            You must leave room for the diagonal entry even if it is zero.
3721 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
3722            submatrix (same value is used for all local rows).
3723 -  o_nnz - array containing the number of nonzeros in the various rows of the
3724            OFF-DIAGONAL portion of the local submatrix (possibly different for
3725            each row) or PETSC_NULL, if o_nz is used to specify the nonzero
3726            structure. The size of this array is equal to the number
3727            of local rows, i.e 'm'.
3728 
3729    If the *_nnz parameter is given then the *_nz parameter is ignored
3730 
3731    The AIJ format (also called the Yale sparse matrix format or
3732    compressed row storage (CSR)), is fully compatible with standard Fortran 77
3733    storage.  The stored row and column indices begin with zero.
3734    See the <A href="../../docs/manual.pdf#nameddest=ch_mat">Mat chapter of the users manual</A> for details.
3735 
3736    The parallel matrix is partitioned such that the first m0 rows belong to
3737    process 0, the next m1 rows belong to process 1, the next m2 rows belong
3738    to process 2 etc., where m0,m1,m2... are the input parameter 'm'.
3739 
3740    The DIAGONAL portion of the local submatrix of a processor can be defined
3741    as the submatrix which is obtained by extracting the part corresponding to
3742    the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the
3743    first row that belongs to the processor, r2 is the last row belonging to
3744    this processor, and c1-c2 is the range of indices of the local part of a
3745    vector suitable for applying the matrix to.  This is an mxn matrix.  In the
3746    common case of a square matrix, the row and column ranges are the same and
3747    the DIAGONAL part is also square. The remaining portion of the local
3748    submatrix (mxN) constitute the OFF-DIAGONAL portion.
3749 
3750    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
3751 
3752    You can call MatGetInfo() to get information on how effective the preallocation was;
3753    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
3754    You can also run with the option -info and look for messages with the string
3755    malloc in them to see if additional memory allocation was needed.
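
   For instance, a minimal sketch of inspecting those fields after assembly
   (the MAT_LOCAL flag, the casts, and the output format are illustrative choices):

.vb
     MatInfo info;
     ierr = MatGetInfo(A,MAT_LOCAL,&info);CHKERRQ(ierr);
     ierr = PetscPrintf(PETSC_COMM_SELF,"mallocs %D nz_allocated %D nz_used %D nz_unneeded %D\n",
                        (PetscInt)info.mallocs,(PetscInt)info.nz_allocated,
                        (PetscInt)info.nz_used,(PetscInt)info.nz_unneeded);CHKERRQ(ierr);
.ve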
3756 
3757    Example usage:
3758 
3759    Consider the following 8x8 matrix with 34 non-zero values, that is
3760    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
3761    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
3762    as follows:
3763 
3764 .vb
3765             1  2  0  |  0  3  0  |  0  4
3766     Proc0   0  5  6  |  7  0  0  |  8  0
3767             9  0 10  | 11  0  0  | 12  0
3768     -------------------------------------
3769            13  0 14  | 15 16 17  |  0  0
3770     Proc1   0 18  0  | 19 20 21  |  0  0
3771             0  0  0  | 22 23  0  | 24  0
3772     -------------------------------------
3773     Proc2  25 26 27  |  0  0 28  | 29  0
3774            30  0  0  | 31 32 33  |  0 34
3775 .ve
3776 
3777    This can be represented as a collection of submatrices as:
3778 
3779 .vb
3780       A B C
3781       D E F
3782       G H I
3783 .ve
3784 
3785    Where the submatrices A,B,C are owned by proc0, D,E,F are
3786    owned by proc1, G,H,I are owned by proc2.
3787 
3788    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3789    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3790    The 'M','N' parameters are 8,8, and have the same values on all procs.
3791 
3792    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
3793    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
3794    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
3795    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
3796    part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
3797    matrix, and [DF] as another SeqAIJ matrix.
3798 
3799    When d_nz, o_nz parameters are specified, d_nz storage elements are
3800    allocated for every row of the local diagonal submatrix, and o_nz
3801    storage locations are allocated for every row of the OFF-DIAGONAL submat.
3802    One way to choose d_nz and o_nz is to use the maximum number of nonzeros per
3803    local row for each of the local DIAGONAL and OFF-DIAGONAL submatrices.
3804    In this case, the values of d_nz,o_nz are:
3805 .vb
3806      proc0 : dnz = 2, o_nz = 2
3807      proc1 : dnz = 3, o_nz = 2
3808      proc2 : dnz = 1, o_nz = 4
3809 .ve
3810    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
3811    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
3812    for proc2, i.e., we are using 12+15+10=37 storage locations to store
3813    34 values.
3814 
3815    When d_nnz, o_nnz parameters are specified, the storage is specified
3816    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
3817    In the above case the values for d_nnz,o_nnz are:
3818 .vb
3819      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
3820      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
3821      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
3822 .ve
3823    Here the space allocated is the sum of all the above values, i.e., 34, and
3824    hence the preallocation is perfect.
3825 
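   As a concrete sketch, proc1 could request exactly this preallocation with
   (assuming A was already created with type MPIAIJ and 3 local rows; names are illustrative):

.vb
     PetscInt d_nnz[] = {3,3,2};   /* DIAGONAL-block nonzeros per local row     */
     PetscInt o_nnz[] = {2,1,1};   /* OFF-DIAGONAL-block nonzeros per local row */
     ierr = MatMPIAIJSetPreallocation(A,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
.ve
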
3826    Level: intermediate
3827 
3828 .keywords: matrix, aij, compressed row, sparse, parallel
3829 
3830 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIAIJ(), MatMPIAIJSetPreallocationCSR(),
3831           MPIAIJ, MatGetInfo()
3832 @*/
3833 PetscErrorCode  MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
3834 {
3835   PetscErrorCode ierr;
3836 
3837   PetscFunctionBegin;
3838   ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocation_C",(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr);
3839   PetscFunctionReturn(0);
3840 }
3841 
3842 #undef __FUNCT__
3843 #define __FUNCT__ "MatCreateMPIAIJWithArrays"
3844 /*@
3845      MatCreateMPIAIJWithArrays - creates a MPI AIJ matrix using arrays that contain in standard
3846          CSR format the local rows.
3847 
3848    Collective on MPI_Comm
3849 
3850    Input Parameters:
3851 +  comm - MPI communicator
3852 .  m - number of local rows (Cannot be PETSC_DECIDE)
3853 .  n - This value should be the same as the local size used in creating the
3854        x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have it
3855        calculated if N is given). For square matrices n is almost always m.
3856 .  M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given)
3857 .  N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given)
3858 .   i - row indices
3859 .   j - column indices
3860 -   a - matrix values
3861 
3862    Output Parameter:
3863 .   mat - the matrix
3864 
3865    Level: intermediate
3866 
3867    Notes:
3868        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
3869      thus you CANNOT change the matrix entries by changing the values of a[] after you have
3870      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3871 
3872        The i and j indices are 0 based, and the entries of i are offsets into the local j array.
3873 
3874        The format which is used for the sparse matrix input is equivalent to a
3875     row-major ordering, i.e., for the following matrix, the input data expected is
3876     as shown:
3877 
3878         1 0 0
3879         2 0 3     P0
3880        -------
3881         4 5 6     P1
3882 
3883      Process0 [P0]: rows_owned=[0,1]
3884         i =  {0,1,3}  [size = nrow+1  = 2+1]
3885         j =  {0,0,2}  [size = nz = 3]
3886         v =  {1,2,3}  [size = nz = 3]
3887 
3888      Process1 [P1]: rows_owned=[2]
3889         i =  {0,3}    [size = nrow+1  = 1+1]
3890         j =  {0,1,2}  [size = nz = 3]
3891         v =  {4,5,6}  [size = nz = 3]
3892 
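     A minimal calling sketch for P0 in the layout above (P1 passes its own local
     arrays; the communicator and error handling are illustrative assumptions):

.vb
     Mat         A;
     PetscInt    i[] = {0,1,3}, j[] = {0,0,2};
     PetscScalar a[] = {1.0,2.0,3.0};
     ierr = MatCreateMPIAIJWithArrays(PETSC_COMM_WORLD,2,PETSC_DECIDE,3,3,i,j,a,&A);CHKERRQ(ierr);
.ve
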
3893 .keywords: matrix, aij, compressed row, sparse, parallel
3894 
3895 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
3896           MPIAIJ, MatCreateMPIAIJ(), MatCreateMPIAIJWithSplitArrays()
3897 @*/
3898 PetscErrorCode  MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
3899 {
3900   PetscErrorCode ierr;
3901 
3902   PetscFunctionBegin;
3903   if (i[0]) {
3904     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
3905   }
3906   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
3907   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
3908   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
3909   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
3910   ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr);
3911   PetscFunctionReturn(0);
3912 }
3913 
3914 #undef __FUNCT__
3915 #define __FUNCT__ "MatCreateMPIAIJ"
3916 /*@C
3917    MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format
3918    (the default parallel PETSc format).  For good matrix assembly performance
3919    the user should preallocate the matrix storage by setting the parameters
3920    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3921    performance can be increased by more than a factor of 50.
3922 
3923    Collective on MPI_Comm
3924 
3925    Input Parameters:
3926 +  comm - MPI communicator
3927 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
3928            This value should be the same as the local size used in creating the
3929            y vector for the matrix-vector product y = Ax.
3930 .  n - This value should be the same as the local size used in creating the
3931        x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have it
3932        calculated if N is given). For square matrices n is almost always m.
3933 .  M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given)
3934 .  N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given)
3935 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
3936            (same value is used for all local rows)
3937 .  d_nnz - array containing the number of nonzeros in the various rows of the
3938            DIAGONAL portion of the local submatrix (possibly different for each row)
3939            or PETSC_NULL, if d_nz is used to specify the nonzero structure.
3940            The size of this array is equal to the number of local rows, i.e 'm'.
3941            You must leave room for the diagonal entry even if it is zero.
3942 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
3943            submatrix (same value is used for all local rows).
3944 -  o_nnz - array containing the number of nonzeros in the various rows of the
3945            OFF-DIAGONAL portion of the local submatrix (possibly different for
3946            each row) or PETSC_NULL, if o_nz is used to specify the nonzero
3947            structure. The size of this array is equal to the number
3948            of local rows, i.e 'm'.
3949 
3950    Output Parameter:
3951 .  A - the matrix
3952 
3953    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
3954    MatXXXXSetPreallocation() paradigm instead of this routine directly.
3955    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
3956 
3957    Notes:
3958    If the *_nnz parameter is given then the *_nz parameter is ignored
3959 
3960    m,n,M,N parameters specify the size of the matrix, and its partitioning across
3961    processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
3962    storage requirements for this matrix.
3963 
3964    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one
3965    processor then it must be used on all processors that share the object for
3966    that argument.
3967 
3968    The user MUST specify either the local or global matrix dimensions
3969    (possibly both).
3970 
3971    The parallel matrix is partitioned across processors such that the
3972    first m0 rows belong to process 0, the next m1 rows belong to
3973    process 1, the next m2 rows belong to process 2 etc., where
3974    m0,m1,m2,.. are the input parameter 'm', i.e., each processor stores
3975    values corresponding to an [m x N] submatrix.
3976 
3977    The columns are logically partitioned with the n0 columns belonging
3978    to 0th partition, the next n1 columns belonging to the next
3979    partition etc., where n0,n1,n2... are the input parameter 'n'.
3980 
3981    The DIAGONAL portion of the local submatrix on any given processor
3982    is the submatrix corresponding to the m rows and n columns owned
3983    by the given processor, i.e., the diagonal matrix on
3984    process 0 is [m0 x n0], diagonal matrix on process 1 is [m1 x n1]
3985    etc. The remaining portion of the local submatrix [m x (N-n)]
3986    constitute the OFF-DIAGONAL portion. The example below better
3987    illustrates this concept.
3988 
3989    For a square global matrix we define each processor's diagonal portion
3990    to be its local rows and the corresponding columns (a square submatrix);
3991    each processor's off-diagonal portion encompasses the remainder of the
3992    local matrix (a rectangular submatrix).
3993 
3994    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
3995 
3996    When calling this routine with a single process communicator, a matrix of
3997    type SEQAIJ is returned.  If a matrix of type MPIAIJ is desired for this
3998    type of communicator, use the construction mechanism:
3999      MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatSetSizes(A, m,n,M,N); MatMPIAIJSetPreallocation(A,...);
4000 
4001    By default, this format uses inodes (identical nodes) when possible.
4002    We search for consecutive rows with the same nonzero structure, thereby
4003    reusing matrix information to achieve increased efficiency.
4004 
4005    Options Database Keys:
4006 +  -mat_no_inode  - Do not use inodes
4007 .  -mat_inode_limit <limit> - Sets inode limit (max limit=5)
4008 -  -mat_aij_oneindex - Internally use indexing starting at 1
4009         rather than 0.  Note that when calling MatSetValues(),
4010         the user still MUST index entries starting at 0!
4011 
4012 
4013    Example usage:
4014 
4015    Consider the following 8x8 matrix with 34 nonzero values, which is
4016    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
4017    proc1 owns 3 rows, and proc2 owns 2 rows. This division can be shown
4018    as follows:
4019 
4020 .vb
4021             1  2  0  |  0  3  0  |  0  4
4022     Proc0   0  5  6  |  7  0  0  |  8  0
4023             9  0 10  | 11  0  0  | 12  0
4024     -------------------------------------
4025            13  0 14  | 15 16 17  |  0  0
4026     Proc1   0 18  0  | 19 20 21  |  0  0
4027             0  0  0  | 22 23  0  | 24  0
4028     -------------------------------------
4029     Proc2  25 26 27  |  0  0 28  | 29  0
4030            30  0  0  | 31 32 33  |  0 34
4031 .ve
4032 
4033    This can be represented as a collection of submatrices as:
4034 
4035 .vb
4036       A B C
4037       D E F
4038       G H I
4039 .ve
4040 
4041    Where the submatrices A,B,C are owned by proc0, D,E,F are
4042    owned by proc1, G,H,I are owned by proc2.
4043 
4044    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4045    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4046    The 'M','N' parameters are 8,8, and have the same values on all procs.
4047 
4048    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
4049    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
4050    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
4051    Internally, each processor stores the DIAGONAL part and the OFF-DIAGONAL
4052    part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
4053    matrix, and [DF] as another SeqAIJ matrix.
4054 
4055    When the d_nz, o_nz parameters are specified, d_nz storage elements are
4056    allocated for every row of the local DIAGONAL submatrix, and o_nz
4057    storage locations are allocated for every row of the OFF-DIAGONAL submatrix.
4058    One way to choose d_nz and o_nz is to use the maximum number of nonzeros over
4059    the local rows of the DIAGONAL and the OFF-DIAGONAL submatrices, respectively.
4060    In this case, the values of d_nz,o_nz are:
4061 .vb
4062      proc0 : d_nz = 2, o_nz = 2
4063      proc1 : d_nz = 3, o_nz = 2
4064      proc2 : d_nz = 1, o_nz = 4
4065 .ve
4066    We are allocating m*(d_nz+o_nz) storage locations on every proc. This
4067    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, and 2*(1+4)=10
4068    for proc2, i.e., we are using 12+15+10=37 storage locations to store
4069    34 values.
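
   For instance, proc0 in the example above could use the single-value form as follows
   (a sketch only; each process makes the collective call with its own local values):
.vb
     ierr = MatCreateMPIAIJ(comm,3,3,8,8,2,PETSC_NULL,2,PETSC_NULL,&A);CHKERRQ(ierr);
.ve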
4070 
4071    When the d_nnz, o_nnz parameters are specified, the storage is specified
4072    for every row, corresponding to both the DIAGONAL and OFF-DIAGONAL submatrices.
4073    In the above case the values for d_nnz,o_nnz are:
4074 .vb
4075      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
4076      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
4077      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
4078 .ve
4079    Here the space allocated is the sum of all the above values, i.e., 34, and
4080    hence the preallocation is perfect.
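
   Correspondingly, proc1 in the example above could use the per-row form as follows
   (a sketch only; the other processes pass their own arrays):
.vb
     PetscInt d_nnz[3] = {3,3,2}, o_nnz[3] = {2,1,1};
     ierr = MatCreateMPIAIJ(comm,3,3,8,8,0,d_nnz,0,o_nnz,&A);CHKERRQ(ierr);
.ve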
4081 
4082    Level: intermediate
4083 
4084 .keywords: matrix, aij, compressed row, sparse, parallel
4085 
4086 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
4087           MPIAIJ, MatCreateMPIAIJWithArrays()
4088 @*/
4089 PetscErrorCode  MatCreateMPIAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
4090 {
4091   PetscErrorCode ierr;
4092   PetscMPIInt    size;
4093 
4094   PetscFunctionBegin;
4095   ierr = MatCreate(comm,A);CHKERRQ(ierr);
4096   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
4097   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4098   if (size > 1) {
4099     ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr);
4100     ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
4101   } else {
4102     ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr);
4103     ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr);
4104   }
4105   PetscFunctionReturn(0);
4106 }
4107 
4108 #undef __FUNCT__
4109 #define __FUNCT__ "MatMPIAIJGetSeqAIJ"
4110 PetscErrorCode  MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,PetscInt *colmap[])
4111 {
4112   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
4113 
4114   PetscFunctionBegin;
4115   *Ad     = a->A;
4116   *Ao     = a->B;
4117   *colmap = a->garray;
4118   PetscFunctionReturn(0);
4119 }
4120 
4121 #undef __FUNCT__
4122 #define __FUNCT__ "MatSetColoring_MPIAIJ"
4123 PetscErrorCode MatSetColoring_MPIAIJ(Mat A,ISColoring coloring)
4124 {
4125   PetscErrorCode ierr;
4126   PetscInt       i;
4127   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
4128 
4129   PetscFunctionBegin;
4130   if (coloring->ctype == IS_COLORING_GLOBAL) {
4131     ISColoringValue *allcolors,*colors;
4132     ISColoring      ocoloring;
4133 
4134     /* set coloring for diagonal portion */
4135     ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr);
4136 
4137     /* set coloring for off-diagonal portion */
4138     ierr = ISAllGatherColors(((PetscObject)A)->comm,coloring->n,coloring->colors,PETSC_NULL,&allcolors);CHKERRQ(ierr);
4139     ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
4140     for (i=0; i<a->B->cmap->n; i++) {
4141       colors[i] = allcolors[a->garray[i]];
4142     }
4143     ierr = PetscFree(allcolors);CHKERRQ(ierr);
4144     ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
4145     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
4146     ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
4147   } else if (coloring->ctype == IS_COLORING_GHOSTED) {
4148     ISColoringValue *colors;
4149     PetscInt        *larray;
4150     ISColoring      ocoloring;
4151 
4152     /* set coloring for diagonal portion */
4153     ierr = PetscMalloc((a->A->cmap->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr);
4154     for (i=0; i<a->A->cmap->n; i++) {
4155       larray[i] = i + A->cmap->rstart;
4156     }
4157     ierr = ISGlobalToLocalMappingApply(A->cmapping,IS_GTOLM_MASK,a->A->cmap->n,larray,PETSC_NULL,larray);CHKERRQ(ierr);
4158     ierr = PetscMalloc((a->A->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
4159     for (i=0; i<a->A->cmap->n; i++) {
4160       colors[i] = coloring->colors[larray[i]];
4161     }
4162     ierr = PetscFree(larray);CHKERRQ(ierr);
4163     ierr = ISColoringCreate(PETSC_COMM_SELF,coloring->n,a->A->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
4164     ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr);
4165     ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
4166 
4167     /* set coloring for off-diagonal portion */
4168     ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr);
4169     ierr = ISGlobalToLocalMappingApply(A->cmapping,IS_GTOLM_MASK,a->B->cmap->n,a->garray,PETSC_NULL,larray);CHKERRQ(ierr);
4170     ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
4171     for (i=0; i<a->B->cmap->n; i++) {
4172       colors[i] = coloring->colors[larray[i]];
4173     }
4174     ierr = PetscFree(larray);CHKERRQ(ierr);
4175     ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
4176     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
4177     ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
4178   } else {
4179     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support ISColoringType %d",(int)coloring->ctype);
4180   }
4181 
4182   PetscFunctionReturn(0);
4183 }
4184 
4185 #if defined(PETSC_HAVE_ADIC)
4186 #undef __FUNCT__
4187 #define __FUNCT__ "MatSetValuesAdic_MPIAIJ"
4188 PetscErrorCode MatSetValuesAdic_MPIAIJ(Mat A,void *advalues)
4189 {
4190   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
4191   PetscErrorCode ierr;
4192 
4193   PetscFunctionBegin;
4194   ierr = MatSetValuesAdic_SeqAIJ(a->A,advalues);CHKERRQ(ierr);
4195   ierr = MatSetValuesAdic_SeqAIJ(a->B,advalues);CHKERRQ(ierr);
4196   PetscFunctionReturn(0);
4197 }
4198 #endif
4199 
4200 #undef __FUNCT__
4201 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ"
4202 PetscErrorCode MatSetValuesAdifor_MPIAIJ(Mat A,PetscInt nl,void *advalues)
4203 {
4204   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
4205   PetscErrorCode ierr;
4206 
4207   PetscFunctionBegin;
4208   ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr);
4209   ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr);
4210   PetscFunctionReturn(0);
4211 }
4212 
4213 #undef __FUNCT__
4214 #define __FUNCT__ "MatMerge"
4215 /*@
4216       MatMerge - Creates a single large PETSc matrix by concatenating sequential
4217                  matrices from each processor
4218 
4219     Collective on MPI_Comm
4220 
4221    Input Parameters:
4222 +    comm - the communicator the parallel matrix will live on
4223 .    inmat - the input sequential matrix (one per process)
4224 .    n - number of local columns (or PETSC_DECIDE)
4225 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4226 
4227    Output Parameter:
4228 .    outmat - the parallel matrix generated
4229 
4230     Level: advanced
4231 
4232    Notes: The number of columns of the matrix in EACH processor MUST be the same.
4233 
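   A usage sketch (note that inmat is destroyed by this routine):
.vb
     ierr = MatMerge(comm,inmat,PETSC_DECIDE,MAT_INITIAL_MATRIX,&outmat);CHKERRQ(ierr);
.ve
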
4234 @*/
4235 PetscErrorCode  MatMerge(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
4236 {
4237   PetscErrorCode ierr;
4238   PetscInt       m,N,i,rstart,nnz,Ii,*dnz,*onz;
4239   PetscInt       *indx;
4240   PetscScalar    *values;
4241 
4242   PetscFunctionBegin;
4243   ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
4244   if (scall == MAT_INITIAL_MATRIX){
4245     /* count nonzeros in each row, for diagonal and off diagonal portion of matrix */
4246     if (n == PETSC_DECIDE){
4247       ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr);
4248     }
4249     ierr = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
4250     rstart -= m;
4251 
4252     ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
4253     for (i=0;i<m;i++) {
4254       ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr);
4255       ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr);
4256       ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr);
4257     }
4258     /* This routine will ONLY return MPIAIJ type matrix */
4259     ierr = MatCreate(comm,outmat);CHKERRQ(ierr);
4260     ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
4261     ierr = MatSetType(*outmat,MATMPIAIJ);CHKERRQ(ierr);
4262     ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr);
4263     ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
4264 
4265   } else if (scall == MAT_REUSE_MATRIX){
4266     ierr = MatGetOwnershipRange(*outmat,&rstart,PETSC_NULL);CHKERRQ(ierr);
4267   } else {
4268     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
4269   }
4270 
4271   for (i=0;i<m;i++) {
4272     ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
4273     Ii    = i + rstart;
4274     ierr = MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
4275     ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
4276   }
4277   ierr = MatDestroy(inmat);CHKERRQ(ierr);
4278   ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4279   ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4280 
4281   PetscFunctionReturn(0);
4282 }
4283 
4284 #undef __FUNCT__
4285 #define __FUNCT__ "MatFileSplit"
4286 PetscErrorCode MatFileSplit(Mat A,char *outfile)
4287 {
4288   PetscErrorCode    ierr;
4289   PetscMPIInt       rank;
4290   PetscInt          m,N,i,rstart,nnz;
4291   size_t            len;
4292   const PetscInt    *indx;
4293   PetscViewer       out;
4294   char              *name;
4295   Mat               B;
4296   const PetscScalar *values;
4297 
4298   PetscFunctionBegin;
4299   ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr);
4300   ierr = MatGetSize(A,0,&N);CHKERRQ(ierr);
4301   /* Should this be the type of the diagonal block of A? */
4302   ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr);
4303   ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr);
4304   ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr);
4305   ierr = MatSeqAIJSetPreallocation(B,0,PETSC_NULL);CHKERRQ(ierr);
4306   ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr);
4307   for (i=0;i<m;i++) {
4308     ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
4309     ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
4310     ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
4311   }
4312   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4313   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4314 
4315   ierr = MPI_Comm_rank(((PetscObject)A)->comm,&rank);CHKERRQ(ierr);
4316   ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr);
4317   ierr = PetscMalloc((len+5)*sizeof(char),&name);CHKERRQ(ierr);
4318   sprintf(name,"%s.%d",outfile,rank);
4319   ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr);
4320   ierr = PetscFree(name);
4321   ierr = MatView(B,out);CHKERRQ(ierr);
4322   ierr = PetscViewerDestroy(out);CHKERRQ(ierr);
4323   ierr = MatDestroy(B);CHKERRQ(ierr);
4324   PetscFunctionReturn(0);
4325 }
4326 
4327 extern PetscErrorCode MatDestroy_MPIAIJ(Mat);
4328 #undef __FUNCT__
4329 #define __FUNCT__ "MatDestroy_MPIAIJ_SeqsToMPI"
4330 PetscErrorCode  MatDestroy_MPIAIJ_SeqsToMPI(Mat A)
4331 {
4332   PetscErrorCode       ierr;
4333   Mat_Merge_SeqsToMPI  *merge;
4334   PetscContainer       container;
4335 
4336   PetscFunctionBegin;
4337   ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr);
4338   if (container) {
4339     ierr = PetscContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr);
4340     ierr = PetscFree(merge->id_r);CHKERRQ(ierr);
4341     ierr = PetscFree(merge->len_s);CHKERRQ(ierr);
4342     ierr = PetscFree(merge->len_r);CHKERRQ(ierr);
4343     ierr = PetscFree(merge->bi);CHKERRQ(ierr);
4344     ierr = PetscFree(merge->bj);CHKERRQ(ierr);
4345     ierr = PetscFree(merge->buf_ri[0]);CHKERRQ(ierr);
4346     ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr);
4347     ierr = PetscFree(merge->buf_rj[0]);CHKERRQ(ierr);
4348     ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr);
4349     ierr = PetscFree(merge->coi);CHKERRQ(ierr);
4350     ierr = PetscFree(merge->coj);CHKERRQ(ierr);
4351     ierr = PetscFree(merge->owners_co);CHKERRQ(ierr);
4352     ierr = PetscLayoutDestroy(merge->rowmap);CHKERRQ(ierr);
4353 
4354     ierr = PetscContainerDestroy(container);CHKERRQ(ierr);
4355     ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr);
4356   }
4357   ierr = PetscFree(merge);CHKERRQ(ierr);
4358 
4359   ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr);
4360   PetscFunctionReturn(0);
4361 }
4362 
4363 #include "../src/mat/utils/freespace.h"
4364 #include "petscbt.h"
4365 
4366 #undef __FUNCT__
4367 #define __FUNCT__ "MatMerge_SeqsToMPINumeric"
4368 /*@C
4369       MatMerge_SeqsToMPI - Creates an MPIAIJ matrix by adding sequential
4370                  matrices from each processor
4371 
4372     Collective on MPI_Comm
4373 
4374    Input Parameters:
4375 +    comm - the communicator the parallel matrix will live on
4376 .    seqmat - the input sequential matrix (one per process)
4377 .    m - number of local rows (or PETSC_DECIDE)
4378 .    n - number of local columns (or PETSC_DECIDE)
4379 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4380 
4381    Output Parameter:
4382 .    mpimat - the parallel matrix generated
4383 
4384     Level: advanced
4385 
4386    Notes:
4387      The dimensions of the sequential matrix in each processor MUST be the same.
4388      The input seqmat is included in the container "Mat_Merge_SeqsToMPI", and will be
4389      destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat.
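
     A sketch of retrieving the supporting container attached by the symbolic phase
     (the key composed in the source below is "MatMergeSeqsToMPI"):
.vb
     PetscContainer container;
     ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr);
.ve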
4390 @*/
4391 PetscErrorCode  MatMerge_SeqsToMPINumeric(Mat seqmat,Mat mpimat)
4392 {
4393   PetscErrorCode       ierr;
4394   MPI_Comm             comm=((PetscObject)mpimat)->comm;
4395   Mat_SeqAIJ           *a=(Mat_SeqAIJ*)seqmat->data;
4396   PetscMPIInt          size,rank,taga,*len_s;
4397   PetscInt             N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj=a->j;
4398   PetscInt             proc,m;
4399   PetscInt             **buf_ri,**buf_rj;
4400   PetscInt             k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj;
4401   PetscInt             nrows,**buf_ri_k,**nextrow,**nextai;
4402   MPI_Request          *s_waits,*r_waits;
4403   MPI_Status           *status;
4404   MatScalar            *aa=a->a;
4405   MatScalar            **abuf_r,*ba_i;
4406   Mat_Merge_SeqsToMPI  *merge;
4407   PetscContainer       container;
4408 
4409   PetscFunctionBegin;
4410   ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
4411 
4412   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4413   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4414 
4415   ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr);
4416   if (container) {
4417     ierr  = PetscContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr);
4418   }
4419   bi     = merge->bi;
4420   bj     = merge->bj;
4421   buf_ri = merge->buf_ri;
4422   buf_rj = merge->buf_rj;
4423 
4424   ierr   = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr);
4425   owners = merge->rowmap->range;
4426   len_s  = merge->len_s;
4427 
4428   /* send and recv matrix values */
4429   /*-----------------------------*/
4430   ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr);
4431   ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr);
4432 
4433   ierr = PetscMalloc((merge->nsend+1)*sizeof(MPI_Request),&s_waits);CHKERRQ(ierr);
4434   for (proc=0,k=0; proc<size; proc++){
4435     if (!len_s[proc]) continue;
4436     i = owners[proc];
4437     ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr);
4438     k++;
4439   }
4440 
4441   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);}
4442   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);}
4443   ierr = PetscFree(status);CHKERRQ(ierr);
4444 
4445   ierr = PetscFree(s_waits);CHKERRQ(ierr);
4446   ierr = PetscFree(r_waits);CHKERRQ(ierr);
4447 
4448   /* insert mat values of mpimat */
4449   /*----------------------------*/
4450   ierr = PetscMalloc(N*sizeof(PetscScalar),&ba_i);CHKERRQ(ierr);
4451   ierr = PetscMalloc3(merge->nrecv,PetscInt*,&buf_ri_k,merge->nrecv,PetscInt*,&nextrow,merge->nrecv,PetscInt*,&nextai);CHKERRQ(ierr);
4452 
4453   for (k=0; k<merge->nrecv; k++){
4454     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th received i-structure */
4455     nrows = *(buf_ri_k[k]);
4456     nextrow[k]  = buf_ri_k[k]+1;  /* next row number of k-th received i-structure */
4457     nextai[k]   = buf_ri_k[k] + (nrows + 1);/* points to the next i-structure of k-th received i-structure */
4458   }
4459 
4460   /* set values of ba */
4461   m = merge->rowmap->n;
4462   for (i=0; i<m; i++) {
4463     arow = owners[rank] + i;
4464     bj_i = bj+bi[i];  /* col indices of the i-th row of mpimat */
4465     bnzi = bi[i+1] - bi[i];
4466     ierr = PetscMemzero(ba_i,bnzi*sizeof(PetscScalar));CHKERRQ(ierr);
4467 
4468     /* add local non-zero vals of this proc's seqmat into ba */
4469     anzi = ai[arow+1] - ai[arow];
4470     aj   = a->j + ai[arow];
4471     aa   = a->a + ai[arow];
4472     nextaj = 0;
4473     for (j=0; nextaj<anzi; j++){
4474       if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */
4475         ba_i[j] += aa[nextaj++];
4476       }
4477     }
4478 
4479     /* add received vals into ba */
4480     for (k=0; k<merge->nrecv; k++){ /* k-th received message */
4481       /* i-th row */
4482       if (i == *nextrow[k]) {
4483         anzi = *(nextai[k]+1) - *nextai[k];
4484         aj   = buf_rj[k] + *(nextai[k]);
4485         aa   = abuf_r[k] + *(nextai[k]);
4486         nextaj = 0;
4487         for (j=0; nextaj<anzi; j++){
4488           if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */
4489             ba_i[j] += aa[nextaj++];
4490           }
4491         }
4492         nextrow[k]++; nextai[k]++;
4493       }
4494     }
4495     ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr);
4496   }
4497   ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4498   ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4499 
4500   ierr = PetscFree(abuf_r[0]);CHKERRQ(ierr);
4501   ierr = PetscFree(abuf_r);CHKERRQ(ierr);
4502   ierr = PetscFree(ba_i);CHKERRQ(ierr);
4503   ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
4504   ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
4505   PetscFunctionReturn(0);
4506 }
4507 
4508 #undef __FUNCT__
4509 #define __FUNCT__ "MatMerge_SeqsToMPISymbolic"
4510 PetscErrorCode  MatMerge_SeqsToMPISymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat)
4511 {
4512   PetscErrorCode       ierr;
4513   Mat                  B_mpi;
4514   Mat_SeqAIJ           *a=(Mat_SeqAIJ*)seqmat->data;
4515   PetscMPIInt          size,rank,tagi,tagj,*len_s,*len_si,*len_ri;
4516   PetscInt             **buf_rj,**buf_ri,**buf_ri_k;
4517   PetscInt             M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j;
4518   PetscInt             len,proc,*dnz,*onz;
4519   PetscInt             k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0;
4520   PetscInt             nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai;
4521   MPI_Request          *si_waits,*sj_waits,*ri_waits,*rj_waits;
4522   MPI_Status           *status;
4523   PetscFreeSpaceList   free_space=PETSC_NULL,current_space=PETSC_NULL;
4524   PetscBT              lnkbt;
4525   Mat_Merge_SeqsToMPI  *merge;
4526   PetscContainer       container;
4527 
4528   PetscFunctionBegin;
4529   ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4530 
4531   /* make sure it is a PETSc comm */
4532   ierr = PetscCommDuplicate(comm,&comm,PETSC_NULL);CHKERRQ(ierr);
4533   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4534   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4535 
4536   ierr = PetscNew(Mat_Merge_SeqsToMPI,&merge);CHKERRQ(ierr);
4537   ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr);
4538 
4539   /* determine row ownership */
4540   /*---------------------------------------------------------*/
4541   ierr = PetscLayoutCreate(comm,&merge->rowmap);CHKERRQ(ierr);
4542   ierr = PetscLayoutSetLocalSize(merge->rowmap,m);CHKERRQ(ierr);
4543   ierr = PetscLayoutSetSize(merge->rowmap,M);CHKERRQ(ierr);
4544   ierr = PetscLayoutSetBlockSize(merge->rowmap,1);CHKERRQ(ierr);
4545   ierr = PetscLayoutSetUp(merge->rowmap);CHKERRQ(ierr);
4546   ierr = PetscMalloc(size*sizeof(PetscMPIInt),&len_si);CHKERRQ(ierr);
4547   ierr = PetscMalloc(size*sizeof(PetscMPIInt),&merge->len_s);CHKERRQ(ierr);
4548 
4549   m      = merge->rowmap->n;
4550   M      = merge->rowmap->N;
4551   owners = merge->rowmap->range;
4552 
4553   /* determine the number of messages to send, their lengths */
4554   /*---------------------------------------------------------*/
4555   len_s  = merge->len_s;
4556 
4557   len = 0;  /* length of buf_si[] */
4558   merge->nsend = 0;
4559   for (proc=0; proc<size; proc++){
4560     len_si[proc] = 0;
4561     if (proc == rank){
4562       len_s[proc] = 0;
4563     } else {
4564       len_si[proc] = owners[proc+1] - owners[proc] + 1;
4565       len_s[proc] = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */
4566     }
4567     if (len_s[proc]) {
4568       merge->nsend++;
4569       nrows = 0;
4570       for (i=owners[proc]; i<owners[proc+1]; i++){
4571         if (ai[i+1] > ai[i]) nrows++;
4572       }
4573       len_si[proc] = 2*(nrows+1);
4574       len += len_si[proc];
4575     }
4576   }
4577 
4578   /* determine the number and length of messages to receive for ij-structure */
4579   /*-------------------------------------------------------------------------*/
4580   ierr = PetscGatherNumberOfMessages(comm,PETSC_NULL,len_s,&merge->nrecv);CHKERRQ(ierr);
4581   ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr);
4582 
4583   /* post the Irecv of j-structure */
4584   /*-------------------------------*/
4585   ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr);
4586   ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr);
4587 
4588   /* post the Isend of j-structure */
4589   /*--------------------------------*/
4590   ierr = PetscMalloc2(merge->nsend,MPI_Request,&si_waits,merge->nsend,MPI_Request,&sj_waits);CHKERRQ(ierr);
4591 
4592   for (proc=0, k=0; proc<size; proc++){
4593     if (!len_s[proc]) continue;
4594     i = owners[proc];
4595     ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr);
4596     k++;
4597   }
4598 
4599   /* receives and sends of j-structure are complete */
4600   /*------------------------------------------------*/
4601   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);}
4602   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);}
4603 
4604   /* send and recv i-structure */
4605   /*---------------------------*/
4606   ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr);
4607   ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr);
4608 
4609   ierr = PetscMalloc((len+1)*sizeof(PetscInt),&buf_s);CHKERRQ(ierr);
4610   buf_si = buf_s;  /* points to the beginning of k-th msg to be sent */
4611   for (proc=0,k=0; proc<size; proc++){
4612     if (!len_s[proc]) continue;
4613     /* form outgoing message for i-structure:
4614          buf_si[0]:                 nrows to be sent
4615                [1:nrows]:           row index (global)
4616                [nrows+1:2*nrows+1]: i-structure index
4617     */
4618     /*-------------------------------------------*/
4619     nrows = len_si[proc]/2 - 1;
4620     buf_si_i    = buf_si + nrows+1;
4621     buf_si[0]   = nrows;
4622     buf_si_i[0] = 0;
4623     nrows = 0;
4624     for (i=owners[proc]; i<owners[proc+1]; i++){
4625       anzi = ai[i+1] - ai[i];
4626       if (anzi) {
4627         buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */
4628         buf_si[nrows+1] = i-owners[proc]; /* local row index */
4629         nrows++;
4630       }
4631     }
4632     ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr);
4633     k++;
4634     buf_si += len_si[proc];
4635   }
4636 
4637   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);}
4638   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);}
4639 
4640   ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr);
4641   for (i=0; i<merge->nrecv; i++){
4642     ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr);
4643   }
4644 
4645   ierr = PetscFree(len_si);CHKERRQ(ierr);
4646   ierr = PetscFree(len_ri);CHKERRQ(ierr);
4647   ierr = PetscFree(rj_waits);CHKERRQ(ierr);
4648   ierr = PetscFree2(si_waits,sj_waits);CHKERRQ(ierr);
4649   ierr = PetscFree(ri_waits);CHKERRQ(ierr);
4650   ierr = PetscFree(buf_s);CHKERRQ(ierr);
4651   ierr = PetscFree(status);CHKERRQ(ierr);
4652 
4653   /* compute a local seq matrix in each processor */
4654   /*----------------------------------------------*/
4655   /* allocate bi array and free space for accumulating nonzero column info */
4656   ierr = PetscMalloc((m+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr);
4657   bi[0] = 0;
4658 
4659   /* create and initialize a linked list */
4660   nlnk = N+1;
4661   ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4662 
4663   /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */
4664   len = 0;
4665   len  = ai[owners[rank+1]] - ai[owners[rank]];
4666   ierr = PetscFreeSpaceGet((PetscInt)(2*len+1),&free_space);CHKERRQ(ierr);
4667   current_space = free_space;
4668 
4669   /* determine symbolic info for each local row */
4670   ierr = PetscMalloc3(merge->nrecv,PetscInt*,&buf_ri_k,merge->nrecv,PetscInt*,&nextrow,merge->nrecv,PetscInt*,&nextai);CHKERRQ(ierr);
4671 
4672   for (k=0; k<merge->nrecv; k++){
4673     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th received i-structure */
4674     nrows = *buf_ri_k[k];
4675     nextrow[k]  = buf_ri_k[k] + 1;  /* next row number of k-th received i-structure */
4676     nextai[k]   = buf_ri_k[k] + (nrows + 1);/* points to the next i-structure of k-th received i-structure */
4677   }
4678 
4679   ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
4680   len = 0;
4681   for (i=0;i<m;i++) {
4682     bnzi   = 0;
4683     /* add local non-zero cols of this proc's seqmat into lnk */
4684     arow   = owners[rank] + i;
4685     anzi   = ai[arow+1] - ai[arow];
4686     aj     = a->j + ai[arow];
4687     ierr = PetscLLAdd(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4688     bnzi += nlnk;
4689     /* add received col data into lnk */
4690     for (k=0; k<merge->nrecv; k++){ /* k-th received message */
4691       if (i == *nextrow[k]) { /* i-th row */
4692         anzi = *(nextai[k]+1) - *nextai[k];
4693         aj   = buf_rj[k] + *nextai[k];
4694         ierr = PetscLLAdd(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4695         bnzi += nlnk;
4696         nextrow[k]++; nextai[k]++;
4697       }
4698     }
4699     if (len < bnzi) len = bnzi;  /* =max(bnzi) */
4700 
4701     /* if free space is not available, make more free space */
4702     if (current_space->local_remaining<bnzi) {
4703       ierr = PetscFreeSpaceGet(bnzi+current_space->total_array_size,&current_space);CHKERRQ(ierr);
4704       nspacedouble++;
4705     }
4706     /* copy data into free space, then initialize lnk */
4707     ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
4708     ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr);
4709 
4710     current_space->array           += bnzi;
4711     current_space->local_used      += bnzi;
4712     current_space->local_remaining -= bnzi;
4713 
4714     bi[i+1] = bi[i] + bnzi;
4715   }
4716 
4717   ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
4718 
4719   ierr = PetscMalloc((bi[m]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr);
4720   ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr);
4721   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
4722 
4723   /* create symbolic parallel matrix B_mpi */
4724   /*---------------------------------------*/
4725   ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr);
4726   if (n==PETSC_DECIDE) {
4727     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr);
4728   } else {
4729     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
4730   }
4731   ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr);
4732   ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr);
4733   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
4734 
4735   /* B_mpi is not ready for use - assembly will be done by MatMerge_SeqsToMPINumeric() */
4736   B_mpi->assembled     = PETSC_FALSE;
4737   B_mpi->ops->destroy  = MatDestroy_MPIAIJ_SeqsToMPI;
4738   merge->bi            = bi;
4739   merge->bj            = bj;
4740   merge->buf_ri        = buf_ri;
4741   merge->buf_rj        = buf_rj;
4742   merge->coi           = PETSC_NULL;
4743   merge->coj           = PETSC_NULL;
4744   merge->owners_co     = PETSC_NULL;
4745 
4746   /* attach the supporting struct to B_mpi for reuse */
4747   ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
4748   ierr = PetscContainerSetPointer(container,merge);CHKERRQ(ierr);
4749   ierr = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr);
4750   *mpimat = B_mpi;
4751 
4752   ierr = PetscCommDestroy(&comm);CHKERRQ(ierr);
4753   ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4754   PetscFunctionReturn(0);
4755 }
4756 
4757 #undef __FUNCT__
4758 #define __FUNCT__ "MatMerge_SeqsToMPI"
4759 PetscErrorCode  MatMerge_SeqsToMPI(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat)
4760 {
4761   PetscErrorCode   ierr;
4762 
4763   PetscFunctionBegin;
4764   ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4765   if (scall == MAT_INITIAL_MATRIX){
4766     ierr = MatMerge_SeqsToMPISymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr);
4767   }
4768   ierr = MatMerge_SeqsToMPINumeric(seqmat,*mpimat);CHKERRQ(ierr);
4769   ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4770   PetscFunctionReturn(0);
4771 }
4772 
4773 #undef __FUNCT__
4774 #define __FUNCT__ "MatGetLocalMat"
4775 /*@
4776      MatGetLocalMat - Creates a SeqAIJ matrix containing all the local rows of the parallel matrix
4777 
4778     Not Collective
4779 
4780    Input Parameters:
4781 +    A - the matrix
4782 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4783 
4784    Output Parameter:
4785 .    A_loc - the local sequential matrix generated
4786 
4787     Level: developer
4788 
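   A usage sketch (a sketch only, assuming the caller destroys A_loc when done with it):
.vb
     Mat A_loc;
     ierr = MatGetLocalMat(A,MAT_INITIAL_MATRIX,&A_loc);CHKERRQ(ierr);
     /* ... use A_loc ... */
     ierr = MatDestroy(A_loc);CHKERRQ(ierr);
.ve
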
4789 @*/
4790 PetscErrorCode  MatGetLocalMat(Mat A,MatReuse scall,Mat *A_loc)
4791 {
4792   PetscErrorCode  ierr;
4793   Mat_MPIAIJ      *mpimat=(Mat_MPIAIJ*)A->data;
4794   Mat_SeqAIJ      *mat,*a=(Mat_SeqAIJ*)(mpimat->A)->data,*b=(Mat_SeqAIJ*)(mpimat->B)->data;
4795   PetscInt        *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*cmap=mpimat->garray;
4796   MatScalar       *aa=a->a,*ba=b->a,*cam;
4797   PetscScalar     *ca;
4798   PetscInt        am=A->rmap->n,i,j,k,cstart=A->cmap->rstart;
4799   PetscInt        *ci,*cj,col,ncols_d,ncols_o,jo;
4800 
4801   PetscFunctionBegin;
4802   ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
4803   if (scall == MAT_INITIAL_MATRIX){
4804     ierr = PetscMalloc((1+am)*sizeof(PetscInt),&ci);CHKERRQ(ierr);
4805     ci[0] = 0;
4806     for (i=0; i<am; i++){
4807       ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]);
4808     }
4809     ierr = PetscMalloc((1+ci[am])*sizeof(PetscInt),&cj);CHKERRQ(ierr);
4810     ierr = PetscMalloc((1+ci[am])*sizeof(PetscScalar),&ca);CHKERRQ(ierr);
4811     k = 0;
4812     for (i=0; i<am; i++) {
4813       ncols_o = bi[i+1] - bi[i];
4814       ncols_d = ai[i+1] - ai[i];
4815       /* off-diagonal portion of A */
4816       for (jo=0; jo<ncols_o; jo++) {
4817         col = cmap[*bj];
4818         if (col >= cstart) break;
4819         cj[k]   = col; bj++;
4820         ca[k++] = *ba++;
4821       }
4822       /* diagonal portion of A */
4823       for (j=0; j<ncols_d; j++) {
4824         cj[k]   = cstart + *aj++;
4825         ca[k++] = *aa++;
4826       }
4827       /* off-diagonal portion of A */
4828       for (j=jo; j<ncols_o; j++) {
4829         cj[k]   = cmap[*bj++];
4830         ca[k++] = *ba++;
4831       }
4832     }
4833     /* put together the new matrix */
4834     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr);
4835     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
4836     /* Since these are PETSc arrays, change flags to free them as necessary. */
4837     mat          = (Mat_SeqAIJ*)(*A_loc)->data;
4838     mat->free_a  = PETSC_TRUE;
4839     mat->free_ij = PETSC_TRUE;
4840     mat->nonew   = 0;
4841   } else if (scall == MAT_REUSE_MATRIX){
4842     mat=(Mat_SeqAIJ*)(*A_loc)->data;
4843     ci = mat->i; cj = mat->j; cam = mat->a;
4844     for (i=0; i<am; i++) {
4845       /* off-diagonal portion of A */
4846       ncols_o = bi[i+1] - bi[i];
4847       for (jo=0; jo<ncols_o; jo++) {
4848         col = cmap[*bj];
4849         if (col >= cstart) break;
4850         *cam++ = *ba++; bj++;
4851       }
4852       /* diagonal portion of A */
4853       ncols_d = ai[i+1] - ai[i];
4854       for (j=0; j<ncols_d; j++) *cam++ = *aa++;
4855       /* off-diagonal portion of A */
4856       for (j=jo; j<ncols_o; j++) {
4857         *cam++ = *ba++; bj++;
4858       }
4859     }
4860   } else {
4861     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
4862   }
4863 
4864   ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
4865   PetscFunctionReturn(0);
4866 }
4867 
4868 #undef __FUNCT__
4869 #define __FUNCT__ "MatGetLocalMatCondensed"
4870 /*@C
4871      MatGetLocalMatCondensed - Creates a SeqAIJ matrix from the local rows and the NON-ZERO columns of the parallel matrix
4872 
4873     Not Collective
4874 
4875    Input Parameters:
4876 +    A - the matrix
4877 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4878 -    row, col - index sets of rows and columns to extract (or PETSC_NULL)
4879 
4880    Output Parameter:
4881 .    A_loc - the local sequential matrix generated
4882 
4883     Level: developer
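   A usage sketch taking all local rows and all nonzero columns (a sketch only;
   passing PETSC_NULL for row and col selects them as described above):
.vb
     Mat A_loc;
     ierr = MatGetLocalMatCondensed(A,MAT_INITIAL_MATRIX,PETSC_NULL,PETSC_NULL,&A_loc);CHKERRQ(ierr);
.ve
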
4884 
4885 @*/
4886 PetscErrorCode  MatGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc)
4887 {
4888   Mat_MPIAIJ        *a=(Mat_MPIAIJ*)A->data;
4889   PetscErrorCode    ierr;
4890   PetscInt          i,start,end,ncols,nzA,nzB,*cmap,imark,*idx;
4891   IS                isrowa,iscola;
4892   Mat               *aloc;
4893 
4894   PetscFunctionBegin;
4895   ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
4896   if (!row){
4897     start = A->rmap->rstart; end = A->rmap->rend;
4898     ierr = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr);
4899   } else {
4900     isrowa = *row;
4901   }
4902   if (!col){
4903     start = A->cmap->rstart;
4904     cmap  = a->garray;
4905     nzA   = a->A->cmap->n;
4906     nzB   = a->B->cmap->n;
4907     ierr  = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr);
4908     ncols = 0;
4909     for (i=0; i<nzB; i++) {
4910       if (cmap[i] < start) idx[ncols++] = cmap[i];
4911       else break;
4912     }
4913     imark = i;
4914     for (i=0; i<nzA; i++) idx[ncols++] = start + i;
4915     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i];
4916     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&iscola);CHKERRQ(ierr);
4917   } else {
4918     iscola = *col;
4919   }
4920   if (scall != MAT_INITIAL_MATRIX){
4921     ierr = PetscMalloc(sizeof(Mat),&aloc);CHKERRQ(ierr);
4922     aloc[0] = *A_loc;
4923   }
4924   ierr = MatGetSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr);
4925   *A_loc = aloc[0];
4926   ierr = PetscFree(aloc);CHKERRQ(ierr);
4927   if (!row){
4928     ierr = ISDestroy(isrowa);CHKERRQ(ierr);
4929   }
4930   if (!col){
4931     ierr = ISDestroy(iscola);CHKERRQ(ierr);
4932   }
4933   ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
4934   PetscFunctionReturn(0);
4935 }
4936 
4937 #undef __FUNCT__
4938 #define __FUNCT__ "MatGetBrowsOfAcols"
4939 /*@C
4940     MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns of local A
4941 
4942     Collective on Mat
4943 
4944    Input Parameters:
4945 +    A,B - the matrices in mpiaij format
4946 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4947 -    rowb, colb - index sets of rows and columns of B to extract (or PETSC_NULL)
4948 
4949    Output Parameter:
4950 +    rowb, colb - index sets of rows and columns of B to extract
4951 .    brstart - row index of B_seq from which next B->rmap->n rows are taken from B's local rows
4952 -    B_seq - the sequential matrix generated
4953 
4954     Level: developer
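   A usage sketch for the initial call (a sketch only; rowb, colb and B_seq are created
   by the routine and can be passed back in with MAT_REUSE_MATRIX):
.vb
     IS       rowb,colb;
     Mat      B_seq;
     PetscInt brstart;
     ierr = MatGetBrowsOfAcols(A,B,MAT_INITIAL_MATRIX,&rowb,&colb,&brstart,&B_seq);CHKERRQ(ierr);
.ve
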
4955 
4956 @*/
4957 PetscErrorCode  MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,PetscInt *brstart,Mat *B_seq)
4958 {
4959   Mat_MPIAIJ        *a=(Mat_MPIAIJ*)A->data;
4960   PetscErrorCode    ierr;
4961   PetscInt          *idx,i,start,ncols,nzA,nzB,*cmap,imark;
4962   IS                isrowb,iscolb;
4963   Mat               *bseq;
4964 
4965   PetscFunctionBegin;
4966   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend){
4967     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
4968   }
4969   ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
4970 
4971   if (scall == MAT_INITIAL_MATRIX){
4972     start = A->cmap->rstart;
4973     cmap  = a->garray;
4974     nzA   = a->A->cmap->n;
4975     nzB   = a->B->cmap->n;
4976     ierr  = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr);
4977     ncols = 0;
4978     for (i=0; i<nzB; i++) {  /* row < local row index */
4979       if (cmap[i] < start) idx[ncols++] = cmap[i];
4980       else break;
4981     }
4982     imark = i;
4983     for (i=0; i<nzA; i++) idx[ncols++] = start + i;  /* local rows */
4984     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */
4985     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&isrowb);CHKERRQ(ierr);
4986     *brstart = imark;
4987     ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr);
4988   } else {
4989     if (!rowb || !colb) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX");
4990     isrowb = *rowb; iscolb = *colb;
4991     ierr = PetscMalloc(sizeof(Mat),&bseq);CHKERRQ(ierr);
4992     bseq[0] = *B_seq;
4993   }
4994   ierr = MatGetSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr);
4995   *B_seq = bseq[0];
4996   ierr = PetscFree(bseq);CHKERRQ(ierr);
4997   if (!rowb){
4998     ierr = ISDestroy(isrowb);CHKERRQ(ierr);
4999   } else {
5000     *rowb = isrowb;
5001   }
5002   if (!colb){
5003     ierr = ISDestroy(iscolb);CHKERRQ(ierr);
5004   } else {
5005     *colb = iscolb;
5006   }
5007   ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
5008   PetscFunctionReturn(0);
5009 }
5010 
5011 #undef __FUNCT__
5012 #define __FUNCT__ "MatGetBrowsOfAoCols"
5013 /*@C
5014     MatGetBrowsOfAoCols - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns
5015     of the OFF-DIAGONAL portion of local A
5016 
5017     Collective on Mat
5018 
5019    Input Parameters:
5020 +    A,B - the matrices in mpiaij format
5021 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5022 .    startsj - starting point in B's sending and receiving j-arrays, saved for MAT_REUSE (or PETSC_NULL)
5023 .    startsj_r - similar to startsj for receives
5024 -    bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or PETSC_NULL)
5025 
5026    Output Parameter:
5027 .    B_oth - the sequential matrix generated
5028 
5029     Level: developer
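   A usage sketch for a one-off call (a sketch only; passing PETSC_NULL for the saved
   buffers forces MAT_INITIAL_MATRIX behavior, as in the source below):
.vb
     Mat B_oth;
     ierr = MatGetBrowsOfAoCols(A,B,MAT_INITIAL_MATRIX,PETSC_NULL,PETSC_NULL,PETSC_NULL,&B_oth);CHKERRQ(ierr);
.ve
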
5030 
5031 @*/
5032 PetscErrorCode  MatGetBrowsOfAoCols(Mat A,Mat B,MatReuse scall,PetscInt **startsj,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth)
5033 {
5034   VecScatter_MPI_General *gen_to,*gen_from;
5035   PetscErrorCode         ierr;
5036   Mat_MPIAIJ             *a=(Mat_MPIAIJ*)A->data;
5037   Mat_SeqAIJ             *b_oth;
5038   VecScatter             ctx=a->Mvctx;
5039   MPI_Comm               comm=((PetscObject)ctx)->comm;
5040   PetscMPIInt            *rprocs,*sprocs,tag=((PetscObject)ctx)->tag,rank;
5041   PetscInt               *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap->n,row,*b_othi,*b_othj;
5042   PetscScalar            *rvalues,*svalues;
5043   MatScalar              *b_otha,*bufa,*bufA;
5044   PetscInt               i,j,k,l,ll,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len;
5045   MPI_Request            *rwaits = PETSC_NULL,*swaits = PETSC_NULL;
5046   MPI_Status             *sstatus,rstatus;
5047   PetscMPIInt            jj;
5048   PetscInt               *cols,sbs,rbs;
5049   PetscScalar            *vals;
5050 
5051   PetscFunctionBegin;
5052   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend){
5053     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
5054   }
5055   ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
5056   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
5057 
5058   gen_to   = (VecScatter_MPI_General*)ctx->todata;
5059   gen_from = (VecScatter_MPI_General*)ctx->fromdata;
5060   rvalues  = gen_from->values; /* holds the length of receiving row */
5061   svalues  = gen_to->values;   /* holds the length of sending row */
5062   nrecvs   = gen_from->n;
5063   nsends   = gen_to->n;
5064 
5065   ierr = PetscMalloc2(nrecvs,MPI_Request,&rwaits,nsends,MPI_Request,&swaits);CHKERRQ(ierr);
5066   srow     = gen_to->indices;   /* local row index to be sent */
5067   sstarts  = gen_to->starts;
5068   sprocs   = gen_to->procs;
5069   sstatus  = gen_to->sstatus;
5070   sbs      = gen_to->bs;
5071   rstarts  = gen_from->starts;
5072   rprocs   = gen_from->procs;
5073   rbs      = gen_from->bs;
5074 
5075   if (!startsj || !bufa_ptr) scall = MAT_INITIAL_MATRIX;
5076   if (scall == MAT_INITIAL_MATRIX){
5077     /* i-array */
5078     /*---------*/
5079     /*  post receives */
5080     for (i=0; i<nrecvs; i++){
5081       rowlen = (PetscInt*)rvalues + rstarts[i]*rbs;
5082       nrows = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */
5083       ierr = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5084     }
5085 
5086     /* pack the outgoing message */
5087     ierr = PetscMalloc2(nsends+1,PetscInt,&sstartsj,nrecvs+1,PetscInt,&rstartsj);CHKERRQ(ierr);
5088     sstartsj[0] = 0;  rstartsj[0] = 0;
5089     len = 0; /* total length of j or a array to be sent */
5090     k = 0;
5091     for (i=0; i<nsends; i++){
5092       rowlen = (PetscInt*)svalues + sstarts[i]*sbs;
5093       nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5094       for (j=0; j<nrows; j++) {
5095         row = srow[k] + B->rmap->range[rank]; /* global row idx */
5096         for (l=0; l<sbs; l++){
5097           ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); /* rowlength */
5098           rowlen[j*sbs+l] = ncols;
5099           len += ncols;
5100           ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr);
5101         }
5102         k++;
5103       }
5104       ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5105       sstartsj[i+1] = len;  /* starting point of (i+1)-th outgoing msg in bufj and bufa */
5106     }
5107     /* recvs and sends of i-array are completed */
5108     i = nrecvs;
5109     while (i--) {
5110       ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5111     }
5112     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
5113 
5114     /* allocate buffers for sending j and a arrays */
5115     ierr = PetscMalloc((len+1)*sizeof(PetscInt),&bufj);CHKERRQ(ierr);
5116     ierr = PetscMalloc((len+1)*sizeof(PetscScalar),&bufa);CHKERRQ(ierr);
5117 
5118     /* create i-array of B_oth */
5119     ierr = PetscMalloc((aBn+2)*sizeof(PetscInt),&b_othi);CHKERRQ(ierr);
5120     b_othi[0] = 0;
5121     len = 0; /* total length of j or a array to be received */
5122     k = 0;
5123     for (i=0; i<nrecvs; i++){
5124       rowlen = (PetscInt*)rvalues + rstarts[i]*rbs;
5125       nrows = rbs*(rstarts[i+1]-rstarts[i]); /* num of rows to be received */
5126       for (j=0; j<nrows; j++) {
5127         b_othi[k+1] = b_othi[k] + rowlen[j];
5128         len += rowlen[j]; k++;
5129       }
5130       rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */
5131     }
5132 
5133     /* allocate space for j and a arrays of B_oth */
5134     ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(PetscInt),&b_othj);CHKERRQ(ierr);
5135     ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(MatScalar),&b_otha);CHKERRQ(ierr);
5136 
5137     /* j-array */
5138     /*---------*/
5139     /*  post receives of j-array */
5140     for (i=0; i<nrecvs; i++){
5141       nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
5142       ierr = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5143     }
5144 
5145     /* pack the outgoing message j-array */
5146     k = 0;
5147     for (i=0; i<nsends; i++){
5148       nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5149       bufJ = bufj+sstartsj[i];
5150       for (j=0; j<nrows; j++) {
5151         row  = srow[k++] + B->rmap->range[rank]; /* global row idx */
5152         for (ll=0; ll<sbs; ll++){
5153           ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr);
5154           for (l=0; l<ncols; l++){
5155             *bufJ++ = cols[l];
5156           }
5157           ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr);
5158         }
5159       }
5160       ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5161     }
5162 
5163     /* recvs and sends of j-array are completed */
5164     i = nrecvs;
5165     while (i--) {
5166       ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5167     }
5168     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
5169   } else if (scall == MAT_REUSE_MATRIX){
5170     sstartsj = *startsj;
5171     rstartsj = *startsj_r;
5172     bufa     = *bufa_ptr;
5173     b_oth    = (Mat_SeqAIJ*)(*B_oth)->data;
5174     b_otha   = b_oth->a;
5175   } else {
5176     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
5177   }
5178 
5179   /* a-array */
5180   /*---------*/
5181   /*  post receives of a-array */
5182   for (i=0; i<nrecvs; i++){
5183     nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
5184     ierr = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5185   }
5186 
5187   /* pack the outgoing message a-array */
5188   k = 0;
5189   for (i=0; i<nsends; i++){
5190     nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5191     bufA = bufa+sstartsj[i];
5192     for (j=0; j<nrows; j++) {
5193       row  = srow[k++] + B->rmap->range[rank]; /* global row idx */
5194       for (ll=0; ll<sbs; ll++){
5195         ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr);
5196         for (l=0; l<ncols; l++){
5197           *bufA++ = vals[l];
5198         }
5199         ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr);
5200       }
5201     }
5202     ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5203   }
5204   /* recvs and sends of a-array are completed */
5205   i = nrecvs;
5206   while (i--) {
5207     ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5208   }
5209   if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
5210   ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr);
5211 
5212   if (scall == MAT_INITIAL_MATRIX){
5213     /* put together the new matrix */
5214     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr);
5215 
5216     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
5217     /* Since these are PETSc arrays, change flags to free them as necessary. */
5218     b_oth          = (Mat_SeqAIJ *)(*B_oth)->data;
5219     b_oth->free_a  = PETSC_TRUE;
5220     b_oth->free_ij = PETSC_TRUE;
5221     b_oth->nonew   = 0;
5222 
5223     ierr = PetscFree(bufj);CHKERRQ(ierr);
5224     if (!startsj || !bufa_ptr){
5225       ierr = PetscFree2(sstartsj,rstartsj);CHKERRQ(ierr);
5226       ierr = PetscFree(bufa_ptr);CHKERRQ(ierr);
5227     } else {
5228       *startsj   = sstartsj;
5229       *startsj_r = rstartsj;
5230       *bufa_ptr  = bufa;
5231     }
5232   }
5233   ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
5234   PetscFunctionReturn(0);
5235 }
5236 
5237 #undef __FUNCT__
5238 #define __FUNCT__ "MatGetCommunicationStructs"
5239 /*@C
5240   MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication.
5241 
5242   Not Collective
5243 
5244   Input Parameter:
5245 . A - The matrix in mpiaij format
5246 
5247   Output Parameter:
5248 + lvec - The local vector holding off-process values from the argument to a matrix-vector product
5249 . colmap - A map from global column index to local index into lvec
5250 - multScatter - A scatter from the argument of a matrix-vector product to lvec
5251 
5252   Level: developer
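  A usage sketch (a sketch only; the type of colmap depends on whether PETSC_USE_CTABLE
  is defined, and the PetscInt* form is assumed here):
.vb
    Vec        lvec;
    PetscInt   *colmap;
    VecScatter Mvctx;
    ierr = MatGetCommunicationStructs(A,&lvec,&colmap,&Mvctx);CHKERRQ(ierr);
.ve
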
5253 
5254 @*/
5255 #if defined (PETSC_USE_CTABLE)
5256 PetscErrorCode  MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter)
5257 #else
5258 PetscErrorCode  MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter)
5259 #endif
5260 {
5261   Mat_MPIAIJ *a;
5262 
5263   PetscFunctionBegin;
5264   PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
5265   PetscValidPointer(lvec, 2);
5266   PetscValidPointer(colmap, 3);
5267   PetscValidPointer(multScatter, 4);
5268   a = (Mat_MPIAIJ *) A->data;
5269   if (lvec) *lvec = a->lvec;
5270   if (colmap) *colmap = a->colmap;
5271   if (multScatter) *multScatter = a->Mvctx;
5272   PetscFunctionReturn(0);
5273 }
5274 
5275 EXTERN_C_BEGIN
5276 extern PetscErrorCode  MatConvert_MPIAIJ_MPIAIJCRL(Mat,const MatType,MatReuse,Mat*);
5277 extern PetscErrorCode  MatConvert_MPIAIJ_MPIAIJPERM(Mat,const MatType,MatReuse,Mat*);
5278 extern PetscErrorCode  MatConvert_MPIAIJ_MPISBAIJ(Mat,const MatType,MatReuse,Mat*);
5279 EXTERN_C_END
5280 
5281 #undef __FUNCT__
5282 #define __FUNCT__ "MatMatMultNumeric_MPIDense_MPIAIJ"
5283 /*
5284     Computes C = A*B as (B'*A')', since computing the MPIDense*MPIAIJ product directly is untenable
5285 
5286                n                       p                          p
5287         (              )       (              )         (                  )
5288       m (      A       )  *  n (       B      )   =   m (         C        )
5289         (              )       (              )         (                  )
5290 
5291 */
5292 PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C)
5293 {
5294   PetscErrorCode     ierr;
5295   Mat                At,Bt,Ct;
5296 
5297   PetscFunctionBegin;
5298   ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr);
5299   ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr);
5300   ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,1.0,&Ct);CHKERRQ(ierr);
5301   ierr = MatDestroy(At);CHKERRQ(ierr);
5302   ierr = MatDestroy(Bt);CHKERRQ(ierr);
5303   ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr);
5304   ierr = MatDestroy(Ct);CHKERRQ(ierr);
5305   PetscFunctionReturn(0);
5306 }
5307 
5308 #undef __FUNCT__
5309 #define __FUNCT__ "MatMatMultSymbolic_MPIDense_MPIAIJ"
5310 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat *C)
5311 {
5312   PetscErrorCode ierr;
5313   PetscInt       m=A->rmap->n,n=B->cmap->n;
5314   Mat            Cmat;
5315 
5316   PetscFunctionBegin;
5317   if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %d != B->rmap->n %d\n",A->cmap->n,B->rmap->n);
5318   ierr = MatCreate(((PetscObject)A)->comm,&Cmat);CHKERRQ(ierr);
5319   ierr = MatSetSizes(Cmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
5320   ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr);
5321   ierr = MatMPIDenseSetPreallocation(Cmat,PETSC_NULL);CHKERRQ(ierr);
5322   ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5323   ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5324   *C   = Cmat;
5325   PetscFunctionReturn(0);
5326 }
5327 
5328 /* ----------------------------------------------------------------*/
5329 #undef __FUNCT__
5330 #define __FUNCT__ "MatMatMult_MPIDense_MPIAIJ"
5331 PetscErrorCode MatMatMult_MPIDense_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C)
5332 {
5333   PetscErrorCode ierr;
5334 
5335   PetscFunctionBegin;
5336   if (scall == MAT_INITIAL_MATRIX){
5337     ierr = MatMatMultSymbolic_MPIDense_MPIAIJ(A,B,fill,C);CHKERRQ(ierr);
5338   }
5339   ierr = MatMatMultNumeric_MPIDense_MPIAIJ(A,B,*C);CHKERRQ(ierr);
5340   PetscFunctionReturn(0);
5341 }
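/*
   A hypothetical caller-side sketch of how the routines above are reached: MatMatMult() on an
   MPIDENSE A and an MPIAIJ B is dispatched, via the MatMatMult_mpidense_mpiaij_C function
   composed in MatCreate_MPIAIJ() below, to MatMatMult_MPIDense_MPIAIJ().  The names A, B, C
   and the PETSC_DEFAULT fill estimate are illustrative assumptions, not values from this file.

       Mat A,B,C;
       ...  create and assemble A (MATMPIDENSE) and B (MATMPIAIJ) with A->cmap->n == B->rmap->n  ...
       ierr = MatMatMult(A,B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&C);CHKERRQ(ierr);
       ...  after changing numerical values the product can be recomputed into the same C  ...
       ierr = MatMatMult(A,B,MAT_REUSE_MATRIX,PETSC_DEFAULT,&C);CHKERRQ(ierr);
       ierr = MatDestroy(C);CHKERRQ(ierr);
*/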
5342 
5343 EXTERN_C_BEGIN
5344 #if defined(PETSC_HAVE_MUMPS)
5345 extern PetscErrorCode MatGetFactor_aij_mumps(Mat,MatFactorType,Mat*);
5346 #endif
5347 #if defined(PETSC_HAVE_PASTIX)
5348 extern PetscErrorCode MatGetFactor_mpiaij_pastix(Mat,MatFactorType,Mat*);
5349 #endif
5350 #if defined(PETSC_HAVE_SUPERLU_DIST)
5351 extern PetscErrorCode MatGetFactor_mpiaij_superlu_dist(Mat,MatFactorType,Mat*);
5352 #endif
5353 #if defined(PETSC_HAVE_SPOOLES)
5354 extern PetscErrorCode MatGetFactor_mpiaij_spooles(Mat,MatFactorType,Mat*);
5355 #endif
5356 EXTERN_C_END
5357 
5358 /*MC
5359    MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.
5360 
5361    Options Database Keys:
5362 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions()
5363 
5364   Level: beginner
5365 
5366 .seealso: MatCreateMPIAIJ()
5367 M*/
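/*
   A hypothetical usage sketch for the MATMPIAIJ type; comm, N, and the preallocation counts
   (5 diagonal and 2 off-diagonal nonzeros per row) are illustrative assumptions, not values
   taken from this file.

       Mat  A;
       ierr = MatCreate(comm,&A);CHKERRQ(ierr);
       ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,N,N);CHKERRQ(ierr);
       ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
       ierr = MatMPIAIJSetPreallocation(A,5,PETSC_NULL,2,PETSC_NULL);CHKERRQ(ierr);

   followed by the usual MatSetValues()/MatAssemblyBegin()/MatAssemblyEnd() calls.  Calling
   MatSetFromOptions() instead of MatSetType() lets -mat_type mpiaij select the type at run
   time, as noted above.
*/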
5368 
5369 EXTERN_C_BEGIN
5370 #undef __FUNCT__
5371 #define __FUNCT__ "MatCreate_MPIAIJ"
5372 PetscErrorCode  MatCreate_MPIAIJ(Mat B)
5373 {
5374   Mat_MPIAIJ     *b;
5375   PetscErrorCode ierr;
5376   PetscMPIInt    size;
5377 
5378   PetscFunctionBegin;
5379   ierr = MPI_Comm_size(((PetscObject)B)->comm,&size);CHKERRQ(ierr);
5380 
5381   ierr            = PetscNewLog(B,Mat_MPIAIJ,&b);CHKERRQ(ierr);
5382   B->data         = (void*)b;
5383   ierr            = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
5384   B->rmap->bs     = 1;
5385   B->assembled    = PETSC_FALSE;
5386 
5387   B->insertmode   = NOT_SET_VALUES;
5388   b->size         = size;
5389   ierr = MPI_Comm_rank(((PetscObject)B)->comm,&b->rank);CHKERRQ(ierr);
5390 
5391   /* build cache for off-processor entries formed during assembly */
5392   ierr = MatStashCreate_Private(((PetscObject)B)->comm,1,&B->stash);CHKERRQ(ierr);
5393   b->donotstash  = PETSC_FALSE;
5394   b->colmap      = 0;
5395   b->garray      = 0;
5396   b->roworiented = PETSC_TRUE;
5397 
5398   /* stuff used for matrix vector multiply */
5399   b->lvec      = PETSC_NULL;
5400   b->Mvctx     = PETSC_NULL;
5401 
5402   /* stuff for MatGetRow() */
5403   b->rowindices   = 0;
5404   b->rowvalues    = 0;
5405   b->getrowactive = PETSC_FALSE;
5406 
5407 #if defined(PETSC_HAVE_SPOOLES)
5408   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_spooles_C",
5409                                      "MatGetFactor_mpiaij_spooles",
5410                                      MatGetFactor_mpiaij_spooles);CHKERRQ(ierr);
5411 #endif
5412 #if defined(PETSC_HAVE_MUMPS)
5413   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_mumps_C",
5414                                      "MatGetFactor_aij_mumps",
5415                                      MatGetFactor_aij_mumps);CHKERRQ(ierr);
5416 #endif
5417 #if defined(PETSC_HAVE_PASTIX)
5418   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_pastix_C",
5419                                      "MatGetFactor_mpiaij_pastix",
5420                                      MatGetFactor_mpiaij_pastix);CHKERRQ(ierr);
5421 #endif
5422 #if defined(PETSC_HAVE_SUPERLU_DIST)
5423   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_superlu_dist_C",
5424                                      "MatGetFactor_mpiaij_superlu_dist",
5425                                      MatGetFactor_mpiaij_superlu_dist);CHKERRQ(ierr);
5426 #endif
5427   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
5428                                      "MatStoreValues_MPIAIJ",
5429                                      MatStoreValues_MPIAIJ);CHKERRQ(ierr);
5430   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
5431                                      "MatRetrieveValues_MPIAIJ",
5432                                      MatRetrieveValues_MPIAIJ);CHKERRQ(ierr);
5433   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
5434                                      "MatGetDiagonalBlock_MPIAIJ",
5435                                      MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr);
5436   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatIsTranspose_C",
5437                                      "MatIsTranspose_MPIAIJ",
5438                                      MatIsTranspose_MPIAIJ);CHKERRQ(ierr);
5439   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocation_C",
5440                                      "MatMPIAIJSetPreallocation_MPIAIJ",
5441                                      MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr);
5442   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",
5443                                      "MatMPIAIJSetPreallocationCSR_MPIAIJ",
5444                                      MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr);
5445   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C",
5446                                      "MatDiagonalScaleLocal_MPIAIJ",
5447                                      MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr);
5448   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpiaijperm_C",
5449                                      "MatConvert_MPIAIJ_MPIAIJPERM",
5450                                       MatConvert_MPIAIJ_MPIAIJPERM);CHKERRQ(ierr);
5451   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpiaijcrl_C",
5452                                      "MatConvert_MPIAIJ_MPIAIJCRL",
5453                                       MatConvert_MPIAIJ_MPIAIJCRL);CHKERRQ(ierr);
5454   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C",
5455                                      "MatConvert_MPIAIJ_MPISBAIJ",
5456                                       MatConvert_MPIAIJ_MPISBAIJ);CHKERRQ(ierr);
5457   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMult_mpidense_mpiaij_C",
5458                                      "MatMatMult_MPIDense_MPIAIJ",
5459                                       MatMatMult_MPIDense_MPIAIJ);CHKERRQ(ierr);
5460   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMultSymbolic_mpidense_mpiaij_C",
5461                                      "MatMatMultSymbolic_MPIDense_MPIAIJ",
5462                                      MatMatMultSymbolic_MPIDense_MPIAIJ);CHKERRQ(ierr);
5463   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMultNumeric_mpidense_mpiaij_C",
5464                                      "MatMatMultNumeric_MPIDense_MPIAIJ",
5465                                       MatMatMultNumeric_MPIDense_MPIAIJ);CHKERRQ(ierr);
5466   ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr);
5467   PetscFunctionReturn(0);
5468 }
5469 EXTERN_C_END
5470 
5471 #undef __FUNCT__
5472 #define __FUNCT__ "MatCreateMPIAIJWithSplitArrays"
5473 /*@
5474      MatCreateMPIAIJWithSplitArrays - creates an MPI AIJ matrix using arrays that contain the "diagonal"
5475          and "off-diagonal" part of the matrix in CSR format.
5476 
5477    Collective on MPI_Comm
5478 
5479    Input Parameters:
5480 +  comm - MPI communicator
5481 .  m - number of local rows (Cannot be PETSC_DECIDE)
5482 .  n - number of local columns; this value should be the same as the local size used in creating the
5483        x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have it
5484        calculated if N is given). For square matrices n is almost always m.
5485 .  M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given)
5486 .  N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given)
5487 .  i - row indices for "diagonal" portion of matrix
5488 .  j - column indices for "diagonal" portion of matrix
5489 .  a - matrix values for "diagonal" portion of matrix
5490 .  oi - row indices for "off-diagonal" portion of matrix
5491 .  oj - column indices for "off-diagonal" portion of matrix
5492 -  oa - matrix values for "off-diagonal" portion of matrix
5493 
5494    Output Parameter:
5495 .   mat - the matrix
5496 
5497    Level: advanced
5498 
5499    Notes:
5500        The i, j, a, oi, oj, and oa arrays ARE NOT copied by this routine into the internal format used by PETSc. The user
5501        must free the arrays once the matrix has been destroyed, and not before.
5502 
5503        The i, j, oi, and oj indices are 0 based; the j column indices of the "diagonal" portion are local (0 to n-1), while the oj column indices of the "off-diagonal" portion are global (0 to N-1)
5504 
5505        See MatCreateMPIAIJ() for the definition of the "diagonal" and "off-diagonal" portions of the matrix
5506 
5507        This sets local rows and cannot be used to set off-processor values.
5508 
5509        You cannot later use MatSetValues() to change values in this matrix.
5510 
5511 .keywords: matrix, aij, compressed row, sparse, parallel
5512 
5513 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
5514           MPIAIJ, MatCreateMPIAIJ(), MatCreateMPIAIJWithArrays()
5515 @*/
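/*
   A hypothetical two-process sketch of the call; the matrix, sizes, and values are illustrative
   assumptions only, and comm (a communicator with two ranks) and ierr are assumed to be declared
   by the caller.  For the global 2x2 matrix

         ( 1  2 )
         ( 3  4 )

   with one row and one column owned by each process, rank 0 would pass, in CSR form, a "diagonal"
   block holding its locally owned column (local column indices) and an "off-diagonal" block
   holding the remaining column (global column indices):

       PetscInt    i[]  = {0,1}, j[]  = {0},  oi[] = {0,1}, oj[] = {1};
       PetscScalar a[]  = {1.0}, oa[] = {2.0};
       Mat         mat;

       ierr = MatCreateMPIAIJWithSplitArrays(comm,1,1,2,2,i,j,a,oi,oj,oa,&mat);CHKERRQ(ierr);

   and rank 1 the analogous arrays for its row ( 3  4 ).  The column-index conventions follow from
   the two MatCreateSeqAIJWithArrays() calls below: the "diagonal" block has n columns, the
   "off-diagonal" block has N columns.  The arrays must stay valid until the matrix is destroyed.
*/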
5516 PetscErrorCode  MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],
5517 								PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat)
5518 {
5519   PetscErrorCode ierr;
5520   Mat_MPIAIJ     *maij;
5521 
5522   PetscFunctionBegin;
5523   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
5524   if (i[0]) {
5525     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
5526   }
5527   if (oi[0]) {
5528     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0");
5529   }
5530   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
5531   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
5532   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
5533   maij = (Mat_MPIAIJ*) (*mat)->data;
5534   maij->donotstash     = PETSC_TRUE;
5535   (*mat)->preallocated = PETSC_TRUE;
5536 
5537   ierr = PetscLayoutSetBlockSize((*mat)->rmap,1);CHKERRQ(ierr);
5538   ierr = PetscLayoutSetBlockSize((*mat)->cmap,1);CHKERRQ(ierr);
5539   ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr);
5540   ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr);
5541 
5542   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr);
5543   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr);
5544 
5545   ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5546   ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5547   ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5548   ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5549 
5550   ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5551   ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5552   PetscFunctionReturn(0);
5553 }
5554 
5555 /*
5556     Special version for direct calls from Fortran
5557 */
5558 #include "private/fortranimpl.h"
5559 
5560 #if defined(PETSC_HAVE_FORTRAN_CAPS)
5561 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ
5562 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
5563 #define matsetvaluesmpiaij_ matsetvaluesmpiaij
5564 #endif
5565 
5566 /* Change these macros so they can be used in a void function; the Fortran stub below cannot return an error code, so errors abort instead */
5567 #undef CHKERRQ
5568 #define CHKERRQ(ierr) CHKERRABORT(PETSC_COMM_WORLD,ierr)
5569 #undef SETERRQ2
5570 #define SETERRQ2(comm,ierr,b,c,d) CHKERRABORT(comm,ierr)
5571 #undef SETERRQ
5572 #define SETERRQ(c,ierr,b) CHKERRABORT(c,ierr)
5573 
5574 EXTERN_C_BEGIN
5575 #undef __FUNCT__
5576 #define __FUNCT__ "matsetvaluesmpiaij_"
5577 void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr)
5578 {
5579   Mat             mat = *mmat;
5580   PetscInt        m = *mm, n = *mn;
5581   InsertMode      addv = *maddv;
5582   Mat_MPIAIJ      *aij = (Mat_MPIAIJ*)mat->data;
5583   PetscScalar     value;
5584   PetscErrorCode  ierr;
5585 
5586   ierr = MatPreallocated(mat);CHKERRQ(ierr);
5587   if (mat->insertmode == NOT_SET_VALUES) {
5588     mat->insertmode = addv;
5589   }
5590 #if defined(PETSC_USE_DEBUG)
5591   else if (mat->insertmode != addv) {
5592     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
5593   }
5594 #endif
5595   {
5596   PetscInt        i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
5597   PetscInt        cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
5598   PetscBool       roworiented = aij->roworiented;
5599 
5600   /* Some variables required by the MatSetValues_SeqAIJ_A_Private() and MatSetValues_SeqAIJ_B_Private() macros */
5601   Mat             A = aij->A;
5602   Mat_SeqAIJ      *a = (Mat_SeqAIJ*)A->data;
5603   PetscInt        *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
5604   MatScalar       *aa = a->a;
5605   PetscBool       ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES))?PETSC_TRUE:PETSC_FALSE);
5606   Mat             B = aij->B;
5607   Mat_SeqAIJ      *b = (Mat_SeqAIJ*)B->data;
5608   PetscInt        *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
5609   MatScalar       *ba = b->a;
5610 
5611   PetscInt        *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
5612   PetscInt        nonew = a->nonew;
5613   MatScalar       *ap1,*ap2;
5614 
5615   PetscFunctionBegin;
5616   for (i=0; i<m; i++) {
5617     if (im[i] < 0) continue;
5618 #if defined(PETSC_USE_DEBUG)
5619     if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
5620 #endif
5621     if (im[i] >= rstart && im[i] < rend) {
5622       row      = im[i] - rstart;
5623       lastcol1 = -1;
5624       rp1      = aj + ai[row];
5625       ap1      = aa + ai[row];
5626       rmax1    = aimax[row];
5627       nrow1    = ailen[row];
5628       low1     = 0;
5629       high1    = nrow1;
5630       lastcol2 = -1;
5631       rp2      = bj + bi[row];
5632       ap2      = ba + bi[row];
5633       rmax2    = bimax[row];
5634       nrow2    = bilen[row];
5635       low2     = 0;
5636       high2    = nrow2;
5637 
5638       for (j=0; j<n; j++) {
5639         if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
5640         if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
5641         if (in[j] >= cstart && in[j] < cend){
5642           col = in[j] - cstart;
5643           MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
5644         } else if (in[j] < 0) continue;
5645 #if defined(PETSC_USE_DEBUG)
5646         else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
5647 #endif
5648         else {
5649           if (mat->was_assembled) {
5650             if (!aij->colmap) {
5651               ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
5652             }
5653 #if defined (PETSC_USE_CTABLE)
5654             ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
5655 	    col--;
5656 #else
5657             col = aij->colmap[in[j]] - 1;
5658 #endif
5659             if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
5660               ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
5661               col =  in[j];
5662               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
5663               B = aij->B;
5664               b = (Mat_SeqAIJ*)B->data;
5665               bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
5666               rp2      = bj + bi[row];
5667               ap2      = ba + bi[row];
5668               rmax2    = bimax[row];
5669               nrow2    = bilen[row];
5670               low2     = 0;
5671               high2    = nrow2;
5672               bm       = aij->B->rmap->n;
5673               ba = b->a;
5674             }
5675           } else col = in[j];
5676           MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
5677         }
5678       }
5679     } else {
5680       if (!aij->donotstash) {
5681         if (roworiented) {
5682           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
5683         } else {
5684           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
5685         }
5686       }
5687     }
5688   }}
5689   PetscFunctionReturnVoid();
5690 }
5691 EXTERN_C_END
5692 
5693