xref: /petsc/src/mat/impls/aij/mpi/mpiaij.c (revision b3e8af9f4df2caca5f1b00a1692f5f6a371a2808)
1 #define PETSCMAT_DLL
2 
3 #include "../src/mat/impls/aij/mpi/mpiaij.h"   /*I "petscmat.h" I*/
4 #include "petscblaslapack.h"
5 
6 #undef __FUNCT__
7 #define __FUNCT__ "MatDistribute_MPIAIJ"
8 /*
9     Distributes a SeqAIJ matrix across a set of processes. Code stolen from
10     MatLoad_MPIAIJ(). Horrible lack of reuse. Should be a routine for each matrix type.
11 
12     Only for square matrices
13 */
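/*
   A minimal usage sketch (illustrative only; gseq, dmat and mlocal are hypothetical
   names).  Each rank picks its own local row count mlocal, the counts must sum to the
   global size of the square SeqAIJ matrix gseq, and as the routine is written gseq is
   only dereferenced on rank 0 of the target communicator comm:

     Mat      gseq;         sequential AIJ matrix assembled on rank 0
     Mat      dmat;         resulting distributed AIJ matrix
     PetscInt mlocal;       number of rows this rank will own

     ierr = MatDistribute_MPIAIJ(comm,gseq,mlocal,MAT_INITIAL_MATRIX,&dmat);CHKERRQ(ierr);
        ... change the numerical values of gseq on rank 0 (same nonzero pattern) ...
     ierr = MatDistribute_MPIAIJ(comm,gseq,mlocal,MAT_REUSE_MATRIX,&dmat);CHKERRQ(ierr);
*/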
14 PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat)
15 {
16   PetscMPIInt    rank,size;
17   PetscInt       *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz,*gmataj,cnt,row,*ld;
18   PetscErrorCode ierr;
19   Mat            mat;
20   Mat_SeqAIJ     *gmata;
21   PetscMPIInt    tag;
22   MPI_Status     status;
23   PetscBool      aij;
24   MatScalar      *gmataa,*ao,*ad,*gmataarestore=0;
25 
26   PetscFunctionBegin;
27   CHKMEMQ;
28   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
29   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
30   if (!rank) {
31     ierr = PetscTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);CHKERRQ(ierr);
32     if (!aij) SETERRQ1(((PetscObject)gmat)->comm,PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name);
33   }
34   if (reuse == MAT_INITIAL_MATRIX) {
35     ierr = MatCreate(comm,&mat);CHKERRQ(ierr);
36     ierr = MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
37     ierr = MatSetType(mat,MATAIJ);CHKERRQ(ierr);
38     ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr);
39     ierr = PetscMalloc2(m,PetscInt,&dlens,m,PetscInt,&olens);CHKERRQ(ierr);
40     ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
41     rowners[0] = 0;
42     for (i=2; i<=size; i++) {
43       rowners[i] += rowners[i-1];
44     }
45     rstart = rowners[rank];
46     rend   = rowners[rank+1];
47     ierr   = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
48     if (!rank) {
49       gmata = (Mat_SeqAIJ*) gmat->data;
50       /* send the row lengths to all other processes */
51       for (i=0; i<m; i++) dlens[i] = gmata->ilen[i];
52       for (i=1; i<size; i++) {
53 	ierr = MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
54       }
55       /* determine the number of diagonal and off-diagonal nonzeros in each row */
56       ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
57       ierr = PetscMalloc(m*sizeof(PetscInt),&ld);CHKERRQ(ierr);
58       ierr = PetscMemzero(ld,m*sizeof(PetscInt));CHKERRQ(ierr);
59       jj = 0;
60       for (i=0; i<m; i++) {
61 	for (j=0; j<dlens[i]; j++) {
62           if (gmata->j[jj] < rstart) ld[i]++;
63 	  if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++;
64 	  jj++;
65 	}
66       }
67       /* send column indices to other processes */
68       for (i=1; i<size; i++) {
69 	nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
70 	ierr = MPI_Send(&nz,1,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
71 	ierr = MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
72       }
73 
74       /* send numerical values to other processes */
75       for (i=1; i<size; i++) {
76         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
77         ierr = MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
78       }
79       gmataa = gmata->a;
80       gmataj = gmata->j;
81 
82     } else {
83       /* receive row lengths */
84       ierr = MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
85       /* receive column indices */
86       ierr = MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
87       ierr = PetscMalloc2(nz,PetscScalar,&gmataa,nz,PetscInt,&gmataj);CHKERRQ(ierr);
88       ierr = MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
89       /* determine the number of diagonal and off-diagonal nonzeros in each row */
90       ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
91       ierr = PetscMalloc(m*sizeof(PetscInt),&ld);CHKERRQ(ierr);
92       ierr = PetscMemzero(ld,m*sizeof(PetscInt));CHKERRQ(ierr);
93       jj = 0;
94       for (i=0; i<m; i++) {
95 	for (j=0; j<dlens[i]; j++) {
96           if (gmataj[jj] < rstart) ld[i]++;
97 	  if (gmataj[jj] < rstart || gmataj[jj] >= rend) olens[i]++;
98 	  jj++;
99 	}
100       }
101       /* receive numerical values */
102       ierr = PetscMemzero(gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr);
103       ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
104     }
105     /* set preallocation */
106     for (i=0; i<m; i++) {
107       dlens[i] -= olens[i];
108     }
109     ierr = MatSeqAIJSetPreallocation(mat,0,dlens);CHKERRQ(ierr);
110     ierr = MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);CHKERRQ(ierr);
111 
112     for (i=0; i<m; i++) {
113       dlens[i] += olens[i];
114     }
115     cnt  = 0;
116     for (i=0; i<m; i++) {
117       row  = rstart + i;
118       ierr = MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);CHKERRQ(ierr);
119       cnt += dlens[i];
120     }
121     if (rank) {
122       ierr = PetscFree2(gmataa,gmataj);CHKERRQ(ierr);
123     }
124     ierr = PetscFree2(dlens,olens);CHKERRQ(ierr);
125     ierr = PetscFree(rowners);CHKERRQ(ierr);
126     ((Mat_MPIAIJ*)(mat->data))->ld = ld;
127     *inmat = mat;
128   } else {   /* column indices are already set; only need to move over numerical values from process 0 */
129     Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data;
130     Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data;
131     mat   = *inmat;
132     ierr  = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
133     if (!rank) {
134       /* send numerical values to other processes */
135       gmata = (Mat_SeqAIJ*) gmat->data;
136       ierr   = MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);CHKERRQ(ierr);
137       gmataa = gmata->a;
138       for (i=1; i<size; i++) {
139         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
140         ierr = MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
141       }
142       nz   = gmata->i[rowners[1]]-gmata->i[rowners[0]];
143     } else {
144       /* receive numerical values from process 0 */
145       nz   = Ad->nz + Ao->nz;
146       ierr = PetscMalloc(nz*sizeof(PetscScalar),&gmataa);CHKERRQ(ierr); gmataarestore = gmataa;
147       ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
148     }
149     /* transfer numerical values into the diagonal A and off-diagonal B parts of mat */
150     ld = ((Mat_MPIAIJ*)(mat->data))->ld;
151     ad = Ad->a;
152     ao = Ao->a;
153     if (mat->rmap->n) {
154       i  = 0;
155       nz = ld[i];                                   ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
156       nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
157     }
158     for (i=1; i<mat->rmap->n; i++) {
159       nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
160       nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
161     }
162     i--;
163     if (mat->rmap->n) {
164       nz = Ao->i[i+1] - Ao->i[i] - ld[i];           ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
165     }
166     if (rank) {
167       ierr = PetscFree(gmataarestore);CHKERRQ(ierr);
168     }
169   }
170   ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
171   ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
172   CHKMEMQ;
173   PetscFunctionReturn(0);
174 }
175 
176 /*
177   Local utility routine that creates a mapping from the global column
178 number to the local number in the off-diagonal part of the local
179 storage of the matrix.  When PETSC_USE_CTABLE is used this is scalable at
180 a slightly higher hash table cost; without it, it is not scalable (each processor
181 has an order-N integer array) but is fast to access.
182 */
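/*
   A small lookup sketch (illustrative only; gcol and lcol are hypothetical names):
   once the map exists, a global column gcol that falls in the off-diagonal part B is
   translated to its local column lcol in the same way it is done elsewhere in this file:

     #if defined (PETSC_USE_CTABLE)
       ierr = PetscTableFind(aij->colmap,gcol+1,&lcol);CHKERRQ(ierr);
       lcol--;
     #else
       lcol = aij->colmap[gcol] - 1;
     #endif

   Entries are stored shifted by one so that zero can mean "not present"; a negative
   lcol therefore signals that gcol does not (yet) appear in B.
*/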
183 #undef __FUNCT__
184 #define __FUNCT__ "CreateColmap_MPIAIJ_Private"
185 PetscErrorCode CreateColmap_MPIAIJ_Private(Mat mat)
186 {
187   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
188   PetscErrorCode ierr;
189   PetscInt       n = aij->B->cmap->n,i;
190 
191   PetscFunctionBegin;
192 #if defined (PETSC_USE_CTABLE)
193   ierr = PetscTableCreate(n,&aij->colmap);CHKERRQ(ierr);
194   for (i=0; i<n; i++){
195     ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1);CHKERRQ(ierr);
196   }
197 #else
198   ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscInt),&aij->colmap);CHKERRQ(ierr);
199   ierr = PetscLogObjectMemory(mat,mat->cmap->N*sizeof(PetscInt));CHKERRQ(ierr);
200   ierr = PetscMemzero(aij->colmap,mat->cmap->N*sizeof(PetscInt));CHKERRQ(ierr);
201   for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
202 #endif
203   PetscFunctionReturn(0);
204 }
205 
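/*
   The MatSetValues_SeqAIJ_A_Private() and MatSetValues_SeqAIJ_B_Private() macros below
   insert one value into one row of the local diagonal (A) or off-diagonal (B) block:
   a short binary search narrows the window to at most five candidates, a linear scan
   then looks for the column, and if the column is absent the row is reallocated if
   necessary and the later entries are shifted up one slot to make room for the new
   nonzero.
*/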
206 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \
207 { \
208     if (col <= lastcol1) low1 = 0; else high1 = nrow1; \
209     lastcol1 = col;\
210     while (high1-low1 > 5) { \
211       t = (low1+high1)/2; \
212       if (rp1[t] > col) high1 = t; \
213       else             low1  = t; \
214     } \
215       for (_i=low1; _i<high1; _i++) { \
216         if (rp1[_i] > col) break; \
217         if (rp1[_i] == col) { \
218           if (addv == ADD_VALUES) ap1[_i] += value;   \
219           else                    ap1[_i] = value; \
220           goto a_noinsert; \
221         } \
222       }  \
223       if (value == 0.0 && ignorezeroentries) {low1 = 0; high1 = nrow1;goto a_noinsert;} \
224       if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;}		\
225       if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
226       MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \
227       N = nrow1++ - 1; a->nz++; high1++; \
228       /* shift up all the later entries in this row */ \
229       for (ii=N; ii>=_i; ii--) { \
230         rp1[ii+1] = rp1[ii]; \
231         ap1[ii+1] = ap1[ii]; \
232       } \
233       rp1[_i] = col;  \
234       ap1[_i] = value;  \
235       a_noinsert: ; \
236       ailen[row] = nrow1; \
237 }
238 
239 
240 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \
241 { \
242     if (col <= lastcol2) low2 = 0; else high2 = nrow2; \
243     lastcol2 = col;\
244     while (high2-low2 > 5) { \
245       t = (low2+high2)/2; \
246       if (rp2[t] > col) high2 = t; \
247       else             low2  = t; \
248     } \
249     for (_i=low2; _i<high2; _i++) {		\
250       if (rp2[_i] > col) break;			\
251       if (rp2[_i] == col) {			      \
252 	if (addv == ADD_VALUES) ap2[_i] += value;     \
253 	else                    ap2[_i] = value;      \
254 	goto b_noinsert;			      \
255       }						      \
256     }							      \
257     if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
258     if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;}		\
259     if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
260     MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \
261     N = nrow2++ - 1; b->nz++; high2++;					\
262     /* shift up all the later entries in this row */			\
263     for (ii=N; ii>=_i; ii--) {						\
264       rp2[ii+1] = rp2[ii];						\
265       ap2[ii+1] = ap2[ii];						\
266     }									\
267     rp2[_i] = col;							\
268     ap2[_i] = value;							\
269     b_noinsert: ;								\
270     bilen[row] = nrow2;							\
271 }
272 
273 #undef __FUNCT__
274 #define __FUNCT__ "MatSetValuesRow_MPIAIJ"
275 PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[])
276 {
277   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)A->data;
278   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data;
279   PetscErrorCode ierr;
280   PetscInt       l,*garray = mat->garray,diag;
281 
282   PetscFunctionBegin;
283   /* code only works for square matrices A */
284 
285   /* find size of row to the left of the diagonal part */
286   ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr);
287   row  = row - diag;
288   for (l=0; l<b->i[row+1]-b->i[row]; l++) {
289     if (garray[b->j[b->i[row]+l]] > diag) break;
290   }
291   ierr = PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));CHKERRQ(ierr);
292 
293   /* diagonal part */
294   ierr = PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));CHKERRQ(ierr);
295 
296   /* right of diagonal part */
297   ierr = PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));CHKERRQ(ierr);
298   PetscFunctionReturn(0);
299 }
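/*
   Layout expected by MatSetValuesRow_MPIAIJ() for the input array v: all values of one
   local row ordered by ascending global column, with [cstart,cend) denoting this
   process's diagonal block (equal to its row range, since the routine assumes a square
   matrix):

     v = [ B entries, col < cstart | A entries, cstart <= col < cend | B entries, col >= cend ]
*/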
300 
301 #undef __FUNCT__
302 #define __FUNCT__ "MatSetValues_MPIAIJ"
303 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
304 {
305   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
306   PetscScalar    value;
307   PetscErrorCode ierr;
308   PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
309   PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
310   PetscBool      roworiented = aij->roworiented;
311 
312   /* Some variables required by the MatSetValues_SeqAIJ_A_Private() and _B_Private() macros */
313   Mat            A = aij->A;
314   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data;
315   PetscInt       *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
316   MatScalar      *aa = a->a;
317   PetscBool      ignorezeroentries = a->ignorezeroentries;
318   Mat            B = aij->B;
319   Mat_SeqAIJ     *b = (Mat_SeqAIJ*)B->data;
320   PetscInt       *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
321   MatScalar      *ba = b->a;
322 
323   PetscInt       *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
324   PetscInt       nonew = a->nonew;
325   MatScalar      *ap1,*ap2;
326 
327   PetscFunctionBegin;
328   if (v) PetscValidScalarPointer(v,6);
329   for (i=0; i<m; i++) {
330     if (im[i] < 0) continue;
331 #if defined(PETSC_USE_DEBUG)
332     if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
333 #endif
334     if (im[i] >= rstart && im[i] < rend) {
335       row      = im[i] - rstart;
336       lastcol1 = -1;
337       rp1      = aj + ai[row];
338       ap1      = aa + ai[row];
339       rmax1    = aimax[row];
340       nrow1    = ailen[row];
341       low1     = 0;
342       high1    = nrow1;
343       lastcol2 = -1;
344       rp2      = bj + bi[row];
345       ap2      = ba + bi[row];
346       rmax2    = bimax[row];
347       nrow2    = bilen[row];
348       low2     = 0;
349       high2    = nrow2;
350 
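      /*
         Columns in [cstart,cend) go into the local diagonal block A with a local
         column index; all other columns go into the off-diagonal block B.  Once the
         matrix has been assembled, global columns are translated through aij->colmap,
         and a column not yet present in B (when nonew is unset) forces a call to
         DisAssemble_MPIAIJ() so that new off-diagonal structure can be accepted.
      */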
351       for (j=0; j<n; j++) {
352         if (v) {if (roworiented) value = v[i*n+j]; else value = v[i+j*m];} else value = 0.0;
353         if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
354         if (in[j] >= cstart && in[j] < cend){
355           col = in[j] - cstart;
356           MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
357         } else if (in[j] < 0) continue;
358 #if defined(PETSC_USE_DEBUG)
359         else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
360 #endif
361         else {
362           if (mat->was_assembled) {
363             if (!aij->colmap) {
364               ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
365             }
366 #if defined (PETSC_USE_CTABLE)
367             ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
368 	    col--;
369 #else
370             col = aij->colmap[in[j]] - 1;
371 #endif
372             if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
373               ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
374               col =  in[j];
375               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
376               B = aij->B;
377               b = (Mat_SeqAIJ*)B->data;
378               bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a;
379               rp2      = bj + bi[row];
380               ap2      = ba + bi[row];
381               rmax2    = bimax[row];
382               nrow2    = bilen[row];
383               low2     = 0;
384               high2    = nrow2;
385               bm       = aij->B->rmap->n;
386               ba = b->a;
387             }
388           } else col = in[j];
389           MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
390         }
391       }
392     } else {
393       if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
394       if (!aij->donotstash) {
395         if (roworiented) {
396           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
397         } else {
398           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
399         }
400       }
401     }
402   }
403   PetscFunctionReturn(0);
404 }
405 
406 #undef __FUNCT__
407 #define __FUNCT__ "MatGetValues_MPIAIJ"
408 PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
409 {
410   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
411   PetscErrorCode ierr;
412   PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
413   PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
414 
415   PetscFunctionBegin;
416   for (i=0; i<m; i++) {
417     if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/
418     if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
419     if (idxm[i] >= rstart && idxm[i] < rend) {
420       row = idxm[i] - rstart;
421       for (j=0; j<n; j++) {
422         if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */
423         if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
424         if (idxn[j] >= cstart && idxn[j] < cend){
425           col = idxn[j] - cstart;
426           ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
427         } else {
428           if (!aij->colmap) {
429             ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
430           }
431 #if defined (PETSC_USE_CTABLE)
432           ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
433           col --;
434 #else
435           col = aij->colmap[idxn[j]] - 1;
436 #endif
437           if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
438           else {
439             ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
440           }
441         }
442       }
443     } else {
444       SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
445     }
446   }
447   PetscFunctionReturn(0);
448 }
449 
450 extern PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat,Vec,Vec);
451 
452 #undef __FUNCT__
453 #define __FUNCT__ "MatAssemblyBegin_MPIAIJ"
454 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
455 {
456   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
457   PetscErrorCode ierr;
458   PetscInt       nstash,reallocs;
459   InsertMode     addv;
460 
461   PetscFunctionBegin;
462   if (aij->donotstash || mat->nooffprocentries) {
463     PetscFunctionReturn(0);
464   }
465 
466   /* make sure all processes are in the same mode, either INSERT_VALUES or ADD_VALUES; a bitwise OR reduction is used so a mix of the two can be detected */
467   ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,((PetscObject)mat)->comm);CHKERRQ(ierr);
468   if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
469   mat->insertmode = addv; /* in case this processor had no cache */
470 
471   ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
472   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
473   ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
474   PetscFunctionReturn(0);
475 }
476 
477 #undef __FUNCT__
478 #define __FUNCT__ "MatAssemblyEnd_MPIAIJ"
479 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
480 {
481   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
482   Mat_SeqAIJ     *a=(Mat_SeqAIJ *)aij->A->data;
483   PetscErrorCode ierr;
484   PetscMPIInt    n;
485   PetscInt       i,j,rstart,ncols,flg;
486   PetscInt       *row,*col;
487   PetscBool      other_disassembled;
488   PetscScalar    *val;
489   InsertMode     addv = mat->insertmode;
490 
491   /* do not use 'b = (Mat_SeqAIJ *)aij->B->data' as B can be reset in disassembly */
492   PetscFunctionBegin;
493   if (!aij->donotstash && !mat->nooffprocentries) {
494     while (1) {
495       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
496       if (!flg) break;
497 
498       for (i=0; i<n;) {
499         /* Now identify the consecutive vals belonging to the same row */
500         for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
501         if (j < n) ncols = j-i;
502         else       ncols = n-i;
503         /* Now assemble all these values with a single function call */
504         ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
505         i = j;
506       }
507     }
508     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
509   }
510   a->compressedrow.use     = PETSC_FALSE;
511   ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
512   ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);
513 
514   /* determine if any processor has disassembled, if so we must
515      also disassemble ourselves, in order that we may reassemble. */
516   /*
517      if the nonzero structure of submatrix B cannot change then we know that
518      no processor disassembled, so we can skip this step
519   */
520   if (!((Mat_SeqAIJ*)aij->B->data)->nonew)  {
521     ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,((PetscObject)mat)->comm);CHKERRQ(ierr);
522     if (mat->was_assembled && !other_disassembled) {
523       ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
524     }
525   }
526   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
527     ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
528   }
529   ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr);
530   ((Mat_SeqAIJ *)aij->B->data)->compressedrow.use = PETSC_TRUE; /* b->compressedrow.use */
531   ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
532   ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);
533 
534   ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
535   aij->rowvalues = 0;
536 
537   /* used by MatAXPY() */
538   a->xtoy = 0; ((Mat_SeqAIJ *)aij->B->data)->xtoy = 0;  /* b->xtoy = 0 */
539   a->XtoY = 0; ((Mat_SeqAIJ *)aij->B->data)->XtoY = 0;  /* b->XtoY = 0 */
540 
541   if (aij->diag) {ierr = VecDestroy(aij->diag);CHKERRQ(ierr);aij->diag = 0;}
542   if (a->inode.size) mat->ops->multdiagonalblock = MatMultDiagonalBlock_MPIAIJ;
543   PetscFunctionReturn(0);
544 }
545 
546 #undef __FUNCT__
547 #define __FUNCT__ "MatZeroEntries_MPIAIJ"
548 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
549 {
550   Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
551   PetscErrorCode ierr;
552 
553   PetscFunctionBegin;
554   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
555   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
556   PetscFunctionReturn(0);
557 }
558 
559 #undef __FUNCT__
560 #define __FUNCT__ "MatZeroRows_MPIAIJ"
561 PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
562 {
563   Mat_MPIAIJ        *l = (Mat_MPIAIJ*)A->data;
564   PetscErrorCode    ierr;
565   PetscMPIInt       size = l->size,imdex,n,rank = l->rank,tag = ((PetscObject)A)->tag,lastidx = -1;
566   PetscInt          i,*owners = A->rmap->range;
567   PetscInt          *nprocs,j,idx,nsends,row;
568   PetscInt          nmax,*svalues,*starts,*owner,nrecvs;
569   PetscInt          *rvalues,count,base,slen,*source;
570   PetscInt          *lens,*lrows,*values,rstart=A->rmap->rstart;
571   MPI_Comm          comm = ((PetscObject)A)->comm;
572   MPI_Request       *send_waits,*recv_waits;
573   MPI_Status        recv_status,*send_status;
574   const PetscScalar *xx;
575   PetscScalar       *bb;
576 #if defined(PETSC_DEBUG)
577   PetscBool      found = PETSC_FALSE;
578 #endif
579 
580   PetscFunctionBegin;
581   /*  first count number of contributors to each processor */
582   ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
583   ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
584   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/
585   j = 0;
586   for (i=0; i<N; i++) {
587     if (lastidx > (idx = rows[i])) j = 0;
588     lastidx = idx;
589     for (; j<size; j++) {
590       if (idx >= owners[j] && idx < owners[j+1]) {
591         nprocs[2*j]++;
592         nprocs[2*j+1] = 1;
593         owner[i] = j;
594 #if defined(PETSC_DEBUG)
595         found = PETSC_TRUE;
596 #endif
597         break;
598       }
599     }
600 #if defined(PETSC_DEBUG)
601     if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
602     found = PETSC_FALSE;
603 #endif
604   }
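  /*
     nprocs[2*j]   = number of requested rows owned by process j
     nprocs[2*j+1] = 1 if at least one requested row is owned by process j, so the
                     sum below gives the number of messages this process will send
  */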
605   nsends = 0;  for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}
606 
607   if (A->nooffproczerorows) {
608     if (nsends > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"You called MatSetOption(,MAT_NO_OFF_PROC_ZERO_ROWS,PETSC_TRUE) but set an off process zero row");
609     nrecvs = nsends;
610     nmax   = N;
611   } else {
612     /* inform other processors of the number of messages and the max length */
613     ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);
614   }
615 
616   /* post receives:   */
617   ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr);
618   ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
619   for (i=0; i<nrecvs; i++) {
620     ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
621   }
622 
623   /* do sends:
624       1) starts[i] gives the starting index in svalues for stuff going to
625          the ith processor
626   */
627   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr);
628   ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
629   ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr);
630   starts[0] = 0;
631   for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
632   for (i=0; i<N; i++) {
633     svalues[starts[owner[i]]++] = rows[i];
634   }
635 
636   starts[0] = 0;
637   for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
638   count = 0;
639   for (i=0; i<size; i++) {
640     if (nprocs[2*i+1]) {
641       ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
642     }
643   }
644   ierr = PetscFree(starts);CHKERRQ(ierr);
645 
646   base = owners[rank];
647 
648   /*  wait on receives */
649   ierr   = PetscMalloc2(nrecvs,PetscInt,&lens,nrecvs,PetscInt,&source);CHKERRQ(ierr);
650   count  = nrecvs; slen = 0;
651   while (count) {
652     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
653     /* unpack receives into our local space */
654     ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);
655     source[imdex]  = recv_status.MPI_SOURCE;
656     lens[imdex]    = n;
657     slen          += n;
658     count--;
659   }
660   ierr = PetscFree(recv_waits);CHKERRQ(ierr);
661 
662   /* move the received global row numbers into the local list of rows to zero */
663   ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr);
664   count = 0;
665   for (i=0; i<nrecvs; i++) {
666     values = rvalues + i*nmax;
667     for (j=0; j<lens[i]; j++) {
668       lrows[count++] = values[j] - base;
669     }
670   }
671   ierr = PetscFree(rvalues);CHKERRQ(ierr);
672   ierr = PetscFree2(lens,source);CHKERRQ(ierr);
673   ierr = PetscFree(owner);CHKERRQ(ierr);
674   ierr = PetscFree(nprocs);CHKERRQ(ierr);
675 
676   /* fix right hand side if needed */
677   if (x && b) {
678     ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr);
679     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
680     for (i=0; i<slen; i++) {
681       bb[lrows[i]] = diag*xx[lrows[i]];
682     }
683     ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr);
684     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
685   }
686   /*
687         Zero the required rows. If the "diagonal block" of the matrix
688      is square and the user wishes to set the diagonal we use separate
689      code so that MatSetValues() is not called for each diagonal entry,
690      which would allocate new memory with many mallocs and slow things down.
691 
692   */
693   /* must zero l->B before l->A because the (diag) case below may put values into l->B */
694   ierr = MatZeroRows(l->B,slen,lrows,0.0,0,0);CHKERRQ(ierr);
695   if ((diag != 0.0) && (l->A->rmap->N == l->A->cmap->N)) {
696     ierr = MatZeroRows(l->A,slen,lrows,diag,0,0);CHKERRQ(ierr);
697   } else if (diag != 0.0) {
698     ierr = MatZeroRows(l->A,slen,lrows,0.0,0,0);CHKERRQ(ierr);
699     if (((Mat_SeqAIJ*)l->A->data)->nonew) {
700       SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\n\
701 MAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
702     }
703     for (i = 0; i < slen; i++) {
704       row  = lrows[i] + rstart;
705       ierr = MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr);
706     }
707     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
708     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
709   } else {
710     ierr = MatZeroRows(l->A,slen,lrows,0.0,0,0);CHKERRQ(ierr);
711   }
712   ierr = PetscFree(lrows);CHKERRQ(ierr);
713 
714   /* wait on sends */
715   if (nsends) {
716     ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
717     ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
718     ierr = PetscFree(send_status);CHKERRQ(ierr);
719   }
720   ierr = PetscFree(send_waits);CHKERRQ(ierr);
721   ierr = PetscFree(svalues);CHKERRQ(ierr);
722 
723   PetscFunctionReturn(0);
724 }
725 
726 #undef __FUNCT__
727 #define __FUNCT__ "MatZeroRowsColumns_MPIAIJ"
728 PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
729 {
730   Mat_MPIAIJ        *l = (Mat_MPIAIJ*)A->data;
731   PetscErrorCode    ierr;
732   PetscMPIInt       size = l->size,imdex,n,rank = l->rank,tag = ((PetscObject)A)->tag,lastidx = -1;
733   PetscInt          i,*owners = A->rmap->range;
734   PetscInt          *nprocs,j,idx,nsends;
735   PetscInt          nmax,*svalues,*starts,*owner,nrecvs;
736   PetscInt          *rvalues,count,base,slen,*source;
737   PetscInt          *lens,*lrows,*values,m;
738   MPI_Comm          comm = ((PetscObject)A)->comm;
739   MPI_Request       *send_waits,*recv_waits;
740   MPI_Status        recv_status,*send_status;
741   const PetscScalar *xx;
742   PetscScalar       *bb,*mask;
743   Vec               xmask,lmask;
744   Mat_SeqAIJ        *aij = (Mat_SeqAIJ*)l->B->data;
745   const PetscInt    *aj, *ii,*ridx;
746   PetscScalar       *aa;
747 #if defined(PETSC_DEBUG)
748   PetscBool         found = PETSC_FALSE;
749 #endif
750 
751   PetscFunctionBegin;
752   /*  first count number of contributors to each processor */
753   ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
754   ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
755   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/
756   j = 0;
757   for (i=0; i<N; i++) {
758     if (lastidx > (idx = rows[i])) j = 0;
759     lastidx = idx;
760     for (; j<size; j++) {
761       if (idx >= owners[j] && idx < owners[j+1]) {
762         nprocs[2*j]++;
763         nprocs[2*j+1] = 1;
764         owner[i] = j;
765 #if defined(PETSC_DEBUG)
766         found = PETSC_TRUE;
767 #endif
768         break;
769       }
770     }
771 #if defined(PETSC_DEBUG)
772     if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
773     found = PETSC_FALSE;
774 #endif
775   }
776   nsends = 0;  for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}
777 
778   /* inform other processors of the number of messages and the max length */
779   ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);
780 
781   /* post receives:   */
782   ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr);
783   ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
784   for (i=0; i<nrecvs; i++) {
785     ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
786   }
787 
788   /* do sends:
789       1) starts[i] gives the starting index in svalues for stuff going to
790          the ith processor
791   */
792   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr);
793   ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
794   ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr);
795   starts[0] = 0;
796   for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
797   for (i=0; i<N; i++) {
798     svalues[starts[owner[i]]++] = rows[i];
799   }
800 
801   starts[0] = 0;
802   for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
803   count = 0;
804   for (i=0; i<size; i++) {
805     if (nprocs[2*i+1]) {
806       ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
807     }
808   }
809   ierr = PetscFree(starts);CHKERRQ(ierr);
810 
811   base = owners[rank];
812 
813   /*  wait on receives */
814   ierr   = PetscMalloc2(nrecvs,PetscInt,&lens,nrecvs,PetscInt,&source);CHKERRQ(ierr);
815   count  = nrecvs; slen = 0;
816   while (count) {
817     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
818     /* unpack receives into our local space */
819     ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);
820     source[imdex]  = recv_status.MPI_SOURCE;
821     lens[imdex]    = n;
822     slen          += n;
823     count--;
824   }
825   ierr = PetscFree(recv_waits);CHKERRQ(ierr);
826 
827   /* move the received global row numbers into the local list of rows to zero */
828   ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr);
829   count = 0;
830   for (i=0; i<nrecvs; i++) {
831     values = rvalues + i*nmax;
832     for (j=0; j<lens[i]; j++) {
833       lrows[count++] = values[j] - base;
834     }
835   }
836   ierr = PetscFree(rvalues);CHKERRQ(ierr);
837   ierr = PetscFree2(lens,source);CHKERRQ(ierr);
838   ierr = PetscFree(owner);CHKERRQ(ierr);
839   ierr = PetscFree(nprocs);CHKERRQ(ierr);
840   /* lrows are the local rows to be zeroed, slen is the number of local rows */
841 
842   /* zero diagonal part of matrix */
843   ierr = MatZeroRowsColumns(l->A,slen,lrows,diag,x,b);CHKERRQ(ierr);
844 
845   /* handle off diagonal part of matrix */
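  /*
     Build a vector that is 1 in every zeroed row and scatter it (together with x)
     into the ghosted column layout of B.  The zeroed rows of B are cleared directly;
     every remaining entry of B whose ghosted column is marked then has its
     contribution a_ij*x_j subtracted from the right-hand side b and is set to zero.
  */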
846   ierr = MatGetVecs(A,&xmask,PETSC_NULL);CHKERRQ(ierr);
847   ierr = VecDuplicate(l->lvec,&lmask);CHKERRQ(ierr);
848   ierr = VecGetArray(xmask,&bb);CHKERRQ(ierr);
849   for (i=0; i<slen; i++) {
850     bb[lrows[i]] = 1;
851   }
852   ierr = VecRestoreArray(xmask,&bb);CHKERRQ(ierr);
853   ierr = VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
854   ierr = VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
855   ierr = VecDestroy(xmask);CHKERRQ(ierr);
856   ierr = VecScatterBegin(l->Mvctx,x,l->lvec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
857   ierr = VecScatterEnd(l->Mvctx,x,l->lvec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
858   ierr = VecGetArrayRead(l->lvec,&xx);CHKERRQ(ierr);
859   ierr = VecGetArray(lmask,&mask);CHKERRQ(ierr);
860   ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
861 
862   /* remove zeroed rows of off diagonal matrix */
863   ii = aij->i;
864   for (i=0; i<slen; i++) {
865     ierr = PetscMemzero(aij->a + ii[lrows[i]],(ii[lrows[i]+1] - ii[lrows[i]])*sizeof(PetscScalar));CHKERRQ(ierr);
866   }
867 
868   /* loop over all elements of the off-process part of the matrix, zeroing the removed columns */
869   if (aij->compressedrow.use){
870     m    = aij->compressedrow.nrows;
871     ii   = aij->compressedrow.i;
872     ridx = aij->compressedrow.rindex;
873     for (i=0; i<m; i++){
874       n   = ii[i+1] - ii[i];
875       aj  = aij->j + ii[i];
876       aa  = aij->a + ii[i];
877 
878       for (j=0; j<n; j++) {
879         if (PetscAbsScalar(mask[*aj])) {
880           bb[*ridx] -= *aa*xx[*aj];
881           *aa        = 0.0;
882         }
883         aa++;
884         aj++;
885       }
886       ridx++;
887     }
888   } else { /* do not use compressed row format */
889     m = l->B->rmap->n;
890    for (i=0; i<m; i++) {
891     for (i=0; i<m; i++) {
892       n   = ii[i+1] - ii[i];
893       aj  = aij->j + ii[i];
894       aa  = aij->a + ii[i];
895       for (j=0; j<n; j++) {
896           bb[i] -= *aa*xx[*aj];
897           *aa    = 0.0;
898         }
899         aa++;
900         aj++;
901       }
902     }
903   }
904 
905   ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
906   ierr = VecRestoreArray(lmask,&mask);CHKERRQ(ierr);
907   ierr = VecRestoreArrayRead(l->lvec,&xx);CHKERRQ(ierr);
908   ierr = VecDestroy(lmask);CHKERRQ(ierr);
909   ierr = PetscFree(lrows);CHKERRQ(ierr);
910 
911   /* wait on sends */
912   if (nsends) {
913     ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
914     ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
915     ierr = PetscFree(send_status);CHKERRQ(ierr);
916   }
917   ierr = PetscFree(send_waits);CHKERRQ(ierr);
918   ierr = PetscFree(svalues);CHKERRQ(ierr);
919 
920   PetscFunctionReturn(0);
921 }
922 
923 #undef __FUNCT__
924 #define __FUNCT__ "MatMult_MPIAIJ"
925 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
926 {
927   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
928   PetscErrorCode ierr;
929   PetscInt       nt;
930 
931   PetscFunctionBegin;
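  /*
     y = A_d*x_local + B_o*x_ghost: the scatter that gathers the ghost values of xx
     into a->lvec is started first, the purely local product with the diagonal block
     a->A is overlapped with the communication, and the off-diagonal contribution
     from a->B is added once the scatter has completed.
  */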
932   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
933   if (nt != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt);
934   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
935   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
936   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
937   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
938   PetscFunctionReturn(0);
939 }
940 
941 #undef __FUNCT__
942 #define __FUNCT__ "MatMultDiagonalBlock_MPIAIJ"
943 PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx)
944 {
945   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
946   PetscErrorCode ierr;
947 
948   PetscFunctionBegin;
949   ierr = MatMultDiagonalBlock(a->A,bb,xx);CHKERRQ(ierr);
950   PetscFunctionReturn(0);
951 }
952 
953 #undef __FUNCT__
954 #define __FUNCT__ "MatMultAdd_MPIAIJ"
955 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
956 {
957   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
958   PetscErrorCode ierr;
959 
960   PetscFunctionBegin;
961   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
962   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
963   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
964   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
965   PetscFunctionReturn(0);
966 }
967 
968 #undef __FUNCT__
969 #define __FUNCT__ "MatMultTranspose_MPIAIJ"
970 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
971 {
972   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
973   PetscErrorCode ierr;
974   PetscBool      merged;
975 
976   PetscFunctionBegin;
977   ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr);
978   /* do nondiagonal part */
979   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
980   if (!merged) {
981     /* send it on its way */
982     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
983     /* do local part */
984     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
985     /* receive remote parts: note this assumes the values are not actually */
986     /* added into yy until the VecScatterEnd() below */
987     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
988   } else {
989     /* do local part */
990     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
991     /* send it on its way */
992     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
993     /* values actually were received in the Begin() but we need to call this nop */
994     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
995   }
996   PetscFunctionReturn(0);
997 }
998 
999 EXTERN_C_BEGIN
1000 #undef __FUNCT__
1001 #define __FUNCT__ "MatIsTranspose_MPIAIJ"
1002 PetscErrorCode PETSCMAT_DLLEXPORT MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscBool  *f)
1003 {
1004   MPI_Comm       comm;
1005   Mat_MPIAIJ     *Aij = (Mat_MPIAIJ *) Amat->data, *Bij;
1006   Mat            Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs;
1007   IS             Me,Notme;
1008   PetscErrorCode ierr;
1009   PetscInt       M,N,first,last,*notme,i;
1010   PetscMPIInt    size;
1011 
1012   PetscFunctionBegin;
1013 
1014   /* Easy test: the diagonal blocks must be transposes of each other */
1015   Bij = (Mat_MPIAIJ *) Bmat->data; Bdia = Bij->A;
1016   ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr);
1017   if (!*f) PetscFunctionReturn(0);
1018   ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr);
1019   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1020   if (size == 1) PetscFunctionReturn(0);
1021 
1022   /* Hard test: off-diagonal block. This takes a MatGetSubMatrix. */
1023   ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr);
1024   ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr);
1025   ierr = PetscMalloc((N-last+first)*sizeof(PetscInt),&notme);CHKERRQ(ierr);
1026   for (i=0; i<first; i++) notme[i] = i;
1027   for (i=last; i<M; i++) notme[i-last+first] = i;
1028   ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,PETSC_COPY_VALUES,&Notme);CHKERRQ(ierr);
1029   ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr);
1030   ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr);
1031   Aoff = Aoffs[0];
1032   ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr);
1033   Boff = Boffs[0];
1034   ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr);
1035   ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr);
1036   ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr);
1037   ierr = ISDestroy(Me);CHKERRQ(ierr);
1038   ierr = ISDestroy(Notme);CHKERRQ(ierr);
1039 
1040   PetscFunctionReturn(0);
1041 }
1042 EXTERN_C_END
1043 
1044 #undef __FUNCT__
1045 #define __FUNCT__ "MatMultTransposeAdd_MPIAIJ"
1046 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1047 {
1048   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1049   PetscErrorCode ierr;
1050 
1051   PetscFunctionBegin;
1052   /* do nondiagonal part */
1053   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1054   /* send it on its way */
1055   ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1056   /* do local part */
1057   ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1058   /* receive remote parts */
1059   ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1060   PetscFunctionReturn(0);
1061 }
1062 
1063 /*
1064   This only works correctly for square matrices where the subblock A->A is the
1065    diagonal block
1066 */
1067 #undef __FUNCT__
1068 #define __FUNCT__ "MatGetDiagonal_MPIAIJ"
1069 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v)
1070 {
1071   PetscErrorCode ierr;
1072   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1073 
1074   PetscFunctionBegin;
1075   if (A->rmap->N != A->cmap->N) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
1076   if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
1077   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
1078   PetscFunctionReturn(0);
1079 }
1080 
1081 #undef __FUNCT__
1082 #define __FUNCT__ "MatScale_MPIAIJ"
1083 PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa)
1084 {
1085   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1086   PetscErrorCode ierr;
1087 
1088   PetscFunctionBegin;
1089   ierr = MatScale(a->A,aa);CHKERRQ(ierr);
1090   ierr = MatScale(a->B,aa);CHKERRQ(ierr);
1091   PetscFunctionReturn(0);
1092 }
1093 
1094 #undef __FUNCT__
1095 #define __FUNCT__ "MatDestroy_MPIAIJ"
1096 PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
1097 {
1098   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1099   PetscErrorCode ierr;
1100 
1101   PetscFunctionBegin;
1102 #if defined(PETSC_USE_LOG)
1103   PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N);
1104 #endif
1105   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
1106   if (aij->diag) {ierr = VecDestroy(aij->diag);CHKERRQ(ierr);}
1107   if (aij->A){ierr = MatDestroy(aij->A);CHKERRQ(ierr);}
1108   if (aij->B){ierr = MatDestroy(aij->B);CHKERRQ(ierr);}
1109 #if defined (PETSC_USE_CTABLE)
1110   if (aij->colmap) {ierr = PetscTableDestroy(aij->colmap);CHKERRQ(ierr);}
1111 #else
1112   ierr = PetscFree(aij->colmap);CHKERRQ(ierr);
1113 #endif
1114   ierr = PetscFree(aij->garray);CHKERRQ(ierr);
1115   if (aij->lvec)   {ierr = VecDestroy(aij->lvec);CHKERRQ(ierr);}
1116   if (aij->Mvctx)  {ierr = VecScatterDestroy(aij->Mvctx);CHKERRQ(ierr);}
1117   ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
1118   ierr = PetscFree(aij->ld);CHKERRQ(ierr);
1119   ierr = PetscFree(aij);CHKERRQ(ierr);
1120 
1121   ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
1122   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr);
1123   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr);
1124   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr);
1125   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C","",PETSC_NULL);CHKERRQ(ierr);
1126   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
1127   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr);
1128   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C","",PETSC_NULL);CHKERRQ(ierr);
1129   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C","",PETSC_NULL);CHKERRQ(ierr);
1130   PetscFunctionReturn(0);
1131 }
1132 
1133 #undef __FUNCT__
1134 #define __FUNCT__ "MatView_MPIAIJ_Binary"
1135 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
1136 {
1137   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
1138   Mat_SeqAIJ*       A = (Mat_SeqAIJ*)aij->A->data;
1139   Mat_SeqAIJ*       B = (Mat_SeqAIJ*)aij->B->data;
1140   PetscErrorCode    ierr;
1141   PetscMPIInt       rank,size,tag = ((PetscObject)viewer)->tag;
1142   int               fd;
1143   PetscInt          nz,header[4],*row_lengths,*range=0,rlen,i;
1144   PetscInt          nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap->rstart,rnz;
1145   PetscScalar       *column_values;
1146   PetscInt          message_count,flowcontrolcount;
1147 
1148   PetscFunctionBegin;
1149   ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr);
1150   ierr = MPI_Comm_size(((PetscObject)mat)->comm,&size);CHKERRQ(ierr);
1151   nz   = A->nz + B->nz;
1152   if (!rank) {
1153     header[0] = MAT_FILE_CLASSID;
1154     header[1] = mat->rmap->N;
1155     header[2] = mat->cmap->N;
1156     ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
1157     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1158     ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1159     /* get largest number of rows any processor has */
1160     rlen = mat->rmap->n;
1161     range = mat->rmap->range;
1162     for (i=1; i<size; i++) {
1163       rlen = PetscMax(rlen,range[i+1] - range[i]);
1164     }
1165   } else {
1166     ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
1167     rlen = mat->rmap->n;
1168   }
1169 
1170   /* load up the local row counts */
1171   ierr = PetscMalloc((rlen+1)*sizeof(PetscInt),&row_lengths);CHKERRQ(ierr);
1172   for (i=0; i<mat->rmap->n; i++) {
1173     row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
1174   }
1175 
1176   /* store the row lengths to the file */
1177   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1178   if (!rank) {
1179     MPI_Status status;
1180     ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap->n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1181     for (i=1; i<size; i++) {
1182       ierr = PetscViewerFlowControlStepMaster(viewer,i,message_count,flowcontrolcount);CHKERRQ(ierr);
1183       rlen = range[i+1] - range[i];
1184       ierr = MPI_Recv(row_lengths,rlen,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
1185       ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1186     }
1187     ierr = PetscViewerFlowControlEndMaster(viewer,message_count);CHKERRQ(ierr);
1188   } else {
1189     ierr = PetscViewerFlowControlStepWorker(viewer,rank,message_count);CHKERRQ(ierr);
1190     ierr = MPI_Send(row_lengths,mat->rmap->n,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1191     ierr = PetscViewerFlowControlEndWorker(viewer,message_count);CHKERRQ(ierr);
1192   }
1193   ierr = PetscFree(row_lengths);CHKERRQ(ierr);
1194 
1195   /* load up the local column indices */
1196   nzmax = nz; /* the 0th processor needs as much space as the largest processor needs */
1197   ierr = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
1198   ierr = PetscMalloc((nzmax+1)*sizeof(PetscInt),&column_indices);CHKERRQ(ierr);
1199   cnt  = 0;
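  /*
     Emit each local row's column indices in ascending global order: first the
     off-diagonal (B) columns that lie before this process's diagonal block, then the
     diagonal-block (A) columns shifted by cstart, then the remaining B columns.
  */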
1200   for (i=0; i<mat->rmap->n; i++) {
1201     for (j=B->i[i]; j<B->i[i+1]; j++) {
1202       if ( (col = garray[B->j[j]]) > cstart) break;
1203       column_indices[cnt++] = col;
1204     }
1205     for (k=A->i[i]; k<A->i[i+1]; k++) {
1206       column_indices[cnt++] = A->j[k] + cstart;
1207     }
1208     for (; j<B->i[i+1]; j++) {
1209       column_indices[cnt++] = garray[B->j[j]];
1210     }
1211   }
1212   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
1213 
1214   /* store the column indices to the file */
1215   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1216   if (!rank) {
1217     MPI_Status status;
1218     ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1219     for (i=1; i<size; i++) {
1220       ierr = PetscViewerFlowControlStepMaster(viewer,i,message_count,flowcontrolcount);CHKERRQ(ierr);
1221       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
1222       if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: rnz = %D nzmax = %D",rnz,nzmax);
1223       ierr = MPI_Recv(column_indices,rnz,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
1224       ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1225     }
1226     ierr = PetscViewerFlowControlEndMaster(viewer,message_count);CHKERRQ(ierr);
1227   } else {
1228     ierr = PetscViewerFlowControlStepWorker(viewer,rank,message_count);CHKERRQ(ierr);
1229     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1230     ierr = MPI_Send(column_indices,nz,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1231     ierr = PetscViewerFlowControlEndWorker(viewer,message_count);CHKERRQ(ierr);
1232   }
1233   ierr = PetscFree(column_indices);CHKERRQ(ierr);
1234 
1235   /* load up the local column values */
1236   ierr = PetscMalloc((nzmax+1)*sizeof(PetscScalar),&column_values);CHKERRQ(ierr);
1237   cnt  = 0;
1238   for (i=0; i<mat->rmap->n; i++) {
1239     for (j=B->i[i]; j<B->i[i+1]; j++) {
1240       if ( garray[B->j[j]] > cstart) break;
1241       column_values[cnt++] = B->a[j];
1242     }
1243     for (k=A->i[i]; k<A->i[i+1]; k++) {
1244       column_values[cnt++] = A->a[k];
1245     }
1246     for (; j<B->i[i+1]; j++) {
1247       column_values[cnt++] = B->a[j];
1248     }
1249   }
1250   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
1251 
1252   /* store the column values to the file */
1253   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1254   if (!rank) {
1255     MPI_Status status;
1256     ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1257     for (i=1; i<size; i++) {
1258       ierr = PetscViewerFlowControlStepMaster(viewer,i,message_count,flowcontrolcount);CHKERRQ(ierr);
1259       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
1260       if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: rnz = %D nzmax = %D",rnz,nzmax);
1261       ierr = MPI_Recv(column_values,rnz,MPIU_SCALAR,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
1262       ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1263     }
1264     ierr = PetscViewerFlowControlEndMaster(viewer,message_count);CHKERRQ(ierr);
1265   } else {
1266     ierr = PetscViewerFlowControlStepWorker(viewer,rank,message_count);CHKERRQ(ierr);
1267     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1268     ierr = MPI_Send(column_values,nz,MPIU_SCALAR,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1269     ierr = PetscViewerFlowControlEndWorker(viewer,message_count);CHKERRQ(ierr);
1270   }
1271   ierr = PetscFree(column_values);CHKERRQ(ierr);
1272   PetscFunctionReturn(0);
1273 }
1274 
1275 #undef __FUNCT__
1276 #define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket"
1277 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
1278 {
1279   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
1280   PetscErrorCode    ierr;
1281   PetscMPIInt       rank = aij->rank,size = aij->size;
1282   PetscBool         isdraw,iascii,isbinary;
1283   PetscViewer       sviewer;
1284   PetscViewerFormat format;
1285 
1286   PetscFunctionBegin;
1287   ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1288   ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1289   ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1290   if (iascii) {
1291     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1292     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1293       MatInfo    info;
1294       PetscBool  inodes;
1295 
1296       ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr);
1297       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
1298       ierr = MatInodeGetInodeSizes(aij->A,PETSC_NULL,(PetscInt **)&inodes,PETSC_NULL);CHKERRQ(ierr);
1299       if (!inodes) {
1300         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, not using I-node routines\n",
1301 					      rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
1302       } else {
1303         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, using I-node routines\n",
1304 		    rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
1305       }
1306       ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
1307       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1308       ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
1309       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1310       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1311       ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
1312       ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr);
1313       PetscFunctionReturn(0);
1314     } else if (format == PETSC_VIEWER_ASCII_INFO) {
1315       PetscInt   inodecount,inodelimit,*inodes;
1316       ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr);
1317       if (inodes) {
1318         ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr);
1319       } else {
1320         ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr);
1321       }
1322       PetscFunctionReturn(0);
1323     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1324       PetscFunctionReturn(0);
1325     }
1326   } else if (isbinary) {
1327     if (size == 1) {
1328       ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1329       ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
1330     } else {
1331       ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr);
1332     }
1333     PetscFunctionReturn(0);
1334   } else if (isdraw) {
1335     PetscDraw  draw;
1336     PetscBool  isnull;
1337     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
1338     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
1339   }
1340 
1341   if (size == 1) {
1342     ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1343     ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
1344   } else {
1345     /* assemble the entire matrix onto first processor. */
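    /* Strategy: build a temporary MATMPIAIJ matrix that stores all rows on rank 0
       (the other ranks contribute zero local rows), copy the local diagonal (A) and
       off-diagonal (B) blocks into it with MatSetValues() using global column indices,
       assemble it, and then let rank 0 view its sequential diagonal block through the
       singleton viewer. */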
1346     Mat         A;
1347     Mat_SeqAIJ  *Aloc;
1348     PetscInt    M = mat->rmap->N,N = mat->cmap->N,m,*ai,*aj,row,*cols,i,*ct;
1349     MatScalar   *a;
1350 
1351     if (mat->rmap->N > 1024) {
1352       PetscBool  flg = PETSC_FALSE;
1353 
1354       ierr = PetscOptionsGetBool(((PetscObject) mat)->prefix, "-mat_ascii_output_large", &flg,PETSC_NULL);CHKERRQ(ierr);
1355       if (!flg) {
1356         SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_OUTOFRANGE,"ASCII matrix output not allowed for matrices with more than 1024 rows, use binary format instead.\nYou can override this restriction using -mat_ascii_output_large.");
1357       }
1358     }
1359 
1360     ierr = MatCreate(((PetscObject)mat)->comm,&A);CHKERRQ(ierr);
1361     if (!rank) {
1362       ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
1363     } else {
1364       ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
1365     }
1366     /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */
1367     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
1368     ierr = MatMPIAIJSetPreallocation(A,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
1369     ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr);
1370 
1371     /* copy over the A part */
1372     Aloc = (Mat_SeqAIJ*)aij->A->data;
1373     m = aij->A->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1374     row = mat->rmap->rstart;
1375     for (i=0; i<ai[m]; i++) {aj[i] += mat->cmap->rstart;}
1376     for (i=0; i<m; i++) {
1377       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
1378       row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
1379     }
1380     aj = Aloc->j;
1381     for (i=0; i<ai[m]; i++) {aj[i] -= mat->cmap->rstart;}
1382 
1383     /* copy over the B part */
1384     Aloc = (Mat_SeqAIJ*)aij->B->data;
1385     m    = aij->B->rmap->n;  ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1386     row  = mat->rmap->rstart;
1387     ierr = PetscMalloc((ai[m]+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr);
1388     ct   = cols;
1389     for (i=0; i<ai[m]; i++) {cols[i] = aij->garray[aj[i]];}
1390     for (i=0; i<m; i++) {
1391       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
1392       row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
1393     }
1394     ierr = PetscFree(ct);CHKERRQ(ierr);
1395     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1396     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1397     /*
1398        Everyone has to participate here (get and restore the singleton viewer) since
1399        the graphics waits are synchronized across all processors that share the PetscDraw object
1400     */
1401     ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
1402     if (!rank) {
1403       ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1404       /* Set the type name to MATMPIAIJ so that the correct type can be printed out by PetscObjectPrintClassNamePrefixType() in MatView_SeqAIJ_ASCII()*/
1405       ierr = PetscStrcpy(((PetscObject)((Mat_MPIAIJ*)(A->data))->A)->type_name,MATMPIAIJ);CHKERRQ(ierr);
1406       ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
1407     }
1408     ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
1409     ierr = MatDestroy(A);CHKERRQ(ierr);
1410   }
1411   PetscFunctionReturn(0);
1412 }
1413 
1414 #undef __FUNCT__
1415 #define __FUNCT__ "MatView_MPIAIJ"
1416 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer)
1417 {
1418   PetscErrorCode ierr;
1419   PetscBool      iascii,isdraw,issocket,isbinary;
1420 
1421   PetscFunctionBegin;
1422   ierr  = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1423   ierr  = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1424   ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1425   ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
1426   if (iascii || isdraw || isbinary || issocket) {
1427     ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
1428   } else {
1429     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Viewer type %s not supported by MPIAIJ matrices",((PetscObject)viewer)->type_name);
1430   }
1431   PetscFunctionReturn(0);
1432 }
1433 
1434 #undef __FUNCT__
1435 #define __FUNCT__ "MatSOR_MPIAIJ"
1436 PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
1437 {
1438   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1439   PetscErrorCode ierr;
1440   Vec            bb1 = 0;
1441   PetscBool      hasop;
1442 
1443   PetscFunctionBegin;
1444   if (its > 1 || (~flag & SOR_ZERO_INITIAL_GUESS) || (flag & SOR_EISENSTAT)) {
1445     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
1446   }
1447 
1448   if (flag == SOR_APPLY_UPPER) {
1449     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1450     PetscFunctionReturn(0);
1451   }
1452 
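  /* Each of the local-sweep variants below iterates as follows: scatter the current
     solution's ghost values into mat->lvec, form bb1 = bb - B*lvec (i.e. subtract the
     off-diagonal contribution from the right-hand side), and then run the sequential
     SOR kernel of the diagonal block mat->A on bb1. */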
1453   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
1454     if (flag & SOR_ZERO_INITIAL_GUESS) {
1455       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1456       its--;
1457     }
1458 
1459     while (its--) {
1460       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1461       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1462 
1463       /* update rhs: bb1 = bb - B*x */
1464       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1465       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1466 
1467       /* local sweep */
1468       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1469     }
1470   } else if (flag & SOR_LOCAL_FORWARD_SWEEP){
1471     if (flag & SOR_ZERO_INITIAL_GUESS) {
1472       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1473       its--;
1474     }
1475     while (its--) {
1476       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1477       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1478 
1479       /* update rhs: bb1 = bb - B*x */
1480       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1481       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1482 
1483       /* local sweep */
1484       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1485     }
1486   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){
1487     if (flag & SOR_ZERO_INITIAL_GUESS) {
1488       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1489       its--;
1490     }
1491     while (its--) {
1492       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1493       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1494 
1495       /* update rhs: bb1 = bb - B*x */
1496       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1497       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1498 
1499       /* local sweep */
1500       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1501     }
1502   }  else if (flag & SOR_EISENSTAT) {
1503     Vec         xx1;
1504 
1505     ierr = VecDuplicate(bb,&xx1);CHKERRQ(ierr);
1506     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr);
1507 
1508     ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1509     ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1510     if (!mat->diag) {
1511       ierr = MatGetVecs(matin,&mat->diag,PETSC_NULL);CHKERRQ(ierr);
1512       ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr);
1513     }
1514     ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr);
1515     if (hasop) {
1516       ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr);
1517     } else {
1518       ierr = VecPointwiseMult(bb1,mat->diag,xx);CHKERRQ(ierr);
1519     }
1520     ierr = VecAYPX(bb1,(omega-2.0)/omega,bb);CHKERRQ(ierr);
1521 
1522     ierr = MatMultAdd(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr);
1523 
1524     /* local sweep */
1525     ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr);
1526     ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr);
1527     ierr = VecDestroy(xx1);CHKERRQ(ierr);
1528   } else {
1529     SETERRQ(((PetscObject)matin)->comm,PETSC_ERR_SUP,"Parallel SOR not supported");
1530   }
1531 
1532   if (bb1) {ierr = VecDestroy(bb1);CHKERRQ(ierr);}
1533   PetscFunctionReturn(0);
1534 }
1535 
1536 #undef __FUNCT__
1537 #define __FUNCT__ "MatPermute_MPIAIJ"
1538 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B)
1539 {
1540   MPI_Comm       comm,pcomm;
1541   PetscInt       first,local_size,nrows;
1542   const PetscInt *rows;
1543   PetscMPIInt    size;
1544   IS             crowp,growp,irowp,lrowp,lcolp,icolp;
1545   PetscErrorCode ierr;
1546 
1547   PetscFunctionBegin;
1548   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
1549   /* make a collective version of 'rowp' */
1550   ierr = PetscObjectGetComm((PetscObject)rowp,&pcomm);CHKERRQ(ierr);
1551   if (pcomm==comm) {
1552     crowp = rowp;
1553   } else {
1554     ierr = ISGetSize(rowp,&nrows);CHKERRQ(ierr);
1555     ierr = ISGetIndices(rowp,&rows);CHKERRQ(ierr);
1556     ierr = ISCreateGeneral(comm,nrows,rows,PETSC_COPY_VALUES,&crowp);CHKERRQ(ierr);
1557     ierr = ISRestoreIndices(rowp,&rows);CHKERRQ(ierr);
1558   }
1559   /* collect the global row permutation and invert it */
1560   ierr = ISAllGather(crowp,&growp);CHKERRQ(ierr);
1561   ierr = ISSetPermutation(growp);CHKERRQ(ierr);
1562   if (pcomm!=comm) {
1563     ierr = ISDestroy(crowp);CHKERRQ(ierr);
1564   }
1565   ierr = ISInvertPermutation(growp,PETSC_DECIDE,&irowp);CHKERRQ(ierr);
1566   /* get the local target indices */
1567   ierr = MatGetOwnershipRange(A,&first,PETSC_NULL);CHKERRQ(ierr);
1568   ierr = MatGetLocalSize(A,&local_size,PETSC_NULL);CHKERRQ(ierr);
1569   ierr = ISGetIndices(irowp,&rows);CHKERRQ(ierr);
1570   ierr = ISCreateGeneral(MPI_COMM_SELF,local_size,rows+first,PETSC_COPY_VALUES,&lrowp);CHKERRQ(ierr);
1571   ierr = ISRestoreIndices(irowp,&rows);CHKERRQ(ierr);
1572   ierr = ISDestroy(irowp);CHKERRQ(ierr);
1573   /* the column permutation is so much easier;
1574      make a local version of 'colp' and invert it */
1575   ierr = PetscObjectGetComm((PetscObject)colp,&pcomm);CHKERRQ(ierr);
1576   ierr = MPI_Comm_size(pcomm,&size);CHKERRQ(ierr);
1577   if (size==1) {
1578     lcolp = colp;
1579   } else {
1580     ierr = ISGetSize(colp,&nrows);CHKERRQ(ierr);
1581     ierr = ISGetIndices(colp,&rows);CHKERRQ(ierr);
1582     ierr = ISCreateGeneral(MPI_COMM_SELF,nrows,rows,PETSC_COPY_VALUES,&lcolp);CHKERRQ(ierr);
1583   }
1584   ierr = ISSetPermutation(lcolp);CHKERRQ(ierr);
1585   ierr = ISInvertPermutation(lcolp,PETSC_DECIDE,&icolp);CHKERRQ(ierr);
1586   ierr = ISSetPermutation(icolp);CHKERRQ(ierr);
1587   if (size>1) {
1588     ierr = ISRestoreIndices(colp,&rows);CHKERRQ(ierr);
1589     ierr = ISDestroy(lcolp);CHKERRQ(ierr);
1590   }
1591   /* now we just get the submatrix */
1592   ierr = MatGetSubMatrix_MPIAIJ_Private(A,lrowp,icolp,local_size,MAT_INITIAL_MATRIX,B);CHKERRQ(ierr);
1593   /* clean up */
1594   ierr = ISDestroy(lrowp);CHKERRQ(ierr);
1595   ierr = ISDestroy(icolp);CHKERRQ(ierr);
1596   PetscFunctionReturn(0);
1597 }
1598 
1599 #undef __FUNCT__
1600 #define __FUNCT__ "MatGetInfo_MPIAIJ"
1601 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1602 {
1603   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1604   Mat            A = mat->A,B = mat->B;
1605   PetscErrorCode ierr;
1606   PetscReal      isend[5],irecv[5];
1607 
1608   PetscFunctionBegin;
1609   info->block_size     = 1.0;
1610   ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1611   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1612   isend[3] = info->memory;  isend[4] = info->mallocs;
1613   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1614   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1615   isend[3] += info->memory;  isend[4] += info->mallocs;
1616   if (flag == MAT_LOCAL) {
1617     info->nz_used      = isend[0];
1618     info->nz_allocated = isend[1];
1619     info->nz_unneeded  = isend[2];
1620     info->memory       = isend[3];
1621     info->mallocs      = isend[4];
1622   } else if (flag == MAT_GLOBAL_MAX) {
1623     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,((PetscObject)matin)->comm);CHKERRQ(ierr);
1624     info->nz_used      = irecv[0];
1625     info->nz_allocated = irecv[1];
1626     info->nz_unneeded  = irecv[2];
1627     info->memory       = irecv[3];
1628     info->mallocs      = irecv[4];
1629   } else if (flag == MAT_GLOBAL_SUM) {
1630     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,((PetscObject)matin)->comm);CHKERRQ(ierr);
1631     info->nz_used      = irecv[0];
1632     info->nz_allocated = irecv[1];
1633     info->nz_unneeded  = irecv[2];
1634     info->memory       = irecv[3];
1635     info->mallocs      = irecv[4];
1636   }
1637   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1638   info->fill_ratio_needed = 0;
1639   info->factor_mallocs    = 0;
1640 
1641   PetscFunctionReturn(0);
1642 }
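/*
   Usage sketch (not part of this source file): a caller typically requests the
   statistics summed over all processes, e.g.

     MatInfo info;
     ierr = MatGetInfo(A,MAT_GLOBAL_SUM,&info);CHKERRQ(ierr);
     ierr = PetscPrintf(PETSC_COMM_WORLD,"nonzeros used %g\n",(double)info.nz_used);CHKERRQ(ierr);
*/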
1643 
1644 #undef __FUNCT__
1645 #define __FUNCT__ "MatSetOption_MPIAIJ"
1646 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscBool  flg)
1647 {
1648   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1649   PetscErrorCode ierr;
1650 
1651   PetscFunctionBegin;
1652   switch (op) {
1653   case MAT_NEW_NONZERO_LOCATIONS:
1654   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1655   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1656   case MAT_KEEP_NONZERO_PATTERN:
1657   case MAT_NEW_NONZERO_LOCATION_ERR:
1658   case MAT_USE_INODES:
1659   case MAT_IGNORE_ZERO_ENTRIES:
1660     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1661     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1662     break;
1663   case MAT_ROW_ORIENTED:
1664     a->roworiented = flg;
1665     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1666     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1667     break;
1668   case MAT_NEW_DIAGONALS:
1669     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1670     break;
1671   case MAT_IGNORE_OFF_PROC_ENTRIES:
1672     a->donotstash = PETSC_TRUE;
1673     break;
1674   case MAT_SPD:
1675     A->spd_set                         = PETSC_TRUE;
1676     A->spd                             = flg;
1677     if (flg) {
1678       A->symmetric                     = PETSC_TRUE;
1679       A->structurally_symmetric        = PETSC_TRUE;
1680       A->symmetric_set                 = PETSC_TRUE;
1681       A->structurally_symmetric_set    = PETSC_TRUE;
1682     }
1683     break;
1684   case MAT_SYMMETRIC:
1685     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1686     break;
1687   case MAT_STRUCTURALLY_SYMMETRIC:
1688     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1689     break;
1690   case MAT_HERMITIAN:
1691     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1692     break;
1693   case MAT_SYMMETRY_ETERNAL:
1694     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1695     break;
1696   default:
1697     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op);
1698   }
1699   PetscFunctionReturn(0);
1700 }
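/*
   Usage sketch (not part of this source file): the options handled above are set
   through the public interface, e.g.

     ierr = MatSetOption(A,MAT_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr);
     ierr = MatSetOption(A,MAT_IGNORE_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr);
*/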
1701 
1702 #undef __FUNCT__
1703 #define __FUNCT__ "MatGetRow_MPIAIJ"
1704 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1705 {
1706   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1707   PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
1708   PetscErrorCode ierr;
1709   PetscInt       i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart;
1710   PetscInt       nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend;
1711   PetscInt       *cmap,*idx_p;
1712 
1713   PetscFunctionBegin;
1714   if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
1715   mat->getrowactive = PETSC_TRUE;
1716 
1717   if (!mat->rowvalues && (idx || v)) {
1718     /*
1719         allocate enough space to hold information from the longest row.
1720     */
1721     Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data;
1722     PetscInt   max = 1,tmp;
1723     for (i=0; i<matin->rmap->n; i++) {
1724       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1725       if (max < tmp) { max = tmp; }
1726     }
1727     ierr = PetscMalloc2(max,PetscScalar,&mat->rowvalues,max,PetscInt,&mat->rowindices);CHKERRQ(ierr);
1728   }
1729 
1730   if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows");
1731   lrow = row - rstart;
1732 
1733   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1734   if (!v)   {pvA = 0; pvB = 0;}
1735   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1736   ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1737   ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1738   nztot = nzA + nzB;
1739 
1740   cmap  = mat->garray;
1741   if (v  || idx) {
1742     if (nztot) {
1743       /* Sort by increasing column numbers, assuming A and B already sorted */
1744       PetscInt imark = -1;
1745       if (v) {
1746         *v = v_p = mat->rowvalues;
1747         for (i=0; i<nzB; i++) {
1748           if (cmap[cworkB[i]] < cstart)   v_p[i] = vworkB[i];
1749           else break;
1750         }
1751         imark = i;
1752         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1753         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1754       }
1755       if (idx) {
1756         *idx = idx_p = mat->rowindices;
1757         if (imark > -1) {
1758           for (i=0; i<imark; i++) {
1759             idx_p[i] = cmap[cworkB[i]];
1760           }
1761         } else {
1762           for (i=0; i<nzB; i++) {
1763             if (cmap[cworkB[i]] < cstart)   idx_p[i] = cmap[cworkB[i]];
1764             else break;
1765           }
1766           imark = i;
1767         }
1768         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart + cworkA[i];
1769         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]];
1770       }
1771     } else {
1772       if (idx) *idx = 0;
1773       if (v)   *v   = 0;
1774     }
1775   }
1776   *nz = nztot;
1777   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1778   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1779   PetscFunctionReturn(0);
1780 }
1781 
1782 #undef __FUNCT__
1783 #define __FUNCT__ "MatRestoreRow_MPIAIJ"
1784 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1785 {
1786   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1787 
1788   PetscFunctionBegin;
1789   if (!aij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
1790   aij->getrowactive = PETSC_FALSE;
1791   PetscFunctionReturn(0);
1792 }
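/*
   Usage sketch (not part of this source file): only locally owned rows may be
   requested, and each MatGetRow() must be matched by a MatRestoreRow() before
   the next row is accessed:

     const PetscInt    *cols;
     const PetscScalar *vals;
     PetscInt          row,ncols,rstart,rend;
     ierr = MatGetOwnershipRange(mat,&rstart,&rend);CHKERRQ(ierr);
     for (row=rstart; row<rend; row++) {
       ierr = MatGetRow(mat,row,&ncols,&cols,&vals);CHKERRQ(ierr);
       ... use cols[0..ncols-1] and vals[0..ncols-1] ...
       ierr = MatRestoreRow(mat,row,&ncols,&cols,&vals);CHKERRQ(ierr);
     }
*/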
1793 
1794 #undef __FUNCT__
1795 #define __FUNCT__ "MatNorm_MPIAIJ"
1796 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm)
1797 {
1798   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1799   Mat_SeqAIJ     *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data;
1800   PetscErrorCode ierr;
1801   PetscInt       i,j,cstart = mat->cmap->rstart;
1802   PetscReal      sum = 0.0;
1803   MatScalar      *v;
1804 
1805   PetscFunctionBegin;
1806   if (aij->size == 1) {
1807     ierr =  MatNorm(aij->A,type,norm);CHKERRQ(ierr);
1808   } else {
1809     if (type == NORM_FROBENIUS) {
1810       v = amat->a;
1811       for (i=0; i<amat->nz; i++) {
1812 #if defined(PETSC_USE_COMPLEX)
1813         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1814 #else
1815         sum += (*v)*(*v); v++;
1816 #endif
1817       }
1818       v = bmat->a;
1819       for (i=0; i<bmat->nz; i++) {
1820 #if defined(PETSC_USE_COMPLEX)
1821         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1822 #else
1823         sum += (*v)*(*v); v++;
1824 #endif
1825       }
1826       ierr = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPI_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr);
1827       *norm = sqrt(*norm);
1828     } else if (type == NORM_1) { /* max column norm */
1829       PetscReal *tmp,*tmp2;
1830       PetscInt  *jj,*garray = aij->garray;
1831       ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr);
1832       ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscReal),&tmp2);CHKERRQ(ierr);
1833       ierr = PetscMemzero(tmp,mat->cmap->N*sizeof(PetscReal));CHKERRQ(ierr);
1834       *norm = 0.0;
1835       v = amat->a; jj = amat->j;
1836       for (j=0; j<amat->nz; j++) {
1837         tmp[cstart + *jj++ ] += PetscAbsScalar(*v);  v++;
1838       }
1839       v = bmat->a; jj = bmat->j;
1840       for (j=0; j<bmat->nz; j++) {
1841         tmp[garray[*jj++]] += PetscAbsScalar(*v); v++;
1842       }
1843       ierr = MPI_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPI_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr);
1844       for (j=0; j<mat->cmap->N; j++) {
1845         if (tmp2[j] > *norm) *norm = tmp2[j];
1846       }
1847       ierr = PetscFree(tmp);CHKERRQ(ierr);
1848       ierr = PetscFree(tmp2);CHKERRQ(ierr);
1849     } else if (type == NORM_INFINITY) { /* max row norm */
1850       PetscReal ntemp = 0.0;
1851       for (j=0; j<aij->A->rmap->n; j++) {
1852         v = amat->a + amat->i[j];
1853         sum = 0.0;
1854         for (i=0; i<amat->i[j+1]-amat->i[j]; i++) {
1855           sum += PetscAbsScalar(*v); v++;
1856         }
1857         v = bmat->a + bmat->i[j];
1858         for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) {
1859           sum += PetscAbsScalar(*v); v++;
1860         }
1861         if (sum > ntemp) ntemp = sum;
1862       }
1863       ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPI_MAX,((PetscObject)mat)->comm);CHKERRQ(ierr);
1864     } else {
1865       SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_SUP,"No support for two norm");
1866     }
1867   }
1868   PetscFunctionReturn(0);
1869 }
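/*
   Usage sketch (not part of this source file):

     PetscReal nrm;
     ierr = MatNorm(A,NORM_INFINITY,&nrm);CHKERRQ(ierr);

   NORM_FROBENIUS, NORM_1 and NORM_INFINITY are supported above; NORM_2 is not.
*/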
1870 
1871 #undef __FUNCT__
1872 #define __FUNCT__ "MatTranspose_MPIAIJ"
1873 PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout)
1874 {
1875   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1876   Mat_SeqAIJ     *Aloc=(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data;
1877   PetscErrorCode ierr;
1878   PetscInt       M = A->rmap->N,N = A->cmap->N,ma,na,mb,*ai,*aj,*bi,*bj,row,*cols,*cols_tmp,i,*d_nnz;
1879   PetscInt       cstart=A->cmap->rstart,ncol;
1880   Mat            B;
1881   MatScalar      *array;
1882 
1883   PetscFunctionBegin;
1884   if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
1885 
1886   ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n;
1887   ai = Aloc->i; aj = Aloc->j;
1888   bi = Bloc->i; bj = Bloc->j;
1889   if (reuse == MAT_INITIAL_MATRIX || *matout == A) {
1890     /* compute d_nnz for preallocation; o_nnz is approximated by d_nnz to avoid communication */
1891     ierr = PetscMalloc((1+na)*sizeof(PetscInt),&d_nnz);CHKERRQ(ierr);
1892     ierr = PetscMemzero(d_nnz,(1+na)*sizeof(PetscInt));CHKERRQ(ierr);
1893     for (i=0; i<ai[ma]; i++){
1894       d_nnz[aj[i]] ++;
1895       aj[i] += cstart; /* global col index to be used by MatSetValues() */
1896     }
1897 
1898     ierr = MatCreate(((PetscObject)A)->comm,&B);CHKERRQ(ierr);
1899     ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
1900     ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
1901     ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,d_nnz);CHKERRQ(ierr);
1902     ierr = PetscFree(d_nnz);CHKERRQ(ierr);
1903   } else {
1904     B = *matout;
1905   }
1906 
1907   /* copy over the A part */
1908   array = Aloc->a;
1909   row = A->rmap->rstart;
1910   for (i=0; i<ma; i++) {
1911     ncol = ai[i+1]-ai[i];
1912     ierr = MatSetValues(B,ncol,aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1913     row++; array += ncol; aj += ncol;
1914   }
1915   aj = Aloc->j;
1916   for (i=0; i<ai[ma]; i++) aj[i] -= cstart; /* restore local col indices */
1917 
1918   /* copy over the B part */
1919   ierr = PetscMalloc(bi[mb]*sizeof(PetscInt),&cols);CHKERRQ(ierr);
1920   ierr = PetscMemzero(cols,bi[mb]*sizeof(PetscInt));CHKERRQ(ierr);
1921   array = Bloc->a;
1922   row = A->rmap->rstart;
1923   for (i=0; i<bi[mb]; i++) {cols[i] = a->garray[bj[i]];}
1924   cols_tmp = cols;
1925   for (i=0; i<mb; i++) {
1926     ncol = bi[i+1]-bi[i];
1927     ierr = MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1928     row++; array += ncol; cols_tmp += ncol;
1929   }
1930   ierr = PetscFree(cols);CHKERRQ(ierr);
1931 
1932   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1933   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1934   if (reuse == MAT_INITIAL_MATRIX || *matout != A) {
1935     *matout = B;
1936   } else {
1937     ierr = MatHeaderMerge(A,B);CHKERRQ(ierr);
1938   }
1939   PetscFunctionReturn(0);
1940 }
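/*
   Usage sketch (not part of this source file):

     Mat At;
     ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr);   out-of-place
     ierr = MatTranspose(A,MAT_REUSE_MATRIX,&A);CHKERRQ(ierr);      in-place (square matrices only)
*/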
1941 
1942 #undef __FUNCT__
1943 #define __FUNCT__ "MatDiagonalScale_MPIAIJ"
1944 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
1945 {
1946   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1947   Mat            a = aij->A,b = aij->B;
1948   PetscErrorCode ierr;
1949   PetscInt       s1,s2,s3;
1950 
1951   PetscFunctionBegin;
1952   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
1953   if (rr) {
1954     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
1955     if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
1956     /* Overlap communication with computation. */
1957     ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1958   }
1959   if (ll) {
1960     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
1961     if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
1962     ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr);
1963   }
1964   /* scale  the diagonal block */
1965   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
1966 
1967   if (rr) {
1968     /* Do a scatter end and then right scale the off-diagonal block */
1969     ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1970     ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr);
1971   }
1972 
1973   PetscFunctionReturn(0);
1974 }
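/*
   Usage sketch (not part of this source file): computes mat = diag(ll)*mat*diag(rr);
   either vector may be PETSC_NULL to skip scaling on that side, e.g.

     ierr = MatDiagonalScale(mat,l,PETSC_NULL);CHKERRQ(ierr);
*/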
1975 
1976 #undef __FUNCT__
1977 #define __FUNCT__ "MatSetBlockSize_MPIAIJ"
1978 PetscErrorCode MatSetBlockSize_MPIAIJ(Mat A,PetscInt bs)
1979 {
1980   Mat_MPIAIJ     *a   = (Mat_MPIAIJ*)A->data;
1981   PetscErrorCode ierr;
1982 
1983   PetscFunctionBegin;
1984   ierr = MatSetBlockSize(a->A,bs);CHKERRQ(ierr);
1985   ierr = MatSetBlockSize(a->B,bs);CHKERRQ(ierr);
1986   ierr = PetscLayoutSetBlockSize(A->rmap,bs);CHKERRQ(ierr);
1987   ierr = PetscLayoutSetBlockSize(A->cmap,bs);CHKERRQ(ierr);
1988   PetscFunctionReturn(0);
1989 }
1990 #undef __FUNCT__
1991 #define __FUNCT__ "MatSetUnfactored_MPIAIJ"
1992 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A)
1993 {
1994   Mat_MPIAIJ     *a   = (Mat_MPIAIJ*)A->data;
1995   PetscErrorCode ierr;
1996 
1997   PetscFunctionBegin;
1998   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
1999   PetscFunctionReturn(0);
2000 }
2001 
2002 #undef __FUNCT__
2003 #define __FUNCT__ "MatEqual_MPIAIJ"
2004 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscBool  *flag)
2005 {
2006   Mat_MPIAIJ     *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data;
2007   Mat            a,b,c,d;
2008   PetscBool      flg;
2009   PetscErrorCode ierr;
2010 
2011   PetscFunctionBegin;
2012   a = matA->A; b = matA->B;
2013   c = matB->A; d = matB->B;
2014 
2015   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
2016   if (flg) {
2017     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
2018   }
2019   ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,((PetscObject)A)->comm);CHKERRQ(ierr);
2020   PetscFunctionReturn(0);
2021 }
2022 
2023 #undef __FUNCT__
2024 #define __FUNCT__ "MatCopy_MPIAIJ"
2025 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
2026 {
2027   PetscErrorCode ierr;
2028   Mat_MPIAIJ     *a = (Mat_MPIAIJ *)A->data;
2029   Mat_MPIAIJ     *b = (Mat_MPIAIJ *)B->data;
2030 
2031   PetscFunctionBegin;
2032   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
2033   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
2034     /* because of the column compression in the off-processor part of the matrix a->B,
2035        the number of columns in a->B and b->B may be different, hence we cannot call
2036        the MatCopy() directly on the two parts. If need be, we can provide a more
2037        efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices
2038        then copying the submatrices */
2039     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
2040   } else {
2041     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
2042     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
2043   }
2044   PetscFunctionReturn(0);
2045 }
2046 
2047 #undef __FUNCT__
2048 #define __FUNCT__ "MatSetUpPreallocation_MPIAIJ"
2049 PetscErrorCode MatSetUpPreallocation_MPIAIJ(Mat A)
2050 {
2051   PetscErrorCode ierr;
2052 
2053   PetscFunctionBegin;
2054   ierr =  MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
2055   PetscFunctionReturn(0);
2056 }
2057 
2058 #undef __FUNCT__
2059 #define __FUNCT__ "MatAXPY_MPIAIJ"
2060 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
2061 {
2062   PetscErrorCode ierr;
2063   PetscInt       i;
2064   Mat_MPIAIJ     *xx = (Mat_MPIAIJ *)X->data,*yy = (Mat_MPIAIJ *)Y->data;
2065   PetscBLASInt   bnz,one=1;
2066   Mat_SeqAIJ     *x,*y;
2067 
2068   PetscFunctionBegin;
2069   if (str == SAME_NONZERO_PATTERN) {
2070     PetscScalar alpha = a;
2071     x = (Mat_SeqAIJ *)xx->A->data;
2072     y = (Mat_SeqAIJ *)yy->A->data;
2073     bnz = PetscBLASIntCast(x->nz);
2074     BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
2075     x = (Mat_SeqAIJ *)xx->B->data;
2076     y = (Mat_SeqAIJ *)yy->B->data;
2077     bnz = PetscBLASIntCast(x->nz);
2078     BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
2079   } else if (str == SUBSET_NONZERO_PATTERN) {
2080     ierr = MatAXPY_SeqAIJ(yy->A,a,xx->A,str);CHKERRQ(ierr);
2081 
2082     x = (Mat_SeqAIJ *)xx->B->data;
2083     y = (Mat_SeqAIJ *)yy->B->data;
2084     if (y->xtoy && y->XtoY != xx->B) {
2085       ierr = PetscFree(y->xtoy);CHKERRQ(ierr);
2086       ierr = MatDestroy(y->XtoY);CHKERRQ(ierr);
2087     }
2088     if (!y->xtoy) { /* get xtoy */
2089       ierr = MatAXPYGetxtoy_Private(xx->B->rmap->n,x->i,x->j,xx->garray,y->i,y->j,yy->garray,&y->xtoy);CHKERRQ(ierr);
2090       y->XtoY = xx->B;
2091       ierr = PetscObjectReference((PetscObject)xx->B);CHKERRQ(ierr);
2092     }
2093     for (i=0; i<x->nz; i++) y->a[y->xtoy[i]] += a*(x->a[i]);
2094   } else {
2095     Mat B;
2096     PetscInt *nnz_d,*nnz_o;
2097     ierr = PetscMalloc(yy->A->rmap->N*sizeof(PetscInt),&nnz_d);CHKERRQ(ierr);
2098     ierr = PetscMalloc(yy->B->rmap->N*sizeof(PetscInt),&nnz_o);CHKERRQ(ierr);
2099     ierr = MatCreate(((PetscObject)Y)->comm,&B);CHKERRQ(ierr);
2100     ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr);
2101     ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr);
2102     ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr);
2103     ierr = MatAXPYGetPreallocation_SeqAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr);
2104     ierr = MatAXPYGetPreallocation_SeqAIJ(yy->B,xx->B,nnz_o);CHKERRQ(ierr);
2105     ierr = MatMPIAIJSetPreallocation(B,PETSC_NULL,nnz_d,PETSC_NULL,nnz_o);CHKERRQ(ierr);
2106     ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr);
2107     ierr = MatHeaderReplace(Y,B);CHKERRQ(ierr);
2108     ierr = PetscFree(nnz_d);CHKERRQ(ierr);
2109     ierr = PetscFree(nnz_o);CHKERRQ(ierr);
2110   }
2111   PetscFunctionReturn(0);
2112 }
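/*
   Usage sketch (not part of this source file): computes Y = Y + a*X; the MatStructure
   argument selects one of the (progressively more general and more expensive) code
   paths above, e.g.

     ierr = MatAXPY(Y,alpha,X,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);
*/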
2113 
2114 EXTERN PetscErrorCode PETSCMAT_DLLEXPORT MatConjugate_SeqAIJ(Mat);
2115 
2116 #undef __FUNCT__
2117 #define __FUNCT__ "MatConjugate_MPIAIJ"
2118 PetscErrorCode PETSCMAT_DLLEXPORT MatConjugate_MPIAIJ(Mat mat)
2119 {
2120 #if defined(PETSC_USE_COMPLEX)
2121   PetscErrorCode ierr;
2122   Mat_MPIAIJ     *aij = (Mat_MPIAIJ *)mat->data;
2123 
2124   PetscFunctionBegin;
2125   ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr);
2126   ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr);
2127 #else
2128   PetscFunctionBegin;
2129 #endif
2130   PetscFunctionReturn(0);
2131 }
2132 
2133 #undef __FUNCT__
2134 #define __FUNCT__ "MatRealPart_MPIAIJ"
2135 PetscErrorCode MatRealPart_MPIAIJ(Mat A)
2136 {
2137   Mat_MPIAIJ   *a = (Mat_MPIAIJ*)A->data;
2138   PetscErrorCode ierr;
2139 
2140   PetscFunctionBegin;
2141   ierr = MatRealPart(a->A);CHKERRQ(ierr);
2142   ierr = MatRealPart(a->B);CHKERRQ(ierr);
2143   PetscFunctionReturn(0);
2144 }
2145 
2146 #undef __FUNCT__
2147 #define __FUNCT__ "MatImaginaryPart_MPIAIJ"
2148 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A)
2149 {
2150   Mat_MPIAIJ   *a = (Mat_MPIAIJ*)A->data;
2151   PetscErrorCode ierr;
2152 
2153   PetscFunctionBegin;
2154   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
2155   ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
2156   PetscFunctionReturn(0);
2157 }
2158 
2159 #ifdef PETSC_HAVE_PBGL
2160 
2161 #include <boost/parallel/mpi/bsp_process_group.hpp>
2162 #include <boost/graph/distributed/ilu_default_graph.hpp>
2163 #include <boost/graph/distributed/ilu_0_block.hpp>
2164 #include <boost/graph/distributed/ilu_preconditioner.hpp>
2165 #include <boost/graph/distributed/petsc/interface.hpp>
2166 #include <boost/multi_array.hpp>
2167 #include <boost/parallel/distributed_property_map.hpp>
2168 
2169 #undef __FUNCT__
2170 #define __FUNCT__ "MatILUFactorSymbolic_MPIAIJ"
2171 /*
2172   This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu>
2173 */
2174 PetscErrorCode MatILUFactorSymbolic_MPIAIJ(Mat fact,Mat A, IS isrow, IS iscol, const MatFactorInfo *info)
2175 {
2176   namespace petsc = boost::distributed::petsc;
2177 
2178   namespace graph_dist = boost::graph::distributed;
2179   using boost::graph::distributed::ilu_default::process_group_type;
2180   using boost::graph::ilu_permuted;
2181 
2182   PetscBool       row_identity, col_identity;
2183   PetscContainer  c;
2184   PetscInt        m, n, M, N;
2185   PetscErrorCode  ierr;
2186 
2187   PetscFunctionBegin;
2188   if (info->levels != 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only levels = 0 supported for parallel ilu");
2189   ierr = ISIdentity(isrow, &row_identity);CHKERRQ(ierr);
2190   ierr = ISIdentity(iscol, &col_identity);CHKERRQ(ierr);
2191   if (!row_identity || !col_identity) {
2192     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for parallel ILU");
2193   }
2194 
2195   process_group_type pg;
2196   typedef graph_dist::ilu_default::ilu_level_graph_type  lgraph_type;
2197   lgraph_type*   lgraph_p = new lgraph_type(petsc::num_global_vertices(A), pg, petsc::matrix_distribution(A, pg));
2198   lgraph_type&   level_graph = *lgraph_p;
2199   graph_dist::ilu_default::graph_type&            graph(level_graph.graph);
2200 
2201   petsc::read_matrix(A, graph, get(boost::edge_weight, graph));
2202   ilu_permuted(level_graph);
2203 
2204   /* put together the new matrix */
2205   ierr = MatCreate(((PetscObject)A)->comm, fact);CHKERRQ(ierr);
2206   ierr = MatGetLocalSize(A, &m, &n);CHKERRQ(ierr);
2207   ierr = MatGetSize(A, &M, &N);CHKERRQ(ierr);
2208   ierr = MatSetSizes(fact, m, n, M, N);CHKERRQ(ierr);
2209   ierr = MatSetType(fact, ((PetscObject)A)->type_name);CHKERRQ(ierr);
2210   ierr = MatAssemblyBegin(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2211   ierr = MatAssemblyEnd(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2212 
2213   ierr = PetscContainerCreate(((PetscObject)A)->comm, &c);CHKERRQ(ierr);
2214   ierr = PetscContainerSetPointer(c, lgraph_p);CHKERRQ(ierr);
2215   ierr = PetscObjectCompose((PetscObject) (fact), "graph", (PetscObject) c);CHKERRQ(ierr);
2216   PetscFunctionReturn(0);
2217 }
2218 
2219 #undef __FUNCT__
2220 #define __FUNCT__ "MatLUFactorNumeric_MPIAIJ"
2221 PetscErrorCode MatLUFactorNumeric_MPIAIJ(Mat B,Mat A, const MatFactorInfo *info)
2222 {
2223   PetscFunctionBegin;
2224   PetscFunctionReturn(0);
2225 }
2226 
2227 #undef __FUNCT__
2228 #define __FUNCT__ "MatSolve_MPIAIJ"
2229 /*
2230   This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu>
2231 */
2232 PetscErrorCode MatSolve_MPIAIJ(Mat A, Vec b, Vec x)
2233 {
2234   namespace graph_dist = boost::graph::distributed;
2235 
2236   typedef graph_dist::ilu_default::ilu_level_graph_type  lgraph_type;
2237   lgraph_type*   lgraph_p;
2238   PetscContainer c;
2239   PetscErrorCode ierr;
2240 
2241   PetscFunctionBegin;
2242   ierr = PetscObjectQuery((PetscObject) A, "graph", (PetscObject *) &c);CHKERRQ(ierr);
2243   ierr = PetscContainerGetPointer(c, (void **) &lgraph_p);CHKERRQ(ierr);
2244   ierr = VecCopy(b, x);CHKERRQ(ierr);
2245 
2246   PetscScalar* array_x;
2247   ierr = VecGetArray(x, &array_x);CHKERRQ(ierr);
2248   PetscInt sx;
2249   ierr = VecGetSize(x, &sx);CHKERRQ(ierr);
2250 
2251   PetscScalar* array_b;
2252   ierr = VecGetArray(b, &array_b);CHKERRQ(ierr);
2253   PetscInt sb;
2254   ierr = VecGetSize(b, &sb);CHKERRQ(ierr);
2255 
2256   lgraph_type&   level_graph = *lgraph_p;
2257   graph_dist::ilu_default::graph_type&            graph(level_graph.graph);
2258 
2259   typedef boost::multi_array_ref<PetscScalar, 1> array_ref_type;
2260   array_ref_type                                 ref_b(array_b, boost::extents[num_vertices(graph)]),
2261                                                  ref_x(array_x, boost::extents[num_vertices(graph)]);
2262 
2263   typedef boost::iterator_property_map<array_ref_type::iterator,
2264                                 boost::property_map<graph_dist::ilu_default::graph_type, boost::vertex_index_t>::type>  gvector_type;
2265   gvector_type                                   vector_b(ref_b.begin(), get(boost::vertex_index, graph)),
2266                                                  vector_x(ref_x.begin(), get(boost::vertex_index, graph));
2267 
2268   ilu_set_solve(*lgraph_p, vector_b, vector_x);
2269 
2270   PetscFunctionReturn(0);
2271 }
2272 #endif
2273 
2274 typedef struct { /* used by MatGetRedundantMatrix() for reusing matredundant */
2275   PetscInt       nzlocal,nsends,nrecvs;
2276   PetscMPIInt    *send_rank,*recv_rank;
2277   PetscInt       *sbuf_nz,*rbuf_nz,*sbuf_j,**rbuf_j;
2278   PetscScalar    *sbuf_a,**rbuf_a;
2279   PetscErrorCode (*MatDestroy)(Mat);
2280 } Mat_Redundant;
2281 
2282 #undef __FUNCT__
2283 #define __FUNCT__ "PetscContainerDestroy_MatRedundant"
2284 PetscErrorCode PetscContainerDestroy_MatRedundant(void *ptr)
2285 {
2286   PetscErrorCode       ierr;
2287   Mat_Redundant        *redund=(Mat_Redundant*)ptr;
2288   PetscInt             i;
2289 
2290   PetscFunctionBegin;
2291   ierr = PetscFree2(redund->send_rank,redund->recv_rank);CHKERRQ(ierr);
2292   ierr = PetscFree(redund->sbuf_j);CHKERRQ(ierr);
2293   ierr = PetscFree(redund->sbuf_a);CHKERRQ(ierr);
2294   for (i=0; i<redund->nrecvs; i++){
2295     ierr = PetscFree(redund->rbuf_j[i]);CHKERRQ(ierr);
2296     ierr = PetscFree(redund->rbuf_a[i]);CHKERRQ(ierr);
2297   }
2298   ierr = PetscFree4(redund->sbuf_nz,redund->rbuf_nz,redund->rbuf_j,redund->rbuf_a);CHKERRQ(ierr);
2299   ierr = PetscFree(redund);CHKERRQ(ierr);
2300   PetscFunctionReturn(0);
2301 }
2302 
2303 #undef __FUNCT__
2304 #define __FUNCT__ "MatDestroy_MatRedundant"
2305 PetscErrorCode MatDestroy_MatRedundant(Mat A)
2306 {
2307   PetscErrorCode  ierr;
2308   PetscContainer  container;
2309   Mat_Redundant   *redund=PETSC_NULL;
2310 
2311   PetscFunctionBegin;
2312   ierr = PetscObjectQuery((PetscObject)A,"Mat_Redundant",(PetscObject *)&container);CHKERRQ(ierr);
2313   if (container) {
2314     ierr = PetscContainerGetPointer(container,(void **)&redund);CHKERRQ(ierr);
2315   } else {
2316     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Container does not exist");
2317   }
2318   A->ops->destroy = redund->MatDestroy;
2319   ierr = PetscObjectCompose((PetscObject)A,"Mat_Redundant",0);CHKERRQ(ierr);
2320   ierr = (*A->ops->destroy)(A);CHKERRQ(ierr);
2321   ierr = PetscContainerDestroy(container);CHKERRQ(ierr);
2322   PetscFunctionReturn(0);
2323 }
2324 
2325 #undef __FUNCT__
2326 #define __FUNCT__ "MatGetRedundantMatrix_MPIAIJ"
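/*
   Gathers a complete copy of the distributed matrix onto each subcommunicator:
   every process packs its local rows (row pointers, global column indices and
   values) into send buffers and sends them with non-blocking MPI, over the
   original communicator, to the processes that need them, so that each
   subcommunicator ends up with a full copy; received rows are inserted with
   MatSetValues() and the result is assembled. With MAT_REUSE_MATRIX only the
   numerical values are exchanged; the communication pattern and index buffers
   are taken from the Mat_Redundant context attached to the previously created
   matrix.
*/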
2327 PetscErrorCode MatGetRedundantMatrix_MPIAIJ(Mat mat,PetscInt nsubcomm,MPI_Comm subcomm,PetscInt mlocal_sub,MatReuse reuse,Mat *matredundant)
2328 {
2329   PetscMPIInt    rank,size;
2330   MPI_Comm       comm=((PetscObject)mat)->comm;
2331   PetscErrorCode ierr;
2332   PetscInt       nsends=0,nrecvs=0,i,rownz_max=0;
2333   PetscMPIInt    *send_rank=PETSC_NULL,*recv_rank=PETSC_NULL;
2334   PetscInt       *rowrange=mat->rmap->range;
2335   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2336   Mat            A=aij->A,B=aij->B,C=*matredundant;
2337   Mat_SeqAIJ     *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data;
2338   PetscScalar    *sbuf_a;
2339   PetscInt       nzlocal=a->nz+b->nz;
2340   PetscInt       j,cstart=mat->cmap->rstart,cend=mat->cmap->rend,row,nzA,nzB,ncols,*cworkA,*cworkB;
2341   PetscInt       rstart=mat->rmap->rstart,rend=mat->rmap->rend,*bmap=aij->garray,M,N;
2342   PetscInt       *cols,ctmp,lwrite,*rptr,l,*sbuf_j;
2343   MatScalar      *aworkA,*aworkB;
2344   PetscScalar    *vals;
2345   PetscMPIInt    tag1,tag2,tag3,imdex;
2346   MPI_Request    *s_waits1=PETSC_NULL,*s_waits2=PETSC_NULL,*s_waits3=PETSC_NULL,
2347                  *r_waits1=PETSC_NULL,*r_waits2=PETSC_NULL,*r_waits3=PETSC_NULL;
2348   MPI_Status     recv_status,*send_status;
2349   PetscInt       *sbuf_nz=PETSC_NULL,*rbuf_nz=PETSC_NULL,count;
2350   PetscInt       **rbuf_j=PETSC_NULL;
2351   PetscScalar    **rbuf_a=PETSC_NULL;
2352   Mat_Redundant  *redund=PETSC_NULL;
2353   PetscContainer container;
2354 
2355   PetscFunctionBegin;
2356   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2357   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2358 
2359   if (reuse == MAT_REUSE_MATRIX) {
2360     ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr);
2361     if (M != N || M != mat->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong global size");
2362     ierr = MatGetLocalSize(C,&M,&N);CHKERRQ(ierr);
2363     if (M != N || M != mlocal_sub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong local size");
2364     ierr = PetscObjectQuery((PetscObject)C,"Mat_Redundant",(PetscObject *)&container);CHKERRQ(ierr);
2365     if (container) {
2366       ierr = PetscContainerGetPointer(container,(void **)&redund);CHKERRQ(ierr);
2367     } else {
2368       SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Container does not exist");
2369     }
2370     if (nzlocal != redund->nzlocal) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong nzlocal");
2371 
2372     nsends    = redund->nsends;
2373     nrecvs    = redund->nrecvs;
2374     send_rank = redund->send_rank;
2375     recv_rank = redund->recv_rank;
2376     sbuf_nz   = redund->sbuf_nz;
2377     rbuf_nz   = redund->rbuf_nz;
2378     sbuf_j    = redund->sbuf_j;
2379     sbuf_a    = redund->sbuf_a;
2380     rbuf_j    = redund->rbuf_j;
2381     rbuf_a    = redund->rbuf_a;
2382   }
2383 
2384   if (reuse == MAT_INITIAL_MATRIX){
2385     PetscMPIInt  subrank,subsize;
2386     PetscInt     nleftover,np_subcomm;
2387     /* get the destination processors' id send_rank, nsends and nrecvs */
2388     ierr = MPI_Comm_rank(subcomm,&subrank);CHKERRQ(ierr);
2389     ierr = MPI_Comm_size(subcomm,&subsize);CHKERRQ(ierr);
2390     ierr = PetscMalloc2(size,PetscMPIInt,&send_rank,size,PetscMPIInt,&recv_rank);CHKERRQ(ierr);
2391     np_subcomm = size/nsubcomm;
2392     nleftover  = size - nsubcomm*np_subcomm;
2393     nsends = 0; nrecvs = 0;
2394     for (i=0; i<size; i++){ /* i=rank*/
2395       if (subrank == i/nsubcomm && rank != i){ /* my_subrank == other's subrank */
2396         send_rank[nsends] = i; nsends++;
2397         recv_rank[nrecvs++] = i;
2398       }
2399     }
2400     if (rank >= size - nleftover){/* this proc is a leftover processor */
2401       i = size-nleftover-1;
2402       j = 0;
2403       while (j < nsubcomm - nleftover){
2404         send_rank[nsends++] = i;
2405         i--; j++;
2406       }
2407     }
2408 
2409     if (nleftover && subsize == size/nsubcomm && subrank==subsize-1){ /* this proc recvs from leftover processors */
2410       for (i=0; i<nleftover; i++){
2411         recv_rank[nrecvs++] = size-nleftover+i;
2412       }
2413     }
2414 
2415     /* allocate sbuf_j, sbuf_a */
2416     i = nzlocal + rowrange[rank+1] - rowrange[rank] + 2;
2417     ierr = PetscMalloc(i*sizeof(PetscInt),&sbuf_j);CHKERRQ(ierr);
2418     ierr = PetscMalloc((nzlocal+1)*sizeof(PetscScalar),&sbuf_a);CHKERRQ(ierr);
2419   } /* end of if (reuse == MAT_INITIAL_MATRIX) */
2420 
2421   /* copy mat's local entries into the buffers */
2422   if (reuse == MAT_INITIAL_MATRIX){
2423     rownz_max = 0;
2424     rptr = sbuf_j;
2425     cols = sbuf_j + rend-rstart + 1;
2426     vals = sbuf_a;
2427     rptr[0] = 0;
2428     for (i=0; i<rend-rstart; i++){
2429       row = i + rstart;
2430       nzA    = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i];
2431       ncols  = nzA + nzB;
2432       cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i];
2433       aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i];
2434       /* load the column indices for this row into cols */
2435       lwrite = 0;
2436       for (l=0; l<nzB; l++) {
2437         if ((ctmp = bmap[cworkB[l]]) < cstart){
2438           vals[lwrite]   = aworkB[l];
2439           cols[lwrite++] = ctmp;
2440         }
2441       }
2442       for (l=0; l<nzA; l++){
2443         vals[lwrite]   = aworkA[l];
2444         cols[lwrite++] = cstart + cworkA[l];
2445       }
2446       for (l=0; l<nzB; l++) {
2447         if ((ctmp = bmap[cworkB[l]]) >= cend){
2448           vals[lwrite]   = aworkB[l];
2449           cols[lwrite++] = ctmp;
2450         }
2451       }
2452       vals += ncols;
2453       cols += ncols;
2454       rptr[i+1] = rptr[i] + ncols;
2455       if (rownz_max < ncols) rownz_max = ncols;
2456     }
2457     if (rptr[rend-rstart] != a->nz + b->nz) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB, "rptr[%d] %d != %d + %d",rend-rstart,rptr[rend-rstart],a->nz,b->nz);
2458   } else { /* only copy matrix values into sbuf_a */
2459     rptr = sbuf_j;
2460     vals = sbuf_a;
2461     rptr[0] = 0;
2462     for (i=0; i<rend-rstart; i++){
2463       row = i + rstart;
2464       nzA    = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i];
2465       ncols  = nzA + nzB;
2466       cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i];
2467       aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i];
2468       lwrite = 0;
2469       for (l=0; l<nzB; l++) {
2470         if ((ctmp = bmap[cworkB[l]]) < cstart) vals[lwrite++] = aworkB[l];
2471       }
2472       for (l=0; l<nzA; l++) vals[lwrite++] = aworkA[l];
2473       for (l=0; l<nzB; l++) {
2474         if ((ctmp = bmap[cworkB[l]]) >= cend) vals[lwrite++] = aworkB[l];
2475       }
2476       vals += ncols;
2477       rptr[i+1] = rptr[i] + ncols;
2478     }
2479   } /* end of if (reuse == MAT_INITIAL_MATRIX) */
2480 
2481   /* send nzlocal to others, and recv other's nzlocal */
2482   /*--------------------------------------------------*/
2483   if (reuse == MAT_INITIAL_MATRIX){
2484     ierr = PetscMalloc2(3*(nsends + nrecvs)+1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr);
2485     s_waits2 = s_waits3 + nsends;
2486     s_waits1 = s_waits2 + nsends;
2487     r_waits1 = s_waits1 + nsends;
2488     r_waits2 = r_waits1 + nrecvs;
2489     r_waits3 = r_waits2 + nrecvs;
2490   } else {
2491     ierr = PetscMalloc2(nsends + nrecvs +1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr);
2492     r_waits3 = s_waits3 + nsends;
2493   }
2494 
2495   ierr = PetscObjectGetNewTag((PetscObject)mat,&tag3);CHKERRQ(ierr);
2496   if (reuse == MAT_INITIAL_MATRIX){
2497     /* get new tags to keep the communication clean */
2498     ierr = PetscObjectGetNewTag((PetscObject)mat,&tag1);CHKERRQ(ierr);
2499     ierr = PetscObjectGetNewTag((PetscObject)mat,&tag2);CHKERRQ(ierr);
2500     ierr = PetscMalloc4(nsends,PetscInt,&sbuf_nz,nrecvs,PetscInt,&rbuf_nz,nrecvs,PetscInt*,&rbuf_j,nrecvs,PetscScalar*,&rbuf_a);CHKERRQ(ierr);
2501 
2502     /* post receives of other's nzlocal */
2503     for (i=0; i<nrecvs; i++){
2504       ierr = MPI_Irecv(rbuf_nz+i,1,MPIU_INT,MPI_ANY_SOURCE,tag1,comm,r_waits1+i);CHKERRQ(ierr);
2505     }
2506     /* send nzlocal to others */
2507     for (i=0; i<nsends; i++){
2508       sbuf_nz[i] = nzlocal;
2509       ierr = MPI_Isend(sbuf_nz+i,1,MPIU_INT,send_rank[i],tag1,comm,s_waits1+i);CHKERRQ(ierr);
2510     }
2511     /* wait on receives of nzlocal; allocate space for rbuf_j, rbuf_a */
2512     count = nrecvs;
2513     while (count) {
2514       ierr = MPI_Waitany(nrecvs,r_waits1,&imdex,&recv_status);CHKERRQ(ierr);
2515       recv_rank[imdex] = recv_status.MPI_SOURCE;
2516       /* allocate rbuf_a and rbuf_j; then post receives of rbuf_j */
2517       ierr = PetscMalloc((rbuf_nz[imdex]+1)*sizeof(PetscScalar),&rbuf_a[imdex]);CHKERRQ(ierr);
2518 
2519       i = rowrange[recv_status.MPI_SOURCE+1] - rowrange[recv_status.MPI_SOURCE]; /* number of expected mat->i */
2520       rbuf_nz[imdex] += i + 2;
2521       ierr = PetscMalloc(rbuf_nz[imdex]*sizeof(PetscInt),&rbuf_j[imdex]);CHKERRQ(ierr);
2522       ierr = MPI_Irecv(rbuf_j[imdex],rbuf_nz[imdex],MPIU_INT,recv_status.MPI_SOURCE,tag2,comm,r_waits2+imdex);CHKERRQ(ierr);
2523       count--;
2524     }
2525     /* wait on sends of nzlocal */
2526     if (nsends) {ierr = MPI_Waitall(nsends,s_waits1,send_status);CHKERRQ(ierr);}
2527     /* send mat->i,j to others, and recv from other's */
2528     /*------------------------------------------------*/
2529     for (i=0; i<nsends; i++){
2530       j = nzlocal + rowrange[rank+1] - rowrange[rank] + 1;
2531       ierr = MPI_Isend(sbuf_j,j,MPIU_INT,send_rank[i],tag2,comm,s_waits2+i);CHKERRQ(ierr);
2532     }
2533     /* wait on receives of mat->i,j */
2534     /*------------------------------*/
2535     count = nrecvs;
2536     while (count) {
2537       ierr = MPI_Waitany(nrecvs,r_waits2,&imdex,&recv_status);CHKERRQ(ierr);
2538       if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE);
2539       count--;
2540     }
2541     /* wait on sends of mat->i,j */
2542     /*---------------------------*/
2543     if (nsends) {
2544       ierr = MPI_Waitall(nsends,s_waits2,send_status);CHKERRQ(ierr);
2545     }
2546   } /* end of if (reuse == MAT_INITIAL_MATRIX) */
2547 
2548   /* post receives, send and receive mat->a */
2549   /*----------------------------------------*/
2550   for (imdex=0; imdex<nrecvs; imdex++) {
2551     ierr = MPI_Irecv(rbuf_a[imdex],rbuf_nz[imdex],MPIU_SCALAR,recv_rank[imdex],tag3,comm,r_waits3+imdex);CHKERRQ(ierr);
2552   }
2553   for (i=0; i<nsends; i++){
2554     ierr = MPI_Isend(sbuf_a,nzlocal,MPIU_SCALAR,send_rank[i],tag3,comm,s_waits3+i);CHKERRQ(ierr);
2555   }
2556   count = nrecvs;
2557   while (count) {
2558     ierr = MPI_Waitany(nrecvs,r_waits3,&imdex,&recv_status);CHKERRQ(ierr);
2559     if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE);
2560     count--;
2561   }
2562   if (nsends) {
2563     ierr = MPI_Waitall(nsends,s_waits3,send_status);CHKERRQ(ierr);
2564   }
2565 
2566   ierr = PetscFree2(s_waits3,send_status);CHKERRQ(ierr);
2567 
2568   /* create redundant matrix */
2569   /*-------------------------*/
2570   if (reuse == MAT_INITIAL_MATRIX){
2571     /* compute rownz_max for preallocation */
2572     for (imdex=0; imdex<nrecvs; imdex++){
2573       j = rowrange[recv_rank[imdex]+1] - rowrange[recv_rank[imdex]];
2574       rptr = rbuf_j[imdex];
2575       for (i=0; i<j; i++){
2576         ncols = rptr[i+1] - rptr[i];
2577         if (rownz_max < ncols) rownz_max = ncols;
2578       }
2579     }
2580 
2581     ierr = MatCreate(subcomm,&C);CHKERRQ(ierr);
2582     ierr = MatSetSizes(C,mlocal_sub,mlocal_sub,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr);
2583     ierr = MatSetFromOptions(C);CHKERRQ(ierr);
2584     ierr = MatSeqAIJSetPreallocation(C,rownz_max,PETSC_NULL);CHKERRQ(ierr);
2585     ierr = MatMPIAIJSetPreallocation(C,rownz_max,PETSC_NULL,rownz_max,PETSC_NULL);CHKERRQ(ierr);
2586   } else {
2587     C = *matredundant;
2588   }
2589 
2590   /* insert local matrix entries */
2591   rptr = sbuf_j;
2592   cols = sbuf_j + rend-rstart + 1;
2593   vals = sbuf_a;
2594   for (i=0; i<rend-rstart; i++){
2595     row   = i + rstart;
2596     ncols = rptr[i+1] - rptr[i];
2597     ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
2598     vals += ncols;
2599     cols += ncols;
2600   }
2601   /* insert received matrix entries */
2602   for (imdex=0; imdex<nrecvs; imdex++){
2603     rstart = rowrange[recv_rank[imdex]];
2604     rend   = rowrange[recv_rank[imdex]+1];
2605     rptr = rbuf_j[imdex];
2606     cols = rbuf_j[imdex] + rend-rstart + 1;
2607     vals = rbuf_a[imdex];
2608     for (i=0; i<rend-rstart; i++){
2609       row   = i + rstart;
2610       ncols = rptr[i+1] - rptr[i];
2611       ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
2612       vals += ncols;
2613       cols += ncols;
2614     }
2615   }
2616   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2617   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2618   ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr);
2619   if (M != mat->rmap->N || N != mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"redundant mat size %d != input mat size %d",M,mat->rmap->N);
2620   if (reuse == MAT_INITIAL_MATRIX){
2621     PetscContainer container;
2622     *matredundant = C;
2623     /* create a supporting struct and attach it to C for reuse */
2624     ierr = PetscNewLog(C,Mat_Redundant,&redund);CHKERRQ(ierr);
2625     ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
2626     ierr = PetscContainerSetPointer(container,redund);CHKERRQ(ierr);
2627     ierr = PetscObjectCompose((PetscObject)C,"Mat_Redundant",(PetscObject)container);CHKERRQ(ierr);
2628     ierr = PetscContainerSetUserDestroy(container,PetscContainerDestroy_MatRedundant);CHKERRQ(ierr);
2629 
2630     redund->nzlocal = nzlocal;
2631     redund->nsends  = nsends;
2632     redund->nrecvs  = nrecvs;
2633     redund->send_rank = send_rank;
2634     redund->recv_rank = recv_rank;
2635     redund->sbuf_nz = sbuf_nz;
2636     redund->rbuf_nz = rbuf_nz;
2637     redund->sbuf_j  = sbuf_j;
2638     redund->sbuf_a  = sbuf_a;
2639     redund->rbuf_j  = rbuf_j;
2640     redund->rbuf_a  = rbuf_a;
2641 
2642     redund->MatDestroy = C->ops->destroy;
2643     C->ops->destroy    = MatDestroy_MatRedundant;
2644   }
2645   PetscFunctionReturn(0);
2646 }
2647 
2648 #undef __FUNCT__
2649 #define __FUNCT__ "MatGetRowMaxAbs_MPIAIJ"
2650 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2651 {
2652   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2653   PetscErrorCode ierr;
2654   PetscInt       i,*idxb = 0;
2655   PetscScalar    *va,*vb;
2656   Vec            vtmp;
2657 
2658   PetscFunctionBegin;
2659   ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr);
2660   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2661   if (idx) {
2662     for (i=0; i<A->rmap->n; i++) {
2663       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2664     }
2665   }
2666 
2667   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
2668   if (idx) {
2669     ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr);
2670   }
2671   ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
2672   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
2673 
2674   for (i=0; i<A->rmap->n; i++){
2675     if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
2676       va[i] = vb[i];
2677       if (idx) idx[i] = a->garray[idxb[i]];
2678     }
2679   }
2680 
2681   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2682   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
2683   if (idxb) {
2684     ierr = PetscFree(idxb);CHKERRQ(ierr);
2685   }
2686   ierr = VecDestroy(vtmp);CHKERRQ(ierr);
2687   PetscFunctionReturn(0);
2688 }
2689 
2690 #undef __FUNCT__
2691 #define __FUNCT__ "MatGetRowMinAbs_MPIAIJ"
2692 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2693 {
2694   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2695   PetscErrorCode ierr;
2696   PetscInt       i,*idxb = 0;
2697   PetscScalar    *va,*vb;
2698   Vec            vtmp;
2699 
2700   PetscFunctionBegin;
2701   ierr = MatGetRowMinAbs(a->A,v,idx);CHKERRQ(ierr);
2702   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2703   if (idx) {
2704     for (i=0; i<A->rmap->n; i++) {
2705       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2706     }
2707   }
2708 
2709   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
2710   if (idx) {
2711     ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr);
2712   }
2713   ierr = MatGetRowMinAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
2714   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
2715 
2716   for (i=0; i<A->rmap->n; i++){
2717     if (PetscAbsScalar(va[i]) > PetscAbsScalar(vb[i])) {
2718       va[i] = vb[i];
2719       if (idx) idx[i] = a->garray[idxb[i]];
2720     }
2721   }
2722 
2723   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2724   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
2725   if (idxb) {
2726     ierr = PetscFree(idxb);CHKERRQ(ierr);
2727   }
2728   ierr = VecDestroy(vtmp);CHKERRQ(ierr);
2729   PetscFunctionReturn(0);
2730 }
2731 
2732 #undef __FUNCT__
2733 #define __FUNCT__ "MatGetRowMin_MPIAIJ"
2734 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2735 {
2736   Mat_MPIAIJ    *mat    = (Mat_MPIAIJ *) A->data;
2737   PetscInt       n      = A->rmap->n;
2738   PetscInt       cstart = A->cmap->rstart;
2739   PetscInt      *cmap   = mat->garray;
2740   PetscInt      *diagIdx, *offdiagIdx;
2741   Vec            diagV, offdiagV;
2742   PetscScalar   *a, *diagA, *offdiagA;
2743   PetscInt       r;
2744   PetscErrorCode ierr;
2745 
2746   PetscFunctionBegin;
2747   ierr = PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);CHKERRQ(ierr);
2748   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr);
2749   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr);
2750   ierr = MatGetRowMin(mat->A, diagV,    diagIdx);CHKERRQ(ierr);
2751   ierr = MatGetRowMin(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
2752   ierr = VecGetArray(v,        &a);CHKERRQ(ierr);
2753   ierr = VecGetArray(diagV,    &diagA);CHKERRQ(ierr);
2754   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2755   for(r = 0; r < n; ++r) {
2756     if (PetscAbsScalar(diagA[r]) <= PetscAbsScalar(offdiagA[r])) {
2757       a[r]   = diagA[r];
2758       idx[r] = cstart + diagIdx[r];
2759     } else {
2760       a[r]   = offdiagA[r];
2761       idx[r] = cmap[offdiagIdx[r]];
2762     }
2763   }
2764   ierr = VecRestoreArray(v,        &a);CHKERRQ(ierr);
2765   ierr = VecRestoreArray(diagV,    &diagA);CHKERRQ(ierr);
2766   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2767   ierr = VecDestroy(diagV);CHKERRQ(ierr);
2768   ierr = VecDestroy(offdiagV);CHKERRQ(ierr);
2769   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
2770   PetscFunctionReturn(0);
2771 }
2772 
2773 #undef __FUNCT__
2774 #define __FUNCT__ "MatGetRowMax_MPIAIJ"
2775 PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2776 {
2777   Mat_MPIAIJ    *mat    = (Mat_MPIAIJ *) A->data;
2778   PetscInt       n      = A->rmap->n;
2779   PetscInt       cstart = A->cmap->rstart;
2780   PetscInt      *cmap   = mat->garray;
2781   PetscInt      *diagIdx, *offdiagIdx;
2782   Vec            diagV, offdiagV;
2783   PetscScalar   *a, *diagA, *offdiagA;
2784   PetscInt       r;
2785   PetscErrorCode ierr;
2786 
2787   PetscFunctionBegin;
2788   ierr = PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);CHKERRQ(ierr);
2789   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr);
2790   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr);
2791   ierr = MatGetRowMax(mat->A, diagV,    diagIdx);CHKERRQ(ierr);
2792   ierr = MatGetRowMax(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
2793   ierr = VecGetArray(v,        &a);CHKERRQ(ierr);
2794   ierr = VecGetArray(diagV,    &diagA);CHKERRQ(ierr);
2795   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2796   for(r = 0; r < n; ++r) {
2797     if (PetscAbsScalar(diagA[r]) >= PetscAbsScalar(offdiagA[r])) {
2798       a[r]   = diagA[r];
2799       idx[r] = cstart + diagIdx[r];
2800     } else {
2801       a[r]   = offdiagA[r];
2802       idx[r] = cmap[offdiagIdx[r]];
2803     }
2804   }
2805   ierr = VecRestoreArray(v,        &a);CHKERRQ(ierr);
2806   ierr = VecRestoreArray(diagV,    &diagA);CHKERRQ(ierr);
2807   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2808   ierr = VecDestroy(diagV);CHKERRQ(ierr);
2809   ierr = VecDestroy(offdiagV);CHKERRQ(ierr);
2810   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
2811   PetscFunctionReturn(0);
2812 }
2813 
2814 #undef __FUNCT__
2815 #define __FUNCT__ "MatGetSeqNonzerostructure_MPIAIJ"
2816 PetscErrorCode MatGetSeqNonzerostructure_MPIAIJ(Mat mat,Mat *newmat)
2817 {
2818   PetscErrorCode ierr;
2819   Mat            *dummy;
2820 
2821   PetscFunctionBegin;
2822   ierr = MatGetSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy);CHKERRQ(ierr);
2823   *newmat = *dummy;
2824   ierr = PetscFree(dummy);CHKERRQ(ierr);
2825   PetscFunctionReturn(0);
2826 }
2827 
2828 extern PetscErrorCode PETSCMAT_DLLEXPORT MatFDColoringApply_AIJ(Mat,MatFDColoring,Vec,MatStructure*,void*);
2829 /* -------------------------------------------------------------------*/
2830 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
2831        MatGetRow_MPIAIJ,
2832        MatRestoreRow_MPIAIJ,
2833        MatMult_MPIAIJ,
2834 /* 4*/ MatMultAdd_MPIAIJ,
2835        MatMultTranspose_MPIAIJ,
2836        MatMultTransposeAdd_MPIAIJ,
2837 #ifdef PETSC_HAVE_PBGL
2838        MatSolve_MPIAIJ,
2839 #else
2840        0,
2841 #endif
2842        0,
2843        0,
2844 /*10*/ 0,
2845        0,
2846        0,
2847        MatSOR_MPIAIJ,
2848        MatTranspose_MPIAIJ,
2849 /*15*/ MatGetInfo_MPIAIJ,
2850        MatEqual_MPIAIJ,
2851        MatGetDiagonal_MPIAIJ,
2852        MatDiagonalScale_MPIAIJ,
2853        MatNorm_MPIAIJ,
2854 /*20*/ MatAssemblyBegin_MPIAIJ,
2855        MatAssemblyEnd_MPIAIJ,
2856        MatSetOption_MPIAIJ,
2857        MatZeroEntries_MPIAIJ,
2858 /*24*/ MatZeroRows_MPIAIJ,
2859        0,
2860 #ifdef PETSC_HAVE_PBGL
2861        0,
2862 #else
2863        0,
2864 #endif
2865        0,
2866        0,
2867 /*29*/ MatSetUpPreallocation_MPIAIJ,
2868 #ifdef PETSC_HAVE_PBGL
2869        0,
2870 #else
2871        0,
2872 #endif
2873        0,
2874        0,
2875        0,
2876 /*34*/ MatDuplicate_MPIAIJ,
2877        0,
2878        0,
2879        0,
2880        0,
2881 /*39*/ MatAXPY_MPIAIJ,
2882        MatGetSubMatrices_MPIAIJ,
2883        MatIncreaseOverlap_MPIAIJ,
2884        MatGetValues_MPIAIJ,
2885        MatCopy_MPIAIJ,
2886 /*44*/ MatGetRowMax_MPIAIJ,
2887        MatScale_MPIAIJ,
2888        0,
2889        0,
2890        MatZeroRowsColumns_MPIAIJ,
2891 /*49*/ MatSetBlockSize_MPIAIJ,
2892        0,
2893        0,
2894        0,
2895        0,
2896 /*54*/ MatFDColoringCreate_MPIAIJ,
2897        0,
2898        MatSetUnfactored_MPIAIJ,
2899        MatPermute_MPIAIJ,
2900        0,
2901 /*59*/ MatGetSubMatrix_MPIAIJ,
2902        MatDestroy_MPIAIJ,
2903        MatView_MPIAIJ,
2904        0,
2905        0,
2906 /*64*/ 0,
2907        0,
2908        0,
2909        0,
2910        0,
2911 /*69*/ MatGetRowMaxAbs_MPIAIJ,
2912        MatGetRowMinAbs_MPIAIJ,
2913        0,
2914        MatSetColoring_MPIAIJ,
2915 #if defined(PETSC_HAVE_ADIC)
2916        MatSetValuesAdic_MPIAIJ,
2917 #else
2918        0,
2919 #endif
2920        MatSetValuesAdifor_MPIAIJ,
2921 /*75*/ MatFDColoringApply_AIJ,
2922        0,
2923        0,
2924        0,
2925        0,
2926 /*80*/ 0,
2927        0,
2928        0,
2929 /*83*/ MatLoad_MPIAIJ,
2930        0,
2931        0,
2932        0,
2933        0,
2934        0,
2935 /*89*/ MatMatMult_MPIAIJ_MPIAIJ,
2936        MatMatMultSymbolic_MPIAIJ_MPIAIJ,
2937        MatMatMultNumeric_MPIAIJ_MPIAIJ,
2938        MatPtAP_Basic,
2939        MatPtAPSymbolic_MPIAIJ,
2940 /*94*/ MatPtAPNumeric_MPIAIJ,
2941        0,
2942        0,
2943        0,
2944        0,
2945 /*99*/ 0,
2946        MatPtAPSymbolic_MPIAIJ_MPIAIJ,
2947        MatPtAPNumeric_MPIAIJ_MPIAIJ,
2948        MatConjugate_MPIAIJ,
2949        0,
2950 /*104*/MatSetValuesRow_MPIAIJ,
2951        MatRealPart_MPIAIJ,
2952        MatImaginaryPart_MPIAIJ,
2953        0,
2954        0,
2955 /*109*/0,
2956        MatGetRedundantMatrix_MPIAIJ,
2957        MatGetRowMin_MPIAIJ,
2958        0,
2959        0,
2960 /*114*/MatGetSeqNonzerostructure_MPIAIJ,
2961        0,
2962        0,
2963        0,
2964        0,
2965 /*119*/0,
2966        0,
2967        0,
2968        0,
2969        MatGetMultiProcBlock_MPIAIJ,
2970 /*124*/0,
2971        0,
2972        0,
2973        0,
2974        MatGetSubMatricesParallel_MPIAIJ
2975 };
2976 
2977 /* ----------------------------------------------------------------------------------------*/
2978 
2979 EXTERN_C_BEGIN
2980 #undef __FUNCT__
2981 #define __FUNCT__ "MatStoreValues_MPIAIJ"
2982 PetscErrorCode PETSCMAT_DLLEXPORT MatStoreValues_MPIAIJ(Mat mat)
2983 {
2984   Mat_MPIAIJ     *aij = (Mat_MPIAIJ *)mat->data;
2985   PetscErrorCode ierr;
2986 
2987   PetscFunctionBegin;
2988   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
2989   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
2990   PetscFunctionReturn(0);
2991 }
2992 EXTERN_C_END
2993 
2994 EXTERN_C_BEGIN
2995 #undef __FUNCT__
2996 #define __FUNCT__ "MatRetrieveValues_MPIAIJ"
2997 PetscErrorCode PETSCMAT_DLLEXPORT MatRetrieveValues_MPIAIJ(Mat mat)
2998 {
2999   Mat_MPIAIJ     *aij = (Mat_MPIAIJ *)mat->data;
3000   PetscErrorCode ierr;
3001 
3002   PetscFunctionBegin;
3003   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
3004   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
3005   PetscFunctionReturn(0);
3006 }
3007 EXTERN_C_END
3008 
3009 EXTERN_C_BEGIN
3010 #undef __FUNCT__
3011 #define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ"
3012 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
3013 {
3014   Mat_MPIAIJ     *b;
3015   PetscErrorCode ierr;
3016   PetscInt       i;
3017 
3018   PetscFunctionBegin;
3019   if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5;
3020   if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2;
3021   if (d_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz);
3022   if (o_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz);
3023 
3024   ierr = PetscLayoutSetBlockSize(B->rmap,1);CHKERRQ(ierr);
3025   ierr = PetscLayoutSetBlockSize(B->cmap,1);CHKERRQ(ierr);
3026   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3027   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3028   if (d_nnz) {
3029     for (i=0; i<B->rmap->n; i++) {
3030       if (d_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %D value %D",i,d_nnz[i]);
3031     }
3032   }
3033   if (o_nnz) {
3034     for (i=0; i<B->rmap->n; i++) {
3035       if (o_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %D value %D",i,o_nnz[i]);
3036     }
3037   }
3038   b = (Mat_MPIAIJ*)B->data;
3039 
3040   if (!B->preallocated) {
3041     /* Explicitly create 2 MATSEQAIJ matrices. */
3042     ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
3043     ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr);
3044     ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr);
3045     ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr);
3046     ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
3047     ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr);
3048     ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr);
3049     ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr);
3050   }
3051 
3052   ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr);
3053   ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr);
3054   B->preallocated = PETSC_TRUE;
3055   PetscFunctionReturn(0);
3056 }
3057 EXTERN_C_END
3058 
3059 #undef __FUNCT__
3060 #define __FUNCT__ "MatDuplicate_MPIAIJ"
3061 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
3062 {
3063   Mat            mat;
3064   Mat_MPIAIJ     *a,*oldmat = (Mat_MPIAIJ*)matin->data;
3065   PetscErrorCode ierr;
3066 
3067   PetscFunctionBegin;
3068   *newmat       = 0;
3069   ierr = MatCreate(((PetscObject)matin)->comm,&mat);CHKERRQ(ierr);
3070   ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr);
3071   ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr);
3072   ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
3073   a    = (Mat_MPIAIJ*)mat->data;
3074 
3075   mat->factortype    = matin->factortype;
3076   mat->rmap->bs      = matin->rmap->bs;
3077   mat->assembled    = PETSC_TRUE;
3078   mat->insertmode   = NOT_SET_VALUES;
3079   mat->preallocated = PETSC_TRUE;
3080 
3081   a->size           = oldmat->size;
3082   a->rank           = oldmat->rank;
3083   a->donotstash     = oldmat->donotstash;
3084   a->roworiented    = oldmat->roworiented;
3085   a->rowindices     = 0;
3086   a->rowvalues      = 0;
3087   a->getrowactive   = PETSC_FALSE;
3088 
3089   ierr = PetscLayoutCopy(matin->rmap,&mat->rmap);CHKERRQ(ierr);
3090   ierr = PetscLayoutCopy(matin->cmap,&mat->cmap);CHKERRQ(ierr);
3091 
3092   if (oldmat->colmap) {
3093 #if defined (PETSC_USE_CTABLE)
3094     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
3095 #else
3096     ierr = PetscMalloc((mat->cmap->N)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr);
3097     ierr = PetscLogObjectMemory(mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
3098     ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
3099 #endif
3100   } else a->colmap = 0;
3101   if (oldmat->garray) {
3102     PetscInt len;
3103     len  = oldmat->B->cmap->n;
3104     ierr = PetscMalloc((len+1)*sizeof(PetscInt),&a->garray);CHKERRQ(ierr);
3105     ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr);
3106     if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); }
3107   } else a->garray = 0;
3108 
3109   ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
3110   ierr = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr);
3111   ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
3112   ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr);
3113   ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
3114   ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr);
3115   ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
3116   ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr);
3117   ierr = PetscFListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr);
3118   *newmat = mat;
3119   PetscFunctionReturn(0);
3120 }
3121 
3122 /*
3123     Allows sending/receiving messages larger than 2 gigabytes in a single call by splitting them into chunks of at most CHUNKSIZE entries
3124 */
3125 static int MPILong_Send(void *mess,PetscInt cnt, MPI_Datatype type,int to, int tag, MPI_Comm comm)
3126 {
3127   int             ierr;
3128   static PetscInt CHUNKSIZE = 250000000; /* 250,000,000 */
3129   PetscInt        i,numchunks;
3130   PetscMPIInt     icnt;
3131 
3132   numchunks = cnt/CHUNKSIZE + 1;
3133   for (i=0; i<numchunks; i++) {
3134     icnt = PetscMPIIntCast((i < numchunks-1) ? CHUNKSIZE : cnt - (numchunks-1)*CHUNKSIZE);
3135     ierr = MPI_Send(mess,icnt,type,to,tag,comm);
3136     if (type == MPIU_INT) {
3137       mess = (void*) (((PetscInt*)mess) + CHUNKSIZE);
3138     } else if (type == MPIU_SCALAR) {
3139       mess = (void*) (((PetscScalar*)mess) + CHUNKSIZE);
3140     } else SETERRQ(comm,PETSC_ERR_SUP,"No support for this datatype");
3141   }
3142   return 0;
3143 }
3144 static int MPILong_Recv(void *mess,PetscInt cnt, MPI_Datatype type,int from, int tag, MPI_Comm comm)
3145 {
3146   int             ierr;
3147   static PetscInt CHUNKSIZE = 250000000; /* 250,000,000 */
3148   MPI_Status      status;
3149   PetscInt        i,numchunks;
3150   PetscMPIInt     icnt;
3151 
3152   numchunks = cnt/CHUNKSIZE + 1;
3153   for (i=0; i<numchunks; i++) {
3154     icnt = PetscMPIIntCast((i < numchunks-1) ? CHUNKSIZE : cnt - (numchunks-1)*CHUNKSIZE);
3155     ierr = MPI_Recv(mess,icnt,type,from,tag,comm,&status);
3156     if (type == MPIU_INT) {
3157       mess = (void*) (((PetscInt*)mess) + CHUNKSIZE);
3158     } else if (type == MPIU_SCALAR) {
3159       mess = (void*) (((PetscScalar*)mess) + CHUNKSIZE);
3160     } else SETERRQ(comm,PETSC_ERR_SUP,"No support for this datatype");
3161   }
3162   return 0;
3163 }
3164 
3165 #undef __FUNCT__
3166 #define __FUNCT__ "MatLoad_MPIAIJ"
3167 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer)
3168 {
3169   PetscScalar    *vals,*svals;
3170   MPI_Comm       comm = ((PetscObject)viewer)->comm;
3171   PetscErrorCode ierr;
3172   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
3173   PetscInt       i,nz,j,rstart,rend,mmax,maxnz = 0,grows,gcols;
3174   PetscInt       header[4],*rowlengths = 0,M,N,m,*cols;
3175   PetscInt       *ourlens = PETSC_NULL,*procsnz = PETSC_NULL,*offlens = PETSC_NULL,jj,*mycols,*smycols;
3176   PetscInt       cend,cstart,n,*rowners,sizesset=1;
3177   int            fd;
3178 
3179   PetscFunctionBegin;
3180   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3181   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3182   if (!rank) {
3183     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
3184     ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
3185     if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
3186   }
3187 
3188   if (newMat->rmap->n < 0 && newMat->rmap->N < 0 && newMat->cmap->n < 0 && newMat->cmap->N < 0) sizesset = 0;
3189 
3190   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
3191   M = header[1]; N = header[2];
3192   /* If global rows/cols are set to PETSC_DECIDE, set it to the sizes given in the file */
3193   if (sizesset && newMat->rmap->N < 0) newMat->rmap->N = M;
3194   if (sizesset && newMat->cmap->N < 0) newMat->cmap->N = N;
3195 
3196   /* If global sizes are set, check if they are consistent with that given in the file */
3197   if (sizesset) {
3198     ierr = MatGetSize(newMat,&grows,&gcols);CHKERRQ(ierr);
3199   }
3200   if (sizesset && newMat->rmap->N != grows) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows:Matrix in file has (%d) and input matrix has (%d)",M,grows);
3201   if (sizesset && newMat->cmap->N != gcols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols:Matrix in file has (%d) and input matrix has (%d)",N,gcols);
3202 
3203   /* determine ownership of all rows */
3204   if (newMat->rmap->n < 0 ) m    = M/size + ((M % size) > rank); /* PETSC_DECIDE */
3205   else m = newMat->rmap->n; /* Set by user */
3206 
3207   ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr);
3208   ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
3209 
3210   /* First process needs enough room for process with most rows */
3211   if (!rank) {
3212     mmax       = rowners[1];
3213     for (i=2; i<=size; i++) {
3214       mmax = PetscMax(mmax,rowners[i]);
3215     }
3216   } else mmax = m;
3217 
3218   rowners[0] = 0;
3219   for (i=2; i<=size; i++) {
3220     rowners[i] += rowners[i-1];
3221   }
3222   rstart = rowners[rank];
3223   rend   = rowners[rank+1];
3224 
3225   /* distribute row lengths to all processors */
3226   ierr    = PetscMalloc2(mmax,PetscInt,&ourlens,mmax,PetscInt,&offlens);CHKERRQ(ierr);
3227   if (!rank) {
3228     ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr);
3229     ierr = PetscMalloc(mmax*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
3230     ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr);
3231     ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr);
3232     for (j=0; j<m; j++) {
3233       procsnz[0] += ourlens[j];
3234     }
3235     for (i=1; i<size; i++) {
3236       ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr);
3237       /* calculate the number of nonzeros on each processor */
3238       for (j=0; j<rowners[i+1]-rowners[i]; j++) {
3239         procsnz[i] += rowlengths[j];
3240       }
3241       ierr = MPILong_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
3242     }
3243     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
3244   } else {
3245     ierr = MPILong_Recv(ourlens,m,MPIU_INT,0,tag,comm);CHKERRQ(ierr);
3246   }
3247 
3248   if (!rank) {
3249     /* determine max buffer needed and allocate it */
3250     maxnz = 0;
3251     for (i=0; i<size; i++) {
3252       maxnz = PetscMax(maxnz,procsnz[i]);
3253     }
3254     ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr);
3255 
3256     /* read in my part of the matrix column indices  */
3257     nz   = procsnz[0];
3258     ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr);
3259     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
3260 
3261     /* read in everyone else's and ship off */
3262     for (i=1; i<size; i++) {
3263       nz     = procsnz[i];
3264       ierr   = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
3265       ierr   = MPILong_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
3266     }
3267     ierr = PetscFree(cols);CHKERRQ(ierr);
3268   } else {
3269     /* determine buffer space needed for message */
3270     nz = 0;
3271     for (i=0; i<m; i++) {
3272       nz += ourlens[i];
3273     }
3274     ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr);
3275 
3276     /* receive message of column indices*/
3277     ierr = MPILong_Recv(mycols,nz,MPIU_INT,0,tag,comm);CHKERRQ(ierr);
3278   }
3279 
3280   /* determine column ownership if matrix is not square */
3281   if (N != M) {
3282     if (newMat->cmap->n < 0) n      = N/size + ((N % size) > rank);
3283     else n = newMat->cmap->n;
3284     ierr   = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3285     cstart = cend - n;
3286   } else {
3287     cstart = rstart;
3288     cend   = rend;
3289     n      = cend - cstart;
3290   }
3291 
3292   /* loop over local rows, determining number of off diagonal entries */
3293   ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr);
3294   jj = 0;
3295   for (i=0; i<m; i++) {
3296     for (j=0; j<ourlens[i]; j++) {
3297       if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++;
3298       jj++;
3299     }
3300   }
3301 
3302   for (i=0; i<m; i++) {
3303     ourlens[i] -= offlens[i];
3304   }
3305   if (!sizesset) {
3306     ierr = MatSetSizes(newMat,m,n,M,N);CHKERRQ(ierr);
3307   }
3308   ierr = MatMPIAIJSetPreallocation(newMat,0,ourlens,0,offlens);CHKERRQ(ierr);
3309 
3310   for (i=0; i<m; i++) {
3311     ourlens[i] += offlens[i];
3312   }
3313 
3314   if (!rank) {
3315     ierr = PetscMalloc((maxnz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
3316 
3317     /* read in my part of the matrix numerical values  */
3318     nz   = procsnz[0];
3319     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3320 
3321     /* insert into matrix */
3322     jj      = rstart;
3323     smycols = mycols;
3324     svals   = vals;
3325     for (i=0; i<m; i++) {
3326       ierr = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
3327       smycols += ourlens[i];
3328       svals   += ourlens[i];
3329       jj++;
3330     }
3331 
3332     /* read in other processors and ship out */
3333     for (i=1; i<size; i++) {
3334       nz     = procsnz[i];
3335       ierr   = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3336       ierr   = MPILong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr);
3337     }
3338     ierr = PetscFree(procsnz);CHKERRQ(ierr);
3339   } else {
3340     /* receive numeric values */
3341     ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
3342 
3343     /* receive message of values*/
3344     ierr   = MPILong_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr);
3345 
3346     /* insert into matrix */
3347     jj      = rstart;
3348     smycols = mycols;
3349     svals   = vals;
3350     for (i=0; i<m; i++) {
3351       ierr     = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
3352       smycols += ourlens[i];
3353       svals   += ourlens[i];
3354       jj++;
3355     }
3356   }
3357   ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr);
3358   ierr = PetscFree(vals);CHKERRQ(ierr);
3359   ierr = PetscFree(mycols);CHKERRQ(ierr);
3360   ierr = PetscFree(rowners);CHKERRQ(ierr);
3361 
3362   ierr = MatAssemblyBegin(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3363   ierr = MatAssemblyEnd(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3364   PetscFunctionReturn(0);
3365 }
3366 
3367 #undef __FUNCT__
3368 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ"
3369 PetscErrorCode MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat)
3370 {
3371   PetscErrorCode ierr;
3372   IS             iscol_local;
3373   PetscInt       csize;
3374 
3375   PetscFunctionBegin;
3376   ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
3377   if (call == MAT_REUSE_MATRIX) {
3378     ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr);
3379     if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3380   } else {
3381     ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
3382   }
3383   ierr = MatGetSubMatrix_MPIAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr);
3384   if (call == MAT_INITIAL_MATRIX) {
3385     ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
3386     ierr = ISDestroy(iscol_local);CHKERRQ(ierr);
3387   }
3388   PetscFunctionReturn(0);
3389 }
3390 
3391 #undef __FUNCT__
3392 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ_Private"
3393 /*
3394     Not great since it makes two copies of the submatrix: first a SeqAIJ
3395   on each process, and then the end result obtained by concatenating the local matrices.
3396   Writing it directly would be much like MatGetSubMatrices_MPIAIJ()
3397 
3398   Note: This requires a sequential iscol with all indices.
3399 */
3400 PetscErrorCode MatGetSubMatrix_MPIAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat)
3401 {
3402   PetscErrorCode ierr;
3403   PetscMPIInt    rank,size;
3404   PetscInt       i,m,n,rstart,row,rend,nz,*cwork,j;
3405   PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal;
3406   Mat            *local,M,Mreuse;
3407   MatScalar      *vwork,*aa;
3408   MPI_Comm       comm = ((PetscObject)mat)->comm;
3409   Mat_SeqAIJ     *aij;
3410 
3411 
3412   PetscFunctionBegin;
3413   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3414   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3415 
3416   if (call ==  MAT_REUSE_MATRIX) {
3417     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr);
3418     if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3419     local = &Mreuse;
3420     ierr  = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);CHKERRQ(ierr);
3421   } else {
3422     ierr   = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr);
3423     Mreuse = *local;
3424     ierr   = PetscFree(local);CHKERRQ(ierr);
3425   }
3426 
3427   /*
3428       m - number of local rows
3429       n - number of columns (same on all processors)
3430       rstart - first row in new global matrix generated
3431   */
3432   ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
3433   if (call == MAT_INITIAL_MATRIX) {
3434     aij = (Mat_SeqAIJ*)(Mreuse)->data;
3435     ii  = aij->i;
3436     jj  = aij->j;
3437 
3438     /*
3439         Determine the number of non-zeros in the diagonal and off-diagonal
3440         portions of the matrix in order to do correct preallocation
3441     */
3442 
3443     /* first get start and end of "diagonal" columns */
3444     if (csize == PETSC_DECIDE) {
3445       ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
3446       if (mglobal == n) { /* square matrix */
3447 	nlocal = m;
3448       } else {
3449         nlocal = n/size + ((n % size) > rank);
3450       }
3451     } else {
3452       nlocal = csize;
3453     }
3454     ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3455     rstart = rend - nlocal;
3456     if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n);
3457 
3458     /* next, compute all the lengths */
3459     ierr  = PetscMalloc((2*m+1)*sizeof(PetscInt),&dlens);CHKERRQ(ierr);
3460     olens = dlens + m;
3461     for (i=0; i<m; i++) {
3462       jend = ii[i+1] - ii[i];
3463       olen = 0;
3464       dlen = 0;
3465       for (j=0; j<jend; j++) {
3466         if (*jj < rstart || *jj >= rend) olen++;
3467         else dlen++;
3468         jj++;
3469       }
3470       olens[i] = olen;
3471       dlens[i] = dlen;
3472     }
3473     ierr = MatCreate(comm,&M);CHKERRQ(ierr);
3474     ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr);
3475     ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr);
3476     ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr);
3477     ierr = PetscFree(dlens);CHKERRQ(ierr);
3478   } else {
3479     PetscInt ml,nl;
3480 
3481     M = *newmat;
3482     ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
3483     if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
3484     ierr = MatZeroEntries(M);CHKERRQ(ierr);
3485     /*
3486          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
3487        rather than the slower MatSetValues().
3488     */
3489     M->was_assembled = PETSC_TRUE;
3490     M->assembled     = PETSC_FALSE;
3491   }
3492   ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
3493   aij = (Mat_SeqAIJ*)(Mreuse)->data;
3494   ii  = aij->i;
3495   jj  = aij->j;
3496   aa  = aij->a;
3497   for (i=0; i<m; i++) {
3498     row   = rstart + i;
3499     nz    = ii[i+1] - ii[i];
3500     cwork = jj;     jj += nz;
3501     vwork = aa;     aa += nz;
3502     ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
3503   }
3504 
3505   ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3506   ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3507   *newmat = M;
3508 
3509   /* save submatrix used in processor for next request */
3510   if (call ==  MAT_INITIAL_MATRIX) {
3511     ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
3512     ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr);
3513   }
3514 
3515   PetscFunctionReturn(0);
3516 }
3517 
3518 EXTERN_C_BEGIN
3519 #undef __FUNCT__
3520 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR_MPIAIJ"
3521 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[])
3522 {
3523   PetscInt       m,cstart, cend,j,nnz,i,d;
3524   PetscInt       *d_nnz,*o_nnz,nnz_max = 0,rstart,ii;
3525   const PetscInt *JJ;
3526   PetscScalar    *values;
3527   PetscErrorCode ierr;
3528 
3529   PetscFunctionBegin;
3530   if (Ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0 it is %D",Ii[0]);
3531 
3532   ierr = PetscLayoutSetBlockSize(B->rmap,1);CHKERRQ(ierr);
3533   ierr = PetscLayoutSetBlockSize(B->cmap,1);CHKERRQ(ierr);
3534   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3535   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3536   m      = B->rmap->n;
3537   cstart = B->cmap->rstart;
3538   cend   = B->cmap->rend;
3539   rstart = B->rmap->rstart;
3540 
3541   ierr  = PetscMalloc2(m,PetscInt,&d_nnz,m,PetscInt,&o_nnz);CHKERRQ(ierr);
3542 
3543 #if defined(PETSC_USE_DEBUG)
3544   for (i=0; i<m; i++) {
3545     nnz     = Ii[i+1]- Ii[i];
3546     JJ      = J + Ii[i];
3547     if (nnz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative %D number of columns",i,nnz);
3548     if (nnz && (JJ[0] < 0)) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index",i);
3549     if (nnz && (JJ[nnz-1] >= B->cmap->N)) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N);
3550   }
3551 #endif
3552 
3553   for (i=0; i<m; i++) {
3554     nnz     = Ii[i+1]- Ii[i];
3555     JJ      = J + Ii[i];
3556     nnz_max = PetscMax(nnz_max,nnz);
3557     d       = 0;
3558     for (j=0; j<nnz; j++) {
3559       if (cstart <= JJ[j] && JJ[j] < cend) d++;
3560     }
3561     d_nnz[i] = d;
3562     o_nnz[i] = nnz - d;
3563   }
3564   ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
3565   ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);
3566 
3567   if (v) values = (PetscScalar*)v;
3568   else {
3569     ierr = PetscMalloc((nnz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr);
3570     ierr = PetscMemzero(values,nnz_max*sizeof(PetscScalar));CHKERRQ(ierr);
3571   }
3572 
3573   for (i=0; i<m; i++) {
3574     ii   = i + rstart;
3575     nnz  = Ii[i+1]- Ii[i];
3576     ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ? Ii[i] : 0),INSERT_VALUES);CHKERRQ(ierr);
3577   }
3578   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3579   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3580 
3581   if (!v) {
3582     ierr = PetscFree(values);CHKERRQ(ierr);
3583   }
3584   PetscFunctionReturn(0);
3585 }
3586 EXTERN_C_END
3587 
3588 #undef __FUNCT__
3589 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR"
3590 /*@
3591    MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format
3592    (the default parallel PETSc format).
3593 
3594    Collective on MPI_Comm
3595 
3596    Input Parameters:
3597 +  B - the matrix
3598 .  i - the indices into j for the start of each local row (starts with zero)
3599 .  j - the column indices for each local row (starts with zero)
3600 -  v - optional values in the matrix
3601 
3602    Level: developer
3603 
3604    Notes:
3605        The i, j, and v arrays ARE copied by this routine into the internal format used by PETSc;
3606      thus you CANNOT change the matrix entries by changing the values of v[] after you have
3607      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3608 
3609        The i and j indices are 0 based, and i indices are indices corresponding to the local j array.
3610 
3611        The format which is used for the sparse matrix input is equivalent to a
3612     row-major ordering, i.e., for the following matrix, the input data expected is
3613     as shown:
3614 
3615         1 0 0
3616         2 0 3     P0
3617        -------
3618         4 5 6     P1
3619 
3620      Process0 [P0]: rows_owned=[0,1]
3621         i =  {0,1,3}  [size = nrow+1  = 2+1]
3622         j =  {0,0,2}  [size = nz = 3]
3623         v =  {1,2,3}  [size = nz = 3]
3624 
3625      Process1 [P1]: rows_owned=[2]
3626         i =  {0,3}    [size = nrow+1  = 1+1]
3627         j =  {0,1,2}  [size = nz = 3]
3628         v =  {4,5,6}  [size = nz = 3]
3629 
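     As a minimal calling sketch (the variable names are illustrative; B is assumed
     to already be a MATMPIAIJ matrix with 2 local rows of the 3x3 matrix above),
     process 0 could pass its local CSR arrays as:

.vb
        PetscInt    i[] = {0,1,3};
        PetscInt    j[] = {0,0,2};
        PetscScalar v[] = {1.0,2.0,3.0};
        ierr = MatMPIAIJSetPreallocationCSR(B,i,j,v);CHKERRQ(ierr);
.ve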
3630 .keywords: matrix, aij, compressed row, sparse, parallel
3631 
3632 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateMPIAIJ(), MPIAIJ,
3633           MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays()
3634 @*/
3635 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
3636 {
3637   PetscErrorCode ierr;
3638 
3639   PetscFunctionBegin;
3640   ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocationCSR_C",(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,i,j,v));CHKERRQ(ierr);
3641   PetscFunctionReturn(0);
3642 }
3643 
3644 #undef __FUNCT__
3645 #define __FUNCT__ "MatMPIAIJSetPreallocation"
3646 /*@C
3647    MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format
3648    (the default parallel PETSc format).  For good matrix assembly performance
3649    the user should preallocate the matrix storage by setting the parameters
3650    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3651    performance can be increased by more than a factor of 50.
3652 
3653    Collective on MPI_Comm
3654 
3655    Input Parameters:
3656 +  A - the matrix
3657 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
3658            (same value is used for all local rows)
3659 .  d_nnz - array containing the number of nonzeros in the various rows of the
3660            DIAGONAL portion of the local submatrix (possibly different for each row)
3661            or PETSC_NULL, if d_nz is used to specify the nonzero structure.
3662            The size of this array is equal to the number of local rows, i.e 'm'.
3663            You must leave room for the diagonal entry even if it is zero.
3664 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
3665            submatrix (same value is used for all local rows).
3666 -  o_nnz - array containing the number of nonzeros in the various rows of the
3667            OFF-DIAGONAL portion of the local submatrix (possibly different for
3668            each row) or PETSC_NULL, if o_nz is used to specify the nonzero
3669            structure. The size of this array is equal to the number
3670            of local rows, i.e 'm'.
3671 
3672    If the *_nnz parameter is given then the *_nz parameter is ignored
3673 
3674    The AIJ format (also called the Yale sparse matrix format or
3675    compressed row storage (CSR)), is fully compatible with standard Fortran 77
3676    storage.  The stored row and column indices begin with zero.
3677    See the <A href="../../docs/manual.pdf#nameddest=ch_mat">Mat chapter of the users manual</A> for details.
3678 
3679    The parallel matrix is partitioned such that the first m0 rows belong to
3680    process 0, the next m1 rows belong to process 1, the next m2 rows belong
3681    to process 2 etc.. where m0,m1,m2... are the input parameter 'm'.
3682 
3683    The DIAGONAL portion of the local submatrix of a processor can be defined
3684    as the submatrix which is obtained by extracting the part corresponding to
3685    the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the
3686    first row that belongs to the processor, r2 is the last row belonging to
3687    this processor, and c1-c2 is the range of indices of the local part of a
3688    vector suitable for applying the matrix to.  This is an mxn matrix.  In the
3689    common case of a square matrix, the row and column ranges are the same and
3690    the DIAGONAL part is also square. The remaining portion of the local
3691    submatrix (mxN) constitutes the OFF-DIAGONAL portion.
3692 
3693    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
3694 
3695    You can call MatGetInfo() to get information on how effective the preallocation was;
3696    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
3697    You can also run with the option -info and look for messages with the string
3698    malloc in them to see if additional memory allocation was needed.
3699 
3700    Example usage:
3701 
3702    Consider the following 8x8 matrix with 34 non-zero values, that is
3703    assembled across 3 processors. Lets assume that proc0 owns 3 rows,
3704    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
3705    as follows:
3706 
3707 .vb
3708             1  2  0  |  0  3  0  |  0  4
3709     Proc0   0  5  6  |  7  0  0  |  8  0
3710             9  0 10  | 11  0  0  | 12  0
3711     -------------------------------------
3712            13  0 14  | 15 16 17  |  0  0
3713     Proc1   0 18  0  | 19 20 21  |  0  0
3714             0  0  0  | 22 23  0  | 24  0
3715     -------------------------------------
3716     Proc2  25 26 27  |  0  0 28  | 29  0
3717            30  0  0  | 31 32 33  |  0 34
3718 .ve
3719 
3720    This can be represented as a collection of submatrices as:
3721 
3722 .vb
3723       A B C
3724       D E F
3725       G H I
3726 .ve
3727 
3728    Where the submatrices A,B,C are owned by proc0, D,E,F are
3729    owned by proc1, G,H,I are owned by proc2.
3730 
3731    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3732    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3733    The 'M','N' parameters are 8,8, and have the same values on all procs.
3734 
3735    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
3736    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
3737    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
3738    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
3739    part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
3740    matrix, and [DF] as another SeqAIJ matrix.
3741 
3742    When d_nz, o_nz parameters are specified, d_nz storage elements are
3743    allocated for every row of the local diagonal submatrix, and o_nz
3744    storage locations are allocated for every row of the OFF-DIAGONAL submat.
3745    One way to choose d_nz and o_nz is to use the max nonzeros per local
3746    row for each of the local DIAGONAL and OFF-DIAGONAL submatrices.
3747    In this case, the values of d_nz,o_nz are:
3748 .vb
3749      proc0 : dnz = 2, o_nz = 2
3750      proc1 : dnz = 3, o_nz = 2
3751      proc2 : dnz = 1, o_nz = 4
3752 .ve
3753    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
3754    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
3755    for proc2, i.e. we are using 12+15+10=37 storage locations to store
3756    34 values.
3757 
3758    When d_nnz, o_nnz parameters are specified, the storage is specified
3759    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
3760    In the above case the values for d_nnz,o_nnz are:
3761 .vb
3762      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
3763      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
3764      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
3765 .ve
3766    Here the space allocated is the sum of all the above values, i.e. 34, and
3767    hence pre-allocation is perfect.
3768 
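   As a rough sketch of the corresponding call (A is assumed to have already been
   created with MatCreate()/MatSetType(A,MATMPIAIJ) and given 3 local rows), proc0
   above could preallocate with

.vb
     PetscInt d_nnz[] = {2,2,2}, o_nnz[] = {2,2,2};
     ierr = MatMPIAIJSetPreallocation(A,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
.ve
   The d_nz and o_nz arguments (here 0) are ignored because the nnz arrays are given.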
3769    Level: intermediate
3770 
3771 .keywords: matrix, aij, compressed row, sparse, parallel
3772 
3773 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIAIJ(), MatMPIAIJSetPreallocationCSR(),
3774           MPIAIJ, MatGetInfo()
3775 @*/
3776 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
3777 {
3778   PetscErrorCode ierr;
3779 
3780   PetscFunctionBegin;
3781   ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocation_C",(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr);
3782   PetscFunctionReturn(0);
3783 }
3784 
3785 #undef __FUNCT__
3786 #define __FUNCT__ "MatCreateMPIAIJWithArrays"
3787 /*@
3788      MatCreateMPIAIJWithArrays - creates a MPI AIJ matrix using arrays that contain in standard
3789          CSR format the local rows.
3790 
3791    Collective on MPI_Comm
3792 
3793    Input Parameters:
3794 +  comm - MPI communicator
3795 .  m - number of local rows (Cannot be PETSC_DECIDE)
3796 .  n - This value should be the same as the local size used in creating the
3797        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
3798        calculated if N is given) For square matrices n is almost always m.
3799 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
3800 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
3801 .   i - row indices
3802 .   j - column indices
3803 -   a - matrix values
3804 
3805    Output Parameter:
3806 .   mat - the matrix
3807 
3808    Level: intermediate
3809 
3810    Notes:
3811        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
3812      thus you CANNOT change the matrix entries by changing the values of a[] after you have
3813      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3814 
3815        The i and j indices are 0 based, and i indices are indices corresponding to the local j array.
3816 
3817        The format which is used for the sparse matrix input is equivalent to a
3818     row-major ordering, i.e., for the following matrix, the input data expected is
3819     as shown:
3820 
3821         1 0 0
3822         2 0 3     P0
3823        -------
3824         4 5 6     P1
3825 
3826      Process0 [P0]: rows_owned=[0,1]
3827         i =  {0,1,3}  [size = nrow+1  = 2+1]
3828         j =  {0,0,2}  [size = nz = 3]
3829         v =  {1,2,3}  [size = nz = 3]
3830 
3831      Process1 [P1]: rows_owned=[2]
3832         i =  {0,3}    [size = nrow+1  = 1+1]
3833         j =  {0,1,2}  [size = nz = 3]
3834         v =  {4,5,6}  [size = nz = 3]
3835 
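     A minimal calling sketch for process 0 above (comm and the variable names are
     illustrative; each process passes only its own local rows):

.vb
        PetscInt    i[] = {0,1,3};
        PetscInt    j[] = {0,0,2};
        PetscScalar a[] = {1.0,2.0,3.0};
        ierr = MatCreateMPIAIJWithArrays(comm,2,PETSC_DECIDE,3,3,i,j,a,&A);CHKERRQ(ierr);
.ve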
3836 .keywords: matrix, aij, compressed row, sparse, parallel
3837 
3838 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
3839           MPIAIJ, MatCreateMPIAIJ(), MatCreateMPIAIJWithSplitArrays()
3840 @*/
3841 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
3842 {
3843   PetscErrorCode ierr;
3844 
3845   PetscFunctionBegin;
3846   if (i[0]) {
3847     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
3848   }
3849   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
3850   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
3851   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
3852   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
3853   ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr);
3854   PetscFunctionReturn(0);
3855 }
3856 
3857 #undef __FUNCT__
3858 #define __FUNCT__ "MatCreateMPIAIJ"
3859 /*@C
3860    MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format
3861    (the default parallel PETSc format).  For good matrix assembly performance
3862    the user should preallocate the matrix storage by setting the parameters
3863    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3864    performance can be increased by more than a factor of 50.
3865 
3866    Collective on MPI_Comm
3867 
3868    Input Parameters:
3869 +  comm - MPI communicator
3870 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
3871            This value should be the same as the local size used in creating the
3872            y vector for the matrix-vector product y = Ax.
3873 .  n - This value should be the same as the local size used in creating the
3874        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
3875        calculated if N is given) For square matrices n is almost always m.
3876 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
3877 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
3878 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
3879            (same value is used for all local rows)
3880 .  d_nnz - array containing the number of nonzeros in the various rows of the
3881            DIAGONAL portion of the local submatrix (possibly different for each row)
3882            or PETSC_NULL, if d_nz is used to specify the nonzero structure.
3883            The size of this array is equal to the number of local rows, i.e 'm'.
3884            You must leave room for the diagonal entry even if it is zero.
3885 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
3886            submatrix (same value is used for all local rows).
3887 -  o_nnz - array containing the number of nonzeros in the various rows of the
3888            OFF-DIAGONAL portion of the local submatrix (possibly different for
3889            each row) or PETSC_NULL, if o_nz is used to specify the nonzero
3890            structure. The size of this array is equal to the number
3891            of local rows, i.e 'm'.
3892 
3893    Output Parameter:
3894 .  A - the matrix
3895 
3896    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
3897    MatXXXXSetPreallocation() paradigm instead of this routine directly.
3898    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
3899 
3900    Notes:
3901    If the *_nnz parameter is given then the *_nz parameter is ignored
3902 
3903    m,n,M,N parameters specify the size of the matrix, and its partitioning across
3904    processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
3905    storage requirements for this matrix.
3906 
3907    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one
3908    processor then it must be used on all processors that share the object for
3909    that argument.
3910 
3911    The user MUST specify either the local or global matrix dimensions
3912    (possibly both).
3913 
3914    The parallel matrix is partitioned across processors such that the
3915    first m0 rows belong to process 0, the next m1 rows belong to
3916    process 1, the next m2 rows belong to process 2 etc.. where
3917    m0,m1,m2,.. are the input parameter 'm'. i.e each processor stores
3918    values corresponding to [m x N] submatrix.
3919 
3920    The columns are logically partitioned with the n0 columns belonging
3921    to 0th partition, the next n1 columns belonging to the next
3922    partition etc., where n0,n1,n2... are the input parameter 'n'.
3923 
3924    The DIAGONAL portion of the local submatrix on any given processor
3925    is the submatrix corresponding to the rows and columns m,n
3926    corresponding to the given processor. i.e diagonal matrix on
3927    process 0 is [m0 x n0], diagonal matrix on process 1 is [m1 x n1]
3928    etc. The remaining portion of the local submatrix [m x (N-n)]
3929    constitutes the OFF-DIAGONAL portion. The example below better
3930    illustrates this concept.
3931 
3932    For a square global matrix we define each processor's diagonal portion
3933    to be its local rows and the corresponding columns (a square submatrix);
3934    each processor's off-diagonal portion encompasses the remainder of the
3935    local matrix (a rectangular submatrix).
3936 
3937    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
3938 
3939    When calling this routine with a single process communicator, a matrix of
3940    type SEQAIJ is returned.  If a matrix of type MPIAIJ is desired for this
3941    type of communicator, use the construction mechanism:
3942      MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatSetSizes(A, m,n,M,N); MatMPIAIJSetPreallocation(A,...);
3943 
3944    By default, this format uses inodes (identical nodes) when possible.
3945    We search for consecutive rows with the same nonzero structure, thereby
3946    reusing matrix information to achieve increased efficiency.
3947 
3948    Options Database Keys:
3949 +  -mat_no_inode  - Do not use inodes
3950 .  -mat_inode_limit <limit> - Sets inode limit (max limit=5)
3951 -  -mat_aij_oneindex - Internally use indexing starting at 1
3952         rather than 0.  Note that when calling MatSetValues(),
3953         the user still MUST index entries starting at 0!
3954 
3955 
3956    Example usage:
3957 
3958    Consider the following 8x8 matrix with 34 non-zero values, that is
3959    assembled across 3 processors. Lets assume that proc0 owns 3 rows,
3960    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
3961    as follows:
3962 
3963 .vb
3964             1  2  0  |  0  3  0  |  0  4
3965     Proc0   0  5  6  |  7  0  0  |  8  0
3966             9  0 10  | 11  0  0  | 12  0
3967     -------------------------------------
3968            13  0 14  | 15 16 17  |  0  0
3969     Proc1   0 18  0  | 19 20 21  |  0  0
3970             0  0  0  | 22 23  0  | 24  0
3971     -------------------------------------
3972     Proc2  25 26 27  |  0  0 28  | 29  0
3973            30  0  0  | 31 32 33  |  0 34
3974 .ve
3975 
3976    This can be represented as a collection of submatrices as:
3977 
3978 .vb
3979       A B C
3980       D E F
3981       G H I
3982 .ve
3983 
3984    Where the submatrices A,B,C are owned by proc0, D,E,F are
3985    owned by proc1, G,H,I are owned by proc2.
3986 
3987    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3988    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3989    The 'M','N' parameters are 8,8, and have the same values on all procs.
3990 
3991    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
3992    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
3993    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
3994    Internally, each processor stores the DIAGONAL part and the OFF-DIAGONAL
3995    part as SeqAIJ matrices; e.g., proc1 will store [E] as one SeqAIJ
3996    matrix and [DF] as another SeqAIJ matrix.
3997 
3998    When the d_nz, o_nz parameters are specified, d_nz storage elements are
3999    allocated for every row of the local DIAGONAL submatrix, and o_nz
4000    storage locations are allocated for every row of the OFF-DIAGONAL submatrix.
4001    One way to choose d_nz and o_nz is to use the maximum number of nonzeros over
4002    the local rows of the DIAGONAL and the OFF-DIAGONAL submatrices, respectively.
4003    In this case, the values of d_nz,o_nz are:
4004 .vb
4005      proc0 : d_nz = 2, o_nz = 2
4006      proc1 : d_nz = 3, o_nz = 2
4007      proc2 : d_nz = 1, o_nz = 4
4008 .ve
4009    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
4010    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, and 2*(1+4)=10
4011    for proc2; i.e. we are using 12+15+10=37 storage locations to store
4012    34 values.
4013 
4014    When the d_nnz, o_nnz parameters are specified, the storage is specified
4015    for every row, corresponding to both the DIAGONAL and OFF-DIAGONAL submatrices.
4016    In the above case the values for d_nnz,o_nnz are:
4017 .vb
4018      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
4019      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
4020      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
4021 .ve
4022    Here the space allocated is the sum of all the above values, i.e. 34,
4023    so the preallocation is exact.
4024 
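   A minimal calling sketch for proc0 of the example above; the other processes
   make the analogous call with their own m, d_nnz, and o_nnz (error checking
   omitted):
.vb
     Mat      A;
     PetscInt d_nnz[] = {2,2,2}, o_nnz[] = {2,2,2};  /* proc0 values from the example */

     MatCreateMPIAIJ(PETSC_COMM_WORLD,3,3,8,8,0,d_nnz,0,o_nnz,&A);
     /* ... insert values with MatSetValues() ... */
     MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);
     MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);
.ve
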
4025    Level: intermediate
4026 
4027 .keywords: matrix, aij, compressed row, sparse, parallel
4028 
4029 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
4030           MPIAIJ, MatCreateMPIAIJWithArrays()
4031 @*/
4032 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
4033 {
4034   PetscErrorCode ierr;
4035   PetscMPIInt    size;
4036 
4037   PetscFunctionBegin;
4038   ierr = MatCreate(comm,A);CHKERRQ(ierr);
4039   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
4040   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4041   if (size > 1) {
4042     ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr);
4043     ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
4044   } else {
4045     ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr);
4046     ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr);
4047   }
4048   PetscFunctionReturn(0);
4049 }
4050 
4051 #undef __FUNCT__
4052 #define __FUNCT__ "MatMPIAIJGetSeqAIJ"
4053 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,PetscInt *colmap[])
4054 {
4055   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
4056 
4057   PetscFunctionBegin;
4058   *Ad     = a->A;
4059   *Ao     = a->B;
4060   *colmap = a->garray;
4061   PetscFunctionReturn(0);
4062 }
4063 
4064 #undef __FUNCT__
4065 #define __FUNCT__ "MatSetColoring_MPIAIJ"
4066 PetscErrorCode MatSetColoring_MPIAIJ(Mat A,ISColoring coloring)
4067 {
4068   PetscErrorCode ierr;
4069   PetscInt       i;
4070   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
4071 
4072   PetscFunctionBegin;
4073   if (coloring->ctype == IS_COLORING_GLOBAL) {
4074     ISColoringValue *allcolors,*colors;
4075     ISColoring      ocoloring;
4076 
4077     /* set coloring for diagonal portion */
4078     ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr);
4079 
4080     /* set coloring for off-diagonal portion */
4081     ierr = ISAllGatherColors(((PetscObject)A)->comm,coloring->n,coloring->colors,PETSC_NULL,&allcolors);CHKERRQ(ierr);
4082     ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
4083     for (i=0; i<a->B->cmap->n; i++) {
4084       colors[i] = allcolors[a->garray[i]];
4085     }
4086     ierr = PetscFree(allcolors);CHKERRQ(ierr);
4087     ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
4088     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
4089     ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
4090   } else if (coloring->ctype == IS_COLORING_GHOSTED) {
4091     ISColoringValue *colors;
4092     PetscInt        *larray;
4093     ISColoring      ocoloring;
4094 
4095     /* set coloring for diagonal portion */
4096     ierr = PetscMalloc((a->A->cmap->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr);
4097     for (i=0; i<a->A->cmap->n; i++) {
4098       larray[i] = i + A->cmap->rstart;
4099     }
4100     ierr = ISGlobalToLocalMappingApply(A->cmapping,IS_GTOLM_MASK,a->A->cmap->n,larray,PETSC_NULL,larray);CHKERRQ(ierr);
4101     ierr = PetscMalloc((a->A->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
4102     for (i=0; i<a->A->cmap->n; i++) {
4103       colors[i] = coloring->colors[larray[i]];
4104     }
4105     ierr = PetscFree(larray);CHKERRQ(ierr);
4106     ierr = ISColoringCreate(PETSC_COMM_SELF,coloring->n,a->A->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
4107     ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr);
4108     ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
4109 
4110     /* set coloring for off-diagonal portion */
4111     ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr);
4112     ierr = ISGlobalToLocalMappingApply(A->cmapping,IS_GTOLM_MASK,a->B->cmap->n,a->garray,PETSC_NULL,larray);CHKERRQ(ierr);
4113     ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
4114     for (i=0; i<a->B->cmap->n; i++) {
4115       colors[i] = coloring->colors[larray[i]];
4116     }
4117     ierr = PetscFree(larray);CHKERRQ(ierr);
4118     ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
4119     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
4120     ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
4121   } else {
4122     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support ISColoringType %d",(int)coloring->ctype);
4123   }
4124 
4125   PetscFunctionReturn(0);
4126 }
4127 
4128 #if defined(PETSC_HAVE_ADIC)
4129 #undef __FUNCT__
4130 #define __FUNCT__ "MatSetValuesAdic_MPIAIJ"
4131 PetscErrorCode MatSetValuesAdic_MPIAIJ(Mat A,void *advalues)
4132 {
4133   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
4134   PetscErrorCode ierr;
4135 
4136   PetscFunctionBegin;
4137   ierr = MatSetValuesAdic_SeqAIJ(a->A,advalues);CHKERRQ(ierr);
4138   ierr = MatSetValuesAdic_SeqAIJ(a->B,advalues);CHKERRQ(ierr);
4139   PetscFunctionReturn(0);
4140 }
4141 #endif
4142 
4143 #undef __FUNCT__
4144 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ"
4145 PetscErrorCode MatSetValuesAdifor_MPIAIJ(Mat A,PetscInt nl,void *advalues)
4146 {
4147   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
4148   PetscErrorCode ierr;
4149 
4150   PetscFunctionBegin;
4151   ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr);
4152   ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr);
4153   PetscFunctionReturn(0);
4154 }
4155 
4156 #undef __FUNCT__
4157 #define __FUNCT__ "MatMerge"
4158 /*@
4159       MatMerge - Creates a single large PETSc matrix by concatenating sequential
4160                  matrices from each processor
4161 
4162     Collective on MPI_Comm
4163 
4164    Input Parameters:
4165 +    comm - the communicator the parallel matrix will live on
4166 .    inmat - the input sequential matrix (one per process)
4167 .    n - number of local columns (or PETSC_DECIDE)
4168 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4169 
4170    Output Parameter:
4171 .    outmat - the parallel matrix generated
4172 
4173     Level: advanced
4174 
4175    Notes: The number of columns of the matrix in EACH processor MUST be the same.
4176 
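   A minimal per-process calling sketch (error checking omitted); note that, as the
   code below shows, the input matrix inmat is destroyed by this routine:
.vb
     Mat seqA;   /* an assembled SeqAIJ matrix on each process (illustrative name) */
     Mat parA;
     MatMerge(PETSC_COMM_WORLD,seqA,PETSC_DECIDE,MAT_INITIAL_MATRIX,&parA);
.ve
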
4177 @*/
4178 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
4179 {
4180   PetscErrorCode ierr;
4181   PetscInt       m,N,i,rstart,nnz,Ii,*dnz,*onz;
4182   PetscInt       *indx;
4183   PetscScalar    *values;
4184 
4185   PetscFunctionBegin;
4186   ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
4187   if (scall == MAT_INITIAL_MATRIX){
4188     /* count nonzeros in each row, for diagonal and off diagonal portion of matrix */
4189     if (n == PETSC_DECIDE){
4190       ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr);
4191     }
4192     ierr = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
4193     rstart -= m;
4194 
4195     ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
4196     for (i=0;i<m;i++) {
4197       ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr);
4198       ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr);
4199       ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr);
4200     }
4201     /* This routine will ONLY return MPIAIJ type matrix */
4202     ierr = MatCreate(comm,outmat);CHKERRQ(ierr);
4203     ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
4204     ierr = MatSetType(*outmat,MATMPIAIJ);CHKERRQ(ierr);
4205     ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr);
4206     ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
4207 
4208   } else if (scall == MAT_REUSE_MATRIX){
4209     ierr = MatGetOwnershipRange(*outmat,&rstart,PETSC_NULL);CHKERRQ(ierr);
4210   } else {
4211     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
4212   }
4213 
4214   for (i=0;i<m;i++) {
4215     ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
4216     Ii    = i + rstart;
4217     ierr = MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
4218     ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
4219   }
4220   ierr = MatDestroy(inmat);CHKERRQ(ierr);
4221   ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4222   ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4223 
4224   PetscFunctionReturn(0);
4225 }
4226 
4227 #undef __FUNCT__
4228 #define __FUNCT__ "MatFileSplit"
4229 PetscErrorCode MatFileSplit(Mat A,char *outfile)
4230 {
4231   PetscErrorCode    ierr;
4232   PetscMPIInt       rank;
4233   PetscInt          m,N,i,rstart,nnz;
4234   size_t            len;
4235   const PetscInt    *indx;
4236   PetscViewer       out;
4237   char              *name;
4238   Mat               B;
4239   const PetscScalar *values;
4240 
4241   PetscFunctionBegin;
4242   ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr);
4243   ierr = MatGetSize(A,0,&N);CHKERRQ(ierr);
4244   /* Should this be the type of the diagonal block of A? */
4245   ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr);
4246   ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr);
4247   ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr);
4248   ierr = MatSeqAIJSetPreallocation(B,0,PETSC_NULL);CHKERRQ(ierr);
4249   ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr);
4250   for (i=0;i<m;i++) {
4251     ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
4252     ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
4253     ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
4254   }
4255   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4256   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4257 
4258   ierr = MPI_Comm_rank(((PetscObject)A)->comm,&rank);CHKERRQ(ierr);
4259   ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr);
4260   ierr = PetscMalloc((len+5)*sizeof(char),&name);CHKERRQ(ierr);
4261   sprintf(name,"%s.%d",outfile,rank);
4262   ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr);
4263   ierr = PetscFree(name);CHKERRQ(ierr);
4264   ierr = MatView(B,out);CHKERRQ(ierr);
4265   ierr = PetscViewerDestroy(out);CHKERRQ(ierr);
4266   ierr = MatDestroy(B);CHKERRQ(ierr);
4267   PetscFunctionReturn(0);
4268 }
4269 
4270 EXTERN PetscErrorCode MatDestroy_MPIAIJ(Mat);
4271 #undef __FUNCT__
4272 #define __FUNCT__ "MatDestroy_MPIAIJ_SeqsToMPI"
4273 PetscErrorCode PETSCMAT_DLLEXPORT MatDestroy_MPIAIJ_SeqsToMPI(Mat A)
4274 {
4275   PetscErrorCode       ierr;
4276   Mat_Merge_SeqsToMPI  *merge = PETSC_NULL;
4277   PetscContainer       container;
4278 
4279   PetscFunctionBegin;
4280   ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr);
4281   if (container) {
4282     ierr = PetscContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr);
4283     ierr = PetscFree(merge->id_r);CHKERRQ(ierr);
4284     ierr = PetscFree(merge->len_s);CHKERRQ(ierr);
4285     ierr = PetscFree(merge->len_r);CHKERRQ(ierr);
4286     ierr = PetscFree(merge->bi);CHKERRQ(ierr);
4287     ierr = PetscFree(merge->bj);CHKERRQ(ierr);
4288     ierr = PetscFree(merge->buf_ri[0]);CHKERRQ(ierr);
4289     ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr);
4290     ierr = PetscFree(merge->buf_rj[0]);CHKERRQ(ierr);
4291     ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr);
4292     ierr = PetscFree(merge->coi);CHKERRQ(ierr);
4293     ierr = PetscFree(merge->coj);CHKERRQ(ierr);
4294     ierr = PetscFree(merge->owners_co);CHKERRQ(ierr);
4295     ierr = PetscLayoutDestroy(merge->rowmap);CHKERRQ(ierr);
4296 
4297     ierr = PetscContainerDestroy(container);CHKERRQ(ierr);
4298     ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr);
4299   }
4300   ierr = PetscFree(merge);CHKERRQ(ierr);
4301 
4302   ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr);
4303   PetscFunctionReturn(0);
4304 }
4305 
4306 #include "../src/mat/utils/freespace.h"
4307 #include "petscbt.h"
4308 
4309 #undef __FUNCT__
4310 #define __FUNCT__ "MatMerge_SeqsToMPINumeric"
4311 /*@C
4312       MatMerge_SeqsToMPI - Creates an MPIAIJ matrix by adding sequential
4313                  matrices from each processor
4314 
4315     Collective on MPI_Comm
4316 
4317    Input Parameters:
4318 +    comm - the communicator the parallel matrix will live on
4319 .    seqmat - the input sequential matrix on each process
4320 .    m - number of local rows (or PETSC_DECIDE)
4321 .    n - number of local columns (or PETSC_DECIDE)
4322 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4323 
4324    Output Parameter:
4325 .    mpimat - the parallel matrix generated
4326 
4327     Level: advanced
4328 
4329    Notes:
4330      The dimensions of the sequential matrix in each processor MUST be the same.
4331      The input seqmat is included in the container "Mat_Merge_SeqsToMPI", and will be
4332      destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat.
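
   A minimal calling sketch using the driver MatMerge_SeqsToMPI() defined later in
   this file (error checking omitted; m and n are the local sizes described above,
   or PETSC_DECIDE):
.vb
     Mat mpimat;
     MatMerge_SeqsToMPI(PETSC_COMM_WORLD,seqmat,m,n,MAT_INITIAL_MATRIX,&mpimat);
     /* ... update the values of seqmat, keeping the same nonzero pattern ... */
     MatMerge_SeqsToMPI(PETSC_COMM_WORLD,seqmat,m,n,MAT_REUSE_MATRIX,&mpimat);
.ve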
4333 @*/
4334 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPINumeric(Mat seqmat,Mat mpimat)
4335 {
4336   PetscErrorCode       ierr;
4337   MPI_Comm             comm=((PetscObject)mpimat)->comm;
4338   Mat_SeqAIJ           *a=(Mat_SeqAIJ*)seqmat->data;
4339   PetscMPIInt          size,rank,taga,*len_s;
4340   PetscInt             N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj=a->j;
4341   PetscInt             proc,m;
4342   PetscInt             **buf_ri,**buf_rj;
4343   PetscInt             k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj;
4344   PetscInt             nrows,**buf_ri_k,**nextrow,**nextai;
4345   MPI_Request          *s_waits,*r_waits;
4346   MPI_Status           *status;
4347   MatScalar            *aa=a->a;
4348   MatScalar            **abuf_r,*ba_i;
4349   Mat_Merge_SeqsToMPI  *merge;
4350   PetscContainer       container;
4351 
4352   PetscFunctionBegin;
4353   ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
4354 
4355   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4356   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4357 
4358   ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr);
4359   if (container) {
4360     ierr  = PetscContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr);
4361   }
4362   bi     = merge->bi;
4363   bj     = merge->bj;
4364   buf_ri = merge->buf_ri;
4365   buf_rj = merge->buf_rj;
4366 
4367   ierr   = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr);
4368   owners = merge->rowmap->range;
4369   len_s  = merge->len_s;
4370 
4371   /* send and recv matrix values */
4372   /*-----------------------------*/
4373   ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr);
4374   ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr);
4375 
4376   ierr = PetscMalloc((merge->nsend+1)*sizeof(MPI_Request),&s_waits);CHKERRQ(ierr);
4377   for (proc=0,k=0; proc<size; proc++){
4378     if (!len_s[proc]) continue;
4379     i = owners[proc];
4380     ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr);
4381     k++;
4382   }
4383 
4384   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);}
4385   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);}
4386   ierr = PetscFree(status);CHKERRQ(ierr);
4387 
4388   ierr = PetscFree(s_waits);CHKERRQ(ierr);
4389   ierr = PetscFree(r_waits);CHKERRQ(ierr);
4390 
4391   /* insert mat values of mpimat */
4392   /*----------------------------*/
4393   ierr = PetscMalloc(N*sizeof(PetscScalar),&ba_i);CHKERRQ(ierr);
4394   ierr = PetscMalloc3(merge->nrecv,PetscInt*,&buf_ri_k,merge->nrecv,PetscInt*,&nextrow,merge->nrecv,PetscInt*,&nextai);CHKERRQ(ierr);
4395 
4396   for (k=0; k<merge->nrecv; k++){
4397     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th received i-structure */
4398     nrows = *(buf_ri_k[k]);
4399     nextrow[k]  = buf_ri_k[k]+1;  /* next row number of k-th received i-structure */
4400     nextai[k]   = buf_ri_k[k] + (nrows + 1);/* points to the next i-structure of k-th received i-structure */
4401   }
4402 
4403   /* set values of ba */
4404   m = merge->rowmap->n;
4405   for (i=0; i<m; i++) {
4406     arow = owners[rank] + i;
4407     bj_i = bj+bi[i];  /* col indices of the i-th row of mpimat */
4408     bnzi = bi[i+1] - bi[i];
4409     ierr = PetscMemzero(ba_i,bnzi*sizeof(PetscScalar));CHKERRQ(ierr);
4410 
4411     /* add local non-zero vals of this proc's seqmat into ba */
4412     anzi = ai[arow+1] - ai[arow];
4413     aj   = a->j + ai[arow];
4414     aa   = a->a + ai[arow];
4415     nextaj = 0;
4416     for (j=0; nextaj<anzi; j++){
4417       if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */
4418         ba_i[j] += aa[nextaj++];
4419       }
4420     }
4421 
4422     /* add received vals into ba */
4423     for (k=0; k<merge->nrecv; k++){ /* k-th received message */
4424       /* i-th row */
4425       if (i == *nextrow[k]) {
4426         anzi = *(nextai[k]+1) - *nextai[k];
4427         aj   = buf_rj[k] + *(nextai[k]);
4428         aa   = abuf_r[k] + *(nextai[k]);
4429         nextaj = 0;
4430         for (j=0; nextaj<anzi; j++){
4431           if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */
4432             ba_i[j] += aa[nextaj++];
4433           }
4434         }
4435         nextrow[k]++; nextai[k]++;
4436       }
4437     }
4438     ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr);
4439   }
4440   ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4441   ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4442 
4443   ierr = PetscFree(abuf_r[0]);CHKERRQ(ierr);
4444   ierr = PetscFree(abuf_r);CHKERRQ(ierr);
4445   ierr = PetscFree(ba_i);CHKERRQ(ierr);
4446   ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
4447   ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
4448   PetscFunctionReturn(0);
4449 }
4450 
4451 #undef __FUNCT__
4452 #define __FUNCT__ "MatMerge_SeqsToMPISymbolic"
4453 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPISymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat)
4454 {
4455   PetscErrorCode       ierr;
4456   Mat                  B_mpi;
4457   Mat_SeqAIJ           *a=(Mat_SeqAIJ*)seqmat->data;
4458   PetscMPIInt          size,rank,tagi,tagj,*len_s,*len_si,*len_ri;
4459   PetscInt             **buf_rj,**buf_ri,**buf_ri_k;
4460   PetscInt             M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j;
4461   PetscInt             len,proc,*dnz,*onz;
4462   PetscInt             k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0;
4463   PetscInt             nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai;
4464   MPI_Request          *si_waits,*sj_waits,*ri_waits,*rj_waits;
4465   MPI_Status           *status;
4466   PetscFreeSpaceList   free_space=PETSC_NULL,current_space=PETSC_NULL;
4467   PetscBT              lnkbt;
4468   Mat_Merge_SeqsToMPI  *merge;
4469   PetscContainer       container;
4470 
4471   PetscFunctionBegin;
4472   ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4473 
4474   /* make sure it is a PETSc comm */
4475   ierr = PetscCommDuplicate(comm,&comm,PETSC_NULL);CHKERRQ(ierr);
4476   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4477   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4478 
4479   ierr = PetscNew(Mat_Merge_SeqsToMPI,&merge);CHKERRQ(ierr);
4480   ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr);
4481 
4482   /* determine row ownership */
4483   /*---------------------------------------------------------*/
4484   ierr = PetscLayoutCreate(comm,&merge->rowmap);CHKERRQ(ierr);
4485   ierr = PetscLayoutSetLocalSize(merge->rowmap,m);CHKERRQ(ierr);
4486   ierr = PetscLayoutSetSize(merge->rowmap,M);CHKERRQ(ierr);
4487   ierr = PetscLayoutSetBlockSize(merge->rowmap,1);CHKERRQ(ierr);
4488   ierr = PetscLayoutSetUp(merge->rowmap);CHKERRQ(ierr);
4489   ierr = PetscMalloc(size*sizeof(PetscMPIInt),&len_si);CHKERRQ(ierr);
4490   ierr = PetscMalloc(size*sizeof(PetscMPIInt),&merge->len_s);CHKERRQ(ierr);
4491 
4492   m      = merge->rowmap->n;
4493   M      = merge->rowmap->N;
4494   owners = merge->rowmap->range;
4495 
4496   /* determine the number of messages to send, their lengths */
4497   /*---------------------------------------------------------*/
4498   len_s  = merge->len_s;
4499 
4500   len = 0;  /* length of buf_si[] */
4501   merge->nsend = 0;
4502   for (proc=0; proc<size; proc++){
4503     len_si[proc] = 0;
4504     if (proc == rank){
4505       len_s[proc] = 0;
4506     } else {
4507       len_si[proc] = owners[proc+1] - owners[proc] + 1;
4508       len_s[proc] = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */
4509     }
4510     if (len_s[proc]) {
4511       merge->nsend++;
4512       nrows = 0;
4513       for (i=owners[proc]; i<owners[proc+1]; i++){
4514         if (ai[i+1] > ai[i]) nrows++;
4515       }
4516       len_si[proc] = 2*(nrows+1);
4517       len += len_si[proc];
4518     }
4519   }
4520 
4521   /* determine the number and length of messages to receive for ij-structure */
4522   /*-------------------------------------------------------------------------*/
4523   ierr = PetscGatherNumberOfMessages(comm,PETSC_NULL,len_s,&merge->nrecv);CHKERRQ(ierr);
4524   ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr);
4525 
4526   /* post the Irecv of j-structure */
4527   /*-------------------------------*/
4528   ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr);
4529   ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr);
4530 
4531   /* post the Isend of j-structure */
4532   /*--------------------------------*/
4533   ierr = PetscMalloc2(merge->nsend,MPI_Request,&si_waits,merge->nsend,MPI_Request,&sj_waits);CHKERRQ(ierr);
4534 
4535   for (proc=0, k=0; proc<size; proc++){
4536     if (!len_s[proc]) continue;
4537     i = owners[proc];
4538     ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr);
4539     k++;
4540   }
4541 
4542   /* receives and sends of j-structure are complete */
4543   /*------------------------------------------------*/
4544   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);}
4545   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);}
4546 
4547   /* send and recv i-structure */
4548   /*---------------------------*/
4549   ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr);
4550   ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr);
4551 
4552   ierr = PetscMalloc((len+1)*sizeof(PetscInt),&buf_s);CHKERRQ(ierr);
4553   buf_si = buf_s;  /* points to the beginning of k-th msg to be sent */
4554   for (proc=0,k=0; proc<size; proc++){
4555     if (!len_s[proc]) continue;
4556     /* form outgoing message for i-structure:
4557          buf_si[0]:                 nrows to be sent
4558                [1:nrows]:           row index (global)
4559                [nrows+1:2*nrows+1]: i-structure index
4560     */
4561     /*-------------------------------------------*/
4562     nrows = len_si[proc]/2 - 1;
4563     buf_si_i    = buf_si + nrows+1;
4564     buf_si[0]   = nrows;
4565     buf_si_i[0] = 0;
4566     nrows = 0;
4567     for (i=owners[proc]; i<owners[proc+1]; i++){
4568       anzi = ai[i+1] - ai[i];
4569       if (anzi) {
4570         buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */
4571         buf_si[nrows+1] = i-owners[proc]; /* local row index */
4572         nrows++;
4573       }
4574     }
4575     ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr);
4576     k++;
4577     buf_si += len_si[proc];
4578   }
4579 
4580   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);}
4581   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);}
4582 
4583   ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr);
4584   for (i=0; i<merge->nrecv; i++){
4585     ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr);
4586   }
4587 
4588   ierr = PetscFree(len_si);CHKERRQ(ierr);
4589   ierr = PetscFree(len_ri);CHKERRQ(ierr);
4590   ierr = PetscFree(rj_waits);CHKERRQ(ierr);
4591   ierr = PetscFree2(si_waits,sj_waits);CHKERRQ(ierr);
4592   ierr = PetscFree(ri_waits);CHKERRQ(ierr);
4593   ierr = PetscFree(buf_s);CHKERRQ(ierr);
4594   ierr = PetscFree(status);CHKERRQ(ierr);
4595 
4596   /* compute a local seq matrix in each processor */
4597   /*----------------------------------------------*/
4598   /* allocate bi array and free space for accumulating nonzero column info */
4599   ierr = PetscMalloc((m+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr);
4600   bi[0] = 0;
4601 
4602   /* create and initialize a linked list */
4603   nlnk = N+1;
4604   ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4605 
4606   /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */
4607   len = 0;
4608   len  = ai[owners[rank+1]] - ai[owners[rank]];
4609   ierr = PetscFreeSpaceGet((PetscInt)(2*len+1),&free_space);CHKERRQ(ierr);
4610   current_space = free_space;
4611 
4612   /* determine symbolic info for each local row */
4613   ierr = PetscMalloc3(merge->nrecv,PetscInt*,&buf_ri_k,merge->nrecv,PetscInt*,&nextrow,merge->nrecv,PetscInt*,&nextai);CHKERRQ(ierr);
4614 
4615   for (k=0; k<merge->nrecv; k++){
4616     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th received i-structure */
4617     nrows = *buf_ri_k[k];
4618     nextrow[k]  = buf_ri_k[k] + 1;  /* next row number of k-th received i-structure */
4619     nextai[k]   = buf_ri_k[k] + (nrows + 1);/* points to the next i-structure of k-th received i-structure */
4620   }
4621 
4622   ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
4623   len = 0;
4624   for (i=0;i<m;i++) {
4625     bnzi   = 0;
4626     /* add local non-zero cols of this proc's seqmat into lnk */
4627     arow   = owners[rank] + i;
4628     anzi   = ai[arow+1] - ai[arow];
4629     aj     = a->j + ai[arow];
4630     ierr = PetscLLAdd(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4631     bnzi += nlnk;
4632     /* add received col data into lnk */
4633     for (k=0; k<merge->nrecv; k++){ /* k-th received message */
4634       if (i == *nextrow[k]) { /* i-th row */
4635         anzi = *(nextai[k]+1) - *nextai[k];
4636         aj   = buf_rj[k] + *nextai[k];
4637         ierr = PetscLLAdd(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4638         bnzi += nlnk;
4639         nextrow[k]++; nextai[k]++;
4640       }
4641     }
4642     if (len < bnzi) len = bnzi;  /* =max(bnzi) */
4643 
4644     /* if free space is not available, make more free space */
4645     if (current_space->local_remaining<bnzi) {
4646       ierr = PetscFreeSpaceGet(bnzi+current_space->total_array_size,&current_space);CHKERRQ(ierr);
4647       nspacedouble++;
4648     }
4649     /* copy data into free space, then initialize lnk */
4650     ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
4651     ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr);
4652 
4653     current_space->array           += bnzi;
4654     current_space->local_used      += bnzi;
4655     current_space->local_remaining -= bnzi;
4656 
4657     bi[i+1] = bi[i] + bnzi;
4658   }
4659 
4660   ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
4661 
4662   ierr = PetscMalloc((bi[m]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr);
4663   ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr);
4664   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
4665 
4666   /* create symbolic parallel matrix B_mpi */
4667   /*---------------------------------------*/
4668   ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr);
4669   if (n==PETSC_DECIDE) {
4670     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr);
4671   } else {
4672     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
4673   }
4674   ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr);
4675   ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr);
4676   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
4677 
4678   /* B_mpi is not ready for use - assembly will be done by MatMerge_SeqsToMPINumeric() */
4679   B_mpi->assembled     = PETSC_FALSE;
4680   B_mpi->ops->destroy  = MatDestroy_MPIAIJ_SeqsToMPI;
4681   merge->bi            = bi;
4682   merge->bj            = bj;
4683   merge->buf_ri        = buf_ri;
4684   merge->buf_rj        = buf_rj;
4685   merge->coi           = PETSC_NULL;
4686   merge->coj           = PETSC_NULL;
4687   merge->owners_co     = PETSC_NULL;
4688 
4689   /* attach the supporting struct to B_mpi for reuse */
4690   ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
4691   ierr = PetscContainerSetPointer(container,merge);CHKERRQ(ierr);
4692   ierr = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr);
4693   *mpimat = B_mpi;
4694 
4695   ierr = PetscCommDestroy(&comm);CHKERRQ(ierr);
4696   ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4697   PetscFunctionReturn(0);
4698 }
4699 
4700 #undef __FUNCT__
4701 #define __FUNCT__ "MatMerge_SeqsToMPI"
4702 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPI(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat)
4703 {
4704   PetscErrorCode   ierr;
4705 
4706   PetscFunctionBegin;
4707   ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4708   if (scall == MAT_INITIAL_MATRIX){
4709     ierr = MatMerge_SeqsToMPISymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr);
4710   }
4711   ierr = MatMerge_SeqsToMPINumeric(seqmat,*mpimat);CHKERRQ(ierr);
4712   ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4713   PetscFunctionReturn(0);
4714 }
4715 
4716 #undef __FUNCT__
4717 #define __FUNCT__ "MatGetLocalMat"
4718 /*@
4719      MatGetLocalMat - Creates a SeqAIJ matrix by taking all its local rows
4720      MatGetLocalMat - Creates a SeqAIJ matrix by taking all the local rows of A
4721     Not Collective
4722 
4723    Input Parameters:
4724 +    A - the matrix
4725 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4726 
4727    Output Parameter:
4728 .    A_loc - the local sequential matrix generated
4729 
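   A minimal calling sketch (error checking omitted); the result has A's local row
   count and A's global column count:
.vb
     Mat A_loc;
     MatGetLocalMat(A,MAT_INITIAL_MATRIX,&A_loc);
     /* ... later, refresh the values without reallocating: */
     MatGetLocalMat(A,MAT_REUSE_MATRIX,&A_loc);
.ve
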
4730     Level: developer
4731 
4732 @*/
4733 PetscErrorCode PETSCMAT_DLLEXPORT MatGetLocalMat(Mat A,MatReuse scall,Mat *A_loc)
4734 {
4735   PetscErrorCode  ierr;
4736   Mat_MPIAIJ      *mpimat=(Mat_MPIAIJ*)A->data;
4737   Mat_SeqAIJ      *mat,*a=(Mat_SeqAIJ*)(mpimat->A)->data,*b=(Mat_SeqAIJ*)(mpimat->B)->data;
4738   PetscInt        *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*cmap=mpimat->garray;
4739   MatScalar       *aa=a->a,*ba=b->a,*cam;
4740   PetscScalar     *ca;
4741   PetscInt        am=A->rmap->n,i,j,k,cstart=A->cmap->rstart;
4742   PetscInt        *ci,*cj,col,ncols_d,ncols_o,jo;
4743 
4744   PetscFunctionBegin;
4745   ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
4746   if (scall == MAT_INITIAL_MATRIX){
4747     ierr = PetscMalloc((1+am)*sizeof(PetscInt),&ci);CHKERRQ(ierr);
4748     ci[0] = 0;
4749     for (i=0; i<am; i++){
4750       ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]);
4751     }
4752     ierr = PetscMalloc((1+ci[am])*sizeof(PetscInt),&cj);CHKERRQ(ierr);
4753     ierr = PetscMalloc((1+ci[am])*sizeof(PetscScalar),&ca);CHKERRQ(ierr);
4754     k = 0;
4755     for (i=0; i<am; i++) {
4756       ncols_o = bi[i+1] - bi[i];
4757       ncols_d = ai[i+1] - ai[i];
4758       /* off-diagonal portion of A */
4759       for (jo=0; jo<ncols_o; jo++) {
4760         col = cmap[*bj];
4761         if (col >= cstart) break;
4762         cj[k]   = col; bj++;
4763         ca[k++] = *ba++;
4764       }
4765       /* diagonal portion of A */
4766       for (j=0; j<ncols_d; j++) {
4767         cj[k]   = cstart + *aj++;
4768         ca[k++] = *aa++;
4769       }
4770       /* off-diagonal portion of A */
4771       for (j=jo; j<ncols_o; j++) {
4772         cj[k]   = cmap[*bj++];
4773         ca[k++] = *ba++;
4774       }
4775     }
4776     /* put together the new matrix */
4777     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr);
4778     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
4779     /* Since these are PETSc arrays, change flags to free them as necessary. */
4780     mat          = (Mat_SeqAIJ*)(*A_loc)->data;
4781     mat->free_a  = PETSC_TRUE;
4782     mat->free_ij = PETSC_TRUE;
4783     mat->nonew   = 0;
4784   } else if (scall == MAT_REUSE_MATRIX){
4785     mat=(Mat_SeqAIJ*)(*A_loc)->data;
4786     ci = mat->i; cj = mat->j; cam = mat->a;
4787     for (i=0; i<am; i++) {
4788       /* off-diagonal portion of A */
4789       ncols_o = bi[i+1] - bi[i];
4790       for (jo=0; jo<ncols_o; jo++) {
4791         col = cmap[*bj];
4792         if (col >= cstart) break;
4793         *cam++ = *ba++; bj++;
4794       }
4795       /* diagonal portion of A */
4796       ncols_d = ai[i+1] - ai[i];
4797       for (j=0; j<ncols_d; j++) *cam++ = *aa++;
4798       /* off-diagonal portion of A */
4799       for (j=jo; j<ncols_o; j++) {
4800         *cam++ = *ba++; bj++;
4801       }
4802     }
4803   } else {
4804     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
4805   }
4806 
4807   ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
4808   PetscFunctionReturn(0);
4809 }
4810 
4811 #undef __FUNCT__
4812 #define __FUNCT__ "MatGetLocalMatCondensed"
4813 /*@C
4814      MatGetLocalMatCondensed - Creates a SeqAIJ matrix by taking all the local rows and NON-ZERO columns of A
4815 
4816     Not Collective
4817 
4818    Input Parameters:
4819 +    A - the matrix
4820 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4821 -    row, col - index sets of rows and columns to extract (or PETSC_NULL)
4822 
4823    Output Parameter:
4824 .    A_loc - the local sequential matrix generated
4825 
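   A minimal calling sketch (error checking omitted); passing PETSC_NULL for row and
   col lets the routine build the index sets itself:
.vb
     Mat A_loc;
     MatGetLocalMatCondensed(A,MAT_INITIAL_MATRIX,PETSC_NULL,PETSC_NULL,&A_loc);
.ve
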
4826     Level: developer
4827 
4828 @*/
4829 PetscErrorCode PETSCMAT_DLLEXPORT MatGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc)
4830 {
4831   Mat_MPIAIJ        *a=(Mat_MPIAIJ*)A->data;
4832   PetscErrorCode    ierr;
4833   PetscInt          i,start,end,ncols,nzA,nzB,*cmap,imark,*idx;
4834   IS                isrowa,iscola;
4835   Mat               *aloc;
4836 
4837   PetscFunctionBegin;
4838   ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
4839   if (!row){
4840     start = A->rmap->rstart; end = A->rmap->rend;
4841     ierr = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr);
4842   } else {
4843     isrowa = *row;
4844   }
4845   if (!col){
4846     start = A->cmap->rstart;
4847     cmap  = a->garray;
4848     nzA   = a->A->cmap->n;
4849     nzB   = a->B->cmap->n;
4850     ierr  = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr);
4851     ncols = 0;
4852     for (i=0; i<nzB; i++) {
4853       if (cmap[i] < start) idx[ncols++] = cmap[i];
4854       else break;
4855     }
4856     imark = i;
4857     for (i=0; i<nzA; i++) idx[ncols++] = start + i;
4858     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i];
4859     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&iscola);CHKERRQ(ierr);
4860   } else {
4861     iscola = *col;
4862   }
4863   if (scall != MAT_INITIAL_MATRIX){
4864     ierr = PetscMalloc(sizeof(Mat),&aloc);CHKERRQ(ierr);
4865     aloc[0] = *A_loc;
4866   }
4867   ierr = MatGetSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr);
4868   *A_loc = aloc[0];
4869   ierr = PetscFree(aloc);CHKERRQ(ierr);
4870   if (!row){
4871     ierr = ISDestroy(isrowa);CHKERRQ(ierr);
4872   }
4873   if (!col){
4874     ierr = ISDestroy(iscola);CHKERRQ(ierr);
4875   }
4876   ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
4877   PetscFunctionReturn(0);
4878 }
4879 
4880 #undef __FUNCT__
4881 #define __FUNCT__ "MatGetBrowsOfAcols"
4882 /*@C
4883     MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns of the local portion of A
4884 
4885     Collective on Mat
4886 
4887    Input Parameters:
4888 +    A,B - the matrices in mpiaij format
4889 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4890 -    rowb, colb - index sets of rows and columns of B to extract (or PETSC_NULL)
4891 
4892    Output Parameter:
4893 +    rowb, colb - index sets of rows and columns of B to extract
4894 .    brstart - row index of B_seq at which B's own local rows begin (the next B->rmap->n rows of B_seq)
4895 -    B_seq - the sequential matrix generated
4896 
4897     Level: developer
4898 
4899 @*/
4900 PetscErrorCode PETSCMAT_DLLEXPORT MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,PetscInt *brstart,Mat *B_seq)
4901 {
4902   Mat_MPIAIJ        *a=(Mat_MPIAIJ*)A->data;
4903   PetscErrorCode    ierr;
4904   PetscInt          *idx,i,start,ncols,nzA,nzB,*cmap,imark;
4905   IS                isrowb,iscolb;
4906   Mat               *bseq;
4907 
4908   PetscFunctionBegin;
4909   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend){
4910     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
4911   }
4912   ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
4913 
4914   if (scall == MAT_INITIAL_MATRIX){
4915     start = A->cmap->rstart;
4916     cmap  = a->garray;
4917     nzA   = a->A->cmap->n;
4918     nzB   = a->B->cmap->n;
4919     ierr  = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr);
4920     ncols = 0;
4921     for (i=0; i<nzB; i++) {  /* row < local row index */
4922       if (cmap[i] < start) idx[ncols++] = cmap[i];
4923       else break;
4924     }
4925     imark = i;
4926     for (i=0; i<nzA; i++) idx[ncols++] = start + i;  /* local rows */
4927     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */
4928     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&isrowb);CHKERRQ(ierr);
4929     *brstart = imark;
4930     ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr);
4931   } else {
4932     if (!rowb || !colb) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX");
4933     isrowb = *rowb; iscolb = *colb;
4934     ierr = PetscMalloc(sizeof(Mat),&bseq);CHKERRQ(ierr);
4935     bseq[0] = *B_seq;
4936   }
4937   ierr = MatGetSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr);
4938   *B_seq = bseq[0];
4939   ierr = PetscFree(bseq);CHKERRQ(ierr);
4940   if (!rowb){
4941     ierr = ISDestroy(isrowb);CHKERRQ(ierr);
4942   } else {
4943     *rowb = isrowb;
4944   }
4945   if (!colb){
4946     ierr = ISDestroy(iscolb);CHKERRQ(ierr);
4947   } else {
4948     *colb = iscolb;
4949   }
4950   ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
4951   PetscFunctionReturn(0);
4952 }
4953 
4954 #undef __FUNCT__
4955 #define __FUNCT__ "MatGetBrowsOfAoCols"
4956     MatGetBrowsOfAoCols - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns
4957     of the OFF-DIAGONAL portion of the local A
4958     of the OFF-DIAGONAL portion of local A
4959 
4960     Collective on Mat
4961 
4962    Input Parameters:
4963 +    A,B - the matrices in mpiaij format
4964 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4965 .    startsj - starting point in B's sending and receiving j-arrays, saved for MAT_REUSE (or PETSC_NULL)
4966 .    startsj_r - similar to startsj for receives
4967 -    bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or PETSC_NULL)
4968 
4969    Output Parameter:
4970 .    B_oth - the sequential matrix generated
4971 
4972     Level: developer
4973 
4974 @*/
4975 PetscErrorCode PETSCMAT_DLLEXPORT MatGetBrowsOfAoCols(Mat A,Mat B,MatReuse scall,PetscInt **startsj,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth)
4976 {
4977   VecScatter_MPI_General *gen_to,*gen_from;
4978   PetscErrorCode         ierr;
4979   Mat_MPIAIJ             *a=(Mat_MPIAIJ*)A->data;
4980   Mat_SeqAIJ             *b_oth;
4981   VecScatter             ctx=a->Mvctx;
4982   MPI_Comm               comm=((PetscObject)ctx)->comm;
4983   PetscMPIInt            *rprocs,*sprocs,tag=((PetscObject)ctx)->tag,rank;
4984   PetscInt               *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap->n,row,*b_othi,*b_othj;
4985   PetscScalar            *rvalues,*svalues;
4986   MatScalar              *b_otha,*bufa,*bufA;
4987   PetscInt               i,j,k,l,ll,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len;
4988   MPI_Request            *rwaits = PETSC_NULL,*swaits = PETSC_NULL;
4989   MPI_Status             *sstatus,rstatus;
4990   PetscMPIInt            jj;
4991   PetscInt               *cols,sbs,rbs;
4992   PetscScalar            *vals;
4993 
4994   PetscFunctionBegin;
4995   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend){
4996     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
4997   }
4998   ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
4999   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
5000 
5001   gen_to   = (VecScatter_MPI_General*)ctx->todata;
5002   gen_from = (VecScatter_MPI_General*)ctx->fromdata;
5003   rvalues  = gen_from->values; /* holds the length of receiving row */
5004   svalues  = gen_to->values;   /* holds the length of sending row */
5005   nrecvs   = gen_from->n;
5006   nsends   = gen_to->n;
5007 
5008   ierr = PetscMalloc2(nrecvs,MPI_Request,&rwaits,nsends,MPI_Request,&swaits);CHKERRQ(ierr);
5009   srow     = gen_to->indices;   /* local row index to be sent */
5010   sstarts  = gen_to->starts;
5011   sprocs   = gen_to->procs;
5012   sstatus  = gen_to->sstatus;
5013   sbs      = gen_to->bs;
5014   rstarts  = gen_from->starts;
5015   rprocs   = gen_from->procs;
5016   rbs      = gen_from->bs;
5017 
5018   if (!startsj || !bufa_ptr) scall = MAT_INITIAL_MATRIX;
5019   if (scall == MAT_INITIAL_MATRIX){
5020     /* i-array */
5021     /*---------*/
5022     /*  post receives */
5023     for (i=0; i<nrecvs; i++){
5024       rowlen = (PetscInt*)rvalues + rstarts[i]*rbs;
5025       nrows = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */
5026       ierr = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5027     }
5028 
5029     /* pack the outgoing message */
5030     ierr = PetscMalloc2(nsends+1,PetscInt,&sstartsj,nrecvs+1,PetscInt,&rstartsj);CHKERRQ(ierr);
5031     sstartsj[0] = 0;  rstartsj[0] = 0;
5032     len = 0; /* total length of j or a array to be sent */
5033     k = 0;
5034     for (i=0; i<nsends; i++){
5035       rowlen = (PetscInt*)svalues + sstarts[i]*sbs;
5036       nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5037       for (j=0; j<nrows; j++) {
5038         row = srow[k] + B->rmap->range[rank]; /* global row idx */
5039         for (l=0; l<sbs; l++){
5040           ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); /* rowlength */
5041           rowlen[j*sbs+l] = ncols;
5042           len += ncols;
5043           ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr);
5044         }
5045         k++;
5046       }
5047       ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5048       sstartsj[i+1] = len;  /* starting point of (i+1)-th outgoing msg in bufj and bufa */
5049     }
5050     /* recvs and sends of i-array are completed */
5051     i = nrecvs;
5052     while (i--) {
5053       ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5054     }
5055     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
5056 
5057     /* allocate buffers for sending j and a arrays */
5058     ierr = PetscMalloc((len+1)*sizeof(PetscInt),&bufj);CHKERRQ(ierr);
5059     ierr = PetscMalloc((len+1)*sizeof(PetscScalar),&bufa);CHKERRQ(ierr);
5060 
5061     /* create i-array of B_oth */
5062     ierr = PetscMalloc((aBn+2)*sizeof(PetscInt),&b_othi);CHKERRQ(ierr);
5063     b_othi[0] = 0;
5064     len = 0; /* total length of j or a array to be received */
5065     k = 0;
5066     for (i=0; i<nrecvs; i++){
5067       rowlen = (PetscInt*)rvalues + rstarts[i]*rbs;
5068       nrows = rbs*(rstarts[i+1]-rstarts[i]); /* num of rows to be received */
5069       for (j=0; j<nrows; j++) {
5070         b_othi[k+1] = b_othi[k] + rowlen[j];
5071         len += rowlen[j]; k++;
5072       }
5073       rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */
5074     }
5075 
5076     /* allocate space for j and a arrays of B_oth */
5077     ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(PetscInt),&b_othj);CHKERRQ(ierr);
5078     ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(MatScalar),&b_otha);CHKERRQ(ierr);
5079 
5080     /* j-array */
5081     /*---------*/
5082     /*  post receives of j-array */
5083     for (i=0; i<nrecvs; i++){
5084       nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
5085       ierr = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5086     }
5087 
5088     /* pack the outgoing message j-array */
5089     k = 0;
5090     for (i=0; i<nsends; i++){
5091       nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5092       bufJ = bufj+sstartsj[i];
5093       for (j=0; j<nrows; j++) {
5094         row  = srow[k++] + B->rmap->range[rank]; /* global row idx */
5095         for (ll=0; ll<sbs; ll++){
5096           ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr);
5097           for (l=0; l<ncols; l++){
5098             *bufJ++ = cols[l];
5099           }
5100           ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr);
5101         }
5102       }
5103       ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5104     }
5105 
5106     /* recvs and sends of j-array are completed */
5107     i = nrecvs;
5108     while (i--) {
5109       ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5110     }
5111     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
5112   } else if (scall == MAT_REUSE_MATRIX){
5113     sstartsj = *startsj;
5114     rstartsj = *startsj_r;
5115     bufa     = *bufa_ptr;
5116     b_oth    = (Mat_SeqAIJ*)(*B_oth)->data;
5117     b_otha   = b_oth->a;
5118   } else {
5119     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
5120   }
5121 
5122   /* a-array */
5123   /*---------*/
5124   /*  post receives of a-array */
5125   for (i=0; i<nrecvs; i++){
5126     nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
5127     ierr = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5128   }
5129 
5130   /* pack the outgoing message a-array */
5131   k = 0;
5132   for (i=0; i<nsends; i++){
5133     nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5134     bufA = bufa+sstartsj[i];
5135     for (j=0; j<nrows; j++) {
5136       row  = srow[k++] + B->rmap->range[rank]; /* global row idx */
5137       for (ll=0; ll<sbs; ll++){
5138         ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr);
5139         for (l=0; l<ncols; l++){
5140           *bufA++ = vals[l];
5141         }
5142         ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr);
5143       }
5144     }
5145     ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5146   }
5147   /* recvs and sends of a-array are completed */
5148   i = nrecvs;
5149   while (i--) {
5150     ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5151   }
5152   if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
5153   ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr);
5154 
5155   if (scall == MAT_INITIAL_MATRIX){
5156     /* put together the new matrix */
5157     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr);
5158 
5159     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
5160     /* Since these are PETSc arrays, change flags to free them as necessary. */
5161     b_oth          = (Mat_SeqAIJ *)(*B_oth)->data;
5162     b_oth->free_a  = PETSC_TRUE;
5163     b_oth->free_ij = PETSC_TRUE;
5164     b_oth->nonew   = 0;
5165 
5166     ierr = PetscFree(bufj);CHKERRQ(ierr);
5167     if (!startsj || !bufa_ptr){
5168       ierr = PetscFree2(sstartsj,rstartsj);CHKERRQ(ierr);
5169       ierr = PetscFree(bufa_ptr);CHKERRQ(ierr);
5170     } else {
5171       *startsj   = sstartsj;
5172       *startsj_r = rstartsj;
5173       *bufa_ptr  = bufa;
5174     }
5175   }
5176   ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
5177   PetscFunctionReturn(0);
5178 }
5179 
5180 #undef __FUNCT__
5181 #define __FUNCT__ "MatGetCommunicationStructs"
5182 /*@C
5183   MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication.
5184 
5185   Not Collective
5186 
5187   Input Parameters:
5188 . A - The matrix in mpiaij format
5189 
5190   Output Parameter:
5191 + lvec - The local vector holding off-process values from the argument to a matrix-vector product
5192 . colmap - A map from global column index to local index into lvec
5193 - multScatter - A scatter from the argument of a matrix-vector product to lvec
5194 
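  A minimal calling sketch (error checking omitted); this assumes a build without
  PETSC_USE_CTABLE, so colmap is returned as an integer array:
.vb
    Vec        lvec;
    PetscInt   *colmap;
    VecScatter Mvctx;
    MatGetCommunicationStructs(A,&lvec,&colmap,&Mvctx);
.ve
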
5195   Level: developer
5196 
5197 @*/
5198 #if defined (PETSC_USE_CTABLE)
5199 PetscErrorCode PETSCMAT_DLLEXPORT MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter)
5200 #else
5201 PetscErrorCode PETSCMAT_DLLEXPORT MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter)
5202 #endif
5203 {
5204   Mat_MPIAIJ *a;
5205 
5206   PetscFunctionBegin;
5207   PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
5208   PetscValidPointer(lvec, 2);
5209   PetscValidPointer(colmap, 3);
5210   PetscValidPointer(multScatter, 4);
5211   a = (Mat_MPIAIJ *) A->data;
5212   if (lvec) *lvec = a->lvec;
5213   if (colmap) *colmap = a->colmap;
5214   if (multScatter) *multScatter = a->Mvctx;
5215   PetscFunctionReturn(0);
5216 }
5217 
5218 EXTERN_C_BEGIN
5219 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_MPIAIJ_MPIAIJCRL(Mat,const MatType,MatReuse,Mat*);
5220 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_MPIAIJ_MPIAIJPERM(Mat,const MatType,MatReuse,Mat*);
5221 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_MPIAIJ_MPISBAIJ(Mat,const MatType,MatReuse,Mat*);
5222 EXTERN_C_END
5223 
5224 #undef __FUNCT__
5225 #define __FUNCT__ "MatMatMultNumeric_MPIDense_MPIAIJ"
5226 /*
5227     Computes (B'*A')' since computing B*A directly is untenable
5228 
5229                n                       p                          p
5230         (              )       (              )         (                  )
5231       m (      A       )  *  n (       B      )   =   m (         C        )
5232         (              )       (              )         (                  )
5233 
5234 */
5235 PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C)
5236 {
5237   PetscErrorCode     ierr;
5238   Mat                At,Bt,Ct;
5239 
5240   PetscFunctionBegin;
5241   ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr);
5242   ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr);
5243   ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,1.0,&Ct);CHKERRQ(ierr);
5244   ierr = MatDestroy(At);CHKERRQ(ierr);
5245   ierr = MatDestroy(Bt);CHKERRQ(ierr);
5246   ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr);
5247   ierr = MatDestroy(Ct);CHKERRQ(ierr);
5248   PetscFunctionReturn(0);
5249 }
5250 
5251 #undef __FUNCT__
5252 #define __FUNCT__ "MatMatMultSymbolic_MPIDense_MPIAIJ"
5253 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat *C)
5254 {
5255   PetscErrorCode ierr;
5256   PetscInt       m=A->rmap->n,n=B->cmap->n;
5257   Mat            Cmat;
5258 
5259   PetscFunctionBegin;
5260   if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %d != B->rmap->n %d\n",A->cmap->n,B->rmap->n);
5261   ierr = MatCreate(((PetscObject)A)->comm,&Cmat);CHKERRQ(ierr);
5262   ierr = MatSetSizes(Cmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
5263   ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr);
5264   ierr = MatMPIDenseSetPreallocation(Cmat,PETSC_NULL);CHKERRQ(ierr);
5265   ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5266   ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5267   *C   = Cmat;
5268   PetscFunctionReturn(0);
5269 }
5270 
5271 /* ----------------------------------------------------------------*/
5272 #undef __FUNCT__
5273 #define __FUNCT__ "MatMatMult_MPIDense_MPIAIJ"
5274 PetscErrorCode MatMatMult_MPIDense_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C)
5275 {
5276   PetscErrorCode ierr;
5277 
5278   PetscFunctionBegin;
5279   if (scall == MAT_INITIAL_MATRIX){
5280     ierr = MatMatMultSymbolic_MPIDense_MPIAIJ(A,B,fill,C);CHKERRQ(ierr);
5281   }
5282   ierr = MatMatMultNumeric_MPIDense_MPIAIJ(A,B,*C);CHKERRQ(ierr);
5283   PetscFunctionReturn(0);
5284 }
5285 
5286 EXTERN_C_BEGIN
5287 #if defined(PETSC_HAVE_MUMPS)
5288 extern PetscErrorCode MatGetFactor_aij_mumps(Mat,MatFactorType,Mat*);
5289 #endif
5290 #if defined(PETSC_HAVE_PASTIX)
5291 extern PetscErrorCode MatGetFactor_mpiaij_pastix(Mat,MatFactorType,Mat*);
5292 #endif
5293 #if defined(PETSC_HAVE_SUPERLU_DIST)
5294 extern PetscErrorCode MatGetFactor_mpiaij_superlu_dist(Mat,MatFactorType,Mat*);
5295 #endif
5296 #if defined(PETSC_HAVE_SPOOLES)
5297 extern PetscErrorCode MatGetFactor_mpiaij_spooles(Mat,MatFactorType,Mat*);
5298 #endif
5299 EXTERN_C_END
5300 
5301 /*MC
5302    MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.
5303 
5304    Options Database Keys:
5305 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions()
5306 
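   A typical way to select this type at run time (sketch; A, m, n, M, N are the
   user's matrix and sizes):
.vb
     MatCreate(PETSC_COMM_WORLD,&A);
     MatSetSizes(A,m,n,M,N);
     MatSetFromOptions(A);     /* then run with -mat_type mpiaij */
.ve
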
5307   Level: beginner
5308 
5309 .seealso: MatCreateMPIAIJ()
5310 M*/
5311 
5312 EXTERN_C_BEGIN
5313 #undef __FUNCT__
5314 #define __FUNCT__ "MatCreate_MPIAIJ"
5315 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_MPIAIJ(Mat B)
5316 {
5317   Mat_MPIAIJ     *b;
5318   PetscErrorCode ierr;
5319   PetscMPIInt    size;
5320 
5321   PetscFunctionBegin;
5322   ierr = MPI_Comm_size(((PetscObject)B)->comm,&size);CHKERRQ(ierr);
5323 
5324   ierr            = PetscNewLog(B,Mat_MPIAIJ,&b);CHKERRQ(ierr);
5325   B->data         = (void*)b;
5326   ierr            = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
5327   B->rmap->bs     = 1;
5328   B->assembled    = PETSC_FALSE;
5329 
5330   B->insertmode   = NOT_SET_VALUES;
5331   b->size         = size;
5332   ierr = MPI_Comm_rank(((PetscObject)B)->comm,&b->rank);CHKERRQ(ierr);
5333 
  /* build cache (stash) for off-process entries set with MatSetValues() */
5335   ierr = MatStashCreate_Private(((PetscObject)B)->comm,1,&B->stash);CHKERRQ(ierr);
5336   b->donotstash  = PETSC_FALSE;
5337   b->colmap      = 0;
5338   b->garray      = 0;
5339   b->roworiented = PETSC_TRUE;
5340 
5341   /* stuff used for matrix vector multiply */
5342   b->lvec      = PETSC_NULL;
5343   b->Mvctx     = PETSC_NULL;
5344 
5345   /* stuff for MatGetRow() */
5346   b->rowindices   = 0;
5347   b->rowvalues    = 0;
5348   b->getrowactive = PETSC_FALSE;
5349 
5350 #if defined(PETSC_HAVE_SPOOLES)
5351   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_spooles_C",
5352                                      "MatGetFactor_mpiaij_spooles",
5353                                      MatGetFactor_mpiaij_spooles);CHKERRQ(ierr);
5354 #endif
5355 #if defined(PETSC_HAVE_MUMPS)
5356   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_mumps_C",
5357                                      "MatGetFactor_aij_mumps",
5358                                      MatGetFactor_aij_mumps);CHKERRQ(ierr);
5359 #endif
5360 #if defined(PETSC_HAVE_PASTIX)
5361   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_pastix_C",
5362 					   "MatGetFactor_mpiaij_pastix",
5363 					   MatGetFactor_mpiaij_pastix);CHKERRQ(ierr);
5364 #endif
5365 #if defined(PETSC_HAVE_SUPERLU_DIST)
5366   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_superlu_dist_C",
5367                                      "MatGetFactor_mpiaij_superlu_dist",
5368                                      MatGetFactor_mpiaij_superlu_dist);CHKERRQ(ierr);
5369 #endif
5370   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
5371                                      "MatStoreValues_MPIAIJ",
5372                                      MatStoreValues_MPIAIJ);CHKERRQ(ierr);
5373   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
5374                                      "MatRetrieveValues_MPIAIJ",
5375                                      MatRetrieveValues_MPIAIJ);CHKERRQ(ierr);
5376   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
5377 				     "MatGetDiagonalBlock_MPIAIJ",
5378                                      MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr);
5379   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatIsTranspose_C",
5380 				     "MatIsTranspose_MPIAIJ",
5381 				     MatIsTranspose_MPIAIJ);CHKERRQ(ierr);
5382   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocation_C",
5383 				     "MatMPIAIJSetPreallocation_MPIAIJ",
5384 				     MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr);
5385   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",
5386 				     "MatMPIAIJSetPreallocationCSR_MPIAIJ",
5387 				     MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr);
5388   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C",
5389 				     "MatDiagonalScaleLocal_MPIAIJ",
5390 				     MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr);
5391   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpiaijperm_C",
5392                                      "MatConvert_MPIAIJ_MPIAIJPERM",
5393                                       MatConvert_MPIAIJ_MPIAIJPERM);CHKERRQ(ierr);
5394   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpiaijcrl_C",
5395                                      "MatConvert_MPIAIJ_MPIAIJCRL",
5396                                       MatConvert_MPIAIJ_MPIAIJCRL);CHKERRQ(ierr);
5397   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C",
5398                                      "MatConvert_MPIAIJ_MPISBAIJ",
5399                                       MatConvert_MPIAIJ_MPISBAIJ);CHKERRQ(ierr);
5400   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMult_mpidense_mpiaij_C",
5401                                      "MatMatMult_MPIDense_MPIAIJ",
5402                                       MatMatMult_MPIDense_MPIAIJ);CHKERRQ(ierr);
5403   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMultSymbolic_mpidense_mpiaij_C",
5404                                      "MatMatMultSymbolic_MPIDense_MPIAIJ",
5405                                      MatMatMultSymbolic_MPIDense_MPIAIJ);CHKERRQ(ierr);
5406   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMultNumeric_mpidense_mpiaij_C",
5407                                      "MatMatMultNumeric_MPIDense_MPIAIJ",
5408                                       MatMatMultNumeric_MPIDense_MPIAIJ);CHKERRQ(ierr);
5409   ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr);
5410   PetscFunctionReturn(0);
5411 }
5412 EXTERN_C_END
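/*
   A minimal sketch (made-up variable names, not PETSc library code) of how the compositions
   installed in MatCreate_MPIAIJ() above are consumed: a type-independent front end such as
   MatMPIAIJSetPreallocation() looks the "_C" name up on the object and invokes the attached
   routine only if the matrix type provides it.

      PetscErrorCode (*f)(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]) = PETSC_NULL;
      ierr = PetscObjectQueryFunction((PetscObject)A,"MatMPIAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr);
      if (f) {
        ierr = (*f)(A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
      }
*/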
5413 
5414 #undef __FUNCT__
5415 #define __FUNCT__ "MatCreateMPIAIJWithSplitArrays"
5416 /*@
     MatCreateMPIAIJWithSplitArrays - creates an MPI AIJ matrix using arrays that contain the "diagonal"
5418          and "off-diagonal" part of the matrix in CSR format.
5419 
5420    Collective on MPI_Comm
5421 
5422    Input Parameters:
5423 +  comm - MPI communicator
5424 .  m - number of local rows (Cannot be PETSC_DECIDE)
.  n - number of local columns. This value should be the same as the local size used in creating the
       x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have it calculated if N is given).
       For square matrices n is almost always m.
.  M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given)
.  N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given)
5430 .   i - row indices for "diagonal" portion of matrix
5431 .   j - column indices
5432 .   a - matrix values
5433 .   oi - row indices for "off-diagonal" portion of matrix
5434 .   oj - column indices
5435 -   oa - matrix values
5436 
5437    Output Parameter:
5438 .   mat - the matrix
5439 
5440    Level: advanced
5441 
5442    Notes:
5443        The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc.
5444 
5445        The i and j indices are 0 based
5446 
5447        See MatCreateMPIAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix
5448 
5449        This sets local rows and cannot be used to set off-processor values.
5450 
5451        You cannot later use MatSetValues() to change values in this matrix.
5452 
5453 .keywords: matrix, aij, compressed row, sparse, parallel
5454 
5455 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
5456           MPIAIJ, MatCreateMPIAIJ(), MatCreateMPIAIJWithArrays()
5457 @*/
5458 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],
5459 								PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat)
5460 {
5461   PetscErrorCode ierr;
5462   Mat_MPIAIJ     *maij;
5463 
  PetscFunctionBegin;
5465   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
5466   if (i[0]) {
5467     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
5468   }
5469   if (oi[0]) {
5470     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0");
5471   }
5472   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
5473   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
5474   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
5475   maij = (Mat_MPIAIJ*) (*mat)->data;
5476   maij->donotstash     = PETSC_TRUE;
5477   (*mat)->preallocated = PETSC_TRUE;
5478 
5479   ierr = PetscLayoutSetBlockSize((*mat)->rmap,1);CHKERRQ(ierr);
5480   ierr = PetscLayoutSetBlockSize((*mat)->cmap,1);CHKERRQ(ierr);
5481   ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr);
5482   ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr);
5483 
5484   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr);
5485   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr);
5486 
5487   ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5488   ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5489   ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5490   ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5491 
5492   ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5493   ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5494   PetscFunctionReturn(0);
5495 }
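/*
   A minimal sketch of calling MatCreateMPIAIJWithSplitArrays() on exactly two processes (the
   matrix below is made up).  Consistent with the MatCreateSeqAIJWithArrays() calls above, j
   holds column indices local to the diagonal block while oj holds global column indices for
   the off-diagonal block; i and oi are the usual CSR row offsets.

   A 4x4 matrix with 2 rows and 2 columns per process:

      rank 0 (rows 0-1): A(0,0)=1, A(1,1)=2 in the diagonal block, A(0,2)=3 off-diagonal
        PetscInt    i[]  = {0,1,2}, j[]  = {0,1};  PetscScalar a[]  = {1.0,2.0};
        PetscInt    oi[] = {0,1,1}, oj[] = {2};    PetscScalar oa[] = {3.0};

      rank 1 (rows 2-3): A(2,2)=4, A(3,3)=5 in the diagonal block, A(3,0)=6 off-diagonal
        PetscInt    i[]  = {0,1,2}, j[]  = {0,1};  PetscScalar a[]  = {4.0,5.0};
        PetscInt    oi[] = {0,0,1}, oj[] = {0};    PetscScalar oa[] = {6.0};

      Mat A;
      ierr = MatCreateMPIAIJWithSplitArrays(PETSC_COMM_WORLD,2,2,PETSC_DETERMINE,PETSC_DETERMINE,
                                            i,j,a,oi,oj,oa,&A);CHKERRQ(ierr);

   Since the arrays are not copied they must stay valid for as long as A is used.
*/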
5496 
5497 /*
5498     Special version for direct calls from Fortran
5499 */
5500 #if defined(PETSC_HAVE_FORTRAN_CAPS)
5501 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ
5502 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
5503 #define matsetvaluesmpiaij_ matsetvaluesmpiaij
5504 #endif
5505 
/* Change these macros so they can be used in a void function */
5507 #undef CHKERRQ
5508 #define CHKERRQ(ierr) CHKERRABORT(PETSC_COMM_WORLD,ierr)
5509 #undef SETERRQ2
5510 #define SETERRQ2(comm,ierr,b,c,d) CHKERRABORT(comm,ierr)
5511 #undef SETERRQ
5512 #define SETERRQ(c,ierr,b) CHKERRABORT(c,ierr)
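/*
   With the redefinitions above, a statement inside the void Fortran stub below such as

      ierr = MatPreallocated(mat);CHKERRQ(ierr);

   aborts on the communicator via CHKERRABORT() when an error occurs, instead of expanding to a
   "return ierr;", which would not compile in a function returning void.
*/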
5513 
5514 EXTERN_C_BEGIN
5515 #undef __FUNCT__
5516 #define __FUNCT__ "matsetvaluesmpiaij_"
5517 void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr)
5518 {
5519   Mat             mat = *mmat;
5520   PetscInt        m = *mm, n = *mn;
5521   InsertMode      addv = *maddv;
5522   Mat_MPIAIJ      *aij = (Mat_MPIAIJ*)mat->data;
5523   PetscScalar     value;
5524   PetscErrorCode  ierr;
5525 
5526   ierr = MatPreallocated(mat);CHKERRQ(ierr);
5527   if (mat->insertmode == NOT_SET_VALUES) {
5528     mat->insertmode = addv;
5529   }
5530 #if defined(PETSC_USE_DEBUG)
5531   else if (mat->insertmode != addv) {
5532     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
5533   }
5534 #endif
5535   {
5536   PetscInt        i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
5537   PetscInt        cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
5538   PetscBool       roworiented = aij->roworiented;
5539 
  /* Some variables required by the macros MatSetValues_SeqAIJ_A_Private() and MatSetValues_SeqAIJ_B_Private() */
5541   Mat             A = aij->A;
5542   Mat_SeqAIJ      *a = (Mat_SeqAIJ*)A->data;
5543   PetscInt        *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
5544   MatScalar       *aa = a->a;
5545   PetscBool       ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES))?PETSC_TRUE:PETSC_FALSE);
5546   Mat             B = aij->B;
5547   Mat_SeqAIJ      *b = (Mat_SeqAIJ*)B->data;
5548   PetscInt        *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
5549   MatScalar       *ba = b->a;
5550 
5551   PetscInt        *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
5552   PetscInt        nonew = a->nonew;
5553   MatScalar       *ap1,*ap2;
5554 
5555   PetscFunctionBegin;
5556   for (i=0; i<m; i++) {
5557     if (im[i] < 0) continue;
5558 #if defined(PETSC_USE_DEBUG)
5559     if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
5560 #endif
5561     if (im[i] >= rstart && im[i] < rend) {
5562       row      = im[i] - rstart;
5563       lastcol1 = -1;
5564       rp1      = aj + ai[row];
5565       ap1      = aa + ai[row];
5566       rmax1    = aimax[row];
5567       nrow1    = ailen[row];
5568       low1     = 0;
5569       high1    = nrow1;
5570       lastcol2 = -1;
5571       rp2      = bj + bi[row];
5572       ap2      = ba + bi[row];
5573       rmax2    = bimax[row];
5574       nrow2    = bilen[row];
5575       low2     = 0;
5576       high2    = nrow2;
5577 
5578       for (j=0; j<n; j++) {
5579         if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
5580         if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
5581         if (in[j] >= cstart && in[j] < cend){
5582           col = in[j] - cstart;
5583           MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
5584         } else if (in[j] < 0) continue;
5585 #if defined(PETSC_USE_DEBUG)
5586         else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
5587 #endif
5588         else {
5589           if (mat->was_assembled) {
5590             if (!aij->colmap) {
5591               ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
5592             }
5593 #if defined (PETSC_USE_CTABLE)
5594             ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
5595 	    col--;
5596 #else
5597             col = aij->colmap[in[j]] - 1;
5598 #endif
5599             if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
5600               ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
5601               col =  in[j];
5602               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
5603               B = aij->B;
5604               b = (Mat_SeqAIJ*)B->data;
              bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
              ba       = b->a;   /* refresh ba from the rebuilt B before ap2 is computed from it */
              rp2      = bj + bi[row];
              ap2      = ba + bi[row];
              rmax2    = bimax[row];
              nrow2    = bilen[row];
              low2     = 0;
              high2    = nrow2;
              bm       = aij->B->rmap->n;
5614             }
5615           } else col = in[j];
5616           MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
5617         }
5618       }
5619     } else {
5620       if (!aij->donotstash) {
5621         if (roworiented) {
5622           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
5623         } else {
5624           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
5625         }
5626       }
5627     }
5628   }}
5629   PetscFunctionReturnVoid();
5630 }
5631 EXTERN_C_END
5632 
5633