xref: /petsc/src/mat/impls/aij/mpi/mpiaij.c (revision d620caa5e0e4d8bf2f876fe3c5a43e2ee15531eb)
1 #define PETSCMAT_DLL
2 
3 #include "../src/mat/impls/aij/mpi/mpiaij.h"   /*I "petscmat.h" I*/
4 #include "petscblaslapack.h"
5 
6 #undef __FUNCT__
7 #define __FUNCT__ "MatDistribute_MPIAIJ"
8 /*
9     Distributes a SeqAIJ matrix across a set of processes. Code stolen from
10     MatLoad_MPIAIJ(). Horrible lack of reuse. Should be a routine for each matrix type.
11 
12     Only for square matrices
13 */
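/*
   Usage sketch (illustrative only; gseq and Adist are placeholder names): gseq is a fully
   assembled MATSEQAIJ that needs to be meaningful only on process 0, and m is this process's
   share of the rows (the values of m must sum to the global size over the communicator):

      Mat Adist;
      ierr = MatDistribute_MPIAIJ(comm,gseq,m,MAT_INITIAL_MATRIX,&Adist);CHKERRQ(ierr);
      ... update the numerical values of gseq on process 0, keeping its nonzero pattern ...
      ierr = MatDistribute_MPIAIJ(comm,gseq,m,MAT_REUSE_MATRIX,&Adist);CHKERRQ(ierr);
*/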
14 PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat)
15 {
16   PetscMPIInt    rank,size;
17   PetscInt       *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz,*gmataj,cnt,row,*ld;
18   PetscErrorCode ierr;
19   Mat            mat;
20   Mat_SeqAIJ     *gmata;
21   PetscMPIInt    tag;
22   MPI_Status     status;
23   PetscBool      aij;
24   MatScalar      *gmataa,*ao,*ad,*gmataarestore=0;
25 
26   PetscFunctionBegin;
27   CHKMEMQ;
28   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
29   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
30   if (!rank) {
31     ierr = PetscTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);CHKERRQ(ierr);
32     if (!aij) SETERRQ1(((PetscObject)gmat)->comm,PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name);
33   }
34   if (reuse == MAT_INITIAL_MATRIX) {
35     ierr = MatCreate(comm,&mat);CHKERRQ(ierr);
36     ierr = MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
37     ierr = MatSetType(mat,MATAIJ);CHKERRQ(ierr);
38     ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr);
39     ierr = PetscMalloc2(m,PetscInt,&dlens,m,PetscInt,&olens);CHKERRQ(ierr);
40     ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
41     rowners[0] = 0;
42     for (i=2; i<=size; i++) {
43       rowners[i] += rowners[i-1];
44     }
45     rstart = rowners[rank];
46     rend   = rowners[rank+1];
47     ierr   = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
48     if (!rank) {
49       gmata = (Mat_SeqAIJ*) gmat->data;
50       /* send row lengths to all processors */
51       for (i=0; i<m; i++) dlens[i] = gmata->ilen[i];
52       for (i=1; i<size; i++) {
53 	ierr = MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
54       }
55       /* determine the diagonal and off-diagonal nonzero counts for each row */
56       ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
57       ierr = PetscMalloc(m*sizeof(PetscInt),&ld);CHKERRQ(ierr);
58       ierr = PetscMemzero(ld,m*sizeof(PetscInt));CHKERRQ(ierr);
59       jj = 0;
60       for (i=0; i<m; i++) {
61 	for (j=0; j<dlens[i]; j++) {
62           if (gmata->j[jj] < rstart) ld[i]++;
63 	  if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++;
64 	  jj++;
65 	}
66       }
67       /* send column indices to other processes */
68       for (i=1; i<size; i++) {
69 	nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
70 	ierr = MPI_Send(&nz,1,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
71 	ierr = MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
72       }
73 
74       /* send numerical values to other processes */
75       for (i=1; i<size; i++) {
76         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
77         ierr = MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
78       }
79       gmataa = gmata->a;
80       gmataj = gmata->j;
81 
82     } else {
83       /* receive row lengths */
84       ierr = MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
85       /* receive column indices */
86       ierr = MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
87       ierr = PetscMalloc2(nz,PetscScalar,&gmataa,nz,PetscInt,&gmataj);CHKERRQ(ierr);
88       ierr = MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
89       /* determine the diagonal and off-diagonal nonzero counts for each row */
90       ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
91       ierr = PetscMalloc(m*sizeof(PetscInt),&ld);CHKERRQ(ierr);
92       ierr = PetscMemzero(ld,m*sizeof(PetscInt));CHKERRQ(ierr);
93       jj = 0;
94       for (i=0; i<m; i++) {
95 	for (j=0; j<dlens[i]; j++) {
96           if (gmataj[jj] < rstart) ld[i]++;
97 	  if (gmataj[jj] < rstart || gmataj[jj] >= rend) olens[i]++;
98 	  jj++;
99 	}
100       }
101       /* receive numerical values */
102       ierr = PetscMemzero(gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr);
103       ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
104     }
105     /* set preallocation */
106     for (i=0; i<m; i++) {
107       dlens[i] -= olens[i];
108     }
109     ierr = MatSeqAIJSetPreallocation(mat,0,dlens);CHKERRQ(ierr);
110     ierr = MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);CHKERRQ(ierr);
111 
112     for (i=0; i<m; i++) {
113       dlens[i] += olens[i];
114     }
115     cnt  = 0;
116     for (i=0; i<m; i++) {
117       row  = rstart + i;
118       ierr = MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);CHKERRQ(ierr);
119       cnt += dlens[i];
120     }
121     if (rank) {
122       ierr = PetscFree2(gmataa,gmataj);CHKERRQ(ierr);
123     }
124     ierr = PetscFree2(dlens,olens);CHKERRQ(ierr);
125     ierr = PetscFree(rowners);CHKERRQ(ierr);
126     ((Mat_MPIAIJ*)(mat->data))->ld = ld;
127     *inmat = mat;
128   } else {   /* column indices are already set; only need to move over numerical values from process 0 */
129     Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data;
130     Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data;
131     mat   = *inmat;
132     ierr  = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
133     if (!rank) {
134       /* send numerical values to other processes */
135       gmata = (Mat_SeqAIJ*) gmat->data;
136       ierr   = MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);CHKERRQ(ierr);
137       gmataa = gmata->a;
138       for (i=1; i<size; i++) {
139         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
140         ierr = MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
141       }
142       nz   = gmata->i[rowners[1]]-gmata->i[rowners[0]];
143     } else {
144       /* receive numerical values from process 0*/
145       nz   = Ad->nz + Ao->nz;
146       ierr = PetscMalloc(nz*sizeof(PetscScalar),&gmataa);CHKERRQ(ierr); gmataarestore = gmataa;
147       ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
148     }
149     /* transfer numerical values into the diagonal A and off diagonal B parts of mat */
150     ld = ((Mat_MPIAIJ*)(mat->data))->ld;
151     ad = Ad->a;
152     ao = Ao->a;
153     if (mat->rmap->n) {
154       i  = 0;
155       nz = ld[i];                                   ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
156       nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
157     }
158     for (i=1; i<mat->rmap->n; i++) {
159       nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
160       nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
161     }
162     i--;
163     if (mat->rmap->n) {
164       nz = Ao->i[i+1] - Ao->i[i] - ld[i];           ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
165     }
166     if (rank) {
167       ierr = PetscFree(gmataarestore);CHKERRQ(ierr);
168     }
169   }
170   ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
171   ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
172   CHKMEMQ;
173   PetscFunctionReturn(0);
174 }
175 
176 /*
177   Local utility routine that creates a mapping from the global column
178 number to the local number in the off-diagonal part of the local
179 storage of the matrix.  When PETSC_USE_CTABLE is defined this is scalable,
180 at a slightly higher hash-table lookup cost; without it, it is not scalable
181 (each processor holds an integer array of length N, the global number of columns, which is fast to access).
182 */
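/*
  Worked example (illustrative only): if the off-diagonal block uses the global columns
  garray[] = {3, 9, 17}, the colmap built below satisfies colmap[3] = 1, colmap[9] = 2,
  colmap[17] = 3 and is 0 everywhere else, so a global column g translates to the local
  column colmap[g]-1, with -1 meaning "not present".  With PETSC_USE_CTABLE the same
  (g+1) -> (i+1) pairs are stored in a hash table instead of a dense length-N array.
*/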
183 #undef __FUNCT__
184 #define __FUNCT__ "CreateColmap_MPIAIJ_Private"
185 PetscErrorCode CreateColmap_MPIAIJ_Private(Mat mat)
186 {
187   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
188   PetscErrorCode ierr;
189   PetscInt       n = aij->B->cmap->n,i;
190 
191   PetscFunctionBegin;
192 #if defined (PETSC_USE_CTABLE)
193   ierr = PetscTableCreate(n,&aij->colmap);CHKERRQ(ierr);
194   for (i=0; i<n; i++){
195     ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1);CHKERRQ(ierr);
196   }
197 #else
198   ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscInt),&aij->colmap);CHKERRQ(ierr);
199   ierr = PetscLogObjectMemory(mat,mat->cmap->N*sizeof(PetscInt));CHKERRQ(ierr);
200   ierr = PetscMemzero(aij->colmap,mat->cmap->N*sizeof(PetscInt));CHKERRQ(ierr);
201   for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
202 #endif
203   PetscFunctionReturn(0);
204 }
205 
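/*
  The two macros below insert (or add to) a single value in the local diagonal (A) or
  off-diagonal (B) SeqAIJ block: a short binary search narrows the window within the sorted
  column indices of the row, a linear scan locates or creates the slot, and
  MatSeqXAIJReallocateAIJ() reallocates the row's storage when it is already full.
*/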
206 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \
207 { \
208     if (col <= lastcol1) low1 = 0; else high1 = nrow1; \
209     lastcol1 = col;\
210     while (high1-low1 > 5) { \
211       t = (low1+high1)/2; \
212       if (rp1[t] > col) high1 = t; \
213       else             low1  = t; \
214     } \
215       for (_i=low1; _i<high1; _i++) { \
216         if (rp1[_i] > col) break; \
217         if (rp1[_i] == col) { \
218           if (addv == ADD_VALUES) ap1[_i] += value;   \
219           else                    ap1[_i] = value; \
220           goto a_noinsert; \
221         } \
222       }  \
223       if (value == 0.0 && ignorezeroentries) {low1 = 0; high1 = nrow1;goto a_noinsert;} \
224       if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;}		\
225       if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
226       MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \
227       N = nrow1++ - 1; a->nz++; high1++; \
228       /* shift up all the later entries in this row */ \
229       for (ii=N; ii>=_i; ii--) { \
230         rp1[ii+1] = rp1[ii]; \
231         ap1[ii+1] = ap1[ii]; \
232       } \
233       rp1[_i] = col;  \
234       ap1[_i] = value;  \
235       a_noinsert: ; \
236       ailen[row] = nrow1; \
237 }
238 
239 
240 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \
241 { \
242     if (col <= lastcol2) low2 = 0; else high2 = nrow2; \
243     lastcol2 = col;\
244     while (high2-low2 > 5) { \
245       t = (low2+high2)/2; \
246       if (rp2[t] > col) high2 = t; \
247       else             low2  = t; \
248     } \
249     for (_i=low2; _i<high2; _i++) {		\
250       if (rp2[_i] > col) break;			\
251       if (rp2[_i] == col) {			      \
252 	if (addv == ADD_VALUES) ap2[_i] += value;     \
253 	else                    ap2[_i] = value;      \
254 	goto b_noinsert;			      \
255       }						      \
256     }							      \
257     if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
258     if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;}		\
259     if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
260     MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \
261     N = nrow2++ - 1; b->nz++; high2++;					\
262     /* shift up all the later entries in this row */			\
263     for (ii=N; ii>=_i; ii--) {						\
264       rp2[ii+1] = rp2[ii];						\
265       ap2[ii+1] = ap2[ii];						\
266     }									\
267     rp2[_i] = col;							\
268     ap2[_i] = value;							\
269     b_noinsert: ;								\
270     bilen[row] = nrow2;							\
271 }
272 
273 #undef __FUNCT__
274 #define __FUNCT__ "MatSetValuesRow_MPIAIJ"
275 PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[])
276 {
277   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)A->data;
278   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data;
279   PetscErrorCode ierr;
280   PetscInt       l,*garray = mat->garray,diag;
281 
282   PetscFunctionBegin;
283   /* code only works for square matrices A */
284 
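  /* v[] is assumed to hold the values of the entire local row ordered by increasing global
     column number: the off-diagonal entries to the left of the diagonal block, then the
     diagonal block, then the remaining off-diagonal entries */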
285   /* find size of row to the left of the diagonal part */
286   ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr);
287   row  = row - diag;
288   for (l=0; l<b->i[row+1]-b->i[row]; l++) {
289     if (garray[b->j[b->i[row]+l]] > diag) break;
290   }
291   ierr = PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));CHKERRQ(ierr);
292 
293   /* diagonal part */
294   ierr = PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));CHKERRQ(ierr);
295 
296   /* right of diagonal part */
297   ierr = PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));CHKERRQ(ierr);
298   PetscFunctionReturn(0);
299 }
300 
301 #undef __FUNCT__
302 #define __FUNCT__ "MatSetValues_MPIAIJ"
303 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
304 {
305   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
306   PetscScalar    value;
307   PetscErrorCode ierr;
308   PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
309   PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
310   PetscBool      roworiented = aij->roworiented;
311 
312   /* Some variables required by the MatSetValues_SeqAIJ_A/B_Private() macros */
313   Mat            A = aij->A;
314   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data;
315   PetscInt       *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
316   MatScalar      *aa = a->a;
317   PetscBool      ignorezeroentries = a->ignorezeroentries;
318   Mat            B = aij->B;
319   Mat_SeqAIJ     *b = (Mat_SeqAIJ*)B->data;
320   PetscInt       *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
321   MatScalar      *ba = b->a;
322 
323   PetscInt       *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
324   PetscInt       nonew = a->nonew;
325   MatScalar      *ap1,*ap2;
326 
327   PetscFunctionBegin;
328   if (v) PetscValidScalarPointer(v,6);
329   for (i=0; i<m; i++) {
330     if (im[i] < 0) continue;
331 #if defined(PETSC_USE_DEBUG)
332     if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
333 #endif
334     if (im[i] >= rstart && im[i] < rend) {
335       row      = im[i] - rstart;
336       lastcol1 = -1;
337       rp1      = aj + ai[row];
338       ap1      = aa + ai[row];
339       rmax1    = aimax[row];
340       nrow1    = ailen[row];
341       low1     = 0;
342       high1    = nrow1;
343       lastcol2 = -1;
344       rp2      = bj + bi[row];
345       ap2      = ba + bi[row];
346       rmax2    = bimax[row];
347       nrow2    = bilen[row];
348       low2     = 0;
349       high2    = nrow2;
350 
351       for (j=0; j<n; j++) {
352         if (v) {if (roworiented) value = v[i*n+j]; else value = v[i+j*m];} else value = 0.0;
353         if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
354         if (in[j] >= cstart && in[j] < cend){
355           col = in[j] - cstart;
356           MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
357         } else if (in[j] < 0) continue;
358 #if defined(PETSC_USE_DEBUG)
359         else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
360 #endif
361         else {
362           if (mat->was_assembled) {
363             if (!aij->colmap) {
364               ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
365             }
366 #if defined (PETSC_USE_CTABLE)
367             ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
368 	    col--;
369 #else
370             col = aij->colmap[in[j]] - 1;
371 #endif
372             if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
373               ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
374               col =  in[j];
375               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
376               B = aij->B;
377               b = (Mat_SeqAIJ*)B->data;
378               bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a;
379               rp2      = bj + bi[row];
380               ap2      = ba + bi[row];
381               rmax2    = bimax[row];
382               nrow2    = bilen[row];
383               low2     = 0;
384               high2    = nrow2;
385               bm       = aij->B->rmap->n;
386               ba = b->a;
387             }
388           } else col = in[j];
389           MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
390         }
391       }
392     } else {
393       if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
394       if (!aij->donotstash) {
395         if (roworiented) {
396           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
397         } else {
398           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
399         }
400       }
401     }
402   }
403   PetscFunctionReturn(0);
404 }
405 
406 #undef __FUNCT__
407 #define __FUNCT__ "MatGetValues_MPIAIJ"
408 PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
409 {
410   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
411   PetscErrorCode ierr;
412   PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
413   PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
414 
415   PetscFunctionBegin;
416   for (i=0; i<m; i++) {
417     if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/
418     if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
419     if (idxm[i] >= rstart && idxm[i] < rend) {
420       row = idxm[i] - rstart;
421       for (j=0; j<n; j++) {
422         if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */
423         if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
424         if (idxn[j] >= cstart && idxn[j] < cend){
425           col = idxn[j] - cstart;
426           ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
427         } else {
428           if (!aij->colmap) {
429             ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
430           }
431 #if defined (PETSC_USE_CTABLE)
432           ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
433           col --;
434 #else
435           col = aij->colmap[idxn[j]] - 1;
436 #endif
437           if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
438           else {
439             ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
440           }
441         }
442       }
443     } else {
444       SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
445     }
446   }
447   PetscFunctionReturn(0);
448 }
449 
450 extern PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat,Vec,Vec);
451 
452 #undef __FUNCT__
453 #define __FUNCT__ "MatAssemblyBegin_MPIAIJ"
454 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
455 {
456   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
457   PetscErrorCode ierr;
458   PetscInt       nstash,reallocs;
459   InsertMode     addv;
460 
461   PetscFunctionBegin;
462   if (aij->donotstash || mat->nooffprocentries) {
463     PetscFunctionReturn(0);
464   }
465 
466   /* make sure all processors are in the same mode, either INSERT_VALUES or ADD_VALUES */
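  /* INSERT_VALUES and ADD_VALUES are distinct nonzero bits, so the bitwise OR across all
     ranks equals (ADD_VALUES|INSERT_VALUES) exactly when both modes were used somewhere */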
467   ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,((PetscObject)mat)->comm);CHKERRQ(ierr);
468   if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
469   mat->insertmode = addv; /* in case this processor had no cache */
470 
471   ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
472   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
473   ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
474   PetscFunctionReturn(0);
475 }
476 
477 #undef __FUNCT__
478 #define __FUNCT__ "MatAssemblyEnd_MPIAIJ"
479 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
480 {
481   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
482   Mat_SeqAIJ     *a=(Mat_SeqAIJ *)aij->A->data;
483   PetscErrorCode ierr;
484   PetscMPIInt    n;
485   PetscInt       i,j,rstart,ncols,flg;
486   PetscInt       *row,*col;
487   PetscBool      other_disassembled;
488   PetscScalar    *val;
489   InsertMode     addv = mat->insertmode;
490 
491   /* do not use 'b = (Mat_SeqAIJ *)aij->B->data' as B can be reset in disassembly */
492   PetscFunctionBegin;
493   if (!aij->donotstash && !mat->nooffprocentries) {
494     while (1) {
495       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
496       if (!flg) break;
497 
498       for (i=0; i<n;) {
499         /* Now identify the consecutive vals belonging to the same row */
500         for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
501         if (j < n) ncols = j-i;
502         else       ncols = n-i;
503         /* Now assemble all these values with a single function call */
504         ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
505         i = j;
506       }
507     }
508     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
509   }
510   a->compressedrow.use     = PETSC_FALSE;
511   ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
512   ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);
513 
514   /* determine if any processor has disassembled; if so, we must
515      also disassemble ourselves, in order that we may reassemble. */
516   /*
517      if the nonzero structure of the submatrix B cannot change then we know that
518      no processor disassembled, and thus we can skip this step
519   */
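  /* the MPI_PROD reduction below is zero exactly when some process has was_assembled == PETSC_FALSE
     (i.e. it has disassembled); in that case every still-assembled process disassembles as well */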
520   if (!((Mat_SeqAIJ*)aij->B->data)->nonew)  {
521     ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,((PetscObject)mat)->comm);CHKERRQ(ierr);
522     if (mat->was_assembled && !other_disassembled) {
523       ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
524     }
525   }
526   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
527     ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
528   }
529   ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr);
530   ((Mat_SeqAIJ *)aij->B->data)->compressedrow.use = PETSC_TRUE; /* b->compressedrow.use */
531   ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
532   ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);
533 
534   ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
535   aij->rowvalues = 0;
536 
537   /* used by MatAXPY() */
538   a->xtoy = 0; ((Mat_SeqAIJ *)aij->B->data)->xtoy = 0;  /* b->xtoy = 0 */
539   a->XtoY = 0; ((Mat_SeqAIJ *)aij->B->data)->XtoY = 0;  /* b->XtoY = 0 */
540 
541   if (aij->diag) {ierr = VecDestroy(aij->diag);CHKERRQ(ierr);aij->diag = 0;}
542   if (a->inode.size) mat->ops->multdiagonalblock = MatMultDiagonalBlock_MPIAIJ;
543   PetscFunctionReturn(0);
544 }
545 
546 #undef __FUNCT__
547 #define __FUNCT__ "MatZeroEntries_MPIAIJ"
548 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
549 {
550   Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
551   PetscErrorCode ierr;
552 
553   PetscFunctionBegin;
554   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
555   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
556   PetscFunctionReturn(0);
557 }
558 
559 #undef __FUNCT__
560 #define __FUNCT__ "MatZeroRows_MPIAIJ"
561 PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
562 {
563   Mat_MPIAIJ        *l = (Mat_MPIAIJ*)A->data;
564   PetscErrorCode    ierr;
565   PetscMPIInt       size = l->size,imdex,n,rank = l->rank,tag = ((PetscObject)A)->tag,lastidx = -1;
566   PetscInt          i,*owners = A->rmap->range;
567   PetscInt          *nprocs,j,idx,nsends,row;
568   PetscInt          nmax,*svalues,*starts,*owner,nrecvs;
569   PetscInt          *rvalues,count,base,slen,*source;
570   PetscInt          *lens,*lrows,*values,rstart=A->rmap->rstart;
571   MPI_Comm          comm = ((PetscObject)A)->comm;
572   MPI_Request       *send_waits,*recv_waits;
573   MPI_Status        recv_status,*send_status;
574   const PetscScalar *xx;
575   PetscScalar       *bb;
576 #if defined(PETSC_DEBUG)
577   PetscBool      found = PETSC_FALSE;
578 #endif
579 
580   PetscFunctionBegin;
581   /*  first count number of contributors to each processor */
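  /* nprocs[2*j] counts the rows destined for process j; nprocs[2*j+1] is 1 if at least one row goes to process j */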
582   ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
583   ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
584   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/
585   j = 0;
586   for (i=0; i<N; i++) {
587     if (lastidx > (idx = rows[i])) j = 0;
588     lastidx = idx;
589     for (; j<size; j++) {
590       if (idx >= owners[j] && idx < owners[j+1]) {
591         nprocs[2*j]++;
592         nprocs[2*j+1] = 1;
593         owner[i] = j;
594 #if defined(PETSC_DEBUG)
595         found = PETSC_TRUE;
596 #endif
597         break;
598       }
599     }
600 #if defined(PETSC_DEBUG)
601     if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
602     found = PETSC_FALSE;
603 #endif
604   }
605   nsends = 0;  for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}
606 
607   if (A->nooffproczerorows) {
608     if (nsends > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"You called MatSetOption(,MAT_NO_OFF_PROC_ZERO_ROWS,PETSC_TRUE) but set an off process zero row");
609     nrecvs = nsends;
610     nmax   = N;
611   } else {
612     /* inform other processors of number of messages and max length*/
613     ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);
614   }
615 
616   /* post receives:   */
617   ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr);
618   ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
619   for (i=0; i<nrecvs; i++) {
620     ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
621   }
622 
623   /* do sends:
624       1) starts[i] gives the starting index in svalues for stuff going to
625          the ith processor
626   */
627   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr);
628   ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
629   ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr);
630   starts[0] = 0;
631   for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
632   for (i=0; i<N; i++) {
633     svalues[starts[owner[i]]++] = rows[i];
634   }
635 
636   starts[0] = 0;
637   for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
638   count = 0;
639   for (i=0; i<size; i++) {
640     if (nprocs[2*i+1]) {
641       ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
642     }
643   }
644   ierr = PetscFree(starts);CHKERRQ(ierr);
645 
646   base = owners[rank];
647 
648   /*  wait on receives */
649   ierr   = PetscMalloc2(nrecvs,PetscInt,&lens,nrecvs,PetscInt,&source);CHKERRQ(ierr);
650   count  = nrecvs; slen = 0;
651   while (count) {
652     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
653     /* unpack receives into our local space */
654     ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);
655     source[imdex]  = recv_status.MPI_SOURCE;
656     lens[imdex]    = n;
657     slen          += n;
658     count--;
659   }
660   ierr = PetscFree(recv_waits);CHKERRQ(ierr);
661 
662   /* move the data into the send scatter */
663   ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr);
664   count = 0;
665   for (i=0; i<nrecvs; i++) {
666     values = rvalues + i*nmax;
667     for (j=0; j<lens[i]; j++) {
668       lrows[count++] = values[j] - base;
669     }
670   }
671   ierr = PetscFree(rvalues);CHKERRQ(ierr);
672   ierr = PetscFree2(lens,source);CHKERRQ(ierr);
673   ierr = PetscFree(owner);CHKERRQ(ierr);
674   ierr = PetscFree(nprocs);CHKERRQ(ierr);
675 
676   /* fix right hand side if needed */
677   if (x && b) {
678     ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr);
679     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
680     for (i=0; i<slen; i++) {
681       bb[lrows[i]] = diag*xx[lrows[i]];
682     }
683     ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr);
684     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
685   }
686   /*
687         Zero the required rows. If the "diagonal block" of the matrix
688      is square and the user wishes to set the diagonal, we use separate
689      code so that MatSetValues() need not be called for each diagonal entry,
690      which would allocate new memory with many mallocs and slow things down.
691 
692   */
693   /* must zero l->B before l->A because the (diag) case below may put values into l->B*/
694   ierr = MatZeroRows(l->B,slen,lrows,0.0,0,0);CHKERRQ(ierr);
695   if ((diag != 0.0) && (l->A->rmap->N == l->A->cmap->N)) {
696     ierr = MatZeroRows(l->A,slen,lrows,diag,0,0);CHKERRQ(ierr);
697   } else if (diag != 0.0) {
698     ierr = MatZeroRows(l->A,slen,lrows,0.0,0,0);CHKERRQ(ierr);
699     if (((Mat_SeqAIJ*)l->A->data)->nonew) {
700       SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\n\
701 MAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
702     }
703     for (i = 0; i < slen; i++) {
704       row  = lrows[i] + rstart;
705       ierr = MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr);
706     }
707     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
708     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
709   } else {
710     ierr = MatZeroRows(l->A,slen,lrows,0.0,0,0);CHKERRQ(ierr);
711   }
712   ierr = PetscFree(lrows);CHKERRQ(ierr);
713 
714   /* wait on sends */
715   if (nsends) {
716     ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
717     ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
718     ierr = PetscFree(send_status);CHKERRQ(ierr);
719   }
720   ierr = PetscFree(send_waits);CHKERRQ(ierr);
721   ierr = PetscFree(svalues);CHKERRQ(ierr);
722 
723   PetscFunctionReturn(0);
724 }
725 
726 #undef __FUNCT__
727 #define __FUNCT__ "MatZeroRowsColumns_MPIAIJ"
728 PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
729 {
730   Mat_MPIAIJ        *l = (Mat_MPIAIJ*)A->data;
731   PetscErrorCode    ierr;
732   PetscMPIInt       size = l->size,imdex,n,rank = l->rank,tag = ((PetscObject)A)->tag,lastidx = -1;
733   PetscInt          i,*owners = A->rmap->range;
734   PetscInt          *nprocs,j,idx,nsends;
735   PetscInt          nmax,*svalues,*starts,*owner,nrecvs;
736   PetscInt          *rvalues,count,base,slen,*source;
737   PetscInt          *lens,*lrows,*values,m;
738   MPI_Comm          comm = ((PetscObject)A)->comm;
739   MPI_Request       *send_waits,*recv_waits;
740   MPI_Status        recv_status,*send_status;
741   const PetscScalar *xx;
742   PetscScalar       *bb,*mask;
743   Vec               xmask,lmask;
744   Mat_SeqAIJ        *aij = (Mat_SeqAIJ*)l->B->data;
745   const PetscInt    *aj, *ii,*ridx;
746   PetscScalar       *aa;
747 #if defined(PETSC_DEBUG)
748   PetscBool         found = PETSC_FALSE;
749 #endif
750 
751   PetscFunctionBegin;
752   /*  first count number of contributors to each processor */
753   ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
754   ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
755   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/
756   j = 0;
757   for (i=0; i<N; i++) {
758     if (lastidx > (idx = rows[i])) j = 0;
759     lastidx = idx;
760     for (; j<size; j++) {
761       if (idx >= owners[j] && idx < owners[j+1]) {
762         nprocs[2*j]++;
763         nprocs[2*j+1] = 1;
764         owner[i] = j;
765 #if defined(PETSC_DEBUG)
766         found = PETSC_TRUE;
767 #endif
768         break;
769       }
770     }
771 #if defined(PETSC_DEBUG)
772     if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
773     found = PETSC_FALSE;
774 #endif
775   }
776   nsends = 0;  for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}
777 
778   /* inform other processors of number of messages and max length*/
779   ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);
780 
781   /* post receives:   */
782   ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr);
783   ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
784   for (i=0; i<nrecvs; i++) {
785     ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
786   }
787 
788   /* do sends:
789       1) starts[i] gives the starting index in svalues for stuff going to
790          the ith processor
791   */
792   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr);
793   ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
794   ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr);
795   starts[0] = 0;
796   for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
797   for (i=0; i<N; i++) {
798     svalues[starts[owner[i]]++] = rows[i];
799   }
800 
801   starts[0] = 0;
802   for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
803   count = 0;
804   for (i=0; i<size; i++) {
805     if (nprocs[2*i+1]) {
806       ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
807     }
808   }
809   ierr = PetscFree(starts);CHKERRQ(ierr);
810 
811   base = owners[rank];
812 
813   /*  wait on receives */
814   ierr   = PetscMalloc2(nrecvs,PetscInt,&lens,nrecvs,PetscInt,&source);CHKERRQ(ierr);
815   count  = nrecvs; slen = 0;
816   while (count) {
817     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
818     /* unpack receives into our local space */
819     ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);
820     source[imdex]  = recv_status.MPI_SOURCE;
821     lens[imdex]    = n;
822     slen          += n;
823     count--;
824   }
825   ierr = PetscFree(recv_waits);CHKERRQ(ierr);
826 
827   /* move the data into the send scatter */
828   ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr);
829   count = 0;
830   for (i=0; i<nrecvs; i++) {
831     values = rvalues + i*nmax;
832     for (j=0; j<lens[i]; j++) {
833       lrows[count++] = values[j] - base;
834     }
835   }
836   ierr = PetscFree(rvalues);CHKERRQ(ierr);
837   ierr = PetscFree2(lens,source);CHKERRQ(ierr);
838   ierr = PetscFree(owner);CHKERRQ(ierr);
839   ierr = PetscFree(nprocs);CHKERRQ(ierr);
840   /* lrows are the local rows to be zeroed, slen is the number of local rows */
841 
842   /* zero diagonal part of matrix */
843   ierr = MatZeroRowsColumns(l->A,slen,lrows,diag,x,b);CHKERRQ(ierr);
844 
845   /* handle off diagonal part of matrix */
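  /* mark the zeroed rows with 1's in a mask vector and scatter it (and x) with the same VecScatter
     used for MatMult(); each process then zeroes the entries of B that lie in a zeroed column,
     subtracting their contribution a_ij*x_j from the right-hand side b */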
846   ierr = MatGetVecs(A,&xmask,PETSC_NULL);CHKERRQ(ierr);
847   ierr = VecDuplicate(l->lvec,&lmask);CHKERRQ(ierr);
848   ierr = VecGetArray(xmask,&bb);CHKERRQ(ierr);
849   for (i=0; i<slen; i++) {
850     bb[lrows[i]] = 1;
851   }
852   ierr = VecRestoreArray(xmask,&bb);CHKERRQ(ierr);
853   ierr = VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
854   ierr = VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
855   ierr = VecDestroy(xmask);CHKERRQ(ierr);
856   ierr = VecScatterBegin(l->Mvctx,x,l->lvec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
857   ierr = VecScatterEnd(l->Mvctx,x,l->lvec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
858   ierr = VecGetArrayRead(l->lvec,&xx);CHKERRQ(ierr);
859   ierr = VecGetArray(lmask,&mask);CHKERRQ(ierr);
860   ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
861 
862   /* remove zeroed rows of off diagonal matrix */
863   ii = aij->i;
864   for (i=0; i<slen; i++) {
865     ierr = PetscMemzero(aij->a + ii[lrows[i]],(ii[lrows[i]+1] - ii[lrows[i]])*sizeof(PetscScalar));CHKERRQ(ierr);
866   }
867 
868   /* loop over all elements of the off-process part of the matrix, zeroing the removed columns */
869   if (aij->compressedrow.use){
870     m    = aij->compressedrow.nrows;
871     ii   = aij->compressedrow.i;
872     ridx = aij->compressedrow.rindex;
873     for (i=0; i<m; i++){
874       n   = ii[i+1] - ii[i];
875       aj  = aij->j + ii[i];
876       aa  = aij->a + ii[i];
877 
878       for (j=0; j<n; j++) {
879         if (mask[*aj]) {
880           bb[*ridx] -= *aa*xx[*aj];
881           *aa        = 0.0;
882         }
883         aa++;
884         aj++;
885       }
886       ridx++;
887     }
888   } else { /* do not use compressed row format */
889     m = l->B->rmap->n;
890     for (i=0; i<m; i++) {
891       n   = ii[i+1] - ii[i];
892       aj  = aij->j + ii[i];
893       aa  = aij->a + ii[i];
894       for (j=0; j<n; j++) {
895         if (mask[*aj]) {
896           bb[i] -= *aa*xx[*aj];
897           *aa    = 0.0;
898         }
899         aa++;
900         aj++;
901       }
902     }
903   }
904 
905   ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
906   ierr = VecRestoreArray(lmask,&mask);CHKERRQ(ierr);
907   ierr = VecRestoreArrayRead(l->lvec,&xx);CHKERRQ(ierr);
908   ierr = VecDestroy(lmask);CHKERRQ(ierr);
909   ierr = PetscFree(lrows);CHKERRQ(ierr);
910 
911   /* wait on sends */
912   if (nsends) {
913     ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
914     ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
915     ierr = PetscFree(send_status);CHKERRQ(ierr);
916   }
917   ierr = PetscFree(send_waits);CHKERRQ(ierr);
918   ierr = PetscFree(svalues);CHKERRQ(ierr);
919 
920   PetscFunctionReturn(0);
921 }
922 
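/*
  Parallel matrix-vector product: the scatter that gathers the needed ghost entries of xx into
  a->lvec is started, the local diagonal-block product a->A*xx is computed while the messages
  are in flight, and the off-diagonal contribution a->B*lvec is then added to yy.
*/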
923 #undef __FUNCT__
924 #define __FUNCT__ "MatMult_MPIAIJ"
925 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
926 {
927   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
928   PetscErrorCode ierr;
929   PetscInt       nt;
930 
931   PetscFunctionBegin;
932   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
933   if (nt != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt);
934   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
935   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
936   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
937   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
938   PetscFunctionReturn(0);
939 }
940 
941 #undef __FUNCT__
942 #define __FUNCT__ "MatMultDiagonalBlock_MPIAIJ"
943 PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx)
944 {
945   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
946   PetscErrorCode ierr;
947 
948   PetscFunctionBegin;
949   ierr = MatMultDiagonalBlock(a->A,bb,xx);CHKERRQ(ierr);
950   PetscFunctionReturn(0);
951 }
952 
953 #undef __FUNCT__
954 #define __FUNCT__ "MatMultAdd_MPIAIJ"
955 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
956 {
957   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
958   PetscErrorCode ierr;
959 
960   PetscFunctionBegin;
961   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
962   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
963   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
964   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
965   PetscFunctionReturn(0);
966 }
967 
968 #undef __FUNCT__
969 #define __FUNCT__ "MatMultTranspose_MPIAIJ"
970 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
971 {
972   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
973   PetscErrorCode ierr;
974   PetscBool      merged;
975 
976   PetscFunctionBegin;
977   ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr);
978   /* do nondiagonal part */
979   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
980   if (!merged) {
981     /* send it on its way */
982     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
983     /* do local part */
984     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
985     /* receive remote parts: note this assumes the values are not actually */
986     /* added into yy until the VecScatterEnd() on the next line */
987     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
988   } else {
989     /* do local part */
990     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
991     /* send it on its way */
992     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
993     /* values actually were received in the Begin() but we need to call this nop */
994     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
995   }
996   PetscFunctionReturn(0);
997 }
998 
999 EXTERN_C_BEGIN
1000 #undef __FUNCT__
1001 #define __FUNCT__ "MatIsTranspose_MPIAIJ"
1002 PetscErrorCode PETSCMAT_DLLEXPORT MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscBool  *f)
1003 {
1004   MPI_Comm       comm;
1005   Mat_MPIAIJ     *Aij = (Mat_MPIAIJ *) Amat->data, *Bij;
1006   Mat            Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs;
1007   IS             Me,Notme;
1008   PetscErrorCode ierr;
1009   PetscInt       M,N,first,last,*notme,i;
1010   PetscMPIInt    size;
1011 
1012   PetscFunctionBegin;
1013 
1014   /* Easy test: symmetric diagonal block */
1015   Bij = (Mat_MPIAIJ *) Bmat->data; Bdia = Bij->A;
1016   ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr);
1017   if (!*f) PetscFunctionReturn(0);
1018   ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr);
1019   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1020   if (size == 1) PetscFunctionReturn(0);
1021 
1022   /* Hard test: off-diagonal block. This takes a MatGetSubMatrix. */
1023   ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr);
1024   ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr);
1025   ierr = PetscMalloc((N-last+first)*sizeof(PetscInt),&notme);CHKERRQ(ierr);
1026   for (i=0; i<first; i++) notme[i] = i;
1027   for (i=last; i<M; i++) notme[i-last+first] = i;
1028   ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,PETSC_COPY_VALUES,&Notme);CHKERRQ(ierr);
1029   ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr);
1030   ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr);
1031   Aoff = Aoffs[0];
1032   ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr);
1033   Boff = Boffs[0];
1034   ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr);
1035   ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr);
1036   ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr);
1037   ierr = ISDestroy(Me);CHKERRQ(ierr);
1038   ierr = ISDestroy(Notme);CHKERRQ(ierr);
1039 
1040   PetscFunctionReturn(0);
1041 }
1042 EXTERN_C_END
1043 
1044 #undef __FUNCT__
1045 #define __FUNCT__ "MatMultTransposeAdd_MPIAIJ"
1046 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1047 {
1048   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1049   PetscErrorCode ierr;
1050 
1051   PetscFunctionBegin;
1052   /* do nondiagonal part */
1053   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1054   /* send it on its way */
1055   ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1056   /* do local part */
1057   ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1058   /* receive remote parts */
1059   ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1060   PetscFunctionReturn(0);
1061 }
1062 
1063 /*
1064   This only works correctly for square matrices where the subblock A->A is the
1065    diagonal block
1066 */
1067 #undef __FUNCT__
1068 #define __FUNCT__ "MatGetDiagonal_MPIAIJ"
1069 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v)
1070 {
1071   PetscErrorCode ierr;
1072   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1073 
1074   PetscFunctionBegin;
1075   if (A->rmap->N != A->cmap->N) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
1076   if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
1077   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
1078   PetscFunctionReturn(0);
1079 }
1080 
1081 #undef __FUNCT__
1082 #define __FUNCT__ "MatScale_MPIAIJ"
1083 PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa)
1084 {
1085   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1086   PetscErrorCode ierr;
1087 
1088   PetscFunctionBegin;
1089   ierr = MatScale(a->A,aa);CHKERRQ(ierr);
1090   ierr = MatScale(a->B,aa);CHKERRQ(ierr);
1091   PetscFunctionReturn(0);
1092 }
1093 
1094 #undef __FUNCT__
1095 #define __FUNCT__ "MatDestroy_MPIAIJ"
1096 PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
1097 {
1098   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1099   PetscErrorCode ierr;
1100 
1101   PetscFunctionBegin;
1102 #if defined(PETSC_USE_LOG)
1103   PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N);
1104 #endif
1105   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
1106   if (aij->diag) {ierr = VecDestroy(aij->diag);CHKERRQ(ierr);}
1107   if (aij->A){ierr = MatDestroy(aij->A);CHKERRQ(ierr);}
1108   if (aij->B){ierr = MatDestroy(aij->B);CHKERRQ(ierr);}
1109 #if defined (PETSC_USE_CTABLE)
1110   if (aij->colmap) {ierr = PetscTableDestroy(aij->colmap);CHKERRQ(ierr);}
1111 #else
1112   ierr = PetscFree(aij->colmap);CHKERRQ(ierr);
1113 #endif
1114   ierr = PetscFree(aij->garray);CHKERRQ(ierr);
1115   if (aij->lvec)   {ierr = VecDestroy(aij->lvec);CHKERRQ(ierr);}
1116   if (aij->Mvctx)  {ierr = VecScatterDestroy(aij->Mvctx);CHKERRQ(ierr);}
1117   ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
1118   ierr = PetscFree(aij->ld);CHKERRQ(ierr);
1119   ierr = PetscFree(aij);CHKERRQ(ierr);
1120 
1121   ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
1122   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr);
1123   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr);
1124   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr);
1125   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C","",PETSC_NULL);CHKERRQ(ierr);
1126   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
1127   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr);
1128   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C","",PETSC_NULL);CHKERRQ(ierr);
1129   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C","",PETSC_NULL);CHKERRQ(ierr);
1130   PetscFunctionReturn(0);
1131 }
1132 
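/*
  Binary format written below: a four-entry PetscInt header (MAT_FILE_CLASSID, global rows,
  global columns, total number of nonzeros), followed by all the row lengths, then all the
  column indices (in global numbering), then all the numerical values; process 0 gathers the
  data from the other processes under PetscViewerFlowControl and writes it to the file.
*/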
1133 #undef __FUNCT__
1134 #define __FUNCT__ "MatView_MPIAIJ_Binary"
1135 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
1136 {
1137   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
1138   Mat_SeqAIJ*       A = (Mat_SeqAIJ*)aij->A->data;
1139   Mat_SeqAIJ*       B = (Mat_SeqAIJ*)aij->B->data;
1140   PetscErrorCode    ierr;
1141   PetscMPIInt       rank,size,tag = ((PetscObject)viewer)->tag;
1142   int               fd;
1143   PetscInt          nz,header[4],*row_lengths,*range=0,rlen,i;
1144   PetscInt          nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap->rstart,rnz;
1145   PetscScalar       *column_values;
1146   PetscInt          message_count,flowcontrolcount;
1147 
1148   PetscFunctionBegin;
1149   ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr);
1150   ierr = MPI_Comm_size(((PetscObject)mat)->comm,&size);CHKERRQ(ierr);
1151   nz   = A->nz + B->nz;
1152   if (!rank) {
1153     header[0] = MAT_FILE_CLASSID;
1154     header[1] = mat->rmap->N;
1155     header[2] = mat->cmap->N;
1156     ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
1157     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1158     ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1159     /* get largest number of rows any processor has */
1160     rlen = mat->rmap->n;
1161     range = mat->rmap->range;
1162     for (i=1; i<size; i++) {
1163       rlen = PetscMax(rlen,range[i+1] - range[i]);
1164     }
1165   } else {
1166     ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
1167     rlen = mat->rmap->n;
1168   }
1169 
1170   /* load up the local row counts */
1171   ierr = PetscMalloc((rlen+1)*sizeof(PetscInt),&row_lengths);CHKERRQ(ierr);
1172   for (i=0; i<mat->rmap->n; i++) {
1173     row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
1174   }
1175 
1176   /* store the row lengths to the file */
1177   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1178   if (!rank) {
1179     MPI_Status status;
1180     ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap->n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1181     for (i=1; i<size; i++) {
1182       ierr = PetscViewerFlowControlStepMaster(viewer,i,message_count,flowcontrolcount);CHKERRQ(ierr);
1183       rlen = range[i+1] - range[i];
1184       ierr = MPI_Recv(row_lengths,rlen,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
1185       ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1186     }
1187     ierr = PetscViewerFlowControlEndMaster(viewer,message_count);CHKERRQ(ierr);
1188   } else {
1189     ierr = PetscViewerFlowControlStepWorker(viewer,rank,message_count);CHKERRQ(ierr);
1190     ierr = MPI_Send(row_lengths,mat->rmap->n,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1191     ierr = PetscViewerFlowControlEndWorker(viewer,message_count);CHKERRQ(ierr);
1192   }
1193   ierr = PetscFree(row_lengths);CHKERRQ(ierr);
1194 
1195   /* load up the local column indices */
1196   nzmax = nz; /* 0th processor needs as much space as the largest processor needs */
1197   ierr = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
1198   ierr = PetscMalloc((nzmax+1)*sizeof(PetscInt),&column_indices);CHKERRQ(ierr);
1199   cnt  = 0;
1200   for (i=0; i<mat->rmap->n; i++) {
1201     for (j=B->i[i]; j<B->i[i+1]; j++) {
1202       if ( (col = garray[B->j[j]]) > cstart) break;
1203       column_indices[cnt++] = col;
1204     }
1205     for (k=A->i[i]; k<A->i[i+1]; k++) {
1206       column_indices[cnt++] = A->j[k] + cstart;
1207     }
1208     for (; j<B->i[i+1]; j++) {
1209       column_indices[cnt++] = garray[B->j[j]];
1210     }
1211   }
1212   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
1213 
1214   /* store the column indices to the file */
1215   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1216   if (!rank) {
1217     MPI_Status status;
1218     ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1219     for (i=1; i<size; i++) {
1220       ierr = PetscViewerFlowControlStepMaster(viewer,i,message_count,flowcontrolcount);CHKERRQ(ierr);
1221       if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: rnz = %D nzmax = %D",rnz,nzmax);
1222       if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax);
1223       ierr = MPI_Recv(column_indices,rnz,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
1224       ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1225     }
1226     ierr = PetscViewerFlowControlEndMaster(viewer,message_count);CHKERRQ(ierr);
1227   } else {
1228     ierr = PetscViewerFlowControlStepWorker(viewer,rank,message_count);CHKERRQ(ierr);
1229     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1230     ierr = MPI_Send(column_indices,nz,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1231     ierr = PetscViewerFlowControlEndWorker(viewer,message_count);CHKERRQ(ierr);
1232   }
1233   ierr = PetscFree(column_indices);CHKERRQ(ierr);
1234 
1235   /* load up the local column values */
1236   ierr = PetscMalloc((nzmax+1)*sizeof(PetscScalar),&column_values);CHKERRQ(ierr);
1237   cnt  = 0;
1238   for (i=0; i<mat->rmap->n; i++) {
1239     for (j=B->i[i]; j<B->i[i+1]; j++) {
1240       if ( garray[B->j[j]] > cstart) break;
1241       column_values[cnt++] = B->a[j];
1242     }
1243     for (k=A->i[i]; k<A->i[i+1]; k++) {
1244       column_values[cnt++] = A->a[k];
1245     }
1246     for (; j<B->i[i+1]; j++) {
1247       column_values[cnt++] = B->a[j];
1248     }
1249   }
1250   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
1251 
1252   /* store the column values to the file */
1253   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1254   if (!rank) {
1255     MPI_Status status;
1256     ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1257     for (i=1; i<size; i++) {
1258       ierr = PetscViewerFlowControlStepMaster(viewer,i,message_count,flowcontrolcount);CHKERRQ(ierr);
1259       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
1260       if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: rnz = %D nzmax = %D",rnz,nzmax);
1261       ierr = MPI_Recv(column_values,rnz,MPIU_SCALAR,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
1262       ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1263     }
1264     ierr = PetscViewerFlowControlEndMaster(viewer,message_count);CHKERRQ(ierr);
1265   } else {
1266     ierr = PetscViewerFlowControlStepWorker(viewer,rank,message_count);CHKERRQ(ierr);
1267     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1268     ierr = MPI_Send(column_values,nz,MPIU_SCALAR,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1269     ierr = PetscViewerFlowControlEndWorker(viewer,message_count);CHKERRQ(ierr);
1270   }
1271   ierr = PetscFree(column_values);CHKERRQ(ierr);
1272   PetscFunctionReturn(0);
1273 }
1274 
1275 #undef __FUNCT__
1276 #define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket"
1277 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
1278 {
1279   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
1280   PetscErrorCode    ierr;
1281   PetscMPIInt       rank = aij->rank,size = aij->size;
1282   PetscBool         isdraw,iascii,isbinary;
1283   PetscViewer       sviewer;
1284   PetscViewerFormat format;
1285 
1286   PetscFunctionBegin;
1287   ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1288   ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1289   ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1290   if (iascii) {
1291     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1292     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1293       MatInfo    info;
1294       PetscBool  inodes;
1295 
1296       ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr);
1297       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
1298       ierr = MatInodeGetInodeSizes(aij->A,PETSC_NULL,(PetscInt **)&inodes,PETSC_NULL);CHKERRQ(ierr);
1299       if (!inodes) {
1300         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, not using I-node routines\n",
1301 					      rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
1302       } else {
1303         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, using I-node routines\n",
1304 		    rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
1305       }
1306       ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
1307       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1308       ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
1309       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1310       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1311       ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
1312       ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr);
1313       PetscFunctionReturn(0);
1314     } else if (format == PETSC_VIEWER_ASCII_INFO) {
1315       PetscInt   inodecount,inodelimit,*inodes;
1316       ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr);
1317       if (inodes) {
1318         ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr);
1319       } else {
1320         ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr);
1321       }
1322       PetscFunctionReturn(0);
1323     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1324       PetscFunctionReturn(0);
1325     }
1326   } else if (isbinary) {
1327     if (size == 1) {
1328       ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1329       ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
1330     } else {
1331       ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr);
1332     }
1333     PetscFunctionReturn(0);
1334   } else if (isdraw) {
1335     PetscDraw  draw;
1336     PetscBool  isnull;
1337     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
1338     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
1339   }
1340 
1341   if (size == 1) {
1342     ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1343     ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
1344   } else {
1345     /* assemble the entire matrix onto first processor. */
1346     Mat         A;
1347     Mat_SeqAIJ  *Aloc;
1348     PetscInt    M = mat->rmap->N,N = mat->cmap->N,m,*ai,*aj,row,*cols,i,*ct;
1349     MatScalar   *a;
1350 
1351     if (mat->rmap->N > 1024) {
1352       PetscBool  flg = PETSC_FALSE;
1353 
1354       ierr = PetscOptionsGetBool(((PetscObject) mat)->prefix, "-mat_ascii_output_large", &flg,PETSC_NULL);CHKERRQ(ierr);
1355       if (!flg) {
1356         SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_OUTOFRANGE,"ASCII matrix output not allowed for matrices with more than 1024 rows, use binary format instead.\nYou can override this restriction using -mat_ascii_output_large.");
1357       }
1358     }
1359 
1360     ierr = MatCreate(((PetscObject)mat)->comm,&A);CHKERRQ(ierr);
1361     if (!rank) {
1362       ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
1363     } else {
1364       ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
1365     }
1366     /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */
1367     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
1368     ierr = MatMPIAIJSetPreallocation(A,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
1369     ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr);
1370 
1371     /* copy over the A part */
1372     Aloc = (Mat_SeqAIJ*)aij->A->data;
1373     m = aij->A->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1374     row = mat->rmap->rstart;
1375     for (i=0; i<ai[m]; i++) {aj[i] += mat->cmap->rstart;}
1376     for (i=0; i<m; i++) {
1377       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
1378       row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
1379     }
1380     aj = Aloc->j;
1381     for (i=0; i<ai[m]; i++) {aj[i] -= mat->cmap->rstart;}
1382 
1383     /* copy over the B part */
1384     Aloc = (Mat_SeqAIJ*)aij->B->data;
1385     m    = aij->B->rmap->n;  ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1386     row  = mat->rmap->rstart;
1387     ierr = PetscMalloc((ai[m]+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr);
1388     ct   = cols;
1389     for (i=0; i<ai[m]; i++) {cols[i] = aij->garray[aj[i]];}
1390     for (i=0; i<m; i++) {
1391       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
1392       row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
1393     }
1394     ierr = PetscFree(ct);CHKERRQ(ierr);
1395     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1396     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1397     /*
1398        Everyone has to call to draw the matrix since the graphics waits are
1399        synchronized across all processors that share the PetscDraw object
1400     */
1401     ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
1402     if (!rank) {
1403       ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1404       /* Set the type name to MATMPIAIJ so that the correct type can be printed out by PetscObjectPrintClassNamePrefixType() in MatView_SeqAIJ_ASCII()*/
1405       ierr = PetscStrcpy(((PetscObject)((Mat_MPIAIJ*)(A->data))->A)->type_name,MATMPIAIJ);CHKERRQ(ierr);
1406       ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
1407     }
1408     ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
1409     ierr = MatDestroy(A);CHKERRQ(ierr);
1410   }
1411   PetscFunctionReturn(0);
1412 }
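/*
   A minimal usage sketch (illustrative only, assuming mat lives on PETSC_COMM_WORLD):
   requesting the PETSC_VIEWER_ASCII_INFO_DETAIL branch handled above, which prints the
   per-process nonzero counts and the VecScatter information.

     ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_WORLD,PETSC_VIEWER_ASCII_INFO_DETAIL);CHKERRQ(ierr);
     ierr = MatView(mat,PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr);
*/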
1413 
1414 #undef __FUNCT__
1415 #define __FUNCT__ "MatView_MPIAIJ"
1416 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer)
1417 {
1418   PetscErrorCode ierr;
1419   PetscBool      iascii,isdraw,issocket,isbinary;
1420 
1421   PetscFunctionBegin;
1422   ierr  = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1423   ierr  = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1424   ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1425   ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
1426   if (iascii || isdraw || isbinary || issocket) {
1427     ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
1428   } else {
1429     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Viewer type %s not supported by MPIAIJ matrices",((PetscObject)viewer)->type_name);
1430   }
1431   PetscFunctionReturn(0);
1432 }
1433 
1434 #undef __FUNCT__
1435 #define __FUNCT__ "MatSOR_MPIAIJ"
1436 PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
1437 {
1438   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1439   PetscErrorCode ierr;
1440   Vec            bb1 = 0;
1441   PetscBool      hasop;
1442 
1443   PetscFunctionBegin;
1444   if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) { /* need the work vector bb1 unless this is a single sweep with a zero initial guess and no Eisenstat */
1445     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
1446   }
1447 
1448   if (flag == SOR_APPLY_UPPER) {
1449     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1450     PetscFunctionReturn(0);
1451   }
1452 
1453   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
1454     if (flag & SOR_ZERO_INITIAL_GUESS) {
1455       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1456       its--;
1457     }
1458 
1459     while (its--) {
1460       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1461       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1462 
1463       /* update rhs: bb1 = bb - B*x */
1464       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1465       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1466 
1467       /* local sweep */
1468       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1469     }
1470   } else if (flag & SOR_LOCAL_FORWARD_SWEEP){
1471     if (flag & SOR_ZERO_INITIAL_GUESS) {
1472       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1473       its--;
1474     }
1475     while (its--) {
1476       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1477       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1478 
1479       /* update rhs: bb1 = bb - B*x */
1480       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1481       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1482 
1483       /* local sweep */
1484       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1485     }
1486   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){
1487     if (flag & SOR_ZERO_INITIAL_GUESS) {
1488       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1489       its--;
1490     }
1491     while (its--) {
1492       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1493       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1494 
1495       /* update rhs: bb1 = bb - B*x */
1496       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1497       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1498 
1499       /* local sweep */
1500       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1501     }
1502   }  else if (flag & SOR_EISENSTAT) {
1503     Vec         xx1;
1504 
1505     ierr = VecDuplicate(bb,&xx1);CHKERRQ(ierr);
1506     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr);
1507 
1508     ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1509     ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1510     if (!mat->diag) {
1511       ierr = MatGetVecs(matin,&mat->diag,PETSC_NULL);CHKERRQ(ierr);
1512       ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr);
1513     }
1514     ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr);
1515     if (hasop) {
1516       ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr);
1517     } else {
1518       ierr = VecPointwiseMult(bb1,mat->diag,xx);CHKERRQ(ierr);
1519     }
1520     ierr = VecAYPX(bb1,(omega-2.0)/omega,bb);CHKERRQ(ierr);
1521 
1522     ierr = MatMultAdd(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr);
1523 
1524     /* local sweep */
1525     ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr);
1526     ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr);
1527     ierr = VecDestroy(xx1);CHKERRQ(ierr);
1528   } else {
1529     SETERRQ(((PetscObject)matin)->comm,PETSC_ERR_SUP,"Parallel SOR not supported");
1530   }
1531 
1532   if (bb1) {ierr = VecDestroy(bb1);CHKERRQ(ierr);}
1533   PetscFunctionReturn(0);
1534 }
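/*
   A minimal usage sketch (illustrative only): one local symmetric SOR sweep with a zero
   initial guess, dispatched to the code above through MatSOR(); omega = 1.0 gives
   Gauss-Seidel.  b and x are placeholder vectors compatible with mat.

     ierr = MatSOR(mat,b,1.0,(MatSORType)(SOR_LOCAL_SYMMETRIC_SWEEP | SOR_ZERO_INITIAL_GUESS),0.0,1,1,x);CHKERRQ(ierr);
*/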
1535 
1536 #undef __FUNCT__
1537 #define __FUNCT__ "MatPermute_MPIAIJ"
1538 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B)
1539 {
1540   MPI_Comm       comm,pcomm;
1541   PetscInt       first,local_size,nrows;
1542   const PetscInt *rows;
1543   PetscMPIInt    size;
1544   IS             crowp,growp,irowp,lrowp,lcolp,icolp;
1545   PetscErrorCode ierr;
1546 
1547   PetscFunctionBegin;
1548   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
1549   /* make a collective version of 'rowp' */
1550   ierr = PetscObjectGetComm((PetscObject)rowp,&pcomm);CHKERRQ(ierr);
1551   if (pcomm==comm) {
1552     crowp = rowp;
1553   } else {
1554     ierr = ISGetSize(rowp,&nrows);CHKERRQ(ierr);
1555     ierr = ISGetIndices(rowp,&rows);CHKERRQ(ierr);
1556     ierr = ISCreateGeneral(comm,nrows,rows,PETSC_COPY_VALUES,&crowp);CHKERRQ(ierr);
1557     ierr = ISRestoreIndices(rowp,&rows);CHKERRQ(ierr);
1558   }
1559   /* collect the global row permutation and invert it */
1560   ierr = ISAllGather(crowp,&growp);CHKERRQ(ierr);
1561   ierr = ISSetPermutation(growp);CHKERRQ(ierr);
1562   if (pcomm!=comm) {
1563     ierr = ISDestroy(crowp);CHKERRQ(ierr);
1564   }
1565   ierr = ISInvertPermutation(growp,PETSC_DECIDE,&irowp);CHKERRQ(ierr);
1566   /* get the local target indices */
1567   ierr = MatGetOwnershipRange(A,&first,PETSC_NULL);CHKERRQ(ierr);
1568   ierr = MatGetLocalSize(A,&local_size,PETSC_NULL);CHKERRQ(ierr);
1569   ierr = ISGetIndices(irowp,&rows);CHKERRQ(ierr);
1570   ierr = ISCreateGeneral(MPI_COMM_SELF,local_size,rows+first,PETSC_COPY_VALUES,&lrowp);CHKERRQ(ierr);
1571   ierr = ISRestoreIndices(irowp,&rows);CHKERRQ(ierr);
1572   ierr = ISDestroy(irowp);CHKERRQ(ierr);
1573   /* the column permutation is so much easier;
1574      make a local version of 'colp' and invert it */
1575   ierr = PetscObjectGetComm((PetscObject)colp,&pcomm);CHKERRQ(ierr);
1576   ierr = MPI_Comm_size(pcomm,&size);CHKERRQ(ierr);
1577   if (size==1) {
1578     lcolp = colp;
1579   } else {
1580     ierr = ISGetSize(colp,&nrows);CHKERRQ(ierr);
1581     ierr = ISGetIndices(colp,&rows);CHKERRQ(ierr);
1582     ierr = ISCreateGeneral(MPI_COMM_SELF,nrows,rows,PETSC_COPY_VALUES,&lcolp);CHKERRQ(ierr);
1583   }
1584   ierr = ISSetPermutation(lcolp);CHKERRQ(ierr);
1585   ierr = ISInvertPermutation(lcolp,PETSC_DECIDE,&icolp);CHKERRQ(ierr);
1586   ierr = ISSetPermutation(icolp);CHKERRQ(ierr);
1587   if (size>1) {
1588     ierr = ISRestoreIndices(colp,&rows);CHKERRQ(ierr);
1589     ierr = ISDestroy(lcolp);CHKERRQ(ierr);
1590   }
1591   /* now we just get the submatrix */
1592   ierr = MatGetSubMatrix_MPIAIJ_Private(A,lrowp,icolp,local_size,MAT_INITIAL_MATRIX,B);CHKERRQ(ierr);
1593   /* clean up */
1594   ierr = ISDestroy(lrowp);CHKERRQ(ierr);
1595   ierr = ISDestroy(icolp);CHKERRQ(ierr);
1596   PetscFunctionReturn(0);
1597 }
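/*
   A minimal usage sketch (illustrative only): given index sets rowp and colp describing row
   and column permutations (for example obtained from MatGetOrdering()), the routine above is
   reached through

     Mat B;
     ierr = MatPermute(A,rowp,colp,&B);CHKERRQ(ierr);
*/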
1598 
1599 #undef __FUNCT__
1600 #define __FUNCT__ "MatGetInfo_MPIAIJ"
1601 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1602 {
1603   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1604   Mat            A = mat->A,B = mat->B;
1605   PetscErrorCode ierr;
1606   PetscReal      isend[5],irecv[5];
1607 
1608   PetscFunctionBegin;
1609   info->block_size     = 1.0;
1610   ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1611   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1612   isend[3] = info->memory;  isend[4] = info->mallocs;
1613   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1614   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1615   isend[3] += info->memory;  isend[4] += info->mallocs;
1616   if (flag == MAT_LOCAL) {
1617     info->nz_used      = isend[0];
1618     info->nz_allocated = isend[1];
1619     info->nz_unneeded  = isend[2];
1620     info->memory       = isend[3];
1621     info->mallocs      = isend[4];
1622   } else if (flag == MAT_GLOBAL_MAX) {
1623     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,((PetscObject)matin)->comm);CHKERRQ(ierr);
1624     info->nz_used      = irecv[0];
1625     info->nz_allocated = irecv[1];
1626     info->nz_unneeded  = irecv[2];
1627     info->memory       = irecv[3];
1628     info->mallocs      = irecv[4];
1629   } else if (flag == MAT_GLOBAL_SUM) {
1630     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,((PetscObject)matin)->comm);CHKERRQ(ierr);
1631     info->nz_used      = irecv[0];
1632     info->nz_allocated = irecv[1];
1633     info->nz_unneeded  = irecv[2];
1634     info->memory       = irecv[3];
1635     info->mallocs      = irecv[4];
1636   }
1637   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1638   info->fill_ratio_needed = 0;
1639   info->factor_mallocs    = 0;
1640 
1641   PetscFunctionReturn(0);
1642 }
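/*
   A minimal usage sketch (illustrative only): querying the summed nonzero counts computed by
   the routine above.

     MatInfo info;
     ierr = MatGetInfo(A,MAT_GLOBAL_SUM,&info);CHKERRQ(ierr);
     ierr = PetscPrintf(((PetscObject)A)->comm,"nz used %D allocated %D\n",(PetscInt)info.nz_used,(PetscInt)info.nz_allocated);CHKERRQ(ierr);
*/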
1643 
1644 #undef __FUNCT__
1645 #define __FUNCT__ "MatSetOption_MPIAIJ"
1646 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscBool  flg)
1647 {
1648   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1649   PetscErrorCode ierr;
1650 
1651   PetscFunctionBegin;
1652   switch (op) {
1653   case MAT_NEW_NONZERO_LOCATIONS:
1654   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1655   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1656   case MAT_KEEP_NONZERO_PATTERN:
1657   case MAT_NEW_NONZERO_LOCATION_ERR:
1658   case MAT_USE_INODES:
1659   case MAT_IGNORE_ZERO_ENTRIES:
1660     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1661     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1662     break;
1663   case MAT_ROW_ORIENTED:
1664     a->roworiented = flg;
1665     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1666     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1667     break;
1668   case MAT_NEW_DIAGONALS:
1669     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1670     break;
1671   case MAT_IGNORE_OFF_PROC_ENTRIES:
1672     a->donotstash = PETSC_TRUE;
1673     break;
1674   case MAT_SPD:
1675     A->spd_set                         = PETSC_TRUE;
1676     A->spd                             = flg;
1677     if (flg) {
1678       A->symmetric                     = PETSC_TRUE;
1679       A->structurally_symmetric        = PETSC_TRUE;
1680       A->symmetric_set                 = PETSC_TRUE;
1681       A->structurally_symmetric_set    = PETSC_TRUE;
1682     }
1683     break;
1684   case MAT_SYMMETRIC:
1685     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1686     break;
1687   case MAT_STRUCTURALLY_SYMMETRIC:
1688     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1689     break;
1690   case MAT_HERMITIAN:
1691     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1692     break;
1693   case MAT_SYMMETRY_ETERNAL:
1694     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1695     break;
1696   default:
1697     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op);
1698   }
1699   PetscFunctionReturn(0);
1700 }
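/*
   A minimal usage sketch (illustrative only): options are forwarded to the diagonal (A) and
   off-diagonal (B) blocks as shown above; for example, to drop off-process entries during
   assembly:

     ierr = MatSetOption(A,MAT_IGNORE_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr);
*/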
1701 
1702 #undef __FUNCT__
1703 #define __FUNCT__ "MatGetRow_MPIAIJ"
1704 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1705 {
1706   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1707   PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
1708   PetscErrorCode ierr;
1709   PetscInt       i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart;
1710   PetscInt       nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend;
1711   PetscInt       *cmap,*idx_p;
1712 
1713   PetscFunctionBegin;
1714   if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
1715   mat->getrowactive = PETSC_TRUE;
1716 
1717   if (!mat->rowvalues && (idx || v)) {
1718     /*
1719         allocate enough space to hold information from the longest row.
1720     */
1721     Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data;
1722     PetscInt   max = 1,tmp;
1723     for (i=0; i<matin->rmap->n; i++) {
1724       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1725       if (max < tmp) { max = tmp; }
1726     }
1727     ierr = PetscMalloc2(max,PetscScalar,&mat->rowvalues,max,PetscInt,&mat->rowindices);CHKERRQ(ierr);
1728   }
1729 
1730   if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows");
1731   lrow = row - rstart;
1732 
1733   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1734   if (!v)   {pvA = 0; pvB = 0;}
1735   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1736   ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1737   ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1738   nztot = nzA + nzB;
1739 
1740   cmap  = mat->garray;
1741   if (v  || idx) {
1742     if (nztot) {
1743       /* Sort by increasing column numbers, assuming A and B already sorted */
1744       PetscInt imark = -1;
1745       if (v) {
1746         *v = v_p = mat->rowvalues;
1747         for (i=0; i<nzB; i++) {
1748           if (cmap[cworkB[i]] < cstart)   v_p[i] = vworkB[i];
1749           else break;
1750         }
1751         imark = i;
1752         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1753         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1754       }
1755       if (idx) {
1756         *idx = idx_p = mat->rowindices;
1757         if (imark > -1) {
1758           for (i=0; i<imark; i++) {
1759             idx_p[i] = cmap[cworkB[i]];
1760           }
1761         } else {
1762           for (i=0; i<nzB; i++) {
1763             if (cmap[cworkB[i]] < cstart)   idx_p[i] = cmap[cworkB[i]];
1764             else break;
1765           }
1766           imark = i;
1767         }
1768         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart + cworkA[i];
1769         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]];
1770       }
1771     } else {
1772       if (idx) *idx = 0;
1773       if (v)   *v   = 0;
1774     }
1775   }
1776   *nz = nztot;
1777   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1778   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1779   PetscFunctionReturn(0);
1780 }
1781 
1782 #undef __FUNCT__
1783 #define __FUNCT__ "MatRestoreRow_MPIAIJ"
1784 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1785 {
1786   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1787 
1788   PetscFunctionBegin;
1789   if (!aij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
1790   aij->getrowactive = PETSC_FALSE;
1791   PetscFunctionReturn(0);
1792 }
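/*
   A minimal usage sketch (illustrative only): the usual MatGetRow()/MatRestoreRow() pairing
   over the locally owned rows, the only range the routine above accepts; cols[] and vals[]
   would be processed between the two calls.

     PetscInt          row,rstart,rend,ncols;
     const PetscInt    *cols;
     const PetscScalar *vals;
     ierr = MatGetOwnershipRange(mat,&rstart,&rend);CHKERRQ(ierr);
     for (row=rstart; row<rend; row++) {
       ierr = MatGetRow(mat,row,&ncols,&cols,&vals);CHKERRQ(ierr);
       ierr = MatRestoreRow(mat,row,&ncols,&cols,&vals);CHKERRQ(ierr);
     }
*/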
1793 
1794 #undef __FUNCT__
1795 #define __FUNCT__ "MatNorm_MPIAIJ"
1796 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm)
1797 {
1798   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1799   Mat_SeqAIJ     *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data;
1800   PetscErrorCode ierr;
1801   PetscInt       i,j,cstart = mat->cmap->rstart;
1802   PetscReal      sum = 0.0;
1803   MatScalar      *v;
1804 
1805   PetscFunctionBegin;
1806   if (aij->size == 1) {
1807     ierr =  MatNorm(aij->A,type,norm);CHKERRQ(ierr);
1808   } else {
1809     if (type == NORM_FROBENIUS) {
1810       v = amat->a;
1811       for (i=0; i<amat->nz; i++) {
1812 #if defined(PETSC_USE_COMPLEX)
1813         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1814 #else
1815         sum += (*v)*(*v); v++;
1816 #endif
1817       }
1818       v = bmat->a;
1819       for (i=0; i<bmat->nz; i++) {
1820 #if defined(PETSC_USE_COMPLEX)
1821         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1822 #else
1823         sum += (*v)*(*v); v++;
1824 #endif
1825       }
1826       ierr = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPI_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr);
1827       *norm = sqrt(*norm);
1828     } else if (type == NORM_1) { /* max column norm */
1829       PetscReal *tmp,*tmp2;
1830       PetscInt  *jj,*garray = aij->garray;
1831       ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr);
1832       ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscReal),&tmp2);CHKERRQ(ierr);
1833       ierr = PetscMemzero(tmp,mat->cmap->N*sizeof(PetscReal));CHKERRQ(ierr);
1834       *norm = 0.0;
1835       v = amat->a; jj = amat->j;
1836       for (j=0; j<amat->nz; j++) {
1837         tmp[cstart + *jj++ ] += PetscAbsScalar(*v);  v++;
1838       }
1839       v = bmat->a; jj = bmat->j;
1840       for (j=0; j<bmat->nz; j++) {
1841         tmp[garray[*jj++]] += PetscAbsScalar(*v); v++;
1842       }
1843       ierr = MPI_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPI_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr);
1844       for (j=0; j<mat->cmap->N; j++) {
1845         if (tmp2[j] > *norm) *norm = tmp2[j];
1846       }
1847       ierr = PetscFree(tmp);CHKERRQ(ierr);
1848       ierr = PetscFree(tmp2);CHKERRQ(ierr);
1849     } else if (type == NORM_INFINITY) { /* max row norm */
1850       PetscReal ntemp = 0.0;
1851       for (j=0; j<aij->A->rmap->n; j++) {
1852         v = amat->a + amat->i[j];
1853         sum = 0.0;
1854         for (i=0; i<amat->i[j+1]-amat->i[j]; i++) {
1855           sum += PetscAbsScalar(*v); v++;
1856         }
1857         v = bmat->a + bmat->i[j];
1858         for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) {
1859           sum += PetscAbsScalar(*v); v++;
1860         }
1861         if (sum > ntemp) ntemp = sum;
1862       }
1863       ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPI_MAX,((PetscObject)mat)->comm);CHKERRQ(ierr);
1864     } else {
1865       SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_SUP,"No support for two norm");
1866     }
1867   }
1868   PetscFunctionReturn(0);
1869 }
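/*
   A minimal usage sketch (illustrative only): the Frobenius, 1- and infinity-norm branches
   above are reached through MatNorm(); the 2-norm is not supported for this type.

     PetscReal nrm;
     ierr = MatNorm(mat,NORM_FROBENIUS,&nrm);CHKERRQ(ierr);
*/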
1870 
1871 #undef __FUNCT__
1872 #define __FUNCT__ "MatTranspose_MPIAIJ"
1873 PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout)
1874 {
1875   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1876   Mat_SeqAIJ     *Aloc=(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data;
1877   PetscErrorCode ierr;
1878   PetscInt       M = A->rmap->N,N = A->cmap->N,ma,na,mb,*ai,*aj,*bi,*bj,row,*cols,*cols_tmp,i,*d_nnz;
1879   PetscInt       cstart=A->cmap->rstart,ncol;
1880   Mat            B;
1881   MatScalar      *array;
1882 
1883   PetscFunctionBegin;
1884   if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
1885 
1886   ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n;
1887   ai = Aloc->i; aj = Aloc->j;
1888   bi = Bloc->i; bj = Bloc->j;
1889   if (reuse == MAT_INITIAL_MATRIX || *matout == A) {
1890     /* compute d_nnz for preallocation; o_nnz is approximated by d_nnz to avoid communication */
1891     ierr = PetscMalloc((1+na)*sizeof(PetscInt),&d_nnz);CHKERRQ(ierr);
1892     ierr = PetscMemzero(d_nnz,(1+na)*sizeof(PetscInt));CHKERRQ(ierr);
1893     for (i=0; i<ai[ma]; i++){
1894       d_nnz[aj[i]] ++;
1895       aj[i] += cstart; /* global col index to be used by MatSetValues() */
1896     }
1897 
1898     ierr = MatCreate(((PetscObject)A)->comm,&B);CHKERRQ(ierr);
1899     ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
1900     ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
1901     ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,d_nnz);CHKERRQ(ierr);
1902     ierr = PetscFree(d_nnz);CHKERRQ(ierr);
1903   } else {
1904     B = *matout;
1905   }
1906 
1907   /* copy over the A part */
1908   array = Aloc->a;
1909   row = A->rmap->rstart;
1910   for (i=0; i<ma; i++) {
1911     ncol = ai[i+1]-ai[i];
1912     ierr = MatSetValues(B,ncol,aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1913     row++; array += ncol; aj += ncol;
1914   }
1915   aj = Aloc->j;
1916   for (i=0; i<ai[ma]; i++) aj[i] -= cstart; /* restore the local column indices */
1917 
1918   /* copy over the B part */
1919   ierr = PetscMalloc(bi[mb]*sizeof(PetscInt),&cols);CHKERRQ(ierr);
1920   ierr = PetscMemzero(cols,bi[mb]*sizeof(PetscInt));CHKERRQ(ierr);
1921   array = Bloc->a;
1922   row = A->rmap->rstart;
1923   for (i=0; i<bi[mb]; i++) {cols[i] = a->garray[bj[i]];}
1924   cols_tmp = cols;
1925   for (i=0; i<mb; i++) {
1926     ncol = bi[i+1]-bi[i];
1927     ierr = MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1928     row++; array += ncol; cols_tmp += ncol;
1929   }
1930   ierr = PetscFree(cols);CHKERRQ(ierr);
1931 
1932   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1933   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1934   if (reuse == MAT_INITIAL_MATRIX || *matout != A) {
1935     *matout = B;
1936   } else {
1937     ierr = MatHeaderMerge(A,B);CHKERRQ(ierr);
1938   }
1939   PetscFunctionReturn(0);
1940 }
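/*
   A minimal usage sketch (illustrative only): an out-of-place transpose as handled above;
   passing MAT_REUSE_MATRIX with matout pointing at A performs the (square-only) in-place
   variant.

     Mat At;
     ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr);
*/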
1941 
1942 #undef __FUNCT__
1943 #define __FUNCT__ "MatDiagonalScale_MPIAIJ"
1944 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
1945 {
1946   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1947   Mat            a = aij->A,b = aij->B;
1948   PetscErrorCode ierr;
1949   PetscInt       s1,s2,s3;
1950 
1951   PetscFunctionBegin;
1952   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
1953   if (rr) {
1954     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
1955     if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
1956     /* Overlap communication with computation. */
1957     ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1958   }
1959   if (ll) {
1960     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
1961     if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
1962     ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr);
1963   }
1964   /* scale the diagonal block */
1965   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
1966 
1967   if (rr) {
1968     /* Do a scatter end and then right scale the off-diagonal block */
1969     ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1970     ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr);
1971   }
1972 
1973   PetscFunctionReturn(0);
1974 }
1975 
1976 #undef __FUNCT__
1977 #define __FUNCT__ "MatSetBlockSize_MPIAIJ"
1978 PetscErrorCode MatSetBlockSize_MPIAIJ(Mat A,PetscInt bs)
1979 {
1980   Mat_MPIAIJ     *a   = (Mat_MPIAIJ*)A->data;
1981   PetscErrorCode ierr;
1982 
1983   PetscFunctionBegin;
1984   ierr = MatSetBlockSize(a->A,bs);CHKERRQ(ierr);
1985   ierr = MatSetBlockSize(a->B,bs);CHKERRQ(ierr);
1986   ierr = PetscLayoutSetBlockSize(A->rmap,bs);CHKERRQ(ierr);
1987   ierr = PetscLayoutSetBlockSize(A->cmap,bs);CHKERRQ(ierr);
1988   PetscFunctionReturn(0);
1989 }
1990 #undef __FUNCT__
1991 #define __FUNCT__ "MatSetUnfactored_MPIAIJ"
1992 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A)
1993 {
1994   Mat_MPIAIJ     *a   = (Mat_MPIAIJ*)A->data;
1995   PetscErrorCode ierr;
1996 
1997   PetscFunctionBegin;
1998   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
1999   PetscFunctionReturn(0);
2000 }
2001 
2002 #undef __FUNCT__
2003 #define __FUNCT__ "MatEqual_MPIAIJ"
2004 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscBool  *flag)
2005 {
2006   Mat_MPIAIJ     *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data;
2007   Mat            a,b,c,d;
2008   PetscBool      flg;
2009   PetscErrorCode ierr;
2010 
2011   PetscFunctionBegin;
2012   a = matA->A; b = matA->B;
2013   c = matB->A; d = matB->B;
2014 
2015   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
2016   if (flg) {
2017     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
2018   }
2019   ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,((PetscObject)A)->comm);CHKERRQ(ierr);
2020   PetscFunctionReturn(0);
2021 }
2022 
2023 #undef __FUNCT__
2024 #define __FUNCT__ "MatCopy_MPIAIJ"
2025 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
2026 {
2027   PetscErrorCode ierr;
2028   Mat_MPIAIJ     *a = (Mat_MPIAIJ *)A->data;
2029   Mat_MPIAIJ     *b = (Mat_MPIAIJ *)B->data;
2030 
2031   PetscFunctionBegin;
2032   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
2033   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
2034     /* because of the column compression in the off-processor part of the matrix a->B,
2035        the number of columns in a->B and b->B may differ, hence we cannot call
2036        MatCopy() directly on the two parts. If need be, a copy more efficient than
2037        MatCopy_Basic() could be provided by first uncompressing the a->B matrices and
2038        then copying the submatrices */
2039     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
2040   } else {
2041     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
2042     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
2043   }
2044   PetscFunctionReturn(0);
2045 }
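/*
   A minimal usage sketch (illustrative only): duplicating the layout first so that the fast
   per-block path above (SAME_NONZERO_PATTERN with matching copy implementations) can be
   taken.

     Mat B;
     ierr = MatDuplicate(A,MAT_DO_NOT_COPY_VALUES,&B);CHKERRQ(ierr);
     ierr = MatCopy(A,B,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
*/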
2046 
2047 #undef __FUNCT__
2048 #define __FUNCT__ "MatSetUpPreallocation_MPIAIJ"
2049 PetscErrorCode MatSetUpPreallocation_MPIAIJ(Mat A)
2050 {
2051   PetscErrorCode ierr;
2052 
2053   PetscFunctionBegin;
2054   ierr =  MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
2055   PetscFunctionReturn(0);
2056 }
2057 
2058 #undef __FUNCT__
2059 #define __FUNCT__ "MatAXPY_MPIAIJ"
2060 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
2061 {
2062   PetscErrorCode ierr;
2063   PetscInt       i;
2064   Mat_MPIAIJ     *xx = (Mat_MPIAIJ *)X->data,*yy = (Mat_MPIAIJ *)Y->data;
2065   PetscBLASInt   bnz,one=1;
2066   Mat_SeqAIJ     *x,*y;
2067 
2068   PetscFunctionBegin;
2069   if (str == SAME_NONZERO_PATTERN) {
2070     PetscScalar alpha = a;
2071     x = (Mat_SeqAIJ *)xx->A->data;
2072     y = (Mat_SeqAIJ *)yy->A->data;
2073     bnz = PetscBLASIntCast(x->nz);
2074     BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
2075     x = (Mat_SeqAIJ *)xx->B->data;
2076     y = (Mat_SeqAIJ *)yy->B->data;
2077     bnz = PetscBLASIntCast(x->nz);
2078     BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
2079   } else if (str == SUBSET_NONZERO_PATTERN) {
2080     ierr = MatAXPY_SeqAIJ(yy->A,a,xx->A,str);CHKERRQ(ierr);
2081 
2082     x = (Mat_SeqAIJ *)xx->B->data;
2083     y = (Mat_SeqAIJ *)yy->B->data;
2084     if (y->xtoy && y->XtoY != xx->B) {
2085       ierr = PetscFree(y->xtoy);CHKERRQ(ierr);
2086       ierr = MatDestroy(y->XtoY);CHKERRQ(ierr);
2087     }
2088     if (!y->xtoy) { /* get xtoy */
2089       ierr = MatAXPYGetxtoy_Private(xx->B->rmap->n,x->i,x->j,xx->garray,y->i,y->j,yy->garray,&y->xtoy);CHKERRQ(ierr);
2090       y->XtoY = xx->B;
2091       ierr = PetscObjectReference((PetscObject)xx->B);CHKERRQ(ierr);
2092     }
2093     for (i=0; i<x->nz; i++) y->a[y->xtoy[i]] += a*(x->a[i]);
2094   } else {
2095     Mat B;
2096     PetscInt *nnz_d,*nnz_o;
2097     ierr = PetscMalloc(yy->A->rmap->N*sizeof(PetscInt),&nnz_d);CHKERRQ(ierr);
2098     ierr = PetscMalloc(yy->B->rmap->N*sizeof(PetscInt),&nnz_o);CHKERRQ(ierr);
2099     ierr = MatCreate(((PetscObject)Y)->comm,&B);CHKERRQ(ierr);
2100     ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr);
2101     ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr);
2102     ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr);
2103     ierr = MatAXPYGetPreallocation_SeqAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr);
2104     ierr = MatAXPYGetPreallocation_SeqAIJ(yy->B,xx->B,nnz_o);CHKERRQ(ierr);
2105     ierr = MatMPIAIJSetPreallocation(B,PETSC_NULL,nnz_d,PETSC_NULL,nnz_o);CHKERRQ(ierr);
2106     ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr);
2107     ierr = MatHeaderReplace(Y,B);CHKERRQ(ierr);
2108     ierr = PetscFree(nnz_d);CHKERRQ(ierr);
2109     ierr = PetscFree(nnz_o);CHKERRQ(ierr);
2110   }
2111   PetscFunctionReturn(0);
2112 }
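/*
   A minimal usage sketch (illustrative only): Y = Y + a*X; the MatStructure argument selects
   one of the three branches above (direct BLAS axpy, subset index mapping, or full
   reallocation).

     ierr = MatAXPY(Y,2.0,X,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);
*/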
2113 
2114 EXTERN PetscErrorCode PETSCMAT_DLLEXPORT MatConjugate_SeqAIJ(Mat);
2115 
2116 #undef __FUNCT__
2117 #define __FUNCT__ "MatConjugate_MPIAIJ"
2118 PetscErrorCode PETSCMAT_DLLEXPORT MatConjugate_MPIAIJ(Mat mat)
2119 {
2120 #if defined(PETSC_USE_COMPLEX)
2121   PetscErrorCode ierr;
2122   Mat_MPIAIJ     *aij = (Mat_MPIAIJ *)mat->data;
2123 
2124   PetscFunctionBegin;
2125   ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr);
2126   ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr);
2127 #else
2128   PetscFunctionBegin;
2129 #endif
2130   PetscFunctionReturn(0);
2131 }
2132 
2133 #undef __FUNCT__
2134 #define __FUNCT__ "MatRealPart_MPIAIJ"
2135 PetscErrorCode MatRealPart_MPIAIJ(Mat A)
2136 {
2137   Mat_MPIAIJ   *a = (Mat_MPIAIJ*)A->data;
2138   PetscErrorCode ierr;
2139 
2140   PetscFunctionBegin;
2141   ierr = MatRealPart(a->A);CHKERRQ(ierr);
2142   ierr = MatRealPart(a->B);CHKERRQ(ierr);
2143   PetscFunctionReturn(0);
2144 }
2145 
2146 #undef __FUNCT__
2147 #define __FUNCT__ "MatImaginaryPart_MPIAIJ"
2148 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A)
2149 {
2150   Mat_MPIAIJ   *a = (Mat_MPIAIJ*)A->data;
2151   PetscErrorCode ierr;
2152 
2153   PetscFunctionBegin;
2154   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
2155   ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
2156   PetscFunctionReturn(0);
2157 }
2158 
2159 #if defined(PETSC_HAVE_PBGL)
2160 
2161 #include <boost/parallel/mpi/bsp_process_group.hpp>
2162 #include <boost/graph/distributed/ilu_default_graph.hpp>
2163 #include <boost/graph/distributed/ilu_0_block.hpp>
2164 #include <boost/graph/distributed/ilu_preconditioner.hpp>
2165 #include <boost/graph/distributed/petsc/interface.hpp>
2166 #include <boost/multi_array.hpp>
2167 #include <boost/parallel/distributed_property_map.hpp>
2168 
2169 #undef __FUNCT__
2170 #define __FUNCT__ "MatILUFactorSymbolic_MPIAIJ"
2171 /*
2172   This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu>
2173 */
2174 PetscErrorCode MatILUFactorSymbolic_MPIAIJ(Mat fact,Mat A, IS isrow, IS iscol, const MatFactorInfo *info)
2175 {
2176   namespace petsc = boost::distributed::petsc;
2177 
2178   namespace graph_dist = boost::graph::distributed;
2179   using boost::graph::distributed::ilu_default::process_group_type;
2180   using boost::graph::ilu_permuted;
2181 
2182   PetscBool       row_identity, col_identity;
2183   PetscContainer  c;
2184   PetscInt        m, n, M, N;
2185   PetscErrorCode  ierr;
2186 
2187   PetscFunctionBegin;
2188   if (info->levels != 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only levels = 0 supported for parallel ilu");
2189   ierr = ISIdentity(isrow, &row_identity);CHKERRQ(ierr);
2190   ierr = ISIdentity(iscol, &col_identity);CHKERRQ(ierr);
2191   if (!row_identity || !col_identity) {
2192     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for parallel ILU");
2193   }
2194 
2195   process_group_type pg;
2196   typedef graph_dist::ilu_default::ilu_level_graph_type  lgraph_type;
2197   lgraph_type*   lgraph_p = new lgraph_type(petsc::num_global_vertices(A), pg, petsc::matrix_distribution(A, pg));
2198   lgraph_type&   level_graph = *lgraph_p;
2199   graph_dist::ilu_default::graph_type&            graph(level_graph.graph);
2200 
2201   petsc::read_matrix(A, graph, get(boost::edge_weight, graph));
2202   ilu_permuted(level_graph);
2203 
2204   /* put together the new matrix */
2205   ierr = MatCreate(((PetscObject)A)->comm, fact);CHKERRQ(ierr);
2206   ierr = MatGetLocalSize(A, &m, &n);CHKERRQ(ierr);
2207   ierr = MatGetSize(A, &M, &N);CHKERRQ(ierr);
2208   ierr = MatSetSizes(fact, m, n, M, N);CHKERRQ(ierr);
2209   ierr = MatSetType(fact, ((PetscObject)A)->type_name);CHKERRQ(ierr);
2210   ierr = MatAssemblyBegin(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2211   ierr = MatAssemblyEnd(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2212 
2213   ierr = PetscContainerCreate(((PetscObject)A)->comm, &c);CHKERRQ(ierr);
2214   ierr = PetscContainerSetPointer(c, lgraph_p);CHKERRQ(ierr);
2215   ierr = PetscObjectCompose((PetscObject) (fact), "graph", (PetscObject) c);CHKERRQ(ierr);
2216   PetscFunctionReturn(0);
2217 }
2218 
2219 #undef __FUNCT__
2220 #define __FUNCT__ "MatLUFactorNumeric_MPIAIJ"
2221 PetscErrorCode MatLUFactorNumeric_MPIAIJ(Mat B,Mat A, const MatFactorInfo *info)
2222 {
2223   PetscFunctionBegin;
2224   PetscFunctionReturn(0);
2225 }
2226 
2227 #undef __FUNCT__
2228 #define __FUNCT__ "MatSolve_MPIAIJ"
2229 /*
2230   This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu>
2231 */
2232 PetscErrorCode MatSolve_MPIAIJ(Mat A, Vec b, Vec x)
2233 {
2234   namespace graph_dist = boost::graph::distributed;
2235 
2236   typedef graph_dist::ilu_default::ilu_level_graph_type  lgraph_type;
2237   lgraph_type*   lgraph_p;
2238   PetscContainer c;
2239   PetscErrorCode ierr;
2240 
2241   PetscFunctionBegin;
2242   ierr = PetscObjectQuery((PetscObject) A, "graph", (PetscObject *) &c);CHKERRQ(ierr);
2243   ierr = PetscContainerGetPointer(c, (void **) &lgraph_p);CHKERRQ(ierr);
2244   ierr = VecCopy(b, x);CHKERRQ(ierr);
2245 
2246   PetscScalar* array_x;
2247   ierr = VecGetArray(x, &array_x);CHKERRQ(ierr);
2248   PetscInt sx;
2249   ierr = VecGetSize(x, &sx);CHKERRQ(ierr);
2250 
2251   PetscScalar* array_b;
2252   ierr = VecGetArray(b, &array_b);CHKERRQ(ierr);
2253   PetscInt sb;
2254   ierr = VecGetSize(b, &sb);CHKERRQ(ierr);
2255 
2256   lgraph_type&   level_graph = *lgraph_p;
2257   graph_dist::ilu_default::graph_type&            graph(level_graph.graph);
2258 
2259   typedef boost::multi_array_ref<PetscScalar, 1> array_ref_type;
2260   array_ref_type                                 ref_b(array_b, boost::extents[num_vertices(graph)]),
2261                                                  ref_x(array_x, boost::extents[num_vertices(graph)]);
2262 
2263   typedef boost::iterator_property_map<array_ref_type::iterator,
2264                                 boost::property_map<graph_dist::ilu_default::graph_type, boost::vertex_index_t>::type>  gvector_type;
2265   gvector_type                                   vector_b(ref_b.begin(), get(boost::vertex_index, graph)),
2266                                                  vector_x(ref_x.begin(), get(boost::vertex_index, graph));
2267 
2268   ilu_set_solve(*lgraph_p, vector_b, vector_x);
2269 
2270   PetscFunctionReturn(0);
2271 }
2272 #endif
2273 
2274 typedef struct { /* used by MatGetRedundantMatrix() for reusing matredundant */
2275   PetscInt       nzlocal,nsends,nrecvs;
2276   PetscMPIInt    *send_rank,*recv_rank;
2277   PetscInt       *sbuf_nz,*rbuf_nz,*sbuf_j,**rbuf_j;
2278   PetscScalar    *sbuf_a,**rbuf_a;
2279   PetscErrorCode (*MatDestroy)(Mat);
2280 } Mat_Redundant;
2281 
2282 #undef __FUNCT__
2283 #define __FUNCT__ "PetscContainerDestroy_MatRedundant"
2284 PetscErrorCode PetscContainerDestroy_MatRedundant(void *ptr)
2285 {
2286   PetscErrorCode       ierr;
2287   Mat_Redundant        *redund=(Mat_Redundant*)ptr;
2288   PetscInt             i;
2289 
2290   PetscFunctionBegin;
2291   ierr = PetscFree2(redund->send_rank,redund->recv_rank);CHKERRQ(ierr);
2292   ierr = PetscFree(redund->sbuf_j);CHKERRQ(ierr);
2293   ierr = PetscFree(redund->sbuf_a);CHKERRQ(ierr);
2294   for (i=0; i<redund->nrecvs; i++){
2295     ierr = PetscFree(redund->rbuf_j[i]);CHKERRQ(ierr);
2296     ierr = PetscFree(redund->rbuf_a[i]);CHKERRQ(ierr);
2297   }
2298   ierr = PetscFree4(redund->sbuf_nz,redund->rbuf_nz,redund->rbuf_j,redund->rbuf_a);CHKERRQ(ierr);
2299   ierr = PetscFree(redund);CHKERRQ(ierr);
2300   PetscFunctionReturn(0);
2301 }
2302 
2303 #undef __FUNCT__
2304 #define __FUNCT__ "MatDestroy_MatRedundant"
2305 PetscErrorCode MatDestroy_MatRedundant(Mat A)
2306 {
2307   PetscErrorCode  ierr;
2308   PetscContainer  container;
2309   Mat_Redundant   *redund=PETSC_NULL;
2310 
2311   PetscFunctionBegin;
2312   ierr = PetscObjectQuery((PetscObject)A,"Mat_Redundant",(PetscObject *)&container);CHKERRQ(ierr);
2313   if (container) {
2314     ierr = PetscContainerGetPointer(container,(void **)&redund);CHKERRQ(ierr);
2315   } else {
2316     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Container does not exist");
2317   }
2318   A->ops->destroy = redund->MatDestroy;
2319   ierr = PetscObjectCompose((PetscObject)A,"Mat_Redundant",0);CHKERRQ(ierr);
2320   ierr = (*A->ops->destroy)(A);CHKERRQ(ierr);
2321   ierr = PetscContainerDestroy(container);CHKERRQ(ierr);
2322   PetscFunctionReturn(0);
2323 }
2324 
2325 #undef __FUNCT__
2326 #define __FUNCT__ "MatGetRedundantMatrix_MPIAIJ"
2327 PetscErrorCode MatGetRedundantMatrix_MPIAIJ(Mat mat,PetscInt nsubcomm,MPI_Comm subcomm,PetscInt mlocal_sub,MatReuse reuse,Mat *matredundant)
2328 {
2329   PetscMPIInt    rank,size;
2330   MPI_Comm       comm=((PetscObject)mat)->comm;
2331   PetscErrorCode ierr;
2332   PetscInt       nsends=0,nrecvs=0,i,rownz_max=0;
2333   PetscMPIInt    *send_rank=PETSC_NULL,*recv_rank=PETSC_NULL;
2334   PetscInt       *rowrange=mat->rmap->range;
2335   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2336   Mat            A=aij->A,B=aij->B,C=*matredundant;
2337   Mat_SeqAIJ     *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data;
2338   PetscScalar    *sbuf_a;
2339   PetscInt       nzlocal=a->nz+b->nz;
2340   PetscInt       j,cstart=mat->cmap->rstart,cend=mat->cmap->rend,row,nzA,nzB,ncols,*cworkA,*cworkB;
2341   PetscInt       rstart=mat->rmap->rstart,rend=mat->rmap->rend,*bmap=aij->garray,M,N;
2342   PetscInt       *cols,ctmp,lwrite,*rptr,l,*sbuf_j;
2343   MatScalar      *aworkA,*aworkB;
2344   PetscScalar    *vals;
2345   PetscMPIInt    tag1,tag2,tag3,imdex;
2346   MPI_Request    *s_waits1=PETSC_NULL,*s_waits2=PETSC_NULL,*s_waits3=PETSC_NULL,
2347                  *r_waits1=PETSC_NULL,*r_waits2=PETSC_NULL,*r_waits3=PETSC_NULL;
2348   MPI_Status     recv_status,*send_status;
2349   PetscInt       *sbuf_nz=PETSC_NULL,*rbuf_nz=PETSC_NULL,count;
2350   PetscInt       **rbuf_j=PETSC_NULL;
2351   PetscScalar    **rbuf_a=PETSC_NULL;
2352   Mat_Redundant  *redund=PETSC_NULL;
2353   PetscContainer container;
2354 
2355   PetscFunctionBegin;
2356   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2357   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2358 
2359   if (reuse == MAT_REUSE_MATRIX) {
2360     ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr);
2361     if (M != N || M != mat->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong global size");
2362     ierr = MatGetLocalSize(C,&M,&N);CHKERRQ(ierr);
2363     if (M != N || M != mlocal_sub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong local size");
2364     ierr = PetscObjectQuery((PetscObject)C,"Mat_Redundant",(PetscObject *)&container);CHKERRQ(ierr);
2365     if (container) {
2366       ierr = PetscContainerGetPointer(container,(void **)&redund);CHKERRQ(ierr);
2367     } else {
2368       SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Container does not exist");
2369     }
2370     if (nzlocal != redund->nzlocal) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong nzlocal");
2371 
2372     nsends    = redund->nsends;
2373     nrecvs    = redund->nrecvs;
2374     send_rank = redund->send_rank;
2375     recv_rank = redund->recv_rank;
2376     sbuf_nz   = redund->sbuf_nz;
2377     rbuf_nz   = redund->rbuf_nz;
2378     sbuf_j    = redund->sbuf_j;
2379     sbuf_a    = redund->sbuf_a;
2380     rbuf_j    = redund->rbuf_j;
2381     rbuf_a    = redund->rbuf_a;
2382   }
2383 
2384   if (reuse == MAT_INITIAL_MATRIX){
2385     PetscMPIInt  subrank,subsize;
2386     PetscInt     nleftover,np_subcomm;
2387     /* get the destination processors' id send_rank, nsends and nrecvs */
2388     ierr = MPI_Comm_rank(subcomm,&subrank);CHKERRQ(ierr);
2389     ierr = MPI_Comm_size(subcomm,&subsize);CHKERRQ(ierr);
2390     ierr = PetscMalloc2(size,PetscMPIInt,&send_rank,size,PetscMPIInt,&recv_rank);CHKERRQ(ierr);
2391     np_subcomm = size/nsubcomm;
2392     nleftover  = size - nsubcomm*np_subcomm;
2393     nsends = 0; nrecvs = 0;
2394     for (i=0; i<size; i++){ /* i=rank*/
2395       if (subrank == i/nsubcomm && rank != i){ /* my_subrank == other's subrank */
2396         send_rank[nsends] = i; nsends++;
2397         recv_rank[nrecvs++] = i;
2398       }
2399     }
2400     if (rank >= size - nleftover){/* this proc is a leftover processor */
2401       i = size-nleftover-1;
2402       j = 0;
2403       while (j < nsubcomm - nleftover){
2404         send_rank[nsends++] = i;
2405         i--; j++;
2406       }
2407     }
2408 
2409     if (nleftover && subsize == size/nsubcomm && subrank==subsize-1){ /* this proc recvs from leftover processors */
2410       for (i=0; i<nleftover; i++){
2411         recv_rank[nrecvs++] = size-nleftover+i;
2412       }
2413     }
2414 
2415     /* allocate sbuf_j, sbuf_a */
2416     i = nzlocal + rowrange[rank+1] - rowrange[rank] + 2;
2417     ierr = PetscMalloc(i*sizeof(PetscInt),&sbuf_j);CHKERRQ(ierr);
2418     ierr = PetscMalloc((nzlocal+1)*sizeof(PetscScalar),&sbuf_a);CHKERRQ(ierr);
2419   } /* end of if (reuse == MAT_INITIAL_MATRIX) */
2420 
2421   /* copy mat's local entries into the buffers */
2422   if (reuse == MAT_INITIAL_MATRIX){
2423     rownz_max = 0;
2424     rptr = sbuf_j;
2425     cols = sbuf_j + rend-rstart + 1;
2426     vals = sbuf_a;
2427     rptr[0] = 0;
2428     for (i=0; i<rend-rstart; i++){
2429       row = i + rstart;
2430       nzA    = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i];
2431       ncols  = nzA + nzB;
2432       cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i];
2433       aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i];
2434       /* load the column indices for this row into cols */
2435       lwrite = 0;
2436       for (l=0; l<nzB; l++) {
2437         if ((ctmp = bmap[cworkB[l]]) < cstart){
2438           vals[lwrite]   = aworkB[l];
2439           cols[lwrite++] = ctmp;
2440         }
2441       }
2442       for (l=0; l<nzA; l++){
2443         vals[lwrite]   = aworkA[l];
2444         cols[lwrite++] = cstart + cworkA[l];
2445       }
2446       for (l=0; l<nzB; l++) {
2447         if ((ctmp = bmap[cworkB[l]]) >= cend){
2448           vals[lwrite]   = aworkB[l];
2449           cols[lwrite++] = ctmp;
2450         }
2451       }
2452       vals += ncols;
2453       cols += ncols;
2454       rptr[i+1] = rptr[i] + ncols;
2455       if (rownz_max < ncols) rownz_max = ncols;
2456     }
2457     if (rptr[rend-rstart] != a->nz + b->nz) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB, "rptr[%d] %d != %d + %d",rend-rstart,rptr[rend-rstart],a->nz,b->nz);
2458   } else { /* only copy matrix values into sbuf_a */
2459     rptr = sbuf_j;
2460     vals = sbuf_a;
2461     rptr[0] = 0;
2462     for (i=0; i<rend-rstart; i++){
2463       row = i + rstart;
2464       nzA    = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i];
2465       ncols  = nzA + nzB;
2466       cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i];
2467       aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i];
2468       lwrite = 0;
2469       for (l=0; l<nzB; l++) {
2470         if ((ctmp = bmap[cworkB[l]]) < cstart) vals[lwrite++] = aworkB[l];
2471       }
2472       for (l=0; l<nzA; l++) vals[lwrite++] = aworkA[l];
2473       for (l=0; l<nzB; l++) {
2474         if ((ctmp = bmap[cworkB[l]]) >= cend) vals[lwrite++] = aworkB[l];
2475       }
2476       vals += ncols;
2477       rptr[i+1] = rptr[i] + ncols;
2478     }
2479   } /* end of if (reuse == MAT_INITIAL_MATRIX) */
2480 
2481   /* send nzlocal to others, and recv other's nzlocal */
2482   /*--------------------------------------------------*/
2483   if (reuse == MAT_INITIAL_MATRIX){
2484     ierr = PetscMalloc2(3*(nsends + nrecvs)+1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr);
2485     s_waits2 = s_waits3 + nsends;
2486     s_waits1 = s_waits2 + nsends;
2487     r_waits1 = s_waits1 + nsends;
2488     r_waits2 = r_waits1 + nrecvs;
2489     r_waits3 = r_waits2 + nrecvs;
2490   } else {
2491     ierr = PetscMalloc2(nsends + nrecvs +1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr);
2492     r_waits3 = s_waits3 + nsends;
2493   }
2494 
2495   ierr = PetscObjectGetNewTag((PetscObject)mat,&tag3);CHKERRQ(ierr);
2496   if (reuse == MAT_INITIAL_MATRIX){
2497     /* get new tags to keep the communication clean */
2498     ierr = PetscObjectGetNewTag((PetscObject)mat,&tag1);CHKERRQ(ierr);
2499     ierr = PetscObjectGetNewTag((PetscObject)mat,&tag2);CHKERRQ(ierr);
2500     ierr = PetscMalloc4(nsends,PetscInt,&sbuf_nz,nrecvs,PetscInt,&rbuf_nz,nrecvs,PetscInt*,&rbuf_j,nrecvs,PetscScalar*,&rbuf_a);CHKERRQ(ierr);
2501 
2502     /* post receives of other's nzlocal */
2503     for (i=0; i<nrecvs; i++){
2504       ierr = MPI_Irecv(rbuf_nz+i,1,MPIU_INT,MPI_ANY_SOURCE,tag1,comm,r_waits1+i);CHKERRQ(ierr);
2505     }
2506     /* send nzlocal to others */
2507     for (i=0; i<nsends; i++){
2508       sbuf_nz[i] = nzlocal;
2509       ierr = MPI_Isend(sbuf_nz+i,1,MPIU_INT,send_rank[i],tag1,comm,s_waits1+i);CHKERRQ(ierr);
2510     }
2511     /* wait on receives of nzlocal; allocate space for rbuf_j, rbuf_a */
2512     count = nrecvs;
2513     while (count) {
2514       ierr = MPI_Waitany(nrecvs,r_waits1,&imdex,&recv_status);CHKERRQ(ierr);
2515       recv_rank[imdex] = recv_status.MPI_SOURCE;
2516       /* allocate rbuf_a and rbuf_j; then post receives of rbuf_j */
2517       ierr = PetscMalloc((rbuf_nz[imdex]+1)*sizeof(PetscScalar),&rbuf_a[imdex]);CHKERRQ(ierr);
2518 
2519       i = rowrange[recv_status.MPI_SOURCE+1] - rowrange[recv_status.MPI_SOURCE]; /* number of expected mat->i */
2520       rbuf_nz[imdex] += i + 2;
2521       ierr = PetscMalloc(rbuf_nz[imdex]*sizeof(PetscInt),&rbuf_j[imdex]);CHKERRQ(ierr);
2522       ierr = MPI_Irecv(rbuf_j[imdex],rbuf_nz[imdex],MPIU_INT,recv_status.MPI_SOURCE,tag2,comm,r_waits2+imdex);CHKERRQ(ierr);
2523       count--;
2524     }
2525     /* wait on sends of nzlocal */
2526     if (nsends) {ierr = MPI_Waitall(nsends,s_waits1,send_status);CHKERRQ(ierr);}
2527     /* send mat->i,j to others, and recv from other's */
2528     /*------------------------------------------------*/
2529     for (i=0; i<nsends; i++){
2530       j = nzlocal + rowrange[rank+1] - rowrange[rank] + 1;
2531       ierr = MPI_Isend(sbuf_j,j,MPIU_INT,send_rank[i],tag2,comm,s_waits2+i);CHKERRQ(ierr);
2532     }
2533     /* wait on receives of mat->i,j */
2534     /*------------------------------*/
2535     count = nrecvs;
2536     while (count) {
2537       ierr = MPI_Waitany(nrecvs,r_waits2,&imdex,&recv_status);CHKERRQ(ierr);
2538       if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE);
2539       count--;
2540     }
2541     /* wait on sends of mat->i,j */
2542     /*---------------------------*/
2543     if (nsends) {
2544       ierr = MPI_Waitall(nsends,s_waits2,send_status);CHKERRQ(ierr);
2545     }
2546   } /* end of if (reuse == MAT_INITIAL_MATRIX) */
2547 
2548   /* post receives, send and receive mat->a */
2549   /*----------------------------------------*/
2550   for (imdex=0; imdex<nrecvs; imdex++) {
2551     ierr = MPI_Irecv(rbuf_a[imdex],rbuf_nz[imdex],MPIU_SCALAR,recv_rank[imdex],tag3,comm,r_waits3+imdex);CHKERRQ(ierr);
2552   }
2553   for (i=0; i<nsends; i++){
2554     ierr = MPI_Isend(sbuf_a,nzlocal,MPIU_SCALAR,send_rank[i],tag3,comm,s_waits3+i);CHKERRQ(ierr);
2555   }
2556   count = nrecvs;
2557   while (count) {
2558     ierr = MPI_Waitany(nrecvs,r_waits3,&imdex,&recv_status);CHKERRQ(ierr);
2559     if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE);
2560     count--;
2561   }
2562   if (nsends) {
2563     ierr = MPI_Waitall(nsends,s_waits3,send_status);CHKERRQ(ierr);
2564   }
2565 
2566   ierr = PetscFree2(s_waits3,send_status);CHKERRQ(ierr);
2567 
2568   /* create redundant matrix */
2569   /*-------------------------*/
2570   if (reuse == MAT_INITIAL_MATRIX){
2571     /* compute rownz_max for preallocation */
2572     for (imdex=0; imdex<nrecvs; imdex++){
2573       j = rowrange[recv_rank[imdex]+1] - rowrange[recv_rank[imdex]];
2574       rptr = rbuf_j[imdex];
2575       for (i=0; i<j; i++){
2576         ncols = rptr[i+1] - rptr[i];
2577         if (rownz_max < ncols) rownz_max = ncols;
2578       }
2579     }
2580 
2581     ierr = MatCreate(subcomm,&C);CHKERRQ(ierr);
2582     ierr = MatSetSizes(C,mlocal_sub,mlocal_sub,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr);
2583     ierr = MatSetFromOptions(C);CHKERRQ(ierr);
2584     ierr = MatSeqAIJSetPreallocation(C,rownz_max,PETSC_NULL);CHKERRQ(ierr);
2585     ierr = MatMPIAIJSetPreallocation(C,rownz_max,PETSC_NULL,rownz_max,PETSC_NULL);CHKERRQ(ierr);
2586   } else {
2587     C = *matredundant;
2588   }
2589 
2590   /* insert local matrix entries */
2591   rptr = sbuf_j;
2592   cols = sbuf_j + rend-rstart + 1;
2593   vals = sbuf_a;
2594   for (i=0; i<rend-rstart; i++){
2595     row   = i + rstart;
2596     ncols = rptr[i+1] - rptr[i];
2597     ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
2598     vals += ncols;
2599     cols += ncols;
2600   }
2601   /* insert received matrix entries */
2602   for (imdex=0; imdex<nrecvs; imdex++){
2603     rstart = rowrange[recv_rank[imdex]];
2604     rend   = rowrange[recv_rank[imdex]+1];
2605     rptr = rbuf_j[imdex];
2606     cols = rbuf_j[imdex] + rend-rstart + 1;
2607     vals = rbuf_a[imdex];
2608     for (i=0; i<rend-rstart; i++){
2609       row   = i + rstart;
2610       ncols = rptr[i+1] - rptr[i];
2611       ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
2612       vals += ncols;
2613       cols += ncols;
2614     }
2615   }
2616   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2617   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2618   ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr);
2619   if (M != mat->rmap->N || N != mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"redundant mat size %d != input mat size %d",M,mat->rmap->N);
2620   if (reuse == MAT_INITIAL_MATRIX){
2621     PetscContainer container;
2622     *matredundant = C;
2623     /* create a supporting struct and attach it to C for reuse */
2624     ierr = PetscNewLog(C,Mat_Redundant,&redund);CHKERRQ(ierr);
2625     ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
2626     ierr = PetscContainerSetPointer(container,redund);CHKERRQ(ierr);
2627     ierr = PetscObjectCompose((PetscObject)C,"Mat_Redundant",(PetscObject)container);CHKERRQ(ierr);
2628     ierr = PetscContainerSetUserDestroy(container,PetscContainerDestroy_MatRedundant);CHKERRQ(ierr);
2629 
2630     redund->nzlocal = nzlocal;
2631     redund->nsends  = nsends;
2632     redund->nrecvs  = nrecvs;
2633     redund->send_rank = send_rank;
2634     redund->recv_rank = recv_rank;
2635     redund->sbuf_nz = sbuf_nz;
2636     redund->rbuf_nz = rbuf_nz;
2637     redund->sbuf_j  = sbuf_j;
2638     redund->sbuf_a  = sbuf_a;
2639     redund->rbuf_j  = rbuf_j;
2640     redund->rbuf_a  = rbuf_a;
2641 
2642     redund->MatDestroy = C->ops->destroy;
2643     C->ops->destroy    = MatDestroy_MatRedundant;
2644   }
2645   PetscFunctionReturn(0);
2646 }
2647 
2648 #undef __FUNCT__
2649 #define __FUNCT__ "MatGetRowMaxAbs_MPIAIJ"
2650 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2651 {
2652   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2653   PetscErrorCode ierr;
2654   PetscInt       i,*idxb = 0;
2655   PetscScalar    *va,*vb;
2656   Vec            vtmp;
2657 
2658   PetscFunctionBegin;
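  /* Take the row-wise max of |a_ij| over the diagonal block first (its local column indices are
     shifted to global numbering with A->cmap->rstart), then over the off-diagonal block; whichever
     is larger per row wins, with off-diagonal indices mapped back to global ones through a->garray. */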
2659   ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr);
2660   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2661   if (idx) {
2662     for (i=0; i<A->rmap->n; i++) {
2663       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2664     }
2665   }
2666 
2667   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
2668   if (idx) {
2669     ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr);
2670   }
2671   ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
2672   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
2673 
2674   for (i=0; i<A->rmap->n; i++){
2675     if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
2676       va[i] = vb[i];
2677       if (idx) idx[i] = a->garray[idxb[i]];
2678     }
2679   }
2680 
2681   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2682   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
2683   if (idxb) {
2684     ierr = PetscFree(idxb);CHKERRQ(ierr);
2685   }
2686   ierr = VecDestroy(vtmp);CHKERRQ(ierr);
2687   PetscFunctionReturn(0);
2688 }
2689 
2690 #undef __FUNCT__
2691 #define __FUNCT__ "MatGetRowMinAbs_MPIAIJ"
2692 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2693 {
2694   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2695   PetscErrorCode ierr;
2696   PetscInt       i,*idxb = 0;
2697   PetscScalar    *va,*vb;
2698   Vec            vtmp;
2699 
2700   PetscFunctionBegin;
2701   ierr = MatGetRowMinAbs(a->A,v,idx);CHKERRQ(ierr);
2702   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2703   if (idx) {
2704     for (i=0; i<A->cmap->n; i++) {
2705       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2706     }
2707   }
2708 
2709   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
2710   if (idx) {
2711     ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr);
2712   }
2713   ierr = MatGetRowMinAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
2714   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
2715 
2716   for (i=0; i<A->rmap->n; i++){
2717     if (PetscAbsScalar(va[i]) > PetscAbsScalar(vb[i])) {
2718       va[i] = vb[i];
2719       if (idx) idx[i] = a->garray[idxb[i]];
2720     }
2721   }
2722 
2723   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2724   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
2725   if (idxb) {
2726     ierr = PetscFree(idxb);CHKERRQ(ierr);
2727   }
2728   ierr = VecDestroy(vtmp);CHKERRQ(ierr);
2729   PetscFunctionReturn(0);
2730 }
2731 
2732 #undef __FUNCT__
2733 #define __FUNCT__ "MatGetRowMin_MPIAIJ"
2734 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2735 {
2736   Mat_MPIAIJ    *mat    = (Mat_MPIAIJ *) A->data;
2737   PetscInt       n      = A->rmap->n;
2738   PetscInt       cstart = A->cmap->rstart;
2739   PetscInt      *cmap   = mat->garray;
2740   PetscInt      *diagIdx, *offdiagIdx;
2741   Vec            diagV, offdiagV;
2742   PetscScalar   *a, *diagA, *offdiagA;
2743   PetscInt       r;
2744   PetscErrorCode ierr;
2745 
2746   PetscFunctionBegin;
2747   ierr = PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);CHKERRQ(ierr);
2748   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr);
2749   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr);
2750   ierr = MatGetRowMin(mat->A, diagV,    diagIdx);CHKERRQ(ierr);
2751   ierr = MatGetRowMin(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
2752   ierr = VecGetArray(v,        &a);CHKERRQ(ierr);
2753   ierr = VecGetArray(diagV,    &diagA);CHKERRQ(ierr);
2754   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2755   for(r = 0; r < n; ++r) {
2756     if (PetscAbsScalar(diagA[r]) <= PetscAbsScalar(offdiagA[r])) {
2757       a[r]   = diagA[r];
2758       idx[r] = cstart + diagIdx[r];
2759     } else {
2760       a[r]   = offdiagA[r];
2761       idx[r] = cmap[offdiagIdx[r]];
2762     }
2763   }
2764   ierr = VecRestoreArray(v,        &a);CHKERRQ(ierr);
2765   ierr = VecRestoreArray(diagV,    &diagA);CHKERRQ(ierr);
2766   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2767   ierr = VecDestroy(diagV);CHKERRQ(ierr);
2768   ierr = VecDestroy(offdiagV);CHKERRQ(ierr);
2769   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
2770   PetscFunctionReturn(0);
2771 }
2772 
2773 #undef __FUNCT__
2774 #define __FUNCT__ "MatGetRowMax_MPIAIJ"
2775 PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2776 {
2777   Mat_MPIAIJ    *mat    = (Mat_MPIAIJ *) A->data;
2778   PetscInt       n      = A->rmap->n;
2779   PetscInt       cstart = A->cmap->rstart;
2780   PetscInt      *cmap   = mat->garray;
2781   PetscInt      *diagIdx, *offdiagIdx;
2782   Vec            diagV, offdiagV;
2783   PetscScalar   *a, *diagA, *offdiagA;
2784   PetscInt       r;
2785   PetscErrorCode ierr;
2786 
2787   PetscFunctionBegin;
2788   ierr = PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);CHKERRQ(ierr);
2789   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr);
2790   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr);
2791   ierr = MatGetRowMax(mat->A, diagV,    diagIdx);CHKERRQ(ierr);
2792   ierr = MatGetRowMax(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
2793   ierr = VecGetArray(v,        &a);CHKERRQ(ierr);
2794   ierr = VecGetArray(diagV,    &diagA);CHKERRQ(ierr);
2795   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2796   for(r = 0; r < n; ++r) {
2797     if (PetscAbsScalar(diagA[r]) >= PetscAbsScalar(offdiagA[r])) {
2798       a[r]   = diagA[r];
2799       idx[r] = cstart + diagIdx[r];
2800     } else {
2801       a[r]   = offdiagA[r];
2802       idx[r] = cmap[offdiagIdx[r]];
2803     }
2804   }
2805   ierr = VecRestoreArray(v,        &a);CHKERRQ(ierr);
2806   ierr = VecRestoreArray(diagV,    &diagA);CHKERRQ(ierr);
2807   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2808   ierr = VecDestroy(diagV);CHKERRQ(ierr);
2809   ierr = VecDestroy(offdiagV);CHKERRQ(ierr);
2810   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
2811   PetscFunctionReturn(0);
2812 }
2813 
2814 #undef __FUNCT__
2815 #define __FUNCT__ "MatGetSeqNonzerostructure_MPIAIJ"
2816 PetscErrorCode MatGetSeqNonzerostructure_MPIAIJ(Mat mat,Mat *newmat)
2817 {
2818   PetscErrorCode ierr;
2819   Mat            *dummy;
2820 
2821   PetscFunctionBegin;
2822   ierr = MatGetSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy);CHKERRQ(ierr);
2823   *newmat = *dummy;
2824   ierr = PetscFree(dummy);CHKERRQ(ierr);
2825   PetscFunctionReturn(0);
2826 }
2827 
2828 extern PetscErrorCode PETSCMAT_DLLEXPORT MatFDColoringApply_AIJ(Mat,MatFDColoring,Vec,MatStructure*,void*);
2829 /* -------------------------------------------------------------------*/
2830 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
2831        MatGetRow_MPIAIJ,
2832        MatRestoreRow_MPIAIJ,
2833        MatMult_MPIAIJ,
2834 /* 4*/ MatMultAdd_MPIAIJ,
2835        MatMultTranspose_MPIAIJ,
2836        MatMultTransposeAdd_MPIAIJ,
2837 #ifdef PETSC_HAVE_PBGL
2838        MatSolve_MPIAIJ,
2839 #else
2840        0,
2841 #endif
2842        0,
2843        0,
2844 /*10*/ 0,
2845        0,
2846        0,
2847        MatSOR_MPIAIJ,
2848        MatTranspose_MPIAIJ,
2849 /*15*/ MatGetInfo_MPIAIJ,
2850        MatEqual_MPIAIJ,
2851        MatGetDiagonal_MPIAIJ,
2852        MatDiagonalScale_MPIAIJ,
2853        MatNorm_MPIAIJ,
2854 /*20*/ MatAssemblyBegin_MPIAIJ,
2855        MatAssemblyEnd_MPIAIJ,
2856        MatSetOption_MPIAIJ,
2857        MatZeroEntries_MPIAIJ,
2858 /*24*/ MatZeroRows_MPIAIJ,
2859        0,
2860 #ifdef PETSC_HAVE_PBGL
2861        0,
2862 #else
2863        0,
2864 #endif
2865        0,
2866        0,
2867 /*29*/ MatSetUpPreallocation_MPIAIJ,
2868 #ifdef PETSC_HAVE_PBGL
2869        0,
2870 #else
2871        0,
2872 #endif
2873        0,
2874        0,
2875        0,
2876 /*34*/ MatDuplicate_MPIAIJ,
2877        0,
2878        0,
2879        0,
2880        0,
2881 /*39*/ MatAXPY_MPIAIJ,
2882        MatGetSubMatrices_MPIAIJ,
2883        MatIncreaseOverlap_MPIAIJ,
2884        MatGetValues_MPIAIJ,
2885        MatCopy_MPIAIJ,
2886 /*44*/ MatGetRowMax_MPIAIJ,
2887        MatScale_MPIAIJ,
2888        0,
2889        0,
2890        MatZeroRowsColumns_MPIAIJ,
2891 /*49*/ MatSetBlockSize_MPIAIJ,
2892        0,
2893        0,
2894        0,
2895        0,
2896 /*54*/ MatFDColoringCreate_MPIAIJ,
2897        0,
2898        MatSetUnfactored_MPIAIJ,
2899        MatPermute_MPIAIJ,
2900        0,
2901 /*59*/ MatGetSubMatrix_MPIAIJ,
2902        MatDestroy_MPIAIJ,
2903        MatView_MPIAIJ,
2904        0,
2905        0,
2906 /*64*/ 0,
2907        0,
2908        0,
2909        0,
2910        0,
2911 /*69*/ MatGetRowMaxAbs_MPIAIJ,
2912        MatGetRowMinAbs_MPIAIJ,
2913        0,
2914        MatSetColoring_MPIAIJ,
2915 #if defined(PETSC_HAVE_ADIC)
2916        MatSetValuesAdic_MPIAIJ,
2917 #else
2918        0,
2919 #endif
2920        MatSetValuesAdifor_MPIAIJ,
2921 /*75*/ MatFDColoringApply_AIJ,
2922        0,
2923        0,
2924        0,
2925        0,
2926 /*80*/ 0,
2927        0,
2928        0,
2929 /*83*/ MatLoad_MPIAIJ,
2930        0,
2931        0,
2932        0,
2933        0,
2934        0,
2935 /*89*/ MatMatMult_MPIAIJ_MPIAIJ,
2936        MatMatMultSymbolic_MPIAIJ_MPIAIJ,
2937        MatMatMultNumeric_MPIAIJ_MPIAIJ,
2938        MatPtAP_Basic,
2939        MatPtAPSymbolic_MPIAIJ,
2940 /*94*/ MatPtAPNumeric_MPIAIJ,
2941        0,
2942        0,
2943        0,
2944        0,
2945 /*99*/ 0,
2946        MatPtAPSymbolic_MPIAIJ_MPIAIJ,
2947        MatPtAPNumeric_MPIAIJ_MPIAIJ,
2948        MatConjugate_MPIAIJ,
2949        0,
2950 /*104*/MatSetValuesRow_MPIAIJ,
2951        MatRealPart_MPIAIJ,
2952        MatImaginaryPart_MPIAIJ,
2953        0,
2954        0,
2955 /*109*/0,
2956        MatGetRedundantMatrix_MPIAIJ,
2957        MatGetRowMin_MPIAIJ,
2958        0,
2959        0,
2960 /*114*/MatGetSeqNonzerostructure_MPIAIJ,
2961        0,
2962        0,
2963        0,
2964        0,
2965 /*119*/0,
2966        0,
2967        0,
2968        0,
2969        MatGetMultiProcBlock_MPIAIJ
2970 };
2971 
2972 /* ----------------------------------------------------------------------------------------*/
2973 
2974 EXTERN_C_BEGIN
2975 #undef __FUNCT__
2976 #define __FUNCT__ "MatStoreValues_MPIAIJ"
2977 PetscErrorCode PETSCMAT_DLLEXPORT MatStoreValues_MPIAIJ(Mat mat)
2978 {
2979   Mat_MPIAIJ     *aij = (Mat_MPIAIJ *)mat->data;
2980   PetscErrorCode ierr;
2981 
2982   PetscFunctionBegin;
2983   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
2984   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
2985   PetscFunctionReturn(0);
2986 }
2987 EXTERN_C_END
2988 
2989 EXTERN_C_BEGIN
2990 #undef __FUNCT__
2991 #define __FUNCT__ "MatRetrieveValues_MPIAIJ"
2992 PetscErrorCode PETSCMAT_DLLEXPORT MatRetrieveValues_MPIAIJ(Mat mat)
2993 {
2994   Mat_MPIAIJ     *aij = (Mat_MPIAIJ *)mat->data;
2995   PetscErrorCode ierr;
2996 
2997   PetscFunctionBegin;
2998   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
2999   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
3000   PetscFunctionReturn(0);
3001 }
3002 EXTERN_C_END
3003 
3004 EXTERN_C_BEGIN
3005 #undef __FUNCT__
3006 #define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ"
3007 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
3008 {
3009   Mat_MPIAIJ     *b;
3010   PetscErrorCode ierr;
3011   PetscInt       i;
3012 
3013   PetscFunctionBegin;
3014   if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5;
3015   if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2;
3016   if (d_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz);
3017   if (o_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz);
3018 
3019   ierr = PetscLayoutSetBlockSize(B->rmap,1);CHKERRQ(ierr);
3020   ierr = PetscLayoutSetBlockSize(B->cmap,1);CHKERRQ(ierr);
3021   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3022   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3023   if (d_nnz) {
3024     for (i=0; i<B->rmap->n; i++) {
3025       if (d_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %D value %D",i,d_nnz[i]);
3026     }
3027   }
3028   if (o_nnz) {
3029     for (i=0; i<B->rmap->n; i++) {
3030       if (o_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %D value %D",i,o_nnz[i]);
3031     }
3032   }
3033   b = (Mat_MPIAIJ*)B->data;
3034 
3035   if (!B->preallocated) {
3036     /* Explicitly create 2 MATSEQAIJ matrices. */
3037     ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
3038     ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr);
3039     ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr);
3040     ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr);
3041     ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
3042     ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr);
3043     ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr);
3044     ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr);
3045   }
3046 
3047   ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr);
3048   ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr);
3049   B->preallocated = PETSC_TRUE;
3050   PetscFunctionReturn(0);
3051 }
3052 EXTERN_C_END
3053 
3054 #undef __FUNCT__
3055 #define __FUNCT__ "MatDuplicate_MPIAIJ"
3056 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
3057 {
3058   Mat            mat;
3059   Mat_MPIAIJ     *a,*oldmat = (Mat_MPIAIJ*)matin->data;
3060   PetscErrorCode ierr;
3061 
3062   PetscFunctionBegin;
3063   *newmat       = 0;
3064   ierr = MatCreate(((PetscObject)matin)->comm,&mat);CHKERRQ(ierr);
3065   ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr);
3066   ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr);
3067   ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
3068   a    = (Mat_MPIAIJ*)mat->data;
3069 
3070   mat->factortype    = matin->factortype;
3071   mat->rmap->bs      = matin->rmap->bs;
3072   mat->assembled    = PETSC_TRUE;
3073   mat->insertmode   = NOT_SET_VALUES;
3074   mat->preallocated = PETSC_TRUE;
3075 
3076   a->size           = oldmat->size;
3077   a->rank           = oldmat->rank;
3078   a->donotstash     = oldmat->donotstash;
3079   a->roworiented    = oldmat->roworiented;
3080   a->rowindices     = 0;
3081   a->rowvalues      = 0;
3082   a->getrowactive   = PETSC_FALSE;
3083 
3084   ierr = PetscLayoutCopy(matin->rmap,&mat->rmap);CHKERRQ(ierr);
3085   ierr = PetscLayoutCopy(matin->cmap,&mat->cmap);CHKERRQ(ierr);
3086 
3087   if (oldmat->colmap) {
3088 #if defined (PETSC_USE_CTABLE)
3089     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
3090 #else
3091     ierr = PetscMalloc((mat->cmap->N)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr);
3092     ierr = PetscLogObjectMemory(mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
3093     ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
3094 #endif
3095   } else a->colmap = 0;
3096   if (oldmat->garray) {
3097     PetscInt len;
3098     len  = oldmat->B->cmap->n;
3099     ierr = PetscMalloc((len+1)*sizeof(PetscInt),&a->garray);CHKERRQ(ierr);
3100     ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr);
3101     if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); }
3102   } else a->garray = 0;
3103 
3104   ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
3105   ierr = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr);
3106   ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
3107   ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr);
3108   ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
3109   ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr);
3110   ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
3111   ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr);
3112   ierr = PetscFListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr);
3113   *newmat = mat;
3114   PetscFunctionReturn(0);
3115 }
3116 
3117 /*
3118     Allows sending/receiving messages larger than 2 gigabytes in a single call
3119 */
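/*
   For example (a hypothetical count, only to illustrate the chunking arithmetic below):
   cnt = 600,000,000 entries gives numchunks = 600000000/250000000 + 1 = 3, so the data is
   shipped as three sends of 250,000,000, 250,000,000 and 100,000,000 entries, each small
   enough to fit in the int count argument that MPI requires.
*/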
3120 static int MPILong_Send(void *mess,PetscInt cnt, MPI_Datatype type,int to, int tag, MPI_Comm comm)
3121 {
3122   int             ierr;
3123   static PetscInt CHUNKSIZE = 250000000; /* 250,000,000 */
3124   PetscInt        i,numchunks;
3125   PetscMPIInt     icnt;
3126 
3127   numchunks = cnt/CHUNKSIZE + 1;
3128   for (i=0; i<numchunks; i++) {
3129     icnt = PetscMPIIntCast((i < numchunks-1) ? CHUNKSIZE : cnt - (numchunks-1)*CHUNKSIZE);
3130     ierr = MPI_Send(mess,icnt,type,to,tag,comm);
3131     if (type == MPIU_INT) {
3132       mess = (void*) (((PetscInt*)mess) + CHUNKSIZE);
3133     } else if (type == MPIU_SCALAR) {
3134       mess = (void*) (((PetscScalar*)mess) + CHUNKSIZE);
3135     } else SETERRQ(comm,PETSC_ERR_SUP,"No support for this datatype");
3136   }
3137   return 0;
3138 }
3139 static int MPILong_Recv(void *mess,PetscInt cnt, MPI_Datatype type,int from, int tag, MPI_Comm comm)
3140 {
3141   int             ierr;
3142   static PetscInt CHUNKSIZE = 250000000; /* 250,000,000 */
3143   MPI_Status      status;
3144   PetscInt        i,numchunks;
3145   PetscMPIInt     icnt;
3146 
3147   numchunks = cnt/CHUNKSIZE + 1;
3148   for (i=0; i<numchunks; i++) {
3149     icnt = PetscMPIIntCast((i < numchunks-1) ? CHUNKSIZE : cnt - (numchunks-1)*CHUNKSIZE);
3150     ierr = MPI_Recv(mess,icnt,type,from,tag,comm,&status);
3151     if (type == MPIU_INT) {
3152       mess = (void*) (((PetscInt*)mess) + CHUNKSIZE);
3153     } else if (type == MPIU_SCALAR) {
3154       mess = (void*) (((PetscScalar*)mess) + CHUNKSIZE);
3155     } else SETERRQ(comm,PETSC_ERR_SUP,"No support for this datatype");
3156   }
3157   return 0;
3158 }
3159 
3160 #undef __FUNCT__
3161 #define __FUNCT__ "MatLoad_MPIAIJ"
3162 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer)
3163 {
3164   PetscScalar    *vals,*svals;
3165   MPI_Comm       comm = ((PetscObject)viewer)->comm;
3166   PetscErrorCode ierr;
3167   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
3168   PetscInt       i,nz,j,rstart,rend,mmax,maxnz = 0,grows,gcols;
3169   PetscInt       header[4],*rowlengths = 0,M,N,m,*cols;
3170   PetscInt       *ourlens = PETSC_NULL,*procsnz = PETSC_NULL,*offlens = PETSC_NULL,jj,*mycols,*smycols;
3171   PetscInt       cend,cstart,n,*rowners,sizesset=1;
3172   int            fd;
3173 
3174   PetscFunctionBegin;
3175   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3176   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3177   if (!rank) {
3178     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
3179     ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
3180     if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
3181   }
3182 
3183   if (newMat->rmap->n < 0 && newMat->rmap->N < 0 && newMat->cmap->n < 0 && newMat->cmap->N < 0) sizesset = 0;
3184 
3185   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
3186   M = header[1]; N = header[2];
3187   /* If global rows/cols are set to PETSC_DECIDE, set it to the sizes given in the file */
3188   if (sizesset && newMat->rmap->N < 0) newMat->rmap->N = M;
3189   if (sizesset && newMat->cmap->N < 0) newMat->cmap->N = N;
3190 
3191   /* If global sizes are set, check if they are consistent with that given in the file */
3192   if (sizesset) {
3193     ierr = MatGetSize(newMat,&grows,&gcols);CHKERRQ(ierr);
3194   }
3195   if (sizesset && newMat->rmap->N != grows) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows:Matrix in file has (%d) and input matrix has (%d)",M,grows);
3196   if (sizesset && newMat->cmap->N != gcols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols:Matrix in file has (%d) and input matrix has (%d)",N,gcols);
3197 
3198   /* determine ownership of all rows */
3199   if (newMat->rmap->n < 0 ) m    = M/size + ((M % size) > rank); /* PETSC_DECIDE */
3200   else m = newMat->rmap->n; /* Set by user */
3201 
3202   ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr);
3203   ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
3204 
3205   /* First process needs enough room for process with most rows */
3206   if (!rank) {
3207     mmax       = rowners[1];
3208     for (i=2; i<=size; i++) {
3209       mmax = PetscMax(mmax,rowners[i]);
3210     }
3211   } else mmax = m;
3212 
3213   rowners[0] = 0;
3214   for (i=2; i<=size; i++) {
3215     rowners[i] += rowners[i-1];
3216   }
3217   rstart = rowners[rank];
3218   rend   = rowners[rank+1];
3219 
3220   /* distribute row lengths to all processors */
3221   ierr    = PetscMalloc2(mmax,PetscInt,&ourlens,mmax,PetscInt,&offlens);CHKERRQ(ierr);
3222   if (!rank) {
3223     ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr);
3224     ierr = PetscMalloc(m*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
3225     ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr);
3226     ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr);
3227     for (j=0; j<m; j++) {
3228       procsnz[0] += ourlens[j];
3229     }
3230     for (i=1; i<size; i++) {
3231       ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr);
3232       /* calculate the number of nonzeros on each processor */
3233       for (j=0; j<rowners[i+1]-rowners[i]; j++) {
3234         procsnz[i] += rowlengths[j];
3235       }
3236       ierr = MPILong_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
3237     }
3238     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
3239   } else {
3240     ierr = MPILong_Recv(ourlens,m,MPIU_INT,0,tag,comm);CHKERRQ(ierr);
3241   }
3242 
3243   if (!rank) {
3244     /* determine max buffer needed and allocate it */
3245     maxnz = 0;
3246     for (i=0; i<size; i++) {
3247       maxnz = PetscMax(maxnz,procsnz[i]);
3248     }
3249     ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr);
3250 
3251     /* read in my part of the matrix column indices  */
3252     nz   = procsnz[0];
3253     ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr);
3254     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
3255 
3256     /* read in everyone else's and ship off */
3257     for (i=1; i<size; i++) {
3258       nz     = procsnz[i];
3259       ierr   = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
3260       ierr   = MPILong_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
3261     }
3262     ierr = PetscFree(cols);CHKERRQ(ierr);
3263   } else {
3264     /* determine buffer space needed for message */
3265     nz = 0;
3266     for (i=0; i<m; i++) {
3267       nz += ourlens[i];
3268     }
3269     ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr);
3270 
3271     /* receive message of column indices*/
3272     ierr = MPILong_Recv(mycols,nz,MPIU_INT,0,tag,comm);CHKERRQ(ierr);
3273   }
3274 
3275   /* determine column ownership if matrix is not square */
3276   if (N != M) {
3277     if (newMat->cmap->n < 0) n      = N/size + ((N % size) > rank);
3278     else n = newMat->cmap->n;
3279     ierr   = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3280     cstart = cend - n;
3281   } else {
3282     cstart = rstart;
3283     cend   = rend;
3284     n      = cend - cstart;
3285   }
3286 
3287   /* loop over local rows, determining number of off diagonal entries */
3288   ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr);
3289   jj = 0;
3290   for (i=0; i<m; i++) {
3291     for (j=0; j<ourlens[i]; j++) {
3292       if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++;
3293       jj++;
3294     }
3295   }
3296 
3297   for (i=0; i<m; i++) {
3298     ourlens[i] -= offlens[i];
3299   }
3300   if (!sizesset) {
3301     ierr = MatSetSizes(newMat,m,n,M,N);CHKERRQ(ierr);
3302   }
3303   ierr = MatMPIAIJSetPreallocation(newMat,0,ourlens,0,offlens);CHKERRQ(ierr);
3304 
3305   for (i=0; i<m; i++) {
3306     ourlens[i] += offlens[i];
3307   }
3308 
3309   if (!rank) {
3310     ierr = PetscMalloc((maxnz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
3311 
3312     /* read in my part of the matrix numerical values  */
3313     nz   = procsnz[0];
3314     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3315 
3316     /* insert into matrix */
3317     jj      = rstart;
3318     smycols = mycols;
3319     svals   = vals;
3320     for (i=0; i<m; i++) {
3321       ierr = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
3322       smycols += ourlens[i];
3323       svals   += ourlens[i];
3324       jj++;
3325     }
3326 
3327     /* read in other processors and ship out */
3328     for (i=1; i<size; i++) {
3329       nz     = procsnz[i];
3330       ierr   = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3331       ierr   = MPILong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr);
3332     }
3333     ierr = PetscFree(procsnz);CHKERRQ(ierr);
3334   } else {
3335     /* receive numeric values */
3336     ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
3337 
3338     /* receive message of values*/
3339     ierr   = MPILong_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr);
3340 
3341     /* insert into matrix */
3342     jj      = rstart;
3343     smycols = mycols;
3344     svals   = vals;
3345     for (i=0; i<m; i++) {
3346       ierr     = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
3347       smycols += ourlens[i];
3348       svals   += ourlens[i];
3349       jj++;
3350     }
3351   }
3352   ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr);
3353   ierr = PetscFree(vals);CHKERRQ(ierr);
3354   ierr = PetscFree(mycols);CHKERRQ(ierr);
3355   ierr = PetscFree(rowners);CHKERRQ(ierr);
3356 
3357   ierr = MatAssemblyBegin(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3358   ierr = MatAssemblyEnd(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3359   PetscFunctionReturn(0);
3360 }
3361 
3362 #undef __FUNCT__
3363 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ"
3364 PetscErrorCode MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat)
3365 {
3366   PetscErrorCode ierr;
3367   IS             iscol_local;
3368   PetscInt       csize;
3369 
3370   PetscFunctionBegin;
3371   ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
3372   if (call == MAT_REUSE_MATRIX) {
3373     ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr);
3374     if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3375   } else {
3376     ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
3377   }
3378   ierr = MatGetSubMatrix_MPIAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr);
3379   if (call == MAT_INITIAL_MATRIX) {
3380     ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
3381     ierr = ISDestroy(iscol_local);CHKERRQ(ierr);
3382   }
3383   PetscFunctionReturn(0);
3384 }
3385 
3386 #undef __FUNCT__
3387 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ_Private"
3388 /*
3389     Not great since it makes two copies of the submatrix: first a SeqAIJ
3390   on each process, and then the final result assembled by concatenating the
3391   local matrices. Writing it directly would be much like MatGetSubMatrices_MPIAIJ().
3392 
3393   Note: This requires a sequential iscol with all indices.
3394 */
3395 PetscErrorCode MatGetSubMatrix_MPIAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat)
3396 {
3397   PetscErrorCode ierr;
3398   PetscMPIInt    rank,size;
3399   PetscInt       i,m,n,rstart,row,rend,nz,*cwork,j;
3400   PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal;
3401   Mat            *local,M,Mreuse;
3402   MatScalar      *vwork,*aa;
3403   MPI_Comm       comm = ((PetscObject)mat)->comm;
3404   Mat_SeqAIJ     *aij;
3405 
3406 
3407   PetscFunctionBegin;
3408   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3409   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3410 
3411   if (call ==  MAT_REUSE_MATRIX) {
3412     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr);
3413     if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3414     local = &Mreuse;
3415     ierr  = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);CHKERRQ(ierr);
3416   } else {
3417     ierr   = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr);
3418     Mreuse = *local;
3419     ierr   = PetscFree(local);CHKERRQ(ierr);
3420   }
3421 
3422   /*
3423       m - number of local rows
3424       n - number of columns (same on all processors)
3425       rstart - first row in new global matrix generated
3426   */
3427   ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
3428   if (call == MAT_INITIAL_MATRIX) {
3429     aij = (Mat_SeqAIJ*)(Mreuse)->data;
3430     ii  = aij->i;
3431     jj  = aij->j;
3432 
3433     /*
3434         Determine the number of non-zeros in the diagonal and off-diagonal
3435         portions of the matrix in order to do correct preallocation
3436     */
3437 
3438     /* first get start and end of "diagonal" columns */
3439     if (csize == PETSC_DECIDE) {
3440       ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
3441       if (mglobal == n) { /* square matrix */
3442 	nlocal = m;
3443       } else {
3444         nlocal = n/size + ((n % size) > rank);
3445       }
3446     } else {
3447       nlocal = csize;
3448     }
3449     ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3450     rstart = rend - nlocal;
3451     if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n);
3452 
3453     /* next, compute all the lengths */
3454     ierr  = PetscMalloc((2*m+1)*sizeof(PetscInt),&dlens);CHKERRQ(ierr);
3455     olens = dlens + m;
3456     for (i=0; i<m; i++) {
3457       jend = ii[i+1] - ii[i];
3458       olen = 0;
3459       dlen = 0;
3460       for (j=0; j<jend; j++) {
3461         if (*jj < rstart || *jj >= rend) olen++;
3462         else dlen++;
3463         jj++;
3464       }
3465       olens[i] = olen;
3466       dlens[i] = dlen;
3467     }
3468     ierr = MatCreate(comm,&M);CHKERRQ(ierr);
3469     ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr);
3470     ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr);
3471     ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr);
3472     ierr = PetscFree(dlens);CHKERRQ(ierr);
3473   } else {
3474     PetscInt ml,nl;
3475 
3476     M = *newmat;
3477     ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
3478     if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
3479     ierr = MatZeroEntries(M);CHKERRQ(ierr);
3480     /*
3481          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
3482        rather than the slower MatSetValues().
3483     */
3484     M->was_assembled = PETSC_TRUE;
3485     M->assembled     = PETSC_FALSE;
3486   }
3487   ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
3488   aij = (Mat_SeqAIJ*)(Mreuse)->data;
3489   ii  = aij->i;
3490   jj  = aij->j;
3491   aa  = aij->a;
3492   for (i=0; i<m; i++) {
3493     row   = rstart + i;
3494     nz    = ii[i+1] - ii[i];
3495     cwork = jj;     jj += nz;
3496     vwork = aa;     aa += nz;
3497     ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
3498   }
3499 
3500   ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3501   ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3502   *newmat = M;
3503 
3504   /* save submatrix used in processor for next request */
3505   if (call ==  MAT_INITIAL_MATRIX) {
3506     ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
3507     ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr);
3508   }
3509 
3510   PetscFunctionReturn(0);
3511 }
3512 
3513 EXTERN_C_BEGIN
3514 #undef __FUNCT__
3515 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR_MPIAIJ"
3516 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[])
3517 {
3518   PetscInt       m,cstart, cend,j,nnz,i,d;
3519   PetscInt       *d_nnz,*o_nnz,nnz_max = 0,rstart,ii;
3520   const PetscInt *JJ;
3521   PetscScalar    *values;
3522   PetscErrorCode ierr;
3523 
3524   PetscFunctionBegin;
3525   if (Ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0 it is %D",Ii[0]);
3526 
3527   ierr = PetscLayoutSetBlockSize(B->rmap,1);CHKERRQ(ierr);
3528   ierr = PetscLayoutSetBlockSize(B->cmap,1);CHKERRQ(ierr);
3529   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3530   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3531   m      = B->rmap->n;
3532   cstart = B->cmap->rstart;
3533   cend   = B->cmap->rend;
3534   rstart = B->rmap->rstart;
3535 
3536   ierr  = PetscMalloc2(m,PetscInt,&d_nnz,m,PetscInt,&o_nnz);CHKERRQ(ierr);
3537 
3538 #if defined(PETSC_USE_DEBUG)
3539   for (i=0; i<m; i++) {
3540     nnz     = Ii[i+1]- Ii[i];
3541     JJ      = J + Ii[i];
3542     if (nnz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative %D number of columns",i,nnz);
3543     if (nnz && (JJ[0] < 0)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index %D",i,JJ[0]);
3544     if (nnz && (JJ[nnz-1] >= B->cmap->N)) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N);
3545   }
3546 #endif
3547 
3548   for (i=0; i<m; i++) {
3549     nnz     = Ii[i+1]- Ii[i];
3550     JJ      = J + Ii[i];
3551     nnz_max = PetscMax(nnz_max,nnz);
3552     d       = 0;
3553     for (j=0; j<nnz; j++) {
3554       if (cstart <= JJ[j] && JJ[j] < cend) d++;
3555     }
3556     d_nnz[i] = d;
3557     o_nnz[i] = nnz - d;
3558   }
3559   ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
3560   ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);
3561 
3562   if (v) values = (PetscScalar*)v;
3563   else {
3564     ierr = PetscMalloc((nnz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr);
3565     ierr = PetscMemzero(values,nnz_max*sizeof(PetscScalar));CHKERRQ(ierr);
3566   }
3567 
3568   for (i=0; i<m; i++) {
3569     ii   = i + rstart;
3570     nnz  = Ii[i+1]- Ii[i];
3571     ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ? Ii[i] : 0),INSERT_VALUES);CHKERRQ(ierr);
3572   }
3573   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3574   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3575 
3576   if (!v) {
3577     ierr = PetscFree(values);CHKERRQ(ierr);
3578   }
3579   PetscFunctionReturn(0);
3580 }
3581 EXTERN_C_END
3582 
3583 #undef __FUNCT__
3584 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR"
3585 /*@
3586    MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format
3587    (the default parallel PETSc format).
3588 
3589    Collective on MPI_Comm
3590 
3591    Input Parameters:
3592 +  B - the matrix
3593 .  i - the indices into j for the start of each local row (starts with zero)
3594 .  j - the column indices for each local row (starts with zero)
3595 -  v - optional values in the matrix
3596 
3597    Level: developer
3598 
3599    Notes:
3600        The i, j, and v arrays ARE copied by this routine into the internal format used by PETSc;
3601      thus you CANNOT change the matrix entries by changing the values of v[] after you have
3602      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3603 
3604        The i and j indices are 0 based, and the i indices are offsets into the local j array.
3605 
3606        The format which is used for the sparse matrix input is equivalent to a
3607     row-major ordering, i.e. for the following matrix, the input data expected is
3608     as shown:
3609 
3610         1 0 0
3611         2 0 3     P0
3612        -------
3613         4 5 6     P1
3614 
3615      Process0 [P0]: rows_owned=[0,1]
3616         i =  {0,1,3}  [size = nrow+1  = 2+1]
3617         j =  {0,0,2}  [size = nz = 3]
3618         v =  {1,2,3}  [size = nz = 3]
3619 
3620      Process1 [P1]: rows_owned=[2]
3621         i =  {0,3}    [size = nrow+1  = 1+1]
3622         j =  {0,1,2}  [size = nz = 3]
3623         v =  {4,5,6}  [size = nz = 3]
3624 
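     As a minimal calling sketch for the P0/P1 example above (error checking and the
   surrounding setup are omitted; the names comm, nlocalrows, i, j and v are illustrative
   only):

.vb
     Mat B;
     MatCreate(comm,&B);
     MatSetSizes(B,nlocalrows,PETSC_DECIDE,3,3);   /* nlocalrows is 2 on P0 and 1 on P1 */
     MatSetType(B,MATMPIAIJ);
     MatMPIAIJSetPreallocationCSR(B,i,j,v);        /* also inserts the values and assembles B */
.ve
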
3625 .keywords: matrix, aij, compressed row, sparse, parallel
3626 
3627 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateMPIAIJ(), MPIAIJ,
3628           MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays()
3629 @*/
3630 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
3631 {
3632   PetscErrorCode ierr;
3633 
3634   PetscFunctionBegin;
3635   ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocationCSR_C",(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,i,j,v));CHKERRQ(ierr);
3636   PetscFunctionReturn(0);
3637 }
3638 
3639 #undef __FUNCT__
3640 #define __FUNCT__ "MatMPIAIJSetPreallocation"
3641 /*@C
3642    MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format
3643    (the default parallel PETSc format).  For good matrix assembly performance
3644    the user should preallocate the matrix storage by setting the parameters
3645    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3646    performance can be increased by more than a factor of 50.
3647 
3648    Collective on MPI_Comm
3649 
3650    Input Parameters:
3651 +  A - the matrix
3652 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
3653            (same value is used for all local rows)
3654 .  d_nnz - array containing the number of nonzeros in the various rows of the
3655            DIAGONAL portion of the local submatrix (possibly different for each row)
3656            or PETSC_NULL, if d_nz is used to specify the nonzero structure.
3657            The size of this array is equal to the number of local rows, i.e 'm'.
3658            You must leave room for the diagonal entry even if it is zero.
3659 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
3660            submatrix (same value is used for all local rows).
3661 -  o_nnz - array containing the number of nonzeros in the various rows of the
3662            OFF-DIAGONAL portion of the local submatrix (possibly different for
3663            each row) or PETSC_NULL, if o_nz is used to specify the nonzero
3664            structure. The size of this array is equal to the number
3665            of local rows, i.e 'm'.
3666 
3667    If the *_nnz parameter is given then the *_nz parameter is ignored
3668 
3669    The AIJ format (also called the Yale sparse matrix format or
3670    compressed row storage (CSR)), is fully compatible with standard Fortran 77
3671    storage.  The stored row and column indices begin with zero.
3672    See the <A href="../../docs/manual.pdf#nameddest=ch_mat">Mat chapter of the users manual</A> for details.
3673 
3674    The parallel matrix is partitioned such that the first m0 rows belong to
3675    process 0, the next m1 rows belong to process 1, the next m2 rows belong
3676    to process 2 etc.. where m0,m1,m2... are the input parameter 'm'.
3677 
3678    The DIAGONAL portion of the local submatrix of a processor can be defined
3679    as the submatrix which is obtained by extracting the part corresponding to
3680    the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the
3681    first row that belongs to the processor, r2 is the last row belonging to
3682    this processor, and c1-c2 is the range of indices of the local part of a
3683    vector suitable for applying the matrix to.  This is an mxn matrix.  In the
3684    common case of a square matrix, the row and column ranges are the same and
3685    the DIAGONAL part is also square. The remaining portion of the local
3686    submatrix (mxN) constitute the OFF-DIAGONAL portion.
3687 
3688    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
3689 
3690    You can call MatGetInfo() to get information on how effective the preallocation was;
3691    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
3692    You can also run with the option -info and look for messages with the string
3693    malloc in them to see if additional memory allocation was needed.
3694 
3695    Example usage:
3696 
3697    Consider the following 8x8 matrix with 34 non-zero values, that is
3698    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
3699    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
3700    as follows:
3701 
3702 .vb
3703             1  2  0  |  0  3  0  |  0  4
3704     Proc0   0  5  6  |  7  0  0  |  8  0
3705             9  0 10  | 11  0  0  | 12  0
3706     -------------------------------------
3707            13  0 14  | 15 16 17  |  0  0
3708     Proc1   0 18  0  | 19 20 21  |  0  0
3709             0  0  0  | 22 23  0  | 24  0
3710     -------------------------------------
3711     Proc2  25 26 27  |  0  0 28  | 29  0
3712            30  0  0  | 31 32 33  |  0 34
3713 .ve
3714 
3715    This can be represented as a collection of submatrices as:
3716 
3717 .vb
3718       A B C
3719       D E F
3720       G H I
3721 .ve
3722 
3723    Where the submatrices A,B,C are owned by proc0, D,E,F are
3724    owned by proc1, G,H,I are owned by proc2.
3725 
3726    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3727    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3728    The 'M','N' parameters are 8,8, and have the same values on all procs.
3729 
3730    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
3731    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
3732    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
3733    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
3734    part as SeqAIJ matrices, e.g. proc1 will store [E] as a SeqAIJ
3735    matrix, and [DF] as another SeqAIJ matrix.
3736 
3737    When d_nz, o_nz parameters are specified, d_nz storage elements are
3738    allocated for every row of the local diagonal submatrix, and o_nz
3739    storage locations are allocated for every row of the OFF-DIAGONAL submat.
3740    One way to choose d_nz and o_nz is to use the max nonzeros per local
3741    row for each of the local DIAGONAL and OFF-DIAGONAL submatrices.
3742    In this case, the values of d_nz,o_nz are:
3743 .vb
3744      proc0 : dnz = 2, o_nz = 2
3745      proc1 : dnz = 3, o_nz = 2
3746      proc2 : dnz = 1, o_nz = 4
3747 .ve
3748    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
3749    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
3750    for proc2, i.e. we are using 12+15+10=37 storage locations to store
3751    34 values.
3752 
3753    When d_nnz, o_nnz parameters are specified, the storage is specified
3754    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
3755    In the above case the values for d_nnz,o_nnz are:
3756 .vb
3757      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
3758      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
3759      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
3760 .ve
3761    Here the space allocated is the sum of all the above values, i.e. 34, and
3762    hence the preallocation is perfect.
3763 
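   As a minimal sketch of how the example above could be set up (error checking omitted;
   comm, m, d_nnz and o_nnz are illustrative names, with d_nnz/o_nnz holding the per-row
   counts listed above on each process):

.vb
     Mat B;
     MatCreate(comm,&B);
     MatSetSizes(B,m,m,8,8);                        /* m is 3, 3, 2 on proc0, proc1, proc2 */
     MatSetType(B,MATMPIAIJ);
     MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);
     /* ... MatSetValues(), MatAssemblyBegin(), MatAssemblyEnd() ... */
.ve
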
3764    Level: intermediate
3765 
3766 .keywords: matrix, aij, compressed row, sparse, parallel
3767 
3768 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIAIJ(), MatMPIAIJSetPreallocationCSR(),
3769           MPIAIJ, MatGetInfo()
3770 @*/
3771 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
3772 {
3773   PetscErrorCode ierr;
3774 
3775   PetscFunctionBegin;
3776   ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocation_C",(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr);
3777   PetscFunctionReturn(0);
3778 }
3779 
3780 #undef __FUNCT__
3781 #define __FUNCT__ "MatCreateMPIAIJWithArrays"
3782 /*@
3783      MatCreateMPIAIJWithArrays - creates an MPI AIJ matrix using arrays that contain
3784          the local rows in standard CSR format.
3785 
3786    Collective on MPI_Comm
3787 
3788    Input Parameters:
3789 +  comm - MPI communicator
3790 .  m - number of local rows (Cannot be PETSC_DECIDE)
3791 .  n - This value should be the same as the local size used in creating the
3792        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
3793        calculated if N is given) For square matrices n is almost always m.
3794 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
3795 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
3796 .   i - row indices
3797 .   j - column indices
3798 -   a - matrix values
3799 
3800    Output Parameter:
3801 .   mat - the matrix
3802 
3803    Level: intermediate
3804 
3805    Notes:
3806        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
3807      thus you CANNOT change the matrix entries by changing the values of a[] after you have
3808      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3809 
3810        The i and j indices are 0 based, and the i indices are offsets into the local j array.
3811 
3812        The format which is used for the sparse matrix input is equivalent to a
3813     row-major ordering, i.e. for the following matrix, the input data expected is
3814     as shown:
3815 
3816         1 0 0
3817         2 0 3     P0
3818        -------
3819         4 5 6     P1
3820 
3821      Process0 [P0]: rows_owned=[0,1]
3822         i =  {0,1,3}  [size = nrow+1  = 2+1]
3823         j =  {0,0,2}  [size = nz = 3]
3824         v =  {1,2,3}  [size = nz = 3]
3825 
3826      Process1 [P1]: rows_owned=[2]
3827         i =  {0,3}    [size = nrow+1  = 1+1]
3828         j =  {0,1,2}  [size = nz = 3]
3829         v =  {4,5,6}  [size = nz = 3]
3830 
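     As a minimal calling sketch for the P0/P1 example above (error checking omitted; comm,
   m, i, j and v are illustrative names, with m = 2 on P0 and m = 1 on P1 and the arrays as
   listed above):

.vb
     Mat A;
     MatCreateMPIAIJWithArrays(comm,m,PETSC_DECIDE,PETSC_DETERMINE,3,i,j,v,&A);
.ve
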
3831 .keywords: matrix, aij, compressed row, sparse, parallel
3832 
3833 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
3834           MPIAIJ, MatCreateMPIAIJ(), MatCreateMPIAIJWithSplitArrays()
3835 @*/
3836 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
3837 {
3838   PetscErrorCode ierr;
3839 
3840   PetscFunctionBegin;
3841   if (i[0]) {
3842     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
3843   }
3844   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
3845   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
3846   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
3847   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
3848   ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr);
3849   PetscFunctionReturn(0);
3850 }
3851 
3852 #undef __FUNCT__
3853 #define __FUNCT__ "MatCreateMPIAIJ"
3854 /*@C
3855    MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format
3856    (the default parallel PETSc format).  For good matrix assembly performance
3857    the user should preallocate the matrix storage by setting the parameters
3858    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3859    performance can be increased by more than a factor of 50.
3860 
3861    Collective on MPI_Comm
3862 
3863    Input Parameters:
3864 +  comm - MPI communicator
3865 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
3866            This value should be the same as the local size used in creating the
3867            y vector for the matrix-vector product y = Ax.
3868 .  n - This value should be the same as the local size used in creating the
3869        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
3870        calculated if N is given) For square matrices n is almost always m.
3871 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
3872 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
3873 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
3874            (same value is used for all local rows)
3875 .  d_nnz - array containing the number of nonzeros in the various rows of the
3876            DIAGONAL portion of the local submatrix (possibly different for each row)
3877            or PETSC_NULL, if d_nz is used to specify the nonzero structure.
3878            The size of this array is equal to the number of local rows, i.e 'm'.
3879            You must leave room for the diagonal entry even if it is zero.
3880 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
3881            submatrix (same value is used for all local rows).
3882 -  o_nnz - array containing the number of nonzeros in the various rows of the
3883            OFF-DIAGONAL portion of the local submatrix (possibly different for
3884            each row) or PETSC_NULL, if o_nz is used to specify the nonzero
3885            structure. The size of this array is equal to the number
3886            of local rows, i.e 'm'.
3887 
3888    Output Parameter:
3889 .  A - the matrix
3890 
3891    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
3892    MatXXXXSetPreallocation() paradigm instead of this routine directly.
3893    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
3894 
3895    Notes:
3896    If the *_nnz parameter is given then the *_nz parameter is ignored
3897 
3898    m,n,M,N parameters specify the size of the matrix, and its partitioning across
3899    processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
3900    storage requirements for this matrix.
3901 
3902    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one
3903    processor then it must be used on all processors that share the object for
3904    that argument.
3905 
3906    The user MUST specify either the local or global matrix dimensions
3907    (possibly both).
3908 
3909    The parallel matrix is partitioned across processors such that the
3910    first m0 rows belong to process 0, the next m1 rows belong to
3911    process 1, the next m2 rows belong to process 2 etc.. where
3912    m0,m1,m2,.. are the input parameter 'm', i.e. each processor stores
3913    values corresponding to an [m x N] submatrix.
3914 
3915    The columns are logically partitioned with the n0 columns belonging
3916    to 0th partition, the next n1 columns belonging to the next
3917    partition etc., where n0,n1,n2... are the input parameter 'n'.
3918 
3919    The DIAGONAL portion of the local submatrix on any given processor
3920    is the submatrix corresponding to the rows and columns m,n
3921    corresponding to the given processor, i.e. the diagonal matrix on
3922    process 0 is [m0 x n0], diagonal matrix on process 1 is [m1 x n1]
3923    etc. The remaining portion of the local submatrix [m x (N-n)]
3924    constitute the OFF-DIAGONAL portion. The example below better
3925    illustrates this concept.
3926 
3927    For a square global matrix we define each processor's diagonal portion
3928    to be its local rows and the corresponding columns (a square submatrix);
3929    each processor's off-diagonal portion encompasses the remainder of the
3930    local matrix (a rectangular submatrix).
3931 
3932    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
3933 
3934    When calling this routine with a single process communicator, a matrix of
3935    type SEQAIJ is returned.  If a matrix of type MPIAIJ is desired for this
3936    type of communicator, use the construction mechanism:
3937      MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatSetSizes(A, m,n,M,N); MatMPIAIJSetPreallocation(A,...);
3938 
3939    By default, this format uses inodes (identical nodes) when possible.
3940    We search for consecutive rows with the same nonzero structure, thereby
3941    reusing matrix information to achieve increased efficiency.
3942 
3943    Options Database Keys:
3944 +  -mat_no_inode  - Do not use inodes
3945 .  -mat_inode_limit <limit> - Sets inode limit (max limit=5)
3946 -  -mat_aij_oneindex - Internally use indexing starting at 1
3947         rather than 0.  Note that when calling MatSetValues(),
3948         the user still MUST index entries starting at 0!
3949 
3950 
3951    Example usage:
3952 
3953    Consider the following 8x8 matrix with 34 non-zero values, that is
3954    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
3955    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
3956    as follows:
3957 
3958 .vb
3959             1  2  0  |  0  3  0  |  0  4
3960     Proc0   0  5  6  |  7  0  0  |  8  0
3961             9  0 10  | 11  0  0  | 12  0
3962     -------------------------------------
3963            13  0 14  | 15 16 17  |  0  0
3964     Proc1   0 18  0  | 19 20 21  |  0  0
3965             0  0  0  | 22 23  0  | 24  0
3966     -------------------------------------
3967     Proc2  25 26 27  |  0  0 28  | 29  0
3968            30  0  0  | 31 32 33  |  0 34
3969 .ve
3970 
3971    This can be represented as a collection of submatrices as:
3972 
3973 .vb
3974       A B C
3975       D E F
3976       G H I
3977 .ve
3978 
3979    Where the submatrices A,B,C are owned by proc0, D,E,F are
3980    owned by proc1, G,H,I are owned by proc2.
3981 
3982    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3983    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3984    The 'M','N' parameters are 8,8, and have the same values on all procs.
3985 
3986    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
3987    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
3988    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
3989    Internally, each processor stores the DIAGONAL part and the OFF-DIAGONAL
3990    part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
3991    matrix, and [DF] as another SeqAIJ matrix.
3992 
3993    When d_nz and o_nz are specified, d_nz storage elements are
3994    allocated for every row of the local DIAGONAL submatrix, and o_nz
3995    storage locations are allocated for every row of the OFF-DIAGONAL submatrix.
3996    One way to choose d_nz and o_nz is to use the maximum number of nonzeros
3997    per local row in the DIAGONAL and OFF-DIAGONAL submatrices, respectively.
3998    In this case, the values of d_nz and o_nz are:
3999 .vb
4000      proc0 : d_nz = 2, o_nz = 2
4001      proc1 : d_nz = 3, o_nz = 2
4002      proc2 : d_nz = 1, o_nz = 4
4003 .ve
4004    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
4005    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
4006    for proc2. That is, we are using 12+15+10=37 storage locations to store
4007    34 values.
4008 
4009    When d_nnz and o_nnz are specified, the storage is specified
4010    for every row, corresponding to both the DIAGONAL and OFF-DIAGONAL submatrices.
4011    In the above case the values for d_nnz and o_nnz are:
4012 .vb
4013      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
4014      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
4015      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
4016 .ve
4017    Here the space allocated is the sum of all the above values, i.e. 34,
4018    and hence the preallocation is perfect.
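
   As an illustration only (not one of the PETSc examples), proc1 in the scenario
   above could create the matrix with this per-row preallocation as follows; the
   array names d_nnz1 and o_nnz1 are hypothetical:
.vb
     Mat      C;
     PetscInt d_nnz1[3] = {3,3,2}, o_nnz1[3] = {2,1,1}; /* proc1 per-row counts from the table above */
     ierr = MatCreateMPIAIJ(PETSC_COMM_WORLD,3,3,8,8,0,d_nnz1,0,o_nnz1,&C);CHKERRQ(ierr);
.ve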
4019 
4020    Level: intermediate
4021 
4022 .keywords: matrix, aij, compressed row, sparse, parallel
4023 
4024 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
4025           MPIAIJ, MatCreateMPIAIJWithArrays()
4026 @*/
4027 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
4028 {
4029   PetscErrorCode ierr;
4030   PetscMPIInt    size;
4031 
4032   PetscFunctionBegin;
4033   ierr = MatCreate(comm,A);CHKERRQ(ierr);
4034   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
4035   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4036   if (size > 1) {
4037     ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr);
4038     ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
4039   } else {
4040     ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr);
4041     ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr);
4042   }
4043   PetscFunctionReturn(0);
4044 }
4045 
4046 #undef __FUNCT__
4047 #define __FUNCT__ "MatMPIAIJGetSeqAIJ"
4048 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,PetscInt *colmap[])
4049 {
4050   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
4051 
4052   PetscFunctionBegin;
4053   *Ad     = a->A;
4054   *Ao     = a->B;
4055   *colmap = a->garray;
4056   PetscFunctionReturn(0);
4057 }
4058 
4059 #undef __FUNCT__
4060 #define __FUNCT__ "MatSetColoring_MPIAIJ"
4061 PetscErrorCode MatSetColoring_MPIAIJ(Mat A,ISColoring coloring)
4062 {
4063   PetscErrorCode ierr;
4064   PetscInt       i;
4065   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
4066 
4067   PetscFunctionBegin;
4068   if (coloring->ctype == IS_COLORING_GLOBAL) {
4069     ISColoringValue *allcolors,*colors;
4070     ISColoring      ocoloring;
4071 
4072     /* set coloring for diagonal portion */
4073     ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr);
4074 
4075     /* set coloring for off-diagonal portion */
4076     ierr = ISAllGatherColors(((PetscObject)A)->comm,coloring->n,coloring->colors,PETSC_NULL,&allcolors);CHKERRQ(ierr);
4077     ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
4078     for (i=0; i<a->B->cmap->n; i++) {
4079       colors[i] = allcolors[a->garray[i]];
4080     }
4081     ierr = PetscFree(allcolors);CHKERRQ(ierr);
4082     ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
4083     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
4084     ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
4085   } else if (coloring->ctype == IS_COLORING_GHOSTED) {
4086     ISColoringValue *colors;
4087     PetscInt        *larray;
4088     ISColoring      ocoloring;
4089 
4090     /* set coloring for diagonal portion */
4091     ierr = PetscMalloc((a->A->cmap->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr);
4092     for (i=0; i<a->A->cmap->n; i++) {
4093       larray[i] = i + A->cmap->rstart;
4094     }
4095     ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->A->cmap->n,larray,PETSC_NULL,larray);CHKERRQ(ierr);
4096     ierr = PetscMalloc((a->A->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
4097     for (i=0; i<a->A->cmap->n; i++) {
4098       colors[i] = coloring->colors[larray[i]];
4099     }
4100     ierr = PetscFree(larray);CHKERRQ(ierr);
4101     ierr = ISColoringCreate(PETSC_COMM_SELF,coloring->n,a->A->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
4102     ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr);
4103     ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
4104 
4105     /* set coloring for off-diagonal portion */
4106     ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr);
4107     ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->B->cmap->n,a->garray,PETSC_NULL,larray);CHKERRQ(ierr);
4108     ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
4109     for (i=0; i<a->B->cmap->n; i++) {
4110       colors[i] = coloring->colors[larray[i]];
4111     }
4112     ierr = PetscFree(larray);CHKERRQ(ierr);
4113     ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
4114     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
4115     ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
4116   } else {
4117     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support ISColoringType %d",(int)coloring->ctype);
4118   }
4119 
4120   PetscFunctionReturn(0);
4121 }
4122 
4123 #if defined(PETSC_HAVE_ADIC)
4124 #undef __FUNCT__
4125 #define __FUNCT__ "MatSetValuesAdic_MPIAIJ"
4126 PetscErrorCode MatSetValuesAdic_MPIAIJ(Mat A,void *advalues)
4127 {
4128   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
4129   PetscErrorCode ierr;
4130 
4131   PetscFunctionBegin;
4132   ierr = MatSetValuesAdic_SeqAIJ(a->A,advalues);CHKERRQ(ierr);
4133   ierr = MatSetValuesAdic_SeqAIJ(a->B,advalues);CHKERRQ(ierr);
4134   PetscFunctionReturn(0);
4135 }
4136 #endif
4137 
4138 #undef __FUNCT__
4139 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ"
4140 PetscErrorCode MatSetValuesAdifor_MPIAIJ(Mat A,PetscInt nl,void *advalues)
4141 {
4142   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
4143   PetscErrorCode ierr;
4144 
4145   PetscFunctionBegin;
4146   ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr);
4147   ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr);
4148   PetscFunctionReturn(0);
4149 }
4150 
4151 #undef __FUNCT__
4152 #define __FUNCT__ "MatMerge"
4153 /*@
4154      MatMerge - Creates a single large PETSc matrix by concatenating sequential
4155                 matrices from each processor
4156 
4157     Collective on MPI_Comm
4158 
4159    Input Parameters:
4160 +    comm - the communicator the parallel matrix will live on
4161 .    inmat - the input sequential matrix on each process
4162 .    n - number of local columns (or PETSC_DECIDE)
4163 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4164 
4165    Output Parameter:
4166 .    outmat - the parallel matrix generated
4167 
4168     Level: advanced
4169 
4170    Notes: The number of columns of the matrix in EACH processor MUST be the same.
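
   A minimal usage sketch (assuming each process already holds a SeqAIJ matrix
   localA with the same number of columns; note that the routine destroys the
   input matrix):
.vb
     Mat parallelA;
     ierr = MatMerge(PETSC_COMM_WORLD,localA,PETSC_DECIDE,MAT_INITIAL_MATRIX,&parallelA);CHKERRQ(ierr);
.ve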
4171 
4172 @*/
4173 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
4174 {
4175   PetscErrorCode ierr;
4176   PetscInt       m,N,i,rstart,nnz,Ii,*dnz,*onz;
4177   PetscInt       *indx;
4178   PetscScalar    *values;
4179 
4180   PetscFunctionBegin;
4181   ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
4182   if (scall == MAT_INITIAL_MATRIX){
4183     /* count nonzeros in each row, for diagonal and off diagonal portion of matrix */
4184     if (n == PETSC_DECIDE){
4185       ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr);
4186     }
4187     ierr = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
4188     rstart -= m;
4189 
4190     ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
4191     for (i=0;i<m;i++) {
4192       ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr);
4193       ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr);
4194       ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr);
4195     }
4196     /* This routine will ONLY return MPIAIJ type matrix */
4197     ierr = MatCreate(comm,outmat);CHKERRQ(ierr);
4198     ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
4199     ierr = MatSetType(*outmat,MATMPIAIJ);CHKERRQ(ierr);
4200     ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr);
4201     ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
4202 
4203   } else if (scall == MAT_REUSE_MATRIX){
4204     ierr = MatGetOwnershipRange(*outmat,&rstart,PETSC_NULL);CHKERRQ(ierr);
4205   } else {
4206     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
4207   }
4208 
4209   for (i=0;i<m;i++) {
4210     ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
4211     Ii    = i + rstart;
4212     ierr = MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
4213     ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
4214   }
4215   ierr = MatDestroy(inmat);CHKERRQ(ierr);
4216   ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4217   ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4218 
4219   PetscFunctionReturn(0);
4220 }
4221 
4222 #undef __FUNCT__
4223 #define __FUNCT__ "MatFileSplit"
4224 PetscErrorCode MatFileSplit(Mat A,char *outfile)
4225 {
4226   PetscErrorCode    ierr;
4227   PetscMPIInt       rank;
4228   PetscInt          m,N,i,rstart,nnz;
4229   size_t            len;
4230   const PetscInt    *indx;
4231   PetscViewer       out;
4232   char              *name;
4233   Mat               B;
4234   const PetscScalar *values;
4235 
4236   PetscFunctionBegin;
4237   ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr);
4238   ierr = MatGetSize(A,0,&N);CHKERRQ(ierr);
4239   /* Should this be the type of the diagonal block of A? */
4240   ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr);
4241   ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr);
4242   ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr);
4243   ierr = MatSeqAIJSetPreallocation(B,0,PETSC_NULL);CHKERRQ(ierr);
4244   ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr);
4245   for (i=0;i<m;i++) {
4246     ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
4247     ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
4248     ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
4249   }
4250   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4251   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4252 
4253   ierr = MPI_Comm_rank(((PetscObject)A)->comm,&rank);CHKERRQ(ierr);
4254   ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr);
4255   ierr = PetscMalloc((len+5)*sizeof(char),&name);CHKERRQ(ierr);
4256   sprintf(name,"%s.%d",outfile,rank);
4257   ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr);
4258   ierr = PetscFree(name);CHKERRQ(ierr);
4259   ierr = MatView(B,out);CHKERRQ(ierr);
4260   ierr = PetscViewerDestroy(out);CHKERRQ(ierr);
4261   ierr = MatDestroy(B);CHKERRQ(ierr);
4262   PetscFunctionReturn(0);
4263 }
4264 
4265 EXTERN PetscErrorCode MatDestroy_MPIAIJ(Mat);
4266 #undef __FUNCT__
4267 #define __FUNCT__ "MatDestroy_MPIAIJ_SeqsToMPI"
4268 PetscErrorCode PETSCMAT_DLLEXPORT MatDestroy_MPIAIJ_SeqsToMPI(Mat A)
4269 {
4270   PetscErrorCode       ierr;
4271   Mat_Merge_SeqsToMPI  *merge;
4272   PetscContainer       container;
4273 
4274   PetscFunctionBegin;
4275   ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr);
4276   if (container) {
4277     ierr = PetscContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr);
4278     ierr = PetscFree(merge->id_r);CHKERRQ(ierr);
4279     ierr = PetscFree(merge->len_s);CHKERRQ(ierr);
4280     ierr = PetscFree(merge->len_r);CHKERRQ(ierr);
4281     ierr = PetscFree(merge->bi);CHKERRQ(ierr);
4282     ierr = PetscFree(merge->bj);CHKERRQ(ierr);
4283     ierr = PetscFree(merge->buf_ri[0]);CHKERRQ(ierr);
4284     ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr);
4285     ierr = PetscFree(merge->buf_rj[0]);CHKERRQ(ierr);
4286     ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr);
4287     ierr = PetscFree(merge->coi);CHKERRQ(ierr);
4288     ierr = PetscFree(merge->coj);CHKERRQ(ierr);
4289     ierr = PetscFree(merge->owners_co);CHKERRQ(ierr);
4290     ierr = PetscLayoutDestroy(merge->rowmap);CHKERRQ(ierr);
4291 
4292     ierr = PetscContainerDestroy(container);CHKERRQ(ierr);
4293     ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr);
4294     ierr = PetscFree(merge);CHKERRQ(ierr);
4295   }
4296 
4297   ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr);
4298   PetscFunctionReturn(0);
4299 }
4300 
4301 #include "../src/mat/utils/freespace.h"
4302 #include "petscbt.h"
4303 
4304 #undef __FUNCT__
4305 #define __FUNCT__ "MatMerge_SeqsToMPINumeric"
4306 /*@C
4307       MatMerge_SeqsToMPI - Creates an MPIAIJ matrix by adding sequential
4308                  matrices from each processor
4309 
4310     Collective on MPI_Comm
4311 
4312    Input Parameters:
4313 +    comm - the communicator the parallel matrix will live on
4314 .    seqmat - the input sequential matrix on each process
4315 .    m - number of local rows (or PETSC_DECIDE)
4316 .    n - number of local columns (or PETSC_DECIDE)
4317 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4318 
4319    Output Parameter:
4320 .    mpimat - the parallel matrix generated
4321 
4322     Level: advanced
4323 
4324    Notes:
4325      The dimensions of the sequential matrix in each processor MUST be the same.
4326      The input seqmat is included in the container "Mat_Merge_SeqsToMPI", and will be
4327      destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat.
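
     A sketch of the query pattern used to retrieve the supporting container
     (the key "MatMergeSeqsToMPI" is the one composed internally; cont is a
     hypothetical variable name):
.vb
     PetscContainer cont;
     ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject*)&cont);CHKERRQ(ierr);
.ve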
4328 @*/
4329 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPINumeric(Mat seqmat,Mat mpimat)
4330 {
4331   PetscErrorCode       ierr;
4332   MPI_Comm             comm=((PetscObject)mpimat)->comm;
4333   Mat_SeqAIJ           *a=(Mat_SeqAIJ*)seqmat->data;
4334   PetscMPIInt          size,rank,taga,*len_s;
4335   PetscInt             N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj=a->j;
4336   PetscInt             proc,m;
4337   PetscInt             **buf_ri,**buf_rj;
4338   PetscInt             k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj;
4339   PetscInt             nrows,**buf_ri_k,**nextrow,**nextai;
4340   MPI_Request          *s_waits,*r_waits;
4341   MPI_Status           *status;
4342   MatScalar            *aa=a->a;
4343   MatScalar            **abuf_r,*ba_i;
4344   Mat_Merge_SeqsToMPI  *merge;
4345   PetscContainer       container;
4346 
4347   PetscFunctionBegin;
4348   ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
4349 
4350   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4351   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4352 
4353   ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr);
4354   if (container) {
4355     ierr  = PetscContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr);
4356   }
4357   bi     = merge->bi;
4358   bj     = merge->bj;
4359   buf_ri = merge->buf_ri;
4360   buf_rj = merge->buf_rj;
4361 
4362   ierr   = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr);
4363   owners = merge->rowmap->range;
4364   len_s  = merge->len_s;
4365 
4366   /* send and recv matrix values */
4367   /*-----------------------------*/
4368   ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr);
4369   ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr);
4370 
4371   ierr = PetscMalloc((merge->nsend+1)*sizeof(MPI_Request),&s_waits);CHKERRQ(ierr);
4372   for (proc=0,k=0; proc<size; proc++){
4373     if (!len_s[proc]) continue;
4374     i = owners[proc];
4375     ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr);
4376     k++;
4377   }
4378 
4379   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);}
4380   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);}
4381   ierr = PetscFree(status);CHKERRQ(ierr);
4382 
4383   ierr = PetscFree(s_waits);CHKERRQ(ierr);
4384   ierr = PetscFree(r_waits);CHKERRQ(ierr);
4385 
4386   /* insert mat values of mpimat */
4387   /*----------------------------*/
4388   ierr = PetscMalloc(N*sizeof(PetscScalar),&ba_i);CHKERRQ(ierr);
4389   ierr = PetscMalloc3(merge->nrecv,PetscInt*,&buf_ri_k,merge->nrecv,PetscInt*,&nextrow,merge->nrecv,PetscInt*,&nextai);CHKERRQ(ierr);
4390 
4391   for (k=0; k<merge->nrecv; k++){
4392     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th received i-structure */
4393     nrows = *(buf_ri_k[k]);
4394     nextrow[k]  = buf_ri_k[k]+1;  /* next row number of k-th received i-structure */
4395     nextai[k]   = buf_ri_k[k] + (nrows + 1);/* points to the next i-structure of k-th received i-structure  */
4396   }
4397 
4398   /* set values of ba */
4399   m = merge->rowmap->n;
4400   for (i=0; i<m; i++) {
4401     arow = owners[rank] + i;
4402     bj_i = bj+bi[i];  /* col indices of the i-th row of mpimat */
4403     bnzi = bi[i+1] - bi[i];
4404     ierr = PetscMemzero(ba_i,bnzi*sizeof(PetscScalar));CHKERRQ(ierr);
4405 
4406     /* add local non-zero vals of this proc's seqmat into ba */
4407     anzi = ai[arow+1] - ai[arow];
4408     aj   = a->j + ai[arow];
4409     aa   = a->a + ai[arow];
4410     nextaj = 0;
4411     for (j=0; nextaj<anzi; j++){
4412       if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */
4413         ba_i[j] += aa[nextaj++];
4414       }
4415     }
4416 
4417     /* add received vals into ba */
4418     for (k=0; k<merge->nrecv; k++){ /* k-th received message */
4419       /* i-th row */
4420       if (i == *nextrow[k]) {
4421         anzi = *(nextai[k]+1) - *nextai[k];
4422         aj   = buf_rj[k] + *(nextai[k]);
4423         aa   = abuf_r[k] + *(nextai[k]);
4424         nextaj = 0;
4425         for (j=0; nextaj<anzi; j++){
4426           if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */
4427             ba_i[j] += aa[nextaj++];
4428           }
4429         }
4430         nextrow[k]++; nextai[k]++;
4431       }
4432     }
4433     ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr);
4434   }
4435   ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4436   ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4437 
4438   ierr = PetscFree(abuf_r[0]);CHKERRQ(ierr);
4439   ierr = PetscFree(abuf_r);CHKERRQ(ierr);
4440   ierr = PetscFree(ba_i);CHKERRQ(ierr);
4441   ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
4442   ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
4443   PetscFunctionReturn(0);
4444 }
4445 
4446 #undef __FUNCT__
4447 #define __FUNCT__ "MatMerge_SeqsToMPISymbolic"
4448 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPISymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat)
4449 {
4450   PetscErrorCode       ierr;
4451   Mat                  B_mpi;
4452   Mat_SeqAIJ           *a=(Mat_SeqAIJ*)seqmat->data;
4453   PetscMPIInt          size,rank,tagi,tagj,*len_s,*len_si,*len_ri;
4454   PetscInt             **buf_rj,**buf_ri,**buf_ri_k;
4455   PetscInt             M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j;
4456   PetscInt             len,proc,*dnz,*onz;
4457   PetscInt             k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0;
4458   PetscInt             nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai;
4459   MPI_Request          *si_waits,*sj_waits,*ri_waits,*rj_waits;
4460   MPI_Status           *status;
4461   PetscFreeSpaceList   free_space=PETSC_NULL,current_space=PETSC_NULL;
4462   PetscBT              lnkbt;
4463   Mat_Merge_SeqsToMPI  *merge;
4464   PetscContainer       container;
4465 
4466   PetscFunctionBegin;
4467   ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4468 
4469   /* make sure it is a PETSc comm */
4470   ierr = PetscCommDuplicate(comm,&comm,PETSC_NULL);CHKERRQ(ierr);
4471   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4472   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4473 
4474   ierr = PetscNew(Mat_Merge_SeqsToMPI,&merge);CHKERRQ(ierr);
4475   ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr);
4476 
4477   /* determine row ownership */
4478   /*---------------------------------------------------------*/
4479   ierr = PetscLayoutCreate(comm,&merge->rowmap);CHKERRQ(ierr);
4480   ierr = PetscLayoutSetLocalSize(merge->rowmap,m);CHKERRQ(ierr);
4481   ierr = PetscLayoutSetSize(merge->rowmap,M);CHKERRQ(ierr);
4482   ierr = PetscLayoutSetBlockSize(merge->rowmap,1);CHKERRQ(ierr);
4483   ierr = PetscLayoutSetUp(merge->rowmap);CHKERRQ(ierr);
4484   ierr = PetscMalloc(size*sizeof(PetscMPIInt),&len_si);CHKERRQ(ierr);
4485   ierr = PetscMalloc(size*sizeof(PetscMPIInt),&merge->len_s);CHKERRQ(ierr);
4486 
4487   m      = merge->rowmap->n;
4488   M      = merge->rowmap->N;
4489   owners = merge->rowmap->range;
4490 
4491   /* determine the number of messages to send, their lengths */
4492   /*---------------------------------------------------------*/
4493   len_s  = merge->len_s;
4494 
4495   len = 0;  /* length of buf_si[] */
4496   merge->nsend = 0;
4497   for (proc=0; proc<size; proc++){
4498     len_si[proc] = 0;
4499     if (proc == rank){
4500       len_s[proc] = 0;
4501     } else {
4502       len_si[proc] = owners[proc+1] - owners[proc] + 1;
4503       len_s[proc] = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */
4504     }
4505     if (len_s[proc]) {
4506       merge->nsend++;
4507       nrows = 0;
4508       for (i=owners[proc]; i<owners[proc+1]; i++){
4509         if (ai[i+1] > ai[i]) nrows++;
4510       }
4511       len_si[proc] = 2*(nrows+1);
4512       len += len_si[proc];
4513     }
4514   }
4515 
4516   /* determine the number and length of messages to receive for ij-structure */
4517   /*-------------------------------------------------------------------------*/
4518   ierr = PetscGatherNumberOfMessages(comm,PETSC_NULL,len_s,&merge->nrecv);CHKERRQ(ierr);
4519   ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr);
4520 
4521   /* post the Irecv of j-structure */
4522   /*-------------------------------*/
4523   ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr);
4524   ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr);
4525 
4526   /* post the Isend of j-structure */
4527   /*--------------------------------*/
4528   ierr = PetscMalloc2(merge->nsend,MPI_Request,&si_waits,merge->nsend,MPI_Request,&sj_waits);CHKERRQ(ierr);
4529 
4530   for (proc=0, k=0; proc<size; proc++){
4531     if (!len_s[proc]) continue;
4532     i = owners[proc];
4533     ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr);
4534     k++;
4535   }
4536 
4537   /* receives and sends of j-structure are complete */
4538   /*------------------------------------------------*/
4539   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);}
4540   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);}
4541 
4542   /* send and recv i-structure */
4543   /*---------------------------*/
4544   ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr);
4545   ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr);
4546 
4547   ierr = PetscMalloc((len+1)*sizeof(PetscInt),&buf_s);CHKERRQ(ierr);
4548   buf_si = buf_s;  /* points to the beginning of k-th msg to be sent */
4549   for (proc=0,k=0; proc<size; proc++){
4550     if (!len_s[proc]) continue;
4551     /* form outgoing message for i-structure:
4552          buf_si[0]:                 nrows to be sent
4553                [1:nrows]:           row index (local to the receiving process)
4554                [nrows+1:2*nrows+1]: i-structure index
4555     */
4556     /*-------------------------------------------*/
4557     nrows = len_si[proc]/2 - 1;
4558     buf_si_i    = buf_si + nrows+1;
4559     buf_si[0]   = nrows;
4560     buf_si_i[0] = 0;
4561     nrows = 0;
4562     for (i=owners[proc]; i<owners[proc+1]; i++){
4563       anzi = ai[i+1] - ai[i];
4564       if (anzi) {
4565         buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */
4566         buf_si[nrows+1] = i-owners[proc]; /* local row index */
4567         nrows++;
4568       }
4569     }
4570     ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr);
4571     k++;
4572     buf_si += len_si[proc];
4573   }
4574 
4575   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);}
4576   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);}
4577 
4578   ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr);
4579   for (i=0; i<merge->nrecv; i++){
4580     ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr);
4581   }
4582 
4583   ierr = PetscFree(len_si);CHKERRQ(ierr);
4584   ierr = PetscFree(len_ri);CHKERRQ(ierr);
4585   ierr = PetscFree(rj_waits);CHKERRQ(ierr);
4586   ierr = PetscFree2(si_waits,sj_waits);CHKERRQ(ierr);
4587   ierr = PetscFree(ri_waits);CHKERRQ(ierr);
4588   ierr = PetscFree(buf_s);CHKERRQ(ierr);
4589   ierr = PetscFree(status);CHKERRQ(ierr);
4590 
4591   /* compute a local seq matrix in each processor */
4592   /*----------------------------------------------*/
4593   /* allocate bi array and free space for accumulating nonzero column info */
4594   ierr = PetscMalloc((m+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr);
4595   bi[0] = 0;
4596 
4597   /* create and initialize a linked list */
4598   nlnk = N+1;
4599   ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4600 
4601   /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */
4602   len = 0;
4603   len  = ai[owners[rank+1]] - ai[owners[rank]];
4604   ierr = PetscFreeSpaceGet((PetscInt)(2*len+1),&free_space);CHKERRQ(ierr);
4605   current_space = free_space;
4606 
4607   /* determine symbolic info for each local row */
4608   ierr = PetscMalloc3(merge->nrecv,PetscInt*,&buf_ri_k,merge->nrecv,PetscInt*,&nextrow,merge->nrecv,PetscInt*,&nextai);CHKERRQ(ierr);
4609 
4610   for (k=0; k<merge->nrecv; k++){
4611     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th received i-structure */
4612     nrows = *buf_ri_k[k];
4613     nextrow[k]  = buf_ri_k[k] + 1;  /* next row number of k-th received i-structure */
4614     nextai[k]   = buf_ri_k[k] + (nrows + 1);/* points to the next i-structure of k-th received i-structure  */
4615   }
4616 
4617   ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
4618   len = 0;
4619   for (i=0;i<m;i++) {
4620     bnzi   = 0;
4621     /* add local non-zero cols of this proc's seqmat into lnk */
4622     arow   = owners[rank] + i;
4623     anzi   = ai[arow+1] - ai[arow];
4624     aj     = a->j + ai[arow];
4625     ierr = PetscLLAdd(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4626     bnzi += nlnk;
4627     /* add received col data into lnk */
4628     for (k=0; k<merge->nrecv; k++){ /* k-th received message */
4629       if (i == *nextrow[k]) { /* i-th row */
4630         anzi = *(nextai[k]+1) - *nextai[k];
4631         aj   = buf_rj[k] + *nextai[k];
4632         ierr = PetscLLAdd(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4633         bnzi += nlnk;
4634         nextrow[k]++; nextai[k]++;
4635       }
4636     }
4637     if (len < bnzi) len = bnzi;  /* =max(bnzi) */
4638 
4639     /* if free space is not available, make more free space */
4640     if (current_space->local_remaining<bnzi) {
4641       ierr = PetscFreeSpaceGet(bnzi+current_space->total_array_size,&current_space);CHKERRQ(ierr);
4642       nspacedouble++;
4643     }
4644     /* copy data into free space, then initialize lnk */
4645     ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
4646     ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr);
4647 
4648     current_space->array           += bnzi;
4649     current_space->local_used      += bnzi;
4650     current_space->local_remaining -= bnzi;
4651 
4652     bi[i+1] = bi[i] + bnzi;
4653   }
4654 
4655   ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
4656 
4657   ierr = PetscMalloc((bi[m]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr);
4658   ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr);
4659   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
4660 
4661   /* create symbolic parallel matrix B_mpi */
4662   /*---------------------------------------*/
4663   ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr);
4664   if (n==PETSC_DECIDE) {
4665     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr);
4666   } else {
4667     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
4668   }
4669   ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr);
4670   ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr);
4671   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
4672 
4673   /* B_mpi is not ready for use - assembly will be done by MatMerge_SeqsToMPINumeric() */
4674   B_mpi->assembled     = PETSC_FALSE;
4675   B_mpi->ops->destroy  = MatDestroy_MPIAIJ_SeqsToMPI;
4676   merge->bi            = bi;
4677   merge->bj            = bj;
4678   merge->buf_ri        = buf_ri;
4679   merge->buf_rj        = buf_rj;
4680   merge->coi           = PETSC_NULL;
4681   merge->coj           = PETSC_NULL;
4682   merge->owners_co     = PETSC_NULL;
4683 
4684   /* attach the supporting struct to B_mpi for reuse */
4685   ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
4686   ierr = PetscContainerSetPointer(container,merge);CHKERRQ(ierr);
4687   ierr = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr);
4688   *mpimat = B_mpi;
4689 
4690   ierr = PetscCommDestroy(&comm);CHKERRQ(ierr);
4691   ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4692   PetscFunctionReturn(0);
4693 }
4694 
4695 #undef __FUNCT__
4696 #define __FUNCT__ "MatMerge_SeqsToMPI"
4697 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPI(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat)
4698 {
4699   PetscErrorCode   ierr;
4700 
4701   PetscFunctionBegin;
4702   ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4703   if (scall == MAT_INITIAL_MATRIX){
4704     ierr = MatMerge_SeqsToMPISymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr);
4705   }
4706   ierr = MatMerge_SeqsToMPINumeric(seqmat,*mpimat);CHKERRQ(ierr);
4707   ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4708   PetscFunctionReturn(0);
4709 }
4710 
4711 #undef __FUNCT__
4712 #define __FUNCT__ "MatGetLocalMat"
4713 /*@
4714      MatGetLocalMat - Creates a SeqAIJ matrix by taking all the local rows of a parallel matrix
4715 
4716     Not Collective
4717 
4718    Input Parameters:
4719 +    A - the matrix
4720 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4721 
4722    Output Parameter:
4723 .    A_loc - the local sequential matrix generated
4724 
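   A minimal sketch of typical use (Aloc is a hypothetical variable name):
.vb
     Mat Aloc;
     ierr = MatGetLocalMat(A,MAT_INITIAL_MATRIX,&Aloc);CHKERRQ(ierr);
.ve
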
4725     Level: developer
4726 
4727 @*/
4728 PetscErrorCode PETSCMAT_DLLEXPORT MatGetLocalMat(Mat A,MatReuse scall,Mat *A_loc)
4729 {
4730   PetscErrorCode  ierr;
4731   Mat_MPIAIJ      *mpimat=(Mat_MPIAIJ*)A->data;
4732   Mat_SeqAIJ      *mat,*a=(Mat_SeqAIJ*)(mpimat->A)->data,*b=(Mat_SeqAIJ*)(mpimat->B)->data;
4733   PetscInt        *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*cmap=mpimat->garray;
4734   MatScalar       *aa=a->a,*ba=b->a,*cam;
4735   PetscScalar     *ca;
4736   PetscInt        am=A->rmap->n,i,j,k,cstart=A->cmap->rstart;
4737   PetscInt        *ci,*cj,col,ncols_d,ncols_o,jo;
4738 
4739   PetscFunctionBegin;
4740   ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
4741   if (scall == MAT_INITIAL_MATRIX){
4742     ierr = PetscMalloc((1+am)*sizeof(PetscInt),&ci);CHKERRQ(ierr);
4743     ci[0] = 0;
4744     for (i=0; i<am; i++){
4745       ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]);
4746     }
4747     ierr = PetscMalloc((1+ci[am])*sizeof(PetscInt),&cj);CHKERRQ(ierr);
4748     ierr = PetscMalloc((1+ci[am])*sizeof(PetscScalar),&ca);CHKERRQ(ierr);
4749     k = 0;
4750     for (i=0; i<am; i++) {
4751       ncols_o = bi[i+1] - bi[i];
4752       ncols_d = ai[i+1] - ai[i];
4753       /* off-diagonal portion of A */
4754       for (jo=0; jo<ncols_o; jo++) {
4755         col = cmap[*bj];
4756         if (col >= cstart) break;
4757         cj[k]   = col; bj++;
4758         ca[k++] = *ba++;
4759       }
4760       /* diagonal portion of A */
4761       for (j=0; j<ncols_d; j++) {
4762         cj[k]   = cstart + *aj++;
4763         ca[k++] = *aa++;
4764       }
4765       /* off-diagonal portion of A */
4766       for (j=jo; j<ncols_o; j++) {
4767         cj[k]   = cmap[*bj++];
4768         ca[k++] = *ba++;
4769       }
4770     }
4771     /* put together the new matrix */
4772     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr);
4773     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
4774     /* Since these are PETSc arrays, change flags to free them as necessary. */
4775     mat          = (Mat_SeqAIJ*)(*A_loc)->data;
4776     mat->free_a  = PETSC_TRUE;
4777     mat->free_ij = PETSC_TRUE;
4778     mat->nonew   = 0;
4779   } else if (scall == MAT_REUSE_MATRIX){
4780     mat=(Mat_SeqAIJ*)(*A_loc)->data;
4781     ci = mat->i; cj = mat->j; cam = mat->a;
4782     for (i=0; i<am; i++) {
4783       /* off-diagonal portion of A */
4784       ncols_o = bi[i+1] - bi[i];
4785       for (jo=0; jo<ncols_o; jo++) {
4786         col = cmap[*bj];
4787         if (col >= cstart) break;
4788         *cam++ = *ba++; bj++;
4789       }
4790       /* diagonal portion of A */
4791       ncols_d = ai[i+1] - ai[i];
4792       for (j=0; j<ncols_d; j++) *cam++ = *aa++;
4793       /* off-diagonal portion of A */
4794       for (j=jo; j<ncols_o; j++) {
4795         *cam++ = *ba++; bj++;
4796       }
4797     }
4798   } else {
4799     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
4800   }
4801 
4802   ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
4803   PetscFunctionReturn(0);
4804 }
4805 
4806 #undef __FUNCT__
4807 #define __FUNCT__ "MatGetLocalMatCondensed"
4808 /*@C
4809      MatGetLocalMatCondensed - Creates a SeqAIJ matrix by taking all its local rows and NON-ZERO columns
4810 
4811     Not Collective
4812 
4813    Input Parameters:
4814 +    A - the matrix
4815 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4816 -    row, col - index sets of rows and columns to extract (or PETSC_NULL)
4817 
4818    Output Parameter:
4819 .    A_loc - the local sequential matrix generated
4820 
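   A minimal sketch (passing PETSC_NULL for row and col takes all local rows and
   all nonzero columns; Aloc is a hypothetical variable name):
.vb
     Mat Aloc;
     ierr = MatGetLocalMatCondensed(A,MAT_INITIAL_MATRIX,PETSC_NULL,PETSC_NULL,&Aloc);CHKERRQ(ierr);
.ve
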
4821     Level: developer
4822 
4823 @*/
4824 PetscErrorCode PETSCMAT_DLLEXPORT MatGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc)
4825 {
4826   Mat_MPIAIJ        *a=(Mat_MPIAIJ*)A->data;
4827   PetscErrorCode    ierr;
4828   PetscInt          i,start,end,ncols,nzA,nzB,*cmap,imark,*idx;
4829   IS                isrowa,iscola;
4830   Mat               *aloc;
4831 
4832   PetscFunctionBegin;
4833   ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
4834   if (!row){
4835     start = A->rmap->rstart; end = A->rmap->rend;
4836     ierr = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr);
4837   } else {
4838     isrowa = *row;
4839   }
4840   if (!col){
4841     start = A->cmap->rstart;
4842     cmap  = a->garray;
4843     nzA   = a->A->cmap->n;
4844     nzB   = a->B->cmap->n;
4845     ierr  = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr);
4846     ncols = 0;
4847     for (i=0; i<nzB; i++) {
4848       if (cmap[i] < start) idx[ncols++] = cmap[i];
4849       else break;
4850     }
4851     imark = i;
4852     for (i=0; i<nzA; i++) idx[ncols++] = start + i;
4853     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i];
4854     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&iscola);CHKERRQ(ierr);
4855   } else {
4856     iscola = *col;
4857   }
4858   if (scall != MAT_INITIAL_MATRIX){
4859     ierr = PetscMalloc(sizeof(Mat),&aloc);CHKERRQ(ierr);
4860     aloc[0] = *A_loc;
4861   }
4862   ierr = MatGetSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr);
4863   *A_loc = aloc[0];
4864   ierr = PetscFree(aloc);CHKERRQ(ierr);
4865   if (!row){
4866     ierr = ISDestroy(isrowa);CHKERRQ(ierr);
4867   }
4868   if (!col){
4869     ierr = ISDestroy(iscola);CHKERRQ(ierr);
4870   }
4871   ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
4872   PetscFunctionReturn(0);
4873 }
4874 
4875 #undef __FUNCT__
4876 #define __FUNCT__ "MatGetBrowsOfAcols"
4877 /*@C
4878     MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns of local A
4879 
4880     Collective on Mat
4881 
4882    Input Parameters:
4883 +    A,B - the matrices in mpiaij format
4884 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4885 -    rowb, colb - index sets of rows and columns of B to extract (or PETSC_NULL)
4886 
4887    Output Parameter:
4888 +    rowb, colb - index sets of rows and columns of B to extract
4889 .    brstart - row index in B_seq from which the next B->rmap->n rows are B's own local rows
4890 -    B_seq - the sequential matrix generated
4891 
4892     Level: developer
4893 
4894 @*/
4895 PetscErrorCode PETSCMAT_DLLEXPORT MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,PetscInt *brstart,Mat *B_seq)
4896 {
4897   Mat_MPIAIJ        *a=(Mat_MPIAIJ*)A->data;
4898   PetscErrorCode    ierr;
4899   PetscInt          *idx,i,start,ncols,nzA,nzB,*cmap,imark;
4900   IS                isrowb,iscolb;
4901   Mat               *bseq;
4902 
4903   PetscFunctionBegin;
4904   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend){
4905     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
4906   }
4907   ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
4908 
4909   if (scall == MAT_INITIAL_MATRIX){
4910     start = A->cmap->rstart;
4911     cmap  = a->garray;
4912     nzA   = a->A->cmap->n;
4913     nzB   = a->B->cmap->n;
4914     ierr  = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr);
4915     ncols = 0;
4916     for (i=0; i<nzB; i++) {  /* row < local row index */
4917       if (cmap[i] < start) idx[ncols++] = cmap[i];
4918       else break;
4919     }
4920     imark = i;
4921     for (i=0; i<nzA; i++) idx[ncols++] = start + i;  /* local rows */
4922     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */
4923     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&isrowb);CHKERRQ(ierr);
4924     *brstart = imark;
4925     ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr);
4926   } else {
4927     if (!rowb || !colb) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX");
4928     isrowb = *rowb; iscolb = *colb;
4929     ierr = PetscMalloc(sizeof(Mat),&bseq);CHKERRQ(ierr);
4930     bseq[0] = *B_seq;
4931   }
4932   ierr = MatGetSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr);
4933   *B_seq = bseq[0];
4934   ierr = PetscFree(bseq);CHKERRQ(ierr);
4935   if (!rowb){
4936     ierr = ISDestroy(isrowb);CHKERRQ(ierr);
4937   } else {
4938     *rowb = isrowb;
4939   }
4940   if (!colb){
4941     ierr = ISDestroy(iscolb);CHKERRQ(ierr);
4942   } else {
4943     *colb = iscolb;
4944   }
4945   ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
4946   PetscFunctionReturn(0);
4947 }
4948 
4949 #undef __FUNCT__
4950 #define __FUNCT__ "MatGetBrowsOfAoCols"
4951 /*@C
4952     MatGetBrowsOfAoCols - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns
4953     of the OFF-DIAGONAL portion of local A
4954 
4955     Collective on Mat
4956 
4957    Input Parameters:
4958 +    A,B - the matrices in mpiaij format
4959 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4960 .    startsj - starting point in B's sending and receiving j-arrays, saved for MAT_REUSE (or PETSC_NULL)
4961 .    startsj_r - similar to startsj for receives
4962 -    bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or PETSC_NULL)
4963 
4964    Output Parameter:
4965 .    B_oth - the sequential matrix generated
4966 
4967     Level: developer
4968 
4969 @*/
4970 PetscErrorCode PETSCMAT_DLLEXPORT MatGetBrowsOfAoCols(Mat A,Mat B,MatReuse scall,PetscInt **startsj,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth)
4971 {
4972   VecScatter_MPI_General *gen_to,*gen_from;
4973   PetscErrorCode         ierr;
4974   Mat_MPIAIJ             *a=(Mat_MPIAIJ*)A->data;
4975   Mat_SeqAIJ             *b_oth;
4976   VecScatter             ctx=a->Mvctx;
4977   MPI_Comm               comm=((PetscObject)ctx)->comm;
4978   PetscMPIInt            *rprocs,*sprocs,tag=((PetscObject)ctx)->tag,rank;
4979   PetscInt               *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap->n,row,*b_othi,*b_othj;
4980   PetscScalar            *rvalues,*svalues;
4981   MatScalar              *b_otha,*bufa,*bufA;
4982   PetscInt               i,j,k,l,ll,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len;
4983   MPI_Request            *rwaits = PETSC_NULL,*swaits = PETSC_NULL;
4984   MPI_Status             *sstatus,rstatus;
4985   PetscMPIInt            jj;
4986   PetscInt               *cols,sbs,rbs;
4987   PetscScalar            *vals;
4988 
4989   PetscFunctionBegin;
4990   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend){
4991     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
4992   }
4993   ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
4994   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4995 
4996   gen_to   = (VecScatter_MPI_General*)ctx->todata;
4997   gen_from = (VecScatter_MPI_General*)ctx->fromdata;
4998   rvalues  = gen_from->values; /* holds the length of receiving row */
4999   svalues  = gen_to->values;   /* holds the length of sending row */
5000   nrecvs   = gen_from->n;
5001   nsends   = gen_to->n;
5002 
5003   ierr = PetscMalloc2(nrecvs,MPI_Request,&rwaits,nsends,MPI_Request,&swaits);CHKERRQ(ierr);
5004   srow     = gen_to->indices;   /* local row index to be sent */
5005   sstarts  = gen_to->starts;
5006   sprocs   = gen_to->procs;
5007   sstatus  = gen_to->sstatus;
5008   sbs      = gen_to->bs;
5009   rstarts  = gen_from->starts;
5010   rprocs   = gen_from->procs;
5011   rbs      = gen_from->bs;
5012 
5013   if (!startsj || !bufa_ptr) scall = MAT_INITIAL_MATRIX;
5014   if (scall == MAT_INITIAL_MATRIX){
5015     /* i-array */
5016     /*---------*/
5017     /*  post receives */
5018     for (i=0; i<nrecvs; i++){
5019       rowlen = (PetscInt*)rvalues + rstarts[i]*rbs;
5020       nrows = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */
5021       ierr = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5022     }
5023 
5024     /* pack the outgoing message */
5025     ierr = PetscMalloc2(nsends+1,PetscInt,&sstartsj,nrecvs+1,PetscInt,&rstartsj);CHKERRQ(ierr);
5026     sstartsj[0] = 0;  rstartsj[0] = 0;
5027     len = 0; /* total length of j or a array to be sent */
5028     k = 0;
5029     for (i=0; i<nsends; i++){
5030       rowlen = (PetscInt*)svalues + sstarts[i]*sbs;
5031       nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5032       for (j=0; j<nrows; j++) {
5033         row = srow[k] + B->rmap->range[rank]; /* global row idx */
5034         for (l=0; l<sbs; l++){
5035           ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); /* rowlength */
5036           rowlen[j*sbs+l] = ncols;
5037           len += ncols;
5038           ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr);
5039         }
5040         k++;
5041       }
5042       ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5043       sstartsj[i+1] = len;  /* starting point of (i+1)-th outgoing msg in bufj and bufa */
5044     }
5045     /* recvs and sends of i-array are completed */
5046     i = nrecvs;
5047     while (i--) {
5048       ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5049     }
5050     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
5051 
5052     /* allocate buffers for sending j and a arrays */
5053     ierr = PetscMalloc((len+1)*sizeof(PetscInt),&bufj);CHKERRQ(ierr);
5054     ierr = PetscMalloc((len+1)*sizeof(PetscScalar),&bufa);CHKERRQ(ierr);
5055 
5056     /* create i-array of B_oth */
5057     ierr = PetscMalloc((aBn+2)*sizeof(PetscInt),&b_othi);CHKERRQ(ierr);
5058     b_othi[0] = 0;
5059     len = 0; /* total length of j or a array to be received */
5060     k = 0;
5061     for (i=0; i<nrecvs; i++){
5062       rowlen = (PetscInt*)rvalues + rstarts[i]*rbs;
5063       nrows = rbs*(rstarts[i+1]-rstarts[i]); /* num of rows to be received */
5064       for (j=0; j<nrows; j++) {
5065         b_othi[k+1] = b_othi[k] + rowlen[j];
5066         len += rowlen[j]; k++;
5067       }
5068       rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */
5069     }
5070 
5071     /* allocate space for j and a arrays of B_oth */
5072     ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(PetscInt),&b_othj);CHKERRQ(ierr);
5073     ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(MatScalar),&b_otha);CHKERRQ(ierr);
5074 
5075     /* j-array */
5076     /*---------*/
5077     /*  post receives of j-array */
5078     for (i=0; i<nrecvs; i++){
5079       nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
5080       ierr = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5081     }
5082 
5083     /* pack the outgoing message j-array */
5084     k = 0;
5085     for (i=0; i<nsends; i++){
5086       nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5087       bufJ = bufj+sstartsj[i];
5088       for (j=0; j<nrows; j++) {
5089         row  = srow[k++] + B->rmap->range[rank]; /* global row idx */
5090         for (ll=0; ll<sbs; ll++){
5091           ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr);
5092           for (l=0; l<ncols; l++){
5093             *bufJ++ = cols[l];
5094           }
5095           ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr);
5096         }
5097       }
5098       ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5099     }
5100 
5101     /* recvs and sends of j-array are completed */
5102     i = nrecvs;
5103     while (i--) {
5104       ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5105     }
5106     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
5107   } else if (scall == MAT_REUSE_MATRIX){
5108     sstartsj = *startsj;
5109     rstartsj = *startsj_r;
5110     bufa     = *bufa_ptr;
5111     b_oth    = (Mat_SeqAIJ*)(*B_oth)->data;
5112     b_otha   = b_oth->a;
5113   } else {
5114     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
5115   }
5116 
5117   /* a-array */
5118   /*---------*/
5119   /*  post receives of a-array */
5120   for (i=0; i<nrecvs; i++){
5121     nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
5122     ierr = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5123   }
5124 
5125   /* pack the outgoing message a-array */
5126   k = 0;
5127   for (i=0; i<nsends; i++){
5128     nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5129     bufA = bufa+sstartsj[i];
5130     for (j=0; j<nrows; j++) {
5131       row  = srow[k++] + B->rmap->range[rank]; /* global row idx */
5132       for (ll=0; ll<sbs; ll++){
5133         ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr);
5134         for (l=0; l<ncols; l++){
5135           *bufA++ = vals[l];
5136         }
5137         ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr);
5138       }
5139     }
5140     ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5141   }
5142   /* recvs and sends of a-array are completed */
5143   i = nrecvs;
5144   while (i--) {
5145     ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5146   }
5147   if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
5148   ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr);
5149 
5150   if (scall == MAT_INITIAL_MATRIX){
5151     /* put together the new matrix */
5152     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr);
5153 
5154     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
5155     /* Since these are PETSc arrays, change flags to free them as necessary. */
5156     b_oth          = (Mat_SeqAIJ *)(*B_oth)->data;
5157     b_oth->free_a  = PETSC_TRUE;
5158     b_oth->free_ij = PETSC_TRUE;
5159     b_oth->nonew   = 0;
5160 
5161     ierr = PetscFree(bufj);CHKERRQ(ierr);
5162     if (!startsj || !bufa_ptr){
5163       ierr = PetscFree2(sstartsj,rstartsj);CHKERRQ(ierr);
5164       ierr = PetscFree(bufa);CHKERRQ(ierr);
5165     } else {
5166       *startsj   = sstartsj;
5167       *startsj_r = rstartsj;
5168       *bufa_ptr  = bufa;
5169     }
5170   }
5171   ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
5172   PetscFunctionReturn(0);
5173 }
5174 
5175 #undef __FUNCT__
5176 #define __FUNCT__ "MatGetCommunicationStructs"
5177 /*@C
5178   MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication.
5179 
5180   Not Collective
5181 
5182   Input Parameters:
5183 . A - The matrix in mpiaij format
5184 
5185   Output Parameter:
5186 + lvec - The local vector holding off-process values from the argument to a matrix-vector product
5187 . colmap - A map from global column index to local index into lvec
5188 - multScatter - A scatter from the argument of a matrix-vector product to lvec
5189 
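  A minimal sketch of typical use (variable names are illustrative; the type of
  colmap depends on whether PETSc was configured with PETSC_USE_CTABLE):
.vb
    Vec        lvec;
    VecScatter mvctx;
  #if defined(PETSC_USE_CTABLE)
    PetscTable colmap;
  #else
    PetscInt   *colmap;
  #endif
    ierr = MatGetCommunicationStructs(A,&lvec,&colmap,&mvctx);CHKERRQ(ierr);
.ve
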
5190   Level: developer
5191 
5192 @*/
5193 #if defined (PETSC_USE_CTABLE)
5194 PetscErrorCode PETSCMAT_DLLEXPORT MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter)
5195 #else
5196 PetscErrorCode PETSCMAT_DLLEXPORT MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter)
5197 #endif
5198 {
5199   Mat_MPIAIJ *a;
5200 
5201   PetscFunctionBegin;
5202   PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
5203   PetscValidPointer(lvec, 2);
5204   PetscValidPointer(colmap, 3);
5205   PetscValidPointer(multScatter, 4);
5206   a = (Mat_MPIAIJ *) A->data;
5207   if (lvec) *lvec = a->lvec;
5208   if (colmap) *colmap = a->colmap;
5209   if (multScatter) *multScatter = a->Mvctx;
5210   PetscFunctionReturn(0);
5211 }
5212 
5213 EXTERN_C_BEGIN
5214 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_MPIAIJ_MPIAIJCRL(Mat,const MatType,MatReuse,Mat*);
5215 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_MPIAIJ_MPIAIJPERM(Mat,const MatType,MatReuse,Mat*);
5216 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_MPIAIJ_MPISBAIJ(Mat,const MatType,MatReuse,Mat*);
5217 EXTERN_C_END
5218 
5219 #undef __FUNCT__
5220 #define __FUNCT__ "MatMatMultNumeric_MPIDense_MPIAIJ"
5221 /*
5222     Computes (B'*A')' since computing B*A directly is untenable
5223 
5224                n                       p                          p
5225         (              )       (              )         (                  )
5226       m (      A       )  *  n (       B      )   =   m (         C        )
5227         (              )       (              )         (                  )
5228 
5229 */
5230 PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C)
5231 {
5232   PetscErrorCode     ierr;
5233   Mat                At,Bt,Ct;
5234 
5235   PetscFunctionBegin;
5236   ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr);
5237   ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr);
5238   ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,1.0,&Ct);CHKERRQ(ierr);
5239   ierr = MatDestroy(At);CHKERRQ(ierr);
5240   ierr = MatDestroy(Bt);CHKERRQ(ierr);
5241   ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr);
5242   ierr = MatDestroy(Ct);CHKERRQ(ierr);
5243   PetscFunctionReturn(0);
5244 }
5245 
5246 #undef __FUNCT__
5247 #define __FUNCT__ "MatMatMultSymbolic_MPIDense_MPIAIJ"
5248 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat *C)
5249 {
5250   PetscErrorCode ierr;
5251   PetscInt       m=A->rmap->n,n=B->cmap->n;
5252   Mat            Cmat;
5253 
5254   PetscFunctionBegin;
5255   if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %d != B->rmap->n %d\n",A->cmap->n,B->rmap->n);
5256   ierr = MatCreate(((PetscObject)A)->comm,&Cmat);CHKERRQ(ierr);
5257   ierr = MatSetSizes(Cmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
5258   ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr);
5259   ierr = MatMPIDenseSetPreallocation(Cmat,PETSC_NULL);CHKERRQ(ierr);
5260   ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5261   ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5262   *C   = Cmat;
5263   PetscFunctionReturn(0);
5264 }
5265 
5266 /* ----------------------------------------------------------------*/
5267 #undef __FUNCT__
5268 #define __FUNCT__ "MatMatMult_MPIDense_MPIAIJ"
5269 PetscErrorCode MatMatMult_MPIDense_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C)
5270 {
5271   PetscErrorCode ierr;
5272 
5273   PetscFunctionBegin;
5274   if (scall == MAT_INITIAL_MATRIX){
5275     ierr = MatMatMultSymbolic_MPIDense_MPIAIJ(A,B,fill,C);CHKERRQ(ierr);
5276   }
5277   ierr = MatMatMultNumeric_MPIDense_MPIAIJ(A,B,*C);CHKERRQ(ierr);
5278   PetscFunctionReturn(0);
5279 }
5280 
5281 EXTERN_C_BEGIN
5282 #if defined(PETSC_HAVE_MUMPS)
5283 extern PetscErrorCode MatGetFactor_aij_mumps(Mat,MatFactorType,Mat*);
5284 #endif
5285 #if defined(PETSC_HAVE_PASTIX)
5286 extern PetscErrorCode MatGetFactor_mpiaij_pastix(Mat,MatFactorType,Mat*);
5287 #endif
5288 #if defined(PETSC_HAVE_SUPERLU_DIST)
5289 extern PetscErrorCode MatGetFactor_mpiaij_superlu_dist(Mat,MatFactorType,Mat*);
5290 #endif
5291 #if defined(PETSC_HAVE_SPOOLES)
5292 extern PetscErrorCode MatGetFactor_mpiaij_spooles(Mat,MatFactorType,Mat*);
5293 #endif
5294 EXTERN_C_END
5295 
5296 /*MC
5297    MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.
5298 
5299    Options Database Keys:
5300 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions()
5301 
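   One way to create a matrix of this type programmatically (preallocation with
   MatMPIAIJSetPreallocation() would normally follow):
.vb
     ierr = MatCreate(comm,&A);CHKERRQ(ierr);
     ierr = MatSetSizes(A,m,n,M,N);CHKERRQ(ierr);
     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
.ve
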
5302   Level: beginner
5303 
5304 .seealso: MatCreateMPIAIJ()
5305 M*/
5306 
5307 EXTERN_C_BEGIN
5308 #undef __FUNCT__
5309 #define __FUNCT__ "MatCreate_MPIAIJ"
5310 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_MPIAIJ(Mat B)
5311 {
5312   Mat_MPIAIJ     *b;
5313   PetscErrorCode ierr;
5314   PetscMPIInt    size;
5315 
5316   PetscFunctionBegin;
5317   ierr = MPI_Comm_size(((PetscObject)B)->comm,&size);CHKERRQ(ierr);
5318 
5319   ierr            = PetscNewLog(B,Mat_MPIAIJ,&b);CHKERRQ(ierr);
5320   B->data         = (void*)b;
5321   ierr            = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
5322   B->rmap->bs     = 1;
5323   B->assembled    = PETSC_FALSE;
5324   B->mapping      = 0;
5325 
5326   B->insertmode   = NOT_SET_VALUES;
5327   b->size         = size;
5328   ierr = MPI_Comm_rank(((PetscObject)B)->comm,&b->rank);CHKERRQ(ierr);
5329 
5330   /* build stash used to cache off-processor entries generated during MatSetValues() */
5331   ierr = MatStashCreate_Private(((PetscObject)B)->comm,1,&B->stash);CHKERRQ(ierr);
5332   b->donotstash  = PETSC_FALSE;
5333   b->colmap      = 0;
5334   b->garray      = 0;
5335   b->roworiented = PETSC_TRUE;
5336 
5337   /* stuff used for matrix vector multiply */
5338   b->lvec      = PETSC_NULL;
5339   b->Mvctx     = PETSC_NULL;
5340 
5341   /* stuff for MatGetRow() */
5342   b->rowindices   = 0;
5343   b->rowvalues    = 0;
5344   b->getrowactive = PETSC_FALSE;
5345 
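  /* Compose the type-specific implementations below with this object by name; they are later
     retrieved with PetscObjectQueryFunction(), e.g. when MatGetFactor() or MatConvert()
     dispatches on the matrix type. */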
5346 #if defined(PETSC_HAVE_SPOOLES)
5347   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_spooles_C",
5348                                      "MatGetFactor_mpiaij_spooles",
5349                                      MatGetFactor_mpiaij_spooles);CHKERRQ(ierr);
5350 #endif
5351 #if defined(PETSC_HAVE_MUMPS)
5352   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_mumps_C",
5353                                      "MatGetFactor_aij_mumps",
5354                                      MatGetFactor_aij_mumps);CHKERRQ(ierr);
5355 #endif
5356 #if defined(PETSC_HAVE_PASTIX)
5357   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_pastix_C",
5358 					   "MatGetFactor_mpiaij_pastix",
5359 					   MatGetFactor_mpiaij_pastix);CHKERRQ(ierr);
5360 #endif
5361 #if defined(PETSC_HAVE_SUPERLU_DIST)
5362   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_superlu_dist_C",
5363                                      "MatGetFactor_mpiaij_superlu_dist",
5364                                      MatGetFactor_mpiaij_superlu_dist);CHKERRQ(ierr);
5365 #endif
5366   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
5367                                      "MatStoreValues_MPIAIJ",
5368                                      MatStoreValues_MPIAIJ);CHKERRQ(ierr);
5369   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
5370                                      "MatRetrieveValues_MPIAIJ",
5371                                      MatRetrieveValues_MPIAIJ);CHKERRQ(ierr);
5372   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
5373 				     "MatGetDiagonalBlock_MPIAIJ",
5374                                      MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr);
5375   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatIsTranspose_C",
5376 				     "MatIsTranspose_MPIAIJ",
5377 				     MatIsTranspose_MPIAIJ);CHKERRQ(ierr);
5378   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocation_C",
5379 				     "MatMPIAIJSetPreallocation_MPIAIJ",
5380 				     MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr);
5381   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",
5382 				     "MatMPIAIJSetPreallocationCSR_MPIAIJ",
5383 				     MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr);
5384   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C",
5385 				     "MatDiagonalScaleLocal_MPIAIJ",
5386 				     MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr);
5387   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpiaijperm_C",
5388                                      "MatConvert_MPIAIJ_MPIAIJPERM",
5389                                       MatConvert_MPIAIJ_MPIAIJPERM);CHKERRQ(ierr);
5390   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpiaijcrl_C",
5391                                      "MatConvert_MPIAIJ_MPIAIJCRL",
5392                                       MatConvert_MPIAIJ_MPIAIJCRL);CHKERRQ(ierr);
5393   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C",
5394                                      "MatConvert_MPIAIJ_MPISBAIJ",
5395                                       MatConvert_MPIAIJ_MPISBAIJ);CHKERRQ(ierr);
5396   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMult_mpidense_mpiaij_C",
5397                                      "MatMatMult_MPIDense_MPIAIJ",
5398                                       MatMatMult_MPIDense_MPIAIJ);CHKERRQ(ierr);
5399   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMultSymbolic_mpidense_mpiaij_C",
5400                                      "MatMatMultSymbolic_MPIDense_MPIAIJ",
5401                                      MatMatMultSymbolic_MPIDense_MPIAIJ);CHKERRQ(ierr);
5402   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMultNumeric_mpidense_mpiaij_C",
5403                                      "MatMatMultNumeric_MPIDense_MPIAIJ",
5404                                       MatMatMultNumeric_MPIDense_MPIAIJ);CHKERRQ(ierr);
5405   ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr);
5406   PetscFunctionReturn(0);
5407 }
5408 EXTERN_C_END
5409 
5410 #undef __FUNCT__
5411 #define __FUNCT__ "MatCreateMPIAIJWithSplitArrays"
5412 /*@
5413      MatCreateMPIAIJWithSplitArrays - creates an MPIAIJ matrix using arrays that contain the "diagonal"
5414          and "off-diagonal" parts of the matrix in CSR format.
5415 
5416    Collective on MPI_Comm
5417 
5418    Input Parameters:
5419 +  comm - MPI communicator
5420 .  m - number of local rows (cannot be PETSC_DECIDE)
5421 .  n - number of local columns; this should be the same as the local size used in creating the
5422        x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have it
5423        calculated if N is given). For square matrices n is almost always m.
5424 .  M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given)
5425 .  N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given)
5426 .   i - row indices for "diagonal" portion of matrix
5427 .   j - column indices
5428 .   a - matrix values
5429 .   oi - row indices for "off-diagonal" portion of matrix
5430 .   oj - column indices
5431 -   oa - matrix values
5432 
5433    Output Parameter:
5434 .   mat - the matrix
5435 
5436    Level: advanced
5437 
5438    Notes:
5439        The i, j, a, oi, oj, and oa arrays ARE NOT copied by this routine into the internal format used by PETSc.
5440 
5441        The i, j, oi, and oj indices are 0-based.
5442 
5443        See MatCreateMPIAIJ() for the definition of the "diagonal" and "off-diagonal" portions of the matrix.
5444 
5445        This routine sets only local rows and cannot be used to set off-processor values.
5446 
5447        You cannot later use MatSetValues() to change values in this matrix.
5448 
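       Example layout (an illustrative sketch, not from the original documentation): take the 4x4
       global matrix below, split over two processes with m = n = 2 on each,

.vb
          1  0 | 2  0
          0  3 | 0  4
          -----+-----
          5  0 | 6  0
          0  0 | 0  7
.ve

       As suggested by the calls to MatCreateSeqAIJWithArrays() in the routine below (the diagonal
       block is built with n columns, the off-diagonal block with N columns), process 0 would pass
       local column indices for the diagonal part and global column indices for the off-diagonal
       part, with comm the two-process communicator and mat the output matrix:

.vb
       PetscInt    i[]  = {0,1,2}, j[]  = {0,1};
       PetscScalar a[]  = {1.0,3.0};
       PetscInt    oi[] = {0,1,2}, oj[] = {2,3};
       PetscScalar oa[] = {2.0,4.0};
       MatCreateMPIAIJWithSplitArrays(comm,2,2,4,4,i,j,a,oi,oj,oa,&mat);
.ve
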
5449 .keywords: matrix, aij, compressed row, sparse, parallel
5450 
5451 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
5452           MPIAIJ, MatCreateMPIAIJ(), MatCreateMPIAIJWithArrays()
5453 @*/
5454 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],
5455 								PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat)
5456 {
5457   PetscErrorCode ierr;
5458   Mat_MPIAIJ     *maij;
5459 
5460   PetscFunctionBegin;
5461   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
5462   if (i[0]) {
5463     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
5464   }
5465   if (oi[0]) {
5466     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0");
5467   }
5468   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
5469   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
5470   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
5471   maij = (Mat_MPIAIJ*) (*mat)->data;
5472   maij->donotstash     = PETSC_TRUE;
5473   (*mat)->preallocated = PETSC_TRUE;
5474 
5475   ierr = PetscLayoutSetBlockSize((*mat)->rmap,1);CHKERRQ(ierr);
5476   ierr = PetscLayoutSetBlockSize((*mat)->cmap,1);CHKERRQ(ierr);
5477   ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr);
5478   ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr);
5479 
5480   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr);
5481   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr);
5482 
5483   ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5484   ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5485   ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5486   ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5487 
5488   ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5489   ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5490   PetscFunctionReturn(0);
5491 }
5492 
5493 /*
5494     Special version of MatSetValues() for direct calls from Fortran; the symbol name is mangled below to match the Fortran compiler's external-name convention
5495 */
5496 #if defined(PETSC_HAVE_FORTRAN_CAPS)
5497 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ
5498 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
5499 #define matsetvaluesmpiaij_ matsetvaluesmpiaij
5500 #endif
5501 
5502 /* Redefine these macros so they can be used in a void function (which cannot return an error code) */
5503 #undef CHKERRQ
5504 #define CHKERRQ(ierr) CHKERRABORT(PETSC_COMM_WORLD,ierr)
5505 #undef SETERRQ2
5506 #define SETERRQ2(comm,ierr,b,c,d) CHKERRABORT(comm,ierr)
5507 #undef SETERRQ
5508 #define SETERRQ(c,ierr,b) CHKERRABORT(c,ierr)
5509 
5510 EXTERN_C_BEGIN
5511 #undef __FUNCT__
5512 #define __FUNCT__ "matsetvaluesmpiaij_"
5513 void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr)
5514 {
5515   Mat             mat = *mmat;
5516   PetscInt        m = *mm, n = *mn;
5517   InsertMode      addv = *maddv;
5518   Mat_MPIAIJ      *aij = (Mat_MPIAIJ*)mat->data;
5519   PetscScalar     value;
5520   PetscErrorCode  ierr;
5521 
5522   ierr = MatPreallocated(mat);CHKERRQ(ierr);
5523   if (mat->insertmode == NOT_SET_VALUES) {
5524     mat->insertmode = addv;
5525   }
5526 #if defined(PETSC_USE_DEBUG)
5527   else if (mat->insertmode != addv) {
5528     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
5529   }
5530 #endif
5531   {
5532   PetscInt        i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
5533   PetscInt        cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
5534   PetscBool       roworiented = aij->roworiented;
5535 
5536   /* Some variables required by the MatSetValues_SeqAIJ_A_Private() and MatSetValues_SeqAIJ_B_Private() macros */
5537   Mat             A = aij->A;
5538   Mat_SeqAIJ      *a = (Mat_SeqAIJ*)A->data;
5539   PetscInt        *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
5540   MatScalar       *aa = a->a;
5541   PetscBool       ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES))?PETSC_TRUE:PETSC_FALSE);
5542   Mat             B = aij->B;
5543   Mat_SeqAIJ      *b = (Mat_SeqAIJ*)B->data;
5544   PetscInt        *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
5545   MatScalar       *ba = b->a;
5546 
5547   PetscInt        *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
5548   PetscInt        nonew = a->nonew;
5549   MatScalar       *ap1,*ap2;
5550 
5551   PetscFunctionBegin;
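  /* Mirror of MatSetValues_MPIAIJ(): locally owned rows are inserted directly into the
     "diagonal" (aij->A) or "off-diagonal" (aij->B) sequential blocks, while rows owned by
     other processes are placed in the stash for communication at assembly time. */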
5552   for (i=0; i<m; i++) {
5553     if (im[i] < 0) continue;
5554 #if defined(PETSC_USE_DEBUG)
5555     if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
5556 #endif
5557     if (im[i] >= rstart && im[i] < rend) {
5558       row      = im[i] - rstart;
5559       lastcol1 = -1;
5560       rp1      = aj + ai[row];
5561       ap1      = aa + ai[row];
5562       rmax1    = aimax[row];
5563       nrow1    = ailen[row];
5564       low1     = 0;
5565       high1    = nrow1;
5566       lastcol2 = -1;
5567       rp2      = bj + bi[row];
5568       ap2      = ba + bi[row];
5569       rmax2    = bimax[row];
5570       nrow2    = bilen[row];
5571       low2     = 0;
5572       high2    = nrow2;
5573 
5574       for (j=0; j<n; j++) {
5575         if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
5576         if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
5577         if (in[j] >= cstart && in[j] < cend){
5578           col = in[j] - cstart;
5579           MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
5580         } else if (in[j] < 0) continue;
5581 #if defined(PETSC_USE_DEBUG)
5582         else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
5583 #endif
5584         else {
5585           if (mat->was_assembled) {
5586             if (!aij->colmap) {
5587               ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
5588             }
5589 #if defined (PETSC_USE_CTABLE)
5590             ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
5591 	    col--;
5592 #else
5593             col = aij->colmap[in[j]] - 1;
5594 #endif
5595             if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
5596               ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
5597               col =  in[j];
5598               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
5599               B = aij->B;
5600               b = (Mat_SeqAIJ*)B->data;
5601               bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
5602               rp2      = bj + bi[row];
5603               ap2      = ba + bi[row];
5604               rmax2    = bimax[row];
5605               nrow2    = bilen[row];
5606               low2     = 0;
5607               high2    = nrow2;
5608               bm       = aij->B->rmap->n;
5609               ba = b->a;
5610             }
5611           } else col = in[j];
5612           MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
5613         }
5614       }
5615     } else {
5616       if (!aij->donotstash) {
5617         if (roworiented) {
5618           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
5619         } else {
5620           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
5621         }
5622       }
5623     }
5624   }}
5625   PetscFunctionReturnVoid();
5626 }
5627 EXTERN_C_END
5628 
5629