xref: /petsc/src/mat/impls/aij/mpi/mpiaij.c (revision 71fd2e92b2fee25f70818ffc1dca9fc57f8584d7)
1 #define PETSCMAT_DLL
2 
3 #include "../src/mat/impls/aij/mpi/mpiaij.h"   /*I "petscmat.h" I*/
4 
5 #undef __FUNCT__
6 #define __FUNCT__ "MatDistribute_MPIAIJ"
7 /*
8     Distributes a SeqAIJ matrix across a set of processes. Code stolen from
9     MatLoad_MPIAIJ(). Horrible lack of reuse. Should be a routine for each matrix type.
10 
11     Only for square matrices
12 */
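/*
    Illustrative usage sketch (not part of the library): spreading a MATSEQAIJ
    matrix gA, whose entries are significant on process 0, across a communicator,
    giving this process mlocal rows.  The names gA, dA and mlocal are hypothetical.

      Mat dA;
      ierr = MatDistribute_MPIAIJ(PETSC_COMM_WORLD,gA,mlocal,MAT_INITIAL_MATRIX,&dA);CHKERRQ(ierr);
      ... update the numerical values of gA on process 0 ...
      ierr = MatDistribute_MPIAIJ(PETSC_COMM_WORLD,gA,mlocal,MAT_REUSE_MATRIX,&dA);CHKERRQ(ierr);

    With MAT_INITIAL_MATRIX both the nonzero structure and the values are distributed;
    with MAT_REUSE_MATRIX only the numerical values are moved over from process 0.
*/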
13 PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat)
14 {
15   PetscMPIInt    rank,size;
16   PetscInt       *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz,*gmataj,cnt,row,*ld;
17   PetscErrorCode ierr;
18   Mat            mat;
19   Mat_SeqAIJ     *gmata;
20   PetscMPIInt    tag;
21   MPI_Status     status;
22   PetscTruth     aij;
23   MatScalar      *gmataa,*ao,*ad,*gmataarestore=0;
24 
25   PetscFunctionBegin;
26   CHKMEMQ;
27   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
28   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
29   if (!rank) {
30     ierr = PetscTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);CHKERRQ(ierr);
31     if (!aij) SETERRQ1(PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name);
32   }
33   if (reuse == MAT_INITIAL_MATRIX) {
34     ierr = MatCreate(comm,&mat);CHKERRQ(ierr);
35     ierr = MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
36     ierr = MatSetType(mat,MATAIJ);CHKERRQ(ierr);
37     ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr);
38     ierr = PetscMalloc2(m,PetscInt,&dlens,m,PetscInt,&olens);CHKERRQ(ierr);
39     ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
40     rowners[0] = 0;
41     for (i=2; i<=size; i++) {
42       rowners[i] += rowners[i-1];
43     }
44     rstart = rowners[rank];
45     rend   = rowners[rank+1];
46     ierr   = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
47     if (!rank) {
48       gmata = (Mat_SeqAIJ*) gmat->data;
49       /* send row lengths to all processors */
50       for (i=0; i<m; i++) dlens[i] = gmata->ilen[i];
51       for (i=1; i<size; i++) {
52 	ierr = MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
53       }
54       /* determine the diagonal and off-diagonal entry counts for each row */
55       ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
56       ierr = PetscMalloc(m*sizeof(PetscInt),&ld);CHKERRQ(ierr);
57       ierr = PetscMemzero(ld,m*sizeof(PetscInt));CHKERRQ(ierr);
58       jj = 0;
59       for (i=0; i<m; i++) {
60 	for (j=0; j<dlens[i]; j++) {
61           if (gmata->j[jj] < rstart) ld[i]++;
62 	  if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++;
63 	  jj++;
64 	}
65       }
66       /* send column indices to other processes */
67       for (i=1; i<size; i++) {
68 	nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
69 	ierr = MPI_Send(&nz,1,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
70 	ierr = MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
71       }
72 
73       /* send numerical values to other processes */
74       for (i=1; i<size; i++) {
75         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
76         ierr = MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
77       }
78       gmataa = gmata->a;
79       gmataj = gmata->j;
80 
81     } else {
82       /* receive row lengths */
83       ierr = MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
84       /* receive column indices */
85       ierr = MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
86       ierr = PetscMalloc2(nz,PetscScalar,&gmataa,nz,PetscInt,&gmataj);CHKERRQ(ierr);
87       ierr = MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
88       /* determine the diagonal and off-diagonal entry counts for each row */
89       ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
90       ierr = PetscMalloc(m*sizeof(PetscInt),&ld);CHKERRQ(ierr);
91       ierr = PetscMemzero(ld,m*sizeof(PetscInt));CHKERRQ(ierr);
92       jj = 0;
93       for (i=0; i<m; i++) {
94 	for (j=0; j<dlens[i]; j++) {
95           if (gmataj[jj] < rstart) ld[i]++;
96 	  if (gmataj[jj] < rstart || gmataj[jj] >= rend) olens[i]++;
97 	  jj++;
98 	}
99       }
100       /* receive numerical values */
101       ierr = PetscMemzero(gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr);
102       ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
103     }
104     /* set preallocation */
105     for (i=0; i<m; i++) {
106       dlens[i] -= olens[i];
107     }
108     ierr = MatSeqAIJSetPreallocation(mat,0,dlens);CHKERRQ(ierr);
109     ierr = MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);CHKERRQ(ierr);
110 
111     for (i=0; i<m; i++) {
112       dlens[i] += olens[i];
113     }
114     cnt  = 0;
115     for (i=0; i<m; i++) {
116       row  = rstart + i;
117       ierr = MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);CHKERRQ(ierr);
118       cnt += dlens[i];
119     }
120     if (rank) {
121       ierr = PetscFree2(gmataa,gmataj);CHKERRQ(ierr);
122     }
123     ierr = PetscFree2(dlens,olens);CHKERRQ(ierr);
124     ierr = PetscFree(rowners);CHKERRQ(ierr);
125     ((Mat_MPIAIJ*)(mat->data))->ld = ld;
126     *inmat = mat;
127   } else {   /* column indices are already set; only need to move over numerical values from process 0 */
128     Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data;
129     Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data;
130     mat   = *inmat;
131     ierr  = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
132     if (!rank) {
133       /* send numerical values to other processes */
134       gmata = (Mat_SeqAIJ*) gmat->data;
135       ierr   = MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);CHKERRQ(ierr);
136       gmataa = gmata->a;
137       for (i=1; i<size; i++) {
138         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
139         ierr = MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
140       }
141       nz   = gmata->i[rowners[1]]-gmata->i[rowners[0]];
142     } else {
143       /* receive numerical values from process 0 */
144       nz   = Ad->nz + Ao->nz;
145       ierr = PetscMalloc(nz*sizeof(PetscScalar),&gmataa);CHKERRQ(ierr); gmataarestore = gmataa;
146       ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
147     }
148     /* transfer numerical values into the diagonal (A) and off-diagonal (B) parts of mat */
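    /* Layout assumed here (descriptive comment, mirrors how ld[] was built during the
       initial distribution): within the received CSR values, each local row i appears as
          [ ld[i] off-diagonal entries left of the diagonal block | diagonal-block entries | remaining off-diagonal entries ]
       so every row is split into a copy into B (ao), a copy into A (ad), and a trailing copy into B. */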
149     ld = ((Mat_MPIAIJ*)(mat->data))->ld;
150     ad = Ad->a;
151     ao = Ao->a;
152     if (mat->rmap->n) {
153       i  = 0;
154       nz = ld[i];                                   ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
155       nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
156     }
157     for (i=1; i<mat->rmap->n; i++) {
158       nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
159       nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
160     }
161     i--;
162     if (mat->rmap->n) {
163       nz = Ao->i[i+1] - Ao->i[i] - ld[i];           ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
164     }
165     if (rank) {
166       ierr = PetscFree(gmataarestore);CHKERRQ(ierr);
167     }
168   }
169   ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
170   ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
171   CHKMEMQ;
172   PetscFunctionReturn(0);
173 }
174 
175 /*
176   Local utility routine that creates a mapping from the global column
177 number to the local number in the off-diagonal part of the local
178 storage of the matrix.  When PETSC_USE_CTABLE is used this is scalable at
179 a slightly higher hash-table cost; without it, it is not scalable (each process
180 holds an order-N integer array, but access is fast).
181 */
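/*
   Illustrative lookup sketch (mirrors the usage in MatSetValues_MPIAIJ() and
   MatGetValues_MPIAIJ() below): given a global column gcol, the local column in
   the off-diagonal part is obtained by

     #if defined(PETSC_USE_CTABLE)
       ierr = PetscTableFind(aij->colmap,gcol+1,&lcol);CHKERRQ(ierr); lcol--;
     #else
       lcol = aij->colmap[gcol] - 1;
     #endif

   where lcol < 0 indicates that gcol does not appear in the off-diagonal part.
   The names gcol and lcol are placeholders for this sketch.
*/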
182 #undef __FUNCT__
183 #define __FUNCT__ "CreateColmap_MPIAIJ_Private"
184 PetscErrorCode CreateColmap_MPIAIJ_Private(Mat mat)
185 {
186   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
187   PetscErrorCode ierr;
188   PetscInt       n = aij->B->cmap->n,i;
189 
190   PetscFunctionBegin;
191 #if defined (PETSC_USE_CTABLE)
192   ierr = PetscTableCreate(n,&aij->colmap);CHKERRQ(ierr);
193   for (i=0; i<n; i++){
194     ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1);CHKERRQ(ierr);
195   }
196 #else
197   ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscInt),&aij->colmap);CHKERRQ(ierr);
198   ierr = PetscLogObjectMemory(mat,mat->cmap->N*sizeof(PetscInt));CHKERRQ(ierr);
199   ierr = PetscMemzero(aij->colmap,mat->cmap->N*sizeof(PetscInt));CHKERRQ(ierr);
200   for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
201 #endif
202   PetscFunctionReturn(0);
203 }
204 
205 
206 #define CHUNKSIZE   15
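/*
   Descriptive note on the two macros below: given a (row,col,value) triple already
   known to belong to the diagonal part A (the _A_ variant) or the off-diagonal part
   B (the _B_ variant), they
     - binary-search the row (rp1/rp2 hold column indices, ap1/ap2 the values) to
       narrow the insertion point, then scan linearly,
     - add to or overwrite an existing entry, or
     - insert a new entry, shifting later entries up and growing the row through
       MatSeqXAIJReallocateAIJ() when needed,
   while honoring the nonew and ignorezeroentries options.
*/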
207 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \
208 { \
209     if (col <= lastcol1) low1 = 0; else high1 = nrow1; \
210     lastcol1 = col;\
211     while (high1-low1 > 5) { \
212       t = (low1+high1)/2; \
213       if (rp1[t] > col) high1 = t; \
214       else             low1  = t; \
215     } \
216       for (_i=low1; _i<high1; _i++) { \
217         if (rp1[_i] > col) break; \
218         if (rp1[_i] == col) { \
219           if (addv == ADD_VALUES) ap1[_i] += value;   \
220           else                    ap1[_i] = value; \
221           goto a_noinsert; \
222         } \
223       }  \
224       if (value == 0.0 && ignorezeroentries) {low1 = 0; high1 = nrow1;goto a_noinsert;} \
225       if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;}		\
226       if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
227       MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \
228       N = nrow1++ - 1; a->nz++; high1++; \
229       /* shift up all the later entries in this row */ \
230       for (ii=N; ii>=_i; ii--) { \
231         rp1[ii+1] = rp1[ii]; \
232         ap1[ii+1] = ap1[ii]; \
233       } \
234       rp1[_i] = col;  \
235       ap1[_i] = value;  \
236       a_noinsert: ; \
237       ailen[row] = nrow1; \
238 }
239 
240 
241 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \
242 { \
243     if (col <= lastcol2) low2 = 0; else high2 = nrow2; \
244     lastcol2 = col;\
245     while (high2-low2 > 5) { \
246       t = (low2+high2)/2; \
247       if (rp2[t] > col) high2 = t; \
248       else             low2  = t; \
249     } \
250     for (_i=low2; _i<high2; _i++) {		\
251       if (rp2[_i] > col) break;			\
252       if (rp2[_i] == col) {			      \
253 	if (addv == ADD_VALUES) ap2[_i] += value;     \
254 	else                    ap2[_i] = value;      \
255 	goto b_noinsert;			      \
256       }						      \
257     }							      \
258     if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
259     if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;}		\
260     if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
261     MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \
262     N = nrow2++ - 1; b->nz++; high2++;					\
263     /* shift up all the later entries in this row */			\
264     for (ii=N; ii>=_i; ii--) {						\
265       rp2[ii+1] = rp2[ii];						\
266       ap2[ii+1] = ap2[ii];						\
267     }									\
268     rp2[_i] = col;							\
269     ap2[_i] = value;							\
270     b_noinsert: ;								\
271     bilen[row] = nrow2;							\
272 }
273 
274 #undef __FUNCT__
275 #define __FUNCT__ "MatSetValuesRow_MPIAIJ"
276 PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[])
277 {
278   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)A->data;
279   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data;
280   PetscErrorCode ierr;
281   PetscInt       l,*garray = mat->garray,diag;
282 
283   PetscFunctionBegin;
284   /* code only works for square matrices A */
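  /* v[] is assumed to hold the complete row in increasing global column order:
     the l entries left of the diagonal block go into B, the diagonal-block entries
     go into A, and the remaining entries go back into B (see the three PetscMemcpy()
     calls below). */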
285 
286   /* find size of row to the left of the diagonal part */
287   ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr);
288   row  = row - diag;
289   for (l=0; l<b->i[row+1]-b->i[row]; l++) {
290     if (garray[b->j[b->i[row]+l]] > diag) break;
291   }
292   ierr = PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));CHKERRQ(ierr);
293 
294   /* diagonal part */
295   ierr = PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));CHKERRQ(ierr);
296 
297   /* right of diagonal part */
298   ierr = PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));CHKERRQ(ierr);
299   PetscFunctionReturn(0);
300 }
301 
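/*
   Descriptive note: for each (row,column) pair this routine distinguishes three cases:
     - the row is owned locally and the column lies in [cstart,cend): the entry goes
       into the diagonal block aij->A via MatSetValues_SeqAIJ_A_Private(),
     - the row is owned locally but the column is off-process: once the matrix has been
       assembled the global column is mapped through aij->colmap (possibly triggering
       DisAssemble_MPIAIJ()) and the entry goes into aij->B via
       MatSetValues_SeqAIJ_B_Private(),
     - the row is not owned locally: the values are stashed and communicated during
       assembly, unless donotstash is set.
*/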
302 #undef __FUNCT__
303 #define __FUNCT__ "MatSetValues_MPIAIJ"
304 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
305 {
306   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
307   PetscScalar    value;
308   PetscErrorCode ierr;
309   PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
310   PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
311   PetscTruth     roworiented = aij->roworiented;
312 
313   /* Some variables required by the macros below */
314   Mat            A = aij->A;
315   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data;
316   PetscInt       *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
317   MatScalar      *aa = a->a;
318   PetscTruth     ignorezeroentries = a->ignorezeroentries;
319   Mat            B = aij->B;
320   Mat_SeqAIJ     *b = (Mat_SeqAIJ*)B->data;
321   PetscInt       *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
322   MatScalar      *ba = b->a;
323 
324   PetscInt       *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
325   PetscInt       nonew = a->nonew;
326   MatScalar      *ap1,*ap2;
327 
328   PetscFunctionBegin;
329   if (v) PetscValidScalarPointer(v,6);
330   for (i=0; i<m; i++) {
331     if (im[i] < 0) continue;
332 #if defined(PETSC_USE_DEBUG)
333     if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
334 #endif
335     if (im[i] >= rstart && im[i] < rend) {
336       row      = im[i] - rstart;
337       lastcol1 = -1;
338       rp1      = aj + ai[row];
339       ap1      = aa + ai[row];
340       rmax1    = aimax[row];
341       nrow1    = ailen[row];
342       low1     = 0;
343       high1    = nrow1;
344       lastcol2 = -1;
345       rp2      = bj + bi[row];
346       ap2      = ba + bi[row];
347       rmax2    = bimax[row];
348       nrow2    = bilen[row];
349       low2     = 0;
350       high2    = nrow2;
351 
352       for (j=0; j<n; j++) {
353         if (v) {if (roworiented) value = v[i*n+j]; else value = v[i+j*m];} else value = 0.0;
354         if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
355         if (in[j] >= cstart && in[j] < cend){
356           col = in[j] - cstart;
357           MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
358         } else if (in[j] < 0) continue;
359 #if defined(PETSC_USE_DEBUG)
360         else if (in[j] >= mat->cmap->N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);}
361 #endif
362         else {
363           if (mat->was_assembled) {
364             if (!aij->colmap) {
365               ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
366             }
367 #if defined (PETSC_USE_CTABLE)
368             ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
369 	    col--;
370 #else
371             col = aij->colmap[in[j]] - 1;
372 #endif
373             if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
374               ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
375               col =  in[j];
376               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
377               B = aij->B;
378               b = (Mat_SeqAIJ*)B->data;
379               bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a;
380               rp2      = bj + bi[row];
381               ap2      = ba + bi[row];
382               rmax2    = bimax[row];
383               nrow2    = bilen[row];
384               low2     = 0;
385               high2    = nrow2;
386               bm       = aij->B->rmap->n;
387               ba = b->a;
388             }
389           } else col = in[j];
390           MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
391         }
392       }
393     } else {
394       if (!aij->donotstash) {
395         if (roworiented) {
396           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscTruth)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
397         } else {
398           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscTruth)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
399         }
400       }
401     }
402   }
403   PetscFunctionReturn(0);
404 }
405 
406 #undef __FUNCT__
407 #define __FUNCT__ "MatGetValues_MPIAIJ"
408 PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
409 {
410   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
411   PetscErrorCode ierr;
412   PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
413   PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
414 
415   PetscFunctionBegin;
416   for (i=0; i<m; i++) {
417     if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/
418     if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
419     if (idxm[i] >= rstart && idxm[i] < rend) {
420       row = idxm[i] - rstart;
421       for (j=0; j<n; j++) {
422         if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */
423         if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
424         if (idxn[j] >= cstart && idxn[j] < cend){
425           col = idxn[j] - cstart;
426           ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
427         } else {
428           if (!aij->colmap) {
429             ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
430           }
431 #if defined (PETSC_USE_CTABLE)
432           ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
433           col --;
434 #else
435           col = aij->colmap[idxn[j]] - 1;
436 #endif
437           if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
438           else {
439             ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
440           }
441         }
442       }
443     } else {
444       SETERRQ(PETSC_ERR_SUP,"Only local values currently supported");
445     }
446   }
447   PetscFunctionReturn(0);
448 }
449 
450 extern PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat,Vec,Vec);
451 
452 #undef __FUNCT__
453 #define __FUNCT__ "MatAssemblyBegin_MPIAIJ"
454 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
455 {
456   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
457   PetscErrorCode ierr;
458   PetscInt       nstash,reallocs;
459   InsertMode     addv;
460 
461   PetscFunctionBegin;
462   if (aij->donotstash) {
463     PetscFunctionReturn(0);
464   }
465 
466   /* make sure all processes are in either INSERT_VALUES or ADD_VALUES mode */
467   ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,((PetscObject)mat)->comm);CHKERRQ(ierr);
468   if (addv == (ADD_VALUES|INSERT_VALUES)) {
469     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
470   }
471   mat->insertmode = addv; /* in case this processor had no cache */
472 
473   ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
474   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
475   ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
476   PetscFunctionReturn(0);
477 }
478 
479 #undef __FUNCT__
480 #define __FUNCT__ "MatAssemblyEnd_MPIAIJ"
481 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
482 {
483   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
484   Mat_SeqAIJ     *a=(Mat_SeqAIJ *)aij->A->data;
485   PetscErrorCode ierr;
486   PetscMPIInt    n;
487   PetscInt       i,j,rstart,ncols,flg;
488   PetscInt       *row,*col;
489   PetscTruth     other_disassembled;
490   PetscScalar    *val;
491   InsertMode     addv = mat->insertmode;
492 
493   /* do not use 'b = (Mat_SeqAIJ *)aij->B->data' as B can be reset in disassembly */
494   PetscFunctionBegin;
495   if (!aij->donotstash) {
496     while (1) {
497       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
498       if (!flg) break;
499 
500       for (i=0; i<n;) {
501         /* Now identify the consecutive vals belonging to the same row */
502         for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
503         if (j < n) ncols = j-i;
504         else       ncols = n-i;
505         /* Now assemble all these values with a single function call */
506         ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
507         i = j;
508       }
509     }
510     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
511   }
512   a->compressedrow.use     = PETSC_FALSE;
513   ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
514   ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);
515 
516   /* determine if any process has disassembled; if so, we must
517      also disassemble ourselves so that we may reassemble. */
518   /*
519      if the nonzero structure of the submatrix B cannot change, then we know that
520      no process disassembled and thus we can skip this step
521   */
522   if (!((Mat_SeqAIJ*)aij->B->data)->nonew)  {
523     ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,((PetscObject)mat)->comm);CHKERRQ(ierr);
524     if (mat->was_assembled && !other_disassembled) {
525       ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
526     }
527   }
528   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
529     ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
530   }
531   ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr);
532   ((Mat_SeqAIJ *)aij->B->data)->compressedrow.use = PETSC_TRUE; /* b->compressedrow.use */
533   ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
534   ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);
535 
536   ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr);
537   aij->rowvalues = 0;
538 
539   /* used by MatAXPY() */
540   a->xtoy = 0; ((Mat_SeqAIJ *)aij->B->data)->xtoy = 0;  /* b->xtoy = 0 */
541   a->XtoY = 0; ((Mat_SeqAIJ *)aij->B->data)->XtoY = 0;  /* b->XtoY = 0 */
542 
543   if (aij->diag) {ierr = VecDestroy(aij->diag);CHKERRQ(ierr);aij->diag = 0;}
544   if (a->inode.size) mat->ops->multdiagonalblock = MatMultDiagonalBlock_MPIAIJ;
545   PetscFunctionReturn(0);
546 }
547 
548 #undef __FUNCT__
549 #define __FUNCT__ "MatZeroEntries_MPIAIJ"
550 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
551 {
552   Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
553   PetscErrorCode ierr;
554 
555   PetscFunctionBegin;
556   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
557   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
558   PetscFunctionReturn(0);
559 }
560 
561 #undef __FUNCT__
562 #define __FUNCT__ "MatZeroRows_MPIAIJ"
563 PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag)
564 {
565   Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
566   PetscErrorCode ierr;
567   PetscMPIInt    size = l->size,imdex,n,rank = l->rank,tag = ((PetscObject)A)->tag,lastidx = -1;
568   PetscInt       i,*owners = A->rmap->range;
569   PetscInt       *nprocs,j,idx,nsends,row;
570   PetscInt       nmax,*svalues,*starts,*owner,nrecvs;
571   PetscInt       *rvalues,count,base,slen,*source;
572   PetscInt       *lens,*lrows,*values,rstart=A->rmap->rstart;
573   MPI_Comm       comm = ((PetscObject)A)->comm;
574   MPI_Request    *send_waits,*recv_waits;
575   MPI_Status     recv_status,*send_status;
576 #if defined(PETSC_DEBUG)
577   PetscTruth     found = PETSC_FALSE;
578 #endif
579 
580   PetscFunctionBegin;
581   /*  first count number of contributors to each processor */
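  /* Encoding used below (descriptive comment): nprocs[2*j] counts how many of the given
     rows are owned by process j, and nprocs[2*j+1] is set to 1 if at least one row goes
     to process j, i.e. a message will be sent to it. */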
582   ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
583   ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
584   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/
585   j = 0;
586   for (i=0; i<N; i++) {
587     if (lastidx > (idx = rows[i])) j = 0;
588     lastidx = idx;
589     for (; j<size; j++) {
590       if (idx >= owners[j] && idx < owners[j+1]) {
591         nprocs[2*j]++;
592         nprocs[2*j+1] = 1;
593         owner[i] = j;
594 #if defined(PETSC_DEBUG)
595         found = PETSC_TRUE;
596 #endif
597         break;
598       }
599     }
600 #if defined(PETSC_DEBUG)
601     if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
602     found = PETSC_FALSE;
603 #endif
604   }
605   nsends = 0;  for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}
606 
607   /* inform other processors of number of messages and max length*/
608   ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);
609 
610   /* post receives:   */
611   ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr);
612   ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
613   for (i=0; i<nrecvs; i++) {
614     ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
615   }
616 
617   /* do sends:
618       1) starts[i] gives the starting index in svalues for stuff going to
619          the ith processor
620   */
621   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr);
622   ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
623   ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr);
624   starts[0] = 0;
625   for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
626   for (i=0; i<N; i++) {
627     svalues[starts[owner[i]]++] = rows[i];
628   }
629 
630   starts[0] = 0;
631   for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
632   count = 0;
633   for (i=0; i<size; i++) {
634     if (nprocs[2*i+1]) {
635       ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
636     }
637   }
638   ierr = PetscFree(starts);CHKERRQ(ierr);
639 
640   base = owners[rank];
641 
642   /*  wait on receives */
643   ierr   = PetscMalloc(2*(nrecvs+1)*sizeof(PetscInt),&lens);CHKERRQ(ierr);
644   source = lens + nrecvs;
645   count  = nrecvs; slen = 0;
646   while (count) {
647     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
648     /* unpack receives into our local space */
649     ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);
650     source[imdex]  = recv_status.MPI_SOURCE;
651     lens[imdex]    = n;
652     slen          += n;
653     count--;
654   }
655   ierr = PetscFree(recv_waits);CHKERRQ(ierr);
656 
657   /* move the data into the send scatter */
658   ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr);
659   count = 0;
660   for (i=0; i<nrecvs; i++) {
661     values = rvalues + i*nmax;
662     for (j=0; j<lens[i]; j++) {
663       lrows[count++] = values[j] - base;
664     }
665   }
666   ierr = PetscFree(rvalues);CHKERRQ(ierr);
667   ierr = PetscFree(lens);CHKERRQ(ierr);
668   ierr = PetscFree(owner);CHKERRQ(ierr);
669   ierr = PetscFree(nprocs);CHKERRQ(ierr);
670 
671   /* actually zap the local rows */
672   /*
673         Zero the required rows. If the "diagonal block" of the matrix
674      is square and the user wishes to set the diagonal we use separate
675      code so that MatSetValues() is not called for each diagonal entry, which would
676      allocate new memory and incur many mallocs, slowing things down.
677 
678   */
679   /* must zero l->B before l->A because the (diag) case below may put values into l->B */
680   ierr = MatZeroRows(l->B,slen,lrows,0.0);CHKERRQ(ierr);
681   if ((diag != 0.0) && (l->A->rmap->N == l->A->cmap->N)) {
682     ierr      = MatZeroRows(l->A,slen,lrows,diag);CHKERRQ(ierr);
683   } else if (diag != 0.0) {
684     ierr = MatZeroRows(l->A,slen,lrows,0.0);CHKERRQ(ierr);
685     if (((Mat_SeqAIJ*)l->A->data)->nonew) {
686       SETERRQ(PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\n\
687 MAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
688     }
689     for (i = 0; i < slen; i++) {
690       row  = lrows[i] + rstart;
691       ierr = MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr);
692     }
693     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
694     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
695   } else {
696     ierr = MatZeroRows(l->A,slen,lrows,0.0);CHKERRQ(ierr);
697   }
698   ierr = PetscFree(lrows);CHKERRQ(ierr);
699 
700   /* wait on sends */
701   if (nsends) {
702     ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
703     ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
704     ierr = PetscFree(send_status);CHKERRQ(ierr);
705   }
706   ierr = PetscFree(send_waits);CHKERRQ(ierr);
707   ierr = PetscFree(svalues);CHKERRQ(ierr);
708 
709   PetscFunctionReturn(0);
710 }
711 
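/*
   Descriptive note: the parallel product y = A*x is formed as
       y  = (diagonal block a->A) * x          using the local part of x
       y += (off-diagonal block a->B) * lvec   using the ghost values of x
   where lvec holds the required off-process entries of x gathered by the VecScatter
   a->Mvctx.  The scatter is started before the local multiply so that communication
   overlaps with computation.
*/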
712 #undef __FUNCT__
713 #define __FUNCT__ "MatMult_MPIAIJ"
714 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
715 {
716   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
717   PetscErrorCode ierr;
718   PetscInt       nt;
719 
720   PetscFunctionBegin;
721   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
722   if (nt != A->cmap->n) {
723     SETERRQ2(PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt);
724   }
725   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
726   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
727   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
728   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
729   PetscFunctionReturn(0);
730 }
731 
732 #undef __FUNCT__
733 #define __FUNCT__ "MatMultDiagonalBlock_MPIAIJ"
734 PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx)
735 {
736   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
737   PetscErrorCode ierr;
738 
739   PetscFunctionBegin;
740   ierr = MatMultDiagonalBlock(a->A,bb,xx);CHKERRQ(ierr);
741   PetscFunctionReturn(0);
742 }
743 
744 #undef __FUNCT__
745 #define __FUNCT__ "MatMultAdd_MPIAIJ"
746 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
747 {
748   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
749   PetscErrorCode ierr;
750 
751   PetscFunctionBegin;
752   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
753   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
754   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
755   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
756   PetscFunctionReturn(0);
757 }
758 
759 #undef __FUNCT__
760 #define __FUNCT__ "MatMultTranspose_MPIAIJ"
761 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
762 {
763   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
764   PetscErrorCode ierr;
765   PetscTruth     merged;
766 
767   PetscFunctionBegin;
768   ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr);
769   /* do nondiagonal part */
770   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
771   if (!merged) {
772     /* send it on its way */
773     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
774     /* do local part */
775     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
776     /* receive remote parts: note this assumes the values are not actually */
777     /* added into yy until the VecScatterEnd() below */
778     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
779   } else {
780     /* do local part */
781     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
782     /* send it on its way */
783     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
784     /* values actually were received in the Begin() but we need to call this nop */
785     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
786   }
787   PetscFunctionReturn(0);
788 }
789 
790 EXTERN_C_BEGIN
791 #undef __FUNCT__
792 #define __FUNCT__ "MatIsTranspose_MPIAIJ"
793 PetscErrorCode PETSCMAT_DLLEXPORT MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscTruth *f)
794 {
795   MPI_Comm       comm;
796   Mat_MPIAIJ     *Aij = (Mat_MPIAIJ *) Amat->data, *Bij;
797   Mat            Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs;
798   IS             Me,Notme;
799   PetscErrorCode ierr;
800   PetscInt       M,N,first,last,*notme,i;
801   PetscMPIInt    size;
802 
803   PetscFunctionBegin;
804 
805   /* Easy test: symmetric diagonal block */
806   Bij = (Mat_MPIAIJ *) Bmat->data; Bdia = Bij->A;
807   ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr);
808   if (!*f) PetscFunctionReturn(0);
809   ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr);
810   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
811   if (size == 1) PetscFunctionReturn(0);
812 
813   /* Hard test: off-diagonal block. This takes a MatGetSubMatrix. */
814   ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr);
815   ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr);
816   ierr = PetscMalloc((N-last+first)*sizeof(PetscInt),&notme);CHKERRQ(ierr);
817   for (i=0; i<first; i++) notme[i] = i;
818   for (i=last; i<M; i++) notme[i-last+first] = i;
819   ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,&Notme);CHKERRQ(ierr);
820   ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr);
821   ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr);
822   Aoff = Aoffs[0];
823   ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr);
824   Boff = Boffs[0];
825   ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr);
826   ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr);
827   ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr);
828   ierr = ISDestroy(Me);CHKERRQ(ierr);
829   ierr = ISDestroy(Notme);CHKERRQ(ierr);
830 
831   PetscFunctionReturn(0);
832 }
833 EXTERN_C_END
834 
835 #undef __FUNCT__
836 #define __FUNCT__ "MatMultTransposeAdd_MPIAIJ"
837 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
838 {
839   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
840   PetscErrorCode ierr;
841 
842   PetscFunctionBegin;
843   /* do nondiagonal part */
844   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
845   /* send it on its way */
846   ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
847   /* do local part */
848   ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
849   /* receive remote parts */
850   ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
851   PetscFunctionReturn(0);
852 }
853 
854 /*
855   This only works correctly for square matrices where the subblock A->A is the
856    diagonal block
857 */
858 #undef __FUNCT__
859 #define __FUNCT__ "MatGetDiagonal_MPIAIJ"
860 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v)
861 {
862   PetscErrorCode ierr;
863   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
864 
865   PetscFunctionBegin;
866   if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
867   if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) {
868     SETERRQ(PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
869   }
870   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
871   PetscFunctionReturn(0);
872 }
873 
874 #undef __FUNCT__
875 #define __FUNCT__ "MatScale_MPIAIJ"
876 PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa)
877 {
878   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
879   PetscErrorCode ierr;
880 
881   PetscFunctionBegin;
882   ierr = MatScale(a->A,aa);CHKERRQ(ierr);
883   ierr = MatScale(a->B,aa);CHKERRQ(ierr);
884   PetscFunctionReturn(0);
885 }
886 
887 #undef __FUNCT__
888 #define __FUNCT__ "MatDestroy_MPIAIJ"
889 PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
890 {
891   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
892   PetscErrorCode ierr;
893 
894   PetscFunctionBegin;
895 #if defined(PETSC_USE_LOG)
896   PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N);
897 #endif
898   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
899   if (aij->diag) {ierr = VecDestroy(aij->diag);CHKERRQ(ierr);}
900   ierr = MatDestroy(aij->A);CHKERRQ(ierr);
901   ierr = MatDestroy(aij->B);CHKERRQ(ierr);
902 #if defined (PETSC_USE_CTABLE)
903   if (aij->colmap) {ierr = PetscTableDestroy(aij->colmap);CHKERRQ(ierr);}
904 #else
905   ierr = PetscFree(aij->colmap);CHKERRQ(ierr);
906 #endif
907   ierr = PetscFree(aij->garray);CHKERRQ(ierr);
908   if (aij->lvec)   {ierr = VecDestroy(aij->lvec);CHKERRQ(ierr);}
909   if (aij->Mvctx)  {ierr = VecScatterDestroy(aij->Mvctx);CHKERRQ(ierr);}
910   ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr);
911   ierr = PetscFree(aij->ld);CHKERRQ(ierr);
912   ierr = PetscFree(aij);CHKERRQ(ierr);
913 
914   ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
915   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr);
916   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr);
917   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr);
918   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C","",PETSC_NULL);CHKERRQ(ierr);
919   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
920   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr);
921   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C","",PETSC_NULL);CHKERRQ(ierr);
922   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C","",PETSC_NULL);CHKERRQ(ierr);
923   PetscFunctionReturn(0);
924 }
925 
926 #undef __FUNCT__
927 #define __FUNCT__ "MatView_MPIAIJ_Binary"
928 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
929 {
930   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
931   Mat_SeqAIJ*       A = (Mat_SeqAIJ*)aij->A->data;
932   Mat_SeqAIJ*       B = (Mat_SeqAIJ*)aij->B->data;
933   PetscErrorCode    ierr;
934   PetscMPIInt       rank,size,tag = ((PetscObject)viewer)->tag;
935   int               fd;
936   PetscInt          nz,header[4],*row_lengths,*range=0,rlen,i;
937   PetscInt          nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap->rstart,rnz;
938   PetscScalar       *column_values;
939 
940   PetscFunctionBegin;
941   ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr);
942   ierr = MPI_Comm_size(((PetscObject)mat)->comm,&size);CHKERRQ(ierr);
943   nz   = A->nz + B->nz;
944   if (!rank) {
945     header[0] = MAT_FILE_COOKIE;
946     header[1] = mat->rmap->N;
947     header[2] = mat->cmap->N;
948     ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
949     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
950     ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
951     /* get largest number of rows any processor has */
952     rlen = mat->rmap->n;
953     range = mat->rmap->range;
954     for (i=1; i<size; i++) {
955       rlen = PetscMax(rlen,range[i+1] - range[i]);
956     }
957   } else {
958     ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
959     rlen = mat->rmap->n;
960   }
961 
962   /* load up the local row counts */
963   ierr = PetscMalloc((rlen+1)*sizeof(PetscInt),&row_lengths);CHKERRQ(ierr);
964   for (i=0; i<mat->rmap->n; i++) {
965     row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
966   }
967 
968   /* store the row lengths to the file */
969   if (!rank) {
970     MPI_Status status;
971     ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap->n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
972     for (i=1; i<size; i++) {
973       rlen = range[i+1] - range[i];
974       ierr = MPI_Recv(row_lengths,rlen,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
975       ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
976     }
977   } else {
978     ierr = MPI_Send(row_lengths,mat->rmap->n,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
979   }
980   ierr = PetscFree(row_lengths);CHKERRQ(ierr);
981 
982   /* load up the local column indices */
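  /* The indices for each row are emitted in increasing global column order by splicing
     together (descriptive comment): the B (off-diagonal) columns that lie left of the
     diagonal block, then the A (diagonal-block) columns shifted by cstart, then the
     remaining B columns. */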
983   nzmax = nz; /* the 0th process needs as much space as the largest process needs */
984   ierr = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
985   ierr = PetscMalloc((nzmax+1)*sizeof(PetscInt),&column_indices);CHKERRQ(ierr);
986   cnt  = 0;
987   for (i=0; i<mat->rmap->n; i++) {
988     for (j=B->i[i]; j<B->i[i+1]; j++) {
989       if ( (col = garray[B->j[j]]) > cstart) break;
990       column_indices[cnt++] = col;
991     }
992     for (k=A->i[i]; k<A->i[i+1]; k++) {
993       column_indices[cnt++] = A->j[k] + cstart;
994     }
995     for (; j<B->i[i+1]; j++) {
996       column_indices[cnt++] = garray[B->j[j]];
997     }
998   }
999   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
1000 
1001   /* store the column indices to the file */
1002   if (!rank) {
1003     MPI_Status status;
1004     ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1005     for (i=1; i<size; i++) {
1006       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
1007       if (rnz > nzmax) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax);
1008       ierr = MPI_Recv(column_indices,rnz,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
1009       ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1010     }
1011   } else {
1012     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1013     ierr = MPI_Send(column_indices,nz,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1014   }
1015   ierr = PetscFree(column_indices);CHKERRQ(ierr);
1016 
1017   /* load up the local column values */
1018   ierr = PetscMalloc((nzmax+1)*sizeof(PetscScalar),&column_values);CHKERRQ(ierr);
1019   cnt  = 0;
1020   for (i=0; i<mat->rmap->n; i++) {
1021     for (j=B->i[i]; j<B->i[i+1]; j++) {
1022       if ( garray[B->j[j]] > cstart) break;
1023       column_values[cnt++] = B->a[j];
1024     }
1025     for (k=A->i[i]; k<A->i[i+1]; k++) {
1026       column_values[cnt++] = A->a[k];
1027     }
1028     for (; j<B->i[i+1]; j++) {
1029       column_values[cnt++] = B->a[j];
1030     }
1031   }
1032   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
1033 
1034   /* store the column values to the file */
1035   if (!rank) {
1036     MPI_Status status;
1037     ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1038     for (i=1; i<size; i++) {
1039       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
1040       if (rnz > nzmax) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax);
1041       ierr = MPI_Recv(column_values,rnz,MPIU_SCALAR,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
1042       ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1043     }
1044   } else {
1045     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1046     ierr = MPI_Send(column_values,nz,MPIU_SCALAR,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1047   }
1048   ierr = PetscFree(column_values);CHKERRQ(ierr);
1049   PetscFunctionReturn(0);
1050 }
1051 
1052 #undef __FUNCT__
1053 #define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket"
1054 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
1055 {
1056   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
1057   PetscErrorCode    ierr;
1058   PetscMPIInt       rank = aij->rank,size = aij->size;
1059   PetscTruth        isdraw,iascii,isbinary;
1060   PetscViewer       sviewer;
1061   PetscViewerFormat format;
1062 
1063   PetscFunctionBegin;
1064   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
1065   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
1066   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
1067   if (iascii) {
1068     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1069     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1070       MatInfo    info;
1071       PetscTruth inodes;
1072 
1073       ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr);
1074       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
1075       ierr = MatInodeGetInodeSizes(aij->A,PETSC_NULL,(PetscInt **)&inodes,PETSC_NULL);CHKERRQ(ierr);
1076       if (!inodes) {
1077         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, not using I-node routines\n",
1078 					      rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
1079       } else {
1080         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, using I-node routines\n",
1081 		    rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
1082       }
1083       ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
1084       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1085       ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
1086       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1087       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1088       ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
1089       ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr);
1090       PetscFunctionReturn(0);
1091     } else if (format == PETSC_VIEWER_ASCII_INFO) {
1092       PetscInt   inodecount,inodelimit,*inodes;
1093       ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr);
1094       if (inodes) {
1095         ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr);
1096       } else {
1097         ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr);
1098       }
1099       PetscFunctionReturn(0);
1100     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1101       PetscFunctionReturn(0);
1102     }
1103   } else if (isbinary) {
1104     if (size == 1) {
1105       ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1106       ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
1107     } else {
1108       ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr);
1109     }
1110     PetscFunctionReturn(0);
1111   } else if (isdraw) {
1112     PetscDraw  draw;
1113     PetscTruth isnull;
1114     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
1115     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
1116   }
1117 
1118   if (size == 1) {
1119     ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1120     ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
1121   } else {
1122     /* assemble the entire matrix onto first processor. */
1123     Mat         A;
1124     Mat_SeqAIJ  *Aloc;
1125     PetscInt    M = mat->rmap->N,N = mat->cmap->N,m,*ai,*aj,row,*cols,i,*ct;
1126     MatScalar   *a;
1127 
1128     if (mat->rmap->N > 1024) {
1129       PetscTruth flg = PETSC_FALSE;
1130 
1131       ierr = PetscOptionsGetTruth(((PetscObject) mat)->prefix, "-mat_ascii_output_large", &flg,PETSC_NULL);CHKERRQ(ierr);
1132       if (!flg) {
1133         SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"ASCII matrix output not allowed for matrices with more than 1024 rows, use binary format instead.\nYou can override this restriction using -mat_ascii_output_large.");
1134       }
1135     }
1136 
1137     ierr = MatCreate(((PetscObject)mat)->comm,&A);CHKERRQ(ierr);
1138     if (!rank) {
1139       ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
1140     } else {
1141       ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
1142     }
1143     /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */
1144     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
1145     ierr = MatMPIAIJSetPreallocation(A,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
1146     ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr);
1147 
1148     /* copy over the A part */
1149     Aloc = (Mat_SeqAIJ*)aij->A->data;
1150     m = aij->A->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1151     row = mat->rmap->rstart;
1152     for (i=0; i<ai[m]; i++) {aj[i] += mat->cmap->rstart ;}
1153     for (i=0; i<m; i++) {
1154       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
1155       row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
1156     }
1157     aj = Aloc->j;
1158     for (i=0; i<ai[m]; i++) {aj[i] -= mat->cmap->rstart;}
1159 
1160     /* copy over the B part */
1161     Aloc = (Mat_SeqAIJ*)aij->B->data;
1162     m    = aij->B->rmap->n;  ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1163     row  = mat->rmap->rstart;
1164     ierr = PetscMalloc((ai[m]+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr);
1165     ct   = cols;
1166     for (i=0; i<ai[m]; i++) {cols[i] = aij->garray[aj[i]];}
1167     for (i=0; i<m; i++) {
1168       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
1169       row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
1170     }
1171     ierr = PetscFree(ct);CHKERRQ(ierr);
1172     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1173     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1174     /*
1175        Everyone has to participate in the call that draws the matrix since the graphics waits are
1176        synchronized across all processors that share the PetscDraw object
1177     */
1178     ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
1179     if (!rank) {
1180       ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1181       ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
1182     }
1183     ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
1184     ierr = MatDestroy(A);CHKERRQ(ierr);
1185   }
1186   PetscFunctionReturn(0);
1187 }
1188 
1189 #undef __FUNCT__
1190 #define __FUNCT__ "MatView_MPIAIJ"
1191 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer)
1192 {
1193   PetscErrorCode ierr;
1194   PetscTruth     iascii,isdraw,issocket,isbinary;
1195 
1196   PetscFunctionBegin;
1197   ierr  = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
1198   ierr  = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
1199   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
1200   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr);
1201   if (iascii || isdraw || isbinary || issocket) {
1202     ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
1203   } else {
1204     SETERRQ1(PETSC_ERR_SUP,"Viewer type %s not supported by MPIAIJ matrices",((PetscObject)viewer)->type_name);
1205   }
1206   PetscFunctionReturn(0);
1207 }
1208 
1209 #undef __FUNCT__
1210 #define __FUNCT__ "MatSOR_MPIAIJ"
1211 PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
1212 {
1213   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1214   PetscErrorCode ierr;
1215   Vec            bb1 = 0;
1216   PetscTruth     hasop;
1217 
1218   PetscFunctionBegin;
1219   if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) {
1220     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
1221   }
1222 
1223   if (flag == SOR_APPLY_UPPER) {
1224     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1225     PetscFunctionReturn(0);
1226   }
1227 
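  /* Descriptive note: the "local" sweeps below are a block Jacobi / local SOR hybrid.
     In each outer iteration the current x is scattered so that lvec holds its
     off-process entries, the right-hand side is updated as bb1 = bb - B*lvec (moving
     the off-diagonal coupling to the right-hand side), and an SOR sweep is then applied
     to the local diagonal block A with bb1.  A true global parallel SOR is not
     supported (see the final SETERRQ below). */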
1228   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
1229     if (flag & SOR_ZERO_INITIAL_GUESS) {
1230       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1231       its--;
1232     }
1233 
1234     while (its--) {
1235       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1236       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1237 
1238       /* update rhs: bb1 = bb - B*x */
1239       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1240       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1241 
1242       /* local sweep */
1243       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1244     }
1245   } else if (flag & SOR_LOCAL_FORWARD_SWEEP){
1246     if (flag & SOR_ZERO_INITIAL_GUESS) {
1247       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1248       its--;
1249     }
1250     while (its--) {
1251       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1252       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1253 
1254       /* update rhs: bb1 = bb - B*x */
1255       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1256       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1257 
1258       /* local sweep */
1259       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1260     }
1261   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){
1262     if (flag & SOR_ZERO_INITIAL_GUESS) {
1263       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1264       its--;
1265     }
1266     while (its--) {
1267       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1268       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1269 
1270       /* update rhs: bb1 = bb - B*x */
1271       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1272       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1273 
1274       /* local sweep */
1275       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1276     }
1277   }  else if (flag & SOR_EISENSTAT) {
1278     Vec         xx1;
1279 
1280     ierr = VecDuplicate(bb,&xx1);CHKERRQ(ierr);
1281     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr);
1282 
1283     ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1284     ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1285     if (!mat->diag) {
1286       ierr = MatGetVecs(matin,&mat->diag,PETSC_NULL);CHKERRQ(ierr);
1287       ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr);
1288     }
1289     ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr);
1290     if (hasop) {
1291       ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr);
1292     } else {
1293       ierr = VecPointwiseMult(bb1,mat->diag,xx);CHKERRQ(ierr);
1294     }
1295     ierr = VecAYPX(bb1,(omega-2.0)/omega,bb);CHKERRQ(ierr);
1296 
1297     ierr = MatMultAdd(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr);
1298 
1299     /* local sweep */
1300     ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr);
1301     ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr);
1302     ierr = VecDestroy(xx1);CHKERRQ(ierr);
1303   } else {
1304     SETERRQ(PETSC_ERR_SUP,"Parallel SOR not supported");
1305   }
1306 
1307   if (bb1) {ierr = VecDestroy(bb1);CHKERRQ(ierr);}
1308   PetscFunctionReturn(0);
1309 }
1310 
1311 #undef __FUNCT__
1312 #define __FUNCT__ "MatPermute_MPIAIJ"
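/*
    Permutes rows and columns of a parallel AIJ matrix.  A minimal usage
    sketch (is_row and is_col are hypothetical index sets that are
    permutations of 0..N-1):

      Mat Aperm;
      ierr = MatPermute(A,is_row,is_col,&Aperm);CHKERRQ(ierr);

    The row permutation is gathered onto all processes, inverted, and the
    permuted matrix is then extracted as a submatrix of A.
*/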
1313 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B)
1314 {
1315   MPI_Comm       comm,pcomm;
1316   PetscInt       first,local_size,nrows;
1317   const PetscInt *rows;
1318   PetscMPIInt    size;
1319   IS             crowp,growp,irowp,lrowp,lcolp,icolp;
1320   PetscErrorCode ierr;
1321 
1322   PetscFunctionBegin;
1323   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
1324   /* make a collective version of 'rowp' */
1325   ierr = PetscObjectGetComm((PetscObject)rowp,&pcomm);CHKERRQ(ierr);
1326   if (pcomm==comm) {
1327     crowp = rowp;
1328   } else {
1329     ierr = ISGetSize(rowp,&nrows);CHKERRQ(ierr);
1330     ierr = ISGetIndices(rowp,&rows);CHKERRQ(ierr);
1331     ierr = ISCreateGeneral(comm,nrows,rows,&crowp);CHKERRQ(ierr);
1332     ierr = ISRestoreIndices(rowp,&rows);CHKERRQ(ierr);
1333   }
1334   /* collect the global row permutation and invert it */
1335   ierr = ISAllGather(crowp,&growp);CHKERRQ(ierr);
1336   ierr = ISSetPermutation(growp);CHKERRQ(ierr);
1337   if (pcomm!=comm) {
1338     ierr = ISDestroy(crowp);CHKERRQ(ierr);
1339   }
1340   ierr = ISInvertPermutation(growp,PETSC_DECIDE,&irowp);CHKERRQ(ierr);
1341   /* get the local target indices */
1342   ierr = MatGetOwnershipRange(A,&first,PETSC_NULL);CHKERRQ(ierr);
1343   ierr = MatGetLocalSize(A,&local_size,PETSC_NULL);CHKERRQ(ierr);
1344   ierr = ISGetIndices(irowp,&rows);CHKERRQ(ierr);
1345   ierr = ISCreateGeneral(MPI_COMM_SELF,local_size,rows+first,&lrowp);CHKERRQ(ierr);
1346   ierr = ISRestoreIndices(irowp,&rows);CHKERRQ(ierr);
1347   ierr = ISDestroy(irowp);CHKERRQ(ierr);
1348   /* the column permutation is so much easier;
1349      make a local version of 'colp' and invert it */
1350   ierr = PetscObjectGetComm((PetscObject)colp,&pcomm);CHKERRQ(ierr);
1351   ierr = MPI_Comm_size(pcomm,&size);CHKERRQ(ierr);
1352   if (size==1) {
1353     lcolp = colp;
1354   } else {
1355     ierr = ISGetSize(colp,&nrows);CHKERRQ(ierr);
1356     ierr = ISGetIndices(colp,&rows);CHKERRQ(ierr);
1357     ierr = ISCreateGeneral(MPI_COMM_SELF,nrows,rows,&lcolp);CHKERRQ(ierr);
1358   }
1359   ierr = ISSetPermutation(lcolp);CHKERRQ(ierr);
1360   ierr = ISInvertPermutation(lcolp,PETSC_DECIDE,&icolp);CHKERRQ(ierr);
1361   ierr = ISSetPermutation(icolp);CHKERRQ(ierr);
1362   if (size>1) {
1363     ierr = ISRestoreIndices(colp,&rows);CHKERRQ(ierr);
1364     ierr = ISDestroy(lcolp);CHKERRQ(ierr);
1365   }
1366   /* now we just get the submatrix */
1367   ierr = MatGetSubMatrix_MPIAIJ_Private(A,lrowp,icolp,local_size,MAT_INITIAL_MATRIX,B);CHKERRQ(ierr);
1368   /* clean up */
1369   ierr = ISDestroy(lrowp);CHKERRQ(ierr);
1370   ierr = ISDestroy(icolp);CHKERRQ(ierr);
1371   PetscFunctionReturn(0);
1372 }
1373 
1374 #undef __FUNCT__
1375 #define __FUNCT__ "MatGetInfo_MPIAIJ"
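/*
    Collects matrix statistics from the diagonal (A) and off-diagonal (B)
    blocks; for MAT_GLOBAL_MAX and MAT_GLOBAL_SUM the per-process totals are
    reduced over the matrix communicator.
*/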
1376 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1377 {
1378   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1379   Mat            A = mat->A,B = mat->B;
1380   PetscErrorCode ierr;
1381   PetscReal      isend[5],irecv[5];
1382 
1383   PetscFunctionBegin;
1384   info->block_size     = 1.0;
1385   ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1386   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1387   isend[3] = info->memory;  isend[4] = info->mallocs;
1388   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1389   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1390   isend[3] += info->memory;  isend[4] += info->mallocs;
1391   if (flag == MAT_LOCAL) {
1392     info->nz_used      = isend[0];
1393     info->nz_allocated = isend[1];
1394     info->nz_unneeded  = isend[2];
1395     info->memory       = isend[3];
1396     info->mallocs      = isend[4];
1397   } else if (flag == MAT_GLOBAL_MAX) {
1398     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,((PetscObject)matin)->comm);CHKERRQ(ierr);
1399     info->nz_used      = irecv[0];
1400     info->nz_allocated = irecv[1];
1401     info->nz_unneeded  = irecv[2];
1402     info->memory       = irecv[3];
1403     info->mallocs      = irecv[4];
1404   } else if (flag == MAT_GLOBAL_SUM) {
1405     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,((PetscObject)matin)->comm);CHKERRQ(ierr);
1406     info->nz_used      = irecv[0];
1407     info->nz_allocated = irecv[1];
1408     info->nz_unneeded  = irecv[2];
1409     info->memory       = irecv[3];
1410     info->mallocs      = irecv[4];
1411   }
1412   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1413   info->fill_ratio_needed = 0;
1414   info->factor_mallocs    = 0;
1415 
1416   PetscFunctionReturn(0);
1417 }
1418 
1419 #undef __FUNCT__
1420 #define __FUNCT__ "MatSetOption_MPIAIJ"
1421 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscTruth flg)
1422 {
1423   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1424   PetscErrorCode ierr;
1425 
1426   PetscFunctionBegin;
1427   switch (op) {
1428   case MAT_NEW_NONZERO_LOCATIONS:
1429   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1430   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1431   case MAT_KEEP_NONZERO_PATTERN:
1432   case MAT_NEW_NONZERO_LOCATION_ERR:
1433   case MAT_USE_INODES:
1434   case MAT_IGNORE_ZERO_ENTRIES:
1435     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1436     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1437     break;
1438   case MAT_ROW_ORIENTED:
1439     a->roworiented = flg;
1440     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1441     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1442     break;
1443   case MAT_NEW_DIAGONALS:
1444     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1445     break;
1446   case MAT_IGNORE_OFF_PROC_ENTRIES:
1447     a->donotstash = flg; /* honor flg so the option can also be switched off */
1448     break;
1449   case MAT_SYMMETRIC:
1450     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1451     break;
1452   case MAT_STRUCTURALLY_SYMMETRIC:
1453     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1454     break;
1455   case MAT_HERMITIAN:
1456     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1457     break;
1458   case MAT_SYMMETRY_ETERNAL:
1459     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1460     break;
1461   default:
1462     SETERRQ1(PETSC_ERR_SUP,"unknown option %d",op);
1463   }
1464   PetscFunctionReturn(0);
1465 }
1466 
1467 #undef __FUNCT__
1468 #define __FUNCT__ "MatGetRow_MPIAIJ"
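/*
    Returns one locally owned row.  The row is stored in two pieces, one in the
    diagonal block A and one in the off-diagonal block B; below the pieces are
    merged back into a single array ordered by increasing global column, with
    garray translating local off-diagonal column indices to global ones.
*/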
1469 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1470 {
1471   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1472   PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
1473   PetscErrorCode ierr;
1474   PetscInt       i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart;
1475   PetscInt       nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend;
1476   PetscInt       *cmap,*idx_p;
1477 
1478   PetscFunctionBegin;
1479   if (mat->getrowactive) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Already active");
1480   mat->getrowactive = PETSC_TRUE;
1481 
1482   if (!mat->rowvalues && (idx || v)) {
1483     /*
1484         allocate enough space to hold information from the longest row.
1485     */
1486     Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data;
1487     PetscInt     max = 1,tmp;
1488     for (i=0; i<matin->rmap->n; i++) {
1489       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1490       if (max < tmp) { max = tmp; }
1491     }
1492     ierr = PetscMalloc(max*(sizeof(PetscInt)+sizeof(PetscScalar)),&mat->rowvalues);CHKERRQ(ierr);
1493     mat->rowindices = (PetscInt*)(mat->rowvalues + max);
1494   }
1495 
1496   if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Only local rows may be accessed");
1497   lrow = row - rstart;
1498 
1499   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1500   if (!v)   {pvA = 0; pvB = 0;}
1501   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1502   ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1503   ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1504   nztot = nzA + nzB;
1505 
1506   cmap  = mat->garray;
1507   if (v  || idx) {
1508     if (nztot) {
1509       /* Sort by increasing column numbers, assuming A and B already sorted */
1510       PetscInt imark = -1;
1511       if (v) {
1512         *v = v_p = mat->rowvalues;
1513         for (i=0; i<nzB; i++) {
1514           if (cmap[cworkB[i]] < cstart)   v_p[i] = vworkB[i];
1515           else break;
1516         }
1517         imark = i;
1518         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1519         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1520       }
1521       if (idx) {
1522         *idx = idx_p = mat->rowindices;
1523         if (imark > -1) {
1524           for (i=0; i<imark; i++) {
1525             idx_p[i] = cmap[cworkB[i]];
1526           }
1527         } else {
1528           for (i=0; i<nzB; i++) {
1529             if (cmap[cworkB[i]] < cstart)   idx_p[i] = cmap[cworkB[i]];
1530             else break;
1531           }
1532           imark = i;
1533         }
1534         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart + cworkA[i];
1535         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]];
1536       }
1537     } else {
1538       if (idx) *idx = 0;
1539       if (v)   *v   = 0;
1540     }
1541   }
1542   *nz = nztot;
1543   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1544   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1545   PetscFunctionReturn(0);
1546 }
1547 
1548 #undef __FUNCT__
1549 #define __FUNCT__ "MatRestoreRow_MPIAIJ"
1550 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1551 {
1552   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1553 
1554   PetscFunctionBegin;
1555   if (!aij->getrowactive) {
1556     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
1557   }
1558   aij->getrowactive = PETSC_FALSE;
1559   PetscFunctionReturn(0);
1560 }
1561 
1562 #undef __FUNCT__
1563 #define __FUNCT__ "MatNorm_MPIAIJ"
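/*
    Example (sketch) of computing a norm of a parallel AIJ matrix:

      PetscReal nrm;
      ierr = MatNorm(A,NORM_FROBENIUS,&nrm);CHKERRQ(ierr);

    NORM_1 (max column sum) and NORM_INFINITY (max row sum) are also handled
    below; NORM_2 is not supported.
*/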
1564 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm)
1565 {
1566   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1567   Mat_SeqAIJ     *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data;
1568   PetscErrorCode ierr;
1569   PetscInt       i,j,cstart = mat->cmap->rstart;
1570   PetscReal      sum = 0.0;
1571   MatScalar      *v;
1572 
1573   PetscFunctionBegin;
1574   if (aij->size == 1) {
1575     ierr =  MatNorm(aij->A,type,norm);CHKERRQ(ierr);
1576   } else {
1577     if (type == NORM_FROBENIUS) {
1578       v = amat->a;
1579       for (i=0; i<amat->nz; i++) {
1580 #if defined(PETSC_USE_COMPLEX)
1581         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1582 #else
1583         sum += (*v)*(*v); v++;
1584 #endif
1585       }
1586       v = bmat->a;
1587       for (i=0; i<bmat->nz; i++) {
1588 #if defined(PETSC_USE_COMPLEX)
1589         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1590 #else
1591         sum += (*v)*(*v); v++;
1592 #endif
1593       }
1594       ierr = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPI_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr);
1595       *norm = sqrt(*norm);
1596     } else if (type == NORM_1) { /* max column norm */
1597       PetscReal *tmp,*tmp2;
1598       PetscInt  *jj,*garray = aij->garray;
1599       ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr);
1600       ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscReal),&tmp2);CHKERRQ(ierr);
1601       ierr = PetscMemzero(tmp,mat->cmap->N*sizeof(PetscReal));CHKERRQ(ierr);
1602       *norm = 0.0;
1603       v = amat->a; jj = amat->j;
1604       for (j=0; j<amat->nz; j++) {
1605         tmp[cstart + *jj++ ] += PetscAbsScalar(*v);  v++;
1606       }
1607       v = bmat->a; jj = bmat->j;
1608       for (j=0; j<bmat->nz; j++) {
1609         tmp[garray[*jj++]] += PetscAbsScalar(*v); v++;
1610       }
1611       ierr = MPI_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPI_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr);
1612       for (j=0; j<mat->cmap->N; j++) {
1613         if (tmp2[j] > *norm) *norm = tmp2[j];
1614       }
1615       ierr = PetscFree(tmp);CHKERRQ(ierr);
1616       ierr = PetscFree(tmp2);CHKERRQ(ierr);
1617     } else if (type == NORM_INFINITY) { /* max row norm */
1618       PetscReal ntemp = 0.0;
1619       for (j=0; j<aij->A->rmap->n; j++) {
1620         v = amat->a + amat->i[j];
1621         sum = 0.0;
1622         for (i=0; i<amat->i[j+1]-amat->i[j]; i++) {
1623           sum += PetscAbsScalar(*v); v++;
1624         }
1625         v = bmat->a + bmat->i[j];
1626         for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) {
1627           sum += PetscAbsScalar(*v); v++;
1628         }
1629         if (sum > ntemp) ntemp = sum;
1630       }
1631       ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPI_MAX,((PetscObject)mat)->comm);CHKERRQ(ierr);
1632     } else {
1633       SETERRQ(PETSC_ERR_SUP,"No support for two norm");
1634     }
1635   }
1636   PetscFunctionReturn(0);
1637 }
1638 
1639 #undef __FUNCT__
1640 #define __FUNCT__ "MatTranspose_MPIAIJ"
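/*
    Forms the explicit transpose.  Each local row of A and B is inserted as a
    column of the result with MatSetValues(); preallocation uses the column
    counts of the local diagonal block as an estimate for both parts, so no
    extra communication is needed before assembly.
*/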
1641 PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout)
1642 {
1643   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1644   Mat_SeqAIJ     *Aloc=(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data;
1645   PetscErrorCode ierr;
1646   PetscInt       M = A->rmap->N,N = A->cmap->N,ma,na,mb,*ai,*aj,*bi,*bj,row,*cols,*cols_tmp,i,*d_nnz;
1647   PetscInt       cstart=A->cmap->rstart,ncol;
1648   Mat            B;
1649   MatScalar      *array;
1650 
1651   PetscFunctionBegin;
1652   if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
1653 
1654   ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n;
1655   ai = Aloc->i; aj = Aloc->j;
1656   bi = Bloc->i; bj = Bloc->j;
1657   if (reuse == MAT_INITIAL_MATRIX || *matout == A) {
1658     /* compute d_nnz for preallocation; o_nnz is approximated by d_nnz to avoid communication */
1659     ierr = PetscMalloc((1+na)*sizeof(PetscInt),&d_nnz);CHKERRQ(ierr);
1660     ierr = PetscMemzero(d_nnz,(1+na)*sizeof(PetscInt));CHKERRQ(ierr);
1661     for (i=0; i<ai[ma]; i++){
1662       d_nnz[aj[i]] ++;
1663       aj[i] += cstart; /* global col index to be used by MatSetValues() */
1664     }
1665 
1666     ierr = MatCreate(((PetscObject)A)->comm,&B);CHKERRQ(ierr);
1667     ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
1668     ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
1669     ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,d_nnz);CHKERRQ(ierr);
1670     ierr = PetscFree(d_nnz);CHKERRQ(ierr);
1671   } else {
1672     B = *matout;
1673   }
1674 
1675   /* copy over the A part */
1676   array = Aloc->a;
1677   row = A->rmap->rstart;
1678   for (i=0; i<ma; i++) {
1679     ncol = ai[i+1]-ai[i];
1680     ierr = MatSetValues(B,ncol,aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1681     row++; array += ncol; aj += ncol;
1682   }
1683   aj = Aloc->j;
1684   for (i=0; i<ai[ma]; i++) aj[i] -= cstart; /* restore the local column indices shifted above */
1685 
1686   /* copy over the B part */
1687   ierr = PetscMalloc(bi[mb]*sizeof(PetscInt),&cols);CHKERRQ(ierr);
1688   ierr = PetscMemzero(cols,bi[mb]*sizeof(PetscInt));CHKERRQ(ierr);
1689   array = Bloc->a;
1690   row = A->rmap->rstart;
1691   for (i=0; i<bi[mb]; i++) {cols[i] = a->garray[bj[i]];}
1692   cols_tmp = cols;
1693   for (i=0; i<mb; i++) {
1694     ncol = bi[i+1]-bi[i];
1695     ierr = MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1696     row++; array += ncol; cols_tmp += ncol;
1697   }
1698   ierr = PetscFree(cols);CHKERRQ(ierr);
1699 
1700   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1701   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1702   if (reuse == MAT_INITIAL_MATRIX || *matout != A) {
1703     *matout = B;
1704   } else {
1705     ierr = MatHeaderCopy(A,B);CHKERRQ(ierr);
1706   }
1707   PetscFunctionReturn(0);
1708 }
1709 
1710 #undef __FUNCT__
1711 #define __FUNCT__ "MatDiagonalScale_MPIAIJ"
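/*
    Example (sketch): scale a parallel AIJ matrix as diag(ll)*A*diag(rr);
    either vector may be PETSC_NULL to scale rows or columns only.

      ierr = MatDiagonalScale(A,ll,rr);CHKERRQ(ierr);
*/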
1712 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
1713 {
1714   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1715   Mat            a = aij->A,b = aij->B;
1716   PetscErrorCode ierr;
1717   PetscInt       s1,s2,s3;
1718 
1719   PetscFunctionBegin;
1720   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
1721   if (rr) {
1722     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
1723     if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
1724     /* Overlap communication with computation. */
1725     ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1726   }
1727   if (ll) {
1728     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
1729     if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
1730     ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr);
1731   }
1732   /* scale  the diagonal block */
1733   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
1734 
1735   if (rr) {
1736     /* Do a scatter end and then right scale the off-diagonal block */
1737     ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1738     ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr);
1739   }
1740 
1741   PetscFunctionReturn(0);
1742 }
1743 
1744 #undef __FUNCT__
1745 #define __FUNCT__ "MatSetBlockSize_MPIAIJ"
1746 PetscErrorCode MatSetBlockSize_MPIAIJ(Mat A,PetscInt bs)
1747 {
1748   Mat_MPIAIJ     *a   = (Mat_MPIAIJ*)A->data;
1749   PetscErrorCode ierr;
1750 
1751   PetscFunctionBegin;
1752   ierr = MatSetBlockSize(a->A,bs);CHKERRQ(ierr);
1753   ierr = MatSetBlockSize(a->B,bs);CHKERRQ(ierr);
1754   PetscFunctionReturn(0);
1755 }
1756 #undef __FUNCT__
1757 #define __FUNCT__ "MatSetUnfactored_MPIAIJ"
1758 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A)
1759 {
1760   Mat_MPIAIJ     *a   = (Mat_MPIAIJ*)A->data;
1761   PetscErrorCode ierr;
1762 
1763   PetscFunctionBegin;
1764   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
1765   PetscFunctionReturn(0);
1766 }
1767 
1768 #undef __FUNCT__
1769 #define __FUNCT__ "MatEqual_MPIAIJ"
1770 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscTruth *flag)
1771 {
1772   Mat_MPIAIJ     *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data;
1773   Mat            a,b,c,d;
1774   PetscTruth     flg;
1775   PetscErrorCode ierr;
1776 
1777   PetscFunctionBegin;
1778   a = matA->A; b = matA->B;
1779   c = matB->A; d = matB->B;
1780 
1781   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
1782   if (flg) {
1783     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
1784   }
1785   ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,((PetscObject)A)->comm);CHKERRQ(ierr);
1786   PetscFunctionReturn(0);
1787 }
1788 
1789 #undef __FUNCT__
1790 #define __FUNCT__ "MatCopy_MPIAIJ"
1791 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
1792 {
1793   PetscErrorCode ierr;
1794   Mat_MPIAIJ     *a = (Mat_MPIAIJ *)A->data;
1795   Mat_MPIAIJ     *b = (Mat_MPIAIJ *)B->data;
1796 
1797   PetscFunctionBegin;
1798   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
1799   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
1800     /* because of the column compression in the off-processor part of the matrix a->B,
1801        the number of columns in a->B and b->B may be different, hence we cannot call
1802        the MatCopy() directly on the two parts. If need be, we can provide a more
1803        efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices
1804        then copying the submatrices */
1805     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
1806   } else {
1807     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
1808     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
1809   }
1810   PetscFunctionReturn(0);
1811 }
1812 
1813 #undef __FUNCT__
1814 #define __FUNCT__ "MatSetUpPreallocation_MPIAIJ"
1815 PetscErrorCode MatSetUpPreallocation_MPIAIJ(Mat A)
1816 {
1817   PetscErrorCode ierr;
1818 
1819   PetscFunctionBegin;
1820   ierr =  MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
1821   PetscFunctionReturn(0);
1822 }
1823 
1824 #include "petscblaslapack.h"
1825 #undef __FUNCT__
1826 #define __FUNCT__ "MatAXPY_MPIAIJ"
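/*
    Computes Y = a*X + Y.  Example (sketch):

      ierr = MatAXPY(Y,2.0,X,SAME_NONZERO_PATTERN);CHKERRQ(ierr);

    With SAME_NONZERO_PATTERN both blocks are combined by a single BLAS axpy;
    with SUBSET_NONZERO_PATTERN a map from X's nonzeros into Y's is built and
    cached; otherwise the generic MatAXPY_Basic() path is used.
*/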
1827 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
1828 {
1829   PetscErrorCode ierr;
1830   PetscInt       i;
1831   Mat_MPIAIJ     *xx = (Mat_MPIAIJ *)X->data,*yy = (Mat_MPIAIJ *)Y->data;
1832   PetscBLASInt   bnz,one=1;
1833   Mat_SeqAIJ     *x,*y;
1834 
1835   PetscFunctionBegin;
1836   if (str == SAME_NONZERO_PATTERN) {
1837     PetscScalar alpha = a;
1838     x = (Mat_SeqAIJ *)xx->A->data;
1839     y = (Mat_SeqAIJ *)yy->A->data;
1840     bnz = PetscBLASIntCast(x->nz);
1841     BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
1842     x = (Mat_SeqAIJ *)xx->B->data;
1843     y = (Mat_SeqAIJ *)yy->B->data;
1844     bnz = PetscBLASIntCast(x->nz);
1845     BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
1846   } else if (str == SUBSET_NONZERO_PATTERN) {
1847     ierr = MatAXPY_SeqAIJ(yy->A,a,xx->A,str);CHKERRQ(ierr);
1848 
1849     x = (Mat_SeqAIJ *)xx->B->data;
1850     y = (Mat_SeqAIJ *)yy->B->data;
1851     if (y->xtoy && y->XtoY != xx->B) {
1852       ierr = PetscFree(y->xtoy);CHKERRQ(ierr);
1853       ierr = MatDestroy(y->XtoY);CHKERRQ(ierr);
1854     }
1855     if (!y->xtoy) { /* get xtoy */
1856       ierr = MatAXPYGetxtoy_Private(xx->B->rmap->n,x->i,x->j,xx->garray,y->i,y->j,yy->garray,&y->xtoy);CHKERRQ(ierr);
1857       y->XtoY = xx->B;
1858       ierr = PetscObjectReference((PetscObject)xx->B);CHKERRQ(ierr);
1859     }
1860     for (i=0; i<x->nz; i++) y->a[y->xtoy[i]] += a*(x->a[i]);
1861   } else {
1862     ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
1863   }
1864   PetscFunctionReturn(0);
1865 }
1866 
1867 EXTERN PetscErrorCode PETSCMAT_DLLEXPORT MatConjugate_SeqAIJ(Mat);
1868 
1869 #undef __FUNCT__
1870 #define __FUNCT__ "MatConjugate_MPIAIJ"
1871 PetscErrorCode PETSCMAT_DLLEXPORT MatConjugate_MPIAIJ(Mat mat)
1872 {
1873 #if defined(PETSC_USE_COMPLEX)
1874   PetscErrorCode ierr;
1875   Mat_MPIAIJ     *aij = (Mat_MPIAIJ *)mat->data;
1876 
1877   PetscFunctionBegin;
1878   ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr);
1879   ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr);
1880 #else
1881   PetscFunctionBegin;
1882 #endif
1883   PetscFunctionReturn(0);
1884 }
1885 
1886 #undef __FUNCT__
1887 #define __FUNCT__ "MatRealPart_MPIAIJ"
1888 PetscErrorCode MatRealPart_MPIAIJ(Mat A)
1889 {
1890   Mat_MPIAIJ   *a = (Mat_MPIAIJ*)A->data;
1891   PetscErrorCode ierr;
1892 
1893   PetscFunctionBegin;
1894   ierr = MatRealPart(a->A);CHKERRQ(ierr);
1895   ierr = MatRealPart(a->B);CHKERRQ(ierr);
1896   PetscFunctionReturn(0);
1897 }
1898 
1899 #undef __FUNCT__
1900 #define __FUNCT__ "MatImaginaryPart_MPIAIJ"
1901 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A)
1902 {
1903   Mat_MPIAIJ   *a = (Mat_MPIAIJ*)A->data;
1904   PetscErrorCode ierr;
1905 
1906   PetscFunctionBegin;
1907   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
1908   ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
1909   PetscFunctionReturn(0);
1910 }
1911 
1912 #ifdef PETSC_HAVE_PBGL
1913 
1914 #include <boost/parallel/mpi/bsp_process_group.hpp>
1915 #include <boost/graph/distributed/ilu_default_graph.hpp>
1916 #include <boost/graph/distributed/ilu_0_block.hpp>
1917 #include <boost/graph/distributed/ilu_preconditioner.hpp>
1918 #include <boost/graph/distributed/petsc/interface.hpp>
1919 #include <boost/multi_array.hpp>
1920 #include <boost/parallel/distributed_property_map.hpp>
1921 
1922 #undef __FUNCT__
1923 #define __FUNCT__ "MatILUFactorSymbolic_MPIAIJ"
1924 /*
1925   This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu>
1926 */
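/*
    A distributed graph is built from the PETSc matrix, ilu_permuted() is run
    on it, and the resulting level graph is stashed in a PetscContainer that is
    composed on the factor matrix under the key "graph"; MatSolve_MPIAIJ()
    retrieves it from there.
*/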
1927 PetscErrorCode MatILUFactorSymbolic_MPIAIJ(Mat fact,Mat A, IS isrow, IS iscol, const MatFactorInfo *info)
1928 {
1929   namespace petsc = boost::distributed::petsc;
1930 
1931   namespace graph_dist = boost::graph::distributed;
1932   using boost::graph::distributed::ilu_default::process_group_type;
1933   using boost::graph::ilu_permuted;
1934 
1935   PetscTruth      row_identity, col_identity;
1936   PetscContainer  c;
1937   PetscInt        m, n, M, N;
1938   PetscErrorCode  ierr;
1939 
1940   PetscFunctionBegin;
1941   if (info->levels != 0) SETERRQ(PETSC_ERR_SUP,"Only levels = 0 supported for parallel ilu");
1942   ierr = ISIdentity(isrow, &row_identity);CHKERRQ(ierr);
1943   ierr = ISIdentity(iscol, &col_identity);CHKERRQ(ierr);
1944   if (!row_identity || !col_identity) {
1945     SETERRQ(PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for parallel ILU");
1946   }
1947 
1948   process_group_type pg;
1949   typedef graph_dist::ilu_default::ilu_level_graph_type  lgraph_type;
1950   lgraph_type*   lgraph_p = new lgraph_type(petsc::num_global_vertices(A), pg, petsc::matrix_distribution(A, pg));
1951   lgraph_type&   level_graph = *lgraph_p;
1952   graph_dist::ilu_default::graph_type&            graph(level_graph.graph);
1953 
1954   petsc::read_matrix(A, graph, get(boost::edge_weight, graph));
1955   ilu_permuted(level_graph);
1956 
1957   /* put together the new matrix */
1958   ierr = MatCreate(((PetscObject)A)->comm, fact);CHKERRQ(ierr);
1959   ierr = MatGetLocalSize(A, &m, &n);CHKERRQ(ierr);
1960   ierr = MatGetSize(A, &M, &N);CHKERRQ(ierr);
1961   ierr = MatSetSizes(fact, m, n, M, N);CHKERRQ(ierr);
1962   ierr = MatSetType(fact, ((PetscObject)A)->type_name);CHKERRQ(ierr);
1963   ierr = MatAssemblyBegin(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1964   ierr = MatAssemblyEnd(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1965 
1966   ierr = PetscContainerCreate(((PetscObject)A)->comm, &c);CHKERRQ(ierr);
1967   ierr = PetscContainerSetPointer(c, lgraph_p);CHKERRQ(ierr);
1968   ierr = PetscObjectCompose((PetscObject) (fact), "graph", (PetscObject) c);CHKERRQ(ierr);
1969   PetscFunctionReturn(0);
1970 }
1971 
1972 #undef __FUNCT__
1973 #define __FUNCT__ "MatLUFactorNumeric_MPIAIJ"
1974 PetscErrorCode MatLUFactorNumeric_MPIAIJ(Mat B,Mat A, const MatFactorInfo *info)
1975 {
1976   PetscFunctionBegin;
1977   PetscFunctionReturn(0);
1978 }
1979 
1980 #undef __FUNCT__
1981 #define __FUNCT__ "MatSolve_MPIAIJ"
1982 /*
1983   This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu>
1984 */
1985 PetscErrorCode MatSolve_MPIAIJ(Mat A, Vec b, Vec x)
1986 {
1987   namespace graph_dist = boost::graph::distributed;
1988 
1989   typedef graph_dist::ilu_default::ilu_level_graph_type  lgraph_type;
1990   lgraph_type*   lgraph_p;
1991   PetscContainer c;
1992   PetscErrorCode ierr;
1993 
1994   PetscFunctionBegin;
1995   ierr = PetscObjectQuery((PetscObject) A, "graph", (PetscObject *) &c);CHKERRQ(ierr);
1996   ierr = PetscContainerGetPointer(c, (void **) &lgraph_p);CHKERRQ(ierr);
1997   ierr = VecCopy(b, x);CHKERRQ(ierr);
1998 
1999   PetscScalar* array_x;
2000   ierr = VecGetArray(x, &array_x);CHKERRQ(ierr);
2001   PetscInt sx;
2002   ierr = VecGetSize(x, &sx);CHKERRQ(ierr);
2003 
2004   PetscScalar* array_b;
2005   ierr = VecGetArray(b, &array_b);CHKERRQ(ierr);
2006   PetscInt sb;
2007   ierr = VecGetSize(b, &sb);CHKERRQ(ierr);
2008 
2009   lgraph_type&   level_graph = *lgraph_p;
2010   graph_dist::ilu_default::graph_type&            graph(level_graph.graph);
2011 
2012   typedef boost::multi_array_ref<PetscScalar, 1> array_ref_type;
2013   array_ref_type                                 ref_b(array_b, boost::extents[num_vertices(graph)]),
2014                                                  ref_x(array_x, boost::extents[num_vertices(graph)]);
2015 
2016   typedef boost::iterator_property_map<array_ref_type::iterator,
2017                                 boost::property_map<graph_dist::ilu_default::graph_type, boost::vertex_index_t>::type>  gvector_type;
2018   gvector_type                                   vector_b(ref_b.begin(), get(boost::vertex_index, graph)),
2019                                                  vector_x(ref_x.begin(), get(boost::vertex_index, graph));
2020 
2021   ilu_set_solve(*lgraph_p, vector_b, vector_x);
2022 
2023   PetscFunctionReturn(0);
2024 }
2025 #endif
2026 
2027 typedef struct { /* used by MatGetRedundantMatrix() for reusing matredundant */
2028   PetscInt       nzlocal,nsends,nrecvs;
2029   PetscMPIInt    *send_rank;
2030   PetscInt       *sbuf_nz,*sbuf_j,**rbuf_j;
2031   PetscScalar    *sbuf_a,**rbuf_a;
2032   PetscErrorCode (*MatDestroy)(Mat);
2033 } Mat_Redundant;
2034 
2035 #undef __FUNCT__
2036 #define __FUNCT__ "PetscContainerDestroy_MatRedundant"
2037 PetscErrorCode PetscContainerDestroy_MatRedundant(void *ptr)
2038 {
2039   PetscErrorCode       ierr;
2040   Mat_Redundant        *redund=(Mat_Redundant*)ptr;
2041   PetscInt             i;
2042 
2043   PetscFunctionBegin;
2044   ierr = PetscFree(redund->send_rank);CHKERRQ(ierr);
2045   ierr = PetscFree(redund->sbuf_j);CHKERRQ(ierr);
2046   ierr = PetscFree(redund->sbuf_a);CHKERRQ(ierr);
2047   for (i=0; i<redund->nrecvs; i++){
2048     ierr = PetscFree(redund->rbuf_j[i]);CHKERRQ(ierr);
2049     ierr = PetscFree(redund->rbuf_a[i]);CHKERRQ(ierr);
2050   }
2051   ierr = PetscFree3(redund->sbuf_nz,redund->rbuf_j,redund->rbuf_a);CHKERRQ(ierr);
2052   ierr = PetscFree(redund);CHKERRQ(ierr);
2053   PetscFunctionReturn(0);
2054 }
2055 
2056 #undef __FUNCT__
2057 #define __FUNCT__ "MatDestroy_MatRedundant"
2058 PetscErrorCode MatDestroy_MatRedundant(Mat A)
2059 {
2060   PetscErrorCode  ierr;
2061   PetscContainer  container;
2062   Mat_Redundant   *redund=PETSC_NULL;
2063 
2064   PetscFunctionBegin;
2065   ierr = PetscObjectQuery((PetscObject)A,"Mat_Redundant",(PetscObject *)&container);CHKERRQ(ierr);
2066   if (container) {
2067     ierr = PetscContainerGetPointer(container,(void **)&redund);CHKERRQ(ierr);
2068   } else {
2069     SETERRQ(PETSC_ERR_PLIB,"Container does not exist");
2070   }
2071   A->ops->destroy = redund->MatDestroy;
2072   ierr = PetscObjectCompose((PetscObject)A,"Mat_Redundant",0);CHKERRQ(ierr);
2073   ierr = (*A->ops->destroy)(A);CHKERRQ(ierr);
2074   ierr = PetscContainerDestroy(container);CHKERRQ(ierr);
2075   PetscFunctionReturn(0);
2076 }
2077 
2078 #undef __FUNCT__
2079 #define __FUNCT__ "MatGetRedundantMatrix_MPIAIJ"
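/*
    Builds a redundant copy of the whole matrix on each subcommunicator.  The
    local rows are packed in CSR form into sbuf_j/sbuf_a, exchanged with the
    other members of the subcommunicator in three rounds (sizes, indices,
    values), and the result is assembled with MatSetValues().  For
    MAT_REUSE_MATRIX the communication pattern and buffers cached in the
    Mat_Redundant container are reused and only the numerical values travel.
*/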
2080 PetscErrorCode MatGetRedundantMatrix_MPIAIJ(Mat mat,PetscInt nsubcomm,MPI_Comm subcomm,PetscInt mlocal_sub,MatReuse reuse,Mat *matredundant)
2081 {
2082   PetscMPIInt    rank,size;
2083   MPI_Comm       comm=((PetscObject)mat)->comm;
2084   PetscErrorCode ierr;
2085   PetscInt       nsends=0,nrecvs=0,i,rownz_max=0;
2086   PetscMPIInt    *send_rank=PETSC_NULL,*recv_rank=PETSC_NULL;
2087   PetscInt       *rowrange=mat->rmap->range;
2088   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2089   Mat            A=aij->A,B=aij->B,C=*matredundant;
2090   Mat_SeqAIJ     *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data;
2091   PetscScalar    *sbuf_a;
2092   PetscInt       nzlocal=a->nz+b->nz;
2093   PetscInt       j,cstart=mat->cmap->rstart,cend=mat->cmap->rend,row,nzA,nzB,ncols,*cworkA,*cworkB;
2094   PetscInt       rstart=mat->rmap->rstart,rend=mat->rmap->rend,*bmap=aij->garray,M,N;
2095   PetscInt       *cols,ctmp,lwrite,*rptr,l,*sbuf_j;
2096   MatScalar      *aworkA,*aworkB;
2097   PetscScalar    *vals;
2098   PetscMPIInt    tag1,tag2,tag3,imdex;
2099   MPI_Request    *s_waits1=PETSC_NULL,*s_waits2=PETSC_NULL,*s_waits3=PETSC_NULL,
2100                  *r_waits1=PETSC_NULL,*r_waits2=PETSC_NULL,*r_waits3=PETSC_NULL;
2101   MPI_Status     recv_status,*send_status;
2102   PetscInt       *sbuf_nz=PETSC_NULL,*rbuf_nz=PETSC_NULL,count;
2103   PetscInt       **rbuf_j=PETSC_NULL;
2104   PetscScalar    **rbuf_a=PETSC_NULL;
2105   Mat_Redundant  *redund=PETSC_NULL;
2106   PetscContainer container;
2107 
2108   PetscFunctionBegin;
2109   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2110   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2111 
2112   if (reuse == MAT_REUSE_MATRIX) {
2113     ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr);
2114     if (M != N || M != mat->rmap->N) SETERRQ(PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong global size");
2115     ierr = MatGetLocalSize(C,&M,&N);CHKERRQ(ierr);
2116     if (M != N || M != mlocal_sub) SETERRQ(PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong local size");
2117     ierr = PetscObjectQuery((PetscObject)C,"Mat_Redundant",(PetscObject *)&container);CHKERRQ(ierr);
2118     if (container) {
2119       ierr = PetscContainerGetPointer(container,(void **)&redund);CHKERRQ(ierr);
2120     } else {
2121       SETERRQ(PETSC_ERR_PLIB,"Container does not exist");
2122     }
2123     if (nzlocal != redund->nzlocal) SETERRQ(PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong nzlocal");
2124 
2125     nsends    = redund->nsends;
2126     nrecvs    = redund->nrecvs;
2127     send_rank = redund->send_rank; recv_rank = send_rank + size;
2128     sbuf_nz   = redund->sbuf_nz;     rbuf_nz = sbuf_nz + nsends;
2129     sbuf_j    = redund->sbuf_j;
2130     sbuf_a    = redund->sbuf_a;
2131     rbuf_j    = redund->rbuf_j;
2132     rbuf_a    = redund->rbuf_a;
2133   }
2134 
2135   if (reuse == MAT_INITIAL_MATRIX){
2136     PetscMPIInt  subrank,subsize;
2137     PetscInt     nleftover,np_subcomm;
2138     /* get the destination processors' id send_rank, nsends and nrecvs */
2139     ierr = MPI_Comm_rank(subcomm,&subrank);CHKERRQ(ierr);
2140     ierr = MPI_Comm_size(subcomm,&subsize);CHKERRQ(ierr);
2141     ierr = PetscMalloc((2*size+1)*sizeof(PetscMPIInt),&send_rank);CHKERRQ(ierr);
2142     recv_rank = send_rank + size;
2143     np_subcomm = size/nsubcomm;
2144     nleftover  = size - nsubcomm*np_subcomm;
2145     nsends = 0; nrecvs = 0;
2146     for (i=0; i<size; i++){ /* i=rank*/
2147       if (subrank == i/nsubcomm && rank != i){ /* my_subrank == other's subrank */
2148         send_rank[nsends] = i; nsends++;
2149         recv_rank[nrecvs++] = i;
2150       }
2151     }
2152     if (rank >= size - nleftover){/* this proc is a leftover processor */
2153       i = size-nleftover-1;
2154       j = 0;
2155       while (j < nsubcomm - nleftover){
2156         send_rank[nsends++] = i;
2157         i--; j++;
2158       }
2159     }
2160 
2161     if (nleftover && subsize == size/nsubcomm && subrank==subsize-1){ /* this proc recvs from leftover processors */
2162       for (i=0; i<nleftover; i++){
2163         recv_rank[nrecvs++] = size-nleftover+i;
2164       }
2165     }
2166 
2167     /* allocate sbuf_j, sbuf_a */
2168     i = nzlocal + rowrange[rank+1] - rowrange[rank] + 2;
2169     ierr = PetscMalloc(i*sizeof(PetscInt),&sbuf_j);CHKERRQ(ierr);
2170     ierr = PetscMalloc((nzlocal+1)*sizeof(PetscScalar),&sbuf_a);CHKERRQ(ierr);
2171   } /* end of if (reuse == MAT_INITIAL_MATRIX) */
2172 
2173   /* copy mat's local entries into the buffers */
2174   if (reuse == MAT_INITIAL_MATRIX){
2175     rownz_max = 0;
2176     rptr = sbuf_j;
2177     cols = sbuf_j + rend-rstart + 1;
2178     vals = sbuf_a;
2179     rptr[0] = 0;
2180     for (i=0; i<rend-rstart; i++){
2181       row = i + rstart;
2182       nzA    = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i];
2183       ncols  = nzA + nzB;
2184       cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i];
2185       aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i];
2186       /* load the column indices for this row into cols */
2187       lwrite = 0;
2188       for (l=0; l<nzB; l++) {
2189         if ((ctmp = bmap[cworkB[l]]) < cstart){
2190           vals[lwrite]   = aworkB[l];
2191           cols[lwrite++] = ctmp;
2192         }
2193       }
2194       for (l=0; l<nzA; l++){
2195         vals[lwrite]   = aworkA[l];
2196         cols[lwrite++] = cstart + cworkA[l];
2197       }
2198       for (l=0; l<nzB; l++) {
2199         if ((ctmp = bmap[cworkB[l]]) >= cend){
2200           vals[lwrite]   = aworkB[l];
2201           cols[lwrite++] = ctmp;
2202         }
2203       }
2204       vals += ncols;
2205       cols += ncols;
2206       rptr[i+1] = rptr[i] + ncols;
2207       if (rownz_max < ncols) rownz_max = ncols;
2208     }
2209     if (rptr[rend-rstart] != a->nz + b->nz) SETERRQ4(PETSC_ERR_PLIB, "rptr[%d] %d != %d + %d",rend-rstart,rptr[rend-rstart],a->nz,b->nz);
2210   } else { /* only copy matrix values into sbuf_a */
2211     rptr = sbuf_j;
2212     vals = sbuf_a;
2213     rptr[0] = 0;
2214     for (i=0; i<rend-rstart; i++){
2215       row = i + rstart;
2216       nzA    = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i];
2217       ncols  = nzA + nzB;
2218       cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i];
2219       aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i];
2220       lwrite = 0;
2221       for (l=0; l<nzB; l++) {
2222         if ((ctmp = bmap[cworkB[l]]) < cstart) vals[lwrite++] = aworkB[l];
2223       }
2224       for (l=0; l<nzA; l++) vals[lwrite++] = aworkA[l];
2225       for (l=0; l<nzB; l++) {
2226         if ((ctmp = bmap[cworkB[l]]) >= cend) vals[lwrite++] = aworkB[l];
2227       }
2228       vals += ncols;
2229       rptr[i+1] = rptr[i] + ncols;
2230     }
2231   } /* end of if (reuse == MAT_INITIAL_MATRIX) */
2232 
2233   /* send nzlocal to others, and recv other's nzlocal */
2234   /*--------------------------------------------------*/
2235   if (reuse == MAT_INITIAL_MATRIX){
2236     ierr = PetscMalloc2(3*(nsends + nrecvs)+1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr);
2237     s_waits2 = s_waits3 + nsends;
2238     s_waits1 = s_waits2 + nsends;
2239     r_waits1 = s_waits1 + nsends;
2240     r_waits2 = r_waits1 + nrecvs;
2241     r_waits3 = r_waits2 + nrecvs;
2242   } else {
2243     ierr = PetscMalloc2(nsends + nrecvs +1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr);
2244     r_waits3 = s_waits3 + nsends;
2245   }
2246 
2247   ierr = PetscObjectGetNewTag((PetscObject)mat,&tag3);CHKERRQ(ierr);
2248   if (reuse == MAT_INITIAL_MATRIX){
2249     /* get new tags to keep the communication clean */
2250     ierr = PetscObjectGetNewTag((PetscObject)mat,&tag1);CHKERRQ(ierr);
2251     ierr = PetscObjectGetNewTag((PetscObject)mat,&tag2);CHKERRQ(ierr);
2252     ierr = PetscMalloc3(nsends+nrecvs+1,PetscInt,&sbuf_nz,nrecvs,PetscInt*,&rbuf_j,nrecvs,PetscScalar*,&rbuf_a);CHKERRQ(ierr);
2253     rbuf_nz = sbuf_nz + nsends;
2254 
2255     /* post receives of other's nzlocal */
2256     for (i=0; i<nrecvs; i++){
2257       ierr = MPI_Irecv(rbuf_nz+i,1,MPIU_INT,MPI_ANY_SOURCE,tag1,comm,r_waits1+i);CHKERRQ(ierr);
2258     }
2259     /* send nzlocal to others */
2260     for (i=0; i<nsends; i++){
2261       sbuf_nz[i] = nzlocal;
2262       ierr = MPI_Isend(sbuf_nz+i,1,MPIU_INT,send_rank[i],tag1,comm,s_waits1+i);CHKERRQ(ierr);
2263     }
2264     /* wait on receives of nzlocal; allocate space for rbuf_j, rbuf_a */
2265     count = nrecvs;
2266     while (count) {
2267       ierr = MPI_Waitany(nrecvs,r_waits1,&imdex,&recv_status);CHKERRQ(ierr);
2268       recv_rank[imdex] = recv_status.MPI_SOURCE;
2269       /* allocate rbuf_a and rbuf_j; then post receives of rbuf_j */
2270       ierr = PetscMalloc((rbuf_nz[imdex]+1)*sizeof(PetscScalar),&rbuf_a[imdex]);CHKERRQ(ierr);
2271 
2272       i = rowrange[recv_status.MPI_SOURCE+1] - rowrange[recv_status.MPI_SOURCE]; /* number of expected mat->i */
2273       rbuf_nz[imdex] += i + 2;
2274       ierr = PetscMalloc(rbuf_nz[imdex]*sizeof(PetscInt),&rbuf_j[imdex]);CHKERRQ(ierr);
2275       ierr = MPI_Irecv(rbuf_j[imdex],rbuf_nz[imdex],MPIU_INT,recv_status.MPI_SOURCE,tag2,comm,r_waits2+imdex);CHKERRQ(ierr);
2276       count--;
2277     }
2278     /* wait on sends of nzlocal */
2279     if (nsends) {ierr = MPI_Waitall(nsends,s_waits1,send_status);CHKERRQ(ierr);}
2280     /* send mat->i,j to others, and recv from other's */
2281     /*------------------------------------------------*/
2282     for (i=0; i<nsends; i++){
2283       j = nzlocal + rowrange[rank+1] - rowrange[rank] + 1;
2284       ierr = MPI_Isend(sbuf_j,j,MPIU_INT,send_rank[i],tag2,comm,s_waits2+i);CHKERRQ(ierr);
2285     }
2286     /* wait on receives of mat->i,j */
2287     /*------------------------------*/
2288     count = nrecvs;
2289     while (count) {
2290       ierr = MPI_Waitany(nrecvs,r_waits2,&imdex,&recv_status);CHKERRQ(ierr);
2291       if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_ERR_PLIB, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE);
2292       count--;
2293     }
2294     /* wait on sends of mat->i,j */
2295     /*---------------------------*/
2296     if (nsends) {
2297       ierr = MPI_Waitall(nsends,s_waits2,send_status);CHKERRQ(ierr);
2298     }
2299   } /* end of if (reuse == MAT_INITIAL_MATRIX) */
2300 
2301   /* post receives, send and receive mat->a */
2302   /*----------------------------------------*/
2303   for (imdex=0; imdex<nrecvs; imdex++) {
2304     ierr = MPI_Irecv(rbuf_a[imdex],rbuf_nz[imdex],MPIU_SCALAR,recv_rank[imdex],tag3,comm,r_waits3+imdex);CHKERRQ(ierr);
2305   }
2306   for (i=0; i<nsends; i++){
2307     ierr = MPI_Isend(sbuf_a,nzlocal,MPIU_SCALAR,send_rank[i],tag3,comm,s_waits3+i);CHKERRQ(ierr);
2308   }
2309   count = nrecvs;
2310   while (count) {
2311     ierr = MPI_Waitany(nrecvs,r_waits3,&imdex,&recv_status);CHKERRQ(ierr);
2312     if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_ERR_PLIB, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE);
2313     count--;
2314   }
2315   if (nsends) {
2316     ierr = MPI_Waitall(nsends,s_waits3,send_status);CHKERRQ(ierr);
2317   }
2318 
2319   ierr = PetscFree2(s_waits3,send_status);CHKERRQ(ierr);
2320 
2321   /* create redundant matrix */
2322   /*-------------------------*/
2323   if (reuse == MAT_INITIAL_MATRIX){
2324     /* compute rownz_max for preallocation */
2325     for (imdex=0; imdex<nrecvs; imdex++){
2326       j = rowrange[recv_rank[imdex]+1] - rowrange[recv_rank[imdex]];
2327       rptr = rbuf_j[imdex];
2328       for (i=0; i<j; i++){
2329         ncols = rptr[i+1] - rptr[i];
2330         if (rownz_max < ncols) rownz_max = ncols;
2331       }
2332     }
2333 
2334     ierr = MatCreate(subcomm,&C);CHKERRQ(ierr);
2335     ierr = MatSetSizes(C,mlocal_sub,mlocal_sub,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr);
2336     ierr = MatSetFromOptions(C);CHKERRQ(ierr);
2337     ierr = MatSeqAIJSetPreallocation(C,rownz_max,PETSC_NULL);CHKERRQ(ierr);
2338     ierr = MatMPIAIJSetPreallocation(C,rownz_max,PETSC_NULL,rownz_max,PETSC_NULL);CHKERRQ(ierr);
2339   } else {
2340     C = *matredundant;
2341   }
2342 
2343   /* insert local matrix entries */
2344   rptr = sbuf_j;
2345   cols = sbuf_j + rend-rstart + 1;
2346   vals = sbuf_a;
2347   for (i=0; i<rend-rstart; i++){
2348     row   = i + rstart;
2349     ncols = rptr[i+1] - rptr[i];
2350     ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
2351     vals += ncols;
2352     cols += ncols;
2353   }
2354   /* insert received matrix entries */
2355   for (imdex=0; imdex<nrecvs; imdex++){
2356     rstart = rowrange[recv_rank[imdex]];
2357     rend   = rowrange[recv_rank[imdex]+1];
2358     rptr = rbuf_j[imdex];
2359     cols = rbuf_j[imdex] + rend-rstart + 1;
2360     vals = rbuf_a[imdex];
2361     for (i=0; i<rend-rstart; i++){
2362       row   = i + rstart;
2363       ncols = rptr[i+1] - rptr[i];
2364       ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
2365       vals += ncols;
2366       cols += ncols;
2367     }
2368   }
2369   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2370   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2371   ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr);
2372   if (M != mat->rmap->N || N != mat->cmap->N) SETERRQ2(PETSC_ERR_ARG_INCOMP,"redundant mat size %d != input mat size %d",M,mat->rmap->N);
2373   if (reuse == MAT_INITIAL_MATRIX){
2374     PetscContainer container;
2375     *matredundant = C;
2376     /* create a supporting struct and attach it to C for reuse */
2377     ierr = PetscNewLog(C,Mat_Redundant,&redund);CHKERRQ(ierr);
2378     ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
2379     ierr = PetscContainerSetPointer(container,redund);CHKERRQ(ierr);
2380     ierr = PetscObjectCompose((PetscObject)C,"Mat_Redundant",(PetscObject)container);CHKERRQ(ierr);
2381     ierr = PetscContainerSetUserDestroy(container,PetscContainerDestroy_MatRedundant);CHKERRQ(ierr);
2382 
2383     redund->nzlocal = nzlocal;
2384     redund->nsends  = nsends;
2385     redund->nrecvs  = nrecvs;
2386     redund->send_rank = send_rank;
2387     redund->sbuf_nz = sbuf_nz;
2388     redund->sbuf_j  = sbuf_j;
2389     redund->sbuf_a  = sbuf_a;
2390     redund->rbuf_j  = rbuf_j;
2391     redund->rbuf_a  = rbuf_a;
2392 
2393     redund->MatDestroy = C->ops->destroy;
2394     C->ops->destroy    = MatDestroy_MatRedundant;
2395   }
2396   PetscFunctionReturn(0);
2397 }
2398 
2399 #undef __FUNCT__
2400 #define __FUNCT__ "MatGetRowMaxAbs_MPIAIJ"
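/*
    The largest absolute entry of each local row is taken over the diagonal
    block first, then merged with the off-diagonal block; garray maps local
    off-diagonal column numbers back to global columns for the optional idx[]
    output.  Example (sketch): ierr = MatGetRowMaxAbs(A,v,PETSC_NULL);CHKERRQ(ierr);
*/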
2401 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2402 {
2403   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2404   PetscErrorCode ierr;
2405   PetscInt       i,*idxb = 0;
2406   PetscScalar    *va,*vb;
2407   Vec            vtmp;
2408 
2409   PetscFunctionBegin;
2410   ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr);
2411   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2412   if (idx) {
2413     for (i=0; i<A->rmap->n; i++) {
2414       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2415     }
2416   }
2417 
2418   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
2419   if (idx) {
2420     ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr);
2421   }
2422   ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
2423   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
2424 
2425   for (i=0; i<A->rmap->n; i++){
2426     if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
2427       va[i] = vb[i];
2428       if (idx) idx[i] = a->garray[idxb[i]];
2429     }
2430   }
2431 
2432   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2433   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
2434   if (idxb) {
2435     ierr = PetscFree(idxb);CHKERRQ(ierr);
2436   }
2437   ierr = VecDestroy(vtmp);CHKERRQ(ierr);
2438   PetscFunctionReturn(0);
2439 }
2440 
2441 #undef __FUNCT__
2442 #define __FUNCT__ "MatGetRowMinAbs_MPIAIJ"
2443 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2444 {
2445   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2446   PetscErrorCode ierr;
2447   PetscInt       i,*idxb = 0;
2448   PetscScalar    *va,*vb;
2449   Vec            vtmp;
2450 
2451   PetscFunctionBegin;
2452   ierr = MatGetRowMinAbs(a->A,v,idx);CHKERRQ(ierr);
2453   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2454   if (idx) {
2455     for (i=0; i<A->rmap->n; i++) {
2456       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2457     }
2458   }
2459 
2460   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
2461   if (idx) {
2462     ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr);
2463   }
2464   ierr = MatGetRowMinAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
2465   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
2466 
2467   for (i=0; i<A->rmap->n; i++){
2468     if (PetscAbsScalar(va[i]) > PetscAbsScalar(vb[i])) {
2469       va[i] = vb[i];
2470       if (idx) idx[i] = a->garray[idxb[i]];
2471     }
2472   }
2473 
2474   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2475   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
2476   if (idxb) {
2477     ierr = PetscFree(idxb);CHKERRQ(ierr);
2478   }
2479   ierr = VecDestroy(vtmp);CHKERRQ(ierr);
2480   PetscFunctionReturn(0);
2481 }
2482 
2483 #undef __FUNCT__
2484 #define __FUNCT__ "MatGetRowMin_MPIAIJ"
2485 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2486 {
2487   Mat_MPIAIJ    *mat    = (Mat_MPIAIJ *) A->data;
2488   PetscInt       n      = A->rmap->n;
2489   PetscInt       cstart = A->cmap->rstart;
2490   PetscInt      *cmap   = mat->garray;
2491   PetscInt      *diagIdx, *offdiagIdx;
2492   Vec            diagV, offdiagV;
2493   PetscScalar   *a, *diagA, *offdiagA;
2494   PetscInt       r;
2495   PetscErrorCode ierr;
2496 
2497   PetscFunctionBegin;
2498   ierr = PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);CHKERRQ(ierr);
2499   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr);
2500   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr);
2501   ierr = MatGetRowMin(mat->A, diagV,    diagIdx);CHKERRQ(ierr);
2502   ierr = MatGetRowMin(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
2503   ierr = VecGetArray(v,        &a);CHKERRQ(ierr);
2504   ierr = VecGetArray(diagV,    &diagA);CHKERRQ(ierr);
2505   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2506   for(r = 0; r < n; ++r) {
2507     if (PetscAbsScalar(diagA[r]) <= PetscAbsScalar(offdiagA[r])) {
2508       a[r]   = diagA[r];
2509       idx[r] = cstart + diagIdx[r];
2510     } else {
2511       a[r]   = offdiagA[r];
2512       idx[r] = cmap[offdiagIdx[r]];
2513     }
2514   }
2515   ierr = VecRestoreArray(v,        &a);CHKERRQ(ierr);
2516   ierr = VecRestoreArray(diagV,    &diagA);CHKERRQ(ierr);
2517   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2518   ierr = VecDestroy(diagV);CHKERRQ(ierr);
2519   ierr = VecDestroy(offdiagV);CHKERRQ(ierr);
2520   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
2521   PetscFunctionReturn(0);
2522 }
2523 
2524 #undef __FUNCT__
2525 #define __FUNCT__ "MatGetRowMax_MPIAIJ"
2526 PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2527 {
2528   Mat_MPIAIJ    *mat    = (Mat_MPIAIJ *) A->data;
2529   PetscInt       n      = A->rmap->n;
2530   PetscInt       cstart = A->cmap->rstart;
2531   PetscInt      *cmap   = mat->garray;
2532   PetscInt      *diagIdx, *offdiagIdx;
2533   Vec            diagV, offdiagV;
2534   PetscScalar   *a, *diagA, *offdiagA;
2535   PetscInt       r;
2536   PetscErrorCode ierr;
2537 
2538   PetscFunctionBegin;
2539   ierr = PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);CHKERRQ(ierr);
2540   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr);
2541   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr);
2542   ierr = MatGetRowMax(mat->A, diagV,    diagIdx);CHKERRQ(ierr);
2543   ierr = MatGetRowMax(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
2544   ierr = VecGetArray(v,        &a);CHKERRQ(ierr);
2545   ierr = VecGetArray(diagV,    &diagA);CHKERRQ(ierr);
2546   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2547   for(r = 0; r < n; ++r) {
2548     if (PetscAbsScalar(diagA[r]) >= PetscAbsScalar(offdiagA[r])) {
2549       a[r]   = diagA[r];
2550       idx[r] = cstart + diagIdx[r];
2551     } else {
2552       a[r]   = offdiagA[r];
2553       idx[r] = cmap[offdiagIdx[r]];
2554     }
2555   }
2556   ierr = VecRestoreArray(v,        &a);CHKERRQ(ierr);
2557   ierr = VecRestoreArray(diagV,    &diagA);CHKERRQ(ierr);
2558   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2559   ierr = VecDestroy(diagV);CHKERRQ(ierr);
2560   ierr = VecDestroy(offdiagV);CHKERRQ(ierr);
2561   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
2562   PetscFunctionReturn(0);
2563 }
2564 
2565 #undef __FUNCT__
2566 #define __FUNCT__ "MatGetSeqNonzerostructure_MPIAIJ"
2567 PetscErrorCode MatGetSeqNonzerostructure_MPIAIJ(Mat mat,Mat *newmat)
2568 {
2569   PetscErrorCode ierr;
2570   Mat            *dummy;
2571 
2572   PetscFunctionBegin;
2573   ierr = MatGetSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy);CHKERRQ(ierr);
2574   *newmat = *dummy;
2575   ierr = PetscFree(dummy);CHKERRQ(ierr);
2576   PetscFunctionReturn(0);
2577 }
2578 
2579 extern PetscErrorCode PETSCMAT_DLLEXPORT MatFDColoringApply_AIJ(Mat,MatFDColoring,Vec,MatStructure*,void*);
2580 /* -------------------------------------------------------------------*/
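/*
    Function table for MATMPIAIJ.  The numbered comments give the index of each
    entry in struct _MatOps; a zero entry means the operation is either
    unsupported for this type or handled by a generic fallback.
*/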
2581 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
2582        MatGetRow_MPIAIJ,
2583        MatRestoreRow_MPIAIJ,
2584        MatMult_MPIAIJ,
2585 /* 4*/ MatMultAdd_MPIAIJ,
2586        MatMultTranspose_MPIAIJ,
2587        MatMultTransposeAdd_MPIAIJ,
2588 #ifdef PETSC_HAVE_PBGL
2589        MatSolve_MPIAIJ,
2590 #else
2591        0,
2592 #endif
2593        0,
2594        0,
2595 /*10*/ 0,
2596        0,
2597        0,
2598        MatSOR_MPIAIJ,
2599        MatTranspose_MPIAIJ,
2600 /*15*/ MatGetInfo_MPIAIJ,
2601        MatEqual_MPIAIJ,
2602        MatGetDiagonal_MPIAIJ,
2603        MatDiagonalScale_MPIAIJ,
2604        MatNorm_MPIAIJ,
2605 /*20*/ MatAssemblyBegin_MPIAIJ,
2606        MatAssemblyEnd_MPIAIJ,
2607        MatSetOption_MPIAIJ,
2608        MatZeroEntries_MPIAIJ,
2609 /*24*/ MatZeroRows_MPIAIJ,
2610        0,
2611 #ifdef PETSC_HAVE_PBGL
2612        0,
2613 #else
2614        0,
2615 #endif
2616        0,
2617        0,
2618 /*29*/ MatSetUpPreallocation_MPIAIJ,
2619 #ifdef PETSC_HAVE_PBGL
2620        0,
2621 #else
2622        0,
2623 #endif
2624        0,
2625        0,
2626        0,
2627 /*34*/ MatDuplicate_MPIAIJ,
2628        0,
2629        0,
2630        0,
2631        0,
2632 /*39*/ MatAXPY_MPIAIJ,
2633        MatGetSubMatrices_MPIAIJ,
2634        MatIncreaseOverlap_MPIAIJ,
2635        MatGetValues_MPIAIJ,
2636        MatCopy_MPIAIJ,
2637 /*44*/ MatGetRowMax_MPIAIJ,
2638        MatScale_MPIAIJ,
2639        0,
2640        0,
2641        0,
2642 /*49*/ MatSetBlockSize_MPIAIJ,
2643        0,
2644        0,
2645        0,
2646        0,
2647 /*54*/ MatFDColoringCreate_MPIAIJ,
2648        0,
2649        MatSetUnfactored_MPIAIJ,
2650        MatPermute_MPIAIJ,
2651        0,
2652 /*59*/ MatGetSubMatrix_MPIAIJ,
2653        MatDestroy_MPIAIJ,
2654        MatView_MPIAIJ,
2655        0,
2656        0,
2657 /*64*/ 0,
2658        0,
2659        0,
2660        0,
2661        0,
2662 /*69*/ MatGetRowMaxAbs_MPIAIJ,
2663        MatGetRowMinAbs_MPIAIJ,
2664        0,
2665        MatSetColoring_MPIAIJ,
2666 #if defined(PETSC_HAVE_ADIC)
2667        MatSetValuesAdic_MPIAIJ,
2668 #else
2669        0,
2670 #endif
2671        MatSetValuesAdifor_MPIAIJ,
2672 /*75*/ MatFDColoringApply_AIJ,
2673        0,
2674        0,
2675        0,
2676        0,
2677 /*80*/ 0,
2678        0,
2679        0,
2680 /*83*/ MatLoad_MPIAIJ,
2681        0,
2682        0,
2683        0,
2684        0,
2685        0,
2686 /*89*/ MatMatMult_MPIAIJ_MPIAIJ,
2687        MatMatMultSymbolic_MPIAIJ_MPIAIJ,
2688        MatMatMultNumeric_MPIAIJ_MPIAIJ,
2689        MatPtAP_Basic,
2690        MatPtAPSymbolic_MPIAIJ,
2691 /*94*/ MatPtAPNumeric_MPIAIJ,
2692        0,
2693        0,
2694        0,
2695        0,
2696 /*99*/ 0,
2697        MatPtAPSymbolic_MPIAIJ_MPIAIJ,
2698        MatPtAPNumeric_MPIAIJ_MPIAIJ,
2699        MatConjugate_MPIAIJ,
2700        0,
2701 /*104*/MatSetValuesRow_MPIAIJ,
2702        MatRealPart_MPIAIJ,
2703        MatImaginaryPart_MPIAIJ,
2704        0,
2705        0,
2706 /*109*/0,
2707        MatGetRedundantMatrix_MPIAIJ,
2708        MatGetRowMin_MPIAIJ,
2709        0,
2710        0,
2711 /*114*/MatGetSeqNonzerostructure_MPIAIJ,
2712        0,
2713        0,
2714        0,
2715        0,
2716        0
2717 };
2718 
2719 /* ----------------------------------------------------------------------------------------*/
2720 
2721 EXTERN_C_BEGIN
2722 #undef __FUNCT__
2723 #define __FUNCT__ "MatStoreValues_MPIAIJ"
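/*
    MatStoreValues()/MatRetrieveValues() save and later restore the numerical
    values of a matrix whose nonzero pattern does not change.  Example (sketch):

      ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);CHKERRQ(ierr);
      ierr = MatStoreValues(A);CHKERRQ(ierr);
      ...
      ierr = MatRetrieveValues(A);CHKERRQ(ierr);
*/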
2724 PetscErrorCode PETSCMAT_DLLEXPORT MatStoreValues_MPIAIJ(Mat mat)
2725 {
2726   Mat_MPIAIJ     *aij = (Mat_MPIAIJ *)mat->data;
2727   PetscErrorCode ierr;
2728 
2729   PetscFunctionBegin;
2730   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
2731   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
2732   PetscFunctionReturn(0);
2733 }
2734 EXTERN_C_END
2735 
2736 EXTERN_C_BEGIN
2737 #undef __FUNCT__
2738 #define __FUNCT__ "MatRetrieveValues_MPIAIJ"
2739 PetscErrorCode PETSCMAT_DLLEXPORT MatRetrieveValues_MPIAIJ(Mat mat)
2740 {
2741   Mat_MPIAIJ     *aij = (Mat_MPIAIJ *)mat->data;
2742   PetscErrorCode ierr;
2743 
2744   PetscFunctionBegin;
2745   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
2746   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
2747   PetscFunctionReturn(0);
2748 }
2749 EXTERN_C_END
2750 
2751 #include "petscpc.h"
2752 EXTERN_C_BEGIN
2753 #undef __FUNCT__
2754 #define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ"
2755 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
2756 {
2757   Mat_MPIAIJ     *b;
2758   PetscErrorCode ierr;
2759   PetscInt       i;
2760 
2761   PetscFunctionBegin;
2762   if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5;
2763   if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2;
2764   if (d_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz);
2765   if (o_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz);
2766 
2767   ierr = PetscLayoutSetBlockSize(B->rmap,1);CHKERRQ(ierr);
2768   ierr = PetscLayoutSetBlockSize(B->cmap,1);CHKERRQ(ierr);
2769   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
2770   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
2771   if (d_nnz) {
2772     for (i=0; i<B->rmap->n; i++) {
2773       if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %D value %D",i,d_nnz[i]);
2774     }
2775   }
2776   if (o_nnz) {
2777     for (i=0; i<B->rmap->n; i++) {
2778       if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %D value %D",i,o_nnz[i]);
2779     }
2780   }
2781   b = (Mat_MPIAIJ*)B->data;
2782 
2783   if (!B->preallocated) {
2784     /* Explicitly create 2 MATSEQAIJ matrices. */
2785     ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
2786     ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr);
2787     ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr);
2788     ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr);
2789     ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
2790     ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr);
2791     ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr);
2792     ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr);
2793   }
2794 
2795   ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr);
2796   ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr);
2797   B->preallocated = PETSC_TRUE;
2798   PetscFunctionReturn(0);
2799 }
2800 EXTERN_C_END
2801 
2802 #undef __FUNCT__
2803 #define __FUNCT__ "MatDuplicate_MPIAIJ"
2804 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
2805 {
2806   Mat            mat;
2807   Mat_MPIAIJ     *a,*oldmat = (Mat_MPIAIJ*)matin->data;
2808   PetscErrorCode ierr;
2809 
2810   PetscFunctionBegin;
2811   *newmat       = 0;
2812   ierr = MatCreate(((PetscObject)matin)->comm,&mat);CHKERRQ(ierr);
2813   ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr);
2814   ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr);
2815   ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
2816   a    = (Mat_MPIAIJ*)mat->data;
2817 
2818   mat->factor       = matin->factor;
2819   mat->rmap->bs      = matin->rmap->bs;
2820   mat->assembled    = PETSC_TRUE;
2821   mat->insertmode   = NOT_SET_VALUES;
2822   mat->preallocated = PETSC_TRUE;
2823 
2824   a->size           = oldmat->size;
2825   a->rank           = oldmat->rank;
2826   a->donotstash     = oldmat->donotstash;
2827   a->roworiented    = oldmat->roworiented;
2828   a->rowindices     = 0;
2829   a->rowvalues      = 0;
2830   a->getrowactive   = PETSC_FALSE;
2831 
2832   ierr = PetscLayoutCopy(matin->rmap,&mat->rmap);CHKERRQ(ierr);
2833   ierr = PetscLayoutCopy(matin->cmap,&mat->cmap);CHKERRQ(ierr);
2834 
2835   ierr = MatStashCreate_Private(((PetscObject)matin)->comm,1,&mat->stash);CHKERRQ(ierr);
2836   if (oldmat->colmap) {
2837 #if defined (PETSC_USE_CTABLE)
2838     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
2839 #else
2840     ierr = PetscMalloc((mat->cmap->N)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr);
2841     ierr = PetscLogObjectMemory(mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
2842     ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
2843 #endif
2844   } else a->colmap = 0;
2845   if (oldmat->garray) {
2846     PetscInt len;
2847     len  = oldmat->B->cmap->n;
2848     ierr = PetscMalloc((len+1)*sizeof(PetscInt),&a->garray);CHKERRQ(ierr);
2849     ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr);
2850     if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); }
2851   } else a->garray = 0;
2852 
2853   ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
2854   ierr = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr);
2855   ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
2856   ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr);
2857   ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
2858   ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr);
2859   ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
2860   ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr);
2861   ierr = PetscFListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr);
2862   *newmat = mat;
2863   PetscFunctionReturn(0);
2864 }
2865 
2866 #include "petscsys.h"
2867 
2868 #undef __FUNCT__
2869 #define __FUNCT__ "MatLoad_MPIAIJ"
2870 PetscErrorCode MatLoad_MPIAIJ(PetscViewer viewer, const MatType type,Mat *newmat)
2871 {
2872   Mat            A;
2873   PetscScalar    *vals,*svals;
2874   MPI_Comm       comm = ((PetscObject)viewer)->comm;
2875   MPI_Status     status;
2876   PetscErrorCode ierr;
2877   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag,mpicnt,mpimaxnz;
2878   PetscInt       i,nz,j,rstart,rend,mmax,maxnz = 0;
2879   PetscInt       header[4],*rowlengths = 0,M,N,m,*cols;
2880   PetscInt       *ourlens = PETSC_NULL,*procsnz = PETSC_NULL,*offlens = PETSC_NULL,jj,*mycols,*smycols;
2881   PetscInt       cend,cstart,n,*rowners;
2882   int            fd;
2883 
2884   PetscFunctionBegin;
2885   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2886   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2887   if (!rank) {
2888     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
2889     ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
2890     if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
2891   }
2892 
2893   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
2894   M = header[1]; N = header[2];
2895   /* determine ownership of all rows */
2896   m    = M/size + ((M % size) > rank);
2897   ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr);
2898   ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
2899 
2900   /* First process needs enough room for process with most rows */
2901   if (!rank) {
2902     mmax       = rowners[1];
2903     for (i=2; i<size; i++) {
2904       mmax = PetscMax(mmax,rowners[i]);
2905     }
2906   } else mmax = m;
2907 
2908   rowners[0] = 0;
2909   for (i=2; i<=size; i++) {
2910     rowners[i] += rowners[i-1];
2911   }
2912   rstart = rowners[rank];
2913   rend   = rowners[rank+1];
2914 
2915   /* distribute row lengths to all processors */
2916   ierr    = PetscMalloc2(mmax,PetscInt,&ourlens,mmax,PetscInt,&offlens);CHKERRQ(ierr);
2917   if (!rank) {
2918     ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr);
2919     ierr = PetscMalloc(m*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
2920     ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr);
2921     ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr);
2922     for (j=0; j<m; j++) {
2923       procsnz[0] += ourlens[j];
2924     }
2925     for (i=1; i<size; i++) {
2926       ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr);
2927       /* calculate the number of nonzeros on each processor */
2928       for (j=0; j<rowners[i+1]-rowners[i]; j++) {
2929         procsnz[i] += rowlengths[j];
2930       }
2931       mpicnt = PetscMPIIntCast(rowners[i+1]-rowners[i]);
2932       ierr   = MPI_Send(rowlengths,mpicnt,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
2933     }
2934     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
2935   } else {
    mpicnt = PetscMPIIntCast(m);
2937     ierr   = MPI_Recv(ourlens,mpicnt,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
2938   }
2939 
2940   if (!rank) {
2941     /* determine max buffer needed and allocate it */
2942     maxnz = 0;
2943     for (i=0; i<size; i++) {
2944       maxnz = PetscMax(maxnz,procsnz[i]);
2945     }
2946     ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr);
2947 
2948     /* read in my part of the matrix column indices  */
2949     nz   = procsnz[0];
2950     ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr);
2951     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
2952 
    /* read in everyone else's and ship it off */
2954     for (i=1; i<size; i++) {
2955       nz     = procsnz[i];
2956       ierr   = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
2957       mpicnt = PetscMPIIntCast(nz);
2958       ierr   = MPI_Send(cols,mpicnt,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
2959     }
2960     ierr = PetscFree(cols);CHKERRQ(ierr);
2961   } else {
2962     /* determine buffer space needed for message */
2963     nz = 0;
2964     for (i=0; i<m; i++) {
2965       nz += ourlens[i];
2966     }
2967     ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr);
2968 
2969     /* receive message of column indices*/
    mpicnt = PetscMPIIntCast(nz);
2971     ierr = MPI_Recv(mycols,mpicnt,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
2972     ierr = MPI_Get_count(&status,MPIU_INT,&mpimaxnz);CHKERRQ(ierr);
2973     if (mpimaxnz == MPI_UNDEFINED) {SETERRQ1(PETSC_ERR_LIB,"MPI_Get_count() returned MPI_UNDEFINED, expected %d",mpicnt);}
2974     else if (mpimaxnz < 0) {SETERRQ2(PETSC_ERR_LIB,"MPI_Get_count() returned impossible negative value %d, expected %d",mpimaxnz,mpicnt);}
2975     else if (mpimaxnz != mpicnt) {SETERRQ2(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file: expected %d received %d",mpicnt,mpimaxnz);}
2976   }
2977 
2978   /* determine column ownership if matrix is not square */
2979   if (N != M) {
2980     n      = N/size + ((N % size) > rank);
2981     ierr   = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
2982     cstart = cend - n;
2983   } else {
2984     cstart = rstart;
2985     cend   = rend;
2986     n      = cend - cstart;
2987   }
2988 
2989   /* loop over local rows, determining number of off diagonal entries */
2990   ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr);
2991   jj = 0;
2992   for (i=0; i<m; i++) {
2993     for (j=0; j<ourlens[i]; j++) {
2994       if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++;
2995       jj++;
2996     }
2997   }
2998 
2999   /* create our matrix */
3000   for (i=0; i<m; i++) {
3001     ourlens[i] -= offlens[i];
3002   }
3003   ierr = MatCreate(comm,&A);CHKERRQ(ierr);
3004   ierr = MatSetSizes(A,m,n,M,N);CHKERRQ(ierr);
3005   ierr = MatSetType(A,type);CHKERRQ(ierr);
3006   ierr = MatMPIAIJSetPreallocation(A,0,ourlens,0,offlens);CHKERRQ(ierr);
3007 
3008   for (i=0; i<m; i++) {
3009     ourlens[i] += offlens[i];
3010   }
3011 
3012   if (!rank) {
3013     ierr = PetscMalloc((maxnz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
3014 
3015     /* read in my part of the matrix numerical values  */
3016     nz   = procsnz[0];
3017     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3018 
3019     /* insert into matrix */
3020     jj      = rstart;
3021     smycols = mycols;
3022     svals   = vals;
3023     for (i=0; i<m; i++) {
3024       ierr = MatSetValues_MPIAIJ(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
3025       smycols += ourlens[i];
3026       svals   += ourlens[i];
3027       jj++;
3028     }
3029 
3030     /* read in other processors and ship out */
3031     for (i=1; i<size; i++) {
3032       nz     = procsnz[i];
3033       ierr   = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3034       mpicnt = PetscMPIIntCast(nz);
3035       ierr   = MPI_Send(vals,mpicnt,MPIU_SCALAR,i,((PetscObject)A)->tag,comm);CHKERRQ(ierr);
3036     }
3037     ierr = PetscFree(procsnz);CHKERRQ(ierr);
3038   } else {
3039     /* receive numeric values */
3040     ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
3041 
3042     /* receive message of values*/
3043     mpicnt = PetscMPIIntCast(nz);
3044     ierr   = MPI_Recv(vals,mpicnt,MPIU_SCALAR,0,((PetscObject)A)->tag,comm,&status);CHKERRQ(ierr);
3045     ierr   = MPI_Get_count(&status,MPIU_SCALAR,&mpimaxnz);CHKERRQ(ierr);
3046     if (mpimaxnz == MPI_UNDEFINED) {SETERRQ1(PETSC_ERR_LIB,"MPI_Get_count() returned MPI_UNDEFINED, expected %d",mpicnt);}
3047     else if (mpimaxnz < 0) {SETERRQ2(PETSC_ERR_LIB,"MPI_Get_count() returned impossible negative value %d, expected %d",mpimaxnz,mpicnt);}
3048     else if (mpimaxnz != mpicnt) {SETERRQ2(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file: expected %d received %d",mpicnt,mpimaxnz);}
3049 
3050     /* insert into matrix */
3051     jj      = rstart;
3052     smycols = mycols;
3053     svals   = vals;
3054     for (i=0; i<m; i++) {
3055       ierr     = MatSetValues_MPIAIJ(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
3056       smycols += ourlens[i];
3057       svals   += ourlens[i];
3058       jj++;
3059     }
3060   }
3061   ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr);
3062   ierr = PetscFree(vals);CHKERRQ(ierr);
3063   ierr = PetscFree(mycols);CHKERRQ(ierr);
3064   ierr = PetscFree(rowners);CHKERRQ(ierr);
3065 
3066   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3067   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3068   *newmat = A;
3069   PetscFunctionReturn(0);
3070 }
3071 
3072 #undef __FUNCT__
3073 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ"
3074 PetscErrorCode MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat)
3075 {
3076   PetscErrorCode ierr;
3077   IS             iscol_local;
3078   PetscInt       csize;
3079 
3080   PetscFunctionBegin;
3081   ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
3082   if (call == MAT_REUSE_MATRIX) {
3083     ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr);
3084     if (!iscol_local) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3085   } else {
3086     ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
3087   }
3088   ierr = MatGetSubMatrix_MPIAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr);
3089   if (call == MAT_INITIAL_MATRIX) {
3090     ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
3091     ierr = ISDestroy(iscol_local);CHKERRQ(ierr);
3092   }
3093   PetscFunctionReturn(0);
3094 }
3095 
3096 #undef __FUNCT__
3097 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ_Private"
3098 /*
    Not great since it makes two copies of the submatrix: first a SeqAIJ matrix
  on each process, and then the final result obtained by concatenating those
  local matrices. Writing it directly would be much like MatGetSubMatrices_MPIAIJ().
3102 
3103   Note: This requires a sequential iscol with all indices.
3104 */
3105 PetscErrorCode MatGetSubMatrix_MPIAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat)
3106 {
3107   PetscErrorCode ierr;
3108   PetscMPIInt    rank,size;
3109   PetscInt       i,m,n,rstart,row,rend,nz,*cwork,j;
3110   PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal;
3111   Mat            *local,M,Mreuse;
3112   MatScalar      *vwork,*aa;
3113   MPI_Comm       comm = ((PetscObject)mat)->comm;
3114   Mat_SeqAIJ     *aij;
3115 
3116 
3117   PetscFunctionBegin;
3118   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3119   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3120 
3121   if (call ==  MAT_REUSE_MATRIX) {
3122     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr);
3123     if (!Mreuse) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3124     local = &Mreuse;
3125     ierr  = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);CHKERRQ(ierr);
3126   } else {
3127     ierr   = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr);
3128     Mreuse = *local;
3129     ierr   = PetscFree(local);CHKERRQ(ierr);
3130   }
3131 
3132   /*
3133       m - number of local rows
3134       n - number of columns (same on all processors)
3135       rstart - first row in new global matrix generated
3136   */
3137   ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
3138   if (call == MAT_INITIAL_MATRIX) {
3139     aij = (Mat_SeqAIJ*)(Mreuse)->data;
3140     ii  = aij->i;
3141     jj  = aij->j;
3142 
3143     /*
3144         Determine the number of non-zeros in the diagonal and off-diagonal
3145         portions of the matrix in order to do correct preallocation
3146     */
3147 
3148     /* first get start and end of "diagonal" columns */
3149     if (csize == PETSC_DECIDE) {
3150       ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
3151       if (mglobal == n) { /* square matrix */
3152 	nlocal = m;
3153       } else {
3154         nlocal = n/size + ((n % size) > rank);
3155       }
3156     } else {
3157       nlocal = csize;
3158     }
3159     ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3160     rstart = rend - nlocal;
3161     if (rank == size - 1 && rend != n) {
3162       SETERRQ2(PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n);
3163     }
3164 
3165     /* next, compute all the lengths */
3166     ierr  = PetscMalloc((2*m+1)*sizeof(PetscInt),&dlens);CHKERRQ(ierr);
3167     olens = dlens + m;
3168     for (i=0; i<m; i++) {
3169       jend = ii[i+1] - ii[i];
3170       olen = 0;
3171       dlen = 0;
3172       for (j=0; j<jend; j++) {
3173         if (*jj < rstart || *jj >= rend) olen++;
3174         else dlen++;
3175         jj++;
3176       }
3177       olens[i] = olen;
3178       dlens[i] = dlen;
3179     }
3180     ierr = MatCreate(comm,&M);CHKERRQ(ierr);
3181     ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr);
3182     ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr);
3183     ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr);
3184     ierr = PetscFree(dlens);CHKERRQ(ierr);
3185   } else {
3186     PetscInt ml,nl;
3187 
3188     M = *newmat;
3189     ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
3190     if (ml != m) SETERRQ(PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
3191     ierr = MatZeroEntries(M);CHKERRQ(ierr);
3192     /*
3193          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
3194        rather than the slower MatSetValues().
3195     */
3196     M->was_assembled = PETSC_TRUE;
3197     M->assembled     = PETSC_FALSE;
3198   }
3199   ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
3200   aij = (Mat_SeqAIJ*)(Mreuse)->data;
3201   ii  = aij->i;
3202   jj  = aij->j;
3203   aa  = aij->a;
3204   for (i=0; i<m; i++) {
3205     row   = rstart + i;
3206     nz    = ii[i+1] - ii[i];
3207     cwork = jj;     jj += nz;
3208     vwork = aa;     aa += nz;
3209     ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
3210   }
3211 
3212   ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3213   ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3214   *newmat = M;
3215 
3216   /* save submatrix used in processor for next request */
3217   if (call ==  MAT_INITIAL_MATRIX) {
3218     ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
3219     ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr);
3220   }
3221 
3222   PetscFunctionReturn(0);
3223 }
3224 
3225 EXTERN_C_BEGIN
3226 #undef __FUNCT__
3227 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR_MPIAIJ"
3228 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[])
3229 {
3230   PetscInt       m,cstart, cend,j,nnz,i,d;
3231   PetscInt       *d_nnz,*o_nnz,nnz_max = 0,rstart,ii;
3232   const PetscInt *JJ;
3233   PetscScalar    *values;
3234   PetscErrorCode ierr;
3235 
3236   PetscFunctionBegin;
3237   if (Ii[0]) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0 it is %D",Ii[0]);
3238 
3239   ierr = PetscLayoutSetBlockSize(B->rmap,1);CHKERRQ(ierr);
3240   ierr = PetscLayoutSetBlockSize(B->cmap,1);CHKERRQ(ierr);
3241   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3242   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3243   m      = B->rmap->n;
3244   cstart = B->cmap->rstart;
3245   cend   = B->cmap->rend;
3246   rstart = B->rmap->rstart;
3247 
3248   ierr  = PetscMalloc((2*m+1)*sizeof(PetscInt),&d_nnz);CHKERRQ(ierr);
3249   o_nnz = d_nnz + m;
3250 
#if defined(PETSC_USE_DEBUG)
3252   for (i=0; i<m; i++) {
3253     nnz     = Ii[i+1]- Ii[i];
3254     JJ      = J + Ii[i];
3255     if (nnz < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative %D number of columns",i,nnz);
    if (nnz && (JJ[0] < 0)) SETERRQ1(PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index",i);
    if (nnz && (JJ[nnz-1] >= B->cmap->N)) SETERRQ3(PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N);
3258   }
3259 #endif
3260 
3261   for (i=0; i<m; i++) {
3262     nnz     = Ii[i+1]- Ii[i];
3263     JJ      = J + Ii[i];
3264     nnz_max = PetscMax(nnz_max,nnz);
3265     d       = 0;
3266     for (j=0; j<nnz; j++) {
3267       if (cstart <= JJ[j] && JJ[j] < cend) d++;
3268     }
3269     d_nnz[i] = d;
3270     o_nnz[i] = nnz - d;
3271   }
3272   ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
3273   ierr = PetscFree(d_nnz);CHKERRQ(ierr);
3274 
3275   if (v) values = (PetscScalar*)v;
3276   else {
3277     ierr = PetscMalloc((nnz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr);
3278     ierr = PetscMemzero(values,nnz_max*sizeof(PetscScalar));CHKERRQ(ierr);
3279   }
3280 
3281   for (i=0; i<m; i++) {
3282     ii   = i + rstart;
3283     nnz  = Ii[i+1]- Ii[i];
3284     ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ? Ii[i] : 0),INSERT_VALUES);CHKERRQ(ierr);
3285   }
3286   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3287   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3288 
3289   if (!v) {
3290     ierr = PetscFree(values);CHKERRQ(ierr);
3291   }
3292   PetscFunctionReturn(0);
3293 }
3294 EXTERN_C_END
3295 
3296 #undef __FUNCT__
3297 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR"
3298 /*@
3299    MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format
3300    (the default parallel PETSc format).
3301 
3302    Collective on MPI_Comm
3303 
3304    Input Parameters:
3305 +  B - the matrix
3306 .  i - the indices into j for the start of each local row (starts with zero)
3307 .  j - the column indices for each local row (starts with zero)
3308 -  v - optional values in the matrix
3309 
3310    Level: developer
3311 
3312    Notes:
       The i, j, and v arrays ARE copied by this routine into the internal format used by PETSc;
     thus you CANNOT change the matrix entries by changing the values of v[] after you have
     called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.

       The i and j indices are 0 based, and the i indices are offsets into the local j array.

       The format used for the sparse matrix input is equivalent to a
    row-major ordering, i.e., for the following matrix, the input data expected is
    as shown:
3322 
3323         1 0 0
3324         2 0 3     P0
3325        -------
3326         4 5 6     P1
3327 
3328      Process0 [P0]: rows_owned=[0,1]
        i =  {0,1,3}  [size = nrow+1 = 2+1]
        j =  {0,0,2}  [size = local nz = 3]
        v =  {1,2,3}  [size = local nz = 3]

     Process1 [P1]: rows_owned=[2]
        i =  {0,3}    [size = nrow+1 = 1+1]
        j =  {0,1,2}  [size = local nz = 3]
        v =  {4,5,6}  [size = local nz = 3]
3337 
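     For instance, a minimal calling sketch for process 0 of the example above
     (assuming B already has its sizes set with MatSetSizes() and its type set to
     MATMPIAIJ, and that ierr is a PetscErrorCode; process 1 passes its own arrays
     analogously):

.vb
     PetscInt    i[] = {0,1,3};   /* local row offsets for P0               */
     PetscInt    j[] = {0,0,2};   /* global column indices of P0's nonzeros */
     PetscScalar v[] = {1,2,3};   /* numerical values of P0's nonzeros      */

     ierr = MatMPIAIJSetPreallocationCSR(B,i,j,v);CHKERRQ(ierr);
.ve
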
3338 .keywords: matrix, aij, compressed row, sparse, parallel
3339 
3340 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateMPIAIJ(), MPIAIJ,
3341           MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays()
3342 @*/
3343 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
3344 {
3345   PetscErrorCode ierr,(*f)(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]);
3346 
3347   PetscFunctionBegin;
3348   ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",(void (**)(void))&f);CHKERRQ(ierr);
3349   if (f) {
3350     ierr = (*f)(B,i,j,v);CHKERRQ(ierr);
3351   }
3352   PetscFunctionReturn(0);
3353 }
3354 
3355 #undef __FUNCT__
3356 #define __FUNCT__ "MatMPIAIJSetPreallocation"
3357 /*@C
3358    MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format
3359    (the default parallel PETSc format).  For good matrix assembly performance
3360    the user should preallocate the matrix storage by setting the parameters
3361    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3362    performance can be increased by more than a factor of 50.
3363 
3364    Collective on MPI_Comm
3365 
3366    Input Parameters:
3367 +  A - the matrix
3368 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
3369            (same value is used for all local rows)
3370 .  d_nnz - array containing the number of nonzeros in the various rows of the
3371            DIAGONAL portion of the local submatrix (possibly different for each row)
3372            or PETSC_NULL, if d_nz is used to specify the nonzero structure.
3373            The size of this array is equal to the number of local rows, i.e 'm'.
3374            You must leave room for the diagonal entry even if it is zero.
3375 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
3376            submatrix (same value is used for all local rows).
3377 -  o_nnz - array containing the number of nonzeros in the various rows of the
3378            OFF-DIAGONAL portion of the local submatrix (possibly different for
3379            each row) or PETSC_NULL, if o_nz is used to specify the nonzero
3380            structure. The size of this array is equal to the number
3381            of local rows, i.e 'm'.
3382 
   Notes:
   If the *_nnz parameter is given then the *_nz parameter is ignored
3384 
3385    The AIJ format (also called the Yale sparse matrix format or
3386    compressed row storage (CSR)), is fully compatible with standard Fortran 77
3387    storage.  The stored row and column indices begin with zero.  See the users manual for details.
3388 
3389    The parallel matrix is partitioned such that the first m0 rows belong to
3390    process 0, the next m1 rows belong to process 1, the next m2 rows belong
   to process 2 etc., where m0,m1,m2,... are the input parameter 'm'.
3392 
   The DIAGONAL portion of the local submatrix of a processor can be defined
   as the submatrix which is obtained by extracting the part corresponding
   to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the
   first row that belongs to the processor, and r2 is the last row belonging
   to this processor. This is a square mxm matrix. The remaining portion
   of the local submatrix (mxN) constitutes the OFF-DIAGONAL portion.
3399 
3400    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
3401 
3402    You can call MatGetInfo() to get information on how effective the preallocation was;
3403    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
3404    You can also run with the option -info and look for messages with the string
3405    malloc in them to see if additional memory allocation was needed.
3406 
3407    Example usage:
3408 
   Consider the following 8x8 matrix with 34 nonzero values, that is
   assembled across 3 processors. Let's assume that proc0 owns 3 rows,
   proc1 owns 3 rows, and proc2 owns 2 rows. This division can be shown
3412    as follows:
3413 
3414 .vb
3415             1  2  0  |  0  3  0  |  0  4
3416     Proc0   0  5  6  |  7  0  0  |  8  0
3417             9  0 10  | 11  0  0  | 12  0
3418     -------------------------------------
3419            13  0 14  | 15 16 17  |  0  0
3420     Proc1   0 18  0  | 19 20 21  |  0  0
3421             0  0  0  | 22 23  0  | 24  0
3422     -------------------------------------
3423     Proc2  25 26 27  |  0  0 28  | 29  0
3424            30  0  0  | 31 32 33  |  0 34
3425 .ve
3426 
3427    This can be represented as a collection of submatrices as:
3428 
3429 .vb
3430       A B C
3431       D E F
3432       G H I
3433 .ve
3434 
3435    Where the submatrices A,B,C are owned by proc0, D,E,F are
3436    owned by proc1, G,H,I are owned by proc2.
3437 
3438    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3439    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3440    The 'M','N' parameters are 8,8, and have the same values on all procs.
3441 
3442    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
3443    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
3444    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
3445    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
   part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
   matrix and [DF] as another SeqAIJ matrix.
3448 
3449    When d_nz, o_nz parameters are specified, d_nz storage elements are
3450    allocated for every row of the local diagonal submatrix, and o_nz
   storage locations are allocated for every row of the OFF-DIAGONAL submatrix.
   One way to choose d_nz and o_nz is to use the maximum number of nonzeros over
   the local rows of the DIAGONAL and OFF-DIAGONAL submatrices, respectively.
3454    In this case, the values of d_nz,o_nz are:
3455 .vb
3456      proc0 : dnz = 2, o_nz = 2
3457      proc1 : dnz = 3, o_nz = 2
3458      proc2 : dnz = 1, o_nz = 4
3459 .ve
3460    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
   translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
   for proc2, i.e., we are using 12+15+10=37 storage locations to store
   34 values.
3464 
3465    When d_nnz, o_nnz parameters are specified, the storage is specified
   for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
3467    In the above case the values for d_nnz,o_nnz are:
3468 .vb
3469      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
3470      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
3471      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
3472 .ve
   Here the space allocated is the sum of all the above values, i.e., 34, and
   hence preallocation is perfect.
3475 
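   As a minimal sketch of the d_nnz/o_nnz variant (the arrays shown are proc1's values
   from the example above; each process passes its own arrays, A is assumed to already
   be an 8x8 MATMPIAIJ with 3 local rows on this process, and ierr is a PetscErrorCode):

.vb
     PetscInt d_nnz[] = {3,3,2};  /* DIAGONAL nonzeros in proc1's three local rows     */
     PetscInt o_nnz[] = {2,1,1};  /* OFF-DIAGONAL nonzeros in proc1's three local rows */

     ierr = MatMPIAIJSetPreallocation(A,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
.ve
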
3476    Level: intermediate
3477 
3478 .keywords: matrix, aij, compressed row, sparse, parallel
3479 
3480 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIAIJ(), MatMPIAIJSetPreallocationCSR(),
3481           MPIAIJ, MatGetInfo()
3482 @*/
3483 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
3484 {
3485   PetscErrorCode ierr,(*f)(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]);
3486 
3487   PetscFunctionBegin;
3488   ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr);
3489   if (f) {
3490     ierr = (*f)(B,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
3491   }
3492   PetscFunctionReturn(0);
3493 }
3494 
3495 #undef __FUNCT__
3496 #define __FUNCT__ "MatCreateMPIAIJWithArrays"
3497 /*@
3498      MatCreateMPIAIJWithArrays - creates a MPI AIJ matrix using arrays that contain in standard
3499          CSR format the local rows.
3500 
3501    Collective on MPI_Comm
3502 
3503    Input Parameters:
3504 +  comm - MPI communicator
3505 .  m - number of local rows (Cannot be PETSC_DECIDE)
3506 .  n - This value should be the same as the local size used in creating the
       x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have it
       calculated if N is given). For square matrices n is almost always m.
3509 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
3510 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
3511 .   i - row indices
3512 .   j - column indices
3513 -   a - matrix values
3514 
3515    Output Parameter:
3516 .   mat - the matrix
3517 
3518    Level: intermediate
3519 
3520    Notes:
3521        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
3522      thus you CANNOT change the matrix entries by changing the values of a[] after you have
3523      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3524 
       The i and j indices are 0 based, and the i indices are offsets into the local j array.

       The format used for the sparse matrix input is equivalent to a
    row-major ordering, i.e., for the following matrix, the input data expected is
    as shown:
3530 
3531         1 0 0
3532         2 0 3     P0
3533        -------
3534         4 5 6     P1
3535 
3536      Process0 [P0]: rows_owned=[0,1]
        i =  {0,1,3}  [size = nrow+1 = 2+1]
        j =  {0,0,2}  [size = local nz = 3]
        v =  {1,2,3}  [size = local nz = 3]

     Process1 [P1]: rows_owned=[2]
        i =  {0,3}    [size = nrow+1 = 1+1]
        j =  {0,1,2}  [size = local nz = 3]
        v =  {4,5,6}  [size = local nz = 3]
3545 
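     For instance, a minimal sketch of the call made by process 0 of the example above
     (process 1 passes m=1 and its own arrays; comm is the communicator the matrix
     should live on and ierr is a PetscErrorCode):

.vb
     PetscInt    i[] = {0,1,3};
     PetscInt    j[] = {0,0,2};
     PetscScalar v[] = {1,2,3};
     Mat         A;

     ierr = MatCreateMPIAIJWithArrays(comm,2,PETSC_DECIDE,PETSC_DETERMINE,3,i,j,v,&A);CHKERRQ(ierr);
.ve
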
3546 .keywords: matrix, aij, compressed row, sparse, parallel
3547 
3548 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
3549           MPIAIJ, MatCreateMPIAIJ(), MatCreateMPIAIJWithSplitArrays()
3550 @*/
3551 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
3552 {
3553   PetscErrorCode ierr;
3554 
3555  PetscFunctionBegin;
3556   if (i[0]) {
3557     SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
3558   }
3559   if (m < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
3560   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
3561   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
3562   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
3563   ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr);
3564   PetscFunctionReturn(0);
3565 }
3566 
3567 #undef __FUNCT__
3568 #define __FUNCT__ "MatCreateMPIAIJ"
3569 /*@C
3570    MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format
3571    (the default parallel PETSc format).  For good matrix assembly performance
3572    the user should preallocate the matrix storage by setting the parameters
3573    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3574    performance can be increased by more than a factor of 50.
3575 
3576    Collective on MPI_Comm
3577 
3578    Input Parameters:
3579 +  comm - MPI communicator
.  m - number of local rows (or PETSC_DECIDE to have it calculated if M is given)
3581            This value should be the same as the local size used in creating the
3582            y vector for the matrix-vector product y = Ax.
3583 .  n - This value should be the same as the local size used in creating the
       x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have it
       calculated if N is given). For square matrices n is almost always m.
3586 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
3587 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
3588 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
3589            (same value is used for all local rows)
3590 .  d_nnz - array containing the number of nonzeros in the various rows of the
3591            DIAGONAL portion of the local submatrix (possibly different for each row)
3592            or PETSC_NULL, if d_nz is used to specify the nonzero structure.
3593            The size of this array is equal to the number of local rows, i.e 'm'.
3594            You must leave room for the diagonal entry even if it is zero.
3595 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
3596            submatrix (same value is used for all local rows).
3597 -  o_nnz - array containing the number of nonzeros in the various rows of the
3598            OFF-DIAGONAL portion of the local submatrix (possibly different for
3599            each row) or PETSC_NULL, if o_nz is used to specify the nonzero
3600            structure. The size of this array is equal to the number
3601            of local rows, i.e 'm'.
3602 
3603    Output Parameter:
3604 .  A - the matrix
3605 
3606    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
   MatXXXXSetPreallocation() paradigm instead of this routine directly.
3608    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
3609 
3610    Notes:
3611    If the *_nnz parameter is given then the *_nz parameter is ignored
3612 
3613    m,n,M,N parameters specify the size of the matrix, and its partitioning across
3614    processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
3615    storage requirements for this matrix.
3616 
   If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one
   processor then it must be used on all processors that share the object for
3619    that argument.
3620 
3621    The user MUST specify either the local or global matrix dimensions
3622    (possibly both).
3623 
3624    The parallel matrix is partitioned across processors such that the
3625    first m0 rows belong to process 0, the next m1 rows belong to
   process 1, the next m2 rows belong to process 2, etc., where
   m0,m1,m2,... are the input parameter 'm', i.e., each processor stores
   values corresponding to an [m x N] submatrix.
3629 
3630    The columns are logically partitioned with the n0 columns belonging
   to the 0th partition, the next n1 columns belonging to the next
   partition, etc., where n0,n1,n2,... are the input parameter 'n'.
3633 
   The DIAGONAL portion of the local submatrix on any given processor
   is the submatrix corresponding to the rows and columns m,n
   owned by the given processor, i.e., the diagonal matrix on
   process 0 is [m0 x n0], the diagonal matrix on process 1 is [m1 x n1],
3638    etc. The remaining portion of the local submatrix [m x (N-n)]
3639    constitute the OFF-DIAGONAL portion. The example below better
3640    illustrates this concept.
3641 
3642    For a square global matrix we define each processor's diagonal portion
3643    to be its local rows and the corresponding columns (a square submatrix);
3644    each processor's off-diagonal portion encompasses the remainder of the
3645    local matrix (a rectangular submatrix).
3646 
3647    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
3648 
3649    When calling this routine with a single process communicator, a matrix of
3650    type SEQAIJ is returned.  If a matrix of type MPIAIJ is desired for this
3651    type of communicator, use the construction mechanism:
3652      MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatSetSizes(A, m,n,M,N); MatMPIAIJSetPreallocation(A,...);
3653 
3654    By default, this format uses inodes (identical nodes) when possible.
3655    We search for consecutive rows with the same nonzero structure, thereby
3656    reusing matrix information to achieve increased efficiency.
3657 
3658    Options Database Keys:
3659 +  -mat_no_inode  - Do not use inodes
3660 .  -mat_inode_limit <limit> - Sets inode limit (max limit=5)
3661 -  -mat_aij_oneindex - Internally use indexing starting at 1
3662         rather than 0.  Note that when calling MatSetValues(),
3663         the user still MUST index entries starting at 0!
3664 
3665 
3666    Example usage:
3667 
   Consider the following 8x8 matrix with 34 nonzero values, that is
   assembled across 3 processors. Let's assume that proc0 owns 3 rows,
   proc1 owns 3 rows, and proc2 owns 2 rows. This division can be shown
3671    as follows:
3672 
3673 .vb
3674             1  2  0  |  0  3  0  |  0  4
3675     Proc0   0  5  6  |  7  0  0  |  8  0
3676             9  0 10  | 11  0  0  | 12  0
3677     -------------------------------------
3678            13  0 14  | 15 16 17  |  0  0
3679     Proc1   0 18  0  | 19 20 21  |  0  0
3680             0  0  0  | 22 23  0  | 24  0
3681     -------------------------------------
3682     Proc2  25 26 27  |  0  0 28  | 29  0
3683            30  0  0  | 31 32 33  |  0 34
3684 .ve
3685 
3686    This can be represented as a collection of submatrices as:
3687 
3688 .vb
3689       A B C
3690       D E F
3691       G H I
3692 .ve
3693 
3694    Where the submatrices A,B,C are owned by proc0, D,E,F are
3695    owned by proc1, G,H,I are owned by proc2.
3696 
3697    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3698    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3699    The 'M','N' parameters are 8,8, and have the same values on all procs.
3700 
3701    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
3702    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
3703    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
3704    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
   part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
   matrix and [DF] as another SeqAIJ matrix.
3707 
3708    When d_nz, o_nz parameters are specified, d_nz storage elements are
3709    allocated for every row of the local diagonal submatrix, and o_nz
   storage locations are allocated for every row of the OFF-DIAGONAL submatrix.
   One way to choose d_nz and o_nz is to use the maximum number of nonzeros over
   the local rows of the DIAGONAL and OFF-DIAGONAL submatrices, respectively.
3713    In this case, the values of d_nz,o_nz are:
3714 .vb
3715      proc0 : dnz = 2, o_nz = 2
3716      proc1 : dnz = 3, o_nz = 2
3717      proc2 : dnz = 1, o_nz = 4
3718 .ve
3719    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
   translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
   for proc2, i.e., we are using 12+15+10=37 storage locations to store
   34 values.
3723 
3724    When d_nnz, o_nnz parameters are specified, the storage is specified
   for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
3726    In the above case the values for d_nnz,o_nnz are:
3727 .vb
3728      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
3729      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
3730      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
3731 .ve
   Here the space allocated is the sum of all the above values, i.e., 34, and
   hence preallocation is perfect.
3734 
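   As a minimal sketch using the per-process scalar d_nz/o_nz values listed above (each
   process passes its own values; the call shown is the one proc0 would make for the
   8x8 example, with comm and ierr assumed to be set up already):

.vb
     Mat A;

     /* proc0: 3 local rows and columns of an 8x8 matrix; at most 2 DIAGONAL and
        2 OFF-DIAGONAL nonzeros per local row */
     ierr = MatCreateMPIAIJ(comm,3,3,8,8,2,PETSC_NULL,2,PETSC_NULL,&A);CHKERRQ(ierr);
.ve
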
3735    Level: intermediate
3736 
3737 .keywords: matrix, aij, compressed row, sparse, parallel
3738 
3739 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
3740           MPIAIJ, MatCreateMPIAIJWithArrays()
3741 @*/
3742 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
3743 {
3744   PetscErrorCode ierr;
3745   PetscMPIInt    size;
3746 
3747   PetscFunctionBegin;
3748   ierr = MatCreate(comm,A);CHKERRQ(ierr);
3749   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
3750   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3751   if (size > 1) {
3752     ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr);
3753     ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
3754   } else {
3755     ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr);
3756     ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr);
3757   }
3758   PetscFunctionReturn(0);
3759 }
3760 
3761 #undef __FUNCT__
3762 #define __FUNCT__ "MatMPIAIJGetSeqAIJ"
3763 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,PetscInt *colmap[])
3764 {
3765   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
3766 
3767   PetscFunctionBegin;
3768   *Ad     = a->A;
3769   *Ao     = a->B;
3770   *colmap = a->garray;
3771   PetscFunctionReturn(0);
3772 }
3773 
3774 #undef __FUNCT__
3775 #define __FUNCT__ "MatSetColoring_MPIAIJ"
3776 PetscErrorCode MatSetColoring_MPIAIJ(Mat A,ISColoring coloring)
3777 {
3778   PetscErrorCode ierr;
3779   PetscInt       i;
3780   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
3781 
3782   PetscFunctionBegin;
3783   if (coloring->ctype == IS_COLORING_GLOBAL) {
3784     ISColoringValue *allcolors,*colors;
3785     ISColoring      ocoloring;
3786 
3787     /* set coloring for diagonal portion */
3788     ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr);
3789 
3790     /* set coloring for off-diagonal portion */
3791     ierr = ISAllGatherColors(((PetscObject)A)->comm,coloring->n,coloring->colors,PETSC_NULL,&allcolors);CHKERRQ(ierr);
3792     ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
3793     for (i=0; i<a->B->cmap->n; i++) {
3794       colors[i] = allcolors[a->garray[i]];
3795     }
3796     ierr = PetscFree(allcolors);CHKERRQ(ierr);
3797     ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
3798     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
3799     ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
3800   } else if (coloring->ctype == IS_COLORING_GHOSTED) {
3801     ISColoringValue *colors;
3802     PetscInt        *larray;
3803     ISColoring      ocoloring;
3804 
3805     /* set coloring for diagonal portion */
3806     ierr = PetscMalloc((a->A->cmap->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr);
3807     for (i=0; i<a->A->cmap->n; i++) {
3808       larray[i] = i + A->cmap->rstart;
3809     }
3810     ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->A->cmap->n,larray,PETSC_NULL,larray);CHKERRQ(ierr);
3811     ierr = PetscMalloc((a->A->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
3812     for (i=0; i<a->A->cmap->n; i++) {
3813       colors[i] = coloring->colors[larray[i]];
3814     }
3815     ierr = PetscFree(larray);CHKERRQ(ierr);
3816     ierr = ISColoringCreate(PETSC_COMM_SELF,coloring->n,a->A->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
3817     ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr);
3818     ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
3819 
3820     /* set coloring for off-diagonal portion */
3821     ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr);
3822     ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->B->cmap->n,a->garray,PETSC_NULL,larray);CHKERRQ(ierr);
3823     ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
3824     for (i=0; i<a->B->cmap->n; i++) {
3825       colors[i] = coloring->colors[larray[i]];
3826     }
3827     ierr = PetscFree(larray);CHKERRQ(ierr);
3828     ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
3829     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
3830     ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
3831   } else {
3832     SETERRQ1(PETSC_ERR_SUP,"No support ISColoringType %d",(int)coloring->ctype);
3833   }
3834 
3835   PetscFunctionReturn(0);
3836 }
3837 
3838 #if defined(PETSC_HAVE_ADIC)
3839 #undef __FUNCT__
3840 #define __FUNCT__ "MatSetValuesAdic_MPIAIJ"
3841 PetscErrorCode MatSetValuesAdic_MPIAIJ(Mat A,void *advalues)
3842 {
3843   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
3844   PetscErrorCode ierr;
3845 
3846   PetscFunctionBegin;
3847   ierr = MatSetValuesAdic_SeqAIJ(a->A,advalues);CHKERRQ(ierr);
3848   ierr = MatSetValuesAdic_SeqAIJ(a->B,advalues);CHKERRQ(ierr);
3849   PetscFunctionReturn(0);
3850 }
3851 #endif
3852 
3853 #undef __FUNCT__
3854 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ"
3855 PetscErrorCode MatSetValuesAdifor_MPIAIJ(Mat A,PetscInt nl,void *advalues)
3856 {
3857   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
3858   PetscErrorCode ierr;
3859 
3860   PetscFunctionBegin;
3861   ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr);
3862   ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr);
3863   PetscFunctionReturn(0);
3864 }
3865 
3866 #undef __FUNCT__
3867 #define __FUNCT__ "MatMerge"
3868 /*@
      MatMerge - Creates a single large PETSc matrix by concatenating sequential
                 matrices from each processor
3871 
3872     Collective on MPI_Comm
3873 
3874    Input Parameters:
+    comm - the communicator the parallel matrix will live on
.    inmat - the input sequential matrix (one per process)
3877 .    n - number of local columns (or PETSC_DECIDE)
3878 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
3879 
3880    Output Parameter:
3881 .    outmat - the parallel matrix generated
3882 
3883     Level: advanced
3884 
   Notes: The number of columns of the matrix on EACH processor MUST be the same.
          The input matrix inmat is destroyed by this routine once the parallel matrix has been assembled.
3886 
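   A minimal calling sketch (localmat is this process' sequential piece; comm and ierr
   are assumed to be set up already):

.vb
     Mat outmat;

     ierr = MatMerge(comm,localmat,PETSC_DECIDE,MAT_INITIAL_MATRIX,&outmat);CHKERRQ(ierr);
.ve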
3887 @*/
3888 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
3889 {
3890   PetscErrorCode ierr;
3891   PetscInt       m,N,i,rstart,nnz,Ii,*dnz,*onz;
3892   PetscInt       *indx;
3893   PetscScalar    *values;
3894 
3895   PetscFunctionBegin;
3896   ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
3897   if (scall == MAT_INITIAL_MATRIX){
3898     /* count nonzeros in each row, for diagonal and off diagonal portion of matrix */
3899     if (n == PETSC_DECIDE){
3900       ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr);
3901     }
3902     ierr = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3903     rstart -= m;
3904 
3905     ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
3906     for (i=0;i<m;i++) {
3907       ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr);
3908       ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr);
3909       ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr);
3910     }
3911     /* This routine will ONLY return MPIAIJ type matrix */
3912     ierr = MatCreate(comm,outmat);CHKERRQ(ierr);
3913     ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
3914     ierr = MatSetType(*outmat,MATMPIAIJ);CHKERRQ(ierr);
3915     ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr);
3916     ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
3917 
3918   } else if (scall == MAT_REUSE_MATRIX){
3919     ierr = MatGetOwnershipRange(*outmat,&rstart,PETSC_NULL);CHKERRQ(ierr);
3920   } else {
3921     SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
3922   }
3923 
3924   for (i=0;i<m;i++) {
3925     ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
3926     Ii    = i + rstart;
3927     ierr = MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
3928     ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
3929   }
3930   ierr = MatDestroy(inmat);CHKERRQ(ierr);
3931   ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3932   ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3933 
3934   PetscFunctionReturn(0);
3935 }
3936 
3937 #undef __FUNCT__
3938 #define __FUNCT__ "MatFileSplit"
3939 PetscErrorCode MatFileSplit(Mat A,char *outfile)
3940 {
3941   PetscErrorCode    ierr;
3942   PetscMPIInt       rank;
3943   PetscInt          m,N,i,rstart,nnz;
3944   size_t            len;
3945   const PetscInt    *indx;
3946   PetscViewer       out;
3947   char              *name;
3948   Mat               B;
3949   const PetscScalar *values;
3950 
3951   PetscFunctionBegin;
3952   ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr);
3953   ierr = MatGetSize(A,0,&N);CHKERRQ(ierr);
3954   /* Should this be the type of the diagonal block of A? */
3955   ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr);
3956   ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr);
3957   ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr);
3958   ierr = MatSeqAIJSetPreallocation(B,0,PETSC_NULL);CHKERRQ(ierr);
3959   ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr);
3960   for (i=0;i<m;i++) {
3961     ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
3962     ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
3963     ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
3964   }
3965   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3966   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3967 
3968   ierr = MPI_Comm_rank(((PetscObject)A)->comm,&rank);CHKERRQ(ierr);
3969   ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr);
3970   ierr = PetscMalloc((len+5)*sizeof(char),&name);CHKERRQ(ierr);
3971   sprintf(name,"%s.%d",outfile,rank);
3972   ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr);
  ierr = PetscFree(name);CHKERRQ(ierr);
3974   ierr = MatView(B,out);CHKERRQ(ierr);
3975   ierr = PetscViewerDestroy(out);CHKERRQ(ierr);
3976   ierr = MatDestroy(B);CHKERRQ(ierr);
3977   PetscFunctionReturn(0);
3978 }
3979 
3980 EXTERN PetscErrorCode MatDestroy_MPIAIJ(Mat);
3981 #undef __FUNCT__
3982 #define __FUNCT__ "MatDestroy_MPIAIJ_SeqsToMPI"
3983 PetscErrorCode PETSCMAT_DLLEXPORT MatDestroy_MPIAIJ_SeqsToMPI(Mat A)
3984 {
3985   PetscErrorCode       ierr;
3986   Mat_Merge_SeqsToMPI  *merge;
3987   PetscContainer       container;
3988 
3989   PetscFunctionBegin;
3990   ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr);
3991   if (container) {
3992     ierr = PetscContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr);
3993     ierr = PetscFree(merge->id_r);CHKERRQ(ierr);
3994     ierr = PetscFree(merge->len_s);CHKERRQ(ierr);
3995     ierr = PetscFree(merge->len_r);CHKERRQ(ierr);
3996     ierr = PetscFree(merge->bi);CHKERRQ(ierr);
3997     ierr = PetscFree(merge->bj);CHKERRQ(ierr);
3998     ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr);
3999     ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr);
4000     ierr = PetscFree(merge->coi);CHKERRQ(ierr);
4001     ierr = PetscFree(merge->coj);CHKERRQ(ierr);
4002     ierr = PetscFree(merge->owners_co);CHKERRQ(ierr);
4003     ierr = PetscLayoutDestroy(merge->rowmap);CHKERRQ(ierr);
4004 
4005     ierr = PetscContainerDestroy(container);CHKERRQ(ierr);
4006     ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr);
4007   }
4008   ierr = PetscFree(merge);CHKERRQ(ierr);
4009 
4010   ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr);
4011   PetscFunctionReturn(0);
4012 }
4013 
4014 #include "../src/mat/utils/freespace.h"
4015 #include "petscbt.h"
4016 
4017 #undef __FUNCT__
4018 #define __FUNCT__ "MatMerge_SeqsToMPINumeric"
4019 /*@C
      MatMerge_SeqsToMPI - Creates an MPIAIJ matrix by adding together sequential
                 matrices from each processor
4022 
4023     Collective on MPI_Comm
4024 
4025    Input Parameters:
+    comm - the communicator the parallel matrix will live on
.    seqmat - the input sequential matrix (one per process)
4028 .    m - number of local rows (or PETSC_DECIDE)
4029 .    n - number of local columns (or PETSC_DECIDE)
4030 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4031 
4032    Output Parameter:
4033 .    mpimat - the parallel matrix generated
4034 
4035     Level: advanced
4036 
4037    Notes:
4038      The dimensions of the sequential matrix in each processor MUST be the same.
4039      The input seqmat is stored in the container "MatMergeSeqsToMPI" attached to mpimat, and will be
4040      destroyed when mpimat is destroyed. Call PetscObjectQuery() to access it.
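
   Example usage (a minimal sketch; how each process assembles its seqmat is application specific):
.vb
     Mat seqmat,mpimat;
     /* every process assembles its own M x N SeqAIJ matrix seqmat; the parallel result sums them */
     ierr = MatMerge_SeqsToMPI(comm,seqmat,PETSC_DECIDE,PETSC_DECIDE,MAT_INITIAL_MATRIX,&mpimat);CHKERRQ(ierr);
     /* with MAT_REUSE_MATRIX the call can be repeated after the values of seqmat change */
     ierr = MatDestroy(mpimat);CHKERRQ(ierr);
.ve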
4041 @*/
4042 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPINumeric(Mat seqmat,Mat mpimat)
4043 {
4044   PetscErrorCode       ierr;
4045   MPI_Comm             comm=((PetscObject)mpimat)->comm;
4046   Mat_SeqAIJ           *a=(Mat_SeqAIJ*)seqmat->data;
4047   PetscMPIInt          size,rank,taga,*len_s;
4048   PetscInt             N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj=a->j;
4049   PetscInt             proc,m;
4050   PetscInt             **buf_ri,**buf_rj;
4051   PetscInt             k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj;
4052   PetscInt             nrows,**buf_ri_k,**nextrow,**nextai;
4053   MPI_Request          *s_waits,*r_waits;
4054   MPI_Status           *status;
4055   MatScalar            *aa=a->a;
4056   MatScalar            **abuf_r,*ba_i;
4057   Mat_Merge_SeqsToMPI  *merge;
4058   PetscContainer       container;
4059 
4060   PetscFunctionBegin;
4061   ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
4062 
4063   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4064   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4065 
4066   ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr);
4067   if (!container) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"mpimat does not have the container MatMergeSeqsToMPI; call MatMerge_SeqsToMPISymbolic() first");
4068   ierr = PetscContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr);
4069
4070   bi     = merge->bi;
4071   bj     = merge->bj;
4072   buf_ri = merge->buf_ri;
4073   buf_rj = merge->buf_rj;
4074 
4075   ierr   = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr);
4076   owners = merge->rowmap->range;
4077   len_s  = merge->len_s;
4078 
4079   /* send and recv matrix values */
4080   /*-----------------------------*/
4081   ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr);
4082   ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr);
4083 
4084   ierr = PetscMalloc((merge->nsend+1)*sizeof(MPI_Request),&s_waits);CHKERRQ(ierr);
4085   for (proc=0,k=0; proc<size; proc++){
4086     if (!len_s[proc]) continue;
4087     i = owners[proc];
4088     ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr);
4089     k++;
4090   }
4091 
4092   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);}
4093   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);}
4094   ierr = PetscFree(status);CHKERRQ(ierr);
4095 
4096   ierr = PetscFree(s_waits);CHKERRQ(ierr);
4097   ierr = PetscFree(r_waits);CHKERRQ(ierr);
4098 
4099   /* insert mat values of mpimat */
4100   /*----------------------------*/
4101   ierr = PetscMalloc(N*sizeof(PetscScalar),&ba_i);CHKERRQ(ierr);
4102   ierr = PetscMalloc((3*merge->nrecv+1)*sizeof(PetscInt**),&buf_ri_k);CHKERRQ(ierr);
4103   nextrow = buf_ri_k + merge->nrecv;
4104   nextai  = nextrow + merge->nrecv;
4105 
4106   for (k=0; k<merge->nrecv; k++){
4107     buf_ri_k[k] = buf_ri[k]; /* beginning of the k-th received i-structure */
4108     nrows = *(buf_ri_k[k]);
4109     nextrow[k]  = buf_ri_k[k]+1;  /* next row number of the k-th received i-structure */
4110     nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the i-structure entries of the k-th received message */
4111   }
4112 
4113   /* set values of ba */
4114   m = merge->rowmap->n;
4115   for (i=0; i<m; i++) {
4116     arow = owners[rank] + i;
4117     bj_i = bj+bi[i];  /* col indices of the i-th row of mpimat */
4118     bnzi = bi[i+1] - bi[i];
4119     ierr = PetscMemzero(ba_i,bnzi*sizeof(PetscScalar));CHKERRQ(ierr);
4120 
4121     /* add local non-zero vals of this proc's seqmat into ba */
4122     anzi = ai[arow+1] - ai[arow];
4123     aj   = a->j + ai[arow];
4124     aa   = a->a + ai[arow];
4125     nextaj = 0;
4126     for (j=0; nextaj<anzi; j++){
4127       if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */
4128         ba_i[j] += aa[nextaj++];
4129       }
4130     }
4131 
4132     /* add received vals into ba */
4133     for (k=0; k<merge->nrecv; k++){ /* k-th received message */
4134       /* i-th row */
4135       if (i == *nextrow[k]) {
4136         anzi = *(nextai[k]+1) - *nextai[k];
4137         aj   = buf_rj[k] + *(nextai[k]);
4138         aa   = abuf_r[k] + *(nextai[k]);
4139         nextaj = 0;
4140         for (j=0; nextaj<anzi; j++){
4141           if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */
4142             ba_i[j] += aa[nextaj++];
4143           }
4144         }
4145         nextrow[k]++; nextai[k]++;
4146       }
4147     }
4148     ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr);
4149   }
4150   ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4151   ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4152 
4153   ierr = PetscFree(abuf_r);CHKERRQ(ierr);
4154   ierr = PetscFree(ba_i);CHKERRQ(ierr);
4155   ierr = PetscFree(buf_ri_k);CHKERRQ(ierr);
4156   ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
4157   PetscFunctionReturn(0);
4158 }
4159 
4160 #undef __FUNCT__
4161 #define __FUNCT__ "MatMerge_SeqsToMPISymbolic"
4162 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPISymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat)
4163 {
4164   PetscErrorCode       ierr;
4165   Mat                  B_mpi;
4166   Mat_SeqAIJ           *a=(Mat_SeqAIJ*)seqmat->data;
4167   PetscMPIInt          size,rank,tagi,tagj,*len_s,*len_si,*len_ri;
4168   PetscInt             **buf_rj,**buf_ri,**buf_ri_k;
4169   PetscInt             M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j;
4170   PetscInt             len,proc,*dnz,*onz;
4171   PetscInt             k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0;
4172   PetscInt             nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai;
4173   MPI_Request          *si_waits,*sj_waits,*ri_waits,*rj_waits;
4174   MPI_Status           *status;
4175   PetscFreeSpaceList   free_space=PETSC_NULL,current_space=PETSC_NULL;
4176   PetscBT              lnkbt;
4177   Mat_Merge_SeqsToMPI  *merge;
4178   PetscContainer       container;
4179 
4180   PetscFunctionBegin;
4181   ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4182 
4183   /* make sure it is a PETSc comm */
4184   ierr = PetscCommDuplicate(comm,&comm,PETSC_NULL);CHKERRQ(ierr);
4185   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4186   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4187 
4188   ierr = PetscNew(Mat_Merge_SeqsToMPI,&merge);CHKERRQ(ierr);
4189   ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr);
4190 
4191   /* determine row ownership */
4192   /*---------------------------------------------------------*/
4193   ierr = PetscLayoutCreate(comm,&merge->rowmap);CHKERRQ(ierr);
4194   ierr = PetscLayoutSetLocalSize(merge->rowmap,m);CHKERRQ(ierr);
4195   ierr = PetscLayoutSetSize(merge->rowmap,M);CHKERRQ(ierr);
4196   ierr = PetscLayoutSetBlockSize(merge->rowmap,1);CHKERRQ(ierr);
4197   ierr = PetscLayoutSetUp(merge->rowmap);CHKERRQ(ierr);
4198   ierr = PetscMalloc(size*sizeof(PetscMPIInt),&len_si);CHKERRQ(ierr);
4199   ierr = PetscMalloc(size*sizeof(PetscMPIInt),&merge->len_s);CHKERRQ(ierr);
4200 
4201   m      = merge->rowmap->n;
4202   M      = merge->rowmap->N;
4203   owners = merge->rowmap->range;
4204 
4205   /* determine the number of messages to send, their lengths */
4206   /*---------------------------------------------------------*/
4207   len_s  = merge->len_s;
4208 
4209   len = 0;  /* length of buf_si[] */
4210   merge->nsend = 0;
4211   for (proc=0; proc<size; proc++){
4212     len_si[proc] = 0;
4213     if (proc == rank){
4214       len_s[proc] = 0;
4215     } else {
4216       len_si[proc] = owners[proc+1] - owners[proc] + 1;
4217       len_s[proc] = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */
4218     }
4219     if (len_s[proc]) {
4220       merge->nsend++;
4221       nrows = 0;
4222       for (i=owners[proc]; i<owners[proc+1]; i++){
4223         if (ai[i+1] > ai[i]) nrows++;
4224       }
4225       len_si[proc] = 2*(nrows+1);
4226       len += len_si[proc];
4227     }
4228   }
4229 
4230   /* determine the number and length of messages to receive for ij-structure */
4231   /*-------------------------------------------------------------------------*/
4232   ierr = PetscGatherNumberOfMessages(comm,PETSC_NULL,len_s,&merge->nrecv);CHKERRQ(ierr);
4233   ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr);
4234 
4235   /* post the Irecv of j-structure */
4236   /*-------------------------------*/
4237   ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr);
4238   ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr);
4239 
4240   /* post the Isend of j-structure */
4241   /*--------------------------------*/
4242   ierr = PetscMalloc((2*merge->nsend+1)*sizeof(MPI_Request),&si_waits);CHKERRQ(ierr);
4243   sj_waits = si_waits + merge->nsend;
4244 
4245   for (proc=0, k=0; proc<size; proc++){
4246     if (!len_s[proc]) continue;
4247     i = owners[proc];
4248     ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr);
4249     k++;
4250   }
4251 
4252   /* receives and sends of j-structure are complete */
4253   /*------------------------------------------------*/
4254   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);}
4255   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);}
4256 
4257   /* send and recv i-structure */
4258   /*---------------------------*/
4259   ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr);
4260   ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr);
4261 
4262   ierr = PetscMalloc((len+1)*sizeof(PetscInt),&buf_s);CHKERRQ(ierr);
4263   buf_si = buf_s;  /* points to the beginning of k-th msg to be sent */
4264   for (proc=0,k=0; proc<size; proc++){
4265     if (!len_s[proc]) continue;
4266     /* form outgoing message for i-structure:
4267          buf_si[0]:                 nrows to be sent
4268                [1:nrows]:           row index (local to the destination process)
4269                [nrows+1:2*nrows+1]: i-structure index
4270     */
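    /* for example (hypothetical values): sending two nonzero rows with local indices 2 and 5 that
       hold 3 and 4 nonzeros gives buf_si = {2, 2,5, 0,3,7} */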
4271     /*-------------------------------------------*/
4272     nrows = len_si[proc]/2 - 1;
4273     buf_si_i    = buf_si + nrows+1;
4274     buf_si[0]   = nrows;
4275     buf_si_i[0] = 0;
4276     nrows = 0;
4277     for (i=owners[proc]; i<owners[proc+1]; i++){
4278       anzi = ai[i+1] - ai[i];
4279       if (anzi) {
4280         buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */
4281         buf_si[nrows+1] = i-owners[proc]; /* local row index */
4282         nrows++;
4283       }
4284     }
4285     ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr);
4286     k++;
4287     buf_si += len_si[proc];
4288   }
4289 
4290   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);}
4291   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);}
4292 
4293   ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr);
4294   for (i=0; i<merge->nrecv; i++){
4295     ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr);
4296   }
4297 
4298   ierr = PetscFree(len_si);CHKERRQ(ierr);
4299   ierr = PetscFree(len_ri);CHKERRQ(ierr);
4300   ierr = PetscFree(rj_waits);CHKERRQ(ierr);
4301   ierr = PetscFree(si_waits);CHKERRQ(ierr);
4302   ierr = PetscFree(ri_waits);CHKERRQ(ierr);
4303   ierr = PetscFree(buf_s);CHKERRQ(ierr);
4304   ierr = PetscFree(status);CHKERRQ(ierr);
4305 
4306   /* compute a local seq matrix in each processor */
4307   /*----------------------------------------------*/
4308   /* allocate bi array and free space for accumulating nonzero column info */
4309   ierr = PetscMalloc((m+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr);
4310   bi[0] = 0;
4311 
4312   /* create and initialize a linked list */
4313   nlnk = N+1;
4314   ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4315 
4316   /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */
4317   len = 0;
4318   len  = ai[owners[rank+1]] - ai[owners[rank]];
4319   ierr = PetscFreeSpaceGet((PetscInt)(2*len+1),&free_space);CHKERRQ(ierr);
4320   current_space = free_space;
4321 
4322   /* determine symbolic info for each local row */
4323   ierr = PetscMalloc((3*merge->nrecv+1)*sizeof(PetscInt**),&buf_ri_k);CHKERRQ(ierr);
4324   nextrow = buf_ri_k + merge->nrecv;
4325   nextai  = nextrow + merge->nrecv;
4326   for (k=0; k<merge->nrecv; k++){
4327     buf_ri_k[k] = buf_ri[k]; /* beginning of the k-th received i-structure */
4328     nrows = *buf_ri_k[k];
4329     nextrow[k]  = buf_ri_k[k] + 1;  /* next row number of the k-th received i-structure */
4330     nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the i-structure entries of the k-th received message */
4331   }
4332 
4333   ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
4334   len = 0;
4335   for (i=0;i<m;i++) {
4336     bnzi   = 0;
4337     /* add local non-zero cols of this proc's seqmat into lnk */
4338     arow   = owners[rank] + i;
4339     anzi   = ai[arow+1] - ai[arow];
4340     aj     = a->j + ai[arow];
4341     ierr = PetscLLAdd(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4342     bnzi += nlnk;
4343     /* add received col data into lnk */
4344     for (k=0; k<merge->nrecv; k++){ /* k-th received message */
4345       if (i == *nextrow[k]) { /* i-th row */
4346         anzi = *(nextai[k]+1) - *nextai[k];
4347         aj   = buf_rj[k] + *nextai[k];
4348         ierr = PetscLLAdd(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4349         bnzi += nlnk;
4350         nextrow[k]++; nextai[k]++;
4351       }
4352     }
4353     if (len < bnzi) len = bnzi;  /* =max(bnzi) */
4354 
4355     /* if free space is not available, make more free space */
4356     if (current_space->local_remaining<bnzi) {
4357       ierr = PetscFreeSpaceGet(bnzi+current_space->total_array_size,&current_space);CHKERRQ(ierr);
4358       nspacedouble++;
4359     }
4360     /* copy data into free space, then initialize lnk */
4361     ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
4362     ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr);
4363 
4364     current_space->array           += bnzi;
4365     current_space->local_used      += bnzi;
4366     current_space->local_remaining -= bnzi;
4367 
4368     bi[i+1] = bi[i] + bnzi;
4369   }
4370 
4371   ierr = PetscFree(buf_ri_k);CHKERRQ(ierr);
4372 
4373   ierr = PetscMalloc((bi[m]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr);
4374   ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr);
4375   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
4376 
4377   /* create symbolic parallel matrix B_mpi */
4378   /*---------------------------------------*/
4379   ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr);
4380   if (n==PETSC_DECIDE) {
4381     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr);
4382   } else {
4383     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
4384   }
4385   ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr);
4386   ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr);
4387   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
4388 
4389   /* B_mpi is not ready for use - assembly will be done by MatMerge_SeqsToMPINumeric() */
4390   B_mpi->assembled     = PETSC_FALSE;
4391   B_mpi->ops->destroy  = MatDestroy_MPIAIJ_SeqsToMPI;
4392   merge->bi            = bi;
4393   merge->bj            = bj;
4394   merge->buf_ri        = buf_ri;
4395   merge->buf_rj        = buf_rj;
4396   merge->coi           = PETSC_NULL;
4397   merge->coj           = PETSC_NULL;
4398   merge->owners_co     = PETSC_NULL;
4399 
4400   /* attach the supporting struct to B_mpi for reuse */
4401   ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
4402   ierr = PetscContainerSetPointer(container,merge);CHKERRQ(ierr);
4403   ierr = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr);
4404   *mpimat = B_mpi;
4405 
4406   ierr = PetscCommDestroy(&comm);CHKERRQ(ierr);
4407   ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4408   PetscFunctionReturn(0);
4409 }
4410 
4411 #undef __FUNCT__
4412 #define __FUNCT__ "MatMerge_SeqsToMPI"
4413 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPI(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat)
4414 {
4415   PetscErrorCode   ierr;
4416 
4417   PetscFunctionBegin;
4418   ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4419   if (scall == MAT_INITIAL_MATRIX){
4420     ierr = MatMerge_SeqsToMPISymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr);
4421   }
4422   ierr = MatMerge_SeqsToMPINumeric(seqmat,*mpimat);CHKERRQ(ierr);
4423   ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4424   PetscFunctionReturn(0);
4425 }
4426 
4427 #undef __FUNCT__
4428 #define __FUNCT__ "MatGetLocalMat"
4429 /*@
4430      MatGetLocalMat - Creates a SeqAIJ matrix that contains all the local rows of a parallel AIJ matrix
4431 
4432     Not Collective
4433 
4434    Input Parameters:
4435 +    A - the matrix
4436 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4437 
4438    Output Parameter:
4439 .    A_loc - the local sequential matrix generated
4440 
4441     Level: developer
4442 
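   Example usage (a minimal sketch; A is assumed to be an assembled parallel AIJ matrix):
.vb
     Mat A_loc;
     ierr = MatGetLocalMat(A,MAT_INITIAL_MATRIX,&A_loc);CHKERRQ(ierr);
     /* ... use the local rows of A as a sequential matrix ... */
     ierr = MatGetLocalMat(A,MAT_REUSE_MATRIX,&A_loc);CHKERRQ(ierr); /* refresh values, same nonzero pattern */
     ierr = MatDestroy(A_loc);CHKERRQ(ierr);
.ve
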
4443 @*/
4444 PetscErrorCode PETSCMAT_DLLEXPORT MatGetLocalMat(Mat A,MatReuse scall,Mat *A_loc)
4445 {
4446   PetscErrorCode  ierr;
4447   Mat_MPIAIJ      *mpimat=(Mat_MPIAIJ*)A->data;
4448   Mat_SeqAIJ      *mat,*a=(Mat_SeqAIJ*)(mpimat->A)->data,*b=(Mat_SeqAIJ*)(mpimat->B)->data;
4449   PetscInt        *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*cmap=mpimat->garray;
4450   MatScalar       *aa=a->a,*ba=b->a,*cam;
4451   PetscScalar     *ca;
4452   PetscInt        am=A->rmap->n,i,j,k,cstart=A->cmap->rstart;
4453   PetscInt        *ci,*cj,col,ncols_d,ncols_o,jo;
4454 
4455   PetscFunctionBegin;
4456   ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
4457   if (scall == MAT_INITIAL_MATRIX){
4458     ierr = PetscMalloc((1+am)*sizeof(PetscInt),&ci);CHKERRQ(ierr);
4459     ci[0] = 0;
4460     for (i=0; i<am; i++){
4461       ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]);
4462     }
4463     ierr = PetscMalloc((1+ci[am])*sizeof(PetscInt),&cj);CHKERRQ(ierr);
4464     ierr = PetscMalloc((1+ci[am])*sizeof(PetscScalar),&ca);CHKERRQ(ierr);
4465     k = 0;
4466     for (i=0; i<am; i++) {
4467       ncols_o = bi[i+1] - bi[i];
4468       ncols_d = ai[i+1] - ai[i];
4469       /* off-diagonal portion of A */
4470       for (jo=0; jo<ncols_o; jo++) {
4471         col = cmap[*bj];
4472         if (col >= cstart) break;
4473         cj[k]   = col; bj++;
4474         ca[k++] = *ba++;
4475       }
4476       /* diagonal portion of A */
4477       for (j=0; j<ncols_d; j++) {
4478         cj[k]   = cstart + *aj++;
4479         ca[k++] = *aa++;
4480       }
4481       /* off-diagonal portion of A */
4482       for (j=jo; j<ncols_o; j++) {
4483         cj[k]   = cmap[*bj++];
4484         ca[k++] = *ba++;
4485       }
4486     }
4487     /* put together the new matrix */
4488     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr);
4489     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
4490     /* Since these are PETSc arrays, change flags to free them as necessary. */
4491     mat          = (Mat_SeqAIJ*)(*A_loc)->data;
4492     mat->free_a  = PETSC_TRUE;
4493     mat->free_ij = PETSC_TRUE;
4494     mat->nonew   = 0;
4495   } else if (scall == MAT_REUSE_MATRIX){
4496     mat=(Mat_SeqAIJ*)(*A_loc)->data;
4497     ci = mat->i; cj = mat->j; cam = mat->a;
4498     for (i=0; i<am; i++) {
4499       /* off-diagonal portion of A */
4500       ncols_o = bi[i+1] - bi[i];
4501       for (jo=0; jo<ncols_o; jo++) {
4502         col = cmap[*bj];
4503         if (col >= cstart) break;
4504         *cam++ = *ba++; bj++;
4505       }
4506       /* diagonal portion of A */
4507       ncols_d = ai[i+1] - ai[i];
4508       for (j=0; j<ncols_d; j++) *cam++ = *aa++;
4509       /* off-diagonal portion of A */
4510       for (j=jo; j<ncols_o; j++) {
4511         *cam++ = *ba++; bj++;
4512       }
4513     }
4514   } else {
4515     SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
4516   }
4517 
4518   ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
4519   PetscFunctionReturn(0);
4520 }
4521 
4522 #undef __FUNCT__
4523 #define __FUNCT__ "MatGetLocalMatCondensed"
4524 /*@C
4525      MatGetLocalMatCondensed - Creates a SeqAIJ matrix from all the local rows and the NON-ZERO columns of a parallel AIJ matrix
4526 
4527     Not Collective
4528 
4529    Input Parameters:
4530 +    A - the matrix
4531 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4532 -    row, col - index sets of rows and columns to extract (or PETSC_NULL)
4533 
4534    Output Parameter:
4535 .    A_loc - the local sequential matrix generated
4536 
4537     Level: developer
4538 
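   Example usage (a minimal sketch; passing PETSC_NULL for row and col lets the routine pick the local
   rows and the nonzero columns of A itself):
.vb
     Mat A_loc;
     ierr = MatGetLocalMatCondensed(A,MAT_INITIAL_MATRIX,PETSC_NULL,PETSC_NULL,&A_loc);CHKERRQ(ierr);
     ierr = MatDestroy(A_loc);CHKERRQ(ierr);
.ve
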
4539 @*/
4540 PetscErrorCode PETSCMAT_DLLEXPORT MatGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc)
4541 {
4542   Mat_MPIAIJ        *a=(Mat_MPIAIJ*)A->data;
4543   PetscErrorCode    ierr;
4544   PetscInt          i,start,end,ncols,nzA,nzB,*cmap,imark,*idx;
4545   IS                isrowa,iscola;
4546   Mat               *aloc;
4547 
4548   PetscFunctionBegin;
4549   ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
4550   if (!row){
4551     start = A->rmap->rstart; end = A->rmap->rend;
4552     ierr = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr);
4553   } else {
4554     isrowa = *row;
4555   }
4556   if (!col){
4557     start = A->cmap->rstart;
4558     cmap  = a->garray;
4559     nzA   = a->A->cmap->n;
4560     nzB   = a->B->cmap->n;
4561     ierr  = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr);
4562     ncols = 0;
4563     for (i=0; i<nzB; i++) {
4564       if (cmap[i] < start) idx[ncols++] = cmap[i];
4565       else break;
4566     }
4567     imark = i;
4568     for (i=0; i<nzA; i++) idx[ncols++] = start + i;
4569     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i];
4570     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,&iscola);CHKERRQ(ierr);
4571     ierr = PetscFree(idx);CHKERRQ(ierr);
4572   } else {
4573     iscola = *col;
4574   }
4575   if (scall != MAT_INITIAL_MATRIX){
4576     ierr = PetscMalloc(sizeof(Mat),&aloc);CHKERRQ(ierr);
4577     aloc[0] = *A_loc;
4578   }
4579   ierr = MatGetSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr);
4580   *A_loc = aloc[0];
4581   ierr = PetscFree(aloc);CHKERRQ(ierr);
4582   if (!row){
4583     ierr = ISDestroy(isrowa);CHKERRQ(ierr);
4584   }
4585   if (!col){
4586     ierr = ISDestroy(iscola);CHKERRQ(ierr);
4587   }
4588   ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
4589   PetscFunctionReturn(0);
4590 }
4591 
4592 #undef __FUNCT__
4593 #define __FUNCT__ "MatGetBrowsOfAcols"
4594 /*@C
4595     MatGetBrowsOfAcols - Creates a SeqAIJ matrix from the rows of B that correspond to the nonzero columns of the local part of A
4596 
4597     Collective on Mat
4598 
4599    Input Parameters:
4600 +    A,B - the matrices in mpiaij format
4601 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4602 -    rowb, colb - index sets of rows and columns of B to extract (or PETSC_NULL)
4603 
4604    Output Parameters:
4605 +    rowb, colb - index sets of rows and columns of B to extract
4606 .    brstart - row index of B_seq at which the copy of B's own local rows begins (the next B->rmap->n rows of B_seq)
4607 -    B_seq - the sequential matrix generated
4608 
4609     Level: developer
4610 
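   Example usage (a minimal sketch; A and B are parallel AIJ matrices whose layouts satisfy the check below,
   and the index sets created on the first call are passed back for reuse):
.vb
     IS       rowb,colb;
     PetscInt brstart;
     Mat      B_seq;
     ierr = MatGetBrowsOfAcols(A,B,MAT_INITIAL_MATRIX,&rowb,&colb,&brstart,&B_seq);CHKERRQ(ierr);
     /* ... after the numerical values of B change (same nonzero pattern) ... */
     ierr = MatGetBrowsOfAcols(A,B,MAT_REUSE_MATRIX,&rowb,&colb,&brstart,&B_seq);CHKERRQ(ierr);
.ve
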
4611 @*/
4612 PetscErrorCode PETSCMAT_DLLEXPORT MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,PetscInt *brstart,Mat *B_seq)
4613 {
4614   Mat_MPIAIJ        *a=(Mat_MPIAIJ*)A->data;
4615   PetscErrorCode    ierr;
4616   PetscInt          *idx,i,start,ncols,nzA,nzB,*cmap,imark;
4617   IS                isrowb,iscolb;
4618   Mat               *bseq;
4619 
4620   PetscFunctionBegin;
4621   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend){
4622     SETERRQ4(PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
4623   }
4624   ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
4625 
4626   if (scall == MAT_INITIAL_MATRIX){
4627     start = A->cmap->rstart;
4628     cmap  = a->garray;
4629     nzA   = a->A->cmap->n;
4630     nzB   = a->B->cmap->n;
4631     ierr  = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr);
4632     ncols = 0;
4633     for (i=0; i<nzB; i++) {  /* row < local row index */
4634       if (cmap[i] < start) idx[ncols++] = cmap[i];
4635       else break;
4636     }
4637     imark = i;
4638     for (i=0; i<nzA; i++) idx[ncols++] = start + i;  /* local rows */
4639     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */
4640     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,&isrowb);CHKERRQ(ierr);
4641     ierr = PetscFree(idx);CHKERRQ(ierr);
4642     *brstart = imark;
4643     ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr);
4644   } else {
4645     if (!rowb || !colb) SETERRQ(PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX");
4646     isrowb = *rowb; iscolb = *colb;
4647     ierr = PetscMalloc(sizeof(Mat),&bseq);CHKERRQ(ierr);
4648     bseq[0] = *B_seq;
4649   }
4650   ierr = MatGetSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr);
4651   *B_seq = bseq[0];
4652   ierr = PetscFree(bseq);CHKERRQ(ierr);
4653   if (!rowb){
4654     ierr = ISDestroy(isrowb);CHKERRQ(ierr);
4655   } else {
4656     *rowb = isrowb;
4657   }
4658   if (!colb){
4659     ierr = ISDestroy(iscolb);CHKERRQ(ierr);
4660   } else {
4661     *colb = iscolb;
4662   }
4663   ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
4664   PetscFunctionReturn(0);
4665 }
4666 
4667 #undef __FUNCT__
4668 #define __FUNCT__ "MatGetBrowsOfAoCols"
4669 /*@C
4670     MatGetBrowsOfAoCols - Creates a SeqAIJ matrix from the rows of B that correspond to the nonzero columns
4671     of the OFF-DIAGONAL portion of the local part of A
4672 
4673     Collective on Mat
4674 
4675    Input Parameters:
4676 +    A,B - the matrices in mpiaij format
4677 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4678 .    startsj - starting point in B's sending and receiving j-arrays, saved for MAT_REUSE (or PETSC_NULL)
4679 -    bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or PETSC_NULL)
4680 
4681    Output Parameter:
4682 .    B_oth - the sequential matrix generated
4683 
4684     Level: developer
4685 
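   Example usage (a minimal sketch; startsj and bufa hold communication data created on the first call
   and reused on later calls):
.vb
     PetscInt  *startsj = PETSC_NULL;
     MatScalar *bufa    = PETSC_NULL;
     Mat       B_oth;
     ierr = MatGetBrowsOfAoCols(A,B,MAT_INITIAL_MATRIX,&startsj,&bufa,&B_oth);CHKERRQ(ierr);
     /* ... after the numerical values of B change (same nonzero pattern) ... */
     ierr = MatGetBrowsOfAoCols(A,B,MAT_REUSE_MATRIX,&startsj,&bufa,&B_oth);CHKERRQ(ierr);
.ve
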
4686 @*/
4687 PetscErrorCode PETSCMAT_DLLEXPORT MatGetBrowsOfAoCols(Mat A,Mat B,MatReuse scall,PetscInt **startsj,MatScalar **bufa_ptr,Mat *B_oth)
4688 {
4689   VecScatter_MPI_General *gen_to,*gen_from;
4690   PetscErrorCode         ierr;
4691   Mat_MPIAIJ             *a=(Mat_MPIAIJ*)A->data;
4692   Mat_SeqAIJ             *b_oth;
4693   VecScatter             ctx=a->Mvctx;
4694   MPI_Comm               comm=((PetscObject)ctx)->comm;
4695   PetscMPIInt            *rprocs,*sprocs,tag=((PetscObject)ctx)->tag,rank;
4696   PetscInt               *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap->n,row,*b_othi,*b_othj;
4697   PetscScalar            *rvalues,*svalues;
4698   MatScalar              *b_otha,*bufa,*bufA;
4699   PetscInt               i,j,k,l,ll,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len;
4700   MPI_Request            *rwaits = PETSC_NULL,*swaits = PETSC_NULL;
4701   MPI_Status             *sstatus,rstatus;
4702   PetscMPIInt            jj;
4703   PetscInt               *cols,sbs,rbs;
4704   PetscScalar            *vals;
4705 
4706   PetscFunctionBegin;
4707   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend){
4708     SETERRQ4(PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
4709   }
4710   ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
4711   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4712 
4713   gen_to   = (VecScatter_MPI_General*)ctx->todata;
4714   gen_from = (VecScatter_MPI_General*)ctx->fromdata;
4715   rvalues  = gen_from->values; /* holds the length of receiving row */
4716   svalues  = gen_to->values;   /* holds the length of sending row */
4717   nrecvs   = gen_from->n;
4718   nsends   = gen_to->n;
4719 
4720   ierr = PetscMalloc2(nrecvs,MPI_Request,&rwaits,nsends,MPI_Request,&swaits);CHKERRQ(ierr);
4721   srow     = gen_to->indices;   /* local row index to be sent */
4722   sstarts  = gen_to->starts;
4723   sprocs   = gen_to->procs;
4724   sstatus  = gen_to->sstatus;
4725   sbs      = gen_to->bs;
4726   rstarts  = gen_from->starts;
4727   rprocs   = gen_from->procs;
4728   rbs      = gen_from->bs;
4729 
4730   if (!startsj || !bufa_ptr) scall = MAT_INITIAL_MATRIX;
4731   if (scall == MAT_INITIAL_MATRIX){
4732     /* i-array */
4733     /*---------*/
4734     /*  post receives */
4735     for (i=0; i<nrecvs; i++){
4736       rowlen = (PetscInt*)rvalues + rstarts[i]*rbs;
4737       nrows = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */
4738       ierr = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
4739     }
4740 
4741     /* pack the outgoing message */
4742     ierr = PetscMalloc((nsends+nrecvs+3)*sizeof(PetscInt),&sstartsj);CHKERRQ(ierr);
4743     rstartsj = sstartsj + nsends +1;
4744     sstartsj[0] = 0;  rstartsj[0] = 0;
4745     len = 0; /* total length of j or a array to be sent */
4746     k = 0;
4747     for (i=0; i<nsends; i++){
4748       rowlen = (PetscInt*)svalues + sstarts[i]*sbs;
4749       nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
4750       for (j=0; j<nrows; j++) {
4751         row = srow[k] + B->rmap->range[rank]; /* global row idx */
4752         for (l=0; l<sbs; l++){
4753           ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); /* rowlength */
4754           rowlen[j*sbs+l] = ncols;
4755           len += ncols;
4756           ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr);
4757         }
4758         k++;
4759       }
4760       ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
4761       sstartsj[i+1] = len;  /* starting point of (i+1)-th outgoing msg in bufj and bufa */
4762     }
4763     /* recvs and sends of i-array are completed */
4764     i = nrecvs;
4765     while (i--) {
4766       ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
4767     }
4768     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
4769 
4770     /* allocate buffers for sending j and a arrays */
4771     ierr = PetscMalloc((len+1)*sizeof(PetscInt),&bufj);CHKERRQ(ierr);
4772     ierr = PetscMalloc((len+1)*sizeof(PetscScalar),&bufa);CHKERRQ(ierr);
4773 
4774     /* create i-array of B_oth */
4775     ierr = PetscMalloc((aBn+2)*sizeof(PetscInt),&b_othi);CHKERRQ(ierr);
4776     b_othi[0] = 0;
4777     len = 0; /* total length of j or a array to be received */
4778     k = 0;
4779     for (i=0; i<nrecvs; i++){
4780       rowlen = (PetscInt*)rvalues + rstarts[i]*rbs;
4781       nrows = rbs*(rstarts[i+1]-rstarts[i]); /* num of rows to be received */
4782       for (j=0; j<nrows; j++) {
4783         b_othi[k+1] = b_othi[k] + rowlen[j];
4784         len += rowlen[j]; k++;
4785       }
4786       rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */
4787     }
4788 
4789     /* allocate space for j and a arrays of B_oth */
4790     ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(PetscInt),&b_othj);CHKERRQ(ierr);
4791     ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(MatScalar),&b_otha);CHKERRQ(ierr);
4792 
4793     /* j-array */
4794     /*---------*/
4795     /*  post receives of j-array */
4796     for (i=0; i<nrecvs; i++){
4797       nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
4798       ierr = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
4799     }
4800 
4801     /* pack the outgoing message j-array */
4802     k = 0;
4803     for (i=0; i<nsends; i++){
4804       nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
4805       bufJ = bufj+sstartsj[i];
4806       for (j=0; j<nrows; j++) {
4807         row  = srow[k++] + B->rmap->range[rank]; /* global row idx */
4808         for (ll=0; ll<sbs; ll++){
4809           ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr);
4810           for (l=0; l<ncols; l++){
4811             *bufJ++ = cols[l];
4812           }
4813           ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr);
4814         }
4815       }
4816       ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
4817     }
4818 
4819     /* recvs and sends of j-array are completed */
4820     i = nrecvs;
4821     while (i--) {
4822       ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
4823     }
4824     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
4825   } else if (scall == MAT_REUSE_MATRIX){
4826     sstartsj = *startsj;
4827     rstartsj = sstartsj + nsends +1;
4828     bufa     = *bufa_ptr;
4829     b_oth    = (Mat_SeqAIJ*)(*B_oth)->data;
4830     b_otha   = b_oth->a;
4831   } else {
4832     SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
4833   }
4834 
4835   /* a-array */
4836   /*---------*/
4837   /*  post receives of a-array */
4838   for (i=0; i<nrecvs; i++){
4839     nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
4840     ierr = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
4841   }
4842 
4843   /* pack the outgoing message a-array */
4844   k = 0;
4845   for (i=0; i<nsends; i++){
4846     nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
4847     bufA = bufa+sstartsj[i];
4848     for (j=0; j<nrows; j++) {
4849       row  = srow[k++] + B->rmap->range[rank]; /* global row idx */
4850       for (ll=0; ll<sbs; ll++){
4851         ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr);
4852         for (l=0; l<ncols; l++){
4853           *bufA++ = vals[l];
4854         }
4855         ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr);
4856       }
4857     }
4858     ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
4859   }
4860   /* recvs and sends of a-array are completed */
4861   i = nrecvs;
4862   while (i--) {
4863     ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
4864   }
4865   if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
4866   ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr);
4867 
4868   if (scall == MAT_INITIAL_MATRIX){
4869     /* put together the new matrix */
4870     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr);
4871 
4872     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
4873     /* Since these are PETSc arrays, change flags to free them as necessary. */
4874     b_oth          = (Mat_SeqAIJ *)(*B_oth)->data;
4875     b_oth->free_a  = PETSC_TRUE;
4876     b_oth->free_ij = PETSC_TRUE;
4877     b_oth->nonew   = 0;
4878 
4879     ierr = PetscFree(bufj);CHKERRQ(ierr);
4880     if (!startsj || !bufa_ptr){
4881       ierr = PetscFree(sstartsj);CHKERRQ(ierr);
4882       ierr = PetscFree(bufa);CHKERRQ(ierr);
4883     } else {
4884       *startsj  = sstartsj;
4885       *bufa_ptr = bufa;
4886     }
4887   }
4888   ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
4889   PetscFunctionReturn(0);
4890 }
4891 
4892 #undef __FUNCT__
4893 #define __FUNCT__ "MatGetCommunicationStructs"
4894 /*@C
4895   MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication.
4896 
4897   Not Collective
4898 
4899   Input Parameter:
4900 . A - The matrix in mpiaij format
4901 
4902   Output Parameters:
4903 + lvec - The local vector holding off-process values from the argument to a matrix-vector product
4904 . colmap - A map from global column index to local index into lvec
4905 - multScatter - A scatter from the argument of a matrix-vector product to lvec
4906 
4907   Level: developer
4908 
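  Example usage (a minimal sketch; the returned objects are references owned by A and must not be destroyed
  by the caller; the type of the colmap argument depends on whether PETSC_USE_CTABLE is defined):
.vb
     Vec        lvec;
     VecScatter Mvctx;
#if defined(PETSC_USE_CTABLE)
     PetscTable colmap;
#else
     PetscInt   *colmap;
#endif
     ierr = MatGetCommunicationStructs(A,&lvec,&colmap,&Mvctx);CHKERRQ(ierr);
.ve
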
4909 @*/
4910 #if defined (PETSC_USE_CTABLE)
4911 PetscErrorCode PETSCMAT_DLLEXPORT MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter)
4912 #else
4913 PetscErrorCode PETSCMAT_DLLEXPORT MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter)
4914 #endif
4915 {
4916   Mat_MPIAIJ *a;
4917 
4918   PetscFunctionBegin;
4919   PetscValidHeaderSpecific(A, MAT_COOKIE, 1);
4920   PetscValidPointer(lvec, 2);
4921   PetscValidPointer(colmap, 3);
4922   PetscValidPointer(multScatter, 4);
4923   a = (Mat_MPIAIJ *) A->data;
4924   if (lvec) *lvec = a->lvec;
4925   if (colmap) *colmap = a->colmap;
4926   if (multScatter) *multScatter = a->Mvctx;
4927   PetscFunctionReturn(0);
4928 }
4929 
4930 EXTERN_C_BEGIN
4931 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_MPIAIJ_MPICRL(Mat,const MatType,MatReuse,Mat*);
4932 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_MPIAIJ_MPICSRPERM(Mat,const MatType,MatReuse,Mat*);
4933 EXTERN_C_END
4934 
4935 #undef __FUNCT__
4936 #define __FUNCT__ "MatMatMultNumeric_MPIDense_MPIAIJ"
4937 /*
4938     Computes C = A*B as (B'*A')', since a direct MPIDense*MPIAIJ product is not available while MPIAIJ*MPIDense is
4939 
4940                n                       p                          p
4941         (              )       (              )         (                  )
4942       m (      A       )  *  n (       B      )   =   m (         C        )
4943         (              )       (              )         (                  )
4944 
4945 */
4946 PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C)
4947 {
4948   PetscErrorCode     ierr;
4949   Mat                At,Bt,Ct;
4950 
4951   PetscFunctionBegin;
4952   ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr);
4953   ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr);
4954   ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,1.0,&Ct);CHKERRQ(ierr);
4955   ierr = MatDestroy(At);CHKERRQ(ierr);
4956   ierr = MatDestroy(Bt);CHKERRQ(ierr);
4957   ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr);
4958   ierr = MatDestroy(Ct);CHKERRQ(ierr);
4959   PetscFunctionReturn(0);
4960 }
4961 
4962 #undef __FUNCT__
4963 #define __FUNCT__ "MatMatMultSymbolic_MPIDense_MPIAIJ"
4964 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat *C)
4965 {
4966   PetscErrorCode ierr;
4967   PetscInt       m=A->rmap->n,n=B->cmap->n;
4968   Mat            Cmat;
4969 
4970   PetscFunctionBegin;
4971   if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_ERR_ARG_SIZ,"A->cmap->n %D != B->rmap->n %D",A->cmap->n,B->rmap->n);
4972   ierr = MatCreate(((PetscObject)A)->comm,&Cmat);CHKERRQ(ierr);
4973   ierr = MatSetSizes(Cmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
4974   ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr);
4975   ierr = MatMPIDenseSetPreallocation(Cmat,PETSC_NULL);CHKERRQ(ierr);
4976   ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4977   ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4978   *C   = Cmat;
4979   PetscFunctionReturn(0);
4980 }
4981 
4982 /* ----------------------------------------------------------------*/
4983 #undef __FUNCT__
4984 #define __FUNCT__ "MatMatMult_MPIDense_MPIAIJ"
4985 PetscErrorCode MatMatMult_MPIDense_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C)
4986 {
4987   PetscErrorCode ierr;
4988 
4989   PetscFunctionBegin;
4990   if (scall == MAT_INITIAL_MATRIX){
4991     ierr = MatMatMultSymbolic_MPIDense_MPIAIJ(A,B,fill,C);CHKERRQ(ierr);
4992   }
4993   ierr = MatMatMultNumeric_MPIDense_MPIAIJ(A,B,*C);CHKERRQ(ierr);
4994   PetscFunctionReturn(0);
4995 }
4996 
4997 EXTERN_C_BEGIN
4998 #if defined(PETSC_HAVE_MUMPS)
4999 extern PetscErrorCode MatGetFactor_mpiaij_mumps(Mat,MatFactorType,Mat*);
5000 #endif
5001 #if defined(PETSC_HAVE_PASTIX)
5002 extern PetscErrorCode MatGetFactor_mpiaij_pastix(Mat,MatFactorType,Mat*);
5003 #endif
5004 #if defined(PETSC_HAVE_SUPERLU_DIST)
5005 extern PetscErrorCode MatGetFactor_mpiaij_superlu_dist(Mat,MatFactorType,Mat*);
5006 #endif
5007 #if defined(PETSC_HAVE_SPOOLES)
5008 extern PetscErrorCode MatGetFactor_mpiaij_spooles(Mat,MatFactorType,Mat*);
5009 #endif
5010 EXTERN_C_END
5011 
5012 /*MC
5013    MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.
5014 
5015    Options Database Keys:
5016 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions()
5017 
5018   Level: beginner
5019 
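  Example (a minimal sketch; comm, M, and N are assumed to be set by the application, and the
  preallocation counts are only illustrative):
.vb
     Mat A;
     ierr = MatCreate(comm,&A);CHKERRQ(ierr);
     ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,M,N);CHKERRQ(ierr);
     ierr = MatSetFromOptions(A);CHKERRQ(ierr);  /* -mat_type mpiaij selects this type */
     ierr = MatMPIAIJSetPreallocation(A,5,PETSC_NULL,2,PETSC_NULL);CHKERRQ(ierr);
.ve
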
5020 .seealso: MatCreateMPIAIJ()
5021 M*/
5022 
5023 EXTERN_C_BEGIN
5024 #undef __FUNCT__
5025 #define __FUNCT__ "MatCreate_MPIAIJ"
5026 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_MPIAIJ(Mat B)
5027 {
5028   Mat_MPIAIJ     *b;
5029   PetscErrorCode ierr;
5030   PetscMPIInt    size;
5031 
5032   PetscFunctionBegin;
5033   ierr = MPI_Comm_size(((PetscObject)B)->comm,&size);CHKERRQ(ierr);
5034 
5035   ierr            = PetscNewLog(B,Mat_MPIAIJ,&b);CHKERRQ(ierr);
5036   B->data         = (void*)b;
5037   ierr            = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
5038   B->rmap->bs     = 1;
5039   B->assembled    = PETSC_FALSE;
5040   B->mapping      = 0;
5041 
5042   B->insertmode   = NOT_SET_VALUES;
5043   b->size         = size;
5044   ierr = MPI_Comm_rank(((PetscObject)B)->comm,&b->rank);CHKERRQ(ierr);
5045 
5046   /* build cache for off array entries formed */
5047   ierr = MatStashCreate_Private(((PetscObject)B)->comm,1,&B->stash);CHKERRQ(ierr);
5048   b->donotstash  = PETSC_FALSE;
5049   b->colmap      = 0;
5050   b->garray      = 0;
5051   b->roworiented = PETSC_TRUE;
5052 
5053   /* stuff used for matrix vector multiply */
5054   b->lvec      = PETSC_NULL;
5055   b->Mvctx     = PETSC_NULL;
5056 
5057   /* stuff for MatGetRow() */
5058   b->rowindices   = 0;
5059   b->rowvalues    = 0;
5060   b->getrowactive = PETSC_FALSE;
5061 
5062 #if defined(PETSC_HAVE_SPOOLES)
5063   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_spooles_C",
5064                                      "MatGetFactor_mpiaij_spooles",
5065                                      MatGetFactor_mpiaij_spooles);CHKERRQ(ierr);
5066 #endif
5067 #if defined(PETSC_HAVE_MUMPS)
5068   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_mumps_C",
5069                                      "MatGetFactor_mpiaij_mumps",
5070                                      MatGetFactor_mpiaij_mumps);CHKERRQ(ierr);
5071 #endif
5072 #if defined(PETSC_HAVE_PASTIX)
5073   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_pastix_C",
5074 					   "MatGetFactor_mpiaij_pastix",
5075 					   MatGetFactor_mpiaij_pastix);CHKERRQ(ierr);
5076 #endif
5077 #if defined(PETSC_HAVE_SUPERLU_DIST)
5078   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_superlu_dist_C",
5079                                      "MatGetFactor_mpiaij_superlu_dist",
5080                                      MatGetFactor_mpiaij_superlu_dist);CHKERRQ(ierr);
5081 #endif
5082   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
5083                                      "MatStoreValues_MPIAIJ",
5084                                      MatStoreValues_MPIAIJ);CHKERRQ(ierr);
5085   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
5086                                      "MatRetrieveValues_MPIAIJ",
5087                                      MatRetrieveValues_MPIAIJ);CHKERRQ(ierr);
5088   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
5089 				     "MatGetDiagonalBlock_MPIAIJ",
5090                                      MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr);
5091   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatIsTranspose_C",
5092 				     "MatIsTranspose_MPIAIJ",
5093 				     MatIsTranspose_MPIAIJ);CHKERRQ(ierr);
5094   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocation_C",
5095 				     "MatMPIAIJSetPreallocation_MPIAIJ",
5096 				     MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr);
5097   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",
5098 				     "MatMPIAIJSetPreallocationCSR_MPIAIJ",
5099 				     MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr);
5100   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C",
5101 				     "MatDiagonalScaleLocal_MPIAIJ",
5102 				     MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr);
5103   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpicsrperm_C",
5104                                      "MatConvert_MPIAIJ_MPICSRPERM",
5105                                       MatConvert_MPIAIJ_MPICSRPERM);CHKERRQ(ierr);
5106   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpicrl_C",
5107                                      "MatConvert_MPIAIJ_MPICRL",
5108                                       MatConvert_MPIAIJ_MPICRL);CHKERRQ(ierr);
5109   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C",
5110                                      "MatConvert_MPIAIJ_MPISBAIJ",
5111                                       MatConvert_MPIAIJ_MPISBAIJ);CHKERRQ(ierr);
5112   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMult_mpidense_mpiaij_C",
5113                                      "MatMatMult_MPIDense_MPIAIJ",
5114                                       MatMatMult_MPIDense_MPIAIJ);CHKERRQ(ierr);
5115   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMultSymbolic_mpidense_mpiaij_C",
5116                                      "MatMatMultSymbolic_MPIDense_MPIAIJ",
5117                                       MatMatMultSymbolic_MPIDense_MPIAIJ);CHKERRQ(ierr);
5118   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMultNumeric_mpidense_mpiaij_C",
5119                                      "MatMatMultNumeric_MPIDense_MPIAIJ",
5120                                       MatMatMultNumeric_MPIDense_MPIAIJ);CHKERRQ(ierr);
5121   ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr);
5122   PetscFunctionReturn(0);
5123 }
5124 EXTERN_C_END
5125 
5126 #undef __FUNCT__
5127 #define __FUNCT__ "MatCreateMPIAIJWithSplitArrays"
5128 /*@
5129      MatCreateMPIAIJWithSplitArrays - creates a MPI AIJ matrix using arrays that contain the "diagonal"
5130          and "off-diagonal" part of the matrix in CSR format.
5131 
5132    Collective on MPI_Comm
5133 
5134    Input Parameters:
5135 +  comm - MPI communicator
5136 .  m - number of local rows (Cannot be PETSC_DECIDE)
5137 .  n - This value should be the same as the local size used in creating the
5138        x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have it
5139        calculated if N is given). For square matrices n is almost always m.
5140 .  M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given)
5141 .  N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given)
5142 .   i - row indices for "diagonal" portion of matrix
5143 .   j - column indices
5144 .   a - matrix values
5145 .   oi - row indices for "off-diagonal" portion of matrix
5146 .   oj - column indices
5147 -   oa - matrix values
5148 
5149    Output Parameter:
5150 .   mat - the matrix
5151 
5152    Level: advanced
5153 
5154    Notes:
5155        The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc.
5156 
5157        The i and j indices are 0 based
5158 
5159        See MatCreateMPIAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix
5160 
5161        This sets local rows and cannot be used to set off-processor values.
5162 
5163        You cannot later use MatSetValues() to change values in this matrix.
5164 
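       Example usage (a minimal sketch; the CSR arrays i,j,a and oi,oj,oa are assumed to have been built by
       the application following the "diagonal"/"off-diagonal" split of MatCreateMPIAIJ(), and they must stay
       valid for the lifetime of the matrix since they are not copied):
.vb
     Mat A;
     ierr = MatCreateMPIAIJWithSplitArrays(comm,m,n,PETSC_DETERMINE,PETSC_DETERMINE,
                                           i,j,a,oi,oj,oa,&A);CHKERRQ(ierr);
.ve
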
5165 .keywords: matrix, aij, compressed row, sparse, parallel
5166 
5167 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
5168           MPIAIJ, MatCreateMPIAIJ(), MatCreateMPIAIJWithArrays()
5169 @*/
5170 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],
5171 								PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat)
5172 {
5173   PetscErrorCode ierr;
5174   Mat_MPIAIJ     *maij;
5175 
5176   PetscFunctionBegin;
5177   if (m < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
5178   if (i[0]) {
5179     SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
5180   }
5181   if (oi[0]) {
5182     SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0");
5183   }
5184   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
5185   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
5186   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
5187   maij = (Mat_MPIAIJ*) (*mat)->data;
5188   maij->donotstash     = PETSC_TRUE;
5189   (*mat)->preallocated = PETSC_TRUE;
5190 
5191   ierr = PetscLayoutSetBlockSize((*mat)->rmap,1);CHKERRQ(ierr);
5192   ierr = PetscLayoutSetBlockSize((*mat)->cmap,1);CHKERRQ(ierr);
5193   ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr);
5194   ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr);
5195 
5196   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr);
5197   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr);
5198 
5199   ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5200   ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5201   ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5202   ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5203 
5204   ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5205   ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5206   PetscFunctionReturn(0);
5207 }
5208 
5209 /*
5210     Special version for direct calls from Fortran
5211 */
5212 #if defined(PETSC_HAVE_FORTRAN_CAPS)
5213 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ
5214 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
5215 #define matsetvaluesmpiaij_ matsetvaluesmpiaij
5216 #endif
5217 
5218 /* Change these macros so they can be used in a void function */
5219 #undef CHKERRQ
5220 #define CHKERRQ(ierr) CHKERRABORT(((PetscObject)mat)->comm,ierr)
5221 #undef SETERRQ2
5222 #define SETERRQ2(ierr,b,c,d) CHKERRABORT(((PetscObject)mat)->comm,ierr)
5223 #undef SETERRQ
5224 #define SETERRQ(ierr,b) CHKERRABORT(((PetscObject)mat)->comm,ierr)
5225 
5226 EXTERN_C_BEGIN
5227 #undef __FUNCT__
5228 #define __FUNCT__ "matsetvaluesmpiaij_"
5229 void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr)
5230 {
5231   Mat             mat = *mmat;
5232   PetscInt        m = *mm, n = *mn;
5233   InsertMode      addv = *maddv;
5234   Mat_MPIAIJ      *aij = (Mat_MPIAIJ*)mat->data;
5235   PetscScalar     value;
5236   PetscErrorCode  ierr;
5237 
5238   ierr = MatPreallocated(mat);CHKERRQ(ierr);
5239   if (mat->insertmode == NOT_SET_VALUES) {
5240     mat->insertmode = addv;
5241   }
5242 #if defined(PETSC_USE_DEBUG)
5243   else if (mat->insertmode != addv) {
5244     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
5245   }
5246 #endif
5247   {
5248   PetscInt        i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
5249   PetscInt        cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
5250   PetscTruth      roworiented = aij->roworiented;
5251 
5252   /* Some Variables required in the macro */
5253   Mat             A = aij->A;
5254   Mat_SeqAIJ      *a = (Mat_SeqAIJ*)A->data;
5255   PetscInt        *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
5256   MatScalar       *aa = a->a;
5257   PetscTruth      ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES))?PETSC_TRUE:PETSC_FALSE);
5258   Mat             B = aij->B;
5259   Mat_SeqAIJ      *b = (Mat_SeqAIJ*)B->data;
5260   PetscInt        *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
5261   MatScalar       *ba = b->a;
5262 
5263   PetscInt        *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
5264   PetscInt        nonew = a->nonew;
5265   MatScalar       *ap1,*ap2;
5266 
5267   PetscFunctionBegin;
5268   for (i=0; i<m; i++) {
5269     if (im[i] < 0) continue;
5270 #if defined(PETSC_USE_DEBUG)
5271     if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
5272 #endif
5273     if (im[i] >= rstart && im[i] < rend) {
5274       row      = im[i] - rstart;
5275       lastcol1 = -1;
5276       rp1      = aj + ai[row];
5277       ap1      = aa + ai[row];
5278       rmax1    = aimax[row];
5279       nrow1    = ailen[row];
5280       low1     = 0;
5281       high1    = nrow1;
5282       lastcol2 = -1;
5283       rp2      = bj + bi[row];
5284       ap2      = ba + bi[row];
5285       rmax2    = bimax[row];
5286       nrow2    = bilen[row];
5287       low2     = 0;
5288       high2    = nrow2;
5289 
5290       for (j=0; j<n; j++) {
5291         if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
5292         if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
5293         if (in[j] >= cstart && in[j] < cend){
5294           col = in[j] - cstart;
5295           MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
5296         } else if (in[j] < 0) continue;
5297 #if defined(PETSC_USE_DEBUG)
5298         else if (in[j] >= mat->cmap->N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);}
5299 #endif
5300         else {
5301           if (mat->was_assembled) {
5302             if (!aij->colmap) {
5303               ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
5304             }
5305 #if defined (PETSC_USE_CTABLE)
5306             ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
5307 	    col--;
5308 #else
5309             col = aij->colmap[in[j]] - 1;
5310 #endif
5311             if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
5312               ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
5313               col =  in[j];
5314               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
5315               B = aij->B;
5316               b = (Mat_SeqAIJ*)B->data;
5317               bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
5318               rp2      = bj + bi[row];
5319               ap2      = ba + bi[row];
5320               rmax2    = bimax[row];
5321               nrow2    = bilen[row];
5322               low2     = 0;
5323               high2    = nrow2;
5324               bm       = aij->B->rmap->n;
5325               ba = b->a;
5326             }
5327           } else col = in[j];
5328           MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
5329         }
5330       }
5331     } else {
5332       if (!aij->donotstash) {
5333         if (roworiented) {
5334           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscTruth)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
5335         } else {
5336           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscTruth)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
5337         }
5338       }
5339     }
5340   }}
5341   PetscFunctionReturnVoid();
5342 }
5343 EXTERN_C_END
5344 
5345