xref: /petsc/src/mat/impls/aij/mpi/mpiaij.c (revision b06ca6b09b3ae4e8a93ed49db16f4412dd6db9d8)
1 #define PETSCMAT_DLL
2 
3 #include "../src/mat/impls/aij/mpi/mpiaij.h"   /*I "petscmat.h" I*/
4 
5 #undef __FUNCT__
6 #define __FUNCT__ "MatDistribute_MPIAIJ"
7 /*
8     Distributes a SeqAIJ matrix across a set of processes. Code stolen from
9     MatLoad_MPIAIJ(). Horrible lack of reuse. Should be a routine for each matrix type.
10 
11     Only for square matrices
12 */
13 PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat)
14 {
15   PetscMPIInt    rank,size;
16   PetscInt       *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz,*gmataj,cnt,row,*ld;
17   PetscErrorCode ierr;
18   Mat            mat;
19   Mat_SeqAIJ     *gmata;
20   PetscMPIInt    tag;
21   MPI_Status     status;
22   PetscTruth     aij;
23   MatScalar      *gmataa,*ao,*ad,*gmataarestore=0;
24 
25   PetscFunctionBegin;
26   CHKMEMQ;
27   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
28   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
29   if (!rank) {
30     ierr = PetscTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);CHKERRQ(ierr);
31     if (!aij) SETERRQ1(PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name);
32   }
33   if (reuse == MAT_INITIAL_MATRIX) {
34     ierr = MatCreate(comm,&mat);CHKERRQ(ierr);
35     ierr = MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
36     ierr = MatSetType(mat,MATAIJ);CHKERRQ(ierr);
37     ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr);
38     ierr = PetscMalloc2(m,PetscInt,&dlens,m,PetscInt,&olens);CHKERRQ(ierr);
39     ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
40     rowners[0] = 0;
41     for (i=2; i<=size; i++) {
42       rowners[i] += rowners[i-1];
43     }
44     rstart = rowners[rank];
45     rend   = rowners[rank+1];
46     ierr   = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
47     if (!rank) {
48       gmata = (Mat_SeqAIJ*) gmat->data;
49       /* send row lengths to all processors */
50       for (i=0; i<m; i++) dlens[i] = gmata->ilen[i];
51       for (i=1; i<size; i++) {
52 	ierr = MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
53       }
54       /* determine the number of diagonal and off-diagonal nonzeros in each row */
55       ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
56       ierr = PetscMalloc(m*sizeof(PetscInt),&ld);CHKERRQ(ierr);
57       ierr = PetscMemzero(ld,m*sizeof(PetscInt));CHKERRQ(ierr);
58       jj = 0;
59       for (i=0; i<m; i++) {
60 	for (j=0; j<dlens[i]; j++) {
61           if (gmata->j[jj] < rstart) ld[i]++;
62 	  if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++;
63 	  jj++;
64 	}
65       }
66       /* send column indices to other processes */
67       for (i=1; i<size; i++) {
68 	nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
69 	ierr = MPI_Send(&nz,1,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
70 	ierr = MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
71       }
72 
73       /* send numerical values to other processes */
74       for (i=1; i<size; i++) {
75         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
76         ierr = MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
77       }
78       gmataa = gmata->a;
79       gmataj = gmata->j;
80 
81     } else {
82       /* receive row lengths */
83       ierr = MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
84       /* receive column indices */
85       ierr = MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
86       ierr = PetscMalloc2(nz,PetscScalar,&gmataa,nz,PetscInt,&gmataj);CHKERRQ(ierr);
87       ierr = MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
88       /* determine the number of diagonal and off-diagonal nonzeros in each row */
89       ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
90       ierr = PetscMalloc(m*sizeof(PetscInt),&ld);CHKERRQ(ierr);
91       ierr = PetscMemzero(ld,m*sizeof(PetscInt));CHKERRQ(ierr);
92       jj = 0;
93       for (i=0; i<m; i++) {
94 	for (j=0; j<dlens[i]; j++) {
95           if (gmataj[jj] < rstart) ld[i]++;
96 	  if (gmataj[jj] < rstart || gmataj[jj] >= rend) olens[i]++;
97 	  jj++;
98 	}
99       }
100       /* receive numerical values */
101       ierr = PetscMemzero(gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr);
102       ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
103     }
104     /* set preallocation */
105     for (i=0; i<m; i++) {
106       dlens[i] -= olens[i];
107     }
108     ierr = MatSeqAIJSetPreallocation(mat,0,dlens);CHKERRQ(ierr);
109     ierr = MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);CHKERRQ(ierr);
110 
111     for (i=0; i<m; i++) {
112       dlens[i] += olens[i];
113     }
114     cnt  = 0;
115     for (i=0; i<m; i++) {
116       row  = rstart + i;
117       ierr = MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);CHKERRQ(ierr);
118       cnt += dlens[i];
119     }
120     if (rank) {
121       ierr = PetscFree2(gmataa,gmataj);CHKERRQ(ierr);
122     }
123     ierr = PetscFree2(dlens,olens);CHKERRQ(ierr);
124     ierr = PetscFree(rowners);CHKERRQ(ierr);
125     ((Mat_MPIAIJ*)(mat->data))->ld = ld;
126     *inmat = mat;
127   } else {   /* column indices are already set; only need to move over numerical values from process 0 */
128     Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data;
129     Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data;
130     mat   = *inmat;
131     ierr  = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
132     if (!rank) {
133       /* send numerical values to other processes */
134       gmata = (Mat_SeqAIJ*) gmat->data;
135       ierr   = MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);CHKERRQ(ierr);
136       gmataa = gmata->a;
137       for (i=1; i<size; i++) {
138         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
139         ierr = MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
140       }
141       nz   = gmata->i[rowners[1]]-gmata->i[rowners[0]];
142     } else {
143       /* receive numerical values from process 0 */
144       nz   = Ad->nz + Ao->nz;
145       ierr = PetscMalloc(nz*sizeof(PetscScalar),&gmataa);CHKERRQ(ierr); gmataarestore = gmataa;
146       ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
147     }
148     /* transfer numerical values into the diagonal A and off-diagonal B parts of mat */
149     ld = ((Mat_MPIAIJ*)(mat->data))->ld;
150     ad = Ad->a;
151     ao = Ao->a;
152     if (mat->rmap->n) {
153       i  = 0;
154       nz = ld[i];                                   ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
155       nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
156     }
157     for (i=1; i<mat->rmap->n; i++) {
158       nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
159       nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
160     }
161     i--;
162     if (mat->rmap->n) {
163       nz = Ao->i[i+1] - Ao->i[i] - ld[i];           ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
164     }
165     if (rank) {
166       ierr = PetscFree(gmataarestore);CHKERRQ(ierr);
167     }
168   }
169   ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
170   ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
171   CHKMEMQ;
172   PetscFunctionReturn(0);
173 }
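/*
   A minimal usage sketch (not part of the library; assumes gmat is a fully assembled
   SeqAIJ matrix that is significant on process 0 and m is the number of rows this
   process is to own):

       Mat dist;
       ierr = MatDistribute_MPIAIJ(comm,gmat,m,MAT_INITIAL_MATRIX,&dist);CHKERRQ(ierr);
       ... use dist ...
       ierr = MatDistribute_MPIAIJ(comm,gmat,m,MAT_REUSE_MATRIX,&dist);CHKERRQ(ierr);

   The MAT_REUSE_MATRIX call keeps the parallel nonzero pattern and only moves the new
   numerical values over from process 0.
*/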
174 
175 /*
176   Local utility routine that creates a mapping from the global column
177 number to the local number in the off-diagonal part of the local
178 storage of the matrix.  When PETSC_USE_CTABLE is used this is scalable at
179 a slightly higher hash table cost; without it, it is not scalable (each processor
180 has an order-N integer array) but access is fast.
181 */
182 #undef __FUNCT__
183 #define __FUNCT__ "CreateColmap_MPIAIJ_Private"
184 PetscErrorCode CreateColmap_MPIAIJ_Private(Mat mat)
185 {
186   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
187   PetscErrorCode ierr;
188   PetscInt       n = aij->B->cmap->n,i;
189 
190   PetscFunctionBegin;
191 #if defined (PETSC_USE_CTABLE)
192   ierr = PetscTableCreate(n,&aij->colmap);CHKERRQ(ierr);
193   for (i=0; i<n; i++){
194     ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1);CHKERRQ(ierr);
195   }
196 #else
197   ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscInt),&aij->colmap);CHKERRQ(ierr);
198   ierr = PetscLogObjectMemory(mat,mat->cmap->N*sizeof(PetscInt));CHKERRQ(ierr);
199   ierr = PetscMemzero(aij->colmap,mat->cmap->N*sizeof(PetscInt));CHKERRQ(ierr);
200   for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
201 #endif
202   PetscFunctionReturn(0);
203 }
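/*
   The colmap built above stores, for each global column present in the off-diagonal
   part B, the local column index plus one (zero means "not present").  A sketch of the
   lookup used later in this file (gcol and lcol are placeholder names):

  #if defined (PETSC_USE_CTABLE)
       ierr = PetscTableFind(aij->colmap,gcol+1,&lcol);CHKERRQ(ierr);
       lcol--;
  #else
       lcol = aij->colmap[gcol] - 1;
  #endif

   so lcol < 0 indicates the column is not (yet) in the off-diagonal part.
*/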
204 
205 
206 #define CHUNKSIZE   15
207 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \
208 { \
209     if (col <= lastcol1) low1 = 0; else high1 = nrow1; \
210     lastcol1 = col;\
211     while (high1-low1 > 5) { \
212       t = (low1+high1)/2; \
213       if (rp1[t] > col) high1 = t; \
214       else             low1  = t; \
215     } \
216       for (_i=low1; _i<high1; _i++) { \
217         if (rp1[_i] > col) break; \
218         if (rp1[_i] == col) { \
219           if (addv == ADD_VALUES) ap1[_i] += value;   \
220           else                    ap1[_i] = value; \
221           goto a_noinsert; \
222         } \
223       }  \
224       if (value == 0.0 && ignorezeroentries) {low1 = 0; high1 = nrow1;goto a_noinsert;} \
225       if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;}		\
226       if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
227       MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \
228       N = nrow1++ - 1; a->nz++; high1++; \
229       /* shift up all the later entries in this row */ \
230       for (ii=N; ii>=_i; ii--) { \
231         rp1[ii+1] = rp1[ii]; \
232         ap1[ii+1] = ap1[ii]; \
233       } \
234       rp1[_i] = col;  \
235       ap1[_i] = value;  \
236       a_noinsert: ; \
237       ailen[row] = nrow1; \
238 }
239 
240 
241 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \
242 { \
243     if (col <= lastcol2) low2 = 0; else high2 = nrow2; \
244     lastcol2 = col;\
245     while (high2-low2 > 5) { \
246       t = (low2+high2)/2; \
247       if (rp2[t] > col) high2 = t; \
248       else             low2  = t; \
249     } \
250     for (_i=low2; _i<high2; _i++) {		\
251       if (rp2[_i] > col) break;			\
252       if (rp2[_i] == col) {			      \
253 	if (addv == ADD_VALUES) ap2[_i] += value;     \
254 	else                    ap2[_i] = value;      \
255 	goto b_noinsert;			      \
256       }						      \
257     }							      \
258     if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
259     if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;}		\
260     if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
261     MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \
262     N = nrow2++ - 1; b->nz++; high2++;					\
263     /* shift up all the later entries in this row */			\
264     for (ii=N; ii>=_i; ii--) {						\
265       rp2[ii+1] = rp2[ii];						\
266       ap2[ii+1] = ap2[ii];						\
267     }									\
268     rp2[_i] = col;							\
269     ap2[_i] = value;							\
270     b_noinsert: ;								\
271     bilen[row] = nrow2;							\
272 }
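/*
   Both macros above implement the same sorted insertion into one row of a SeqAIJ
   matrix: a binary search narrows the window to at most five entries, a linear scan
   then locates the column, existing entries are added to or overwritten according to
   addv, and otherwise the later entries of the row are shifted up to make room (with
   MatSeqXAIJReallocateAIJ() growing the row if it is full).  A plain-function sketch
   of the search step only (hypothetical helper, not used by this file):

       static PetscInt FindColumn(const PetscInt *rp,PetscInt n,PetscInt col)
       {
         PetscInt low = 0,high = n,t;
         while (high-low > 5) {
           t = (low+high)/2;
           if (rp[t] > col) high = t; else low = t;
         }
         while (low < high && rp[low] < col) low++;
         return low;    -- index of col if present, otherwise its insertion point --
       }
*/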
273 
274 #undef __FUNCT__
275 #define __FUNCT__ "MatSetValuesRow_MPIAIJ"
276 PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[])
277 {
278   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)A->data;
279   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data;
280   PetscErrorCode ierr;
281   PetscInt       l,*garray = mat->garray,diag;
282 
283   PetscFunctionBegin;
284   /* code only works for square matrices A */
285 
286   /* find size of row to the left of the diagonal part */
287   ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr);
288   row  = row - diag;
289   for (l=0; l<b->i[row+1]-b->i[row]; l++) {
290     if (garray[b->j[b->i[row]+l]] > diag) break;
291   }
292   ierr = PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));CHKERRQ(ierr);
293 
294   /* diagonal part */
295   ierr = PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));CHKERRQ(ierr);
296 
297   /* right of diagonal part */
298   ierr = PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));CHKERRQ(ierr);
299   PetscFunctionReturn(0);
300 }
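/*
   A sketch of the ordering of v[] assumed by MatSetValuesRow_MPIAIJ() above (the code
   does not check this): the caller passes the full global row in ascending column
   order, i.e.

       v = [ off-diagonal entries with global column < rstart |
             diagonal-block entries with rstart <= column < rend |
             off-diagonal entries with global column >= rend ]

   which is why the row is copied in three pieces (left part of B, all of A, right
   part of B).
*/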
301 
302 #undef __FUNCT__
303 #define __FUNCT__ "MatSetValues_MPIAIJ"
304 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
305 {
306   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
307   PetscScalar    value;
308   PetscErrorCode ierr;
309   PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
310   PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
311   PetscTruth     roworiented = aij->roworiented;
312 
313   /* Some Variables required in the macro */
314   Mat            A = aij->A;
315   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data;
316   PetscInt       *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
317   MatScalar      *aa = a->a;
318   PetscTruth     ignorezeroentries = a->ignorezeroentries;
319   Mat            B = aij->B;
320   Mat_SeqAIJ     *b = (Mat_SeqAIJ*)B->data;
321   PetscInt       *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
322   MatScalar      *ba = b->a;
323 
324   PetscInt       *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
325   PetscInt       nonew = a->nonew;
326   MatScalar      *ap1,*ap2;
327 
328   PetscFunctionBegin;
329   for (i=0; i<m; i++) {
330     if (im[i] < 0) continue;
331 #if defined(PETSC_USE_DEBUG)
332     if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
333 #endif
334     if (im[i] >= rstart && im[i] < rend) {
335       row      = im[i] - rstart;
336       lastcol1 = -1;
337       rp1      = aj + ai[row];
338       ap1      = aa + ai[row];
339       rmax1    = aimax[row];
340       nrow1    = ailen[row];
341       low1     = 0;
342       high1    = nrow1;
343       lastcol2 = -1;
344       rp2      = bj + bi[row];
345       ap2      = ba + bi[row];
346       rmax2    = bimax[row];
347       nrow2    = bilen[row];
348       low2     = 0;
349       high2    = nrow2;
350 
351       for (j=0; j<n; j++) {
352         if (v) {if (roworiented) value = v[i*n+j]; else value = v[i+j*m];} else value = 0.0;
353         if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
354         if (in[j] >= cstart && in[j] < cend){
355           col = in[j] - cstart;
356           MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
357         } else if (in[j] < 0) continue;
358 #if defined(PETSC_USE_DEBUG)
359         else if (in[j] >= mat->cmap->N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);}
360 #endif
361         else {
362           if (mat->was_assembled) {
363             if (!aij->colmap) {
364               ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
365             }
366 #if defined (PETSC_USE_CTABLE)
367             ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
368 	    col--;
369 #else
370             col = aij->colmap[in[j]] - 1;
371 #endif
372             if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
373               ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
374               col =  in[j];
375               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
376               B = aij->B;
377               b = (Mat_SeqAIJ*)B->data;
378               bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a;
379               rp2      = bj + bi[row];
380               ap2      = ba + bi[row];
381               rmax2    = bimax[row];
382               nrow2    = bilen[row];
383               low2     = 0;
384               high2    = nrow2;
385               bm       = aij->B->rmap->n;
386               ba = b->a;
387             }
388           } else col = in[j];
389           MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
390         }
391       }
392     } else {
393       if (!aij->donotstash) {
394         if (roworiented) {
395           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscTruth)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
396         } else {
397           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscTruth)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
398         }
399       }
400     }
401   }
402   PetscFunctionReturn(0);
403 }
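/*
   MatSetValues_MPIAIJ() above is reached through the public MatSetValues() interface;
   a typical assembly loop (a sketch, with ncols/cols/vals supplied by the caller) is

       ierr = MatGetOwnershipRange(mat,&rstart,&rend);CHKERRQ(ierr);
       for (row=rstart; row<rend; row++) {
         ierr = MatSetValues(mat,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
       }
       ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
       ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

   Locally owned rows go straight into aij->A or aij->B; rows owned by other processes
   are stashed and communicated during MatAssemblyBegin/End_MPIAIJ() below.
*/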
404 
405 #undef __FUNCT__
406 #define __FUNCT__ "MatGetValues_MPIAIJ"
407 PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
408 {
409   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
410   PetscErrorCode ierr;
411   PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
412   PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
413 
414   PetscFunctionBegin;
415   for (i=0; i<m; i++) {
416     if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/
417     if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
418     if (idxm[i] >= rstart && idxm[i] < rend) {
419       row = idxm[i] - rstart;
420       for (j=0; j<n; j++) {
421         if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */
422         if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
423         if (idxn[j] >= cstart && idxn[j] < cend){
424           col = idxn[j] - cstart;
425           ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
426         } else {
427           if (!aij->colmap) {
428             ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
429           }
430 #if defined (PETSC_USE_CTABLE)
431           ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
432           col --;
433 #else
434           col = aij->colmap[idxn[j]] - 1;
435 #endif
436           if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
437           else {
438             ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
439           }
440         }
441       }
442     } else {
443       SETERRQ(PETSC_ERR_SUP,"Only local values currently supported");
444     }
445   }
446   PetscFunctionReturn(0);
447 }
448 
449 extern PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat,Vec,Vec);
450 
451 #undef __FUNCT__
452 #define __FUNCT__ "MatAssemblyBegin_MPIAIJ"
453 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
454 {
455   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
456   PetscErrorCode ierr;
457   PetscInt       nstash,reallocs;
458   InsertMode     addv;
459 
460   PetscFunctionBegin;
461   if (aij->donotstash) {
462     PetscFunctionReturn(0);
463   }
464 
465   /* make sure all processors are using either INSERT_VALUES or ADD_VALUES */
466   ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,((PetscObject)mat)->comm);CHKERRQ(ierr);
467   if (addv == (ADD_VALUES|INSERT_VALUES)) {
468     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
469   }
470   mat->insertmode = addv; /* in case this processor had no cache */
471 
472   ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
473   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
474   ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
475   PetscFunctionReturn(0);
476 }
477 
478 #undef __FUNCT__
479 #define __FUNCT__ "MatAssemblyEnd_MPIAIJ"
480 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
481 {
482   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
483   Mat_SeqAIJ     *a=(Mat_SeqAIJ *)aij->A->data;
484   PetscErrorCode ierr;
485   PetscMPIInt    n;
486   PetscInt       i,j,rstart,ncols,flg;
487   PetscInt       *row,*col;
488   PetscTruth     other_disassembled;
489   PetscScalar    *val;
490   InsertMode     addv = mat->insertmode;
491 
492   /* do not use 'b = (Mat_SeqAIJ *)aij->B->data' as B can be reset in disassembly */
493   PetscFunctionBegin;
494   if (!aij->donotstash) {
495     while (1) {
496       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
497       if (!flg) break;
498 
499       for (i=0; i<n;) {
500         /* Now identify the consecutive values belonging to the same row */
501         for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
502         if (j < n) ncols = j-i;
503         else       ncols = n-i;
504         /* Now assemble all these values with a single function call */
505         ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
506         i = j;
507       }
508     }
509     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
510   }
511   a->compressedrow.use     = PETSC_FALSE;
512   ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
513   ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);
514 
515   /* determine if any processor has disassembled; if so we must
516      also disassemble ourselves, so that we may reassemble. */
517   /*
518      if nonzero structure of submatrix B cannot change then we know that
519      no processor disassembled thus we can skip this stuff
520   */
521   if (!((Mat_SeqAIJ*)aij->B->data)->nonew)  {
522     ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,((PetscObject)mat)->comm);CHKERRQ(ierr);
523     if (mat->was_assembled && !other_disassembled) {
524       ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
525     }
526   }
527   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
528     ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
529   }
530   ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr);
531   ((Mat_SeqAIJ *)aij->B->data)->compressedrow.use = PETSC_TRUE; /* b->compressedrow.use */
532   ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
533   ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);
534 
535   ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr);
536   aij->rowvalues = 0;
537 
538   /* used by MatAXPY() */
539   a->xtoy = 0; ((Mat_SeqAIJ *)aij->B->data)->xtoy = 0;  /* b->xtoy = 0 */
540   a->XtoY = 0; ((Mat_SeqAIJ *)aij->B->data)->XtoY = 0;  /* b->XtoY = 0 */
541 
542   if (aij->diag) {ierr = VecDestroy(aij->diag);CHKERRQ(ierr);aij->diag = 0;}
543   if (a->inode.size) mat->ops->multdiagonalblock = MatMultDiagonalBlock_MPIAIJ;
544   PetscFunctionReturn(0);
545 }
546 
547 #undef __FUNCT__
548 #define __FUNCT__ "MatZeroEntries_MPIAIJ"
549 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
550 {
551   Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
552   PetscErrorCode ierr;
553 
554   PetscFunctionBegin;
555   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
556   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
557   PetscFunctionReturn(0);
558 }
559 
560 #undef __FUNCT__
561 #define __FUNCT__ "MatZeroRows_MPIAIJ"
562 PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag)
563 {
564   Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
565   PetscErrorCode ierr;
566   PetscMPIInt    size = l->size,imdex,n,rank = l->rank,tag = ((PetscObject)A)->tag,lastidx = -1;
567   PetscInt       i,*owners = A->rmap->range;
568   PetscInt       *nprocs,j,idx,nsends,row;
569   PetscInt       nmax,*svalues,*starts,*owner,nrecvs;
570   PetscInt       *rvalues,count,base,slen,*source;
571   PetscInt       *lens,*lrows,*values,rstart=A->rmap->rstart;
572   MPI_Comm       comm = ((PetscObject)A)->comm;
573   MPI_Request    *send_waits,*recv_waits;
574   MPI_Status     recv_status,*send_status;
575 #if defined(PETSC_DEBUG)
576   PetscTruth     found = PETSC_FALSE;
577 #endif
578 
579   PetscFunctionBegin;
580   /*  first count number of contributors to each processor */
581   ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
582   ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
583   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/
584   j = 0;
585   for (i=0; i<N; i++) {
586     if (lastidx > (idx = rows[i])) j = 0;
587     lastidx = idx;
588     for (; j<size; j++) {
589       if (idx >= owners[j] && idx < owners[j+1]) {
590         nprocs[2*j]++;
591         nprocs[2*j+1] = 1;
592         owner[i] = j;
593 #if defined(PETSC_DEBUG)
594         found = PETSC_TRUE;
595 #endif
596         break;
597       }
598     }
599 #if defined(PETSC_DEBUG)
600     if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
601     found = PETSC_FALSE;
602 #endif
603   }
604   nsends = 0;  for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}
605 
606   /* inform other processors of number of messages and max length*/
607   ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);
608 
609   /* post receives:   */
610   ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr);
611   ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
612   for (i=0; i<nrecvs; i++) {
613     ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
614   }
615 
616   /* do sends:
617       1) starts[i] gives the starting index in svalues for stuff going to
618          the ith processor
619   */
620   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr);
621   ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
622   ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr);
623   starts[0] = 0;
624   for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
625   for (i=0; i<N; i++) {
626     svalues[starts[owner[i]]++] = rows[i];
627   }
628 
629   starts[0] = 0;
630   for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
631   count = 0;
632   for (i=0; i<size; i++) {
633     if (nprocs[2*i+1]) {
634       ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
635     }
636   }
637   ierr = PetscFree(starts);CHKERRQ(ierr);
638 
639   base = owners[rank];
640 
641   /*  wait on receives */
642   ierr   = PetscMalloc(2*(nrecvs+1)*sizeof(PetscInt),&lens);CHKERRQ(ierr);
643   source = lens + nrecvs;
644   count  = nrecvs; slen = 0;
645   while (count) {
646     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
647     /* unpack receives into our local space */
648     ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);
649     source[imdex]  = recv_status.MPI_SOURCE;
650     lens[imdex]    = n;
651     slen          += n;
652     count--;
653   }
654   ierr = PetscFree(recv_waits);CHKERRQ(ierr);
655 
656   /* move the received row indices into the local list */
657   ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr);
658   count = 0;
659   for (i=0; i<nrecvs; i++) {
660     values = rvalues + i*nmax;
661     for (j=0; j<lens[i]; j++) {
662       lrows[count++] = values[j] - base;
663     }
664   }
665   ierr = PetscFree(rvalues);CHKERRQ(ierr);
666   ierr = PetscFree(lens);CHKERRQ(ierr);
667   ierr = PetscFree(owner);CHKERRQ(ierr);
668   ierr = PetscFree(nprocs);CHKERRQ(ierr);
669 
670   /* actually zap the local rows */
671   /*
672         Zero the required rows. If the "diagonal block" of the matrix
673      is square and the user wishes to set the diagonal, we use separate
674      code so that MatSetValues() is not called for each diagonal entry,
675      which would allocate new memory each time and slow things down.
676 
677   */
678   /* must zero l->B before l->A because the (diag) case below may put values into l->B */
679   ierr = MatZeroRows(l->B,slen,lrows,0.0);CHKERRQ(ierr);
680   if ((diag != 0.0) && (l->A->rmap->N == l->A->cmap->N)) {
681     ierr      = MatZeroRows(l->A,slen,lrows,diag);CHKERRQ(ierr);
682   } else if (diag != 0.0) {
683     ierr = MatZeroRows(l->A,slen,lrows,0.0);CHKERRQ(ierr);
684     if (((Mat_SeqAIJ*)l->A->data)->nonew) {
685       SETERRQ(PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\n\
686 MAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
687     }
688     for (i = 0; i < slen; i++) {
689       row  = lrows[i] + rstart;
690       ierr = MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr);
691     }
692     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
693     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
694   } else {
695     ierr = MatZeroRows(l->A,slen,lrows,0.0);CHKERRQ(ierr);
696   }
697   ierr = PetscFree(lrows);CHKERRQ(ierr);
698 
699   /* wait on sends */
700   if (nsends) {
701     ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
702     ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
703     ierr = PetscFree(send_status);CHKERRQ(ierr);
704   }
705   ierr = PetscFree(send_waits);CHKERRQ(ierr);
706   ierr = PetscFree(svalues);CHKERRQ(ierr);
707 
708   PetscFunctionReturn(0);
709 }
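/*
   A sketch of the caller's side of the routine above: each process may list any global
   rows, not only rows it owns, and the sends/receives above route each row index to
   its owner before the local rows are zeroed.

       PetscInt rows[2] = {0, 7};                        ... global row numbers ...
       ierr = MatZeroRows(A,2,rows,1.0);CHKERRQ(ierr);   ... zero them, put 1.0 on the diagonal ...
*/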
710 
711 #undef __FUNCT__
712 #define __FUNCT__ "MatMult_MPIAIJ"
713 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
714 {
715   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
716   PetscErrorCode ierr;
717   PetscInt       nt;
718 
719   PetscFunctionBegin;
720   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
721   if (nt != A->cmap->n) {
722     SETERRQ2(PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt);
723   }
724   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
725   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
726   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
727   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
728   PetscFunctionReturn(0);
729 }
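/*
   The multiply above overlaps communication with computation: while the ghost values
   x_ghost are being scattered into a->lvec,

       yy = A * x_local                (diagonal block)

   is computed, and once the scatter completes

       yy = yy + B * x_ghost           (off-diagonal block)

   so the global product is yy = [A B] * [x_local; x_ghost].
*/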
730 
731 #undef __FUNCT__
732 #define __FUNCT__ "MatMultDiagonalBlock_MPIAIJ"
733 PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx)
734 {
735   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
736   PetscErrorCode ierr;
737 
738   PetscFunctionBegin;
739   ierr = MatMultDiagonalBlock(a->A,bb,xx);CHKERRQ(ierr);
740   PetscFunctionReturn(0);
741 }
742 
743 #undef __FUNCT__
744 #define __FUNCT__ "MatMultAdd_MPIAIJ"
745 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
746 {
747   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
748   PetscErrorCode ierr;
749 
750   PetscFunctionBegin;
751   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
752   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
753   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
754   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
755   PetscFunctionReturn(0);
756 }
757 
758 #undef __FUNCT__
759 #define __FUNCT__ "MatMultTranspose_MPIAIJ"
760 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
761 {
762   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
763   PetscErrorCode ierr;
764   PetscTruth     merged;
765 
766   PetscFunctionBegin;
767   ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr);
768   /* do nondiagonal part */
769   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
770   if (!merged) {
771     /* send it on its way */
772     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
773     /* do local part */
774     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
775     /* receive remote parts: note this assumes the values are not actually */
776     /* added into yy until the VecScatterEnd() on the next line */
777     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
778   } else {
779     /* do local part */
780     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
781     /* send it on its way */
782     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
783     /* the values were actually received in the Begin() but we still need to call this no-op */
784     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
785   }
786   PetscFunctionReturn(0);
787 }
788 
789 EXTERN_C_BEGIN
790 #undef __FUNCT__
791 #define __FUNCT__ "MatIsTranspose_MPIAIJ"
792 PetscErrorCode PETSCMAT_DLLEXPORT MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscTruth *f)
793 {
794   MPI_Comm       comm;
795   Mat_MPIAIJ     *Aij = (Mat_MPIAIJ *) Amat->data, *Bij;
796   Mat            Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs;
797   IS             Me,Notme;
798   PetscErrorCode ierr;
799   PetscInt       M,N,first,last,*notme,i;
800   PetscMPIInt    size;
801 
802   PetscFunctionBegin;
803 
804   /* Easy test: symmetric diagonal block */
805   Bij = (Mat_MPIAIJ *) Bmat->data; Bdia = Bij->A;
806   ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr);
807   if (!*f) PetscFunctionReturn(0);
808   ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr);
809   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
810   if (size == 1) PetscFunctionReturn(0);
811 
812   /* Hard test: off-diagonal block. This requires a call to MatGetSubMatrices(). */
813   ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr);
814   ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr);
815   ierr = PetscMalloc((N-last+first)*sizeof(PetscInt),&notme);CHKERRQ(ierr);
816   for (i=0; i<first; i++) notme[i] = i;
817   for (i=last; i<M; i++) notme[i-last+first] = i;
818   ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,&Notme);CHKERRQ(ierr);
819   ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr);
820   ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr);
821   Aoff = Aoffs[0];
822   ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr);
823   Boff = Boffs[0];
824   ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr);
825   ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr);
826   ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr);
827   ierr = ISDestroy(Me);CHKERRQ(ierr);
828   ierr = ISDestroy(Notme);CHKERRQ(ierr);
829 
830   PetscFunctionReturn(0);
831 }
832 EXTERN_C_END
833 
834 #undef __FUNCT__
835 #define __FUNCT__ "MatMultTransposeAdd_MPIAIJ"
836 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
837 {
838   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
839   PetscErrorCode ierr;
840 
841   PetscFunctionBegin;
842   /* do nondiagonal part */
843   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
844   /* send it on its way */
845   ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
846   /* do local part */
847   ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
848   /* receive remote parts */
849   ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
850   PetscFunctionReturn(0);
851 }
852 
853 /*
854   This only works correctly for square matrices where the subblock A->A is the
855    diagonal block
856 */
857 #undef __FUNCT__
858 #define __FUNCT__ "MatGetDiagonal_MPIAIJ"
859 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v)
860 {
861   PetscErrorCode ierr;
862   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
863 
864   PetscFunctionBegin;
865   if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
866   if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) {
867     SETERRQ(PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
868   }
869   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
870   PetscFunctionReturn(0);
871 }
872 
873 #undef __FUNCT__
874 #define __FUNCT__ "MatScale_MPIAIJ"
875 PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa)
876 {
877   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
878   PetscErrorCode ierr;
879 
880   PetscFunctionBegin;
881   ierr = MatScale(a->A,aa);CHKERRQ(ierr);
882   ierr = MatScale(a->B,aa);CHKERRQ(ierr);
883   PetscFunctionReturn(0);
884 }
885 
886 #undef __FUNCT__
887 #define __FUNCT__ "MatDestroy_MPIAIJ"
888 PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
889 {
890   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
891   PetscErrorCode ierr;
892 
893   PetscFunctionBegin;
894 #if defined(PETSC_USE_LOG)
895   PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N);
896 #endif
897   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
898   if (aij->diag) {ierr = VecDestroy(aij->diag);CHKERRQ(ierr);}
899   ierr = MatDestroy(aij->A);CHKERRQ(ierr);
900   ierr = MatDestroy(aij->B);CHKERRQ(ierr);
901 #if defined (PETSC_USE_CTABLE)
902   if (aij->colmap) {ierr = PetscTableDestroy(aij->colmap);CHKERRQ(ierr);}
903 #else
904   ierr = PetscFree(aij->colmap);CHKERRQ(ierr);
905 #endif
906   ierr = PetscFree(aij->garray);CHKERRQ(ierr);
907   if (aij->lvec)   {ierr = VecDestroy(aij->lvec);CHKERRQ(ierr);}
908   if (aij->Mvctx)  {ierr = VecScatterDestroy(aij->Mvctx);CHKERRQ(ierr);}
909   ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr);
910   ierr = PetscFree(aij->ld);CHKERRQ(ierr);
911   ierr = PetscFree(aij);CHKERRQ(ierr);
912 
913   ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
914   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr);
915   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr);
916   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr);
917   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C","",PETSC_NULL);CHKERRQ(ierr);
918   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
919   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr);
920   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C","",PETSC_NULL);CHKERRQ(ierr);
921   PetscFunctionReturn(0);
922 }
923 
924 #undef __FUNCT__
925 #define __FUNCT__ "MatView_MPIAIJ_Binary"
926 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
927 {
928   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
929   Mat_SeqAIJ*       A = (Mat_SeqAIJ*)aij->A->data;
930   Mat_SeqAIJ*       B = (Mat_SeqAIJ*)aij->B->data;
931   PetscErrorCode    ierr;
932   PetscMPIInt       rank,size,tag = ((PetscObject)viewer)->tag;
933   int               fd;
934   PetscInt          nz,header[4],*row_lengths,*range=0,rlen,i;
935   PetscInt          nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap->rstart,rnz;
936   PetscScalar       *column_values;
937 
938   PetscFunctionBegin;
939   ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr);
940   ierr = MPI_Comm_size(((PetscObject)mat)->comm,&size);CHKERRQ(ierr);
941   nz   = A->nz + B->nz;
942   if (!rank) {
943     header[0] = MAT_FILE_COOKIE;
944     header[1] = mat->rmap->N;
945     header[2] = mat->cmap->N;
946     ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
947     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
948     ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
949     /* get largest number of rows any processor has */
950     rlen = mat->rmap->n;
951     range = mat->rmap->range;
952     for (i=1; i<size; i++) {
953       rlen = PetscMax(rlen,range[i+1] - range[i]);
954     }
955   } else {
956     ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
957     rlen = mat->rmap->n;
958   }
959 
960   /* load up the local row counts */
961   ierr = PetscMalloc((rlen+1)*sizeof(PetscInt),&row_lengths);CHKERRQ(ierr);
962   for (i=0; i<mat->rmap->n; i++) {
963     row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
964   }
965 
966   /* store the row lengths to the file */
967   if (!rank) {
968     MPI_Status status;
969     ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap->n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
970     for (i=1; i<size; i++) {
971       rlen = range[i+1] - range[i];
972       ierr = MPI_Recv(row_lengths,rlen,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
973       ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
974     }
975   } else {
976     ierr = MPI_Send(row_lengths,mat->rmap->n,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
977   }
978   ierr = PetscFree(row_lengths);CHKERRQ(ierr);
979 
980   /* load up the local column indices */
981   nzmax = nz; /* process 0 needs as much space as the largest amount any processor needs */
982   ierr = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
983   ierr = PetscMalloc((nzmax+1)*sizeof(PetscInt),&column_indices);CHKERRQ(ierr);
984   cnt  = 0;
985   for (i=0; i<mat->rmap->n; i++) {
986     for (j=B->i[i]; j<B->i[i+1]; j++) {
987       if ( (col = garray[B->j[j]]) > cstart) break;
988       column_indices[cnt++] = col;
989     }
990     for (k=A->i[i]; k<A->i[i+1]; k++) {
991       column_indices[cnt++] = A->j[k] + cstart;
992     }
993     for (; j<B->i[i+1]; j++) {
994       column_indices[cnt++] = garray[B->j[j]];
995     }
996   }
997   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
998 
999   /* store the column indices to the file */
1000   if (!rank) {
1001     MPI_Status status;
1002     ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1003     for (i=1; i<size; i++) {
1004       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
1005       if (rnz > nzmax) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: rnz = %D nzmax = %D",rnz,nzmax);
1006       ierr = MPI_Recv(column_indices,rnz,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
1007       ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1008     }
1009   } else {
1010     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1011     ierr = MPI_Send(column_indices,nz,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1012   }
1013   ierr = PetscFree(column_indices);CHKERRQ(ierr);
1014 
1015   /* load up the local column values */
1016   ierr = PetscMalloc((nzmax+1)*sizeof(PetscScalar),&column_values);CHKERRQ(ierr);
1017   cnt  = 0;
1018   for (i=0; i<mat->rmap->n; i++) {
1019     for (j=B->i[i]; j<B->i[i+1]; j++) {
1020       if ( garray[B->j[j]] > cstart) break;
1021       column_values[cnt++] = B->a[j];
1022     }
1023     for (k=A->i[i]; k<A->i[i+1]; k++) {
1024       column_values[cnt++] = A->a[k];
1025     }
1026     for (; j<B->i[i+1]; j++) {
1027       column_values[cnt++] = B->a[j];
1028     }
1029   }
1030   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
1031 
1032   /* store the column values to the file */
1033   if (!rank) {
1034     MPI_Status status;
1035     ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1036     for (i=1; i<size; i++) {
1037       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
1038       if (rnz > nzmax) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: rnz = %D nzmax = %D",rnz,nzmax);
1039       ierr = MPI_Recv(column_values,rnz,MPIU_SCALAR,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
1040       ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1041     }
1042   } else {
1043     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1044     ierr = MPI_Send(column_values,nz,MPIU_SCALAR,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1045   }
1046   ierr = PetscFree(column_values);CHKERRQ(ierr);
1047   PetscFunctionReturn(0);
1048 }
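/*
   The binary file produced above has the following layout, all of it written by
   process 0 in PETSc binary format:

       header[4]        = {MAT_FILE_COOKIE, M, N, total number of nonzeros}
       row lengths        M  values of type PETSC_INT, in global row order
       column indices     nz values of type PETSC_INT, rows in order, columns ascending within each row
       numerical values   nz values of type PETSC_SCALAR, in the same ordering
*/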
1049 
1050 #undef __FUNCT__
1051 #define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket"
1052 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
1053 {
1054   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
1055   PetscErrorCode    ierr;
1056   PetscMPIInt       rank = aij->rank,size = aij->size;
1057   PetscTruth        isdraw,iascii,isbinary;
1058   PetscViewer       sviewer;
1059   PetscViewerFormat format;
1060 
1061   PetscFunctionBegin;
1062   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
1063   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
1064   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
1065   if (iascii) {
1066     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1067     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1068       MatInfo    info;
1069       PetscTruth inodes;
1070 
1071       ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr);
1072       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
1073       ierr = MatInodeGetInodeSizes(aij->A,PETSC_NULL,(PetscInt **)&inodes,PETSC_NULL);CHKERRQ(ierr);
1074       if (!inodes) {
1075         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, not using I-node routines\n",
1076 					      rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
1077       } else {
1078         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, using I-node routines\n",
1079 		    rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
1080       }
1081       ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
1082       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1083       ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
1084       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1085       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1086       ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
1087       ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr);
1088       PetscFunctionReturn(0);
1089     } else if (format == PETSC_VIEWER_ASCII_INFO) {
1090       PetscInt   inodecount,inodelimit,*inodes;
1091       ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr);
1092       if (inodes) {
1093         ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr);
1094       } else {
1095         ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr);
1096       }
1097       PetscFunctionReturn(0);
1098     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1099       PetscFunctionReturn(0);
1100     }
1101   } else if (isbinary) {
1102     if (size == 1) {
1103       ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1104       ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
1105     } else {
1106       ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr);
1107     }
1108     PetscFunctionReturn(0);
1109   } else if (isdraw) {
1110     PetscDraw  draw;
1111     PetscTruth isnull;
1112     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
1113     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
1114   }
1115 
1116   if (size == 1) {
1117     ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1118     ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
1119   } else {
1120     /* assemble the entire matrix onto first processor. */
1121     Mat         A;
1122     Mat_SeqAIJ  *Aloc;
1123     PetscInt    M = mat->rmap->N,N = mat->cmap->N,m,*ai,*aj,row,*cols,i,*ct;
1124     MatScalar   *a;
1125 
1126     if (mat->rmap->N > 1024) {
1127       PetscTruth flg = PETSC_FALSE;
1128 
1129       ierr = PetscOptionsGetTruth(((PetscObject) mat)->prefix, "-mat_ascii_output_large", &flg,PETSC_NULL);CHKERRQ(ierr);
1130       if (!flg) {
1131         SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"ASCII matrix output not allowed for matrices with more than 1024 rows, use binary format instead.\nYou can override this restriction using -mat_ascii_output_large.");
1132       }
1133     }
1134 
1135     ierr = MatCreate(((PetscObject)mat)->comm,&A);CHKERRQ(ierr);
1136     if (!rank) {
1137       ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
1138     } else {
1139       ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
1140     }
1141     /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */
1142     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
1143     ierr = MatMPIAIJSetPreallocation(A,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
1144     ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr);
1145 
1146     /* copy over the A part */
1147     Aloc = (Mat_SeqAIJ*)aij->A->data;
1148     m = aij->A->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1149     row = mat->rmap->rstart;
1150     for (i=0; i<ai[m]; i++) {aj[i] += mat->cmap->rstart ;}
1151     for (i=0; i<m; i++) {
1152       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
1153       row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
1154     }
1155     aj = Aloc->j;
1156     for (i=0; i<ai[m]; i++) {aj[i] -= mat->cmap->rstart;}
1157 
1158     /* copy over the B part */
1159     Aloc = (Mat_SeqAIJ*)aij->B->data;
1160     m    = aij->B->rmap->n;  ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1161     row  = mat->rmap->rstart;
1162     ierr = PetscMalloc((ai[m]+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr);
1163     ct   = cols;
1164     for (i=0; i<ai[m]; i++) {cols[i] = aij->garray[aj[i]];}
1165     for (i=0; i<m; i++) {
1166       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
1167       row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
1168     }
1169     ierr = PetscFree(ct);CHKERRQ(ierr);
1170     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1171     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1172     /*
1173        Everyone has to participate in drawing the matrix since the graphics waits are
1174        synchronized across all processors that share the PetscDraw object
1175     */
1176     ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
1177     if (!rank) {
1178       ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1179       ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
1180     }
1181     ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
1182     ierr = MatDestroy(A);CHKERRQ(ierr);
1183   }
1184   PetscFunctionReturn(0);
1185 }
1186 
1187 #undef __FUNCT__
1188 #define __FUNCT__ "MatView_MPIAIJ"
1189 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer)
1190 {
1191   PetscErrorCode ierr;
1192   PetscTruth     iascii,isdraw,issocket,isbinary;
1193 
1194   PetscFunctionBegin;
1195   ierr  = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
1196   ierr  = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
1197   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
1198   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr);
1199   if (iascii || isdraw || isbinary || issocket) {
1200     ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
1201   } else {
1202     SETERRQ1(PETSC_ERR_SUP,"Viewer type %s not supported by MPIAIJ matrices",((PetscObject)viewer)->type_name);
1203   }
1204   PetscFunctionReturn(0);
1205 }
1206 
1207 #undef __FUNCT__
1208 #define __FUNCT__ "MatRelax_MPIAIJ"
1209 PetscErrorCode MatRelax_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
1210 {
1211   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1212   PetscErrorCode ierr;
1213   Vec            bb1 = 0;
1214   PetscTruth     hasop;
1215 
1216   PetscFunctionBegin;
1217   if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) {
1218     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
1219   }
1220 
1221   if (flag == SOR_APPLY_UPPER) {
1222     ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1223     PetscFunctionReturn(0);
1224   }
1225 
1226   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
1227     if (flag & SOR_ZERO_INITIAL_GUESS) {
1228       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1229       its--;
1230     }
1231 
1232     while (its--) {
1233       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1234       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1235 
1236       /* update rhs: bb1 = bb - B*x */
1237       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1238       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1239 
1240       /* local sweep */
1241       ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1242     }
1243   } else if (flag & SOR_LOCAL_FORWARD_SWEEP){
1244     if (flag & SOR_ZERO_INITIAL_GUESS) {
1245       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1246       its--;
1247     }
1248     while (its--) {
1249       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1250       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1251 
1252       /* update rhs: bb1 = bb - B*x */
1253       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1254       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1255 
1256       /* local sweep */
1257       ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1258     }
1259   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){
1260     if (flag & SOR_ZERO_INITIAL_GUESS) {
1261       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1262       its--;
1263     }
1264     while (its--) {
1265       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1266       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1267 
1268       /* update rhs: bb1 = bb - B*x */
1269       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1270       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1271 
1272       /* local sweep */
1273       ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1274     }
1275   }  else if (flag & SOR_EISENSTAT) {
1276     Vec         xx1;
1277 
1278     ierr = VecDuplicate(bb,&xx1);CHKERRQ(ierr);
1279     ierr = (*mat->A->ops->relax)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr);
1280 
1281     ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1282     ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1283     if (!mat->diag) {
1284       ierr = MatGetVecs(matin,&mat->diag,PETSC_NULL);CHKERRQ(ierr);
1285       ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr);
1286     }
1287     ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr);
1288     if (hasop) {
1289       ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr);
1290     } else {
1291       ierr = VecPointwiseMult(bb1,mat->diag,xx);CHKERRQ(ierr);
1292     }
1293     ierr = VecAYPX(bb1,(omega-2.0)/omega,bb);CHKERRQ(ierr);
1294 
1295     ierr = MatMultAdd(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr);
1296 
1297     /* local sweep */
1298     ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr);
1299     ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr);
1300     ierr = VecDestroy(xx1);CHKERRQ(ierr);
1301   } else {
1302     SETERRQ(PETSC_ERR_SUP,"Parallel SOR not supported");
1303   }
1304 
1305   if (bb1) {ierr = VecDestroy(bb1);CHKERRQ(ierr);}
1306   PetscFunctionReturn(0);
1307 }
1308 
1309 #undef __FUNCT__
1310 #define __FUNCT__ "MatPermute_MPIAIJ"
1311 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B)
1312 {
1313   MPI_Comm       comm,pcomm;
1314   PetscInt       first,local_size,nrows;
1315   const PetscInt *rows;
1316   PetscMPIInt    size;
1317   IS             crowp,growp,irowp,lrowp,lcolp,icolp;
1318   PetscErrorCode ierr;
1319 
1320   PetscFunctionBegin;
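  /* Overall approach: gather the row permutation onto every process, invert it, and keep
     only the slice that corresponds to the locally owned rows; the column permutation is
     inverted locally; the permuted matrix is then extracted as a submatrix of A. */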
1321   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
1322   /* make a collective version of 'rowp' */
1323   ierr = PetscObjectGetComm((PetscObject)rowp,&pcomm);CHKERRQ(ierr);
1324   if (pcomm==comm) {
1325     crowp = rowp;
1326   } else {
1327     ierr = ISGetSize(rowp,&nrows);CHKERRQ(ierr);
1328     ierr = ISGetIndices(rowp,&rows);CHKERRQ(ierr);
1329     ierr = ISCreateGeneral(comm,nrows,rows,&crowp);CHKERRQ(ierr);
1330     ierr = ISRestoreIndices(rowp,&rows);CHKERRQ(ierr);
1331   }
1332   /* collect the global row permutation and invert it */
1333   ierr = ISAllGather(crowp,&growp);CHKERRQ(ierr);
1334   ierr = ISSetPermutation(growp);CHKERRQ(ierr);
1335   if (pcomm!=comm) {
1336     ierr = ISDestroy(crowp);CHKERRQ(ierr);
1337   }
1338   ierr = ISInvertPermutation(growp,PETSC_DECIDE,&irowp);CHKERRQ(ierr);
1339   /* get the local target indices */
1340   ierr = MatGetOwnershipRange(A,&first,PETSC_NULL);CHKERRQ(ierr);
1341   ierr = MatGetLocalSize(A,&local_size,PETSC_NULL);CHKERRQ(ierr);
1342   ierr = ISGetIndices(irowp,&rows);CHKERRQ(ierr);
1343   ierr = ISCreateGeneral(MPI_COMM_SELF,local_size,rows+first,&lrowp);CHKERRQ(ierr);
1344   ierr = ISRestoreIndices(irowp,&rows);CHKERRQ(ierr);
1345   ierr = ISDestroy(irowp);CHKERRQ(ierr);
1346   /* the column permutation is so much easier;
1347      make a local version of 'colp' and invert it */
1348   ierr = PetscObjectGetComm((PetscObject)colp,&pcomm);CHKERRQ(ierr);
1349   ierr = MPI_Comm_size(pcomm,&size);CHKERRQ(ierr);
1350   if (size==1) {
1351     lcolp = colp;
1352   } else {
1353     ierr = ISGetSize(colp,&nrows);CHKERRQ(ierr);
1354     ierr = ISGetIndices(colp,&rows);CHKERRQ(ierr);
1355     ierr = ISCreateGeneral(MPI_COMM_SELF,nrows,rows,&lcolp);CHKERRQ(ierr);
1356   }
1357   ierr = ISSetPermutation(lcolp);CHKERRQ(ierr);
1358   ierr = ISInvertPermutation(lcolp,PETSC_DECIDE,&icolp);CHKERRQ(ierr);
1359   ierr = ISSetPermutation(icolp);CHKERRQ(ierr);
1360   if (size>1) {
1361     ierr = ISRestoreIndices(colp,&rows);CHKERRQ(ierr);
1362     ierr = ISDestroy(lcolp);CHKERRQ(ierr);
1363   }
1364   /* now we just get the submatrix */
1365   ierr = MatGetSubMatrix_MPIAIJ_Private(A,lrowp,icolp,local_size,MAT_INITIAL_MATRIX,B);CHKERRQ(ierr);
1366   /* clean up */
1367   ierr = ISDestroy(lrowp);CHKERRQ(ierr);
1368   ierr = ISDestroy(icolp);CHKERRQ(ierr);
1369   PetscFunctionReturn(0);
1370 }
1371 
1372 #undef __FUNCT__
1373 #define __FUNCT__ "MatGetInfo_MPIAIJ"
1374 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1375 {
1376   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1377   Mat            A = mat->A,B = mat->B;
1378   PetscErrorCode ierr;
1379   PetscReal      isend[5],irecv[5];
1380 
1381   PetscFunctionBegin;
1382   info->block_size     = 1.0;
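  /* Collect the statistics of the two local blocks, add them, and then either report the
     local totals or reduce them with MPI_MAX/MPI_SUM across the communicator, as requested
     by flag. */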
1383   ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1384   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1385   isend[3] = info->memory;  isend[4] = info->mallocs;
1386   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1387   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1388   isend[3] += info->memory;  isend[4] += info->mallocs;
1389   if (flag == MAT_LOCAL) {
1390     info->nz_used      = isend[0];
1391     info->nz_allocated = isend[1];
1392     info->nz_unneeded  = isend[2];
1393     info->memory       = isend[3];
1394     info->mallocs      = isend[4];
1395   } else if (flag == MAT_GLOBAL_MAX) {
1396     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,((PetscObject)matin)->comm);CHKERRQ(ierr);
1397     info->nz_used      = irecv[0];
1398     info->nz_allocated = irecv[1];
1399     info->nz_unneeded  = irecv[2];
1400     info->memory       = irecv[3];
1401     info->mallocs      = irecv[4];
1402   } else if (flag == MAT_GLOBAL_SUM) {
1403     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,((PetscObject)matin)->comm);CHKERRQ(ierr);
1404     info->nz_used      = irecv[0];
1405     info->nz_allocated = irecv[1];
1406     info->nz_unneeded  = irecv[2];
1407     info->memory       = irecv[3];
1408     info->mallocs      = irecv[4];
1409   }
1410   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1411   info->fill_ratio_needed = 0;
1412   info->factor_mallocs    = 0;
1413 
1414   PetscFunctionReturn(0);
1415 }
1416 
1417 #undef __FUNCT__
1418 #define __FUNCT__ "MatSetOption_MPIAIJ"
1419 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscTruth flg)
1420 {
1421   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1422   PetscErrorCode ierr;
1423 
1424   PetscFunctionBegin;
1425   switch (op) {
1426   case MAT_NEW_NONZERO_LOCATIONS:
1427   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1428   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1429   case MAT_KEEP_NONZERO_PATTERN:
1430   case MAT_NEW_NONZERO_LOCATION_ERR:
1431   case MAT_USE_INODES:
1432   case MAT_IGNORE_ZERO_ENTRIES:
1433     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1434     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1435     break;
1436   case MAT_ROW_ORIENTED:
1437     a->roworiented = flg;
1438     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1439     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1440     break;
1441   case MAT_NEW_DIAGONALS:
1442     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1443     break;
1444   case MAT_IGNORE_OFF_PROC_ENTRIES:
1445     a->donotstash = PETSC_TRUE;
1446     break;
1447   case MAT_SYMMETRIC:
1448     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1449     break;
1450   case MAT_STRUCTURALLY_SYMMETRIC:
1451   case MAT_HERMITIAN:
1452   case MAT_SYMMETRY_ETERNAL:
1453     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1454     break;
1455   default:
1456     SETERRQ1(PETSC_ERR_SUP,"unknown option %d",op);
1457   }
1458   PetscFunctionReturn(0);
1459 }
1460 
1461 #undef __FUNCT__
1462 #define __FUNCT__ "MatGetRow_MPIAIJ"
1463 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1464 {
1465   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1466   PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
1467   PetscErrorCode ierr;
1468   PetscInt       i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart;
1469   PetscInt       nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend;
1470   PetscInt       *cmap,*idx_p;
1471 
1472   PetscFunctionBegin;
1473   if (mat->getrowactive) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Already active");
1474   mat->getrowactive = PETSC_TRUE;
1475 
1476   if (!mat->rowvalues && (idx || v)) {
1477     /*
1478         allocate enough space to hold information from the longest row.
1479     */
1480     Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data;
1481     PetscInt     max = 1,tmp;
1482     for (i=0; i<matin->rmap->n; i++) {
1483       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1484       if (max < tmp) { max = tmp; }
1485     }
1486     ierr = PetscMalloc(max*(sizeof(PetscInt)+sizeof(PetscScalar)),&mat->rowvalues);CHKERRQ(ierr);
1487     mat->rowindices = (PetscInt*)(mat->rowvalues + max);
1488   }
1489 
1490   if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Only local rows");
1491   lrow = row - rstart;
1492 
1493   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1494   if (!v)   {pvA = 0; pvB = 0;}
1495   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1496   ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1497   ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1498   nztot = nzA + nzB;
1499 
1500   cmap  = mat->garray;
1501   if (v  || idx) {
1502     if (nztot) {
1503       /* Sort by increasing column numbers, assuming A and B already sorted */
1504       PetscInt imark = -1;
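      /* The merged row consists of three segments: off-process columns that lie to the left
         of the diagonal block, the diagonal-block columns (shifted by cstart to global
         numbering), and the remaining off-process columns; imark records where the first
         segment ends. */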
1505       if (v) {
1506         *v = v_p = mat->rowvalues;
1507         for (i=0; i<nzB; i++) {
1508           if (cmap[cworkB[i]] < cstart)   v_p[i] = vworkB[i];
1509           else break;
1510         }
1511         imark = i;
1512         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1513         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1514       }
1515       if (idx) {
1516         *idx = idx_p = mat->rowindices;
1517         if (imark > -1) {
1518           for (i=0; i<imark; i++) {
1519             idx_p[i] = cmap[cworkB[i]];
1520           }
1521         } else {
1522           for (i=0; i<nzB; i++) {
1523             if (cmap[cworkB[i]] < cstart)   idx_p[i] = cmap[cworkB[i]];
1524             else break;
1525           }
1526           imark = i;
1527         }
1528         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart + cworkA[i];
1529         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]];
1530       }
1531     } else {
1532       if (idx) *idx = 0;
1533       if (v)   *v   = 0;
1534     }
1535   }
1536   *nz = nztot;
1537   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1538   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1539   PetscFunctionReturn(0);
1540 }
1541 
1542 #undef __FUNCT__
1543 #define __FUNCT__ "MatRestoreRow_MPIAIJ"
1544 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1545 {
1546   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1547 
1548   PetscFunctionBegin;
1549   if (!aij->getrowactive) {
1550     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
1551   }
1552   aij->getrowactive = PETSC_FALSE;
1553   PetscFunctionReturn(0);
1554 }
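
/*
   A minimal caller-side sketch of the row access interface implemented by the two routines
   above (the matrix A and the row number "row" are placeholders; only locally owned rows
   may be requested):

     const PetscInt    *cols;
     const PetscScalar *vals;
     PetscInt          ncols;

     ierr = MatGetRow(A,row,&ncols,&cols,&vals);CHKERRQ(ierr);
     ... inspect ncols, cols[], vals[]; the arrays are only valid until MatRestoreRow() ...
     ierr = MatRestoreRow(A,row,&ncols,&cols,&vals);CHKERRQ(ierr);
*/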
1555 
1556 #undef __FUNCT__
1557 #define __FUNCT__ "MatNorm_MPIAIJ"
1558 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm)
1559 {
1560   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1561   Mat_SeqAIJ     *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data;
1562   PetscErrorCode ierr;
1563   PetscInt       i,j,cstart = mat->cmap->rstart;
1564   PetscReal      sum = 0.0;
1565   MatScalar      *v;
1566 
1567   PetscFunctionBegin;
1568   if (aij->size == 1) {
1569     ierr =  MatNorm(aij->A,type,norm);CHKERRQ(ierr);
1570   } else {
1571     if (type == NORM_FROBENIUS) {
1572       v = amat->a;
1573       for (i=0; i<amat->nz; i++) {
1574 #if defined(PETSC_USE_COMPLEX)
1575         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1576 #else
1577         sum += (*v)*(*v); v++;
1578 #endif
1579       }
1580       v = bmat->a;
1581       for (i=0; i<bmat->nz; i++) {
1582 #if defined(PETSC_USE_COMPLEX)
1583         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1584 #else
1585         sum += (*v)*(*v); v++;
1586 #endif
1587       }
1588       ierr = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPI_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr);
1589       *norm = sqrt(*norm);
1590     } else if (type == NORM_1) { /* max column norm */
1591       PetscReal *tmp,*tmp2;
1592       PetscInt  *jj,*garray = aij->garray;
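      /* The 1-norm is the largest column sum: accumulate |a_ij| for every global column
         (diagonal-block columns are offset by cstart, off-diagonal ones are mapped through
         garray), sum the partial column sums over all processes, and take the maximum. */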
1593       ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr);
1594       ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscReal),&tmp2);CHKERRQ(ierr);
1595       ierr = PetscMemzero(tmp,mat->cmap->N*sizeof(PetscReal));CHKERRQ(ierr);
1596       *norm = 0.0;
1597       v = amat->a; jj = amat->j;
1598       for (j=0; j<amat->nz; j++) {
1599         tmp[cstart + *jj++ ] += PetscAbsScalar(*v);  v++;
1600       }
1601       v = bmat->a; jj = bmat->j;
1602       for (j=0; j<bmat->nz; j++) {
1603         tmp[garray[*jj++]] += PetscAbsScalar(*v); v++;
1604       }
1605       ierr = MPI_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPI_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr);
1606       for (j=0; j<mat->cmap->N; j++) {
1607         if (tmp2[j] > *norm) *norm = tmp2[j];
1608       }
1609       ierr = PetscFree(tmp);CHKERRQ(ierr);
1610       ierr = PetscFree(tmp2);CHKERRQ(ierr);
1611     } else if (type == NORM_INFINITY) { /* max row norm */
1612       PetscReal ntemp = 0.0;
1613       for (j=0; j<aij->A->rmap->n; j++) {
1614         v = amat->a + amat->i[j];
1615         sum = 0.0;
1616         for (i=0; i<amat->i[j+1]-amat->i[j]; i++) {
1617           sum += PetscAbsScalar(*v); v++;
1618         }
1619         v = bmat->a + bmat->i[j];
1620         for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) {
1621           sum += PetscAbsScalar(*v); v++;
1622         }
1623         if (sum > ntemp) ntemp = sum;
1624       }
1625       ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPI_MAX,((PetscObject)mat)->comm);CHKERRQ(ierr);
1626     } else {
1627       SETERRQ(PETSC_ERR_SUP,"No support for two norm");
1628     }
1629   }
1630   PetscFunctionReturn(0);
1631 }
1632 
1633 #undef __FUNCT__
1634 #define __FUNCT__ "MatTranspose_MPIAIJ"
1635 PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout)
1636 {
1637   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1638   Mat_SeqAIJ     *Aloc=(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data;
1639   PetscErrorCode ierr;
1640   PetscInt       M = A->rmap->N,N = A->cmap->N,ma,na,mb,*ai,*aj,*bi,*bj,row,*cols,*cols_tmp,i,*d_nnz;
1641   PetscInt       cstart=A->cmap->rstart,ncol;
1642   Mat            B;
1643   MatScalar      *array;
1644 
1645   PetscFunctionBegin;
1646   if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
1647 
1648   ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n;
1649   ai = Aloc->i; aj = Aloc->j;
1650   bi = Bloc->i; bj = Bloc->j;
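  /* The transpose is assembled with MatSetValues(): each local row i of the diagonal and
     off-diagonal blocks is inserted as the single global column A->rmap->rstart+i of the
     new matrix, so entries destined for other processes go through the usual assembly
     stash. */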
1651   if (reuse == MAT_INITIAL_MATRIX || *matout == A) {
1652     /* compute d_nnz for preallocation; o_nnz is approximated by d_nnz to avoid communication */
1653     ierr = PetscMalloc((1+na)*sizeof(PetscInt),&d_nnz);CHKERRQ(ierr);
1654     ierr = PetscMemzero(d_nnz,(1+na)*sizeof(PetscInt));CHKERRQ(ierr);
1655     for (i=0; i<ai[ma]; i++){
1656       d_nnz[aj[i]] ++;
1657       aj[i] += cstart; /* global col index to be used by MatSetValues() */
1658     }
1659 
1660     ierr = MatCreate(((PetscObject)A)->comm,&B);CHKERRQ(ierr);
1661     ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
1662     ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
1663     ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,d_nnz);CHKERRQ(ierr);
1664     ierr = PetscFree(d_nnz);CHKERRQ(ierr);
1665   } else {
1666     B = *matout;
1667   }
1668 
1669   /* copy over the A part */
1670   array = Aloc->a;
1671   row = A->rmap->rstart;
1672   for (i=0; i<ma; i++) {
1673     ncol = ai[i+1]-ai[i];
1674     ierr = MatSetValues(B,ncol,aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1675     row++; array += ncol; aj += ncol;
1676   }
1677   aj = Aloc->j;
1678   for (i=0; i<ai[ma]; i++) aj[i] -= cstart; /* restore the local column indices */
1679 
1680   /* copy over the B part */
1681   ierr = PetscMalloc(bi[mb]*sizeof(PetscInt),&cols);CHKERRQ(ierr);
1682   ierr = PetscMemzero(cols,bi[mb]*sizeof(PetscInt));CHKERRQ(ierr);
1683   array = Bloc->a;
1684   row = A->rmap->rstart;
1685   for (i=0; i<bi[mb]; i++) {cols[i] = a->garray[bj[i]];}
1686   cols_tmp = cols;
1687   for (i=0; i<mb; i++) {
1688     ncol = bi[i+1]-bi[i];
1689     ierr = MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1690     row++; array += ncol; cols_tmp += ncol;
1691   }
1692   ierr = PetscFree(cols);CHKERRQ(ierr);
1693 
1694   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1695   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1696   if (reuse == MAT_INITIAL_MATRIX || *matout != A) {
1697     *matout = B;
1698   } else {
1699     ierr = MatHeaderCopy(A,B);CHKERRQ(ierr);
1700   }
1701   PetscFunctionReturn(0);
1702 }
1703 
1704 #undef __FUNCT__
1705 #define __FUNCT__ "MatDiagonalScale_MPIAIJ"
1706 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
1707 {
1708   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1709   Mat            a = aij->A,b = aij->B;
1710   PetscErrorCode ierr;
1711   PetscInt       s1,s2,s3;
1712 
1713   PetscFunctionBegin;
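  /* Left scaling (ll) is distributed by rows and can be applied directly to both local
     blocks; right scaling (rr) of the off-diagonal block needs the ghost entries of rr,
     which are gathered into aij->lvec while the diagonal block is being scaled. */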
1714   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
1715   if (rr) {
1716     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
1717     if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
1718     /* Overlap communication with computation. */
1719     ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1720   }
1721   if (ll) {
1722     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
1723     if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
1724     ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr);
1725   }
1726   /* scale  the diagonal block */
1727   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
1728 
1729   if (rr) {
1730     /* Do a scatter end and then right scale the off-diagonal block */
1731     ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1732     ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr);
1733   }
1734 
1735   PetscFunctionReturn(0);
1736 }
1737 
1738 #undef __FUNCT__
1739 #define __FUNCT__ "MatSetBlockSize_MPIAIJ"
1740 PetscErrorCode MatSetBlockSize_MPIAIJ(Mat A,PetscInt bs)
1741 {
1742   Mat_MPIAIJ     *a   = (Mat_MPIAIJ*)A->data;
1743   PetscErrorCode ierr;
1744 
1745   PetscFunctionBegin;
1746   ierr = MatSetBlockSize(a->A,bs);CHKERRQ(ierr);
1747   ierr = MatSetBlockSize(a->B,bs);CHKERRQ(ierr);
1748   PetscFunctionReturn(0);
1749 }
1750 #undef __FUNCT__
1751 #define __FUNCT__ "MatSetUnfactored_MPIAIJ"
1752 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A)
1753 {
1754   Mat_MPIAIJ     *a   = (Mat_MPIAIJ*)A->data;
1755   PetscErrorCode ierr;
1756 
1757   PetscFunctionBegin;
1758   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
1759   PetscFunctionReturn(0);
1760 }
1761 
1762 #undef __FUNCT__
1763 #define __FUNCT__ "MatEqual_MPIAIJ"
1764 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscTruth *flag)
1765 {
1766   Mat_MPIAIJ     *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data;
1767   Mat            a,b,c,d;
1768   PetscTruth     flg;
1769   PetscErrorCode ierr;
1770 
1771   PetscFunctionBegin;
1772   a = matA->A; b = matA->B;
1773   c = matB->A; d = matB->B;
1774 
1775   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
1776   if (flg) {
1777     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
1778   }
1779   ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,((PetscObject)A)->comm);CHKERRQ(ierr);
1780   PetscFunctionReturn(0);
1781 }
1782 
1783 #undef __FUNCT__
1784 #define __FUNCT__ "MatCopy_MPIAIJ"
1785 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
1786 {
1787   PetscErrorCode ierr;
1788   Mat_MPIAIJ     *a = (Mat_MPIAIJ *)A->data;
1789   Mat_MPIAIJ     *b = (Mat_MPIAIJ *)B->data;
1790 
1791   PetscFunctionBegin;
1792   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
1793   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
1794     /* Because of the column compression in the off-processor part of the matrix a->B,
1795        the number of columns in a->B and b->B may be different, hence we cannot call
1796        MatCopy() directly on the two parts. If need be, a copy more efficient than
1797        MatCopy_Basic() could be provided by first uncompressing the off-diagonal (B)
1798        parts and then copying the submatrices. */
1799     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
1800   } else {
1801     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
1802     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
1803   }
1804   PetscFunctionReturn(0);
1805 }
1806 
1807 #undef __FUNCT__
1808 #define __FUNCT__ "MatSetUpPreallocation_MPIAIJ"
1809 PetscErrorCode MatSetUpPreallocation_MPIAIJ(Mat A)
1810 {
1811   PetscErrorCode ierr;
1812 
1813   PetscFunctionBegin;
1814   ierr =  MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
1815   PetscFunctionReturn(0);
1816 }
1817 
1818 #include "petscblaslapack.h"
1819 #undef __FUNCT__
1820 #define __FUNCT__ "MatAXPY_MPIAIJ"
1821 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
1822 {
1823   PetscErrorCode ierr;
1824   PetscInt       i;
1825   Mat_MPIAIJ     *xx = (Mat_MPIAIJ *)X->data,*yy = (Mat_MPIAIJ *)Y->data;
1826   PetscBLASInt   bnz,one=1;
1827   Mat_SeqAIJ     *x,*y;
1828 
1829   PetscFunctionBegin;
1830   if (str == SAME_NONZERO_PATTERN) {
1831     PetscScalar alpha = a;
1832     x = (Mat_SeqAIJ *)xx->A->data;
1833     y = (Mat_SeqAIJ *)yy->A->data;
1834     bnz = PetscBLASIntCast(x->nz);
1835     BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
1836     x = (Mat_SeqAIJ *)xx->B->data;
1837     y = (Mat_SeqAIJ *)yy->B->data;
1838     bnz = PetscBLASIntCast(x->nz);
1839     BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
1840   } else if (str == SUBSET_NONZERO_PATTERN) {
1841     ierr = MatAXPY_SeqAIJ(yy->A,a,xx->A,str);CHKERRQ(ierr);
1842 
1843     x = (Mat_SeqAIJ *)xx->B->data;
1844     y = (Mat_SeqAIJ *)yy->B->data;
1845     if (y->xtoy && y->XtoY != xx->B) {
1846       ierr = PetscFree(y->xtoy);CHKERRQ(ierr);
1847       ierr = MatDestroy(y->XtoY);CHKERRQ(ierr);
1848     }
1849     if (!y->xtoy) { /* get xtoy */
1850       ierr = MatAXPYGetxtoy_Private(xx->B->rmap->n,x->i,x->j,xx->garray,y->i,y->j,yy->garray,&y->xtoy);CHKERRQ(ierr);
1851       y->XtoY = xx->B;
1852       ierr = PetscObjectReference((PetscObject)xx->B);CHKERRQ(ierr);
1853     }
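    /* xtoy[i] is the position in y's value array of the entry that matches the i-th
       off-diagonal nonzero of x; it is computed once from the two column index sets (both
       mapped to global numbering through garray) and cached on y, together with a reference
       to xx->B, so later calls with the same X can reuse it. */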
1854     for (i=0; i<x->nz; i++) y->a[y->xtoy[i]] += a*(x->a[i]);
1855   } else {
1856     ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
1857   }
1858   PetscFunctionReturn(0);
1859 }
1860 
1861 EXTERN PetscErrorCode PETSCMAT_DLLEXPORT MatConjugate_SeqAIJ(Mat);
1862 
1863 #undef __FUNCT__
1864 #define __FUNCT__ "MatConjugate_MPIAIJ"
1865 PetscErrorCode PETSCMAT_DLLEXPORT MatConjugate_MPIAIJ(Mat mat)
1866 {
1867 #if defined(PETSC_USE_COMPLEX)
1868   PetscErrorCode ierr;
1869   Mat_MPIAIJ     *aij = (Mat_MPIAIJ *)mat->data;
1870 
1871   PetscFunctionBegin;
1872   ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr);
1873   ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr);
1874 #else
1875   PetscFunctionBegin;
1876 #endif
1877   PetscFunctionReturn(0);
1878 }
1879 
1880 #undef __FUNCT__
1881 #define __FUNCT__ "MatRealPart_MPIAIJ"
1882 PetscErrorCode MatRealPart_MPIAIJ(Mat A)
1883 {
1884   Mat_MPIAIJ   *a = (Mat_MPIAIJ*)A->data;
1885   PetscErrorCode ierr;
1886 
1887   PetscFunctionBegin;
1888   ierr = MatRealPart(a->A);CHKERRQ(ierr);
1889   ierr = MatRealPart(a->B);CHKERRQ(ierr);
1890   PetscFunctionReturn(0);
1891 }
1892 
1893 #undef __FUNCT__
1894 #define __FUNCT__ "MatImaginaryPart_MPIAIJ"
1895 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A)
1896 {
1897   Mat_MPIAIJ   *a = (Mat_MPIAIJ*)A->data;
1898   PetscErrorCode ierr;
1899 
1900   PetscFunctionBegin;
1901   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
1902   ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
1903   PetscFunctionReturn(0);
1904 }
1905 
1906 #ifdef PETSC_HAVE_PBGL
1907 
1908 #include <boost/parallel/mpi/bsp_process_group.hpp>
1909 #include <boost/graph/distributed/ilu_default_graph.hpp>
1910 #include <boost/graph/distributed/ilu_0_block.hpp>
1911 #include <boost/graph/distributed/ilu_preconditioner.hpp>
1912 #include <boost/graph/distributed/petsc/interface.hpp>
1913 #include <boost/multi_array.hpp>
1914 #include <boost/parallel/distributed_property_map.hpp>
1915 
1916 #undef __FUNCT__
1917 #define __FUNCT__ "MatILUFactorSymbolic_MPIAIJ"
1918 /*
1919   This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu>
1920 */
1921 PetscErrorCode MatILUFactorSymbolic_MPIAIJ(Mat fact,Mat A, IS isrow, IS iscol, const MatFactorInfo *info)
1922 {
1923   namespace petsc = boost::distributed::petsc;
1924 
1925   namespace graph_dist = boost::graph::distributed;
1926   using boost::graph::distributed::ilu_default::process_group_type;
1927   using boost::graph::ilu_permuted;
1928 
1929   PetscTruth      row_identity, col_identity;
1930   PetscContainer  c;
1931   PetscInt        m, n, M, N;
1932   PetscErrorCode  ierr;
1933 
1934   PetscFunctionBegin;
1935   if (info->levels != 0) SETERRQ(PETSC_ERR_SUP,"Only levels = 0 supported for parallel ilu");
1936   ierr = ISIdentity(isrow, &row_identity);CHKERRQ(ierr);
1937   ierr = ISIdentity(iscol, &col_identity);CHKERRQ(ierr);
1938   if (!row_identity || !col_identity) {
1939     SETERRQ(PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for parallel ILU");
1940   }
1941 
1942   process_group_type pg;
1943   typedef graph_dist::ilu_default::ilu_level_graph_type  lgraph_type;
1944   lgraph_type*   lgraph_p = new lgraph_type(petsc::num_global_vertices(A), pg, petsc::matrix_distribution(A, pg));
1945   lgraph_type&   level_graph = *lgraph_p;
1946   graph_dist::ilu_default::graph_type&            graph(level_graph.graph);
1947 
1948   petsc::read_matrix(A, graph, get(boost::edge_weight, graph));
1949   ilu_permuted(level_graph);
1950 
1951   /* put together the new matrix */
1952   ierr = MatCreate(((PetscObject)A)->comm, fact);CHKERRQ(ierr);
1953   ierr = MatGetLocalSize(A, &m, &n);CHKERRQ(ierr);
1954   ierr = MatGetSize(A, &M, &N);CHKERRQ(ierr);
1955   ierr = MatSetSizes(fact, m, n, M, N);CHKERRQ(ierr);
1956   ierr = MatSetType(fact, ((PetscObject)A)->type_name);CHKERRQ(ierr);
1957   ierr = MatAssemblyBegin(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1958   ierr = MatAssemblyEnd(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1959 
1960   ierr = PetscContainerCreate(((PetscObject)A)->comm, &c);CHKERRQ(ierr);
1961   ierr = PetscContainerSetPointer(c, lgraph_p);CHKERRQ(ierr);
1962   ierr = PetscObjectCompose((PetscObject) (fact), "graph", (PetscObject) c);CHKERRQ(ierr);
1963   PetscFunctionReturn(0);
1964 }
1965 
1966 #undef __FUNCT__
1967 #define __FUNCT__ "MatLUFactorNumeric_MPIAIJ"
1968 PetscErrorCode MatLUFactorNumeric_MPIAIJ(Mat B,Mat A, const MatFactorInfo *info)
1969 {
1970   PetscFunctionBegin;
1971   PetscFunctionReturn(0);
1972 }
1973 
1974 #undef __FUNCT__
1975 #define __FUNCT__ "MatSolve_MPIAIJ"
1976 /*
1977   This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu>
1978 */
1979 PetscErrorCode MatSolve_MPIAIJ(Mat A, Vec b, Vec x)
1980 {
1981   namespace graph_dist = boost::graph::distributed;
1982 
1983   typedef graph_dist::ilu_default::ilu_level_graph_type  lgraph_type;
1984   lgraph_type*   lgraph_p;
1985   PetscContainer c;
1986   PetscErrorCode ierr;
1987 
1988   PetscFunctionBegin;
1989   ierr = PetscObjectQuery((PetscObject) A, "graph", (PetscObject *) &c);CHKERRQ(ierr);
1990   ierr = PetscContainerGetPointer(c, (void **) &lgraph_p);CHKERRQ(ierr);
1991   ierr = VecCopy(b, x);CHKERRQ(ierr);
1992 
1993   PetscScalar* array_x;
1994   ierr = VecGetArray(x, &array_x);CHKERRQ(ierr);
1995   PetscInt sx;
1996   ierr = VecGetSize(x, &sx);CHKERRQ(ierr);
1997 
1998   PetscScalar* array_b;
1999   ierr = VecGetArray(b, &array_b);CHKERRQ(ierr);
2000   PetscInt sb;
2001   ierr = VecGetSize(b, &sb);CHKERRQ(ierr);
2002 
2003   lgraph_type&   level_graph = *lgraph_p;
2004   graph_dist::ilu_default::graph_type&            graph(level_graph.graph);
2005 
2006   typedef boost::multi_array_ref<PetscScalar, 1> array_ref_type;
2007   array_ref_type                                 ref_b(array_b, boost::extents[num_vertices(graph)]),
2008                                                  ref_x(array_x, boost::extents[num_vertices(graph)]);
2009 
2010   typedef boost::iterator_property_map<array_ref_type::iterator,
2011                                 boost::property_map<graph_dist::ilu_default::graph_type, boost::vertex_index_t>::type>  gvector_type;
2012   gvector_type                                   vector_b(ref_b.begin(), get(boost::vertex_index, graph)),
2013                                                  vector_x(ref_x.begin(), get(boost::vertex_index, graph));
2014 
2015   ilu_set_solve(*lgraph_p, vector_b, vector_x);
2016 
2017   PetscFunctionReturn(0);
2018 }
2019 #endif
2020 
2021 typedef struct { /* used by MatGetRedundantMatrix() for reusing matredundant */
2022   PetscInt       nzlocal,nsends,nrecvs;
2023   PetscMPIInt    *send_rank;
2024   PetscInt       *sbuf_nz,*sbuf_j,**rbuf_j;
2025   PetscScalar    *sbuf_a,**rbuf_a;
2026   PetscErrorCode (*MatDestroy)(Mat);
2027 } Mat_Redundant;
2028 
2029 #undef __FUNCT__
2030 #define __FUNCT__ "PetscContainerDestroy_MatRedundant"
2031 PetscErrorCode PetscContainerDestroy_MatRedundant(void *ptr)
2032 {
2033   PetscErrorCode       ierr;
2034   Mat_Redundant        *redund=(Mat_Redundant*)ptr;
2035   PetscInt             i;
2036 
2037   PetscFunctionBegin;
2038   ierr = PetscFree(redund->send_rank);CHKERRQ(ierr);
2039   ierr = PetscFree(redund->sbuf_j);CHKERRQ(ierr);
2040   ierr = PetscFree(redund->sbuf_a);CHKERRQ(ierr);
2041   for (i=0; i<redund->nrecvs; i++){
2042     ierr = PetscFree(redund->rbuf_j[i]);CHKERRQ(ierr);
2043     ierr = PetscFree(redund->rbuf_a[i]);CHKERRQ(ierr);
2044   }
2045   ierr = PetscFree3(redund->sbuf_nz,redund->rbuf_j,redund->rbuf_a);CHKERRQ(ierr);
2046   ierr = PetscFree(redund);CHKERRQ(ierr);
2047   PetscFunctionReturn(0);
2048 }
2049 
2050 #undef __FUNCT__
2051 #define __FUNCT__ "MatDestroy_MatRedundant"
2052 PetscErrorCode MatDestroy_MatRedundant(Mat A)
2053 {
2054   PetscErrorCode  ierr;
2055   PetscContainer  container;
2056   Mat_Redundant   *redund=PETSC_NULL;
2057 
2058   PetscFunctionBegin;
2059   ierr = PetscObjectQuery((PetscObject)A,"Mat_Redundant",(PetscObject *)&container);CHKERRQ(ierr);
2060   if (container) {
2061     ierr = PetscContainerGetPointer(container,(void **)&redund);CHKERRQ(ierr);
2062   } else {
2063     SETERRQ(PETSC_ERR_PLIB,"Container does not exist");
2064   }
2065   A->ops->destroy = redund->MatDestroy;
2066   ierr = PetscObjectCompose((PetscObject)A,"Mat_Redundant",0);CHKERRQ(ierr);
2067   ierr = (*A->ops->destroy)(A);CHKERRQ(ierr);
2068   ierr = PetscContainerDestroy(container);CHKERRQ(ierr);
2069   PetscFunctionReturn(0);
2070 }
2071 
2072 #undef __FUNCT__
2073 #define __FUNCT__ "MatGetRedundantMatrix_MPIAIJ"
2074 PetscErrorCode MatGetRedundantMatrix_MPIAIJ(Mat mat,PetscInt nsubcomm,MPI_Comm subcomm,PetscInt mlocal_sub,MatReuse reuse,Mat *matredundant)
2075 {
2076   PetscMPIInt    rank,size;
2077   MPI_Comm       comm=((PetscObject)mat)->comm;
2078   PetscErrorCode ierr;
2079   PetscInt       nsends=0,nrecvs=0,i,rownz_max=0;
2080   PetscMPIInt    *send_rank=PETSC_NULL,*recv_rank=PETSC_NULL;
2081   PetscInt       *rowrange=mat->rmap->range;
2082   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2083   Mat            A=aij->A,B=aij->B,C=*matredundant;
2084   Mat_SeqAIJ     *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data;
2085   PetscScalar    *sbuf_a;
2086   PetscInt       nzlocal=a->nz+b->nz;
2087   PetscInt       j,cstart=mat->cmap->rstart,cend=mat->cmap->rend,row,nzA,nzB,ncols,*cworkA,*cworkB;
2088   PetscInt       rstart=mat->rmap->rstart,rend=mat->rmap->rend,*bmap=aij->garray,M,N;
2089   PetscInt       *cols,ctmp,lwrite,*rptr,l,*sbuf_j;
2090   MatScalar      *aworkA,*aworkB;
2091   PetscScalar    *vals;
2092   PetscMPIInt    tag1,tag2,tag3,imdex;
2093   MPI_Request    *s_waits1=PETSC_NULL,*s_waits2=PETSC_NULL,*s_waits3=PETSC_NULL,
2094                  *r_waits1=PETSC_NULL,*r_waits2=PETSC_NULL,*r_waits3=PETSC_NULL;
2095   MPI_Status     recv_status,*send_status;
2096   PetscInt       *sbuf_nz=PETSC_NULL,*rbuf_nz=PETSC_NULL,count;
2097   PetscInt       **rbuf_j=PETSC_NULL;
2098   PetscScalar    **rbuf_a=PETSC_NULL;
2099   Mat_Redundant  *redund=PETSC_NULL;
2100   PetscContainer container;
2101 
2102   PetscFunctionBegin;
2103   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2104   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2105 
2106   if (reuse == MAT_REUSE_MATRIX) {
2107     ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr);
2108     if (M != N || M != mat->rmap->N) SETERRQ(PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong global size");
2109     ierr = MatGetLocalSize(C,&M,&N);CHKERRQ(ierr);
2110     if (M != N || M != mlocal_sub) SETERRQ(PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong local size");
2111     ierr = PetscObjectQuery((PetscObject)C,"Mat_Redundant",(PetscObject *)&container);CHKERRQ(ierr);
2112     if (container) {
2113       ierr = PetscContainerGetPointer(container,(void **)&redund);CHKERRQ(ierr);
2114     } else {
2115       SETERRQ(PETSC_ERR_PLIB,"Container does not exist");
2116     }
2117     if (nzlocal != redund->nzlocal) SETERRQ(PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong nzlocal");
2118 
2119     nsends    = redund->nsends;
2120     nrecvs    = redund->nrecvs;
2121     send_rank = redund->send_rank; recv_rank = send_rank + size;
2122     sbuf_nz   = redund->sbuf_nz;     rbuf_nz = sbuf_nz + nsends;
2123     sbuf_j    = redund->sbuf_j;
2124     sbuf_a    = redund->sbuf_a;
2125     rbuf_j    = redund->rbuf_j;
2126     rbuf_a    = redund->rbuf_a;
2127   }
2128 
2129   if (reuse == MAT_INITIAL_MATRIX){
2130     PetscMPIInt  subrank,subsize;
2131     PetscInt     nleftover,np_subcomm;
2132     /* get the destination processors' id send_rank, nsends and nrecvs */
2133     ierr = MPI_Comm_rank(subcomm,&subrank);CHKERRQ(ierr);
2134     ierr = MPI_Comm_size(subcomm,&subsize);CHKERRQ(ierr);
2135     ierr = PetscMalloc((2*size+1)*sizeof(PetscMPIInt),&send_rank);CHKERRQ(ierr);
2136     recv_rank = send_rank + size;
2137     np_subcomm = size/nsubcomm;
2138     nleftover  = size - nsubcomm*np_subcomm;
2139     nsends = 0; nrecvs = 0;
2140     for (i=0; i<size; i++){ /* i=rank*/
2141       if (subrank == i/nsubcomm && rank != i){ /* my_subrank == other's subrank */
2142         send_rank[nsends] = i; nsends++;
2143         recv_rank[nrecvs++] = i;
2144       }
2145     }
2146     if (rank >= size - nleftover){/* this proc is a leftover processor */
2147       i = size-nleftover-1;
2148       j = 0;
2149       while (j < nsubcomm - nleftover){
2150         send_rank[nsends++] = i;
2151         i--; j++;
2152       }
2153     }
2154 
2155     if (nleftover && subsize == size/nsubcomm && subrank==subsize-1){ /* this proc recvs from leftover processors */
2156       for (i=0; i<nleftover; i++){
2157         recv_rank[nrecvs++] = size-nleftover+i;
2158       }
2159     }
2160 
2161     /* allocate sbuf_j, sbuf_a */
2162     i = nzlocal + rowrange[rank+1] - rowrange[rank] + 2;
2163     ierr = PetscMalloc(i*sizeof(PetscInt),&sbuf_j);CHKERRQ(ierr);
2164     ierr = PetscMalloc((nzlocal+1)*sizeof(PetscScalar),&sbuf_a);CHKERRQ(ierr);
2165   } /* end of if (reuse == MAT_INITIAL_MATRIX) */
2166 
2167   /* copy mat's local entries into the buffers */
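  /* Buffer layout: sbuf_j begins with a row-pointer array of length (rend-rstart)+1 and is
     followed by the concatenated global column indices of all local rows (off-diagonal
     columns are mapped through bmap); sbuf_a holds the matching values in the same order. */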
2168   if (reuse == MAT_INITIAL_MATRIX){
2169     rownz_max = 0;
2170     rptr = sbuf_j;
2171     cols = sbuf_j + rend-rstart + 1;
2172     vals = sbuf_a;
2173     rptr[0] = 0;
2174     for (i=0; i<rend-rstart; i++){
2175       row = i + rstart;
2176       nzA    = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i];
2177       ncols  = nzA + nzB;
2178       cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i];
2179       aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i];
2180       /* load the column indices for this row into cols */
2181       lwrite = 0;
2182       for (l=0; l<nzB; l++) {
2183         if ((ctmp = bmap[cworkB[l]]) < cstart){
2184           vals[lwrite]   = aworkB[l];
2185           cols[lwrite++] = ctmp;
2186         }
2187       }
2188       for (l=0; l<nzA; l++){
2189         vals[lwrite]   = aworkA[l];
2190         cols[lwrite++] = cstart + cworkA[l];
2191       }
2192       for (l=0; l<nzB; l++) {
2193         if ((ctmp = bmap[cworkB[l]]) >= cend){
2194           vals[lwrite]   = aworkB[l];
2195           cols[lwrite++] = ctmp;
2196         }
2197       }
2198       vals += ncols;
2199       cols += ncols;
2200       rptr[i+1] = rptr[i] + ncols;
2201       if (rownz_max < ncols) rownz_max = ncols;
2202     }
2203     if (rptr[rend-rstart] != a->nz + b->nz) SETERRQ4(1, "rptr[%d] %d != %d + %d",rend-rstart,rptr[rend-rstart],a->nz,b->nz);
2204   } else { /* only copy matrix values into sbuf_a */
2205     rptr = sbuf_j;
2206     vals = sbuf_a;
2207     rptr[0] = 0;
2208     for (i=0; i<rend-rstart; i++){
2209       row = i + rstart;
2210       nzA    = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i];
2211       ncols  = nzA + nzB;
2212       cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i];
2213       aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i];
2214       lwrite = 0;
2215       for (l=0; l<nzB; l++) {
2216         if ((ctmp = bmap[cworkB[l]]) < cstart) vals[lwrite++] = aworkB[l];
2217       }
2218       for (l=0; l<nzA; l++) vals[lwrite++] = aworkA[l];
2219       for (l=0; l<nzB; l++) {
2220         if ((ctmp = bmap[cworkB[l]]) >= cend) vals[lwrite++] = aworkB[l];
2221       }
2222       vals += ncols;
2223       rptr[i+1] = rptr[i] + ncols;
2224     }
2225   } /* end of if (reuse == MAT_INITIAL_MATRIX) */
2226 
2227   /* send nzlocal to others, and recv other's nzlocal */
2228   /*--------------------------------------------------*/
2229   if (reuse == MAT_INITIAL_MATRIX){
2230     ierr = PetscMalloc2(3*(nsends + nrecvs)+1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr);
2231     s_waits2 = s_waits3 + nsends;
2232     s_waits1 = s_waits2 + nsends;
2233     r_waits1 = s_waits1 + nsends;
2234     r_waits2 = r_waits1 + nrecvs;
2235     r_waits3 = r_waits2 + nrecvs;
2236   } else {
2237     ierr = PetscMalloc2(nsends + nrecvs +1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr);
2238     r_waits3 = s_waits3 + nsends;
2239   }
2240 
2241   ierr = PetscObjectGetNewTag((PetscObject)mat,&tag3);CHKERRQ(ierr);
2242   if (reuse == MAT_INITIAL_MATRIX){
2243     /* get new tags to keep the communication clean */
2244     ierr = PetscObjectGetNewTag((PetscObject)mat,&tag1);CHKERRQ(ierr);
2245     ierr = PetscObjectGetNewTag((PetscObject)mat,&tag2);CHKERRQ(ierr);
2246     ierr = PetscMalloc3(nsends+nrecvs+1,PetscInt,&sbuf_nz,nrecvs,PetscInt*,&rbuf_j,nrecvs,PetscScalar*,&rbuf_a);CHKERRQ(ierr);
2247     rbuf_nz = sbuf_nz + nsends;
2248 
2249     /* post receives of other's nzlocal */
2250     for (i=0; i<nrecvs; i++){
2251       ierr = MPI_Irecv(rbuf_nz+i,1,MPIU_INT,MPI_ANY_SOURCE,tag1,comm,r_waits1+i);CHKERRQ(ierr);
2252     }
2253     /* send nzlocal to others */
2254     for (i=0; i<nsends; i++){
2255       sbuf_nz[i] = nzlocal;
2256       ierr = MPI_Isend(sbuf_nz+i,1,MPIU_INT,send_rank[i],tag1,comm,s_waits1+i);CHKERRQ(ierr);
2257     }
2258     /* wait on receives of nzlocal; allocate space for rbuf_j, rbuf_a */
2259     count = nrecvs;
2260     while (count) {
2261       ierr = MPI_Waitany(nrecvs,r_waits1,&imdex,&recv_status);CHKERRQ(ierr);
2262       recv_rank[imdex] = recv_status.MPI_SOURCE;
2263       /* allocate rbuf_a and rbuf_j; then post receives of rbuf_j */
2264       ierr = PetscMalloc((rbuf_nz[imdex]+1)*sizeof(PetscScalar),&rbuf_a[imdex]);CHKERRQ(ierr);
2265 
2266       i = rowrange[recv_status.MPI_SOURCE+1] - rowrange[recv_status.MPI_SOURCE]; /* number of expected mat->i */
2267       rbuf_nz[imdex] += i + 2;
2268       ierr = PetscMalloc(rbuf_nz[imdex]*sizeof(PetscInt),&rbuf_j[imdex]);CHKERRQ(ierr);
2269       ierr = MPI_Irecv(rbuf_j[imdex],rbuf_nz[imdex],MPIU_INT,recv_status.MPI_SOURCE,tag2,comm,r_waits2+imdex);CHKERRQ(ierr);
2270       count--;
2271     }
2272     /* wait on sends of nzlocal */
2273     if (nsends) {ierr = MPI_Waitall(nsends,s_waits1,send_status);CHKERRQ(ierr);}
2274     /* send mat->i,j to others, and recv from other's */
2275     /*------------------------------------------------*/
2276     for (i=0; i<nsends; i++){
2277       j = nzlocal + rowrange[rank+1] - rowrange[rank] + 1;
2278       ierr = MPI_Isend(sbuf_j,j,MPIU_INT,send_rank[i],tag2,comm,s_waits2+i);CHKERRQ(ierr);
2279     }
2280     /* wait on receives of mat->i,j */
2281     /*------------------------------*/
2282     count = nrecvs;
2283     while (count) {
2284       ierr = MPI_Waitany(nrecvs,r_waits2,&imdex,&recv_status);CHKERRQ(ierr);
2285       if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE);
2286       count--;
2287     }
2288     /* wait on sends of mat->i,j */
2289     /*---------------------------*/
2290     if (nsends) {
2291       ierr = MPI_Waitall(nsends,s_waits2,send_status);CHKERRQ(ierr);
2292     }
2293   } /* end of if (reuse == MAT_INITIAL_MATRIX) */
2294 
2295   /* post receives, send and receive mat->a */
2296   /*----------------------------------------*/
2297   for (imdex=0; imdex<nrecvs; imdex++) {
2298     ierr = MPI_Irecv(rbuf_a[imdex],rbuf_nz[imdex],MPIU_SCALAR,recv_rank[imdex],tag3,comm,r_waits3+imdex);CHKERRQ(ierr);
2299   }
2300   for (i=0; i<nsends; i++){
2301     ierr = MPI_Isend(sbuf_a,nzlocal,MPIU_SCALAR,send_rank[i],tag3,comm,s_waits3+i);CHKERRQ(ierr);
2302   }
2303   count = nrecvs;
2304   while (count) {
2305     ierr = MPI_Waitany(nrecvs,r_waits3,&imdex,&recv_status);CHKERRQ(ierr);
2306     if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE);
2307     count--;
2308   }
2309   if (nsends) {
2310     ierr = MPI_Waitall(nsends,s_waits3,send_status);CHKERRQ(ierr);
2311   }
2312 
2313   ierr = PetscFree2(s_waits3,send_status);CHKERRQ(ierr);
2314 
2315   /* create redundant matrix */
2316   /*-------------------------*/
2317   if (reuse == MAT_INITIAL_MATRIX){
2318     /* compute rownz_max for preallocation */
2319     for (imdex=0; imdex<nrecvs; imdex++){
2320       j = rowrange[recv_rank[imdex]+1] - rowrange[recv_rank[imdex]];
2321       rptr = rbuf_j[imdex];
2322       for (i=0; i<j; i++){
2323         ncols = rptr[i+1] - rptr[i];
2324         if (rownz_max < ncols) rownz_max = ncols;
2325       }
2326     }
2327 
2328     ierr = MatCreate(subcomm,&C);CHKERRQ(ierr);
2329     ierr = MatSetSizes(C,mlocal_sub,mlocal_sub,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr);
2330     ierr = MatSetFromOptions(C);CHKERRQ(ierr);
2331     ierr = MatSeqAIJSetPreallocation(C,rownz_max,PETSC_NULL);CHKERRQ(ierr);
2332     ierr = MatMPIAIJSetPreallocation(C,rownz_max,PETSC_NULL,rownz_max,PETSC_NULL);CHKERRQ(ierr);
2333   } else {
2334     C = *matredundant;
2335   }
2336 
2337   /* insert local matrix entries */
2338   rptr = sbuf_j;
2339   cols = sbuf_j + rend-rstart + 1;
2340   vals = sbuf_a;
2341   for (i=0; i<rend-rstart; i++){
2342     row   = i + rstart;
2343     ncols = rptr[i+1] - rptr[i];
2344     ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
2345     vals += ncols;
2346     cols += ncols;
2347   }
2348   /* insert received matrix entries */
2349   for (imdex=0; imdex<nrecvs; imdex++){
2350     rstart = rowrange[recv_rank[imdex]];
2351     rend   = rowrange[recv_rank[imdex]+1];
2352     rptr = rbuf_j[imdex];
2353     cols = rbuf_j[imdex] + rend-rstart + 1;
2354     vals = rbuf_a[imdex];
2355     for (i=0; i<rend-rstart; i++){
2356       row   = i + rstart;
2357       ncols = rptr[i+1] - rptr[i];
2358       ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
2359       vals += ncols;
2360       cols += ncols;
2361     }
2362   }
2363   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2364   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2365   ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr);
2366   if (M != mat->rmap->N || N != mat->cmap->N) SETERRQ2(PETSC_ERR_ARG_INCOMP,"redundant mat size %d != input mat size %d",M,mat->rmap->N);
2367   if (reuse == MAT_INITIAL_MATRIX){
2368     PetscContainer container;
2369     *matredundant = C;
2370     /* create a supporting struct and attach it to C for reuse */
2371     ierr = PetscNewLog(C,Mat_Redundant,&redund);CHKERRQ(ierr);
2372     ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
2373     ierr = PetscContainerSetPointer(container,redund);CHKERRQ(ierr);
2374     ierr = PetscObjectCompose((PetscObject)C,"Mat_Redundant",(PetscObject)container);CHKERRQ(ierr);
2375     ierr = PetscContainerSetUserDestroy(container,PetscContainerDestroy_MatRedundant);CHKERRQ(ierr);
2376 
2377     redund->nzlocal = nzlocal;
2378     redund->nsends  = nsends;
2379     redund->nrecvs  = nrecvs;
2380     redund->send_rank = send_rank;
2381     redund->sbuf_nz = sbuf_nz;
2382     redund->sbuf_j  = sbuf_j;
2383     redund->sbuf_a  = sbuf_a;
2384     redund->rbuf_j  = rbuf_j;
2385     redund->rbuf_a  = rbuf_a;
2386 
2387     redund->MatDestroy = C->ops->destroy;
2388     C->ops->destroy    = MatDestroy_MatRedundant;
2389   }
2390   PetscFunctionReturn(0);
2391 }
2392 
2393 #undef __FUNCT__
2394 #define __FUNCT__ "MatGetRowMaxAbs_MPIAIJ"
2395 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2396 {
2397   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2398   PetscErrorCode ierr;
2399   PetscInt       i,*idxb = 0;
2400   PetscScalar    *va,*vb;
2401   Vec            vtmp;
2402 
2403   PetscFunctionBegin;
2404   ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr);
2405   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2406   if (idx) {
2407     for (i=0; i<A->rmap->n; i++) {
2408       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2409     }
2410   }
2411 
2412   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
2413   if (idx) {
2414     ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr);
2415   }
2416   ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
2417   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
2418 
2419   for (i=0; i<A->rmap->n; i++){
2420     if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
2421       va[i] = vb[i];
2422       if (idx) idx[i] = a->garray[idxb[i]];
2423     }
2424   }
2425 
2426   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2427   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
2428   if (idxb) {
2429     ierr = PetscFree(idxb);CHKERRQ(ierr);
2430   }
2431   ierr = VecDestroy(vtmp);CHKERRQ(ierr);
2432   PetscFunctionReturn(0);
2433 }
2434 
2435 #undef __FUNCT__
2436 #define __FUNCT__ "MatGetRowMinAbs_MPIAIJ"
2437 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2438 {
2439   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2440   PetscErrorCode ierr;
2441   PetscInt       i,*idxb = 0;
2442   PetscScalar    *va,*vb;
2443   Vec            vtmp;
2444 
2445   PetscFunctionBegin;
2446   ierr = MatGetRowMinAbs(a->A,v,idx);CHKERRQ(ierr);
2447   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2448   if (idx) {
2449     for (i=0; i<A->rmap->n; i++) {
2450       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2451     }
2452   }
2453 
2454   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
2455   if (idx) {
2456     ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr);
2457   }
2458   ierr = MatGetRowMinAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
2459   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
2460 
2461   for (i=0; i<A->rmap->n; i++){
2462     if (PetscAbsScalar(va[i]) > PetscAbsScalar(vb[i])) {
2463       va[i] = vb[i];
2464       if (idx) idx[i] = a->garray[idxb[i]];
2465     }
2466   }
2467 
2468   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2469   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
2470   if (idxb) {
2471     ierr = PetscFree(idxb);CHKERRQ(ierr);
2472   }
2473   ierr = VecDestroy(vtmp);CHKERRQ(ierr);
2474   PetscFunctionReturn(0);
2475 }
2476 
2477 #undef __FUNCT__
2478 #define __FUNCT__ "MatGetRowMin_MPIAIJ"
2479 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2480 {
2481   Mat_MPIAIJ    *mat    = (Mat_MPIAIJ *) A->data;
2482   PetscInt       n      = A->rmap->n;
2483   PetscInt       cstart = A->cmap->rstart;
2484   PetscInt      *cmap   = mat->garray;
2485   PetscInt      *diagIdx, *offdiagIdx;
2486   Vec            diagV, offdiagV;
2487   PetscScalar   *a, *diagA, *offdiagA;
2488   PetscInt       r;
2489   PetscErrorCode ierr;
2490 
2491   PetscFunctionBegin;
2492   ierr = PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);CHKERRQ(ierr);
2493   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr);
2494   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr);
2495   ierr = MatGetRowMin(mat->A, diagV,    diagIdx);CHKERRQ(ierr);
2496   ierr = MatGetRowMin(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
2497   ierr = VecGetArray(v,        &a);CHKERRQ(ierr);
2498   ierr = VecGetArray(diagV,    &diagA);CHKERRQ(ierr);
2499   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2500   for(r = 0; r < n; ++r) {
2501     if (PetscAbsScalar(diagA[r]) <= PetscAbsScalar(offdiagA[r])) {
2502       a[r]   = diagA[r];
2503       idx[r] = cstart + diagIdx[r];
2504     } else {
2505       a[r]   = offdiagA[r];
2506       idx[r] = cmap[offdiagIdx[r]];
2507     }
2508   }
2509   ierr = VecRestoreArray(v,        &a);CHKERRQ(ierr);
2510   ierr = VecRestoreArray(diagV,    &diagA);CHKERRQ(ierr);
2511   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2512   ierr = VecDestroy(diagV);CHKERRQ(ierr);
2513   ierr = VecDestroy(offdiagV);CHKERRQ(ierr);
2514   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
2515   PetscFunctionReturn(0);
2516 }
2517 
2518 #undef __FUNCT__
2519 #define __FUNCT__ "MatGetRowMax_MPIAIJ"
2520 PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2521 {
2522   Mat_MPIAIJ    *mat    = (Mat_MPIAIJ *) A->data;
2523   PetscInt       n      = A->rmap->n;
2524   PetscInt       cstart = A->cmap->rstart;
2525   PetscInt      *cmap   = mat->garray;
2526   PetscInt      *diagIdx, *offdiagIdx;
2527   Vec            diagV, offdiagV;
2528   PetscScalar   *a, *diagA, *offdiagA;
2529   PetscInt       r;
2530   PetscErrorCode ierr;
2531 
2532   PetscFunctionBegin;
2533   ierr = PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);CHKERRQ(ierr);
2534   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr);
2535   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr);
2536   ierr = MatGetRowMax(mat->A, diagV,    diagIdx);CHKERRQ(ierr);
2537   ierr = MatGetRowMax(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
2538   ierr = VecGetArray(v,        &a);CHKERRQ(ierr);
2539   ierr = VecGetArray(diagV,    &diagA);CHKERRQ(ierr);
2540   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2541   for(r = 0; r < n; ++r) {
2542     if (PetscAbsScalar(diagA[r]) >= PetscAbsScalar(offdiagA[r])) {
2543       a[r]   = diagA[r];
2544       idx[r] = cstart + diagIdx[r];
2545     } else {
2546       a[r]   = offdiagA[r];
2547       idx[r] = cmap[offdiagIdx[r]];
2548     }
2549   }
2550   ierr = VecRestoreArray(v,        &a);CHKERRQ(ierr);
2551   ierr = VecRestoreArray(diagV,    &diagA);CHKERRQ(ierr);
2552   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2553   ierr = VecDestroy(diagV);CHKERRQ(ierr);
2554   ierr = VecDestroy(offdiagV);CHKERRQ(ierr);
2555   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
2556   PetscFunctionReturn(0);
2557 }
2558 
2559 #undef __FUNCT__
2560 #define __FUNCT__ "MatGetSeqNonzerostructure_MPIAIJ"
2561 PetscErrorCode MatGetSeqNonzerostructure_MPIAIJ(Mat mat,Mat *newmat)
2562 {
2563   PetscErrorCode ierr;
2564   Mat            *dummy;
2565 
2566   PetscFunctionBegin;
2567   ierr = MatGetSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy);CHKERRQ(ierr);
2568   *newmat = *dummy;
2569   ierr = PetscFree(dummy);CHKERRQ(ierr);
2570   PetscFunctionReturn(0);
2571 }
2572 
2573 extern PetscErrorCode PETSCMAT_DLLEXPORT MatFDColoringApply_AIJ(Mat,MatFDColoring,Vec,MatStructure*,void*);
2574 /* -------------------------------------------------------------------*/
2575 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
2576        MatGetRow_MPIAIJ,
2577        MatRestoreRow_MPIAIJ,
2578        MatMult_MPIAIJ,
2579 /* 4*/ MatMultAdd_MPIAIJ,
2580        MatMultTranspose_MPIAIJ,
2581        MatMultTransposeAdd_MPIAIJ,
2582 #ifdef PETSC_HAVE_PBGL
2583        MatSolve_MPIAIJ,
2584 #else
2585        0,
2586 #endif
2587        0,
2588        0,
2589 /*10*/ 0,
2590        0,
2591        0,
2592        MatRelax_MPIAIJ,
2593        MatTranspose_MPIAIJ,
2594 /*15*/ MatGetInfo_MPIAIJ,
2595        MatEqual_MPIAIJ,
2596        MatGetDiagonal_MPIAIJ,
2597        MatDiagonalScale_MPIAIJ,
2598        MatNorm_MPIAIJ,
2599 /*20*/ MatAssemblyBegin_MPIAIJ,
2600        MatAssemblyEnd_MPIAIJ,
2601        MatSetOption_MPIAIJ,
2602        MatZeroEntries_MPIAIJ,
2603 /*24*/ MatZeroRows_MPIAIJ,
2604        0,
2605 #ifdef PETSC_HAVE_PBGL
2606        0,
2607 #else
2608        0,
2609 #endif
2610        0,
2611        0,
2612 /*29*/ MatSetUpPreallocation_MPIAIJ,
2613 #ifdef PETSC_HAVE_PBGL
2614        0,
2615 #else
2616        0,
2617 #endif
2618        0,
2619        0,
2620        0,
2621 /*34*/ MatDuplicate_MPIAIJ,
2622        0,
2623        0,
2624        0,
2625        0,
2626 /*39*/ MatAXPY_MPIAIJ,
2627        MatGetSubMatrices_MPIAIJ,
2628        MatIncreaseOverlap_MPIAIJ,
2629        MatGetValues_MPIAIJ,
2630        MatCopy_MPIAIJ,
2631 /*44*/ MatGetRowMax_MPIAIJ,
2632        MatScale_MPIAIJ,
2633        0,
2634        0,
2635        0,
2636 /*49*/ MatSetBlockSize_MPIAIJ,
2637        0,
2638        0,
2639        0,
2640        0,
2641 /*54*/ MatFDColoringCreate_MPIAIJ,
2642        0,
2643        MatSetUnfactored_MPIAIJ,
2644        MatPermute_MPIAIJ,
2645        0,
2646 /*59*/ MatGetSubMatrix_MPIAIJ,
2647        MatDestroy_MPIAIJ,
2648        MatView_MPIAIJ,
2649        0,
2650        0,
2651 /*64*/ 0,
2652        0,
2653        0,
2654        0,
2655        0,
2656 /*69*/ MatGetRowMaxAbs_MPIAIJ,
2657        MatGetRowMinAbs_MPIAIJ,
2658        0,
2659        MatSetColoring_MPIAIJ,
2660 #if defined(PETSC_HAVE_ADIC)
2661        MatSetValuesAdic_MPIAIJ,
2662 #else
2663        0,
2664 #endif
2665        MatSetValuesAdifor_MPIAIJ,
2666 /*75*/ MatFDColoringApply_AIJ,
2667        0,
2668        0,
2669        0,
2670        0,
2671 /*80*/ 0,
2672        0,
2673        0,
2674 /*83*/ MatLoad_MPIAIJ,
2675        0,
2676        0,
2677        0,
2678        0,
2679        0,
2680 /*89*/ MatMatMult_MPIAIJ_MPIAIJ,
2681        MatMatMultSymbolic_MPIAIJ_MPIAIJ,
2682        MatMatMultNumeric_MPIAIJ_MPIAIJ,
2683        MatPtAP_Basic,
2684        MatPtAPSymbolic_MPIAIJ,
2685 /*94*/ MatPtAPNumeric_MPIAIJ,
2686        0,
2687        0,
2688        0,
2689        0,
2690 /*99*/ 0,
2691        MatPtAPSymbolic_MPIAIJ_MPIAIJ,
2692        MatPtAPNumeric_MPIAIJ_MPIAIJ,
2693        MatConjugate_MPIAIJ,
2694        0,
2695 /*104*/MatSetValuesRow_MPIAIJ,
2696        MatRealPart_MPIAIJ,
2697        MatImaginaryPart_MPIAIJ,
2698        0,
2699        0,
2700 /*109*/0,
2701        MatGetRedundantMatrix_MPIAIJ,
2702        MatGetRowMin_MPIAIJ,
2703        0,
2704        0,
2705 /*114*/MatGetSeqNonzerostructure_MPIAIJ,
2706        0,
2707        0,
2708        0,
2709        0,
2710        0
2711 };
2712 
2713 /* ----------------------------------------------------------------------------------------*/
2714 
2715 EXTERN_C_BEGIN
2716 #undef __FUNCT__
2717 #define __FUNCT__ "MatStoreValues_MPIAIJ"
2718 PetscErrorCode PETSCMAT_DLLEXPORT MatStoreValues_MPIAIJ(Mat mat)
2719 {
2720   Mat_MPIAIJ     *aij = (Mat_MPIAIJ *)mat->data;
2721   PetscErrorCode ierr;
2722 
2723   PetscFunctionBegin;
2724   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
2725   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
2726   PetscFunctionReturn(0);
2727 }
2728 EXTERN_C_END
2729 
2730 EXTERN_C_BEGIN
2731 #undef __FUNCT__
2732 #define __FUNCT__ "MatRetrieveValues_MPIAIJ"
2733 PetscErrorCode PETSCMAT_DLLEXPORT MatRetrieveValues_MPIAIJ(Mat mat)
2734 {
2735   Mat_MPIAIJ     *aij = (Mat_MPIAIJ *)mat->data;
2736   PetscErrorCode ierr;
2737 
2738   PetscFunctionBegin;
2739   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
2740   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
2741   PetscFunctionReturn(0);
2742 }
2743 EXTERN_C_END
2744 
2745 #include "petscpc.h"
2746 EXTERN_C_BEGIN
2747 #undef __FUNCT__
2748 #define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ"
2749 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
2750 {
2751   Mat_MPIAIJ     *b;
2752   PetscErrorCode ierr;
2753   PetscInt       i;
2754 
2755   PetscFunctionBegin;
2756   if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5;
2757   if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2;
2758   if (d_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz);
2759   if (o_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz);
2760 
2761   ierr = PetscMapSetBlockSize(B->rmap,1);CHKERRQ(ierr);
2762   ierr = PetscMapSetBlockSize(B->cmap,1);CHKERRQ(ierr);
2763   ierr = PetscMapSetUp(B->rmap);CHKERRQ(ierr);
2764   ierr = PetscMapSetUp(B->cmap);CHKERRQ(ierr);
2765   if (d_nnz) {
2766     for (i=0; i<B->rmap->n; i++) {
2767       if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %D value %D",i,d_nnz[i]);
2768     }
2769   }
2770   if (o_nnz) {
2771     for (i=0; i<B->rmap->n; i++) {
2772       if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %D value %D",i,o_nnz[i]);
2773     }
2774   }
2775   b = (Mat_MPIAIJ*)B->data;
2776 
2777   if (!B->preallocated) {
2778     /* Explicitly create 2 MATSEQAIJ matrices. */
2779     ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
2780     ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr);
2781     ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr);
2782     ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr);
2783     ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
2784     ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr);
2785     ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr);
2786     ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr);
2787   }
2788 
2789   ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr);
2790   ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr);
2791   B->preallocated = PETSC_TRUE;
2792   PetscFunctionReturn(0);
2793 }
2794 EXTERN_C_END
2795 
2796 #undef __FUNCT__
2797 #define __FUNCT__ "MatDuplicate_MPIAIJ"
2798 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
2799 {
2800   Mat            mat;
2801   Mat_MPIAIJ     *a,*oldmat = (Mat_MPIAIJ*)matin->data;
2802   PetscErrorCode ierr;
2803 
2804   PetscFunctionBegin;
2805   *newmat       = 0;
2806   ierr = MatCreate(((PetscObject)matin)->comm,&mat);CHKERRQ(ierr);
2807   ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr);
2808   ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr);
2809   ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
2810   a    = (Mat_MPIAIJ*)mat->data;
2811 
2812   mat->factor       = matin->factor;
2813   mat->rmap->bs      = matin->rmap->bs;
2814   mat->assembled    = PETSC_TRUE;
2815   mat->insertmode   = NOT_SET_VALUES;
2816   mat->preallocated = PETSC_TRUE;
2817 
2818   a->size           = oldmat->size;
2819   a->rank           = oldmat->rank;
2820   a->donotstash     = oldmat->donotstash;
2821   a->roworiented    = oldmat->roworiented;
2822   a->rowindices     = 0;
2823   a->rowvalues      = 0;
2824   a->getrowactive   = PETSC_FALSE;
2825 
2826   ierr = PetscMapCopy(((PetscObject)mat)->comm,matin->rmap,mat->rmap);CHKERRQ(ierr);
2827   ierr = PetscMapCopy(((PetscObject)mat)->comm,matin->cmap,mat->cmap);CHKERRQ(ierr);
2828 
2829   ierr = MatStashCreate_Private(((PetscObject)matin)->comm,1,&mat->stash);CHKERRQ(ierr);
2830   if (oldmat->colmap) {
2831 #if defined (PETSC_USE_CTABLE)
2832     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
2833 #else
2834     ierr = PetscMalloc((mat->cmap->N)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr);
2835     ierr = PetscLogObjectMemory(mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
2836     ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
2837 #endif
2838   } else a->colmap = 0;
2839   if (oldmat->garray) {
2840     PetscInt len;
2841     len  = oldmat->B->cmap->n;
2842     ierr = PetscMalloc((len+1)*sizeof(PetscInt),&a->garray);CHKERRQ(ierr);
2843     ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr);
2844     if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); }
2845   } else a->garray = 0;
2846 
2847   ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
2848   ierr = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr);
2849   ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
2850   ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr);
2851   ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
2852   ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr);
2853   ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
2854   ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr);
2855   ierr = PetscFListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr);
2856   *newmat = mat;
2857   PetscFunctionReturn(0);
2858 }
2859 
2860 #include "petscsys.h"
2861 
2862 #undef __FUNCT__
2863 #define __FUNCT__ "MatLoad_MPIAIJ"
2864 PetscErrorCode MatLoad_MPIAIJ(PetscViewer viewer, const MatType type,Mat *newmat)
2865 {
2866   Mat            A;
2867   PetscScalar    *vals,*svals;
2868   MPI_Comm       comm = ((PetscObject)viewer)->comm;
2869   MPI_Status     status;
2870   PetscErrorCode ierr;
2871   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag,mpicnt,mpimaxnz;
2872   PetscInt       i,nz,j,rstart,rend,mmax,maxnz = 0;
2873   PetscInt       header[4],*rowlengths = 0,M,N,m,*cols;
2874   PetscInt       *ourlens = PETSC_NULL,*procsnz = PETSC_NULL,*offlens = PETSC_NULL,jj,*mycols,*smycols;
2875   PetscInt       cend,cstart,n,*rowners;
2876   int            fd;
2877 
2878   PetscFunctionBegin;
2879   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2880   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2881   if (!rank) {
2882     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
2883     ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
2884     if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Not a matrix object in file");
2885   }
2886 
2887   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
2888   M = header[1]; N = header[2];
2889   /* determine ownership of all rows */
2890   m    = M/size + ((M % size) > rank);
2891   ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr);
2892   ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
2893 
2894   /* First process needs enough room for process with most rows */
2895   if (!rank) {
2896     mmax       = rowners[1];
2897     for (i=2; i<=size; i++) { /* rowners[1..size] hold each process's row count at this point */
2898       mmax = PetscMax(mmax,rowners[i]);
2899     }
2900   } else mmax = m;
2901 
2902   rowners[0] = 0;
2903   for (i=2; i<=size; i++) {
2904     rowners[i] += rowners[i-1];
2905   }
2906   rstart = rowners[rank];
2907   rend   = rowners[rank+1];
2908 
2909   /* distribute row lengths to all processors */
2910   ierr    = PetscMalloc2(mmax,PetscInt,&ourlens,mmax,PetscInt,&offlens);CHKERRQ(ierr);
2911   if (!rank) {
2912     ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr);
2913     ierr = PetscMalloc(m*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
2914     ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr);
2915     ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr);
2916     for (j=0; j<m; j++) {
2917       procsnz[0] += ourlens[j];
2918     }
2919     for (i=1; i<size; i++) {
2920       ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr);
2921       /* calculate the number of nonzeros on each processor */
2922       for (j=0; j<rowners[i+1]-rowners[i]; j++) {
2923         procsnz[i] += rowlengths[j];
2924       }
2925       mpicnt = PetscMPIIntCast(rowners[i+1]-rowners[i]);
2926       ierr   = MPI_Send(rowlengths,mpicnt,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
2927     }
2928     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
2929   } else {
2930     mpicnt = PetscMPIIntCast(m);
2931     ierr   = MPI_Recv(ourlens,mpicnt,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
2932   }
2933 
2934   if (!rank) {
2935     /* determine max buffer needed and allocate it */
2936     maxnz = 0;
2937     for (i=0; i<size; i++) {
2938       maxnz = PetscMax(maxnz,procsnz[i]);
2939     }
2940     ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr);
2941 
2942     /* read in my part of the matrix column indices  */
2943     nz   = procsnz[0];
2944     ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr);
2945     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
2946 
2947     /* read in everyone else's column indices and ship them off */
2948     for (i=1; i<size; i++) {
2949       nz     = procsnz[i];
2950       ierr   = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
2951       mpicnt = PetscMPIIntCast(nz);
2952       ierr   = MPI_Send(cols,mpicnt,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
2953     }
2954     ierr = PetscFree(cols);CHKERRQ(ierr);
2955   } else {
2956     /* determine buffer space needed for message */
2957     nz = 0;
2958     for (i=0; i<m; i++) {
2959       nz += ourlens[i];
2960     }
2961     ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr);
2962 
2963     /* receive message of column indices*/
2964     mpicnt = PetscMPIIntCast(nz);
2965     ierr = MPI_Recv(mycols,mpicnt,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
2966     ierr = MPI_Get_count(&status,MPIU_INT,&mpimaxnz);CHKERRQ(ierr);
2967     if (mpimaxnz == MPI_UNDEFINED) {SETERRQ1(PETSC_ERR_LIB,"MPI_Get_count() returned MPI_UNDEFINED, expected %d",mpicnt);}
2968     else if (mpimaxnz < 0) {SETERRQ2(PETSC_ERR_LIB,"MPI_Get_count() returned impossible negative value %d, expected %d",mpimaxnz,mpicnt);}
2969     else if (mpimaxnz != mpicnt) {SETERRQ2(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file: expected %d received %d",mpicnt,mpimaxnz);}
2970   }
2971 
2972   /* determine column ownership if matrix is not square */
2973   if (N != M) {
2974     n      = N/size + ((N % size) > rank);
2975     ierr   = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
2976     cstart = cend - n;
2977   } else {
2978     cstart = rstart;
2979     cend   = rend;
2980     n      = cend - cstart;
2981   }
2982 
2983   /* loop over local rows, determining number of off diagonal entries */
2984   ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr);
2985   jj = 0;
2986   for (i=0; i<m; i++) {
2987     for (j=0; j<ourlens[i]; j++) {
2988       if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++;
2989       jj++;
2990     }
2991   }
2992 
2993   /* create our matrix */
2994   for (i=0; i<m; i++) {
2995     ourlens[i] -= offlens[i];
2996   }
2997   ierr = MatCreate(comm,&A);CHKERRQ(ierr);
2998   ierr = MatSetSizes(A,m,n,M,N);CHKERRQ(ierr);
2999   ierr = MatSetType(A,type);CHKERRQ(ierr);
3000   ierr = MatMPIAIJSetPreallocation(A,0,ourlens,0,offlens);CHKERRQ(ierr);
3001 
3002   for (i=0; i<m; i++) {
3003     ourlens[i] += offlens[i];
3004   }
3005 
3006   if (!rank) {
3007     ierr = PetscMalloc((maxnz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
3008 
3009     /* read in my part of the matrix numerical values  */
3010     nz   = procsnz[0];
3011     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3012 
3013     /* insert into matrix */
3014     jj      = rstart;
3015     smycols = mycols;
3016     svals   = vals;
3017     for (i=0; i<m; i++) {
3018       ierr = MatSetValues_MPIAIJ(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
3019       smycols += ourlens[i];
3020       svals   += ourlens[i];
3021       jj++;
3022     }
3023 
3024     /* read in other processors and ship out */
3025     for (i=1; i<size; i++) {
3026       nz     = procsnz[i];
3027       ierr   = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3028       mpicnt = PetscMPIIntCast(nz);
3029       ierr   = MPI_Send(vals,mpicnt,MPIU_SCALAR,i,((PetscObject)A)->tag,comm);CHKERRQ(ierr);
3030     }
3031     ierr = PetscFree(procsnz);CHKERRQ(ierr);
3032   } else {
3033     /* receive numeric values */
3034     ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
3035 
3036     /* receive message of values*/
3037     mpicnt = PetscMPIIntCast(nz);
3038     ierr   = MPI_Recv(vals,mpicnt,MPIU_SCALAR,0,((PetscObject)A)->tag,comm,&status);CHKERRQ(ierr);
3039     ierr   = MPI_Get_count(&status,MPIU_SCALAR,&mpimaxnz);CHKERRQ(ierr);
3040     if (mpimaxnz == MPI_UNDEFINED) {SETERRQ1(PETSC_ERR_LIB,"MPI_Get_count() returned MPI_UNDEFINED, expected %d",mpicnt);}
3041     else if (mpimaxnz < 0) {SETERRQ2(PETSC_ERR_LIB,"MPI_Get_count() returned impossible negative value %d, expected %d",mpimaxnz,mpicnt);}
3042     else if (mpimaxnz != mpicnt) {SETERRQ2(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file: expected %d received %d",mpicnt,mpimaxnz);}
3043 
3044     /* insert into matrix */
3045     jj      = rstart;
3046     smycols = mycols;
3047     svals   = vals;
3048     for (i=0; i<m; i++) {
3049       ierr     = MatSetValues_MPIAIJ(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
3050       smycols += ourlens[i];
3051       svals   += ourlens[i];
3052       jj++;
3053     }
3054   }
3055   ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr);
3056   ierr = PetscFree(vals);CHKERRQ(ierr);
3057   ierr = PetscFree(mycols);CHKERRQ(ierr);
3058   ierr = PetscFree(rowners);CHKERRQ(ierr);
3059 
3060   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3061   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3062   *newmat = A;
3063   PetscFunctionReturn(0);
3064 }
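
/*
   A minimal sketch of how this loader is typically reached (illustrative only; the
   file name is hypothetical and error checking is abbreviated).  MatLoad() with a
   binary viewer and type MATMPIAIJ dispatches to MatLoad_MPIAIJ() above:

      PetscViewer viewer;
      Mat         A;

      ierr = PetscViewerBinaryOpen(PETSC_COMM_WORLD,"matrix.dat",FILE_MODE_READ,&viewer);CHKERRQ(ierr);
      ierr = MatLoad(viewer,MATMPIAIJ,&A);CHKERRQ(ierr);
      ierr = PetscViewerDestroy(viewer);CHKERRQ(ierr);
*/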
3065 
3066 #undef __FUNCT__
3067 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ"
3068 PetscErrorCode MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat)
3069 {
3070   PetscErrorCode ierr;
3071   IS             iscol_local;
3072   PetscInt       csize;
3073 
3074   PetscFunctionBegin;
3075   ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
3076   if (call == MAT_REUSE_MATRIX) {
3077     ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr);
3078     if (!iscol_local) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3079   } else {
3080     ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
3081   }
3082   ierr = MatGetSubMatrix_MPIAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr);
3083   if (call == MAT_INITIAL_MATRIX) {
3084     ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
3085     ierr = ISDestroy(iscol_local);CHKERRQ(ierr);
3086   }
3087   PetscFunctionReturn(0);
3088 }
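
/*
   A hedged usage sketch for the routine above (illustrative only; it is reached
   through the public MatGetSubMatrix(), assumed here to take (mat,isrow,iscol,
   MatReuse,Mat*) to match the implementation above; A is an assembled parallel
   matrix and all other names are local to the sketch).  Each process supplies its
   share of the wanted rows and columns; reusing the same index sets later allows
   MAT_REUSE_MATRIX:

      PetscInt rstart,rend;
      IS       is;
      Mat      S;

      ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
      ierr = ISCreateStride(PETSC_COMM_WORLD,rend-rstart,rstart,1,&is);CHKERRQ(ierr);
      ierr = MatGetSubMatrix(A,is,is,MAT_INITIAL_MATRIX,&S);CHKERRQ(ierr);
      ...  the values of A change, the index sets do not ...
      ierr = MatGetSubMatrix(A,is,is,MAT_REUSE_MATRIX,&S);CHKERRQ(ierr);
      ierr = ISDestroy(is);CHKERRQ(ierr);
      ierr = MatDestroy(S);CHKERRQ(ierr);
*/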
3089 
3090 #undef __FUNCT__
3091 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ_Private"
3092 /*
3093     Not ideal since it makes two copies of the submatrix: first a SeqAIJ matrix
3094   on each process, and then the final parallel matrix by concatenating the local
3095   matrices. Writing it directly would look much like MatGetSubMatrices_MPIAIJ().
3096 
3097   Note: This requires a sequential iscol with all indices.
3098 */
3099 PetscErrorCode MatGetSubMatrix_MPIAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat)
3100 {
3101   PetscErrorCode ierr;
3102   PetscMPIInt    rank,size;
3103   PetscInt       i,m,n,rstart,row,rend,nz,*cwork,j;
3104   PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal;
3105   Mat            *local,M,Mreuse;
3106   MatScalar      *vwork,*aa;
3107   MPI_Comm       comm = ((PetscObject)mat)->comm;
3108   Mat_SeqAIJ     *aij;
3109 
3110 
3111   PetscFunctionBegin;
3112   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3113   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3114 
3115   if (call ==  MAT_REUSE_MATRIX) {
3116     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr);
3117     if (!Mreuse) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3118     local = &Mreuse;
3119     ierr  = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);CHKERRQ(ierr);
3120   } else {
3121     ierr   = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr);
3122     Mreuse = *local;
3123     ierr   = PetscFree(local);CHKERRQ(ierr);
3124   }
3125 
3126   /*
3127       m - number of local rows
3128       n - number of columns (same on all processors)
3129       rstart - first row in new global matrix generated
3130   */
3131   ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
3132   if (call == MAT_INITIAL_MATRIX) {
3133     aij = (Mat_SeqAIJ*)(Mreuse)->data;
3134     ii  = aij->i;
3135     jj  = aij->j;
3136 
3137     /*
3138         Determine the number of non-zeros in the diagonal and off-diagonal
3139         portions of the matrix in order to do correct preallocation
3140     */
3141 
3142     /* first get start and end of "diagonal" columns */
3143     if (csize == PETSC_DECIDE) {
3144       ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
3145       if (mglobal == n) { /* square matrix */
3146 	nlocal = m;
3147       } else {
3148         nlocal = n/size + ((n % size) > rank);
3149       }
3150     } else {
3151       nlocal = csize;
3152     }
3153     ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3154     rstart = rend - nlocal;
3155     if (rank == size - 1 && rend != n) {
3156       SETERRQ2(PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n);
3157     }
3158 
3159     /* next, compute all the lengths */
3160     ierr  = PetscMalloc((2*m+1)*sizeof(PetscInt),&dlens);CHKERRQ(ierr);
3161     olens = dlens + m;
3162     for (i=0; i<m; i++) {
3163       jend = ii[i+1] - ii[i];
3164       olen = 0;
3165       dlen = 0;
3166       for (j=0; j<jend; j++) {
3167         if (*jj < rstart || *jj >= rend) olen++;
3168         else dlen++;
3169         jj++;
3170       }
3171       olens[i] = olen;
3172       dlens[i] = dlen;
3173     }
3174     ierr = MatCreate(comm,&M);CHKERRQ(ierr);
3175     ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr);
3176     ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr);
3177     ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr);
3178     ierr = PetscFree(dlens);CHKERRQ(ierr);
3179   } else {
3180     PetscInt ml,nl;
3181 
3182     M = *newmat;
3183     ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
3184     if (ml != m) SETERRQ(PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
3185     ierr = MatZeroEntries(M);CHKERRQ(ierr);
3186     /*
3187          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
3188        rather than the slower MatSetValues().
3189     */
3190     M->was_assembled = PETSC_TRUE;
3191     M->assembled     = PETSC_FALSE;
3192   }
3193   ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
3194   aij = (Mat_SeqAIJ*)(Mreuse)->data;
3195   ii  = aij->i;
3196   jj  = aij->j;
3197   aa  = aij->a;
3198   for (i=0; i<m; i++) {
3199     row   = rstart + i;
3200     nz    = ii[i+1] - ii[i];
3201     cwork = jj;     jj += nz;
3202     vwork = aa;     aa += nz;
3203     ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
3204   }
3205 
3206   ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3207   ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3208   *newmat = M;
3209 
3210   /* save submatrix used in processor for next request */
3211   if (call ==  MAT_INITIAL_MATRIX) {
3212     ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
3213     ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr);
3214   }
3215 
3216   PetscFunctionReturn(0);
3217 }
3218 
3219 EXTERN_C_BEGIN
3220 #undef __FUNCT__
3221 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR_MPIAIJ"
3222 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[])
3223 {
3224   PetscInt       m,cstart, cend,j,nnz,i,d;
3225   PetscInt       *d_nnz,*o_nnz,nnz_max = 0,rstart,ii;
3226   const PetscInt *JJ;
3227   PetscScalar    *values;
3228   PetscErrorCode ierr;
3229 
3230   PetscFunctionBegin;
3231   if (Ii[0]) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0 it is %D",Ii[0]);
3232 
3233   ierr = PetscMapSetBlockSize(B->rmap,1);CHKERRQ(ierr);
3234   ierr = PetscMapSetBlockSize(B->cmap,1);CHKERRQ(ierr);
3235   ierr = PetscMapSetUp(B->rmap);CHKERRQ(ierr);
3236   ierr = PetscMapSetUp(B->cmap);CHKERRQ(ierr);
3237   m      = B->rmap->n;
3238   cstart = B->cmap->rstart;
3239   cend   = B->cmap->rend;
3240   rstart = B->rmap->rstart;
3241 
3242   ierr  = PetscMalloc((2*m+1)*sizeof(PetscInt),&d_nnz);CHKERRQ(ierr);
3243   o_nnz = d_nnz + m;
3244 
3245 #if defined(PETSC_USE_DEBUGGING)
3246   for (i=0; i<m; i++) {
3247     nnz     = Ii[i+1]- Ii[i];
3248     JJ      = J + Ii[i];
3249     if (nnz < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative number of columns %D",i,nnz);
3250     if (nnz && (JJ[0] < 0)) SETERRQ1(PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with a negative column index",i);
3251     if (nnz && (JJ[nnz-1] >= B->cmap->N)) SETERRQ3(PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N);
3252     for (j=1; j<nnz; j++) {
3253       if (JJ[j] <= JJ[j-1]) SETERRQ2(PETSC_ERR_ARG_WRONGSTATE,"Row %D has an unsorted column index at position %D",i,j);
3254     }
3255   }
3256 #endif
3257 
3258   for (i=0; i<m; i++) {
3259     nnz     = Ii[i+1]- Ii[i];
3260     JJ      = J + Ii[i];
3261     nnz_max = PetscMax(nnz_max,nnz);
3262     for (j=0; j<nnz; j++) {
3263       if (*JJ >= cstart) break;
3264       JJ++;
3265     }
3266     d = 0;
3267     for (; j<nnz; j++) {
3268       if (*JJ++ >= cend) break;
3269       d++;
3270     }
3271     d_nnz[i] = d;
3272     o_nnz[i] = nnz - d;
3273   }
3274   ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
3275   ierr = PetscFree(d_nnz);CHKERRQ(ierr);
3276 
3277   if (v) values = (PetscScalar*)v;
3278   else {
3279     ierr = PetscMalloc((nnz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr);
3280     ierr = PetscMemzero(values,nnz_max*sizeof(PetscScalar));CHKERRQ(ierr);
3281   }
3282 
3283   for (i=0; i<m; i++) {
3284     ii   = i + rstart;
3285     nnz  = Ii[i+1]- Ii[i];
3286     ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ? Ii[i] : 0),INSERT_VALUES);CHKERRQ(ierr);
3287   }
3288   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3289   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3290 
3291   if (!v) {
3292     ierr = PetscFree(values);CHKERRQ(ierr);
3293   }
3294   PetscFunctionReturn(0);
3295 }
3296 EXTERN_C_END
3297 
3298 #undef __FUNCT__
3299 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR"
3300 /*@
3301    MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format
3302    (the default parallel PETSc format).
3303 
3304    Collective on MPI_Comm
3305 
3306    Input Parameters:
3307 +  B - the matrix
3308 .  i - the indices into j for the start of each local row (starts with zero)
3309 .  j - the column indices for each local row (starts with zero) these must be sorted for each row
3310 -  v - optional values in the matrix
3311 
3312    Level: developer
3313 
3314    Notes:
3315        The i, j, and v arrays ARE copied by this routine into the internal format used by PETSc;
3316      thus you CANNOT change the matrix entries by changing the values of v[] after you have
3317      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3318 
3319        The i and j indices are 0 based, and the entries of i are offsets into the local j array.
3320 
3321        The format used for the sparse matrix input is equivalent to a
3322     row-major ordering, i.e. for the following matrix the expected input data is
3323     as shown:
3324 
3325         1 0 0
3326         2 0 3     P0
3327        -------
3328         4 5 6     P1
3329 
3330      Process0 [P0]: rows_owned=[0,1]
3331         i =  {0,1,3}  [size = nrow+1  = 2+1]
3332         j =  {0,0,2}  [size = local nz = 3]
3333         v =  {1,2,3}  [size = local nz = 3]
3334 
3335      Process1 [P1]: rows_owned=[2]
3336         i =  {0,3}    [size = nrow+1  = 1+1]
3337         j =  {0,1,2}  [size = local nz = 3]
3338         v =  {4,5,6}  [size = local nz = 3]
3339 
3340       The column indices for each row MUST be sorted.
3341 
3342 .keywords: matrix, aij, compressed row, sparse, parallel
3343 
3344 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateMPIAIJ(), MPIAIJ,
3345           MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays()
3346 @*/
3347 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
3348 {
3349   PetscErrorCode ierr,(*f)(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]);
3350 
3351   PetscFunctionBegin;
3352   ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",(void (**)(void))&f);CHKERRQ(ierr);
3353   if (f) {
3354     ierr = (*f)(B,i,j,v);CHKERRQ(ierr);
3355   }
3356   PetscFunctionReturn(0);
3357 }
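
/*
   A small sketch of the calling sequence documented above, instantiating the 3x3
   two-process example from the manual page on process 0 (illustrative only; error
   checking abbreviated and all names local to the sketch):

      PetscInt    ii[] = {0,1,3};                rows 0 and 1 are local
      PetscInt    jj[] = {0,0,2};
      PetscScalar vv[] = {1.0,2.0,3.0};
      Mat         B;

      ierr = MatCreate(PETSC_COMM_WORLD,&B);CHKERRQ(ierr);
      ierr = MatSetSizes(B,2,PETSC_DECIDE,3,3);CHKERRQ(ierr);
      ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr);
      ierr = MatMPIAIJSetPreallocationCSR(B,ii,jj,vv);CHKERRQ(ierr);
*/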
3358 
3359 #undef __FUNCT__
3360 #define __FUNCT__ "MatMPIAIJSetPreallocation"
3361 /*@C
3362    MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format
3363    (the default parallel PETSc format).  For good matrix assembly performance
3364    the user should preallocate the matrix storage by setting the parameters
3365    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3366    performance can be increased by more than a factor of 50.
3367 
3368    Collective on MPI_Comm
3369 
3370    Input Parameters:
3371 +  A - the matrix
3372 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
3373            (same value is used for all local rows)
3374 .  d_nnz - array containing the number of nonzeros in the various rows of the
3375            DIAGONAL portion of the local submatrix (possibly different for each row)
3376            or PETSC_NULL, if d_nz is used to specify the nonzero structure.
3377            The size of this array is equal to the number of local rows, i.e 'm'.
3378            You must leave room for the diagonal entry even if it is zero.
3379 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
3380            submatrix (same value is used for all local rows).
3381 -  o_nnz - array containing the number of nonzeros in the various rows of the
3382            OFF-DIAGONAL portion of the local submatrix (possibly different for
3383            each row) or PETSC_NULL, if o_nz is used to specify the nonzero
3384            structure. The size of this array is equal to the number
3385            of local rows, i.e 'm'.
3386 
3387    If the *_nnz parameter is given then the *_nz parameter is ignored
3388 
3389    The AIJ format (also called the Yale sparse matrix format or
3390    compressed row storage (CSR)), is fully compatible with standard Fortran 77
3391    storage.  The stored row and column indices begin with zero.  See the users manual for details.
3392 
3393    The parallel matrix is partitioned such that the first m0 rows belong to
3394    process 0, the next m1 rows belong to process 1, the next m2 rows belong
3395    to process 2 etc.. where m0,m1,m2... are the input parameter 'm'.
3396 
3397    The DIAGONAL portion of the local submatrix of a processor can be defined
3398    as the submatrix which is obtained by extracting the part corresponding
3399    to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the
3400    first row that belongs to the processor, and r2 is the last row belonging
3401    to this processor. This is a square m x m matrix. The remaining portion
3402    of the local submatrix (m x N) constitutes the OFF-DIAGONAL portion.
3403 
3404    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
3405 
3406    You can call MatGetInfo() to get information on how effective the preallocation was;
3407    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
3408    You can also run with the option -info and look for messages with the string
3409    malloc in them to see if additional memory allocation was needed.
3410 
3411    Example usage:
3412 
3413    Consider the following 8x8 matrix with 34 non-zero values, that is
3414    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
3415    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
3416    as follows:
3417 
3418 .vb
3419             1  2  0  |  0  3  0  |  0  4
3420     Proc0   0  5  6  |  7  0  0  |  8  0
3421             9  0 10  | 11  0  0  | 12  0
3422     -------------------------------------
3423            13  0 14  | 15 16 17  |  0  0
3424     Proc1   0 18  0  | 19 20 21  |  0  0
3425             0  0  0  | 22 23  0  | 24  0
3426     -------------------------------------
3427     Proc2  25 26 27  |  0  0 28  | 29  0
3428            30  0  0  | 31 32 33  |  0 34
3429 .ve
3430 
3431    This can be represented as a collection of submatrices as:
3432 
3433 .vb
3434       A B C
3435       D E F
3436       G H I
3437 .ve
3438 
3439    Where the submatrices A,B,C are owned by proc0, D,E,F are
3440    owned by proc1, G,H,I are owned by proc2.
3441 
3442    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3443    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3444    The 'M','N' parameters are 8,8, and have the same values on all procs.
3445 
3446    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
3447    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
3448    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
3449    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
3450    part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
3451    matrix, and [DF] as another SeqAIJ matrix.
3452 
3453    When d_nz, o_nz parameters are specified, d_nz storage elements are
3454    allocated for every row of the local diagonal submatrix, and o_nz
3455    storage locations are allocated for every row of the OFF-DIAGONAL submat.
3456    One way to choose d_nz and o_nz is to use the maximum number of nonzeros over
3457    the local rows for each of the local DIAGONAL and OFF-DIAGONAL submatrices.
3458    In this case, the values of d_nz,o_nz are:
3459 .vb
3460      proc0 : dnz = 2, o_nz = 2
3461      proc1 : dnz = 3, o_nz = 2
3462      proc2 : dnz = 1, o_nz = 4
3463 .ve
3464    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
3465    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
3466    for proc2, i.e. we are using 12+15+10=37 storage locations to store
3467    34 values.
3468 
3469    When d_nnz, o_nnz parameters are specified, the storage is specified
3470    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
3471    In the above case the values for d_nnz,o_nnz are:
3472 .vb
3473      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
3474      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
3475      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
3476 .ve
3477    Here the space allocated is the sum of all the above values, i.e. 34, and
3478    hence the preallocation is exact.
3479 
3480    Level: intermediate
3481 
3482 .keywords: matrix, aij, compressed row, sparse, parallel
3483 
3484 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIAIJ(), MatMPIAIJSetPreallocationCSR(),
3485           MPIAIJ, MatGetInfo()
3486 @*/
3487 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
3488 {
3489   PetscErrorCode ierr,(*f)(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]);
3490 
3491   PetscFunctionBegin;
3492   ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr);
3493   if (f) {
3494     ierr = (*f)(B,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
3495   }
3496   PetscFunctionReturn(0);
3497 }
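
/*
   A minimal preallocation sketch following the 8x8 example in the manual page above
   (illustrative only; this is the call made on process 0, which owns 3 rows; the
   other processes pass their own counts):

      PetscInt d_nnz[3] = {2,2,2},o_nnz[3] = {2,2,2};
      Mat      A;

      ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
      ierr = MatSetSizes(A,3,3,8,8);CHKERRQ(ierr);
      ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
      ierr = MatMPIAIJSetPreallocation(A,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
*/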
3498 
3499 #undef __FUNCT__
3500 #define __FUNCT__ "MatCreateMPIAIJWithArrays"
3501 /*@
3502      MatCreateMPIAIJWithArrays - creates a MPI AIJ matrix using arrays that contain in standard
3503          CSR format the local rows.
3504 
3505    Collective on MPI_Comm
3506 
3507    Input Parameters:
3508 +  comm - MPI communicator
3509 .  m - number of local rows (Cannot be PETSC_DECIDE)
3510 .  n - This value should be the same as the local size used in creating the
3511        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
3512        calculated if N is given) For square matrices n is almost always m.
3513 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
3514 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
3515 .   i - row indices
3516 .   j - column indices
3517 -   a - matrix values
3518 
3519    Output Parameter:
3520 .   mat - the matrix
3521 
3522    Level: intermediate
3523 
3524    Notes:
3525        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
3526      thus you CANNOT change the matrix entries by changing the values of a[] after you have
3527      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3528 
3529        The i and j indices are 0 based, and the entries of i are offsets into the local j array.
3530 
3531        The format used for the sparse matrix input is equivalent to a
3532     row-major ordering, i.e. for the following matrix the expected input data is
3533     as shown:
3534 
3535         1 0 0
3536         2 0 3     P0
3537        -------
3538         4 5 6     P1
3539 
3540      Process0 [P0]: rows_owned=[0,1]
3541         i =  {0,1,3}  [size = nrow+1  = 2+1]
3542         j =  {0,0,2}  [size = local nz = 3]
3543         v =  {1,2,3}  [size = local nz = 3]
3544 
3545      Process1 [P1]: rows_owned=[2]
3546         i =  {0,3}    [size = nrow+1  = 1+1]
3547         j =  {0,1,2}  [size = local nz = 3]
3548         v =  {4,5,6}  [size = local nz = 3]
3549 
3550 .keywords: matrix, aij, compressed row, sparse, parallel
3551 
3552 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
3553           MPIAIJ, MatCreateMPIAIJ(), MatCreateMPIAIJWithSplitArrays()
3554 @*/
3555 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
3556 {
3557   PetscErrorCode ierr;
3558 
3559   PetscFunctionBegin;
3560   if (i[0]) {
3561     SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
3562   }
3563   if (m < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
3564   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
3565   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
3566   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
3567   ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr);
3568   PetscFunctionReturn(0);
3569 }
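
/*
   A hedged sketch of the call above, instantiating process 1 of the two-process
   example in the manual page (illustrative only; error checking abbreviated):

      PetscInt    ii[] = {0,3};                   one local row with three nonzeros
      PetscInt    jj[] = {0,1,2};
      PetscScalar aa[] = {4.0,5.0,6.0};
      Mat         A;

      ierr = MatCreateMPIAIJWithArrays(PETSC_COMM_WORLD,1,PETSC_DECIDE,3,3,ii,jj,aa,&A);CHKERRQ(ierr);
*/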
3570 
3571 #undef __FUNCT__
3572 #define __FUNCT__ "MatCreateMPIAIJ"
3573 /*@C
3574    MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format
3575    (the default parallel PETSc format).  For good matrix assembly performance
3576    the user should preallocate the matrix storage by setting the parameters
3577    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3578    performance can be increased by more than a factor of 50.
3579 
3580    Collective on MPI_Comm
3581 
3582    Input Parameters:
3583 +  comm - MPI communicator
3584 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
3585            This value should be the same as the local size used in creating the
3586            y vector for the matrix-vector product y = Ax.
3587 .  n - This value should be the same as the local size used in creating the
3588        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
3589        calculated if N is given) For square matrices n is almost always m.
3590 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
3591 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
3592 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
3593            (same value is used for all local rows)
3594 .  d_nnz - array containing the number of nonzeros in the various rows of the
3595            DIAGONAL portion of the local submatrix (possibly different for each row)
3596            or PETSC_NULL, if d_nz is used to specify the nonzero structure.
3597            The size of this array is equal to the number of local rows, i.e 'm'.
3598            You must leave room for the diagonal entry even if it is zero.
3599 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
3600            submatrix (same value is used for all local rows).
3601 -  o_nnz - array containing the number of nonzeros in the various rows of the
3602            OFF-DIAGONAL portion of the local submatrix (possibly different for
3603            each row) or PETSC_NULL, if o_nz is used to specify the nonzero
3604            structure. The size of this array is equal to the number
3605            of local rows, i.e 'm'.
3606 
3607    Output Parameter:
3608 .  A - the matrix
3609 
3610    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
3611    MatXXXXSetPreallocation() paradigm instead of this routine directly.
3612    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
3613 
3614    Notes:
3615    If the *_nnz parameter is given then the *_nz parameter is ignored
3616 
3617    m,n,M,N parameters specify the size of the matrix, and its partitioning across
3618    processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
3619    storage requirements for this matrix.
3620 
3621    If PETSC_DECIDE or  PETSC_DETERMINE is used for a particular argument on one
3622    processor than it must be used on all processors that share the object for
3623    that argument.
3624 
3625    The user MUST specify either the local or global matrix dimensions
3626    (possibly both).
3627 
3628    The parallel matrix is partitioned across processors such that the
3629    first m0 rows belong to process 0, the next m1 rows belong to
3630    process 1, the next m2 rows belong to process 2 etc.. where
3631    m0,m1,m2,.. are the input parameter 'm'. i.e each processor stores
3632    values corresponding to [m x N] submatrix.
3633 
3634    The columns are logically partitioned with the n0 columns belonging
3635    to 0th partition, the next n1 columns belonging to the next
3636    partition etc. where n0,n1,n2... are the input parameter 'n'.
3637 
3638    The DIAGONAL portion of the local submatrix on any given processor
3639    is the submatrix corresponding to the rows and columns m,n
3640    corresponding to the given processor. i.e diagonal matrix on
3641    process 0 is [m0 x n0], diagonal matrix on process 1 is [m1 x n1]
3642    etc. The remaining portion of the local submatrix [m x (N-n)]
3643    constitutes the OFF-DIAGONAL portion. The example below better
3644    illustrates this concept.
3645 
3646    For a square global matrix we define each processor's diagonal portion
3647    to be its local rows and the corresponding columns (a square submatrix);
3648    each processor's off-diagonal portion encompasses the remainder of the
3649    local matrix (a rectangular submatrix).
3650 
3651    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
3652 
3653    When calling this routine with a single process communicator, a matrix of
3654    type SEQAIJ is returned.  If a matrix of type MPIAIJ is desired for this
3655    type of communicator, use the construction mechanism:
3656      MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatSetSizes(A, m,n,M,N); MatMPIAIJSetPreallocation(A,...);
3657 
3658    By default, this format uses inodes (identical nodes) when possible.
3659    We search for consecutive rows with the same nonzero structure, thereby
3660    reusing matrix information to achieve increased efficiency.
3661 
3662    Options Database Keys:
3663 +  -mat_no_inode  - Do not use inodes
3664 .  -mat_inode_limit <limit> - Sets inode limit (max limit=5)
3665 -  -mat_aij_oneindex - Internally use indexing starting at 1
3666         rather than 0.  Note that when calling MatSetValues(),
3667         the user still MUST index entries starting at 0!
3668 
3669 
3670    Example usage:
3671 
3672    Consider the following 8x8 matrix with 34 non-zero values, that is
3673    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
3674    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
3675    as follows:
3676 
3677 .vb
3678             1  2  0  |  0  3  0  |  0  4
3679     Proc0   0  5  6  |  7  0  0  |  8  0
3680             9  0 10  | 11  0  0  | 12  0
3681     -------------------------------------
3682            13  0 14  | 15 16 17  |  0  0
3683     Proc1   0 18  0  | 19 20 21  |  0  0
3684             0  0  0  | 22 23  0  | 24  0
3685     -------------------------------------
3686     Proc2  25 26 27  |  0  0 28  | 29  0
3687            30  0  0  | 31 32 33  |  0 34
3688 .ve
3689 
3690    This can be represented as a collection of submatrices as:
3691 
3692 .vb
3693       A B C
3694       D E F
3695       G H I
3696 .ve
3697 
3698    Where the submatrices A,B,C are owned by proc0, D,E,F are
3699    owned by proc1, G,H,I are owned by proc2.
3700 
3701    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3702    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3703    The 'M','N' parameters are 8,8, and have the same values on all procs.
3704 
3705    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
3706    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
3707    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
3708    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
3709    part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
3710    matrix, and [DF] as another SeqAIJ matrix.
3711 
3712    When d_nz, o_nz parameters are specified, d_nz storage elements are
3713    allocated for every row of the local diagonal submatrix, and o_nz
3714    storage locations are allocated for every row of the OFF-DIAGONAL submat.
3715    One way to choose d_nz and o_nz is to use the maximum number of nonzeros over
3716    the local rows for each of the local DIAGONAL and OFF-DIAGONAL submatrices.
3717    In this case, the values of d_nz,o_nz are:
3718 .vb
3719      proc0 : dnz = 2, o_nz = 2
3720      proc1 : dnz = 3, o_nz = 2
3721      proc2 : dnz = 1, o_nz = 4
3722 .ve
3723    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
3724    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
3725    for proc2, i.e. we are using 12+15+10=37 storage locations to store
3726    34 values.
3727 
3728    When d_nnz, o_nnz parameters are specified, the storage is specified
3729    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
3730    In the above case the values for d_nnz,o_nnz are:
3731 .vb
3732      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
3733      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
3734      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
3735 .ve
3736    Here the space allocated is the sum of all the above values, i.e. 34, and
3737    hence the preallocation is exact.
3738 
3739    Level: intermediate
3740 
3741 .keywords: matrix, aij, compressed row, sparse, parallel
3742 
3743 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
3744           MPIAIJ, MatCreateMPIAIJWithArrays()
3745 @*/
3746 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
3747 {
3748   PetscErrorCode ierr;
3749   PetscMPIInt    size;
3750 
3751   PetscFunctionBegin;
3752   ierr = MatCreate(comm,A);CHKERRQ(ierr);
3753   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
3754   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3755   if (size > 1) {
3756     ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr);
3757     ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
3758   } else {
3759     ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr);
3760     ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr);
3761   }
3762   PetscFunctionReturn(0);
3763 }
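
/*
   A minimal usage sketch (illustrative only; the sizes and nonzero estimates are
   made up).  The manual page above recommends the MatCreate()/MatSetType()/
   MatMPIAIJSetPreallocation() sequence; the convenience routine collapses it into
   one call:

      Mat A;

      ierr = MatCreateMPIAIJ(PETSC_COMM_WORLD,PETSC_DECIDE,PETSC_DECIDE,100,100,5,PETSC_NULL,2,PETSC_NULL,&A);CHKERRQ(ierr);
      ...  MatSetValues(), MatAssemblyBegin(), MatAssemblyEnd() ...
      ierr = MatDestroy(A);CHKERRQ(ierr);
*/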
3764 
3765 #undef __FUNCT__
3766 #define __FUNCT__ "MatMPIAIJGetSeqAIJ"
3767 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,PetscInt *colmap[])
3768 {
3769   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
3770 
3771   PetscFunctionBegin;
3772   *Ad     = a->A;
3773   *Ao     = a->B;
3774   *colmap = a->garray;
3775   PetscFunctionReturn(0);
3776 }
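
/*
   Illustrative sketch for the accessor above: Ad is the diagonal block, Ao the
   off-diagonal block, and colmap maps a local column of Ao to its global column
   (the returned matrices are references, not copies; A is an assembled MPIAIJ
   matrix, the other names are local to the sketch):

      Mat      Ad,Ao;
      PetscInt *colmap;

      ierr = MatMPIAIJGetSeqAIJ(A,&Ad,&Ao,&colmap);CHKERRQ(ierr);
*/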
3777 
3778 #undef __FUNCT__
3779 #define __FUNCT__ "MatSetColoring_MPIAIJ"
3780 PetscErrorCode MatSetColoring_MPIAIJ(Mat A,ISColoring coloring)
3781 {
3782   PetscErrorCode ierr;
3783   PetscInt       i;
3784   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
3785 
3786   PetscFunctionBegin;
3787   if (coloring->ctype == IS_COLORING_GLOBAL) {
3788     ISColoringValue *allcolors,*colors;
3789     ISColoring      ocoloring;
3790 
3791     /* set coloring for diagonal portion */
3792     ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr);
3793 
3794     /* set coloring for off-diagonal portion */
3795     ierr = ISAllGatherColors(((PetscObject)A)->comm,coloring->n,coloring->colors,PETSC_NULL,&allcolors);CHKERRQ(ierr);
3796     ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
3797     for (i=0; i<a->B->cmap->n; i++) {
3798       colors[i] = allcolors[a->garray[i]];
3799     }
3800     ierr = PetscFree(allcolors);CHKERRQ(ierr);
3801     ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
3802     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
3803     ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
3804   } else if (coloring->ctype == IS_COLORING_GHOSTED) {
3805     ISColoringValue *colors;
3806     PetscInt        *larray;
3807     ISColoring      ocoloring;
3808 
3809     /* set coloring for diagonal portion */
3810     ierr = PetscMalloc((a->A->cmap->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr);
3811     for (i=0; i<a->A->cmap->n; i++) {
3812       larray[i] = i + A->cmap->rstart;
3813     }
3814     ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->A->cmap->n,larray,PETSC_NULL,larray);CHKERRQ(ierr);
3815     ierr = PetscMalloc((a->A->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
3816     for (i=0; i<a->A->cmap->n; i++) {
3817       colors[i] = coloring->colors[larray[i]];
3818     }
3819     ierr = PetscFree(larray);CHKERRQ(ierr);
3820     ierr = ISColoringCreate(PETSC_COMM_SELF,coloring->n,a->A->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
3821     ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr);
3822     ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
3823 
3824     /* set coloring for off-diagonal portion */
3825     ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr);
3826     ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->B->cmap->n,a->garray,PETSC_NULL,larray);CHKERRQ(ierr);
3827     ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
3828     for (i=0; i<a->B->cmap->n; i++) {
3829       colors[i] = coloring->colors[larray[i]];
3830     }
3831     ierr = PetscFree(larray);CHKERRQ(ierr);
3832     ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
3833     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
3834     ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
3835   } else {
3836     SETERRQ1(PETSC_ERR_SUP,"No support ISColoringType %d",(int)coloring->ctype);
3837   }
3838 
3839   PetscFunctionReturn(0);
3840 }
3841 
3842 #if defined(PETSC_HAVE_ADIC)
3843 #undef __FUNCT__
3844 #define __FUNCT__ "MatSetValuesAdic_MPIAIJ"
3845 PetscErrorCode MatSetValuesAdic_MPIAIJ(Mat A,void *advalues)
3846 {
3847   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
3848   PetscErrorCode ierr;
3849 
3850   PetscFunctionBegin;
3851   ierr = MatSetValuesAdic_SeqAIJ(a->A,advalues);CHKERRQ(ierr);
3852   ierr = MatSetValuesAdic_SeqAIJ(a->B,advalues);CHKERRQ(ierr);
3853   PetscFunctionReturn(0);
3854 }
3855 #endif
3856 
3857 #undef __FUNCT__
3858 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ"
3859 PetscErrorCode MatSetValuesAdifor_MPIAIJ(Mat A,PetscInt nl,void *advalues)
3860 {
3861   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
3862   PetscErrorCode ierr;
3863 
3864   PetscFunctionBegin;
3865   ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr);
3866   ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr);
3867   PetscFunctionReturn(0);
3868 }
3869 
3870 #undef __FUNCT__
3871 #define __FUNCT__ "MatMerge"
3872 /*@
3873       MatMerge - Creates a single large PETSc matrix by concatenating sequential
3874                  matrices from each processor
3875 
3876     Collective on MPI_Comm
3877 
3878    Input Parameters:
3879 +    comm - the communicator the parallel matrix will live on
3880 .    inmat - the input sequential matrix (one per process)
3881 .    n - number of local columns (or PETSC_DECIDE)
3882 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
3883 
3884    Output Parameter:
3885 .    outmat - the parallel matrix generated
3886 
3887     Level: advanced
3888 
3889    Notes: The number of columns of the matrix in EACH processor MUST be the same.
3890 
3891 @*/
3892 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
3893 {
3894   PetscErrorCode ierr;
3895   PetscInt       m,N,i,rstart,nnz,Ii,*dnz,*onz;
3896   PetscInt       *indx;
3897   PetscScalar    *values;
3898 
3899   PetscFunctionBegin;
3900   ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
3901   if (scall == MAT_INITIAL_MATRIX){
3902     /* count nonzeros in each row, for diagonal and off diagonal portion of matrix */
3903     if (n == PETSC_DECIDE){
3904       ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr);
3905     }
3906     ierr = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3907     rstart -= m;
3908 
3909     ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
3910     for (i=0;i<m;i++) {
3911       ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr);
3912       ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr);
3913       ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr);
3914     }
3915     /* This routine will ONLY return MPIAIJ type matrix */
3916     ierr = MatCreate(comm,outmat);CHKERRQ(ierr);
3917     ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
3918     ierr = MatSetType(*outmat,MATMPIAIJ);CHKERRQ(ierr);
3919     ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr);
3920     ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
3921 
3922   } else if (scall == MAT_REUSE_MATRIX){
3923     ierr = MatGetOwnershipRange(*outmat,&rstart,PETSC_NULL);CHKERRQ(ierr);
3924   } else {
3925     SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
3926   }
3927 
3928   for (i=0;i<m;i++) {
3929     ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
3930     Ii    = i + rstart;
3931     ierr = MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
3932     ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
3933   }
3934   ierr = MatDestroy(inmat);CHKERRQ(ierr);
3935   ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3936   ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3937 
3938   PetscFunctionReturn(0);
3939 }
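
/*
   A minimal usage sketch for MatMerge() (illustrative only).  Each process builds a
   SeqAIJ matrix with the same number of columns; the row blocks are stacked in rank
   order.  Note that MatMerge() destroys the input matrix:

      Mat local,global;

      ...  build the (local rows) x N SeqAIJ matrix 'local' on each process ...
      ierr = MatMerge(PETSC_COMM_WORLD,local,PETSC_DECIDE,MAT_INITIAL_MATRIX,&global);CHKERRQ(ierr);
*/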
3940 
3941 #undef __FUNCT__
3942 #define __FUNCT__ "MatFileSplit"
3943 PetscErrorCode MatFileSplit(Mat A,char *outfile)
3944 {
3945   PetscErrorCode    ierr;
3946   PetscMPIInt       rank;
3947   PetscInt          m,N,i,rstart,nnz;
3948   size_t            len;
3949   const PetscInt    *indx;
3950   PetscViewer       out;
3951   char              *name;
3952   Mat               B;
3953   const PetscScalar *values;
3954 
3955   PetscFunctionBegin;
3956   ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr);
3957   ierr = MatGetSize(A,0,&N);CHKERRQ(ierr);
3958   /* Should this be the type of the diagonal block of A? */
3959   ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr);
3960   ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr);
3961   ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr);
3962   ierr = MatSeqAIJSetPreallocation(B,0,PETSC_NULL);CHKERRQ(ierr);
3963   ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr);
3964   for (i=0;i<m;i++) {
3965     ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
3966     ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
3967     ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
3968   }
3969   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3970   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3971 
3972   ierr = MPI_Comm_rank(((PetscObject)A)->comm,&rank);CHKERRQ(ierr);
3973   ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr);
3974   ierr = PetscMalloc((len+5)*sizeof(char),&name);CHKERRQ(ierr);
3975   sprintf(name,"%s.%d",outfile,rank);
3976   ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr);
3977   ierr = PetscFree(name);CHKERRQ(ierr);
3978   ierr = MatView(B,out);CHKERRQ(ierr);
3979   ierr = PetscViewerDestroy(out);CHKERRQ(ierr);
3980   ierr = MatDestroy(B);CHKERRQ(ierr);
3981   PetscFunctionReturn(0);
3982 }
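
/*
   Illustrative sketch: MatFileSplit() writes each process's local rows of A into its
   own sequential binary file named <outfile>.<rank> (the base name below is
   hypothetical):

      ierr = MatFileSplit(A,"splitmat");CHKERRQ(ierr);    produces splitmat.0, splitmat.1, ...
*/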
3983 
3984 EXTERN PetscErrorCode MatDestroy_MPIAIJ(Mat);
3985 #undef __FUNCT__
3986 #define __FUNCT__ "MatDestroy_MPIAIJ_SeqsToMPI"
3987 PetscErrorCode PETSCMAT_DLLEXPORT MatDestroy_MPIAIJ_SeqsToMPI(Mat A)
3988 {
3989   PetscErrorCode       ierr;
3990   Mat_Merge_SeqsToMPI  *merge;
3991   PetscContainer       container;
3992 
3993   PetscFunctionBegin;
3994   ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr);
3995   if (container) {
3996     ierr = PetscContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr);
3997     ierr = PetscFree(merge->id_r);CHKERRQ(ierr);
3998     ierr = PetscFree(merge->len_s);CHKERRQ(ierr);
3999     ierr = PetscFree(merge->len_r);CHKERRQ(ierr);
4000     ierr = PetscFree(merge->bi);CHKERRQ(ierr);
4001     ierr = PetscFree(merge->bj);CHKERRQ(ierr);
4002     ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr);
4003     ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr);
4004     ierr = PetscFree(merge->coi);CHKERRQ(ierr);
4005     ierr = PetscFree(merge->coj);CHKERRQ(ierr);
4006     ierr = PetscFree(merge->owners_co);CHKERRQ(ierr);
4007     ierr = PetscFree(merge->rowmap.range);CHKERRQ(ierr);
4008 
4009     ierr = PetscContainerDestroy(container);CHKERRQ(ierr);
4010     ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr);
4011     ierr = PetscFree(merge);CHKERRQ(ierr); /* merge is only valid when the container exists */
4012   }
4013 
4014   ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr);
4015   PetscFunctionReturn(0);
4016 }
4017 
4018 #include "../src/mat/utils/freespace.h"
4019 #include "petscbt.h"
4020 
4021 #undef __FUNCT__
4022 #define __FUNCT__ "MatMerge_SeqsToMPINumeric"
4023 /*@C
4024       MatMerge_SeqsToMPI - Creates a MPIAIJ matrix by adding sequential
4025                  matrices from each processor
4026 
4027     Collective on MPI_Comm
4028 
4029    Input Parameters:
4030 +    comm - the communicator the parallel matrix will live on
4031 .    seqmat - the input sequential matrix (one per process)
4032 .    m - number of local rows (or PETSC_DECIDE)
4033 .    n - number of local columns (or PETSC_DECIDE)
4034 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4035 
4036    Output Parameter:
4037 .    mpimat - the parallel matrix generated
4038 
4039     Level: advanced
4040 
4041    Notes:
4042      The dimensions of the sequential matrix on each processor MUST be the same.
4043      The supporting merge data is attached to mpimat in a PetscContainer named "MatMergeSeqsToMPI"
4044      and is destroyed when mpimat is destroyed; call PetscObjectQuery() to access it.
4045 @*/
4046 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPINumeric(Mat seqmat,Mat mpimat)
4047 {
4048   PetscErrorCode       ierr;
4049   MPI_Comm             comm=((PetscObject)mpimat)->comm;
4050   Mat_SeqAIJ           *a=(Mat_SeqAIJ*)seqmat->data;
4051   PetscMPIInt          size,rank,taga,*len_s;
4052   PetscInt             N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj=a->j;
4053   PetscInt             proc,m;
4054   PetscInt             **buf_ri,**buf_rj;
4055   PetscInt             k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj;
4056   PetscInt             nrows,**buf_ri_k,**nextrow,**nextai;
4057   MPI_Request          *s_waits,*r_waits;
4058   MPI_Status           *status;
4059   MatScalar            *aa=a->a;
4060   MatScalar            **abuf_r,*ba_i;
4061   Mat_Merge_SeqsToMPI  *merge;
4062   PetscContainer       container;
4063 
4064   PetscFunctionBegin;
4065   ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
4066 
4067   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4068   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4069 
4070   ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr);
4071   if (container) {
4072     ierr  = PetscContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr);
4073   } else SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Matrix mpimat was not created by MatMerge_SeqsToMPISymbolic()");
4074   bi     = merge->bi;
4075   bj     = merge->bj;
4076   buf_ri = merge->buf_ri;
4077   buf_rj = merge->buf_rj;
4078 
4079   ierr   = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr);
4080   owners = merge->rowmap.range;
4081   len_s  = merge->len_s;
4082 
4083   /* send and recv matrix values */
4084   /*-----------------------------*/
4085   ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr);
4086   ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr);
4087 
4088   ierr = PetscMalloc((merge->nsend+1)*sizeof(MPI_Request),&s_waits);CHKERRQ(ierr);
4089   for (proc=0,k=0; proc<size; proc++){
4090     if (!len_s[proc]) continue;
4091     i = owners[proc];
4092     ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr);
4093     k++;
4094   }
4095 
4096   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);}
4097   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);}
4098   ierr = PetscFree(status);CHKERRQ(ierr);
4099 
4100   ierr = PetscFree(s_waits);CHKERRQ(ierr);
4101   ierr = PetscFree(r_waits);CHKERRQ(ierr);
4102 
4103   /* insert mat values of mpimat */
4104   /*----------------------------*/
4105   ierr = PetscMalloc(N*sizeof(PetscScalar),&ba_i);CHKERRQ(ierr);
4106   ierr = PetscMalloc((3*merge->nrecv+1)*sizeof(PetscInt**),&buf_ri_k);CHKERRQ(ierr);
4107   nextrow = buf_ri_k + merge->nrecv;
4108   nextai  = nextrow + merge->nrecv;
4109 
4110   for (k=0; k<merge->nrecv; k++){
4111     buf_ri_k[k] = buf_ri[k]; /* beginning of the k-th received i-structure */
4112     nrows = *(buf_ri_k[k]);
4113     nextrow[k]  = buf_ri_k[k]+1;  /* next row number in the k-th received i-structure */
4114     nextai[k]   = buf_ri_k[k] + (nrows + 1);/* points to the i-structure (row offsets) of the k-th received message */
4115   }
4116 
4117   /* set values of ba */
4118   m = merge->rowmap.n;
4119   for (i=0; i<m; i++) {
4120     arow = owners[rank] + i;
4121     bj_i = bj+bi[i];  /* col indices of the i-th row of mpimat */
4122     bnzi = bi[i+1] - bi[i];
4123     ierr = PetscMemzero(ba_i,bnzi*sizeof(PetscScalar));CHKERRQ(ierr);
4124 
4125     /* add local non-zero vals of this proc's seqmat into ba */
4126     anzi = ai[arow+1] - ai[arow];
4127     aj   = a->j + ai[arow];
4128     aa   = a->a + ai[arow];
4129     nextaj = 0;
4130     for (j=0; nextaj<anzi; j++){
4131       if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */
4132         ba_i[j] += aa[nextaj++];
4133       }
4134     }
4135 
4136     /* add received vals into ba */
4137     for (k=0; k<merge->nrecv; k++){ /* k-th received message */
4138       /* i-th row */
4139       if (i == *nextrow[k]) {
4140         anzi = *(nextai[k]+1) - *nextai[k];
4141         aj   = buf_rj[k] + *(nextai[k]);
4142         aa   = abuf_r[k] + *(nextai[k]);
4143         nextaj = 0;
4144         for (j=0; nextaj<anzi; j++){
4145           if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */
4146             ba_i[j] += aa[nextaj++];
4147           }
4148         }
4149         nextrow[k]++; nextai[k]++;
4150       }
4151     }
4152     ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr);
4153   }
4154   ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4155   ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4156 
4157   ierr = PetscFree(abuf_r);CHKERRQ(ierr);
4158   ierr = PetscFree(ba_i);CHKERRQ(ierr);
4159   ierr = PetscFree(buf_ri_k);CHKERRQ(ierr);
4160   ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
4161   PetscFunctionReturn(0);
4162 }
4163 
4164 #undef __FUNCT__
4165 #define __FUNCT__ "MatMerge_SeqsToMPISymbolic"
4166 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPISymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat)
4167 {
4168   PetscErrorCode       ierr;
4169   Mat                  B_mpi;
4170   Mat_SeqAIJ           *a=(Mat_SeqAIJ*)seqmat->data;
4171   PetscMPIInt          size,rank,tagi,tagj,*len_s,*len_si,*len_ri;
4172   PetscInt             **buf_rj,**buf_ri,**buf_ri_k;
4173   PetscInt             M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j;
4174   PetscInt             len,proc,*dnz,*onz;
4175   PetscInt             k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0;
4176   PetscInt             nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai;
4177   MPI_Request          *si_waits,*sj_waits,*ri_waits,*rj_waits;
4178   MPI_Status           *status;
4179   PetscFreeSpaceList   free_space=PETSC_NULL,current_space=PETSC_NULL;
4180   PetscBT              lnkbt;
4181   Mat_Merge_SeqsToMPI  *merge;
4182   PetscContainer       container;
4183 
4184   PetscFunctionBegin;
4185   ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4186 
4187   /* make sure it is a PETSc comm */
4188   ierr = PetscCommDuplicate(comm,&comm,PETSC_NULL);CHKERRQ(ierr);
4189   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4190   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4191 
4192   ierr = PetscNew(Mat_Merge_SeqsToMPI,&merge);CHKERRQ(ierr);
4193   ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr);
4194 
4195   /* determine row ownership */
4196   /*---------------------------------------------------------*/
4197   ierr = PetscMapInitialize(comm,&merge->rowmap);CHKERRQ(ierr);
4198   merge->rowmap.n = m;
4199   merge->rowmap.N = M;
4200   merge->rowmap.bs = 1;
4201   ierr = PetscMapSetUp(&merge->rowmap);CHKERRQ(ierr);
4202   ierr = PetscMalloc(size*sizeof(PetscMPIInt),&len_si);CHKERRQ(ierr);
4203   ierr = PetscMalloc(size*sizeof(PetscMPIInt),&merge->len_s);CHKERRQ(ierr);
4204 
4205   m      = merge->rowmap.n;
4206   M      = merge->rowmap.N;
4207   owners = merge->rowmap.range;
4208 
4209   /* determine the number of messages to send, their lengths */
4210   /*---------------------------------------------------------*/
4211   len_s  = merge->len_s;
4212 
4213   len = 0;  /* length of buf_si[] */
4214   merge->nsend = 0;
4215   for (proc=0; proc<size; proc++){
4216     len_si[proc] = 0;
4217     if (proc == rank){
4218       len_s[proc] = 0;
4219     } else {
4220       len_si[proc] = owners[proc+1] - owners[proc] + 1;
4221       len_s[proc] = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */
4222     }
4223     if (len_s[proc]) {
4224       merge->nsend++;
4225       nrows = 0;
4226       for (i=owners[proc]; i<owners[proc+1]; i++){
4227         if (ai[i+1] > ai[i]) nrows++;
4228       }
4229       len_si[proc] = 2*(nrows+1);
4230       len += len_si[proc];
4231     }
4232   }
4233 
4234   /* determine the number and length of messages to receive for ij-structure */
4235   /*-------------------------------------------------------------------------*/
4236   ierr = PetscGatherNumberOfMessages(comm,PETSC_NULL,len_s,&merge->nrecv);CHKERRQ(ierr);
4237   ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr);
4238 
4239   /* post the Irecv of j-structure */
4240   /*-------------------------------*/
4241   ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr);
4242   ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr);
4243 
4244   /* post the Isend of j-structure */
4245   /*--------------------------------*/
4246   ierr = PetscMalloc((2*merge->nsend+1)*sizeof(MPI_Request),&si_waits);CHKERRQ(ierr);
4247   sj_waits = si_waits + merge->nsend;
4248 
4249   for (proc=0, k=0; proc<size; proc++){
4250     if (!len_s[proc]) continue;
4251     i = owners[proc];
4252     ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr);
4253     k++;
4254   }
4255 
4256   /* receives and sends of j-structure are complete */
4257   /*------------------------------------------------*/
4258   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);}
4259   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);}
4260 
4261   /* send and recv i-structure */
4262   /*---------------------------*/
4263   ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr);
4264   ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr);
4265 
4266   ierr = PetscMalloc((len+1)*sizeof(PetscInt),&buf_s);CHKERRQ(ierr);
4267   buf_si = buf_s;  /* points to the beginning of k-th msg to be sent */
4268   for (proc=0,k=0; proc<size; proc++){
4269     if (!len_s[proc]) continue;
4270     /* form outgoing message for i-structure:
4271          buf_si[0]:                 nrows to be sent
4272                [1:nrows]:           row index (global)
4273                [nrows+1:2*nrows+1]: i-structure index
4274     */
4275     /*-------------------------------------------*/
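    /* Illustrative example of the layout above (hypothetical values, not executed):
       if two rows destined for [proc], with local indices 3 and 5 relative to the start of
       [proc]'s row range and with 2 and 4 nonzeros respectively, are to be sent, then
           buf_si = { 2,  3, 5,  0, 2, 6 }
       i.e. nrows=2, then the local row indices, then the running nonzero offsets,
       for a total length of 2*(nrows+1) = len_si[proc]. */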
4276     nrows = len_si[proc]/2 - 1;
4277     buf_si_i    = buf_si + nrows+1;
4278     buf_si[0]   = nrows;
4279     buf_si_i[0] = 0;
4280     nrows = 0;
4281     for (i=owners[proc]; i<owners[proc+1]; i++){
4282       anzi = ai[i+1] - ai[i];
4283       if (anzi) {
4284         buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */
4285         buf_si[nrows+1] = i-owners[proc]; /* local row index */
4286         nrows++;
4287       }
4288     }
4289     ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr);
4290     k++;
4291     buf_si += len_si[proc];
4292   }
4293 
4294   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);}
4295   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);}
4296 
4297   ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr);
4298   for (i=0; i<merge->nrecv; i++){
4299     ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr);
4300   }
4301 
4302   ierr = PetscFree(len_si);CHKERRQ(ierr);
4303   ierr = PetscFree(len_ri);CHKERRQ(ierr);
4304   ierr = PetscFree(rj_waits);CHKERRQ(ierr);
4305   ierr = PetscFree(si_waits);CHKERRQ(ierr);
4306   ierr = PetscFree(ri_waits);CHKERRQ(ierr);
4307   ierr = PetscFree(buf_s);CHKERRQ(ierr);
4308   ierr = PetscFree(status);CHKERRQ(ierr);
4309 
4310   /* compute a local seq matrix in each processor */
4311   /*----------------------------------------------*/
4312   /* allocate bi array and free space for accumulating nonzero column info */
4313   ierr = PetscMalloc((m+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr);
4314   bi[0] = 0;
4315 
4316   /* create and initialize a linked list */
4317   nlnk = N+1;
4318   ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4319 
4320   /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */
4321   len = 0;
4322   len  = ai[owners[rank+1]] - ai[owners[rank]];
4323   ierr = PetscFreeSpaceGet((PetscInt)(2*len+1),&free_space);CHKERRQ(ierr);
4324   current_space = free_space;
4325 
4326   /* determine symbolic info for each local row */
4327   ierr = PetscMalloc((3*merge->nrecv+1)*sizeof(PetscInt**),&buf_ri_k);CHKERRQ(ierr);
4328   nextrow = buf_ri_k + merge->nrecv;
4329   nextai  = nextrow + merge->nrecv;
4330   for (k=0; k<merge->nrecv; k++){
4331     buf_ri_k[k] = buf_ri[k]; /* beginning of the k-th received i-structure */
4332     nrows = *buf_ri_k[k];
4333     nextrow[k]  = buf_ri_k[k] + 1;  /* next row number in the k-th received i-structure */
4334     nextai[k]   = buf_ri_k[k] + (nrows + 1);/* points to the i-structure (row offsets) of the k-th received message */
4335   }
4336 
4337   ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
4338   len = 0;
4339   for (i=0;i<m;i++) {
4340     bnzi   = 0;
4341     /* add local non-zero cols of this proc's seqmat into lnk */
4342     arow   = owners[rank] + i;
4343     anzi   = ai[arow+1] - ai[arow];
4344     aj     = a->j + ai[arow];
4345     ierr = PetscLLAdd(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4346     bnzi += nlnk;
4347     /* add received col data into lnk */
4348     for (k=0; k<merge->nrecv; k++){ /* k-th received message */
4349       if (i == *nextrow[k]) { /* i-th row */
4350         anzi = *(nextai[k]+1) - *nextai[k];
4351         aj   = buf_rj[k] + *nextai[k];
4352         ierr = PetscLLAdd(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4353         bnzi += nlnk;
4354         nextrow[k]++; nextai[k]++;
4355       }
4356     }
4357     if (len < bnzi) len = bnzi;  /* =max(bnzi) */
4358 
4359     /* if free space is not available, make more free space */
4360     if (current_space->local_remaining<bnzi) {
4361       ierr = PetscFreeSpaceGet(bnzi+current_space->total_array_size,&current_space);CHKERRQ(ierr);
4362       nspacedouble++;
4363     }
4364     /* copy data into free space, then initialize lnk */
4365     ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
4366     ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr);
4367 
4368     current_space->array           += bnzi;
4369     current_space->local_used      += bnzi;
4370     current_space->local_remaining -= bnzi;
4371 
4372     bi[i+1] = bi[i] + bnzi;
4373   }
4374 
4375   ierr = PetscFree(buf_ri_k);CHKERRQ(ierr);
4376 
4377   ierr = PetscMalloc((bi[m]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr);
4378   ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr);
4379   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
4380 
4381   /* create symbolic parallel matrix B_mpi */
4382   /*---------------------------------------*/
4383   ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr);
4384   if (n==PETSC_DECIDE) {
4385     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr);
4386   } else {
4387     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
4388   }
4389   ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr);
4390   ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr);
4391   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
4392 
4393   /* B_mpi is not ready for use - assembly will be done by MatMerge_SeqsToMPINumeric() */
4394   B_mpi->assembled     = PETSC_FALSE;
4395   B_mpi->ops->destroy  = MatDestroy_MPIAIJ_SeqsToMPI;
4396   merge->bi            = bi;
4397   merge->bj            = bj;
4398   merge->buf_ri        = buf_ri;
4399   merge->buf_rj        = buf_rj;
4400   merge->coi           = PETSC_NULL;
4401   merge->coj           = PETSC_NULL;
4402   merge->owners_co     = PETSC_NULL;
4403 
4404   /* attach the supporting struct to B_mpi for reuse */
4405   ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
4406   ierr = PetscContainerSetPointer(container,merge);CHKERRQ(ierr);
4407   ierr = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr);
4408   *mpimat = B_mpi;
4409 
4410   ierr = PetscCommDestroy(&comm);CHKERRQ(ierr);
4411   ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4412   PetscFunctionReturn(0);
4413 }
4414 
4415 #undef __FUNCT__
4416 #define __FUNCT__ "MatMerge_SeqsToMPI"
4417 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPI(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat)
4418 {
4419   PetscErrorCode   ierr;
4420 
4421   PetscFunctionBegin;
4422   ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4423   if (scall == MAT_INITIAL_MATRIX){
4424     ierr = MatMerge_SeqsToMPISymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr);
4425   }
4426   ierr = MatMerge_SeqsToMPINumeric(seqmat,*mpimat);CHKERRQ(ierr);
4427   ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4428   PetscFunctionReturn(0);
4429 }
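
/*
   A minimal usage sketch for MatMerge_SeqsToMPI() (illustrative only; the SeqAIJ matrix
   "seqA" is assumed to exist on every process with the same global dimensions):

      Mat seqA,C;
      ...                                              assemble seqA on each process
      ierr = MatMerge_SeqsToMPI(PETSC_COMM_WORLD,seqA,PETSC_DECIDE,PETSC_DECIDE,
                                MAT_INITIAL_MATRIX,&C);CHKERRQ(ierr);
      ...                                              refill seqA with new values, same nonzero pattern
      ierr = MatMerge_SeqsToMPI(PETSC_COMM_WORLD,seqA,PETSC_DECIDE,PETSC_DECIDE,
                                MAT_REUSE_MATRIX,&C);CHKERRQ(ierr);
      ierr = MatDestroy(C);CHKERRQ(ierr);

   On the first call both the symbolic and numeric phases run; with MAT_REUSE_MATRIX only
   MatMerge_SeqsToMPINumeric() is invoked, reusing the merge structure cached on C.
*/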
4430 
4431 #undef __FUNCT__
4432 #define __FUNCT__ "MatGetLocalMat"
4433 /*@
4434      MatGetLocalMat - Creates a SeqAIJ matrix from all the local rows of a parallel MPIAIJ matrix
4435 
4436     Not Collective
4437 
4438    Input Parameters:
4439 +    A - the matrix
4440 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4441 
4442    Output Parameter:
4443 .    A_loc - the local sequential matrix generated
4444 
4445     Level: developer
4446 
4447 @*/
4448 PetscErrorCode PETSCMAT_DLLEXPORT MatGetLocalMat(Mat A,MatReuse scall,Mat *A_loc)
4449 {
4450   PetscErrorCode  ierr;
4451   Mat_MPIAIJ      *mpimat=(Mat_MPIAIJ*)A->data;
4452   Mat_SeqAIJ      *mat,*a=(Mat_SeqAIJ*)(mpimat->A)->data,*b=(Mat_SeqAIJ*)(mpimat->B)->data;
4453   PetscInt        *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*cmap=mpimat->garray;
4454   MatScalar       *aa=a->a,*ba=b->a,*cam;
4455   PetscScalar     *ca;
4456   PetscInt        am=A->rmap->n,i,j,k,cstart=A->cmap->rstart;
4457   PetscInt        *ci,*cj,col,ncols_d,ncols_o,jo;
4458 
4459   PetscFunctionBegin;
4460   ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
4461   if (scall == MAT_INITIAL_MATRIX){
4462     ierr = PetscMalloc((1+am)*sizeof(PetscInt),&ci);CHKERRQ(ierr);
4463     ci[0] = 0;
4464     for (i=0; i<am; i++){
4465       ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]);
4466     }
4467     ierr = PetscMalloc((1+ci[am])*sizeof(PetscInt),&cj);CHKERRQ(ierr);
4468     ierr = PetscMalloc((1+ci[am])*sizeof(PetscScalar),&ca);CHKERRQ(ierr);
4469     k = 0;
4470     for (i=0; i<am; i++) {
4471       ncols_o = bi[i+1] - bi[i];
4472       ncols_d = ai[i+1] - ai[i];
4473       /* off-diagonal portion of A */
4474       for (jo=0; jo<ncols_o; jo++) {
4475         col = cmap[*bj];
4476         if (col >= cstart) break;
4477         cj[k]   = col; bj++;
4478         ca[k++] = *ba++;
4479       }
4480       /* diagonal portion of A */
4481       for (j=0; j<ncols_d; j++) {
4482         cj[k]   = cstart + *aj++;
4483         ca[k++] = *aa++;
4484       }
4485       /* off-diagonal portion of A */
4486       for (j=jo; j<ncols_o; j++) {
4487         cj[k]   = cmap[*bj++];
4488         ca[k++] = *ba++;
4489       }
4490     }
4491     /* put together the new matrix */
4492     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr);
4493     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
4494     /* Since these are PETSc arrays, change flags to free them as necessary. */
4495     mat          = (Mat_SeqAIJ*)(*A_loc)->data;
4496     mat->free_a  = PETSC_TRUE;
4497     mat->free_ij = PETSC_TRUE;
4498     mat->nonew   = 0;
4499   } else if (scall == MAT_REUSE_MATRIX){
4500     mat=(Mat_SeqAIJ*)(*A_loc)->data;
4501     ci = mat->i; cj = mat->j; cam = mat->a;
4502     for (i=0; i<am; i++) {
4503       /* off-diagonal portion of A */
4504       ncols_o = bi[i+1] - bi[i];
4505       for (jo=0; jo<ncols_o; jo++) {
4506         col = cmap[*bj];
4507         if (col >= cstart) break;
4508         *cam++ = *ba++; bj++;
4509       }
4510       /* diagonal portion of A */
4511       ncols_d = ai[i+1] - ai[i];
4512       for (j=0; j<ncols_d; j++) *cam++ = *aa++;
4513       /* off-diagonal portion of A */
4514       for (j=jo; j<ncols_o; j++) {
4515         *cam++ = *ba++; bj++;
4516       }
4517     }
4518   } else {
4519     SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
4520   }
4521 
4522   ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
4523   PetscFunctionReturn(0);
4524 }
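
/*
   A minimal usage sketch for MatGetLocalMat() (illustrative only; A is assumed to be an
   assembled MPIAIJ matrix):

      Mat A_loc;
      ierr = MatGetLocalMat(A,MAT_INITIAL_MATRIX,&A_loc);CHKERRQ(ierr);
      ...                      A_loc has A->rmap->n rows and A->cmap->N (global) columns
      ierr = MatGetLocalMat(A,MAT_REUSE_MATRIX,&A_loc);CHKERRQ(ierr);    after A's values change
      ierr = MatDestroy(A_loc);CHKERRQ(ierr);
*/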
4525 
4526 #undef __FUNCT__
4527 #define __FUNCT__ "MatGetLocalMatCondensed"
4528 /*@C
4529      MatGetLocalMatCondensed - Creates a SeqAIJ matrix from all the local rows of a parallel MPIAIJ matrix, keeping only its NON-ZERO columns
4530 
4531     Not Collective
4532 
4533    Input Parameters:
4534 +    A - the matrix
4535 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4536 -    row, col - index sets of rows and columns to extract (or PETSC_NULL)
4537 
4538    Output Parameter:
4539 .    A_loc - the local sequential matrix generated
4540 
4541     Level: developer
4542 
4543 @*/
4544 PetscErrorCode PETSCMAT_DLLEXPORT MatGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc)
4545 {
4546   Mat_MPIAIJ        *a=(Mat_MPIAIJ*)A->data;
4547   PetscErrorCode    ierr;
4548   PetscInt          i,start,end,ncols,nzA,nzB,*cmap,imark,*idx;
4549   IS                isrowa,iscola;
4550   Mat               *aloc;
4551 
4552   PetscFunctionBegin;
4553   ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
4554   if (!row){
4555     start = A->rmap->rstart; end = A->rmap->rend;
4556     ierr = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr);
4557   } else {
4558     isrowa = *row;
4559   }
4560   if (!col){
4561     start = A->cmap->rstart;
4562     cmap  = a->garray;
4563     nzA   = a->A->cmap->n;
4564     nzB   = a->B->cmap->n;
4565     ierr  = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr);
4566     ncols = 0;
4567     for (i=0; i<nzB; i++) {
4568       if (cmap[i] < start) idx[ncols++] = cmap[i];
4569       else break;
4570     }
4571     imark = i;
4572     for (i=0; i<nzA; i++) idx[ncols++] = start + i;
4573     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i];
4574     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,&iscola);CHKERRQ(ierr);
4575     ierr = PetscFree(idx);CHKERRQ(ierr);
4576   } else {
4577     iscola = *col;
4578   }
4579   if (scall != MAT_INITIAL_MATRIX){
4580     ierr = PetscMalloc(sizeof(Mat),&aloc);CHKERRQ(ierr);
4581     aloc[0] = *A_loc;
4582   }
4583   ierr = MatGetSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr);
4584   *A_loc = aloc[0];
4585   ierr = PetscFree(aloc);CHKERRQ(ierr);
4586   if (!row){
4587     ierr = ISDestroy(isrowa);CHKERRQ(ierr);
4588   }
4589   if (!col){
4590     ierr = ISDestroy(iscola);CHKERRQ(ierr);
4591   }
4592   ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
4593   PetscFunctionReturn(0);
4594 }
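
/*
   A minimal usage sketch for MatGetLocalMatCondensed() (illustrative only; A is assumed to be
   an assembled MPIAIJ matrix; passing PETSC_NULL for row and col lets the routine build the
   default index sets internally):

      Mat A_loc;
      ierr = MatGetLocalMatCondensed(A,MAT_INITIAL_MATRIX,PETSC_NULL,PETSC_NULL,&A_loc);CHKERRQ(ierr);
      ...
      ierr = MatDestroy(A_loc);CHKERRQ(ierr);
*/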
4595 
4596 #undef __FUNCT__
4597 #define __FUNCT__ "MatGetBrowsOfAcols"
4598 /*@C
4599     MatGetBrowsOfAcols - Creates a SeqAIJ matrix containing the rows of B that correspond to the nonzero columns of local A
4600 
4601     Collective on Mat
4602 
4603    Input Parameters:
4604 +    A,B - the matrices in mpiaij format
4605 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4606 -    rowb, colb - index sets of rows and columns of B to extract (or PETSC_NULL)
4607 
4608    Output Parameters:
4609 +    rowb, colb - index sets of rows and columns of B to extract
4610 .    brstart - row index in B_seq at which B's own local rows begin (the next B->rmap->n rows of B_seq come from B's local rows)
4611 -    B_seq - the sequential matrix generated
4612 
4613     Level: developer
4614 
4615 @*/
4616 PetscErrorCode PETSCMAT_DLLEXPORT MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,PetscInt *brstart,Mat *B_seq)
4617 {
4618   Mat_MPIAIJ        *a=(Mat_MPIAIJ*)A->data;
4619   PetscErrorCode    ierr;
4620   PetscInt          *idx,i,start,ncols,nzA,nzB,*cmap,imark;
4621   IS                isrowb,iscolb;
4622   Mat               *bseq;
4623 
4624   PetscFunctionBegin;
4625   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend){
4626     SETERRQ4(PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
4627   }
4628   ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
4629 
4630   if (scall == MAT_INITIAL_MATRIX){
4631     start = A->cmap->rstart;
4632     cmap  = a->garray;
4633     nzA   = a->A->cmap->n;
4634     nzB   = a->B->cmap->n;
4635     ierr  = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr);
4636     ncols = 0;
4637     for (i=0; i<nzB; i++) {  /* row < local row index */
4638       if (cmap[i] < start) idx[ncols++] = cmap[i];
4639       else break;
4640     }
4641     imark = i;
4642     for (i=0; i<nzA; i++) idx[ncols++] = start + i;  /* local rows */
4643     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */
4644     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,&isrowb);CHKERRQ(ierr);
4645     ierr = PetscFree(idx);CHKERRQ(ierr);
4646     *brstart = imark;
4647     ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr);
4648   } else {
4649     if (!rowb || !colb) SETERRQ(PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX");
4650     isrowb = *rowb; iscolb = *colb;
4651     ierr = PetscMalloc(sizeof(Mat),&bseq);CHKERRQ(ierr);
4652     bseq[0] = *B_seq;
4653   }
4654   ierr = MatGetSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr);
4655   *B_seq = bseq[0];
4656   ierr = PetscFree(bseq);CHKERRQ(ierr);
4657   if (!rowb){
4658     ierr = ISDestroy(isrowb);CHKERRQ(ierr);
4659   } else {
4660     *rowb = isrowb;
4661   }
4662   if (!colb){
4663     ierr = ISDestroy(iscolb);CHKERRQ(ierr);
4664   } else {
4665     *colb = iscolb;
4666   }
4667   ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
4668   PetscFunctionReturn(0);
4669 }
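
/*
   A minimal usage sketch for MatGetBrowsOfAcols() (illustrative only; A and B are assumed to be
   assembled MPIAIJ matrices with A's column layout matching B's row layout). The index sets
   created on the first call are kept so they can be reused:

      IS       rowb = PETSC_NULL,colb = PETSC_NULL;
      PetscInt brstart;
      Mat      B_seq;
      ierr = MatGetBrowsOfAcols(A,B,MAT_INITIAL_MATRIX,&rowb,&colb,&brstart,&B_seq);CHKERRQ(ierr);
      ...                            after B's numerical values change, same nonzero pattern
      ierr = MatGetBrowsOfAcols(A,B,MAT_REUSE_MATRIX,&rowb,&colb,&brstart,&B_seq);CHKERRQ(ierr);
      ierr = ISDestroy(rowb);CHKERRQ(ierr);
      ierr = ISDestroy(colb);CHKERRQ(ierr);
      ierr = MatDestroy(B_seq);CHKERRQ(ierr);
*/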
4670 
4671 #undef __FUNCT__
4672 #define __FUNCT__ "MatGetBrowsOfAoCols"
4673 /*@C
4674     MatGetBrowsOfAoCols - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns
4675     of the OFF-DIAGONAL portion of local A
4676 
4677     Collective on Mat
4678 
4679    Input Parameters:
4680 +    A,B - the matrices in mpiaij format
4681 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4682 .    startsj - starting point in B's sending and receiving j-arrays, saved for MAT_REUSE (or PETSC_NULL)
4683 -    bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or PETSC_NULL)
4684 
4685    Output Parameter:
4686 +    B_oth - the sequential matrix generated
4687 
4688     Level: developer
4689 
4690 @*/
4691 PetscErrorCode PETSCMAT_DLLEXPORT MatGetBrowsOfAoCols(Mat A,Mat B,MatReuse scall,PetscInt **startsj,MatScalar **bufa_ptr,Mat *B_oth)
4692 {
4693   VecScatter_MPI_General *gen_to,*gen_from;
4694   PetscErrorCode         ierr;
4695   Mat_MPIAIJ             *a=(Mat_MPIAIJ*)A->data;
4696   Mat_SeqAIJ             *b_oth;
4697   VecScatter             ctx=a->Mvctx;
4698   MPI_Comm               comm=((PetscObject)ctx)->comm;
4699   PetscMPIInt            *rprocs,*sprocs,tag=((PetscObject)ctx)->tag,rank;
4700   PetscInt               *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap->n,row,*b_othi,*b_othj;
4701   PetscScalar            *rvalues,*svalues;
4702   MatScalar              *b_otha,*bufa,*bufA;
4703   PetscInt               i,j,k,l,ll,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len;
4704   MPI_Request            *rwaits = PETSC_NULL,*swaits = PETSC_NULL;
4705   MPI_Status             *sstatus,rstatus;
4706   PetscMPIInt            jj;
4707   PetscInt               *cols,sbs,rbs;
4708   PetscScalar            *vals;
4709 
4710   PetscFunctionBegin;
4711   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend){
4712     SETERRQ4(PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
4713   }
4714   ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
4715   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4716 
4717   gen_to   = (VecScatter_MPI_General*)ctx->todata;
4718   gen_from = (VecScatter_MPI_General*)ctx->fromdata;
4719   rvalues  = gen_from->values; /* reused to hold the lengths of the rows to be received */
4720   svalues  = gen_to->values;   /* reused to hold the lengths of the rows to be sent */
4721   nrecvs   = gen_from->n;
4722   nsends   = gen_to->n;
4723 
4724   ierr = PetscMalloc2(nrecvs,MPI_Request,&rwaits,nsends,MPI_Request,&swaits);CHKERRQ(ierr);
4725   srow     = gen_to->indices;   /* local row index to be sent */
4726   sstarts  = gen_to->starts;
4727   sprocs   = gen_to->procs;
4728   sstatus  = gen_to->sstatus;
4729   sbs      = gen_to->bs;
4730   rstarts  = gen_from->starts;
4731   rprocs   = gen_from->procs;
4732   rbs      = gen_from->bs;
4733 
4734   if (!startsj || !bufa_ptr) scall = MAT_INITIAL_MATRIX;
4735   if (scall == MAT_INITIAL_MATRIX){
4736     /* i-array */
4737     /*---------*/
4738     /*  post receives */
4739     for (i=0; i<nrecvs; i++){
4740       rowlen = (PetscInt*)rvalues + rstarts[i]*rbs;
4741       nrows = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */
4742       ierr = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
4743     }
4744 
4745     /* pack the outgoing message */
4746     ierr = PetscMalloc((nsends+nrecvs+3)*sizeof(PetscInt),&sstartsj);CHKERRQ(ierr);
4747     rstartsj = sstartsj + nsends +1;
4748     sstartsj[0] = 0;  rstartsj[0] = 0;
4749     len = 0; /* total length of j or a array to be sent */
4750     k = 0;
4751     for (i=0; i<nsends; i++){
4752       rowlen = (PetscInt*)svalues + sstarts[i]*sbs;
4753       nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
4754       for (j=0; j<nrows; j++) {
4755         row = srow[k] + B->rmap->range[rank]; /* global row idx */
4756         for (l=0; l<sbs; l++){
4757           ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); /* rowlength */
4758           rowlen[j*sbs+l] = ncols;
4759           len += ncols;
4760           ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr);
4761         }
4762         k++;
4763       }
4764       ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
4765       sstartsj[i+1] = len;  /* starting point of (i+1)-th outgoing msg in bufj and bufa */
4766     }
4767     /* recvs and sends of i-array are completed */
4768     i = nrecvs;
4769     while (i--) {
4770       ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
4771     }
4772     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
4773 
4774     /* allocate buffers for sending j and a arrays */
4775     ierr = PetscMalloc((len+1)*sizeof(PetscInt),&bufj);CHKERRQ(ierr);
4776     ierr = PetscMalloc((len+1)*sizeof(PetscScalar),&bufa);CHKERRQ(ierr);
4777 
4778     /* create i-array of B_oth */
4779     ierr = PetscMalloc((aBn+2)*sizeof(PetscInt),&b_othi);CHKERRQ(ierr);
4780     b_othi[0] = 0;
4781     len = 0; /* total length of j or a array to be received */
4782     k = 0;
4783     for (i=0; i<nrecvs; i++){
4784       rowlen = (PetscInt*)rvalues + rstarts[i]*rbs;
4785       nrows = rbs*(rstarts[i+1]-rstarts[i]); /* num of rows to be received */
4786       for (j=0; j<nrows; j++) {
4787         b_othi[k+1] = b_othi[k] + rowlen[j];
4788         len += rowlen[j]; k++;
4789       }
4790       rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */
4791     }
4792 
4793     /* allocate space for j and a arrays of B_oth */
4794     ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(PetscInt),&b_othj);CHKERRQ(ierr);
4795     ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(MatScalar),&b_otha);CHKERRQ(ierr);
4796 
4797     /* j-array */
4798     /*---------*/
4799     /*  post receives of j-array */
4800     for (i=0; i<nrecvs; i++){
4801       nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
4802       ierr = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
4803     }
4804 
4805     /* pack the outgoing message j-array */
4806     k = 0;
4807     for (i=0; i<nsends; i++){
4808       nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
4809       bufJ = bufj+sstartsj[i];
4810       for (j=0; j<nrows; j++) {
4811         row  = srow[k++] + B->rmap->range[rank]; /* global row idx */
4812         for (ll=0; ll<sbs; ll++){
4813           ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr);
4814           for (l=0; l<ncols; l++){
4815             *bufJ++ = cols[l];
4816           }
4817           ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr);
4818         }
4819       }
4820       ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
4821     }
4822 
4823     /* recvs and sends of j-array are completed */
4824     i = nrecvs;
4825     while (i--) {
4826       ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
4827     }
4828     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
4829   } else if (scall == MAT_REUSE_MATRIX){
4830     sstartsj = *startsj;
4831     rstartsj = sstartsj + nsends +1;
4832     bufa     = *bufa_ptr;
4833     b_oth    = (Mat_SeqAIJ*)(*B_oth)->data;
4834     b_otha   = b_oth->a;
4835   } else {
4836     SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
4837   }
4838 
4839   /* a-array */
4840   /*---------*/
4841   /*  post receives of a-array */
4842   for (i=0; i<nrecvs; i++){
4843     nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
4844     ierr = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
4845   }
4846 
4847   /* pack the outgoing message a-array */
4848   k = 0;
4849   for (i=0; i<nsends; i++){
4850     nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
4851     bufA = bufa+sstartsj[i];
4852     for (j=0; j<nrows; j++) {
4853       row  = srow[k++] + B->rmap->range[rank]; /* global row idx */
4854       for (ll=0; ll<sbs; ll++){
4855         ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr);
4856         for (l=0; l<ncols; l++){
4857           *bufA++ = vals[l];
4858         }
4859         ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr);
4860       }
4861     }
4862     ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
4863   }
4864   /* recvs and sends of a-array are completed */
4865   i = nrecvs;
4866   while (i--) {
4867     ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
4868   }
4869   if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
4870   ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr);
4871 
4872   if (scall == MAT_INITIAL_MATRIX){
4873     /* put together the new matrix */
4874     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr);
4875 
4876     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
4877     /* Since these are PETSc arrays, change flags to free them as necessary. */
4878     b_oth          = (Mat_SeqAIJ *)(*B_oth)->data;
4879     b_oth->free_a  = PETSC_TRUE;
4880     b_oth->free_ij = PETSC_TRUE;
4881     b_oth->nonew   = 0;
4882 
4883     ierr = PetscFree(bufj);CHKERRQ(ierr);
4884     if (!startsj || !bufa_ptr){
4885       ierr = PetscFree(sstartsj);CHKERRQ(ierr);
4886       ierr = PetscFree(bufa);CHKERRQ(ierr);
4887     } else {
4888       *startsj  = sstartsj;
4889       *bufa_ptr = bufa;
4890     }
4891   }
4892   ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
4893   PetscFunctionReturn(0);
4894 }
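
/*
   A minimal usage sketch for MatGetBrowsOfAoCols() (illustrative only; A and B are assumed to be
   assembled MPIAIJ matrices with A's column layout matching B's row layout). The startsj and bufa
   arrays returned by the first call are passed back in so the communication buffers are reused:

      PetscInt  *startsj = PETSC_NULL;
      MatScalar *bufa    = PETSC_NULL;
      Mat       B_oth;
      ierr = MatGetBrowsOfAoCols(A,B,MAT_INITIAL_MATRIX,&startsj,&bufa,&B_oth);CHKERRQ(ierr);
      ...                            after B's numerical values change, same nonzero pattern
      ierr = MatGetBrowsOfAoCols(A,B,MAT_REUSE_MATRIX,&startsj,&bufa,&B_oth);CHKERRQ(ierr);
      ierr = PetscFree(startsj);CHKERRQ(ierr);
      ierr = PetscFree(bufa);CHKERRQ(ierr);
      ierr = MatDestroy(B_oth);CHKERRQ(ierr);
*/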
4895 
4896 #undef __FUNCT__
4897 #define __FUNCT__ "MatGetCommunicationStructs"
4898 /*@C
4899   MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication.
4900 
4901   Not Collective
4902 
4903   Input Parameters:
4904 . A - The matrix in mpiaij format
4905 
4906   Output Parameters:
4907 + lvec - The local vector holding off-process values from the argument to a matrix-vector product
4908 . colmap - A map from global column index to local index into lvec
4909 - multScatter - A scatter from the argument of a matrix-vector product to lvec
4910 
4911   Level: developer
4912 
4913 @*/
4914 #if defined (PETSC_USE_CTABLE)
4915 PetscErrorCode PETSCMAT_DLLEXPORT MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter)
4916 #else
4917 PetscErrorCode PETSCMAT_DLLEXPORT MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter)
4918 #endif
4919 {
4920   Mat_MPIAIJ *a;
4921 
4922   PetscFunctionBegin;
4923   PetscValidHeaderSpecific(A, MAT_COOKIE, 1);
4924   PetscValidPointer(lvec, 2);
4925   PetscValidPointer(colmap, 3);
4926   PetscValidPointer(multScatter, 4);
4927   a = (Mat_MPIAIJ *) A->data;
4928   if (lvec) *lvec = a->lvec;
4929   if (colmap) *colmap = a->colmap;
4930   if (multScatter) *multScatter = a->Mvctx;
4931   PetscFunctionReturn(0);
4932 }
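
/*
   A minimal usage sketch for MatGetCommunicationStructs() (illustrative only; the non-CTABLE
   configuration is assumed for the colmap type; A is an assembled MPIAIJ matrix). The returned
   objects are owned by A and must not be destroyed by the caller:

      Vec        lvec;
      PetscInt   *colmap;
      VecScatter Mvctx;
      ierr = MatGetCommunicationStructs(A,&lvec,&colmap,&Mvctx);CHKERRQ(ierr);
*/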
4933 
4934 EXTERN_C_BEGIN
4935 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_MPIAIJ_MPICRL(Mat,const MatType,MatReuse,Mat*);
4936 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_MPIAIJ_MPICSRPERM(Mat,const MatType,MatReuse,Mat*);
4937 EXTERN_C_END
4938 
4939 #include "../src/mat/impls/dense/mpi/mpidense.h"
4940 
4941 #undef __FUNCT__
4942 #define __FUNCT__ "MatMatMultNumeric_MPIDense_MPIAIJ"
4943 /*
4944     Computes C = A*B as (B'*A')' since computing the dense*sparse product A*B directly is untenable
4945 
4946                n                       p                          p
4947         (              )       (              )         (                  )
4948       m (      A       )  *  n (       B      )   =   m (         C        )
4949         (              )       (              )         (                  )
4950 
4951 */
4952 PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C)
4953 {
4954   PetscErrorCode     ierr;
4955   Mat                At,Bt,Ct;
4956 
4957   PetscFunctionBegin;
4958   ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr);
4959   ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr);
4960   ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,1.0,&Ct);CHKERRQ(ierr);
4961   ierr = MatDestroy(At);CHKERRQ(ierr);
4962   ierr = MatDestroy(Bt);CHKERRQ(ierr);
4963   ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr);
4964   ierr = MatDestroy(Ct);CHKERRQ(ierr);
4965   PetscFunctionReturn(0);
4966 }
4967 
4968 #undef __FUNCT__
4969 #define __FUNCT__ "MatMatMultSymbolic_MPIDense_MPIAIJ"
4970 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat *C)
4971 {
4972   PetscErrorCode ierr;
4973   PetscInt       m=A->rmap->n,n=B->cmap->n;
4974   Mat            Cmat;
4975 
4976   PetscFunctionBegin;
4977   if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_ERR_ARG_SIZ,"A->cmap->n %d != B->rmap->n %d\n",A->cmap->n,B->rmap->n);
4978   ierr = MatCreate(((PetscObject)A)->comm,&Cmat);CHKERRQ(ierr);
4979   ierr = MatSetSizes(Cmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
4980   ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr);
4981   ierr = MatMPIDenseSetPreallocation(Cmat,PETSC_NULL);CHKERRQ(ierr);
4982   ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4983   ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4984   *C   = Cmat;
4985   PetscFunctionReturn(0);
4986 }
4987 
4988 /* ----------------------------------------------------------------*/
4989 #undef __FUNCT__
4990 #define __FUNCT__ "MatMatMult_MPIDense_MPIAIJ"
4991 PetscErrorCode MatMatMult_MPIDense_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C)
4992 {
4993   PetscErrorCode ierr;
4994 
4995   PetscFunctionBegin;
4996   if (scall == MAT_INITIAL_MATRIX){
4997     ierr = MatMatMultSymbolic_MPIDense_MPIAIJ(A,B,fill,C);CHKERRQ(ierr);
4998   }
4999   ierr = MatMatMultNumeric_MPIDense_MPIAIJ(A,B,*C);CHKERRQ(ierr);
5000   PetscFunctionReturn(0);
5001 }
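
/*
   A minimal usage sketch (illustrative only): with A of type MPIDENSE and B of type MPIAIJ,
   MatMatMult() is expected to reach MatMatMult_MPIDense_MPIAIJ() above through the
   "MatMatMult_mpidense_mpiaij_C" function composed on MPIAIJ matrices in MatCreate_MPIAIJ():

      Mat C;
      ierr = MatMatMult(A,B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&C);CHKERRQ(ierr);
      ...
      ierr = MatMatMult(A,B,MAT_REUSE_MATRIX,PETSC_DEFAULT,&C);CHKERRQ(ierr);
      ierr = MatDestroy(C);CHKERRQ(ierr);
*/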
5002 
5003 EXTERN_C_BEGIN
5004 #if defined(PETSC_HAVE_MUMPS)
5005 extern PetscErrorCode MatGetFactor_mpiaij_mumps(Mat,MatFactorType,Mat*);
5006 #endif
5007 #if defined(PETSC_HAVE_PASTIX)
5008 extern PetscErrorCode MatGetFactor_mpiaij_pastix(Mat,MatFactorType,Mat*);
5009 #endif
5010 #if defined(PETSC_HAVE_SUPERLU_DIST)
5011 extern PetscErrorCode MatGetFactor_mpiaij_superlu_dist(Mat,MatFactorType,Mat*);
5012 #endif
5013 #if defined(PETSC_HAVE_SPOOLES)
5014 extern PetscErrorCode MatGetFactor_mpiaij_spooles(Mat,MatFactorType,Mat*);
5015 #endif
5016 EXTERN_C_END
5017 
5018 /*MC
5019    MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.
5020 
5021    Options Database Keys:
5022 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions()
5023 
5024   Level: beginner
5025 
5026 .seealso: MatCreateMPIAIJ()
5027 M*/
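
/*
   A minimal usage sketch for the MATMPIAIJ type (illustrative only; the global sizes M and N
   below are made up for the example; the run-time option -mat_type mpiaij has the same effect
   as the MatSetType() call when MatSetFromOptions() is used):

      Mat      A;
      PetscInt M = 100,N = 100;
      ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
      ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,M,N);CHKERRQ(ierr);
      ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
      ierr = MatMPIAIJSetPreallocation(A,5,PETSC_NULL,2,PETSC_NULL);CHKERRQ(ierr);
      ...                               MatSetValues()/MatAssemblyBegin()/MatAssemblyEnd()
      ierr = MatDestroy(A);CHKERRQ(ierr);
*/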
5028 
5029 EXTERN_C_BEGIN
5030 #undef __FUNCT__
5031 #define __FUNCT__ "MatCreate_MPIAIJ"
5032 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_MPIAIJ(Mat B)
5033 {
5034   Mat_MPIAIJ     *b;
5035   PetscErrorCode ierr;
5036   PetscMPIInt    size;
5037 
5038   PetscFunctionBegin;
5039   ierr = MPI_Comm_size(((PetscObject)B)->comm,&size);CHKERRQ(ierr);
5040 
5041   ierr            = PetscNewLog(B,Mat_MPIAIJ,&b);CHKERRQ(ierr);
5042   B->data         = (void*)b;
5043   ierr            = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
5044   B->rmap->bs      = 1;
5045   B->assembled    = PETSC_FALSE;
5046   B->mapping      = 0;
5047 
5048   B->insertmode      = NOT_SET_VALUES;
5049   b->size            = size;
5050   ierr = MPI_Comm_rank(((PetscObject)B)->comm,&b->rank);CHKERRQ(ierr);
5051 
5052   /* build cache for off array entries formed */
5053   ierr = MatStashCreate_Private(((PetscObject)B)->comm,1,&B->stash);CHKERRQ(ierr);
5054   b->donotstash  = PETSC_FALSE;
5055   b->colmap      = 0;
5056   b->garray      = 0;
5057   b->roworiented = PETSC_TRUE;
5058 
5059   /* stuff used for matrix vector multiply */
5060   b->lvec      = PETSC_NULL;
5061   b->Mvctx     = PETSC_NULL;
5062 
5063   /* stuff for MatGetRow() */
5064   b->rowindices   = 0;
5065   b->rowvalues    = 0;
5066   b->getrowactive = PETSC_FALSE;
5067 
5068 #if defined(PETSC_HAVE_SPOOLES)
5069   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_spooles_C",
5070                                      "MatGetFactor_mpiaij_spooles",
5071                                      MatGetFactor_mpiaij_spooles);CHKERRQ(ierr);
5072 #endif
5073 #if defined(PETSC_HAVE_MUMPS)
5074   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_mumps_C",
5075                                      "MatGetFactor_mpiaij_mumps",
5076                                      MatGetFactor_mpiaij_mumps);CHKERRQ(ierr);
5077 #endif
5078 #if defined(PETSC_HAVE_PASTIX)
5079   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_pastix_C",
5080 					   "MatGetFactor_mpiaij_pastix",
5081 					   MatGetFactor_mpiaij_pastix);CHKERRQ(ierr);
5082 #endif
5083 #if defined(PETSC_HAVE_SUPERLU_DIST)
5084   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_superlu_dist_C",
5085                                      "MatGetFactor_mpiaij_superlu_dist",
5086                                      MatGetFactor_mpiaij_superlu_dist);CHKERRQ(ierr);
5087 #endif
5088   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
5089                                      "MatStoreValues_MPIAIJ",
5090                                      MatStoreValues_MPIAIJ);CHKERRQ(ierr);
5091   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
5092                                      "MatRetrieveValues_MPIAIJ",
5093                                      MatRetrieveValues_MPIAIJ);CHKERRQ(ierr);
5094   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
5095 				     "MatGetDiagonalBlock_MPIAIJ",
5096                                      MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr);
5097   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatIsTranspose_C",
5098 				     "MatIsTranspose_MPIAIJ",
5099 				     MatIsTranspose_MPIAIJ);CHKERRQ(ierr);
5100   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocation_C",
5101 				     "MatMPIAIJSetPreallocation_MPIAIJ",
5102 				     MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr);
5103   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",
5104 				     "MatMPIAIJSetPreallocationCSR_MPIAIJ",
5105 				     MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr);
5106   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C",
5107 				     "MatDiagonalScaleLocal_MPIAIJ",
5108 				     MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr);
5109   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpicsrperm_C",
5110                                      "MatConvert_MPIAIJ_MPICSRPERM",
5111                                       MatConvert_MPIAIJ_MPICSRPERM);CHKERRQ(ierr);
5112   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpicrl_C",
5113                                      "MatConvert_MPIAIJ_MPICRL",
5114                                       MatConvert_MPIAIJ_MPICRL);CHKERRQ(ierr);
5115   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMult_mpidense_mpiaij_C",
5116                                      "MatMatMult_MPIDense_MPIAIJ",
5117                                       MatMatMult_MPIDense_MPIAIJ);CHKERRQ(ierr);
5118   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMultSymbolic_mpidense_mpiaij_C",
5119                                      "MatMatMultSymbolic_MPIDense_MPIAIJ",
5120                                       MatMatMultSymbolic_MPIDense_MPIAIJ);CHKERRQ(ierr);
5121   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMultNumeric_mpidense_mpiaij_C",
5122                                      "MatMatMultNumeric_MPIDense_MPIAIJ",
5123                                       MatMatMultNumeric_MPIDense_MPIAIJ);CHKERRQ(ierr);
5124   ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr);
5125   PetscFunctionReturn(0);
5126 }
5127 EXTERN_C_END
5128 
5129 #undef __FUNCT__
5130 #define __FUNCT__ "MatCreateMPIAIJWithSplitArrays"
5131 /*@
5132      MatCreateMPIAIJWithSplitArrays - creates an MPIAIJ matrix using arrays that contain the "diagonal"
5133          and "off-diagonal" part of the matrix in CSR format.
5134 
5135    Collective on MPI_Comm
5136 
5137    Input Parameters:
5138 +  comm - MPI communicator
5139 .  m - number of local rows (Cannot be PETSC_DECIDE)
5140 .  n - This value should be the same as the local size used in creating the
5141        x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have it
5142        calculated if N is given). For square matrices n is almost always m.
5143 .  M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given)
5144 .  N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given)
5145 .   i - row indices for "diagonal" portion of matrix
5146 .   j - column indices
5147 .   a - matrix values
5148 .   oi - row indices for "off-diagonal" portion of matrix
5149 .   oj - column indices
5150 -   oa - matrix values
5151 
5152    Output Parameter:
5153 .   mat - the matrix
5154 
5155    Level: advanced
5156 
5157    Notes:
5158        The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc.
5159 
5160        The i and j indices are 0 based
5161 
5162        See MatCreateMPIAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix
5163 
5164        This sets local rows and cannot be used to set off-processor values.
5165 
5166        You cannot later use MatSetValues() to change values in this matrix.
5167 
5168 .keywords: matrix, aij, compressed row, sparse, parallel
5169 
5170 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
5171           MPIAIJ, MatCreateMPIAIJ(), MatCreateMPIAIJWithArrays()
5172 @*/
5173 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],
5174 								PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat)
5175 {
5176   PetscErrorCode ierr;
5177   Mat_MPIAIJ     *maij;
5178 
5179   PetscFunctionBegin;
5180   if (m < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
5181   if (i[0]) {
5182     SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
5183   }
5184   if (oi[0]) {
5185     SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0");
5186   }
5187   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
5188   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
5189   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
5190   maij = (Mat_MPIAIJ*) (*mat)->data;
5191   maij->donotstash     = PETSC_TRUE;
5192   (*mat)->preallocated = PETSC_TRUE;
5193 
5194   ierr = PetscMapSetBlockSize((*mat)->rmap,1);CHKERRQ(ierr);
5195   ierr = PetscMapSetBlockSize((*mat)->cmap,1);CHKERRQ(ierr);
5196   ierr = PetscMapSetUp((*mat)->rmap);CHKERRQ(ierr);
5197   ierr = PetscMapSetUp((*mat)->cmap);CHKERRQ(ierr);
5198 
5199   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr);
5200   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr);
5201 
5202   ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5203   ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5204   ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5205   ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5206 
5207   ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5208   ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5209   PetscFunctionReturn(0);
5210 }
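
/*
   A minimal usage sketch for MatCreateMPIAIJWithSplitArrays() (illustrative only; the arrays
   below are made-up values describing one local row and one local column per process, with a
   single entry in the diagonal block and an empty off-diagonal block). Because the arrays are
   NOT copied, in real code they must remain valid for the life of the matrix:

      PetscInt    di[] = {0,1}, dj[] = {0};       one local row, one entry in the diagonal block
      PetscScalar da[] = {1.0};
      PetscInt    oi[] = {0,0}, oj[] = {0};       off-diagonal block left empty in this sketch
      PetscScalar oa[] = {0.0};
      Mat         A;
      ierr = MatCreateMPIAIJWithSplitArrays(PETSC_COMM_WORLD,1,1,PETSC_DETERMINE,PETSC_DETERMINE,
                                            di,dj,da,oi,oj,oa,&A);CHKERRQ(ierr);
      ...
      ierr = MatDestroy(A);CHKERRQ(ierr);
*/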
5211 
5212 /*
5213     Special version for direct calls from Fortran
5214 */
5215 #if defined(PETSC_HAVE_FORTRAN_CAPS)
5216 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ
5217 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
5218 #define matsetvaluesmpiaij_ matsetvaluesmpiaij
5219 #endif
5220 
5221 /* Change these macros so can be used in void function */
5222 #undef CHKERRQ
5223 #define CHKERRQ(ierr) CHKERRABORT(((PetscObject)mat)->comm,ierr)
5224 #undef SETERRQ2
5225 #define SETERRQ2(ierr,b,c,d) CHKERRABORT(((PetscObject)mat)->comm,ierr)
5226 #undef SETERRQ
5227 #define SETERRQ(ierr,b) CHKERRABORT(((PetscObject)mat)->comm,ierr)
5228 
5229 EXTERN_C_BEGIN
5230 #undef __FUNCT__
5231 #define __FUNCT__ "matsetvaluesmpiaij_"
5232 void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr)
5233 {
5234   Mat             mat = *mmat;
5235   PetscInt        m = *mm, n = *mn;
5236   InsertMode      addv = *maddv;
5237   Mat_MPIAIJ      *aij = (Mat_MPIAIJ*)mat->data;
5238   PetscScalar     value;
5239   PetscErrorCode  ierr;
5240 
5241   ierr = MatPreallocated(mat);CHKERRQ(ierr);
5242   if (mat->insertmode == NOT_SET_VALUES) {
5243     mat->insertmode = addv;
5244   }
5245 #if defined(PETSC_USE_DEBUG)
5246   else if (mat->insertmode != addv) {
5247     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
5248   }
5249 #endif
5250   {
5251   PetscInt        i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
5252   PetscInt        cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
5253   PetscTruth      roworiented = aij->roworiented;
5254 
5255   /* Some Variables required in the macro */
5256   Mat             A = aij->A;
5257   Mat_SeqAIJ      *a = (Mat_SeqAIJ*)A->data;
5258   PetscInt        *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
5259   MatScalar       *aa = a->a;
5260   PetscTruth      ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES))?PETSC_TRUE:PETSC_FALSE);
5261   Mat             B = aij->B;
5262   Mat_SeqAIJ      *b = (Mat_SeqAIJ*)B->data;
5263   PetscInt        *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
5264   MatScalar       *ba = b->a;
5265 
5266   PetscInt        *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
5267   PetscInt        nonew = a->nonew;
5268   MatScalar       *ap1,*ap2;
5269 
5270   PetscFunctionBegin;
5271   for (i=0; i<m; i++) {
5272     if (im[i] < 0) continue;
5273 #if defined(PETSC_USE_DEBUG)
5274     if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
5275 #endif
5276     if (im[i] >= rstart && im[i] < rend) {
5277       row      = im[i] - rstart;
5278       lastcol1 = -1;
5279       rp1      = aj + ai[row];
5280       ap1      = aa + ai[row];
5281       rmax1    = aimax[row];
5282       nrow1    = ailen[row];
5283       low1     = 0;
5284       high1    = nrow1;
5285       lastcol2 = -1;
5286       rp2      = bj + bi[row];
5287       ap2      = ba + bi[row];
5288       rmax2    = bimax[row];
5289       nrow2    = bilen[row];
5290       low2     = 0;
5291       high2    = nrow2;
5292 
5293       for (j=0; j<n; j++) {
5294         if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
5295         if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
5296         if (in[j] >= cstart && in[j] < cend){
5297           col = in[j] - cstart;
5298           MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
5299         } else if (in[j] < 0) continue;
5300 #if defined(PETSC_USE_DEBUG)
5301         else if (in[j] >= mat->cmap->N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);}
5302 #endif
5303         else {
5304           if (mat->was_assembled) {
5305             if (!aij->colmap) {
5306               ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
5307             }
5308 #if defined (PETSC_USE_CTABLE)
5309             ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
5310 	    col--;
5311 #else
5312             col = aij->colmap[in[j]] - 1;
5313 #endif
5314             if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
5315               ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
5316               col =  in[j];
5317               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
5318               B = aij->B;
5319               b = (Mat_SeqAIJ*)B->data;
5320               bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
5321               rp2      = bj + bi[row];
5322               ap2      = ba + bi[row];
5323               rmax2    = bimax[row];
5324               nrow2    = bilen[row];
5325               low2     = 0;
5326               high2    = nrow2;
5327               bm       = aij->B->rmap->n;
5328               ba = b->a;
5329             }
5330           } else col = in[j];
5331           MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
5332         }
5333       }
5334     } else {
5335       if (!aij->donotstash) {
5336         if (roworiented) {
5337           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscTruth)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
5338         } else {
5339           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscTruth)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
5340         }
5341       }
5342     }
5343   }}
5344   PetscFunctionReturnVoid();
5345 }
5346 EXTERN_C_END
5347 
5348