xref: /petsc/src/mat/impls/aij/mpi/mpiaij.c (revision ce0a2cd1da0658c2b28aad1be2e2c8e41567bece)
1 #define PETSCMAT_DLL
2 
3 #include "src/mat/impls/aij/mpi/mpiaij.h"   /*I "petscmat.h" I*/
4 #include "src/inline/spops.h"
5 
6 #undef __FUNCT__
7 #define __FUNCT__ "MatDistribute_MPIAIJ"
8 /*
9     Distributes a SeqAIJ matrix across a set of processes. Code stolen from
10     MatLoad_MPIAIJ(). Horrible lack of reuse. Should be a routine for each matrix type.
11 
12     Only for square matrices
13 */
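/*
    Illustrative usage (a sketch only; gseq, dist, and mlocal are hypothetical names, and
    the sequential matrix gseq is assumed to be fully assembled on process 0). Each process
    chooses the number of rows it will own (mlocal) and calls

        ierr = MatDistribute_MPIAIJ(PETSC_COMM_WORLD,gseq,mlocal,MAT_INITIAL_MATRIX,&dist);CHKERRQ(ierr);

    later, after only the numerical values of gseq have changed on process 0, the layout can
    be reused and just the values shipped again with

        ierr = MatDistribute_MPIAIJ(PETSC_COMM_WORLD,gseq,mlocal,MAT_REUSE_MATRIX,&dist);CHKERRQ(ierr);
*/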
14 PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat)
15 {
16   PetscMPIInt    rank,size;
17   PetscInt       *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz,*gmataj,cnt,row,*ld;
18   PetscErrorCode ierr;
19   Mat            mat;
20   Mat_SeqAIJ     *gmata;
21   PetscMPIInt    tag;
22   MPI_Status     status;
23   PetscTruth     aij;
24   MatScalar      *gmataa,*ao,*ad,*gmataarestore=0;
25 
26   PetscFunctionBegin;
27   CHKMEMQ;
28   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
29   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
30   if (!rank) {
31     ierr = PetscTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);CHKERRQ(ierr);
32     if (!aij) SETERRQ1(PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name);
33   }
34   if (reuse == MAT_INITIAL_MATRIX) {
35     ierr = MatCreate(comm,&mat);CHKERRQ(ierr);
36     ierr = MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
37     ierr = MatSetType(mat,MATAIJ);CHKERRQ(ierr);
38     ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr);
39     ierr = PetscMalloc2(m,PetscInt,&dlens,m,PetscInt,&olens);CHKERRQ(ierr);
40     ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
41     rowners[0] = 0;
42     for (i=2; i<=size; i++) {
43       rowners[i] += rowners[i-1];
44     }
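    /* rowners[] now holds the prefix sums of the local row counts: process p will own
       global rows rowners[p] through rowners[p+1]-1 of the distributed matrix */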
45     rstart = rowners[rank];
46     rend   = rowners[rank+1];
47     ierr   = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
48     if (!rank) {
49       gmata = (Mat_SeqAIJ*) gmat->data;
50       /* send row lengths to all processors */
51       for (i=0; i<m; i++) dlens[i] = gmata->ilen[i];
52       for (i=1; i<size; i++) {
53 	ierr = MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
54       }
55       /* determine, for each row, the number of off-diagonal nonzeros (olens[]) and how many of them lie to the left of the diagonal block (ld[]) */
56       ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
57       ierr = PetscMalloc(m*sizeof(PetscInt),&ld);CHKERRQ(ierr);
58       ierr = PetscMemzero(ld,m*sizeof(PetscInt));CHKERRQ(ierr);
59       jj = 0;
60       for (i=0; i<m; i++) {
61 	for (j=0; j<dlens[i]; j++) {
62           if (gmata->j[jj] < rstart) ld[i]++;
63 	  if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++;
64 	  jj++;
65 	}
66       }
67       /* send column indices to other processes */
68       for (i=1; i<size; i++) {
69 	nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
70 	ierr = MPI_Send(&nz,1,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
71 	ierr = MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
72       }
73 
74       /* send numerical values to other processes */
75       for (i=1; i<size; i++) {
76         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
77         ierr = MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
78       }
79       gmataa = gmata->a;
80       gmataj = gmata->j;
81 
82     } else {
83       /* receive row lengths */
84       ierr = MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
85       /* receive column indices */
86       ierr = MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
87       ierr = PetscMalloc2(nz,PetscScalar,&gmataa,nz,PetscInt,&gmataj);CHKERRQ(ierr);
88       ierr = MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
89       /* determine, for each row, the number of off-diagonal nonzeros (olens[]) and how many of them lie to the left of the diagonal block (ld[]) */
90       ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
91       ierr = PetscMalloc(m*sizeof(PetscInt),&ld);CHKERRQ(ierr);
92       ierr = PetscMemzero(ld,m*sizeof(PetscInt));CHKERRQ(ierr);
93       jj = 0;
94       for (i=0; i<m; i++) {
95 	for (j=0; j<dlens[i]; j++) {
96           if (gmataj[jj] < rstart) ld[i]++;
97 	  if (gmataj[jj] < rstart || gmataj[jj] >= rend) olens[i]++;
98 	  jj++;
99 	}
100       }
101       /* receive numerical values */
102       ierr = PetscMemzero(gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr);
103       ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
104     }
105     /* set preallocation */
106     for (i=0; i<m; i++) {
107       dlens[i] -= olens[i];
108     }
109     ierr = MatSeqAIJSetPreallocation(mat,0,dlens);CHKERRQ(ierr);
110     ierr = MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);CHKERRQ(ierr);
111 
112     for (i=0; i<m; i++) {
113       dlens[i] += olens[i];
114     }
115     cnt  = 0;
116     for (i=0; i<m; i++) {
117       row  = rstart + i;
118       ierr = MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);CHKERRQ(ierr);
119       cnt += dlens[i];
120     }
121     if (rank) {
122       ierr = PetscFree2(gmataa,gmataj);CHKERRQ(ierr);
123     }
124     ierr = PetscFree2(dlens,olens);CHKERRQ(ierr);
125     ierr = PetscFree(rowners);CHKERRQ(ierr);
126     ((Mat_MPIAIJ*)(mat->data))->ld = ld;
127     *inmat = mat;
128   } else {   /* column indices are already set; only need to move over numerical values from process 0 */
129     Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data;
130     Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data;
131     mat   = *inmat;
132     ierr  = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
133     if (!rank) {
134       /* send numerical values to other processes */
135       gmata = (Mat_SeqAIJ*) gmat->data;
136       ierr   = MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);CHKERRQ(ierr);
137       gmataa = gmata->a;
138       for (i=1; i<size; i++) {
139         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
140         ierr = MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
141       }
142       nz   = gmata->i[rowners[1]]-gmata->i[rowners[0]];
143     } else {
144       /* receive numerical values from process 0 */
145       nz   = Ad->nz + Ao->nz;
146       ierr = PetscMalloc(nz*sizeof(PetscScalar),&gmataa);CHKERRQ(ierr); gmataarestore = gmataa;
147       ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
148     }
149     /* transfer the numerical values into the diagonal (A) and off-diagonal (B) parts of mat */
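    /* ld[i], computed earlier, is the number of nonzeros in local row i whose global column
       lies to the left of the diagonal block; the copies below walk each contiguous row of
       received values and split it as [left off-diagonal | diagonal block | right off-diagonal] */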
150     ld = ((Mat_MPIAIJ*)(mat->data))->ld;
151     ad = Ad->a;
152     ao = Ao->a;
153     if (mat->rmap.n) {
154       i  = 0;
155       nz = ld[i];                                   ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
156       nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
157     }
158     for (i=1; i<mat->rmap.n; i++) {
159       nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
160       nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
161     }
162     i--;
163     if (mat->rmap.n) {
164       nz = Ao->i[i+1] - Ao->i[i] - ld[i];           ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
165     }
166     if (rank) {
167       ierr = PetscFree(gmataarestore);CHKERRQ(ierr);
168     }
169   }
170   ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
171   ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
172   CHKMEMQ;
173   PetscFunctionReturn(0);
174 }
175 
176 /*
177   Local utility routine that creates a mapping from the global column
178 number to the local number in the off-diagonal part of the local
179 storage of the matrix.  When PETSC_USE_CTABLE is defined this mapping is scalable,
180 at a slightly higher hash-table lookup cost; without it, it is not scalable (each
181 process stores an order-N integer array) but access is fast.
182 */
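/*
   A small example (the numbers are illustrative only): if the off-diagonal block B of this
   process references global columns garray[] = {3,7,12}, then after this routine the colmap
   maps 3 -> 1, 7 -> 2, 12 -> 3, i.e. the local index plus one so that a result of 0 can mean
   "column not present"; with PETSC_USE_CTABLE the same information is kept in a hash table
   (with keys also shifted by one) instead of a dense length-N array.
*/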
183 #undef __FUNCT__
184 #define __FUNCT__ "CreateColmap_MPIAIJ_Private"
185 PetscErrorCode CreateColmap_MPIAIJ_Private(Mat mat)
186 {
187   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
188   PetscErrorCode ierr;
189   PetscInt       n = aij->B->cmap.n,i;
190 
191   PetscFunctionBegin;
192 #if defined (PETSC_USE_CTABLE)
193   ierr = PetscTableCreate(n,&aij->colmap);CHKERRQ(ierr);
194   for (i=0; i<n; i++){
195     ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1);CHKERRQ(ierr);
196   }
197 #else
198   ierr = PetscMalloc((mat->cmap.N+1)*sizeof(PetscInt),&aij->colmap);CHKERRQ(ierr);
199   ierr = PetscLogObjectMemory(mat,mat->cmap.N*sizeof(PetscInt));CHKERRQ(ierr);
200   ierr = PetscMemzero(aij->colmap,mat->cmap.N*sizeof(PetscInt));CHKERRQ(ierr);
201   for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
202 #endif
203   PetscFunctionReturn(0);
204 }
205 
206 
207 #define CHUNKSIZE   15
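/*
   The two macros below insert a single value into one row of the diagonal (A) or
   off-diagonal (B) sequential block: they narrow the search window with a coarse binary
   search while more than 5 candidate columns remain, finish with a linear scan, and then
   either update an existing entry, skip the insertion (ignored zeros or disallowed new
   nonzeros), or shift the tail of the row up one slot (reallocating through
   MatSeqXAIJReallocateAIJ() when the row is full) to make room for the new column.
*/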
208 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \
209 { \
210     if (col <= lastcol1) low1 = 0; else high1 = nrow1; \
211     lastcol1 = col;\
212     while (high1-low1 > 5) { \
213       t = (low1+high1)/2; \
214       if (rp1[t] > col) high1 = t; \
215       else             low1  = t; \
216     } \
217       for (_i=low1; _i<high1; _i++) { \
218         if (rp1[_i] > col) break; \
219         if (rp1[_i] == col) { \
220           if (addv == ADD_VALUES) ap1[_i] += value;   \
221           else                    ap1[_i] = value; \
222           goto a_noinsert; \
223         } \
224       }  \
225       if (value == 0.0 && ignorezeroentries) {low1 = 0; high1 = nrow1;goto a_noinsert;} \
226       if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;}		\
227       if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
228       MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \
229       N = nrow1++ - 1; a->nz++; high1++; \
230       /* shift up all the later entries in this row */ \
231       for (ii=N; ii>=_i; ii--) { \
232         rp1[ii+1] = rp1[ii]; \
233         ap1[ii+1] = ap1[ii]; \
234       } \
235       rp1[_i] = col;  \
236       ap1[_i] = value;  \
237       a_noinsert: ; \
238       ailen[row] = nrow1; \
239 }
240 
241 
242 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \
243 { \
244     if (col <= lastcol2) low2 = 0; else high2 = nrow2; \
245     lastcol2 = col;\
246     while (high2-low2 > 5) { \
247       t = (low2+high2)/2; \
248       if (rp2[t] > col) high2 = t; \
249       else             low2  = t; \
250     } \
251     for (_i=low2; _i<high2; _i++) {		\
252       if (rp2[_i] > col) break;			\
253       if (rp2[_i] == col) {			      \
254 	if (addv == ADD_VALUES) ap2[_i] += value;     \
255 	else                    ap2[_i] = value;      \
256 	goto b_noinsert;			      \
257       }						      \
258     }							      \
259     if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
260     if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;}		\
261     if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
262     MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \
263     N = nrow2++ - 1; b->nz++; high2++;					\
264     /* shift up all the later entries in this row */			\
265     for (ii=N; ii>=_i; ii--) {						\
266       rp2[ii+1] = rp2[ii];						\
267       ap2[ii+1] = ap2[ii];						\
268     }									\
269     rp2[_i] = col;							\
270     ap2[_i] = value;							\
271     b_noinsert: ;								\
272     bilen[row] = nrow2;							\
273 }
274 
275 #undef __FUNCT__
276 #define __FUNCT__ "MatSetValuesRow_MPIAIJ"
277 PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[])
278 {
279   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)A->data;
280   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data;
281   PetscErrorCode ierr;
282   PetscInt       l,*garray = mat->garray,diag;
283 
284   PetscFunctionBegin;
285   /* code only works for square matrices A */
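  /* v[] is assumed to hold the entire local row in increasing global column order:
     the off-diagonal entries to the left of the diagonal block, then the diagonal-block
     entries, then the off-diagonal entries to the right; the three copies below split
     it into the B, A, and B parts accordingly */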
286 
287   /* find size of row to the left of the diagonal part */
288   ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr);
289   row  = row - diag;
290   for (l=0; l<b->i[row+1]-b->i[row]; l++) {
291     if (garray[b->j[b->i[row]+l]] > diag) break;
292   }
293   ierr = PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));CHKERRQ(ierr);
294 
295   /* diagonal part */
296   ierr = PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));CHKERRQ(ierr);
297 
298   /* right of diagonal part */
299   ierr = PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));CHKERRQ(ierr);
300   PetscFunctionReturn(0);
301 }
302 
303 #undef __FUNCT__
304 #define __FUNCT__ "MatSetValues_MPIAIJ"
305 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
306 {
307   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
308   PetscScalar    value;
309   PetscErrorCode ierr;
310   PetscInt       i,j,rstart = mat->rmap.rstart,rend = mat->rmap.rend;
311   PetscInt       cstart = mat->cmap.rstart,cend = mat->cmap.rend,row,col;
312   PetscTruth     roworiented = aij->roworiented;
313 
314   /* Some variables required by the MatSetValues_SeqAIJ_A/B_Private() macros */
315   Mat            A = aij->A;
316   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data;
317   PetscInt       *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
318   MatScalar      *aa = a->a;
319   PetscTruth     ignorezeroentries = a->ignorezeroentries;
320   Mat            B = aij->B;
321   Mat_SeqAIJ     *b = (Mat_SeqAIJ*)B->data;
322   PetscInt       *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap.n,am = aij->A->rmap.n;
323   MatScalar      *ba = b->a;
324 
325   PetscInt       *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
326   PetscInt       nonew = a->nonew;
327   MatScalar      *ap1,*ap2;
328 
329   PetscFunctionBegin;
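  /* There are three cases for each (row,column) pair: the row is owned locally and the
     column falls inside the diagonal block (insert into A); the row is owned locally but
     the column belongs to another process's block (translate the global column through
     colmap and insert into B, disassembling first if a brand-new off-diagonal column
     shows up after the matrix was already assembled); or the row is owned elsewhere
     (stash the value to be communicated during assembly) */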
330   for (i=0; i<m; i++) {
331     if (im[i] < 0) continue;
332 #if defined(PETSC_USE_DEBUG)
333     if (im[i] >= mat->rmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap.N-1);
334 #endif
335     if (im[i] >= rstart && im[i] < rend) {
336       row      = im[i] - rstart;
337       lastcol1 = -1;
338       rp1      = aj + ai[row];
339       ap1      = aa + ai[row];
340       rmax1    = aimax[row];
341       nrow1    = ailen[row];
342       low1     = 0;
343       high1    = nrow1;
344       lastcol2 = -1;
345       rp2      = bj + bi[row];
346       ap2      = ba + bi[row];
347       rmax2    = bimax[row];
348       nrow2    = bilen[row];
349       low2     = 0;
350       high2    = nrow2;
351 
352       for (j=0; j<n; j++) {
353         if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
354         if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
355         if (in[j] >= cstart && in[j] < cend){
356           col = in[j] - cstart;
357           MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
358         } else if (in[j] < 0) continue;
359 #if defined(PETSC_USE_DEBUG)
360         else if (in[j] >= mat->cmap.N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap.N-1);}
361 #endif
362         else {
363           if (mat->was_assembled) {
364             if (!aij->colmap) {
365               ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
366             }
367 #if defined (PETSC_USE_CTABLE)
368             ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
369 	    col--;
370 #else
371             col = aij->colmap[in[j]] - 1;
372 #endif
373             if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
374               ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
375               col =  in[j];
376               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
377               B = aij->B;
378               b = (Mat_SeqAIJ*)B->data;
379               bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a;
380               rp2      = bj + bi[row];
381               ap2      = ba + bi[row];
382               rmax2    = bimax[row];
383               nrow2    = bilen[row];
384               low2     = 0;
385               high2    = nrow2;
386               bm       = aij->B->rmap.n;
387               ba = b->a;
388             }
389           } else col = in[j];
390           MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
391         }
392       }
393     } else {
394       if (!aij->donotstash) {
395         if (roworiented) {
396           if (ignorezeroentries && v[i*n] == 0.0) continue;
397           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr);
398         } else {
399           if (ignorezeroentries && v[i] == 0.0) continue;
400           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr);
401         }
402       }
403     }
404   }
405   PetscFunctionReturn(0);
406 }
407 
408 #undef __FUNCT__
409 #define __FUNCT__ "MatGetValues_MPIAIJ"
410 PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
411 {
412   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
413   PetscErrorCode ierr;
414   PetscInt       i,j,rstart = mat->rmap.rstart,rend = mat->rmap.rend;
415   PetscInt       cstart = mat->cmap.rstart,cend = mat->cmap.rend,row,col;
416 
417   PetscFunctionBegin;
418   for (i=0; i<m; i++) {
419     if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/
420     if (idxm[i] >= mat->rmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap.N-1);
421     if (idxm[i] >= rstart && idxm[i] < rend) {
422       row = idxm[i] - rstart;
423       for (j=0; j<n; j++) {
424         if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */
425         if (idxn[j] >= mat->cmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap.N-1);
426         if (idxn[j] >= cstart && idxn[j] < cend){
427           col = idxn[j] - cstart;
428           ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
429         } else {
430           if (!aij->colmap) {
431             ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
432           }
433 #if defined (PETSC_USE_CTABLE)
434           ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
435           col --;
436 #else
437           col = aij->colmap[idxn[j]] - 1;
438 #endif
439           if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
440           else {
441             ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
442           }
443         }
444       }
445     } else {
446       SETERRQ(PETSC_ERR_SUP,"Only local values currently supported");
447     }
448   }
449   PetscFunctionReturn(0);
450 }
451 
452 #undef __FUNCT__
453 #define __FUNCT__ "MatAssemblyBegin_MPIAIJ"
454 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
455 {
456   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
457   PetscErrorCode ierr;
458   PetscInt       nstash,reallocs;
459   InsertMode     addv;
460 
461   PetscFunctionBegin;
462   if (aij->donotstash) {
463     PetscFunctionReturn(0);
464   }
465 
466   /* make sure all processors are using either INSERT_VALUES or ADD_VALUES, not a mixture */
467   ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,((PetscObject)mat)->comm);CHKERRQ(ierr);
468   if (addv == (ADD_VALUES|INSERT_VALUES)) {
469     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
470   }
471   mat->insertmode = addv; /* in case this processor had no cache */
472 
473   ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap.range);CHKERRQ(ierr);
474   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
475   ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
476   PetscFunctionReturn(0);
477 }
478 
479 #undef __FUNCT__
480 #define __FUNCT__ "MatAssemblyEnd_MPIAIJ"
481 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
482 {
483   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
484   Mat_SeqAIJ     *a=(Mat_SeqAIJ *)aij->A->data;
485   PetscErrorCode ierr;
486   PetscMPIInt    n;
487   PetscInt       i,j,rstart,ncols,flg;
488   PetscInt       *row,*col;
489   PetscTruth     other_disassembled;
490   PetscScalar    *val;
491   InsertMode     addv = mat->insertmode;
492 
493   /* do not use 'b = (Mat_SeqAIJ *)aij->B->data' as B can be reset in disassembly */
494   PetscFunctionBegin;
495   if (!aij->donotstash) {
496     while (1) {
497       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
498       if (!flg) break;
499 
500       for (i=0; i<n;) {
501         /* Now identify the consecutive vals belonging to the same row */
502         for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
503         if (j < n) ncols = j-i;
504         else       ncols = n-i;
505         /* Now assemble all these values with a single function call */
506         ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
507         i = j;
508       }
509     }
510     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
511   }
512   a->compressedrow.use     = PETSC_FALSE;
513   ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
514   ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);
515 
516   /* determine if any processor has disassembled; if so we must
517      also disassemble ourselves, in order that we may reassemble. */
518   /*
519      if nonzero structure of submatrix B cannot change then we know that
520      no processor disassembled thus we can skip this stuff
521   */
522   if (!((Mat_SeqAIJ*)aij->B->data)->nonew)  {
523     ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,((PetscObject)mat)->comm);CHKERRQ(ierr);
524     if (mat->was_assembled && !other_disassembled) {
525       ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
526     }
527   }
528   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
529     ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
530   }
531   ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr);
532   ((Mat_SeqAIJ *)aij->B->data)->compressedrow.use = PETSC_TRUE; /* b->compressedrow.use */
533   ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
534   ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);
535 
536   ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr);
537   aij->rowvalues = 0;
538 
539   /* used by MatAXPY() */
540   a->xtoy = 0; ((Mat_SeqAIJ *)aij->B->data)->xtoy = 0;  /* b->xtoy = 0 */
541   a->XtoY = 0; ((Mat_SeqAIJ *)aij->B->data)->XtoY = 0;  /* b->XtoY = 0 */
542 
543   PetscFunctionReturn(0);
544 }
545 
546 #undef __FUNCT__
547 #define __FUNCT__ "MatZeroEntries_MPIAIJ"
548 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
549 {
550   Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
551   PetscErrorCode ierr;
552 
553   PetscFunctionBegin;
554   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
555   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
556   PetscFunctionReturn(0);
557 }
558 
559 #undef __FUNCT__
560 #define __FUNCT__ "MatZeroRows_MPIAIJ"
561 PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag)
562 {
563   Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
564   PetscErrorCode ierr;
565   PetscMPIInt    size = l->size,imdex,n,rank = l->rank,tag = ((PetscObject)A)->tag,lastidx = -1;
566   PetscInt       i,*owners = A->rmap.range;
567   PetscInt       *nprocs,j,idx,nsends,row;
568   PetscInt       nmax,*svalues,*starts,*owner,nrecvs;
569   PetscInt       *rvalues,count,base,slen,*source;
570   PetscInt       *lens,*lrows,*values,rstart=A->rmap.rstart;
571   MPI_Comm       comm = ((PetscObject)A)->comm;
572   MPI_Request    *send_waits,*recv_waits;
573   MPI_Status     recv_status,*send_status;
574 #if defined(PETSC_DEBUG)
575   PetscTruth     found = PETSC_FALSE;
576 #endif
577 
578   PetscFunctionBegin;
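  /* Outline: determine which process owns each global row passed in, exchange those row
     lists (receives are posted before the sends), convert the received rows to local
     indices, and zero them in the local off-diagonal (B) and diagonal (A) blocks,
     optionally placing diag on the diagonal */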
579   /*  first count number of contributors to each processor */
580   ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
581   ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
582   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/
583   j = 0;
584   for (i=0; i<N; i++) {
585     if (lastidx > (idx = rows[i])) j = 0;
586     lastidx = idx;
587     for (; j<size; j++) {
588       if (idx >= owners[j] && idx < owners[j+1]) {
589         nprocs[2*j]++;
590         nprocs[2*j+1] = 1;
591         owner[i] = j;
592 #if defined(PETSC_DEBUG)
593         found = PETSC_TRUE;
594 #endif
595         break;
596       }
597     }
598 #if defined(PETSC_DEBUG)
599     if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
600     found = PETSC_FALSE;
601 #endif
602   }
603   nsends = 0;  for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}
604 
605   /* inform other processors of number of messages and max length*/
606   ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);
607 
608   /* post receives:   */
609   ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr);
610   ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
611   for (i=0; i<nrecvs; i++) {
612     ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
613   }
614 
615   /* do sends:
616       1) starts[i] gives the starting index in svalues for stuff going to
617          the ith processor
618   */
619   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr);
620   ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
621   ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr);
622   starts[0] = 0;
623   for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
624   for (i=0; i<N; i++) {
625     svalues[starts[owner[i]]++] = rows[i];
626   }
627 
628   starts[0] = 0;
629   for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
630   count = 0;
631   for (i=0; i<size; i++) {
632     if (nprocs[2*i+1]) {
633       ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
634     }
635   }
636   ierr = PetscFree(starts);CHKERRQ(ierr);
637 
638   base = owners[rank];
639 
640   /*  wait on receives */
641   ierr   = PetscMalloc(2*(nrecvs+1)*sizeof(PetscInt),&lens);CHKERRQ(ierr);
642   source = lens + nrecvs;
643   count  = nrecvs; slen = 0;
644   while (count) {
645     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
646     /* unpack receives into our local space */
647     ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);
648     source[imdex]  = recv_status.MPI_SOURCE;
649     lens[imdex]    = n;
650     slen          += n;
651     count--;
652   }
653   ierr = PetscFree(recv_waits);CHKERRQ(ierr);
654 
655   /* move the data into the send scatter */
656   ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr);
657   count = 0;
658   for (i=0; i<nrecvs; i++) {
659     values = rvalues + i*nmax;
660     for (j=0; j<lens[i]; j++) {
661       lrows[count++] = values[j] - base;
662     }
663   }
664   ierr = PetscFree(rvalues);CHKERRQ(ierr);
665   ierr = PetscFree(lens);CHKERRQ(ierr);
666   ierr = PetscFree(owner);CHKERRQ(ierr);
667   ierr = PetscFree(nprocs);CHKERRQ(ierr);
668 
669   /* actually zap the local rows */
670   /*
671         Zero the required rows. If the "diagonal block" of the matrix
672      is square and the user wishes to set the diagonal we use separate
673      code so that MatSetValues() is not called for each diagonal entry; that would
674      allocate new memory, causing lots of mallocs and slowing things down.
675 
676        Contributed by: Matthew Knepley
677   */
678   /* must zero l->B before l->A because the (diag) case below may put values into l->B */
679   ierr = MatZeroRows(l->B,slen,lrows,0.0);CHKERRQ(ierr);
680   if ((diag != 0.0) && (l->A->rmap.N == l->A->cmap.N)) {
681     ierr      = MatZeroRows(l->A,slen,lrows,diag);CHKERRQ(ierr);
682   } else if (diag != 0.0) {
683     ierr = MatZeroRows(l->A,slen,lrows,0.0);CHKERRQ(ierr);
684     if (((Mat_SeqAIJ*)l->A->data)->nonew) {
685       SETERRQ(PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\n\
686 MAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
687     }
688     for (i = 0; i < slen; i++) {
689       row  = lrows[i] + rstart;
690       ierr = MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr);
691     }
692     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
693     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
694   } else {
695     ierr = MatZeroRows(l->A,slen,lrows,0.0);CHKERRQ(ierr);
696   }
697   ierr = PetscFree(lrows);CHKERRQ(ierr);
698 
699   /* wait on sends */
700   if (nsends) {
701     ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
702     ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
703     ierr = PetscFree(send_status);CHKERRQ(ierr);
704   }
705   ierr = PetscFree(send_waits);CHKERRQ(ierr);
706   ierr = PetscFree(svalues);CHKERRQ(ierr);
707 
708   PetscFunctionReturn(0);
709 }
710 
711 #undef __FUNCT__
712 #define __FUNCT__ "MatMult_MPIAIJ"
713 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
714 {
715   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
716   PetscErrorCode ierr;
717   PetscInt       nt;
718 
719   PetscFunctionBegin;
720   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
721   if (nt != A->cmap.n) {
722     SETERRQ2(PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap.n,nt);
723   }
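  /* overlap communication with computation: start scattering the needed off-process
     entries of xx into lvec, multiply by the diagonal block while the messages are in
     flight, then complete the scatter and add in the off-diagonal block contribution */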
724   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
725   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
726   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
727   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
728   PetscFunctionReturn(0);
729 }
730 
731 #undef __FUNCT__
732 #define __FUNCT__ "MatMultAdd_MPIAIJ"
733 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
734 {
735   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
736   PetscErrorCode ierr;
737 
738   PetscFunctionBegin;
739   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
740   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
741   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
742   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
743   PetscFunctionReturn(0);
744 }
745 
746 #undef __FUNCT__
747 #define __FUNCT__ "MatMultTranspose_MPIAIJ"
748 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
749 {
750   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
751   PetscErrorCode ierr;
752   PetscTruth     merged;
753 
754   PetscFunctionBegin;
755   ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr);
756   /* do nondiagonal part */
757   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
758   if (!merged) {
759     /* send it on its way */
760     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
761     /* do local part */
762     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
763     /* receive remote parts: note this assumes the values are not actually
764        added into yy until the VecScatterEnd() on the next line */
765     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
766   } else {
767     /* do local part */
768     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
769     /* send it on its way */
770     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
771     /* the values were actually received in the Begin(), but we still need to call the (no-op) End() */
772     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
773   }
774   PetscFunctionReturn(0);
775 }
776 
777 EXTERN_C_BEGIN
778 #undef __FUNCT__
779 #define __FUNCT__ "MatIsTranspose_MPIAIJ"
780 PetscErrorCode PETSCMAT_DLLEXPORT MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscTruth *f)
781 {
782   MPI_Comm       comm;
783   Mat_MPIAIJ     *Aij = (Mat_MPIAIJ *) Amat->data, *Bij;
784   Mat            Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs;
785   IS             Me,Notme;
786   PetscErrorCode ierr;
787   PetscInt       M,N,first,last,*notme,i;
788   PetscMPIInt    size;
789 
790   PetscFunctionBegin;
791 
792   /* Easy test: symmetric diagonal block */
793   Bij = (Mat_MPIAIJ *) Bmat->data; Bdia = Bij->A;
794   ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr);
795   if (!*f) PetscFunctionReturn(0);
796   ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr);
797   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
798   if (size == 1) PetscFunctionReturn(0);
799 
800   /* Hard test: off-diagonal block. This takes a MatGetSubMatrix. */
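  /* Notme lists every column NOT owned by this process and Me the locally owned rows;
     A is the transpose of B only if the block A(Me,Notme) equals the transpose of
     B(Notme,Me), which is what the two MatGetSubMatrices() calls extract and the final
     MatIsTranspose() compares */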
801   ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr);
802   ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr);
803   ierr = PetscMalloc((N-last+first)*sizeof(PetscInt),&notme);CHKERRQ(ierr);
804   for (i=0; i<first; i++) notme[i] = i;
805   for (i=last; i<M; i++) notme[i-last+first] = i;
806   ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,&Notme);CHKERRQ(ierr);
807   ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr);
808   ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr);
809   Aoff = Aoffs[0];
810   ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr);
811   Boff = Boffs[0];
812   ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr);
813   ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr);
814   ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr);
815   ierr = ISDestroy(Me);CHKERRQ(ierr);
816   ierr = ISDestroy(Notme);CHKERRQ(ierr);
817 
818   PetscFunctionReturn(0);
819 }
820 EXTERN_C_END
821 
822 #undef __FUNCT__
823 #define __FUNCT__ "MatMultTransposeAdd_MPIAIJ"
824 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
825 {
826   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
827   PetscErrorCode ierr;
828 
829   PetscFunctionBegin;
830   /* do nondiagonal part */
831   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
832   /* send it on its way */
833   ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
834   /* do local part */
835   ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
836   /* receive remote parts */
837   ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
838   PetscFunctionReturn(0);
839 }
840 
841 /*
842   This only works correctly for square matrices where the subblock A->A is the
843    diagonal block
844 */
845 #undef __FUNCT__
846 #define __FUNCT__ "MatGetDiagonal_MPIAIJ"
847 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v)
848 {
849   PetscErrorCode ierr;
850   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
851 
852   PetscFunctionBegin;
853   if (A->rmap.N != A->cmap.N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
854   if (A->rmap.rstart != A->cmap.rstart || A->rmap.rend != A->cmap.rend) {
855     SETERRQ(PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
856   }
857   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
858   PetscFunctionReturn(0);
859 }
860 
861 #undef __FUNCT__
862 #define __FUNCT__ "MatScale_MPIAIJ"
863 PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa)
864 {
865   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
866   PetscErrorCode ierr;
867 
868   PetscFunctionBegin;
869   ierr = MatScale(a->A,aa);CHKERRQ(ierr);
870   ierr = MatScale(a->B,aa);CHKERRQ(ierr);
871   PetscFunctionReturn(0);
872 }
873 
874 #undef __FUNCT__
875 #define __FUNCT__ "MatDestroy_MPIAIJ"
876 PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
877 {
878   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
879   PetscErrorCode ierr;
880 
881   PetscFunctionBegin;
882 #if defined(PETSC_USE_LOG)
883   PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap.N,mat->cmap.N);
884 #endif
885   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
886   ierr = MatDestroy(aij->A);CHKERRQ(ierr);
887   ierr = MatDestroy(aij->B);CHKERRQ(ierr);
888 #if defined (PETSC_USE_CTABLE)
889   if (aij->colmap) {ierr = PetscTableDestroy(aij->colmap);CHKERRQ(ierr);}
890 #else
891   ierr = PetscFree(aij->colmap);CHKERRQ(ierr);
892 #endif
893   ierr = PetscFree(aij->garray);CHKERRQ(ierr);
894   if (aij->lvec)   {ierr = VecDestroy(aij->lvec);CHKERRQ(ierr);}
895   if (aij->Mvctx)  {ierr = VecScatterDestroy(aij->Mvctx);CHKERRQ(ierr);}
896   ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr);
897   ierr = PetscFree(aij->ld);CHKERRQ(ierr);
898   ierr = PetscFree(aij);CHKERRQ(ierr);
899 
900   ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
901   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr);
902   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr);
903   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr);
904   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C","",PETSC_NULL);CHKERRQ(ierr);
905   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
906   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr);
907   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C","",PETSC_NULL);CHKERRQ(ierr);
908   PetscFunctionReturn(0);
909 }
910 
911 #undef __FUNCT__
912 #define __FUNCT__ "MatView_MPIAIJ_Binary"
913 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
914 {
915   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
916   Mat_SeqAIJ*       A = (Mat_SeqAIJ*)aij->A->data;
917   Mat_SeqAIJ*       B = (Mat_SeqAIJ*)aij->B->data;
918   PetscErrorCode    ierr;
919   PetscMPIInt       rank,size,tag = ((PetscObject)viewer)->tag;
920   int               fd;
921   PetscInt          nz,header[4],*row_lengths,*range=0,rlen,i;
922   PetscInt          nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap.rstart,rnz;
923   PetscScalar       *column_values;
924 
925   PetscFunctionBegin;
926   ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr);
927   ierr = MPI_Comm_size(((PetscObject)mat)->comm,&size);CHKERRQ(ierr);
928   nz   = A->nz + B->nz;
929   if (!rank) {
930     header[0] = MAT_FILE_COOKIE;
931     header[1] = mat->rmap.N;
932     header[2] = mat->cmap.N;
933     ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
934     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
935     ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
936     /* get largest number of rows any processor has */
937     rlen = mat->rmap.n;
938     range = mat->rmap.range;
939     for (i=1; i<size; i++) {
940       rlen = PetscMax(rlen,range[i+1] - range[i]);
941     }
942   } else {
943     ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
944     rlen = mat->rmap.n;
945   }
946 
947   /* load up the local row counts */
948   ierr = PetscMalloc((rlen+1)*sizeof(PetscInt),&row_lengths);CHKERRQ(ierr);
949   for (i=0; i<mat->rmap.n; i++) {
950     row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
951   }
952 
953   /* store the row lengths to the file */
954   if (!rank) {
955     MPI_Status status;
956     ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap.n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
957     for (i=1; i<size; i++) {
958       rlen = range[i+1] - range[i];
959       ierr = MPI_Recv(row_lengths,rlen,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
960       ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
961     }
962   } else {
963     ierr = MPI_Send(row_lengths,mat->rmap.n,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
964   }
965   ierr = PetscFree(row_lengths);CHKERRQ(ierr);
966 
967   /* load up the local column indices */
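  /* each local row is emitted in global column order: the off-diagonal (B) entries whose
     global column lies below the diagonal block, then the diagonal-block (A) entries
     shifted by cstart, then the remaining off-diagonal entries */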
968   nzmax = nz; /* the 0th process needs as much buffer space as the largest amount any single process needs */
969   ierr = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
970   ierr = PetscMalloc((nzmax+1)*sizeof(PetscInt),&column_indices);CHKERRQ(ierr);
971   cnt  = 0;
972   for (i=0; i<mat->rmap.n; i++) {
973     for (j=B->i[i]; j<B->i[i+1]; j++) {
974       if ( (col = garray[B->j[j]]) > cstart) break;
975       column_indices[cnt++] = col;
976     }
977     for (k=A->i[i]; k<A->i[i+1]; k++) {
978       column_indices[cnt++] = A->j[k] + cstart;
979     }
980     for (; j<B->i[i+1]; j++) {
981       column_indices[cnt++] = garray[B->j[j]];
982     }
983   }
984   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
985 
986   /* store the column indices to the file */
987   if (!rank) {
988     MPI_Status status;
989     ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
990     for (i=1; i<size; i++) {
991       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
992       if (rnz > nzmax) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax);
993       ierr = MPI_Recv(column_indices,rnz,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
994       ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
995     }
996   } else {
997     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
998     ierr = MPI_Send(column_indices,nz,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
999   }
1000   ierr = PetscFree(column_indices);CHKERRQ(ierr);
1001 
1002   /* load up the local column values */
1003   ierr = PetscMalloc((nzmax+1)*sizeof(PetscScalar),&column_values);CHKERRQ(ierr);
1004   cnt  = 0;
1005   for (i=0; i<mat->rmap.n; i++) {
1006     for (j=B->i[i]; j<B->i[i+1]; j++) {
1007       if ( garray[B->j[j]] > cstart) break;
1008       column_values[cnt++] = B->a[j];
1009     }
1010     for (k=A->i[i]; k<A->i[i+1]; k++) {
1011       column_values[cnt++] = A->a[k];
1012     }
1013     for (; j<B->i[i+1]; j++) {
1014       column_values[cnt++] = B->a[j];
1015     }
1016   }
1017   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
1018 
1019   /* store the column values to the file */
1020   if (!rank) {
1021     MPI_Status status;
1022     ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1023     for (i=1; i<size; i++) {
1024       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
1025       if (rnz > nzmax) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax);
1026       ierr = MPI_Recv(column_values,rnz,MPIU_SCALAR,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
1027       ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1028     }
1029   } else {
1030     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1031     ierr = MPI_Send(column_values,nz,MPIU_SCALAR,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1032   }
1033   ierr = PetscFree(column_values);CHKERRQ(ierr);
1034   PetscFunctionReturn(0);
1035 }
1036 
1037 #undef __FUNCT__
1038 #define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket"
1039 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
1040 {
1041   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
1042   PetscErrorCode    ierr;
1043   PetscMPIInt       rank = aij->rank,size = aij->size;
1044   PetscTruth        isdraw,iascii,isbinary;
1045   PetscViewer       sviewer;
1046   PetscViewerFormat format;
1047 
1048   PetscFunctionBegin;
1049   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
1050   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
1051   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
1052   if (iascii) {
1053     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1054     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1055       MatInfo    info;
1056       PetscTruth inodes;
1057 
1058       ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr);
1059       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
1060       ierr = MatInodeGetInodeSizes(aij->A,PETSC_NULL,(PetscInt **)&inodes,PETSC_NULL);CHKERRQ(ierr);
1061       if (!inodes) {
1062         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, not using I-node routines\n",
1063 					      rank,mat->rmap.n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
1064       } else {
1065         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, using I-node routines\n",
1066 		    rank,mat->rmap.n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
1067       }
1068       ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
1069       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1070       ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
1071       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1072       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1073       ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
1074       ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr);
1075       PetscFunctionReturn(0);
1076     } else if (format == PETSC_VIEWER_ASCII_INFO) {
1077       PetscInt   inodecount,inodelimit,*inodes;
1078       ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr);
1079       if (inodes) {
1080         ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr);
1081       } else {
1082         ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr);
1083       }
1084       PetscFunctionReturn(0);
1085     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1086       PetscFunctionReturn(0);
1087     }
1088   } else if (isbinary) {
1089     if (size == 1) {
1090       ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1091       ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
1092     } else {
1093       ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr);
1094     }
1095     PetscFunctionReturn(0);
1096   } else if (isdraw) {
1097     PetscDraw  draw;
1098     PetscTruth isnull;
1099     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
1100     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
1101   }
1102 
1103   if (size == 1) {
1104     ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1105     ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
1106   } else {
1107     /* assemble the entire matrix onto first processor. */
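    /* this is done by creating a temporary parallel matrix whose rows all live on process 0,
       re-inserting the local diagonal (A) and off-diagonal (B) parts with global column
       indices, and finally viewing that matrix's sequential diagonal block on process 0 */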
1108     Mat         A;
1109     Mat_SeqAIJ  *Aloc;
1110     PetscInt    M = mat->rmap.N,N = mat->cmap.N,m,*ai,*aj,row,*cols,i,*ct;
1111     MatScalar   *a;
1112 
1113     if (mat->rmap.N > 1024) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"ASCII matrix output not allowed for matrices with more than 1024 rows, use binary format instead");
1114 
1115     ierr = MatCreate(((PetscObject)mat)->comm,&A);CHKERRQ(ierr);
1116     if (!rank) {
1117       ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
1118     } else {
1119       ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
1120     }
1121     /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */
1122     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
1123     ierr = MatMPIAIJSetPreallocation(A,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
1124     ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr);
1125 
1126     /* copy over the A part */
1127     Aloc = (Mat_SeqAIJ*)aij->A->data;
1128     m = aij->A->rmap.n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1129     row = mat->rmap.rstart;
1130     for (i=0; i<ai[m]; i++) {aj[i] += mat->cmap.rstart ;}
1131     for (i=0; i<m; i++) {
1132       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
1133       row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
1134     }
1135     aj = Aloc->j;
1136     for (i=0; i<ai[m]; i++) {aj[i] -= mat->cmap.rstart;}
1137 
1138     /* copy over the B part */
1139     Aloc = (Mat_SeqAIJ*)aij->B->data;
1140     m    = aij->B->rmap.n;  ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1141     row  = mat->rmap.rstart;
1142     ierr = PetscMalloc((ai[m]+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr);
1143     ct   = cols;
1144     for (i=0; i<ai[m]; i++) {cols[i] = aij->garray[aj[i]];}
1145     for (i=0; i<m; i++) {
1146       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
1147       row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
1148     }
1149     ierr = PetscFree(ct);CHKERRQ(ierr);
1150     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1151     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1152     /*
1153        Everyone has to participate in drawing the matrix since the graphics waits are
1154        synchronized across all processors that share the PetscDraw object
1155     */
1156     ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
1157     if (!rank) {
1158       ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1159       ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
1160     }
1161     ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
1162     ierr = MatDestroy(A);CHKERRQ(ierr);
1163   }
1164   PetscFunctionReturn(0);
1165 }
1166 
1167 #undef __FUNCT__
1168 #define __FUNCT__ "MatView_MPIAIJ"
1169 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer)
1170 {
1171   PetscErrorCode ierr;
1172   PetscTruth     iascii,isdraw,issocket,isbinary;
1173 
1174   PetscFunctionBegin;
1175   ierr  = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
1176   ierr  = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
1177   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
1178   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr);
1179   if (iascii || isdraw || isbinary || issocket) {
1180     ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
1181   } else {
1182     SETERRQ1(PETSC_ERR_SUP,"Viewer type %s not supported by MPIAIJ matrices",((PetscObject)viewer)->type_name);
1183   }
1184   PetscFunctionReturn(0);
1185 }
1186 
1187 #undef __FUNCT__
1188 #define __FUNCT__ "MatRelax_MPIAIJ"
1189 PetscErrorCode MatRelax_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
1190 {
1191   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1192   PetscErrorCode ierr;
1193   Vec            bb1;
1194 
1195   PetscFunctionBegin;
1196   ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
1197 
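  /* Each pass below: scatter the current solution so lvec holds the needed off-process
     entries, form bb1 = bb - B*lvec so the off-process coupling enters as a modified
     right-hand side, then apply the requested SOR sweep of the sequential diagonal
     block A to bb1 */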
1198   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
1199     if (flag & SOR_ZERO_INITIAL_GUESS) {
1200       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr);
1201       its--;
1202     }
1203 
1204     while (its--) {
1205       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1206       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1207 
1208       /* update rhs: bb1 = bb - B*x */
1209       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1210       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1211 
1212       /* local sweep */
1213       ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr);
1214     }
1215   } else if (flag & SOR_LOCAL_FORWARD_SWEEP){
1216     if (flag & SOR_ZERO_INITIAL_GUESS) {
1217       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr);
1218       its--;
1219     }
1220     while (its--) {
1221       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1222       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1223 
1224       /* update rhs: bb1 = bb - B*x */
1225       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1226       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1227 
1228       /* local sweep */
1229       ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr);
1230     }
1231   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){
1232     if (flag & SOR_ZERO_INITIAL_GUESS) {
1233       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr);
1234       its--;
1235     }
1236     while (its--) {
1237       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1238       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1239 
1240       /* update rhs: bb1 = bb - B*x */
1241       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1242       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1243 
1244       /* local sweep */
1245       ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr);
1246     }
1247   } else {
1248     SETERRQ(PETSC_ERR_SUP,"Parallel SOR not supported");
1249   }
1250 
1251   ierr = VecDestroy(bb1);CHKERRQ(ierr);
1252   PetscFunctionReturn(0);
1253 }
1254 
1255 #undef __FUNCT__
1256 #define __FUNCT__ "MatPermute_MPIAIJ"
1257 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B)
1258 {
1259   MPI_Comm       comm,pcomm;
1260   PetscInt       first,local_size,nrows,*rows;
1261   int            ntids;
1262   IS             crowp,growp,irowp,lrowp,lcolp,icolp;
1263   PetscErrorCode ierr;
1264 
1265   PetscFunctionBegin;
1266   ierr = PetscObjectGetComm((PetscObject)A,&comm); CHKERRQ(ierr);
1267   /* make a collective version of 'rowp' */
1268   ierr = PetscObjectGetComm((PetscObject)rowp,&pcomm); CHKERRQ(ierr);
1269   if (pcomm==comm) {
1270     crowp = rowp;
1271   } else {
1272     ierr = ISGetSize(rowp,&nrows); CHKERRQ(ierr);
1273     ierr = ISGetIndices(rowp,&rows); CHKERRQ(ierr);
1274     ierr = ISCreateGeneral(comm,nrows,rows,&crowp); CHKERRQ(ierr);
1275     ierr = ISRestoreIndices(rowp,&rows); CHKERRQ(ierr);
1276   }
1277   /* collect the global row permutation and invert it */
1278   ierr = ISAllGather(crowp,&growp); CHKERRQ(ierr);
1279   ierr = ISSetPermutation(growp); CHKERRQ(ierr);
1280   if (pcomm!=comm) {
1281     ierr = ISDestroy(crowp); CHKERRQ(ierr);
1282   }
1283   ierr = ISInvertPermutation(growp,PETSC_DECIDE,&irowp);CHKERRQ(ierr);
1284   /* get the local target indices */
1285   ierr = MatGetOwnershipRange(A,&first,PETSC_NULL); CHKERRQ(ierr);
1286   ierr = MatGetLocalSize(A,&local_size,PETSC_NULL); CHKERRQ(ierr);
1287   ierr = ISGetIndices(irowp,&rows); CHKERRQ(ierr);
1288   ierr = ISCreateGeneral(MPI_COMM_SELF,local_size,rows+first,&lrowp); CHKERRQ(ierr);
1289   ierr = ISRestoreIndices(irowp,&rows); CHKERRQ(ierr);
1290   ierr = ISDestroy(irowp); CHKERRQ(ierr);
1291   /* the column permutation is so much easier;
1292      make a local version of 'colp' and invert it */
1293   ierr = PetscObjectGetComm((PetscObject)colp,&pcomm); CHKERRQ(ierr);
1294   ierr = MPI_Comm_size(pcomm,&ntids); CHKERRQ(ierr);
1295   if (ntids==1) {
1296     lcolp = colp;
1297   } else {
1298     ierr = ISGetSize(colp,&nrows); CHKERRQ(ierr);
1299     ierr = ISGetIndices(colp,&rows); CHKERRQ(ierr);
1300     ierr = ISCreateGeneral(MPI_COMM_SELF,nrows,rows,&lcolp); CHKERRQ(ierr);
1301   }
1302   ierr = ISInvertPermutation(lcolp,PETSC_DECIDE,&icolp); CHKERRQ(ierr);
1303   ierr = ISSetPermutation(lcolp); CHKERRQ(ierr);
1304   if (ntids>1) {
1305     ierr = ISRestoreIndices(colp,&rows); CHKERRQ(ierr);
1306     ierr = ISDestroy(lcolp); CHKERRQ(ierr);
1307   }
1308   /* now we just get the submatrix */
1309   ierr = MatGetSubMatrix(A,lrowp,icolp,local_size,MAT_INITIAL_MATRIX,B); CHKERRQ(ierr);
1310   /* clean up */
1311   ierr = ISDestroy(lrowp); CHKERRQ(ierr);
1312   ierr = ISDestroy(icolp); CHKERRQ(ierr);
1313   PetscFunctionReturn(0);
1314 }
1315 
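/*
   Hedged usage sketch for the routine above, reached through MatPermute().
   Here A is an assembled MPIAIJ matrix and rowp/colp are illustrative index
   sets describing row and column permutations; error checking omitted:

       Mat Aperm;
       ISSetPermutation(rowp);
       ISSetPermutation(colp);
       MatPermute(A,rowp,colp,&Aperm);
       ... use Aperm ...
       MatDestroy(Aperm);
*/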
1316 #undef __FUNCT__
1317 #define __FUNCT__ "MatGetInfo_MPIAIJ"
1318 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1319 {
1320   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1321   Mat            A = mat->A,B = mat->B;
1322   PetscErrorCode ierr;
1323   PetscReal      isend[5],irecv[5];
1324 
1325   PetscFunctionBegin;
1326   info->block_size     = 1.0;
1327   ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1328   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1329   isend[3] = info->memory;  isend[4] = info->mallocs;
1330   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1331   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1332   isend[3] += info->memory;  isend[4] += info->mallocs;
1333   if (flag == MAT_LOCAL) {
1334     info->nz_used      = isend[0];
1335     info->nz_allocated = isend[1];
1336     info->nz_unneeded  = isend[2];
1337     info->memory       = isend[3];
1338     info->mallocs      = isend[4];
1339   } else if (flag == MAT_GLOBAL_MAX) {
1340     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,((PetscObject)matin)->comm);CHKERRQ(ierr);
1341     info->nz_used      = irecv[0];
1342     info->nz_allocated = irecv[1];
1343     info->nz_unneeded  = irecv[2];
1344     info->memory       = irecv[3];
1345     info->mallocs      = irecv[4];
1346   } else if (flag == MAT_GLOBAL_SUM) {
1347     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,((PetscObject)matin)->comm);CHKERRQ(ierr);
1348     info->nz_used      = irecv[0];
1349     info->nz_allocated = irecv[1];
1350     info->nz_unneeded  = irecv[2];
1351     info->memory       = irecv[3];
1352     info->mallocs      = irecv[4];
1353   }
1354   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1355   info->fill_ratio_needed = 0;
1356   info->factor_mallocs    = 0;
1357   info->rows_global       = (double)matin->rmap.N;
1358   info->columns_global    = (double)matin->cmap.N;
1359   info->rows_local        = (double)matin->rmap.n;
1360   info->columns_local     = (double)matin->cmap.N;
1361 
1362   PetscFunctionReturn(0);
1363 }
1364 
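/*
   Example (sketch) of querying the information assembled above through the
   public interface; the MatInfo fields are summed over all processes with
   MAT_GLOBAL_SUM and maximized with MAT_GLOBAL_MAX:

       MatInfo info;
       MatGetInfo(A,MAT_GLOBAL_SUM,&info);
       PetscPrintf(PETSC_COMM_WORLD,"nz used %g, nz allocated %g, mallocs %g\n",
                   info.nz_used,info.nz_allocated,info.mallocs);
*/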
1365 #undef __FUNCT__
1366 #define __FUNCT__ "MatSetOption_MPIAIJ"
1367 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscTruth flg)
1368 {
1369   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1370   PetscErrorCode ierr;
1371 
1372   PetscFunctionBegin;
1373   switch (op) {
1374   case MAT_NEW_NONZERO_LOCATIONS:
1375   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1376   case MAT_KEEP_ZEROED_ROWS:
1377   case MAT_NEW_NONZERO_LOCATION_ERR:
1378   case MAT_USE_INODES:
1379   case MAT_IGNORE_ZERO_ENTRIES:
1380     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1381     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1382     break;
1383   case MAT_ROW_ORIENTED:
1384     a->roworiented = flg;
1385     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1386     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1387     break;
1388   case MAT_NEW_DIAGONALS:
1389     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1390     break;
1391   case MAT_IGNORE_OFF_PROC_ENTRIES:
1392     a->donotstash = PETSC_TRUE;
1393     break;
1394   case MAT_SYMMETRIC:
1395     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1396     break;
1397   case MAT_STRUCTURALLY_SYMMETRIC:
1398   case MAT_HERMITIAN:
1399   case MAT_SYMMETRY_ETERNAL:
1400     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1401     break;
1402   default:
1403     SETERRQ1(PETSC_ERR_SUP,"unknown option %d",op);
1404   }
1405   PetscFunctionReturn(0);
1406 }
1407 
1408 #undef __FUNCT__
1409 #define __FUNCT__ "MatGetRow_MPIAIJ"
1410 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1411 {
1412   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1413   PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
1414   PetscErrorCode ierr;
1415   PetscInt       i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap.rstart;
1416   PetscInt       nztot,nzA,nzB,lrow,rstart = matin->rmap.rstart,rend = matin->rmap.rend;
1417   PetscInt       *cmap,*idx_p;
1418 
1419   PetscFunctionBegin;
1420   if (mat->getrowactive) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Already active");
1421   mat->getrowactive = PETSC_TRUE;
1422 
1423   if (!mat->rowvalues && (idx || v)) {
1424     /*
1425         allocate enough space to hold information from the longest row.
1426     */
1427     Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data;
1428     PetscInt     max = 1,tmp;
1429     for (i=0; i<matin->rmap.n; i++) {
1430       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1431       if (max < tmp) { max = tmp; }
1432     }
1433     ierr = PetscMalloc(max*(sizeof(PetscInt)+sizeof(PetscScalar)),&mat->rowvalues);CHKERRQ(ierr);
1434     mat->rowindices = (PetscInt*)(mat->rowvalues + max);
1435   }
1436 
1437   if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Only local rows");
1438   lrow = row - rstart;
1439 
1440   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1441   if (!v)   {pvA = 0; pvB = 0;}
1442   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1443   ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1444   ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1445   nztot = nzA + nzB;
1446 
1447   cmap  = mat->garray;
1448   if (v  || idx) {
1449     if (nztot) {
1450       /* Sort by increasing column numbers, assuming A and B already sorted */
1451       PetscInt imark = -1;
1452       if (v) {
1453         *v = v_p = mat->rowvalues;
1454         for (i=0; i<nzB; i++) {
1455           if (cmap[cworkB[i]] < cstart)   v_p[i] = vworkB[i];
1456           else break;
1457         }
1458         imark = i;
1459         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1460         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1461       }
1462       if (idx) {
1463         *idx = idx_p = mat->rowindices;
1464         if (imark > -1) {
1465           for (i=0; i<imark; i++) {
1466             idx_p[i] = cmap[cworkB[i]];
1467           }
1468         } else {
1469           for (i=0; i<nzB; i++) {
1470             if (cmap[cworkB[i]] < cstart)   idx_p[i] = cmap[cworkB[i]];
1471             else break;
1472           }
1473           imark = i;
1474         }
1475         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart + cworkA[i];
1476         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]];
1477       }
1478     } else {
1479       if (idx) *idx = 0;
1480       if (v)   *v   = 0;
1481     }
1482   }
1483   *nz = nztot;
1484   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1485   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1486   PetscFunctionReturn(0);
1487 }
1488 
1489 #undef __FUNCT__
1490 #define __FUNCT__ "MatRestoreRow_MPIAIJ"
1491 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1492 {
1493   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1494 
1495   PetscFunctionBegin;
1496   if (!aij->getrowactive) {
1497     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
1498   }
1499   aij->getrowactive = PETSC_FALSE;
1500   PetscFunctionReturn(0);
1501 }
1502 
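/*
   Typical use of the two routines above via MatGetRow()/MatRestoreRow(),
   looping over the locally owned rows (sketch only; declarations abbreviated,
   and the returned arrays are valid only until MatRestoreRow() is called):

       PetscInt    row,rstart,rend,ncols,*cols;
       PetscScalar *vals;
       MatGetOwnershipRange(A,&rstart,&rend);
       for (row=rstart; row<rend; row++) {
         MatGetRow(A,row,&ncols,&cols,&vals);
         ... use cols[0..ncols-1] and vals[0..ncols-1] ...
         MatRestoreRow(A,row,&ncols,&cols,&vals);
       }
*/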
1503 #undef __FUNCT__
1504 #define __FUNCT__ "MatNorm_MPIAIJ"
1505 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm)
1506 {
1507   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1508   Mat_SeqAIJ     *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data;
1509   PetscErrorCode ierr;
1510   PetscInt       i,j,cstart = mat->cmap.rstart;
1511   PetscReal      sum = 0.0;
1512   MatScalar      *v;
1513 
1514   PetscFunctionBegin;
1515   if (aij->size == 1) {
1516     ierr =  MatNorm(aij->A,type,norm);CHKERRQ(ierr);
1517   } else {
1518     if (type == NORM_FROBENIUS) {
1519       v = amat->a;
1520       for (i=0; i<amat->nz; i++) {
1521 #if defined(PETSC_USE_COMPLEX)
1522         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1523 #else
1524         sum += (*v)*(*v); v++;
1525 #endif
1526       }
1527       v = bmat->a;
1528       for (i=0; i<bmat->nz; i++) {
1529 #if defined(PETSC_USE_COMPLEX)
1530         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1531 #else
1532         sum += (*v)*(*v); v++;
1533 #endif
1534       }
1535       ierr = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPI_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr);
1536       *norm = sqrt(*norm);
1537     } else if (type == NORM_1) { /* max column norm */
1538       PetscReal *tmp,*tmp2;
1539       PetscInt  *jj,*garray = aij->garray;
1540       ierr = PetscMalloc((mat->cmap.N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr);
1541       ierr = PetscMalloc((mat->cmap.N+1)*sizeof(PetscReal),&tmp2);CHKERRQ(ierr);
1542       ierr = PetscMemzero(tmp,mat->cmap.N*sizeof(PetscReal));CHKERRQ(ierr);
1543       *norm = 0.0;
1544       v = amat->a; jj = amat->j;
1545       for (j=0; j<amat->nz; j++) {
1546         tmp[cstart + *jj++ ] += PetscAbsScalar(*v);  v++;
1547       }
1548       v = bmat->a; jj = bmat->j;
1549       for (j=0; j<bmat->nz; j++) {
1550         tmp[garray[*jj++]] += PetscAbsScalar(*v); v++;
1551       }
1552       ierr = MPI_Allreduce(tmp,tmp2,mat->cmap.N,MPIU_REAL,MPI_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr);
1553       for (j=0; j<mat->cmap.N; j++) {
1554         if (tmp2[j] > *norm) *norm = tmp2[j];
1555       }
1556       ierr = PetscFree(tmp);CHKERRQ(ierr);
1557       ierr = PetscFree(tmp2);CHKERRQ(ierr);
1558     } else if (type == NORM_INFINITY) { /* max row norm */
1559       PetscReal ntemp = 0.0;
1560       for (j=0; j<aij->A->rmap.n; j++) {
1561         v = amat->a + amat->i[j];
1562         sum = 0.0;
1563         for (i=0; i<amat->i[j+1]-amat->i[j]; i++) {
1564           sum += PetscAbsScalar(*v); v++;
1565         }
1566         v = bmat->a + bmat->i[j];
1567         for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) {
1568           sum += PetscAbsScalar(*v); v++;
1569         }
1570         if (sum > ntemp) ntemp = sum;
1571       }
1572       ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPI_MAX,((PetscObject)mat)->comm);CHKERRQ(ierr);
1573     } else {
1574       SETERRQ(PETSC_ERR_SUP,"No support for two norm");
1575     }
1576   }
1577   PetscFunctionReturn(0);
1578 }
1579 
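/*
   The routine above supports NORM_1, NORM_FROBENIUS and NORM_INFINITY in the
   parallel case; NORM_2 is rejected.  Sketch of a call:

       PetscReal nrm;
       MatNorm(A,NORM_FROBENIUS,&nrm);
*/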
1580 #undef __FUNCT__
1581 #define __FUNCT__ "MatTranspose_MPIAIJ"
1582 PetscErrorCode MatTranspose_MPIAIJ(Mat A,Mat *matout)
1583 {
1584   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1585   Mat_SeqAIJ     *Aloc=(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data;
1586   PetscErrorCode ierr;
1587   PetscInt       M = A->rmap.N,N = A->cmap.N,ma,na,mb,*ai,*aj,*bi,*bj,row,*cols,i,*d_nnz;
1588   PetscInt       cstart=A->cmap.rstart,ncol;
1589   Mat            B;
1590   MatScalar      *array;
1591 
1592   PetscFunctionBegin;
1593   if (!matout && M != N) SETERRQ(PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
1594 
1595   /* compute d_nnz for preallocation; o_nnz is approximated by d_nnz to avoid communication */
1596   ma = A->rmap.n; na = A->cmap.n; mb = a->B->rmap.n;
1597   ai = Aloc->i; aj = Aloc->j;
1598   bi = Bloc->i; bj = Bloc->j;
1599   ierr = PetscMalloc((1+na+bi[mb])*sizeof(PetscInt),&d_nnz);CHKERRQ(ierr);
1600   cols = d_nnz + na + 1; /* work space to be used by B part */
1601   ierr = PetscMemzero(d_nnz,(1+na)*sizeof(PetscInt));CHKERRQ(ierr);
1602   for (i=0; i<ai[ma]; i++){
1603     d_nnz[aj[i]] ++;
1604     aj[i] += cstart; /* global col index to be used by MatSetValues() */
1605   }
1606 
1607   ierr = MatCreate(((PetscObject)A)->comm,&B);CHKERRQ(ierr);
1608   ierr = MatSetSizes(B,A->cmap.n,A->rmap.n,N,M);CHKERRQ(ierr);
1609   ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
1610   ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,d_nnz);CHKERRQ(ierr);
1611 
1612   /* copy over the A part */
1613   array = Aloc->a;
1614   row = A->rmap.rstart;
1615   for (i=0; i<ma; i++) {
1616     ncol = ai[i+1]-ai[i];
1617     ierr = MatSetValues(B,ncol,aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1618     row++; array += ncol; aj += ncol;
1619   }
1620   aj = Aloc->j;
1621   for (i=0; i<ai[ma]; i++) aj[i] -= cstart; /* restore local col indices */
1622 
1623   /* copy over the B part */
1624   array = Bloc->a;
1625   row = A->rmap.rstart;
1626   for (i=0; i<bi[mb]; i++) {cols[i] = a->garray[bj[i]];}
1627   for (i=0; i<mb; i++) {
1628     ncol = bi[i+1]-bi[i];
1629     ierr = MatSetValues(B,ncol,cols,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1630     row++; array += ncol; cols += ncol;
1631   }
1632   ierr = PetscFree(d_nnz);CHKERRQ(ierr);
1633   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1634   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1635   if (matout) {
1636     *matout = B;
1637   } else {
1638     ierr = MatHeaderCopy(A,B);CHKERRQ(ierr);
1639   }
1640   PetscFunctionReturn(0);
1641 }
1642 
1643 #undef __FUNCT__
1644 #define __FUNCT__ "MatDiagonalScale_MPIAIJ"
1645 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
1646 {
1647   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1648   Mat            a = aij->A,b = aij->B;
1649   PetscErrorCode ierr;
1650   PetscInt       s1,s2,s3;
1651 
1652   PetscFunctionBegin;
1653   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
1654   if (rr) {
1655     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
1656     if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
1657     /* Overlap communication with computation. */
1658     ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1659   }
1660   if (ll) {
1661     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
1662     if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
1663     ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr);
1664   }
1665   /* scale  the diagonal block */
1666   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
1667 
1668   if (rr) {
1669     /* Do a scatter end and then right scale the off-diagonal block */
1670     ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1671     ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr);
1672   }
1673 
1674   PetscFunctionReturn(0);
1675 }
1676 
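/*
   The scatter/compute overlap above is hidden behind MatDiagonalScale(); the
   caller only supplies scaling vectors whose local sizes conform to the row
   and column layout of the matrix (sketch; ll and rr are illustrative names):

       MatDiagonalScale(A,ll,rr);     scales A to diag(ll) * A * diag(rr)
*/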
1677 #undef __FUNCT__
1678 #define __FUNCT__ "MatSetBlockSize_MPIAIJ"
1679 PetscErrorCode MatSetBlockSize_MPIAIJ(Mat A,PetscInt bs)
1680 {
1681   Mat_MPIAIJ     *a   = (Mat_MPIAIJ*)A->data;
1682   PetscErrorCode ierr;
1683 
1684   PetscFunctionBegin;
1685   ierr = MatSetBlockSize(a->A,bs);CHKERRQ(ierr);
1686   ierr = MatSetBlockSize(a->B,bs);CHKERRQ(ierr);
1687   PetscFunctionReturn(0);
1688 }
1689 #undef __FUNCT__
1690 #define __FUNCT__ "MatSetUnfactored_MPIAIJ"
1691 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A)
1692 {
1693   Mat_MPIAIJ     *a   = (Mat_MPIAIJ*)A->data;
1694   PetscErrorCode ierr;
1695 
1696   PetscFunctionBegin;
1697   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
1698   PetscFunctionReturn(0);
1699 }
1700 
1701 #undef __FUNCT__
1702 #define __FUNCT__ "MatEqual_MPIAIJ"
1703 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscTruth *flag)
1704 {
1705   Mat_MPIAIJ     *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data;
1706   Mat            a,b,c,d;
1707   PetscTruth     flg;
1708   PetscErrorCode ierr;
1709 
1710   PetscFunctionBegin;
1711   a = matA->A; b = matA->B;
1712   c = matB->A; d = matB->B;
1713 
1714   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
1715   if (flg) {
1716     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
1717   }
1718   ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,((PetscObject)A)->comm);CHKERRQ(ierr);
1719   PetscFunctionReturn(0);
1720 }
1721 
1722 #undef __FUNCT__
1723 #define __FUNCT__ "MatCopy_MPIAIJ"
1724 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
1725 {
1726   PetscErrorCode ierr;
1727   Mat_MPIAIJ     *a = (Mat_MPIAIJ *)A->data;
1728   Mat_MPIAIJ     *b = (Mat_MPIAIJ *)B->data;
1729 
1730   PetscFunctionBegin;
1731   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
1732   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
1733     /* because of the column compression in the off-processor part of the matrix a->B,
1734        the number of columns in a->B and b->B may differ, hence MatCopy() cannot be
1735        called directly on the two parts. If need be, a copy more efficient than
1736        MatCopy_Basic() could be provided by first uncompressing the a->B matrices
1737        and then copying the submatrices */
1738     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
1739   } else {
1740     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
1741     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
1742   }
1743   PetscFunctionReturn(0);
1744 }
1745 
1746 #undef __FUNCT__
1747 #define __FUNCT__ "MatSetUpPreallocation_MPIAIJ"
1748 PetscErrorCode MatSetUpPreallocation_MPIAIJ(Mat A)
1749 {
1750   PetscErrorCode ierr;
1751 
1752   PetscFunctionBegin;
1753   ierr =  MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
1754   PetscFunctionReturn(0);
1755 }
1756 
1757 #include "petscblaslapack.h"
1758 #undef __FUNCT__
1759 #define __FUNCT__ "MatAXPY_MPIAIJ"
1760 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
1761 {
1762   PetscErrorCode ierr;
1763   PetscInt       i;
1764   Mat_MPIAIJ     *xx = (Mat_MPIAIJ *)X->data,*yy = (Mat_MPIAIJ *)Y->data;
1765   PetscBLASInt   bnz,one=1;
1766   Mat_SeqAIJ     *x,*y;
1767 
1768   PetscFunctionBegin;
1769   if (str == SAME_NONZERO_PATTERN) {
1770     PetscScalar alpha = a;
1771     x = (Mat_SeqAIJ *)xx->A->data;
1772     y = (Mat_SeqAIJ *)yy->A->data;
1773     bnz = PetscBLASIntCast(x->nz);
1774     BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
1775     x = (Mat_SeqAIJ *)xx->B->data;
1776     y = (Mat_SeqAIJ *)yy->B->data;
1777     bnz = PetscBLASIntCast(x->nz);
1778     BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
1779   } else if (str == SUBSET_NONZERO_PATTERN) {
1780     ierr = MatAXPY_SeqAIJ(yy->A,a,xx->A,str);CHKERRQ(ierr);
1781 
1782     x = (Mat_SeqAIJ *)xx->B->data;
1783     y = (Mat_SeqAIJ *)yy->B->data;
1784     if (y->xtoy && y->XtoY != xx->B) {
1785       ierr = PetscFree(y->xtoy);CHKERRQ(ierr);
1786       ierr = MatDestroy(y->XtoY);CHKERRQ(ierr);
1787     }
1788     if (!y->xtoy) { /* get xtoy */
1789       ierr = MatAXPYGetxtoy_Private(xx->B->rmap.n,x->i,x->j,xx->garray,y->i,y->j,yy->garray,&y->xtoy);CHKERRQ(ierr);
1790       y->XtoY = xx->B;
1791       ierr = PetscObjectReference((PetscObject)xx->B);CHKERRQ(ierr);
1792     }
1793     for (i=0; i<x->nz; i++) y->a[y->xtoy[i]] += a*(x->a[i]);
1794   } else {
1795     ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
1796   }
1797   PetscFunctionReturn(0);
1798 }
1799 
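/*
   MatAXPY() computes Y = a*X + Y; with SAME_NONZERO_PATTERN the code above
   reduces to two BLAS axpy calls on the stored values of the diagonal and
   off-diagonal blocks.  Sketch of a call:

       MatAXPY(Y,2.0,X,SAME_NONZERO_PATTERN);
*/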
1800 EXTERN PetscErrorCode PETSCMAT_DLLEXPORT MatConjugate_SeqAIJ(Mat);
1801 
1802 #undef __FUNCT__
1803 #define __FUNCT__ "MatConjugate_MPIAIJ"
1804 PetscErrorCode PETSCMAT_DLLEXPORT MatConjugate_MPIAIJ(Mat mat)
1805 {
1806 #if defined(PETSC_USE_COMPLEX)
1807   PetscErrorCode ierr;
1808   Mat_MPIAIJ     *aij = (Mat_MPIAIJ *)mat->data;
1809 
1810   PetscFunctionBegin;
1811   ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr);
1812   ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr);
1813 #else
1814   PetscFunctionBegin;
1815 #endif
1816   PetscFunctionReturn(0);
1817 }
1818 
1819 #undef __FUNCT__
1820 #define __FUNCT__ "MatRealPart_MPIAIJ"
1821 PetscErrorCode MatRealPart_MPIAIJ(Mat A)
1822 {
1823   Mat_MPIAIJ   *a = (Mat_MPIAIJ*)A->data;
1824   PetscErrorCode ierr;
1825 
1826   PetscFunctionBegin;
1827   ierr = MatRealPart(a->A);CHKERRQ(ierr);
1828   ierr = MatRealPart(a->B);CHKERRQ(ierr);
1829   PetscFunctionReturn(0);
1830 }
1831 
1832 #undef __FUNCT__
1833 #define __FUNCT__ "MatImaginaryPart_MPIAIJ"
1834 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A)
1835 {
1836   Mat_MPIAIJ   *a = (Mat_MPIAIJ*)A->data;
1837   PetscErrorCode ierr;
1838 
1839   PetscFunctionBegin;
1840   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
1841   ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
1842   PetscFunctionReturn(0);
1843 }
1844 
1845 #ifdef PETSC_HAVE_PBGL
1846 
1847 #include <boost/parallel/mpi/bsp_process_group.hpp>
1848 #include <boost/graph/distributed/ilu_default_graph.hpp>
1849 #include <boost/graph/distributed/ilu_0_block.hpp>
1850 #include <boost/graph/distributed/ilu_preconditioner.hpp>
1851 #include <boost/graph/distributed/petsc/interface.hpp>
1852 #include <boost/multi_array.hpp>
1853 #include <boost/parallel/distributed_property_map.hpp>
1854 
1855 #undef __FUNCT__
1856 #define __FUNCT__ "MatILUFactorSymbolic_MPIAIJ"
1857 /*
1858   This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu>
1859 */
1860 PetscErrorCode MatILUFactorSymbolic_MPIAIJ(Mat A, IS isrow, IS iscol, MatFactorInfo *info, Mat *fact)
1861 {
1862   namespace petsc = boost::distributed::petsc;
1863 
1864   namespace graph_dist = boost::graph::distributed;
1865   using boost::graph::distributed::ilu_default::process_group_type;
1866   using boost::graph::ilu_permuted;
1867 
1868   PetscTruth      row_identity, col_identity;
1869   PetscContainer  c;
1870   PetscInt        m, n, M, N;
1871   PetscErrorCode  ierr;
1872 
1873   PetscFunctionBegin;
1874   if (info->levels != 0) SETERRQ(PETSC_ERR_SUP,"Only levels = 0 supported for parallel ilu");
1875   ierr = ISIdentity(isrow, &row_identity);CHKERRQ(ierr);
1876   ierr = ISIdentity(iscol, &col_identity);CHKERRQ(ierr);
1877   if (!row_identity || !col_identity) {
1878     SETERRQ(PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for parallel ILU");
1879   }
1880 
1881   process_group_type pg;
1882   typedef graph_dist::ilu_default::ilu_level_graph_type  lgraph_type;
1883   lgraph_type*   lgraph_p = new lgraph_type(petsc::num_global_vertices(A), pg, petsc::matrix_distribution(A, pg));
1884   lgraph_type&   level_graph = *lgraph_p;
1885   graph_dist::ilu_default::graph_type&            graph(level_graph.graph);
1886 
1887   petsc::read_matrix(A, graph, get(boost::edge_weight, graph));
1888   ilu_permuted(level_graph);
1889 
1890   /* put together the new matrix */
1891   ierr = MatCreate(((PetscObject)A)->comm, fact);CHKERRQ(ierr);
1892   ierr = MatGetLocalSize(A, &m, &n);CHKERRQ(ierr);
1893   ierr = MatGetSize(A, &M, &N);CHKERRQ(ierr);
1894   ierr = MatSetSizes(*fact, m, n, M, N);CHKERRQ(ierr);
1895   ierr = MatSetType(*fact, ((PetscObject)A)->type_name);CHKERRQ(ierr);
1896   ierr = MatAssemblyBegin(*fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1897   ierr = MatAssemblyEnd(*fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1898   (*fact)->factor = FACTOR_LU;
1899 
1900   ierr = PetscContainerCreate(((PetscObject)A)->comm, &c);CHKERRQ(ierr);
1901   ierr = PetscContainerSetPointer(c, lgraph_p);CHKERRQ(ierr);
1902   ierr = PetscObjectCompose((PetscObject) (*fact), "graph", (PetscObject) c);CHKERRQ(ierr);
1903   PetscFunctionReturn(0);
1904 }
1905 
1906 #undef __FUNCT__
1907 #define __FUNCT__ "MatLUFactorNumeric_MPIAIJ"
1908 PetscErrorCode MatLUFactorNumeric_MPIAIJ(Mat A, MatFactorInfo *info, Mat *B)
1909 {
1910   PetscFunctionBegin;
1911   PetscFunctionReturn(0);
1912 }
1913 
1914 #undef __FUNCT__
1915 #define __FUNCT__ "MatSolve_MPIAIJ"
1916 /*
1917   This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu>
1918 */
1919 PetscErrorCode MatSolve_MPIAIJ(Mat A, Vec b, Vec x)
1920 {
1921   namespace graph_dist = boost::graph::distributed;
1922 
1923   typedef graph_dist::ilu_default::ilu_level_graph_type  lgraph_type;
1924   lgraph_type*   lgraph_p;
1925   PetscContainer c;
1926   PetscErrorCode ierr;
1927 
1928   PetscFunctionBegin;
1929   ierr = PetscObjectQuery((PetscObject) A, "graph", (PetscObject *) &c);CHKERRQ(ierr);
1930   ierr = PetscContainerGetPointer(c, (void **) &lgraph_p);CHKERRQ(ierr);
1931   ierr = VecCopy(b, x); CHKERRQ(ierr);
1932 
1933   PetscScalar* array_x;
1934   ierr = VecGetArray(x, &array_x);CHKERRQ(ierr);
1935   PetscInt sx;
1936   ierr = VecGetSize(x, &sx);CHKERRQ(ierr);
1937 
1938   PetscScalar* array_b;
1939   ierr = VecGetArray(b, &array_b);CHKERRQ(ierr);
1940   PetscInt sb;
1941   ierr = VecGetSize(b, &sb);CHKERRQ(ierr);
1942 
1943   lgraph_type&   level_graph = *lgraph_p;
1944   graph_dist::ilu_default::graph_type&            graph(level_graph.graph);
1945 
1946   typedef boost::multi_array_ref<PetscScalar, 1> array_ref_type;
1947   array_ref_type                                 ref_b(array_b, boost::extents[num_vertices(graph)]),
1948                                                  ref_x(array_x, boost::extents[num_vertices(graph)]);
1949 
1950   typedef boost::iterator_property_map<array_ref_type::iterator,
1951                                 boost::property_map<graph_dist::ilu_default::graph_type, boost::vertex_index_t>::type>  gvector_type;
1952   gvector_type                                   vector_b(ref_b.begin(), get(boost::vertex_index, graph)),
1953                                                  vector_x(ref_x.begin(), get(boost::vertex_index, graph));
1954 
1955   ilu_set_solve(*lgraph_p, vector_b, vector_x);
1956 
1957   PetscFunctionReturn(0);
1958 }
1959 #endif
1960 
1961 typedef struct { /* used by MatGetRedundantMatrix() for reusing matredundant */
1962   PetscInt       nzlocal,nsends,nrecvs;
1963   PetscMPIInt    *send_rank;
1964   PetscInt       *sbuf_nz,*sbuf_j,**rbuf_j;
1965   PetscScalar    *sbuf_a,**rbuf_a;
1966   PetscErrorCode (*MatDestroy)(Mat);
1967 } Mat_Redundant;
1968 
1969 #undef __FUNCT__
1970 #define __FUNCT__ "PetscContainerDestroy_MatRedundant"
1971 PetscErrorCode PetscContainerDestroy_MatRedundant(void *ptr)
1972 {
1973   PetscErrorCode       ierr;
1974   Mat_Redundant        *redund=(Mat_Redundant*)ptr;
1975   PetscInt             i;
1976 
1977   PetscFunctionBegin;
1978   ierr = PetscFree(redund->send_rank);CHKERRQ(ierr);
1979   ierr = PetscFree(redund->sbuf_j);CHKERRQ(ierr);
1980   ierr = PetscFree(redund->sbuf_a);CHKERRQ(ierr);
1981   for (i=0; i<redund->nrecvs; i++){
1982     ierr = PetscFree(redund->rbuf_j[i]);CHKERRQ(ierr);
1983     ierr = PetscFree(redund->rbuf_a[i]);CHKERRQ(ierr);
1984   }
1985   ierr = PetscFree3(redund->sbuf_nz,redund->rbuf_j,redund->rbuf_a);CHKERRQ(ierr);
1986   ierr = PetscFree(redund);CHKERRQ(ierr);
1987   PetscFunctionReturn(0);
1988 }
1989 
1990 #undef __FUNCT__
1991 #define __FUNCT__ "MatDestroy_MatRedundant"
1992 PetscErrorCode MatDestroy_MatRedundant(Mat A)
1993 {
1994   PetscErrorCode  ierr;
1995   PetscContainer  container;
1996   Mat_Redundant   *redund=PETSC_NULL;
1997 
1998   PetscFunctionBegin;
1999   ierr = PetscObjectQuery((PetscObject)A,"Mat_Redundant",(PetscObject *)&container);CHKERRQ(ierr);
2000   if (container) {
2001     ierr = PetscContainerGetPointer(container,(void **)&redund);CHKERRQ(ierr);
2002   } else {
2003     SETERRQ(PETSC_ERR_PLIB,"Container does not exist");
2004   }
2005   A->ops->destroy = redund->MatDestroy;
2006   ierr = PetscObjectCompose((PetscObject)A,"Mat_Redundant",0);CHKERRQ(ierr);
2007   ierr = (*A->ops->destroy)(A);CHKERRQ(ierr);
2008   ierr = PetscContainerDestroy(container);CHKERRQ(ierr);
2009   PetscFunctionReturn(0);
2010 }
2011 
2012 #undef __FUNCT__
2013 #define __FUNCT__ "MatGetRedundantMatrix_MPIAIJ"
2014 PetscErrorCode MatGetRedundantMatrix_MPIAIJ(Mat mat,PetscInt nsubcomm,MPI_Comm subcomm,PetscInt mlocal_sub,MatReuse reuse,Mat *matredundant)
2015 {
2016   PetscMPIInt    rank,size;
2017   MPI_Comm       comm=((PetscObject)mat)->comm;
2018   PetscErrorCode ierr;
2019   PetscInt       nsends=0,nrecvs=0,i,rownz_max=0;
2020   PetscMPIInt    *send_rank=PETSC_NULL,*recv_rank=PETSC_NULL;
2021   PetscInt       *rowrange=mat->rmap.range;
2022   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2023   Mat            A=aij->A,B=aij->B,C=*matredundant;
2024   Mat_SeqAIJ     *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data;
2025   PetscScalar    *sbuf_a;
2026   PetscInt       nzlocal=a->nz+b->nz;
2027   PetscInt       j,cstart=mat->cmap.rstart,cend=mat->cmap.rend,row,nzA,nzB,ncols,*cworkA,*cworkB;
2028   PetscInt       rstart=mat->rmap.rstart,rend=mat->rmap.rend,*bmap=aij->garray,M,N;
2029   PetscInt       *cols,ctmp,lwrite,*rptr,l,*sbuf_j;
2030   MatScalar      *aworkA,*aworkB;
2031   PetscScalar    *vals;
2032   PetscMPIInt    tag1,tag2,tag3,imdex;
2033   MPI_Request    *s_waits1=PETSC_NULL,*s_waits2=PETSC_NULL,*s_waits3=PETSC_NULL,
2034                  *r_waits1=PETSC_NULL,*r_waits2=PETSC_NULL,*r_waits3=PETSC_NULL;
2035   MPI_Status     recv_status,*send_status;
2036   PetscInt       *sbuf_nz=PETSC_NULL,*rbuf_nz=PETSC_NULL,count;
2037   PetscInt       **rbuf_j=PETSC_NULL;
2038   PetscScalar    **rbuf_a=PETSC_NULL;
2039   Mat_Redundant  *redund=PETSC_NULL;
2040   PetscContainer container;
2041 
2042   PetscFunctionBegin;
2043   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2044   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2045 
2046   if (reuse == MAT_REUSE_MATRIX) {
2047     ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr);
2048     if (M != N || M != mat->rmap.N) SETERRQ(PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong global size");
2049     ierr = MatGetLocalSize(C,&M,&N);CHKERRQ(ierr);
2050     if (M != N || M != mlocal_sub) SETERRQ(PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong local size");
2051     ierr = PetscObjectQuery((PetscObject)C,"Mat_Redundant",(PetscObject *)&container);CHKERRQ(ierr);
2052     if (container) {
2053       ierr = PetscContainerGetPointer(container,(void **)&redund);CHKERRQ(ierr);
2054     } else {
2055       SETERRQ(PETSC_ERR_PLIB,"Container does not exist");
2056     }
2057     if (nzlocal != redund->nzlocal) SETERRQ(PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong nzlocal");
2058 
2059     nsends    = redund->nsends;
2060     nrecvs    = redund->nrecvs;
2061     send_rank = redund->send_rank; recv_rank = send_rank + size;
2062     sbuf_nz   = redund->sbuf_nz;     rbuf_nz = sbuf_nz + nsends;
2063     sbuf_j    = redund->sbuf_j;
2064     sbuf_a    = redund->sbuf_a;
2065     rbuf_j    = redund->rbuf_j;
2066     rbuf_a    = redund->rbuf_a;
2067   }
2068 
2069   if (reuse == MAT_INITIAL_MATRIX){
2070     PetscMPIInt  subrank,subsize;
2071     PetscInt     nleftover,np_subcomm;
2072     /* get the destination processors' id send_rank, nsends and nrecvs */
2073     ierr = MPI_Comm_rank(subcomm,&subrank);CHKERRQ(ierr);
2074     ierr = MPI_Comm_size(subcomm,&subsize);CHKERRQ(ierr);
2075     ierr = PetscMalloc((2*size+1)*sizeof(PetscMPIInt),&send_rank);CHKERRQ(ierr);
2076     recv_rank = send_rank + size;
2077     np_subcomm = size/nsubcomm;
2078     nleftover  = size - nsubcomm*np_subcomm;
2079     nsends = 0; nrecvs = 0;
2080     for (i=0; i<size; i++){ /* i=rank*/
2081       if (subrank == i/nsubcomm && rank != i){ /* my_subrank == other's subrank */
2082         send_rank[nsends] = i; nsends++;
2083         recv_rank[nrecvs++] = i;
2084       }
2085     }
2086     if (rank >= size - nleftover){/* this proc is a leftover processor */
2087       i = size-nleftover-1;
2088       j = 0;
2089       while (j < nsubcomm - nleftover){
2090         send_rank[nsends++] = i;
2091         i--; j++;
2092       }
2093     }
2094 
2095     if (nleftover && subsize == size/nsubcomm && subrank==subsize-1){ /* this proc recvs from leftover processors */
2096       for (i=0; i<nleftover; i++){
2097         recv_rank[nrecvs++] = size-nleftover+i;
2098       }
2099     }
2100 
2101     /* allocate sbuf_j, sbuf_a */
2102     i = nzlocal + rowrange[rank+1] - rowrange[rank] + 2;
2103     ierr = PetscMalloc(i*sizeof(PetscInt),&sbuf_j);CHKERRQ(ierr);
2104     ierr = PetscMalloc((nzlocal+1)*sizeof(PetscScalar),&sbuf_a);CHKERRQ(ierr);
2105   } /* end of if (reuse == MAT_INITIAL_MATRIX) */
2106 
2107   /* copy mat's local entries into the buffers */
2108   if (reuse == MAT_INITIAL_MATRIX){
2109     rownz_max = 0;
2110     rptr = sbuf_j;
2111     cols = sbuf_j + rend-rstart + 1;
2112     vals = sbuf_a;
2113     rptr[0] = 0;
2114     for (i=0; i<rend-rstart; i++){
2115       row = i + rstart;
2116       nzA    = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i];
2117       ncols  = nzA + nzB;
2118       cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i];
2119       aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i];
2120       /* load the column indices for this row into cols */
2121       lwrite = 0;
2122       for (l=0; l<nzB; l++) {
2123         if ((ctmp = bmap[cworkB[l]]) < cstart){
2124           vals[lwrite]   = aworkB[l];
2125           cols[lwrite++] = ctmp;
2126         }
2127       }
2128       for (l=0; l<nzA; l++){
2129         vals[lwrite]   = aworkA[l];
2130         cols[lwrite++] = cstart + cworkA[l];
2131       }
2132       for (l=0; l<nzB; l++) {
2133         if ((ctmp = bmap[cworkB[l]]) >= cend){
2134           vals[lwrite]   = aworkB[l];
2135           cols[lwrite++] = ctmp;
2136         }
2137       }
2138       vals += ncols;
2139       cols += ncols;
2140       rptr[i+1] = rptr[i] + ncols;
2141       if (rownz_max < ncols) rownz_max = ncols;
2142     }
2143     if (rptr[rend-rstart] != a->nz + b->nz) SETERRQ4(PETSC_ERR_PLIB, "rptr[%d] %d != %d + %d",rend-rstart,rptr[rend-rstart],a->nz,b->nz);
2144   } else { /* only copy matrix values into sbuf_a */
2145     rptr = sbuf_j;
2146     vals = sbuf_a;
2147     rptr[0] = 0;
2148     for (i=0; i<rend-rstart; i++){
2149       row = i + rstart;
2150       nzA    = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i];
2151       ncols  = nzA + nzB;
2152       cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i];
2153       aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i];
2154       lwrite = 0;
2155       for (l=0; l<nzB; l++) {
2156         if ((ctmp = bmap[cworkB[l]]) < cstart) vals[lwrite++] = aworkB[l];
2157       }
2158       for (l=0; l<nzA; l++) vals[lwrite++] = aworkA[l];
2159       for (l=0; l<nzB; l++) {
2160         if ((ctmp = bmap[cworkB[l]]) >= cend) vals[lwrite++] = aworkB[l];
2161       }
2162       vals += ncols;
2163       rptr[i+1] = rptr[i] + ncols;
2164     }
2165   } /* end of if (reuse == MAT_INITIAL_MATRIX) */
2166 
2167   /* send nzlocal to others, and recv other's nzlocal */
2168   /*--------------------------------------------------*/
2169   if (reuse == MAT_INITIAL_MATRIX){
2170     ierr = PetscMalloc2(3*(nsends + nrecvs)+1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr);
2171     s_waits2 = s_waits3 + nsends;
2172     s_waits1 = s_waits2 + nsends;
2173     r_waits1 = s_waits1 + nsends;
2174     r_waits2 = r_waits1 + nrecvs;
2175     r_waits3 = r_waits2 + nrecvs;
2176   } else {
2177     ierr = PetscMalloc2(nsends + nrecvs +1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr);
2178     r_waits3 = s_waits3 + nsends;
2179   }
2180 
2181   ierr = PetscObjectGetNewTag((PetscObject)mat,&tag3);CHKERRQ(ierr);
2182   if (reuse == MAT_INITIAL_MATRIX){
2183     /* get new tags to keep the communication clean */
2184     ierr = PetscObjectGetNewTag((PetscObject)mat,&tag1);CHKERRQ(ierr);
2185     ierr = PetscObjectGetNewTag((PetscObject)mat,&tag2);CHKERRQ(ierr);
2186     ierr = PetscMalloc3(nsends+nrecvs+1,PetscInt,&sbuf_nz,nrecvs,PetscInt*,&rbuf_j,nrecvs,PetscScalar*,&rbuf_a);CHKERRQ(ierr);
2187     rbuf_nz = sbuf_nz + nsends;
2188 
2189     /* post receives of other's nzlocal */
2190     for (i=0; i<nrecvs; i++){
2191       ierr = MPI_Irecv(rbuf_nz+i,1,MPIU_INT,MPI_ANY_SOURCE,tag1,comm,r_waits1+i);CHKERRQ(ierr);
2192     }
2193     /* send nzlocal to others */
2194     for (i=0; i<nsends; i++){
2195       sbuf_nz[i] = nzlocal;
2196       ierr = MPI_Isend(sbuf_nz+i,1,MPIU_INT,send_rank[i],tag1,comm,s_waits1+i);CHKERRQ(ierr);
2197     }
2198     /* wait on receives of nzlocal; allocate space for rbuf_j, rbuf_a */
2199     count = nrecvs;
2200     while (count) {
2201       ierr = MPI_Waitany(nrecvs,r_waits1,&imdex,&recv_status);CHKERRQ(ierr);
2202       recv_rank[imdex] = recv_status.MPI_SOURCE;
2203       /* allocate rbuf_a and rbuf_j; then post receives of rbuf_j */
2204       ierr = PetscMalloc((rbuf_nz[imdex]+1)*sizeof(PetscScalar),&rbuf_a[imdex]);CHKERRQ(ierr);
2205 
2206       i = rowrange[recv_status.MPI_SOURCE+1] - rowrange[recv_status.MPI_SOURCE]; /* number of expected mat->i */
2207       rbuf_nz[imdex] += i + 2;
2208       ierr = PetscMalloc(rbuf_nz[imdex]*sizeof(PetscInt),&rbuf_j[imdex]);CHKERRQ(ierr);
2209       ierr = MPI_Irecv(rbuf_j[imdex],rbuf_nz[imdex],MPIU_INT,recv_status.MPI_SOURCE,tag2,comm,r_waits2+imdex);CHKERRQ(ierr);
2210       count--;
2211     }
2212     /* wait on sends of nzlocal */
2213     if (nsends) {ierr = MPI_Waitall(nsends,s_waits1,send_status);CHKERRQ(ierr);}
2214     /* send mat->i,j to others, and recv from other's */
2215     /*------------------------------------------------*/
2216     for (i=0; i<nsends; i++){
2217       j = nzlocal + rowrange[rank+1] - rowrange[rank] + 1;
2218       ierr = MPI_Isend(sbuf_j,j,MPIU_INT,send_rank[i],tag2,comm,s_waits2+i);CHKERRQ(ierr);
2219     }
2220     /* wait on receives of mat->i,j */
2221     /*------------------------------*/
2222     count = nrecvs;
2223     while (count) {
2224       ierr = MPI_Waitany(nrecvs,r_waits2,&imdex,&recv_status);CHKERRQ(ierr);
2225       if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_ERR_PLIB, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE);
2226       count--;
2227     }
2228     /* wait on sends of mat->i,j */
2229     /*---------------------------*/
2230     if (nsends) {
2231       ierr = MPI_Waitall(nsends,s_waits2,send_status);CHKERRQ(ierr);
2232     }
2233   } /* end of if (reuse == MAT_INITIAL_MATRIX) */
2234 
2235   /* post receives, send and receive mat->a */
2236   /*----------------------------------------*/
2237   for (imdex=0; imdex<nrecvs; imdex++) {
2238     ierr = MPI_Irecv(rbuf_a[imdex],rbuf_nz[imdex],MPIU_SCALAR,recv_rank[imdex],tag3,comm,r_waits3+imdex);CHKERRQ(ierr);
2239   }
2240   for (i=0; i<nsends; i++){
2241     ierr = MPI_Isend(sbuf_a,nzlocal,MPIU_SCALAR,send_rank[i],tag3,comm,s_waits3+i);CHKERRQ(ierr);
2242   }
2243   count = nrecvs;
2244   while (count) {
2245     ierr = MPI_Waitany(nrecvs,r_waits3,&imdex,&recv_status);CHKERRQ(ierr);
2246     if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_ERR_PLIB, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE);
2247     count--;
2248   }
2249   if (nsends) {
2250     ierr = MPI_Waitall(nsends,s_waits3,send_status);CHKERRQ(ierr);
2251   }
2252 
2253   ierr = PetscFree2(s_waits3,send_status);CHKERRQ(ierr);
2254 
2255   /* create redundant matrix */
2256   /*-------------------------*/
2257   if (reuse == MAT_INITIAL_MATRIX){
2258     /* compute rownz_max for preallocation */
2259     for (imdex=0; imdex<nrecvs; imdex++){
2260       j = rowrange[recv_rank[imdex]+1] - rowrange[recv_rank[imdex]];
2261       rptr = rbuf_j[imdex];
2262       for (i=0; i<j; i++){
2263         ncols = rptr[i+1] - rptr[i];
2264         if (rownz_max < ncols) rownz_max = ncols;
2265       }
2266     }
2267 
2268     ierr = MatCreate(subcomm,&C);CHKERRQ(ierr);
2269     ierr = MatSetSizes(C,mlocal_sub,mlocal_sub,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr);
2270     ierr = MatSetFromOptions(C);CHKERRQ(ierr);
2271     ierr = MatSeqAIJSetPreallocation(C,rownz_max,PETSC_NULL);CHKERRQ(ierr);
2272     ierr = MatMPIAIJSetPreallocation(C,rownz_max,PETSC_NULL,rownz_max,PETSC_NULL);CHKERRQ(ierr);
2273   } else {
2274     C = *matredundant;
2275   }
2276 
2277   /* insert local matrix entries */
2278   rptr = sbuf_j;
2279   cols = sbuf_j + rend-rstart + 1;
2280   vals = sbuf_a;
2281   for (i=0; i<rend-rstart; i++){
2282     row   = i + rstart;
2283     ncols = rptr[i+1] - rptr[i];
2284     ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
2285     vals += ncols;
2286     cols += ncols;
2287   }
2288   /* insert received matrix entries */
2289   for (imdex=0; imdex<nrecvs; imdex++){
2290     rstart = rowrange[recv_rank[imdex]];
2291     rend   = rowrange[recv_rank[imdex]+1];
2292     rptr = rbuf_j[imdex];
2293     cols = rbuf_j[imdex] + rend-rstart + 1;
2294     vals = rbuf_a[imdex];
2295     for (i=0; i<rend-rstart; i++){
2296       row   = i + rstart;
2297       ncols = rptr[i+1] - rptr[i];
2298       ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
2299       vals += ncols;
2300       cols += ncols;
2301     }
2302   }
2303   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2304   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2305   ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr);
2306   if (M != mat->rmap.N || N != mat->cmap.N) SETERRQ2(PETSC_ERR_ARG_INCOMP,"redundant mat size %d != input mat size %d",M,mat->rmap.N);
2307   if (reuse == MAT_INITIAL_MATRIX){
2308     PetscContainer container;
2309     *matredundant = C;
2310     /* create a supporting struct and attach it to C for reuse */
2311     ierr = PetscNewLog(C,Mat_Redundant,&redund);CHKERRQ(ierr);
2312     ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
2313     ierr = PetscContainerSetPointer(container,redund);CHKERRQ(ierr);
2314     ierr = PetscObjectCompose((PetscObject)C,"Mat_Redundant",(PetscObject)container);CHKERRQ(ierr);
2315     ierr = PetscContainerSetUserDestroy(container,PetscContainerDestroy_MatRedundant);CHKERRQ(ierr);
2316 
2317     redund->nzlocal = nzlocal;
2318     redund->nsends  = nsends;
2319     redund->nrecvs  = nrecvs;
2320     redund->send_rank = send_rank;
2321     redund->sbuf_nz = sbuf_nz;
2322     redund->sbuf_j  = sbuf_j;
2323     redund->sbuf_a  = sbuf_a;
2324     redund->rbuf_j  = rbuf_j;
2325     redund->rbuf_a  = rbuf_a;
2326 
2327     redund->MatDestroy = C->ops->destroy;
2328     C->ops->destroy    = MatDestroy_MatRedundant;
2329   }
2330   PetscFunctionReturn(0);
2331 }
2332 
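/*
   Hedged usage sketch for the routine above, reached through
   MatGetRedundantMatrix(); subcomm is assumed to have been created by the
   caller (e.g. with MPI_Comm_split) and mlocal_sub chosen consistently with it:

       Mat C;
       MatGetRedundantMatrix(A,nsubcomm,subcomm,mlocal_sub,MAT_INITIAL_MATRIX,&C);
       ... use C on subcomm ...
       MatGetRedundantMatrix(A,nsubcomm,subcomm,mlocal_sub,MAT_REUSE_MATRIX,&C);
       MatDestroy(C);
*/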
2333 #undef __FUNCT__
2334 #define __FUNCT__ "MatGetRowMin_MPIAIJ"
2335 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2336 {
2337   Mat_MPIAIJ    *mat    = (Mat_MPIAIJ *) A->data;
2338   PetscInt       n      = A->rmap.n;
2339   PetscInt       cstart = A->cmap.rstart;
2340   PetscInt      *cmap   = mat->garray;
2341   PetscInt      *diagIdx, *offdiagIdx;
2342   Vec            diagV, offdiagV;
2343   PetscScalar   *a, *diagA, *offdiagA;
2344   PetscInt       r;
2345   PetscErrorCode ierr;
2346 
2347   PetscFunctionBegin;
2348   ierr = PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);CHKERRQ(ierr);
2349   ierr = VecCreateSeq(((PetscObject)A)->comm, n, &diagV);CHKERRQ(ierr);
2350   ierr = VecCreateSeq(((PetscObject)A)->comm, n, &offdiagV);CHKERRQ(ierr);
2351   ierr = MatGetRowMin(mat->A, diagV,    diagIdx);CHKERRQ(ierr);
2352   ierr = MatGetRowMin(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
2353   ierr = VecGetArray(v,        &a);CHKERRQ(ierr);
2354   ierr = VecGetArray(diagV,    &diagA);CHKERRQ(ierr);
2355   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2356   for(r = 0; r < n; ++r) {
2357     if (PetscAbsScalar(diagA[r]) <= PetscAbsScalar(offdiagA[r])) {
2358       a[r]   = diagA[r];
2359       idx[r] = cstart + diagIdx[r];
2360     } else {
2361       a[r]   = offdiagA[r];
2362       idx[r] = cmap[offdiagIdx[r]];
2363     }
2364   }
2365   ierr = VecRestoreArray(v,        &a);CHKERRQ(ierr);
2366   ierr = VecRestoreArray(diagV,    &diagA);CHKERRQ(ierr);
2367   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2368   ierr = VecDestroy(diagV);CHKERRQ(ierr);
2369   ierr = VecDestroy(offdiagV);CHKERRQ(ierr);
2370   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
2371   PetscFunctionReturn(0);
2372 }
2373 
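/*
   Sketch of calling the routine above through MatGetRowMin(); the vector must
   have the same local row layout as the matrix (names are illustrative and
   error checking is omitted):

       Vec      rowmin;
       PetscInt m,*idx;
       MatGetLocalSize(A,&m,PETSC_NULL);
       VecCreateMPI(((PetscObject)A)->comm,m,PETSC_DETERMINE,&rowmin);
       PetscMalloc(m*sizeof(PetscInt),&idx);
       MatGetRowMin(A,rowmin,idx);
*/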
2374 #undef __FUNCT__
2375 #define __FUNCT__ "MatGetSeqNonzerostructure_MPIAIJ"
2376 PetscErrorCode MatGetSeqNonzerostructure_MPIAIJ(Mat mat,Mat *newmat[])
2377 {
2378   PetscErrorCode ierr;
2379 
2380   PetscFunctionBegin;
2381   ierr = MatGetSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,newmat);CHKERRQ(ierr);
2382   PetscFunctionReturn(0);
2383 }
2384 
2385 /* -------------------------------------------------------------------*/
2386 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
2387        MatGetRow_MPIAIJ,
2388        MatRestoreRow_MPIAIJ,
2389        MatMult_MPIAIJ,
2390 /* 4*/ MatMultAdd_MPIAIJ,
2391        MatMultTranspose_MPIAIJ,
2392        MatMultTransposeAdd_MPIAIJ,
2393 #ifdef PETSC_HAVE_PBGL
2394        MatSolve_MPIAIJ,
2395 #else
2396        0,
2397 #endif
2398        0,
2399        0,
2400 /*10*/ 0,
2401        0,
2402        0,
2403        MatRelax_MPIAIJ,
2404        MatTranspose_MPIAIJ,
2405 /*15*/ MatGetInfo_MPIAIJ,
2406        MatEqual_MPIAIJ,
2407        MatGetDiagonal_MPIAIJ,
2408        MatDiagonalScale_MPIAIJ,
2409        MatNorm_MPIAIJ,
2410 /*20*/ MatAssemblyBegin_MPIAIJ,
2411        MatAssemblyEnd_MPIAIJ,
2412        0,
2413        MatSetOption_MPIAIJ,
2414        MatZeroEntries_MPIAIJ,
2415 /*25*/ MatZeroRows_MPIAIJ,
2416        0,
2417 #ifdef PETSC_HAVE_PBGL
2418        MatLUFactorNumeric_MPIAIJ,
2419 #else
2420        0,
2421 #endif
2422        0,
2423        0,
2424 /*30*/ MatSetUpPreallocation_MPIAIJ,
2425 #ifdef PETSC_HAVE_PBGL
2426        MatILUFactorSymbolic_MPIAIJ,
2427 #else
2428        0,
2429 #endif
2430        0,
2431        0,
2432        0,
2433 /*35*/ MatDuplicate_MPIAIJ,
2434        0,
2435        0,
2436        0,
2437        0,
2438 /*40*/ MatAXPY_MPIAIJ,
2439        MatGetSubMatrices_MPIAIJ,
2440        MatIncreaseOverlap_MPIAIJ,
2441        MatGetValues_MPIAIJ,
2442        MatCopy_MPIAIJ,
2443 /*45*/ 0,
2444        MatScale_MPIAIJ,
2445        0,
2446        0,
2447        0,
2448 /*50*/ MatSetBlockSize_MPIAIJ,
2449        0,
2450        0,
2451        0,
2452        0,
2453 /*55*/ MatFDColoringCreate_MPIAIJ,
2454        0,
2455        MatSetUnfactored_MPIAIJ,
2456        MatPermute_MPIAIJ,
2457        0,
2458 /*60*/ MatGetSubMatrix_MPIAIJ,
2459        MatDestroy_MPIAIJ,
2460        MatView_MPIAIJ,
2461        0,
2462        0,
2463 /*65*/ 0,
2464        0,
2465        0,
2466        0,
2467        0,
2468 /*70*/ 0,
2469        0,
2470        MatSetColoring_MPIAIJ,
2471 #if defined(PETSC_HAVE_ADIC)
2472        MatSetValuesAdic_MPIAIJ,
2473 #else
2474        0,
2475 #endif
2476        MatSetValuesAdifor_MPIAIJ,
2477 /*75*/ 0,
2478        0,
2479        0,
2480        0,
2481        0,
2482 /*80*/ 0,
2483        0,
2484        0,
2485        0,
2486 /*84*/ MatLoad_MPIAIJ,
2487        0,
2488        0,
2489        0,
2490        0,
2491        0,
2492 /*90*/ MatMatMult_MPIAIJ_MPIAIJ,
2493        MatMatMultSymbolic_MPIAIJ_MPIAIJ,
2494        MatMatMultNumeric_MPIAIJ_MPIAIJ,
2495        MatPtAP_Basic,
2496        MatPtAPSymbolic_MPIAIJ,
2497 /*95*/ MatPtAPNumeric_MPIAIJ,
2498        0,
2499        0,
2500        0,
2501        0,
2502 /*100*/0,
2503        MatPtAPSymbolic_MPIAIJ_MPIAIJ,
2504        MatPtAPNumeric_MPIAIJ_MPIAIJ,
2505        MatConjugate_MPIAIJ,
2506        0,
2507 /*105*/MatSetValuesRow_MPIAIJ,
2508        MatRealPart_MPIAIJ,
2509        MatImaginaryPart_MPIAIJ,
2510        0,
2511        0,
2512 /*110*/0,
2513        MatGetRedundantMatrix_MPIAIJ,
2514        MatGetRowMin_MPIAIJ,
2515        0,
2516        0,
2517 /*115*/MatGetSeqNonzerostructure_MPIAIJ};
2518 
2519 /* ----------------------------------------------------------------------------------------*/
2520 
2521 EXTERN_C_BEGIN
2522 #undef __FUNCT__
2523 #define __FUNCT__ "MatStoreValues_MPIAIJ"
2524 PetscErrorCode PETSCMAT_DLLEXPORT MatStoreValues_MPIAIJ(Mat mat)
2525 {
2526   Mat_MPIAIJ     *aij = (Mat_MPIAIJ *)mat->data;
2527   PetscErrorCode ierr;
2528 
2529   PetscFunctionBegin;
2530   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
2531   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
2532   PetscFunctionReturn(0);
2533 }
2534 EXTERN_C_END
2535 
2536 EXTERN_C_BEGIN
2537 #undef __FUNCT__
2538 #define __FUNCT__ "MatRetrieveValues_MPIAIJ"
2539 PetscErrorCode PETSCMAT_DLLEXPORT MatRetrieveValues_MPIAIJ(Mat mat)
2540 {
2541   Mat_MPIAIJ     *aij = (Mat_MPIAIJ *)mat->data;
2542   PetscErrorCode ierr;
2543 
2544   PetscFunctionBegin;
2545   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
2546   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
2547   PetscFunctionReturn(0);
2548 }
2549 EXTERN_C_END
2550 
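/*
   MatStoreValues()/MatRetrieveValues() support repeated re-assembly when the
   nonzero pattern is frozen; typically new nonzero locations must be disabled
   first (sketch):

       MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);
       MatStoreValues(A);
       ... later, e.g. inside a nonlinear iteration ...
       MatRetrieveValues(A);
*/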
2551 #include "petscpc.h"
2552 EXTERN_C_BEGIN
2553 #undef __FUNCT__
2554 #define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ"
2555 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
2556 {
2557   Mat_MPIAIJ     *b;
2558   PetscErrorCode ierr;
2559   PetscInt       i;
2560 
2561   PetscFunctionBegin;
2562   B->preallocated = PETSC_TRUE;
2563   if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5;
2564   if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2;
2565   if (d_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz);
2566   if (o_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz);
2567 
2568   B->rmap.bs = B->cmap.bs = 1;
2569   ierr = PetscMapSetUp(&B->rmap);CHKERRQ(ierr);
2570   ierr = PetscMapSetUp(&B->cmap);CHKERRQ(ierr);
2571   if (d_nnz) {
2572     for (i=0; i<B->rmap.n; i++) {
2573       if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %D value %D",i,d_nnz[i]);
2574     }
2575   }
2576   if (o_nnz) {
2577     for (i=0; i<B->rmap.n; i++) {
2578       if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %D value %D",i,o_nnz[i]);
2579     }
2580   }
2581   b = (Mat_MPIAIJ*)B->data;
2582 
2583   /* Explicitly create 2 MATSEQAIJ matrices. */
2584   ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
2585   ierr = MatSetSizes(b->A,B->rmap.n,B->cmap.n,B->rmap.n,B->cmap.n);CHKERRQ(ierr);
2586   ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr);
2587   ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr);
2588   ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
2589   ierr = MatSetSizes(b->B,B->rmap.n,B->cmap.N,B->rmap.n,B->cmap.N);CHKERRQ(ierr);
2590   ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr);
2591   ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr);
2592 
2593   ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr);
2594   ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr);
2595 
2596   PetscFunctionReturn(0);
2597 }
2598 EXTERN_C_END
2599 
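/*
   Typical preallocation from user code (sketch): either constant estimates of
   the nonzeros per row in the diagonal and off-diagonal blocks, or exact
   per-row counts through the d_nnz/o_nnz arrays:

       MatCreate(PETSC_COMM_WORLD,&A);
       MatSetSizes(A,m,n,PETSC_DETERMINE,PETSC_DETERMINE);
       MatSetType(A,MATMPIAIJ);
       MatMPIAIJSetPreallocation(A,5,PETSC_NULL,2,PETSC_NULL);
*/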
2600 #undef __FUNCT__
2601 #define __FUNCT__ "MatDuplicate_MPIAIJ"
2602 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
2603 {
2604   Mat            mat;
2605   Mat_MPIAIJ     *a,*oldmat = (Mat_MPIAIJ*)matin->data;
2606   PetscErrorCode ierr;
2607 
2608   PetscFunctionBegin;
2609   *newmat       = 0;
2610   ierr = MatCreate(((PetscObject)matin)->comm,&mat);CHKERRQ(ierr);
2611   ierr = MatSetSizes(mat,matin->rmap.n,matin->cmap.n,matin->rmap.N,matin->cmap.N);CHKERRQ(ierr);
2612   ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr);
2613   ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
2614   a    = (Mat_MPIAIJ*)mat->data;
2615 
2616   mat->factor       = matin->factor;
2617   mat->rmap.bs      = matin->rmap.bs;
2618   mat->assembled    = PETSC_TRUE;
2619   mat->insertmode   = NOT_SET_VALUES;
2620   mat->preallocated = PETSC_TRUE;
2621 
2622   a->size           = oldmat->size;
2623   a->rank           = oldmat->rank;
2624   a->donotstash     = oldmat->donotstash;
2625   a->roworiented    = oldmat->roworiented;
2626   a->rowindices     = 0;
2627   a->rowvalues      = 0;
2628   a->getrowactive   = PETSC_FALSE;
2629 
2630   ierr = PetscMapCopy(((PetscObject)mat)->comm,&matin->rmap,&mat->rmap);CHKERRQ(ierr);
2631   ierr = PetscMapCopy(((PetscObject)mat)->comm,&matin->cmap,&mat->cmap);CHKERRQ(ierr);
2632 
2633   ierr = MatStashCreate_Private(((PetscObject)matin)->comm,1,&mat->stash);CHKERRQ(ierr);
2634   if (oldmat->colmap) {
2635 #if defined (PETSC_USE_CTABLE)
2636     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
2637 #else
2638     ierr = PetscMalloc((mat->cmap.N)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr);
2639     ierr = PetscLogObjectMemory(mat,(mat->cmap.N)*sizeof(PetscInt));CHKERRQ(ierr);
2640     ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->cmap.N)*sizeof(PetscInt));CHKERRQ(ierr);
2641 #endif
2642   } else a->colmap = 0;
2643   if (oldmat->garray) {
2644     PetscInt len;
2645     len  = oldmat->B->cmap.n;
2646     ierr = PetscMalloc((len+1)*sizeof(PetscInt),&a->garray);CHKERRQ(ierr);
2647     ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr);
2648     if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); }
2649   } else a->garray = 0;
2650 
2651   ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
2652   ierr = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr);
2653   ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
2654   ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr);
2655   ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
2656   ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr);
2657   ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
2658   ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr);
2659   ierr = PetscFListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr);
2660   *newmat = mat;
2661   PetscFunctionReturn(0);
2662 }
2663 
2664 #include "petscsys.h"
2665 
2666 #undef __FUNCT__
2667 #define __FUNCT__ "MatLoad_MPIAIJ"
2668 PetscErrorCode MatLoad_MPIAIJ(PetscViewer viewer, MatType type,Mat *newmat)
2669 {
2670   Mat            A;
2671   PetscScalar    *vals,*svals;
2672   MPI_Comm       comm = ((PetscObject)viewer)->comm;
2673   MPI_Status     status;
2674   PetscErrorCode ierr;
2675   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag,maxnz;
2676   PetscInt       i,nz,j,rstart,rend,mmax;
2677   PetscInt       header[4],*rowlengths = 0,M,N,m,*cols;
2678   PetscInt       *ourlens = PETSC_NULL,*procsnz = PETSC_NULL,*offlens = PETSC_NULL,jj,*mycols,*smycols;
2679   PetscInt       cend,cstart,n,*rowners;
2680   int            fd;
2681 
2682   PetscFunctionBegin;
2683   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2684   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2685   if (!rank) {
2686     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
2687     ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
2688     if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
2689   }
2690 
2691   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
2692   M = header[1]; N = header[2];
2693   /* determine ownership of all rows */
2694   m    = M/size + ((M % size) > rank);
2695   ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr);
2696   ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
2697 
2698   /* First process needs enough room for process with most rows */
2699   if (!rank) {
2700     mmax       = rowners[1];
2701     for (i=2; i<=size; i++) {
2702       mmax = PetscMax(mmax,rowners[i]);
2703     }
2704   } else mmax = m;
2705 
2706   rowners[0] = 0;
2707   for (i=2; i<=size; i++) {
2708     rowners[i] += rowners[i-1];
2709   }
2710   rstart = rowners[rank];
2711   rend   = rowners[rank+1];
2712 
2713   /* distribute row lengths to all processors */
2714   ierr    = PetscMalloc2(mmax,PetscInt,&ourlens,mmax,PetscInt,&offlens);CHKERRQ(ierr);
2715   if (!rank) {
2716     ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr);
2717     ierr = PetscMalloc(m*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
2718     ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr);
2719     ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr);
2720     for (j=0; j<m; j++) {
2721       procsnz[0] += ourlens[j];
2722     }
2723     for (i=1; i<size; i++) {
2724       ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr);
2725       /* calculate the number of nonzeros on each processor */
2726       for (j=0; j<rowners[i+1]-rowners[i]; j++) {
2727         procsnz[i] += rowlengths[j];
2728       }
2729       ierr = MPI_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
2730     }
2731     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
2732   } else {
2733     ierr = MPI_Recv(ourlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
2734   }
2735 
2736   if (!rank) {
2737     /* determine max buffer needed and allocate it */
2738     maxnz = 0;
2739     for (i=0; i<size; i++) {
2740       maxnz = PetscMax(maxnz,procsnz[i]);
2741     }
2742     ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr);
2743 
2744     /* read in my part of the matrix column indices  */
2745     nz   = procsnz[0];
2746     ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr);
2747     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
2748 
2749     /* read in everyone else's portion and ship it off */
2750     for (i=1; i<size; i++) {
2751       nz   = procsnz[i];
2752       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
2753       ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
2754     }
2755     ierr = PetscFree(cols);CHKERRQ(ierr);
2756   } else {
2757     /* determine buffer space needed for message */
2758     nz = 0;
2759     for (i=0; i<m; i++) {
2760       nz += ourlens[i];
2761     }
2762     ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr);
2763 
2764     /* receive message of column indices*/
2765     ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
2766     ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr);
2767     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with the file");
2768   }
2769 
2770   /* determine column ownership if matrix is not square */
2771   if (N != M) {
2772     n      = N/size + ((N % size) > rank);
2773     ierr   = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
2774     cstart = cend - n;
2775   } else {
2776     cstart = rstart;
2777     cend   = rend;
2778     n      = cend - cstart;
2779   }
2780 
2781   /* loop over local rows, determining number of off diagonal entries */
2782   ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr);
2783   jj = 0;
2784   for (i=0; i<m; i++) {
2785     for (j=0; j<ourlens[i]; j++) {
2786       if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++;
2787       jj++;
2788     }
2789   }
2790 
2791   /* create our matrix */
2792   for (i=0; i<m; i++) {
2793     ourlens[i] -= offlens[i];
2794   }
2795   ierr = MatCreate(comm,&A);CHKERRQ(ierr);
2796   ierr = MatSetSizes(A,m,n,M,N);CHKERRQ(ierr);
2797   ierr = MatSetType(A,type);CHKERRQ(ierr);
2798   ierr = MatMPIAIJSetPreallocation(A,0,ourlens,0,offlens);CHKERRQ(ierr);
2799 
2800   for (i=0; i<m; i++) {
2801     ourlens[i] += offlens[i];
2802   }
2803 
2804   if (!rank) {
2805     ierr = PetscMalloc((maxnz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
2806 
2807     /* read in my part of the matrix numerical values  */
2808     nz   = procsnz[0];
2809     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2810 
2811     /* insert into matrix */
2812     jj      = rstart;
2813     smycols = mycols;
2814     svals   = vals;
2815     for (i=0; i<m; i++) {
2816       ierr = MatSetValues_MPIAIJ(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
2817       smycols += ourlens[i];
2818       svals   += ourlens[i];
2819       jj++;
2820     }
2821 
2822     /* read in other processors and ship out */
2823     for (i=1; i<size; i++) {
2824       nz   = procsnz[i];
2825       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2826       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)A)->tag,comm);CHKERRQ(ierr);
2827     }
2828     ierr = PetscFree(procsnz);CHKERRQ(ierr);
2829   } else {
2830     /* receive numeric values */
2831     ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
2832 
2833     /* receive message of values*/
2834     ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)A)->tag,comm,&status);CHKERRQ(ierr);
2835     ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
2836     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with the file");
2837 
2838     /* insert into matrix */
2839     jj      = rstart;
2840     smycols = mycols;
2841     svals   = vals;
2842     for (i=0; i<m; i++) {
2843       ierr     = MatSetValues_MPIAIJ(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
2844       smycols += ourlens[i];
2845       svals   += ourlens[i];
2846       jj++;
2847     }
2848   }
2849   ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr);
2850   ierr = PetscFree(vals);CHKERRQ(ierr);
2851   ierr = PetscFree(mycols);CHKERRQ(ierr);
2852   ierr = PetscFree(rowners);CHKERRQ(ierr);
2853 
2854   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2855   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2856   *newmat = A;
2857   PetscFunctionReturn(0);
2858 }
2859 
2860 #undef __FUNCT__
2861 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ"
2862 /*
2863     Not great since it makes two copies of the submatrix: first a SeqAIJ
2864   in local form, and then the end result by concatenating the local matrices.
2865   Writing it directly would be much like MatGetSubMatrices_MPIAIJ()
2866 */
2867 PetscErrorCode MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat)
2868 {
2869   PetscErrorCode ierr;
2870   PetscMPIInt    rank,size;
2871   PetscInt       i,m,n,rstart,row,rend,nz,*cwork,j;
2872   PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal;
2873   Mat            *local,M,Mreuse;
2874   MatScalar      *vwork,*aa;
2875   MPI_Comm       comm = ((PetscObject)mat)->comm;
2876   Mat_SeqAIJ     *aij;
2877 
2878 
2879   PetscFunctionBegin;
2880   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2881   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2882 
2883   if (call ==  MAT_REUSE_MATRIX) {
2884     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr);
2885     if (!Mreuse) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
2886     local = &Mreuse;
2887     ierr  = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);CHKERRQ(ierr);
2888   } else {
2889     ierr   = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr);
2890     Mreuse = *local;
2891     ierr   = PetscFree(local);CHKERRQ(ierr);
2892   }
2893 
2894   /*
2895       m - number of local rows
2896       n - number of columns (same on all processors)
2897       rstart - first row in new global matrix generated
2898   */
2899   ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
2900   if (call == MAT_INITIAL_MATRIX) {
2901     aij = (Mat_SeqAIJ*)(Mreuse)->data;
2902     ii  = aij->i;
2903     jj  = aij->j;
2904 
2905     /*
2906         Determine the number of non-zeros in the diagonal and off-diagonal
2907         portions of the matrix in order to do correct preallocation
2908     */
2909 
2910     /* first get start and end of "diagonal" columns */
2911     if (csize == PETSC_DECIDE) {
2912       ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
2913       if (mglobal == n) { /* square matrix */
2914 	nlocal = m;
2915       } else {
2916         nlocal = n/size + ((n % size) > rank);
2917       }
2918     } else {
2919       nlocal = csize;
2920     }
2921     ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
2922     rstart = rend - nlocal;
2923     if (rank == size - 1 && rend != n) {
2924       SETERRQ2(PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n);
2925     }
2926 
2927     /* next, compute all the lengths */
2928     ierr  = PetscMalloc((2*m+1)*sizeof(PetscInt),&dlens);CHKERRQ(ierr);
2929     olens = dlens + m;
2930     for (i=0; i<m; i++) {
2931       jend = ii[i+1] - ii[i];
2932       olen = 0;
2933       dlen = 0;
2934       for (j=0; j<jend; j++) {
2935         if (*jj < rstart || *jj >= rend) olen++;
2936         else dlen++;
2937         jj++;
2938       }
2939       olens[i] = olen;
2940       dlens[i] = dlen;
2941     }
2942     ierr = MatCreate(comm,&M);CHKERRQ(ierr);
2943     ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr);
2944     ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr);
2945     ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr);
2946     ierr = PetscFree(dlens);CHKERRQ(ierr);
2947   } else {
2948     PetscInt ml,nl;
2949 
2950     M = *newmat;
2951     ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
2952     if (ml != m) SETERRQ(PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
2953     ierr = MatZeroEntries(M);CHKERRQ(ierr);
2954     /*
2955          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
2956        rather than the slower MatSetValues().
2957     */
2958     M->was_assembled = PETSC_TRUE;
2959     M->assembled     = PETSC_FALSE;
2960   }
2961   ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
2962   aij = (Mat_SeqAIJ*)(Mreuse)->data;
2963   ii  = aij->i;
2964   jj  = aij->j;
2965   aa  = aij->a;
2966   for (i=0; i<m; i++) {
2967     row   = rstart + i;
2968     nz    = ii[i+1] - ii[i];
2969     cwork = jj;     jj += nz;
2970     vwork = aa;     aa += nz;
2971     ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
2972   }
2973 
2974   ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2975   ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2976   *newmat = M;
2977 
2978   /* save submatrix used in processor for next request */
2979   if (call ==  MAT_INITIAL_MATRIX) {
2980     ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
2981     ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr);
2982   }
2983 
2984   PetscFunctionReturn(0);
2985 }
2986 
2987 EXTERN_C_BEGIN
2988 #undef __FUNCT__
2989 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR_MPIAIJ"
2990 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[])
2991 {
2992   PetscInt       m,cstart, cend,j,nnz,i,d;
2993   PetscInt       *d_nnz,*o_nnz,nnz_max = 0,rstart,ii;
2994   const PetscInt *JJ;
2995   PetscScalar    *values;
2996   PetscErrorCode ierr;
2997 
2998   PetscFunctionBegin;
2999   if (Ii[0]) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0, it is %D",Ii[0]);
3000 
3001   B->rmap.bs = B->cmap.bs = 1;
3002   ierr = PetscMapSetUp(&B->rmap);CHKERRQ(ierr);
3003   ierr = PetscMapSetUp(&B->cmap);CHKERRQ(ierr);
3004   m      = B->rmap.n;
3005   cstart = B->cmap.rstart;
3006   cend   = B->cmap.rend;
3007   rstart = B->rmap.rstart;
3008 
3009   ierr  = PetscMalloc((2*m+1)*sizeof(PetscInt),&d_nnz);CHKERRQ(ierr);
3010   o_nnz = d_nnz + m;
3011 
3012 #if defined(PETSC_USE_DEBUG)
3013   for (i=0; i<m; i++) {
3014     nnz     = Ii[i+1]- Ii[i];
3015     JJ      = J + Ii[i];
3016     if (nnz < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative number of columns %D",i,nnz);
3017     if (nnz && (JJ[0] < 0)) SETERRQ2(PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index %D",i,JJ[0]);
3018     if (nnz && (JJ[nnz-1] >= B->cmap.N)) SETERRQ3(PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap.N);
3019     for (j=1; j<nnz; j++) {
3020       if (JJ[j] <= JJ[j-1]) SETERRQ2(PETSC_ERR_ARG_WRONGSTATE,"Row %D has an unsorted column index at location %D",i,j);
3021     }
3022   }
3023 #endif
3024 
3025   for (i=0; i<m; i++) {
3026     nnz     = Ii[i+1]- Ii[i];
3027     JJ      = J + Ii[i];
3028     nnz_max = PetscMax(nnz_max,nnz);
3029     for (j=0; j<nnz; j++) {
3030       if (*JJ >= cstart) break;
3031       JJ++;
3032     }
3033     d = 0;
3034     for (; j<nnz; j++) {
3035       if (*JJ++ >= cend) break;
3036       d++;
3037     }
3038     d_nnz[i] = d;
3039     o_nnz[i] = nnz - d;
3040   }
3041   ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
3042   ierr = PetscFree(d_nnz);CHKERRQ(ierr);
3043 
3044   if (v) values = (PetscScalar*)v;
3045   else {
3046     ierr = PetscMalloc((nnz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr);
3047     ierr = PetscMemzero(values,nnz_max*sizeof(PetscScalar));CHKERRQ(ierr);
3048   }
3049 
3050   for (i=0; i<m; i++) {
3051     ii   = i + rstart;
3052     nnz  = Ii[i+1]- Ii[i];
3053     ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ? Ii[i] : 0),INSERT_VALUES);CHKERRQ(ierr);
3054   }
3055   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3056   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3057 
3058   if (!v) {
3059     ierr = PetscFree(values);CHKERRQ(ierr);
3060   }
3061   PetscFunctionReturn(0);
3062 }
3063 EXTERN_C_END
3064 
3065 #undef __FUNCT__
3066 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR"
3067 /*@
3068    MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format
3069    (the default parallel PETSc format).
3070 
3071    Collective on MPI_Comm
3072 
3073    Input Parameters:
3074 +  B - the matrix
3075 .  i - the indices into j for the start of each local row (starts with zero)
3076 .  j - the column indices for each local row (starts with zero) these must be sorted for each row
3077 -  v - optional values in the matrix
3078 
3079    Level: developer
3080 
3081    Notes:
3082        The i, j, and v arrays ARE copied by this routine into the internal format used by PETSc;
3083      thus you CANNOT change the matrix entries by changing the values of v[] after you have
3084      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3085 
3086        The i and j indices are 0 based, and i indices are indices corresponding to the local j array.
3087 
3088        The format used for the sparse matrix input is equivalent to a
3089     row-major ordering, i.e. for the following matrix, the input data expected is
3090     as shown:
3091 
3092         1 0 0
3093         2 0 3     P0
3094        -------
3095         4 5 6     P1
3096 
3097      Process0 [P0]: rows_owned=[0,1]
3098         i =  {0,1,3}  [size = nrow+1  = 2+1]
3099         j =  {0,0,2}  [size = nz = 3]
3100         v =  {1,2,3}  [size = nz = 3]
3101 
3102      Process1 [P1]: rows_owned=[2]
3103         i =  {0,3}    [size = nrow+1  = 1+1]
3104         j =  {0,1,2}  [size = nz = 3]
3105         v =  {4,5,6}  [size = nz = 3]
3106 
3107       The column indices for each row MUST be sorted.
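
       As an illustration, on process 0 in the example above the call might look
     as follows (a minimal sketch; the matrix B is assumed to have already been
     created, sized with MatSetSizes(), and given type MATMPIAIJ):

.vb
     PetscInt    i[] = {0,1,3};
     PetscInt    j[] = {0,0,2};
     PetscScalar v[] = {1.0,2.0,3.0};
     ierr = MatMPIAIJSetPreallocationCSR(B,i,j,v);CHKERRQ(ierr);
.ve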
3108 
3109 .keywords: matrix, aij, compressed row, sparse, parallel
3110 
3111 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateMPIAIJ(), MPIAIJ,
3112           MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays()
3113 @*/
3114 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
3115 {
3116   PetscErrorCode ierr,(*f)(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]);
3117 
3118   PetscFunctionBegin;
3119   ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",(void (**)(void))&f);CHKERRQ(ierr);
3120   if (f) {
3121     ierr = (*f)(B,i,j,v);CHKERRQ(ierr);
3122   }
3123   PetscFunctionReturn(0);
3124 }
3125 
3126 #undef __FUNCT__
3127 #define __FUNCT__ "MatMPIAIJSetPreallocation"
3128 /*@C
3129    MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format
3130    (the default parallel PETSc format).  For good matrix assembly performance
3131    the user should preallocate the matrix storage by setting the parameters
3132    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3133    performance can be increased by more than a factor of 50.
3134 
3135    Collective on MPI_Comm
3136 
3137    Input Parameters:
3138 +  A - the matrix
3139 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
3140            (same value is used for all local rows)
3141 .  d_nnz - array containing the number of nonzeros in the various rows of the
3142            DIAGONAL portion of the local submatrix (possibly different for each row)
3143            or PETSC_NULL, if d_nz is used to specify the nonzero structure.
3144            The size of this array is equal to the number of local rows, i.e 'm'.
3145            You must leave room for the diagonal entry even if it is zero.
3146 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
3147            submatrix (same value is used for all local rows).
3148 -  o_nnz - array containing the number of nonzeros in the various rows of the
3149            OFF-DIAGONAL portion of the local submatrix (possibly different for
3150            each row) or PETSC_NULL, if o_nz is used to specify the nonzero
3151            structure. The size of this array is equal to the number
3152            of local rows, i.e 'm'.
3153 
3154    If the *_nnz parameter is given then the *_nz parameter is ignored
3155 
3156    The AIJ format (also called the Yale sparse matrix format or
3157    compressed row storage (CSR)), is fully compatible with standard Fortran 77
3158    storage.  The stored row and column indices begin with zero.  See the users manual for details.
3159 
3160    The parallel matrix is partitioned such that the first m0 rows belong to
3161    process 0, the next m1 rows belong to process 1, the next m2 rows belong
3162    to process 2, etc., where m0,m1,m2,... are the input parameter 'm'.
3163 
3164    The DIAGONAL portion of the local submatrix of a processor can be defined
3165    as the submatrix which is obtained by extracting the part corresponding
3166    to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the
3167    first row that belongs to the processor, and r2 is the last row belonging
3168    to this processor. This is a square mxm matrix. The remaining portion
3169    of the local submatrix, of size m x (N-m), constitutes the OFF-DIAGONAL portion.
3170 
3171    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
3172 
3173    You can call MatGetInfo() to get information on how effective the preallocation was;
3174    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
3175    You can also run with the option -info and look for messages with the string
3176    malloc in them to see if additional memory allocation was needed.
3177 
3178    Example usage:
3179 
3180    Consider the following 8x8 matrix with 34 non-zero values, that is
3181    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
3182    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
3183    as follows:
3184 
3185 .vb
3186             1  2  0  |  0  3  0  |  0  4
3187     Proc0   0  5  6  |  7  0  0  |  8  0
3188             9  0 10  | 11  0  0  | 12  0
3189     -------------------------------------
3190            13  0 14  | 15 16 17  |  0  0
3191     Proc1   0 18  0  | 19 20 21  |  0  0
3192             0  0  0  | 22 23  0  | 24  0
3193     -------------------------------------
3194     Proc2  25 26 27  |  0  0 28  | 29  0
3195            30  0  0  | 31 32 33  |  0 34
3196 .ve
3197 
3198    This can be represented as a collection of submatrices as:
3199 
3200 .vb
3201       A B C
3202       D E F
3203       G H I
3204 .ve
3205 
3206    Where the submatrices A,B,C are owned by proc0, D,E,F are
3207    owned by proc1, G,H,I are owned by proc2.
3208 
3209    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3210    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3211    The 'M','N' parameters are 8,8, and have the same values on all procs.
3212 
3213    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
3214    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
3215    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
3216    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
3217    part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
3218    matrix, and [DF] as another SeqAIJ matrix.
3219 
3220    When d_nz, o_nz parameters are specified, d_nz storage elements are
3221    allocated for every row of the local diagonal submatrix, and o_nz
3222    storage locations are allocated for every row of the OFF-DIAGONAL submat.
3223    One way to choose d_nz and o_nz is to use the maximum number of nonzeros per
3224    local row in the local DIAGONAL and OFF-DIAGONAL submatrices, respectively.
3225    In this case, the values of d_nz,o_nz are:
3226 .vb
3227      proc0 : dnz = 2, o_nz = 2
3228      proc1 : dnz = 3, o_nz = 2
3229      proc2 : dnz = 1, o_nz = 4
3230 .ve
3231    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
3232    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
3233    for proc2, i.e. we are using 12+15+10=37 storage locations to store
3234    34 values.
3235 
3236    When d_nnz, o_nnz parameters are specified, the storage is specified
3237    for every row, corresponding to both the DIAGONAL and OFF-DIAGONAL submatrices.
3238    In the above case the values for d_nnz,o_nnz are:
3239 .vb
3240      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
3241      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
3242      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
3243 .ve
3244    Here the space allocated is the sum of all the above values, i.e. 34, and
3245    hence the preallocation is perfect.
3246 
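   As an illustrative call (a minimal sketch, not taken from an actual example
   file), proc0 in the matrix above could set its per-row preallocation as:

.vb
     PetscInt d_nnz[] = {2,2,2};
     PetscInt o_nnz[] = {2,2,2};
     ierr = MatMPIAIJSetPreallocation(A,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
.ve
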
3247    Level: intermediate
3248 
3249 .keywords: matrix, aij, compressed row, sparse, parallel
3250 
3251 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIAIJ(), MatMPIAIJSetPreallocationCSR(),
3252           MPIAIJ, MatGetInfo()
3253 @*/
3254 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
3255 {
3256   PetscErrorCode ierr,(*f)(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]);
3257 
3258   PetscFunctionBegin;
3259   ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr);
3260   if (f) {
3261     ierr = (*f)(B,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
3262   }
3263   PetscFunctionReturn(0);
3264 }
3265 
3266 #undef __FUNCT__
3267 #define __FUNCT__ "MatCreateMPIAIJWithArrays"
3268 /*@
3269      MatCreateMPIAIJWithArrays - creates a MPI AIJ matrix using arrays that contain in standard
3270          CSR format the local rows.
3271 
3272    Collective on MPI_Comm
3273 
3274    Input Parameters:
3275 +  comm - MPI communicator
3276 .  m - number of local rows (Cannot be PETSC_DECIDE)
3277 .  n - This value should be the same as the local size used in creating the
3278        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
3279        calculated if N is given) For square matrices n is almost always m.
3280 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
3281 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
3282 .   i - row indices
3283 .   j - column indices
3284 -   a - matrix values
3285 
3286    Output Parameter:
3287 .   mat - the matrix
3288 
3289    Level: intermediate
3290 
3291    Notes:
3292        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
3293      thus you CANNOT change the matrix entries by changing the values of a[] after you have
3294      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3295 
3296        The i and j indices are 0 based, and i indices are indices corresponding to the local j array.
3297 
3298        The format used for the sparse matrix input is equivalent to a
3299     row-major ordering, i.e. for the following matrix, the input data expected is
3300     as shown:
3301 
3302         1 0 0
3303         2 0 3     P0
3304        -------
3305         4 5 6     P1
3306 
3307      Process0 [P0]: rows_owned=[0,1]
3308         i =  {0,1,3}  [size = nrow+1  = 2+1]
3309         j =  {0,0,2}  [size = nz = 3]
3310         v =  {1,2,3}  [size = nz = 3]
3311 
3312      Process1 [P1]: rows_owned=[2]
3313         i =  {0,3}    [size = nrow+1  = 1+1]
3314         j =  {0,1,2}  [size = nz = 3]
3315         v =  {4,5,6}  [size = nz = 3]
3316 
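     As an illustration, process 0 in the example above could create its part of
   the matrix as follows (a minimal sketch; the surrounding declarations and
   error handling are assumed):

.vb
     Mat         A;
     PetscInt    i[] = {0,1,3}, j[] = {0,0,2};
     PetscScalar a[] = {1.0,2.0,3.0};
     ierr = MatCreateMPIAIJWithArrays(PETSC_COMM_WORLD,2,PETSC_DECIDE,3,3,i,j,a,&A);CHKERRQ(ierr);
.ve
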
3317 .keywords: matrix, aij, compressed row, sparse, parallel
3318 
3319 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
3320           MPIAIJ, MatCreateMPIAIJ(), MatCreateMPIAIJWithSplitArrays()
3321 @*/
3322 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
3323 {
3324   PetscErrorCode ierr;
3325 
3326   PetscFunctionBegin;
3327   if (i[0]) {
3328     SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
3329   }
3330   if (m < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
3331   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
3332   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
3333   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
3334   ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr);
3335   PetscFunctionReturn(0);
3336 }
3337 
3338 #undef __FUNCT__
3339 #define __FUNCT__ "MatCreateMPIAIJ"
3340 /*@C
3341    MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format
3342    (the default parallel PETSc format).  For good matrix assembly performance
3343    the user should preallocate the matrix storage by setting the parameters
3344    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3345    performance can be increased by more than a factor of 50.
3346 
3347    Collective on MPI_Comm
3348 
3349    Input Parameters:
3350 +  comm - MPI communicator
3351 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
3352            This value should be the same as the local size used in creating the
3353            y vector for the matrix-vector product y = Ax.
3354 .  n - This value should be the same as the local size used in creating the
3355        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
3356        calculated if N is given) For square matrices n is almost always m.
3357 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
3358 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
3359 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
3360            (same value is used for all local rows)
3361 .  d_nnz - array containing the number of nonzeros in the various rows of the
3362            DIAGONAL portion of the local submatrix (possibly different for each row)
3363            or PETSC_NULL, if d_nz is used to specify the nonzero structure.
3364            The size of this array is equal to the number of local rows, i.e 'm'.
3365            You must leave room for the diagonal entry even if it is zero.
3366 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
3367            submatrix (same value is used for all local rows).
3368 -  o_nnz - array containing the number of nonzeros in the various rows of the
3369            OFF-DIAGONAL portion of the local submatrix (possibly different for
3370            each row) or PETSC_NULL, if o_nz is used to specify the nonzero
3371            structure. The size of this array is equal to the number
3372            of local rows, i.e 'm'.
3373 
3374    Output Parameter:
3375 .  A - the matrix
3376 
3377    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
3378    MatXXXXSetPreallocation() paradgm instead of this routine directly. This is definitely
3379    true if you plan to use the external direct solvers such as SuperLU, MUMPS or Spooles.
3380    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
3381 
3382    Notes:
3383    If the *_nnz parameter is given then the *_nz parameter is ignored
3384 
3385    m,n,M,N parameters specify the size of the matrix, and its partitioning across
3386    processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
3387    storage requirements for this matrix.
3388 
3389    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one
3390    processor then it must be used on all processors that share the object for
3391    that argument.
3392 
3393    The user MUST specify either the local or global matrix dimensions
3394    (possibly both).
3395 
3396    The parallel matrix is partitioned across processors such that the
3397    first m0 rows belong to process 0, the next m1 rows belong to
3398    process 1, the next m2 rows belong to process 2 etc.. where
3399    m0,m1,m2,.. are the input parameter 'm'. i.e each processor stores
3400    values corresponding to [m x N] submatrix.
3401 
3402    The columns are logically partitioned with the n0 columns belonging
3403    to the 0th partition, the next n1 columns belonging to the next
3404    partition, etc., where n0,n1,n2,... are the input parameter 'n'.
3405 
3406    The DIAGONAL portion of the local submatrix on any given processor
3407    is the submatrix corresponding to the rows and columns m,n
3408    owned by the given processor, i.e. the diagonal matrix on
3409    process 0 is [m0 x n0], diagonal matrix on process 1 is [m1 x n1]
3410    etc. The remaining portion of the local submatrix [m x (N-n)]
3411    constitute the OFF-DIAGONAL portion. The example below better
3412    illustrates this concept.
3413 
3414    For a square global matrix we define each processor's diagonal portion
3415    to be its local rows and the corresponding columns (a square submatrix);
3416    each processor's off-diagonal portion encompasses the remainder of the
3417    local matrix (a rectangular submatrix).
3418 
3419    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
3420 
3421    When calling this routine with a single process communicator, a matrix of
3422    type SEQAIJ is returned.  If a matrix of type MPIAIJ is desired for this
3423    type of communicator, use the construction mechanism:
3424      MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatMPIAIJSetPreallocation(A,...);
3425 
3426    By default, this format uses inodes (identical nodes) when possible.
3427    We search for consecutive rows with the same nonzero structure, thereby
3428    reusing matrix information to achieve increased efficiency.
3429 
3430    Options Database Keys:
3431 +  -mat_no_inode  - Do not use inodes
3432 .  -mat_inode_limit <limit> - Sets inode limit (max limit=5)
3433 -  -mat_aij_oneindex - Internally use indexing starting at 1
3434         rather than 0.  Note that when calling MatSetValues(),
3435         the user still MUST index entries starting at 0!
3436 
3437 
3438    Example usage:
3439 
3440    Consider the following 8x8 matrix with 34 non-zero values, that is
3441    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
3442    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
3443    as follows:
3444 
3445 .vb
3446             1  2  0  |  0  3  0  |  0  4
3447     Proc0   0  5  6  |  7  0  0  |  8  0
3448             9  0 10  | 11  0  0  | 12  0
3449     -------------------------------------
3450            13  0 14  | 15 16 17  |  0  0
3451     Proc1   0 18  0  | 19 20 21  |  0  0
3452             0  0  0  | 22 23  0  | 24  0
3453     -------------------------------------
3454     Proc2  25 26 27  |  0  0 28  | 29  0
3455            30  0  0  | 31 32 33  |  0 34
3456 .ve
3457 
3458    This can be represented as a collection of submatrices as:
3459 
3460 .vb
3461       A B C
3462       D E F
3463       G H I
3464 .ve
3465 
3466    Where the submatrices A,B,C are owned by proc0, D,E,F are
3467    owned by proc1, G,H,I are owned by proc2.
3468 
3469    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3470    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3471    The 'M','N' parameters are 8,8, and have the same values on all procs.
3472 
3473    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
3474    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
3475    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
3476    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
3477    part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
3478    matrix, and [DF] as another SeqAIJ matrix.
3479 
3480    When d_nz, o_nz parameters are specified, d_nz storage elements are
3481    allocated for every row of the local diagonal submatrix, and o_nz
3482    storage locations are allocated for every row of the OFF-DIAGONAL submat.
3483    One way to choose d_nz and o_nz is to use the maximum number of nonzeros per
3484    local row in the local DIAGONAL and OFF-DIAGONAL submatrices, respectively.
3485    In this case, the values of d_nz,o_nz are:
3486 .vb
3487      proc0 : dnz = 2, o_nz = 2
3488      proc1 : dnz = 3, o_nz = 2
3489      proc2 : dnz = 1, o_nz = 4
3490 .ve
3491    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
3492    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
3493    for proc2, i.e. we are using 12+15+10=37 storage locations to store
3494    34 values.
3495 
3496    When d_nnz, o_nnz parameters are specified, the storage is specified
3497    for every row, corresponding to both the DIAGONAL and OFF-DIAGONAL submatrices.
3498    In the above case the values for d_nnz,o_nnz are:
3499 .vb
3500      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
3501      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
3502      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
3503 .ve
3504    Here the space allocated is the sum of all the above values, i.e. 34, and
3505    hence the preallocation is perfect.
3506 
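   As an illustrative call (a minimal sketch, not taken from an actual example
   file), proc0 in the matrix above might create the parallel matrix with:

.vb
     PetscInt d_nnz[] = {2,2,2};
     PetscInt o_nnz[] = {2,2,2};
     ierr = MatCreateMPIAIJ(PETSC_COMM_WORLD,3,3,8,8,0,d_nnz,0,o_nnz,&A);CHKERRQ(ierr);
.ve
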
3507    Level: intermediate
3508 
3509 .keywords: matrix, aij, compressed row, sparse, parallel
3510 
3511 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
3512           MPIAIJ, MatCreateMPIAIJWithArrays()
3513 @*/
3514 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
3515 {
3516   PetscErrorCode ierr;
3517   PetscMPIInt    size;
3518 
3519   PetscFunctionBegin;
3520   ierr = MatCreate(comm,A);CHKERRQ(ierr);
3521   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
3522   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3523   if (size > 1) {
3524     ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr);
3525     ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
3526   } else {
3527     ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr);
3528     ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr);
3529   }
3530   PetscFunctionReturn(0);
3531 }
3532 
3533 #undef __FUNCT__
3534 #define __FUNCT__ "MatMPIAIJGetSeqAIJ"
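/*
   Gives access to the internal storage of an MPIAIJ matrix: the "diagonal"
   SeqAIJ block Ad, the "off-diagonal" SeqAIJ block Ao, and the map colmap
   from the local column numbers of Ao to global column numbers.
*/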
3535 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,PetscInt *colmap[])
3536 {
3537   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
3538 
3539   PetscFunctionBegin;
3540   *Ad     = a->A;
3541   *Ao     = a->B;
3542   *colmap = a->garray;
3543   PetscFunctionReturn(0);
3544 }
3545 
3546 #undef __FUNCT__
3547 #define __FUNCT__ "MatSetColoring_MPIAIJ"
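/*
   Sets a coloring on both SeqAIJ blocks of the MPIAIJ matrix. For a global
   coloring the colors are gathered onto each process and remapped through
   garray for the off-diagonal block; for a ghosted coloring they are looked
   up through the matrix's local-to-global mapping for each block.
*/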
3548 PetscErrorCode MatSetColoring_MPIAIJ(Mat A,ISColoring coloring)
3549 {
3550   PetscErrorCode ierr;
3551   PetscInt       i;
3552   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
3553 
3554   PetscFunctionBegin;
3555   if (coloring->ctype == IS_COLORING_GLOBAL) {
3556     ISColoringValue *allcolors,*colors;
3557     ISColoring      ocoloring;
3558 
3559     /* set coloring for diagonal portion */
3560     ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr);
3561 
3562     /* set coloring for off-diagonal portion */
3563     ierr = ISAllGatherColors(((PetscObject)A)->comm,coloring->n,coloring->colors,PETSC_NULL,&allcolors);CHKERRQ(ierr);
3564     ierr = PetscMalloc((a->B->cmap.n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
3565     for (i=0; i<a->B->cmap.n; i++) {
3566       colors[i] = allcolors[a->garray[i]];
3567     }
3568     ierr = PetscFree(allcolors);CHKERRQ(ierr);
3569     ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap.n,colors,&ocoloring);CHKERRQ(ierr);
3570     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
3571     ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
3572   } else if (coloring->ctype == IS_COLORING_GHOSTED) {
3573     ISColoringValue *colors;
3574     PetscInt        *larray;
3575     ISColoring      ocoloring;
3576 
3577     /* set coloring for diagonal portion */
3578     ierr = PetscMalloc((a->A->cmap.n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr);
3579     for (i=0; i<a->A->cmap.n; i++) {
3580       larray[i] = i + A->cmap.rstart;
3581     }
3582     ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->A->cmap.n,larray,PETSC_NULL,larray);CHKERRQ(ierr);
3583     ierr = PetscMalloc((a->A->cmap.n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
3584     for (i=0; i<a->A->cmap.n; i++) {
3585       colors[i] = coloring->colors[larray[i]];
3586     }
3587     ierr = PetscFree(larray);CHKERRQ(ierr);
3588     ierr = ISColoringCreate(PETSC_COMM_SELF,coloring->n,a->A->cmap.n,colors,&ocoloring);CHKERRQ(ierr);
3589     ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr);
3590     ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
3591 
3592     /* set coloring for off-diagonal portion */
3593     ierr = PetscMalloc((a->B->cmap.n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr);
3594     ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->B->cmap.n,a->garray,PETSC_NULL,larray);CHKERRQ(ierr);
3595     ierr = PetscMalloc((a->B->cmap.n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
3596     for (i=0; i<a->B->cmap.n; i++) {
3597       colors[i] = coloring->colors[larray[i]];
3598     }
3599     ierr = PetscFree(larray);CHKERRQ(ierr);
3600     ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap.n,colors,&ocoloring);CHKERRQ(ierr);
3601     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
3602     ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
3603   } else {
3604     SETERRQ1(PETSC_ERR_SUP,"No support for ISColoringType %d",(int)coloring->ctype);
3605   }
3606 
3607   PetscFunctionReturn(0);
3608 }
3609 
3610 #if defined(PETSC_HAVE_ADIC)
3611 #undef __FUNCT__
3612 #define __FUNCT__ "MatSetValuesAdic_MPIAIJ"
3613 PetscErrorCode MatSetValuesAdic_MPIAIJ(Mat A,void *advalues)
3614 {
3615   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
3616   PetscErrorCode ierr;
3617 
3618   PetscFunctionBegin;
3619   ierr = MatSetValuesAdic_SeqAIJ(a->A,advalues);CHKERRQ(ierr);
3620   ierr = MatSetValuesAdic_SeqAIJ(a->B,advalues);CHKERRQ(ierr);
3621   PetscFunctionReturn(0);
3622 }
3623 #endif
3624 
3625 #undef __FUNCT__
3626 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ"
3627 PetscErrorCode MatSetValuesAdifor_MPIAIJ(Mat A,PetscInt nl,void *advalues)
3628 {
3629   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
3630   PetscErrorCode ierr;
3631 
3632   PetscFunctionBegin;
3633   ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr);
3634   ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr);
3635   PetscFunctionReturn(0);
3636 }
3637 
3638 #undef __FUNCT__
3639 #define __FUNCT__ "MatMerge"
3640 /*@
3641       MatMerge - Creates a single large PETSc matrix by concatenating sequential
3642                  matrices from each processor
3643 
3644     Collective on MPI_Comm
3645 
3646    Input Parameters:
3647 +    comm - the communicators the parallel matrix will live on
3648 .    inmat - the input sequential matrices
3649 .    n - number of local columns (or PETSC_DECIDE)
3650 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
3651 
3652    Output Parameter:
3653 .    outmat - the parallel matrix generated
3654 
3655     Level: advanced
3656 
3657    Notes: The number of columns of the matrix in EACH processor MUST be the same.
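
   A minimal usage sketch (here Aloc is assumed to be the sequential matrix held
   by this process and Aglob the resulting parallel matrix; note that the routine
   destroys the input matrix):

.vb
     Mat Aglob;
     ierr = MatMerge(PETSC_COMM_WORLD,Aloc,PETSC_DECIDE,MAT_INITIAL_MATRIX,&Aglob);CHKERRQ(ierr);
.ve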
3658 
3659 @*/
3660 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
3661 {
3662   PetscErrorCode ierr;
3663   PetscInt       m,N,i,rstart,nnz,Ii,*dnz,*onz;
3664   PetscInt       *indx;
3665   PetscScalar    *values;
3666 
3667   PetscFunctionBegin;
3668   ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
3669   if (scall == MAT_INITIAL_MATRIX){
3670     /* count nonzeros in each row, for diagonal and off diagonal portion of matrix */
3671     if (n == PETSC_DECIDE){
3672       ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr);
3673     }
3674     ierr = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3675     rstart -= m;
3676 
3677     ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
3678     for (i=0;i<m;i++) {
3679       ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr);
3680       ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr);
3681       ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr);
3682     }
3683     /* This routine will ONLY return MPIAIJ type matrix */
3684     ierr = MatCreate(comm,outmat);CHKERRQ(ierr);
3685     ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
3686     ierr = MatSetType(*outmat,MATMPIAIJ);CHKERRQ(ierr);
3687     ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr);
3688     ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
3689 
3690   } else if (scall == MAT_REUSE_MATRIX){
3691     ierr = MatGetOwnershipRange(*outmat,&rstart,PETSC_NULL);CHKERRQ(ierr);
3692   } else {
3693     SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
3694   }
3695 
3696   for (i=0;i<m;i++) {
3697     ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
3698     Ii    = i + rstart;
3699     ierr = MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
3700     ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
3701   }
3702   ierr = MatDestroy(inmat);CHKERRQ(ierr);
3703   ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3704   ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3705 
3706   PetscFunctionReturn(0);
3707 }
3708 
3709 #undef __FUNCT__
3710 #define __FUNCT__ "MatFileSplit"
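/*
   Extracts each process's local rows of A into a sequential matrix and writes
   that matrix to the binary file "<outfile>.<rank>" (opened in append mode).
*/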
3711 PetscErrorCode MatFileSplit(Mat A,char *outfile)
3712 {
3713   PetscErrorCode    ierr;
3714   PetscMPIInt       rank;
3715   PetscInt          m,N,i,rstart,nnz;
3716   size_t            len;
3717   const PetscInt    *indx;
3718   PetscViewer       out;
3719   char              *name;
3720   Mat               B;
3721   const PetscScalar *values;
3722 
3723   PetscFunctionBegin;
3724   ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr);
3725   ierr = MatGetSize(A,0,&N);CHKERRQ(ierr);
3726   /* Should this be the type of the diagonal block of A? */
3727   ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr);
3728   ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr);
3729   ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr);
3730   ierr = MatSeqAIJSetPreallocation(B,0,PETSC_NULL);CHKERRQ(ierr);
3731   ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr);
3732   for (i=0;i<m;i++) {
3733     ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
3734     ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
3735     ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
3736   }
3737   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3738   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3739 
3740   ierr = MPI_Comm_rank(((PetscObject)A)->comm,&rank);CHKERRQ(ierr);
3741   ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr);
3742   ierr = PetscMalloc((len+5)*sizeof(char),&name);CHKERRQ(ierr);
3743   sprintf(name,"%s.%d",outfile,rank);
3744   ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr);
3745   ierr = PetscFree(name);CHKERRQ(ierr);
3746   ierr = MatView(B,out);CHKERRQ(ierr);
3747   ierr = PetscViewerDestroy(out);CHKERRQ(ierr);
3748   ierr = MatDestroy(B);CHKERRQ(ierr);
3749   PetscFunctionReturn(0);
3750 }
3751 
3752 EXTERN PetscErrorCode MatDestroy_MPIAIJ(Mat);
3753 #undef __FUNCT__
3754 #define __FUNCT__ "MatDestroy_MPIAIJ_SeqsToMPI"
3755 PetscErrorCode PETSCMAT_DLLEXPORT MatDestroy_MPIAIJ_SeqsToMPI(Mat A)
3756 {
3757   PetscErrorCode       ierr;
3758   Mat_Merge_SeqsToMPI  *merge;
3759   PetscContainer       container;
3760 
3761   PetscFunctionBegin;
3762   ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr);
3763   if (container) {
3764     ierr = PetscContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr);
3765     ierr = PetscFree(merge->id_r);CHKERRQ(ierr);
3766     ierr = PetscFree(merge->len_s);CHKERRQ(ierr);
3767     ierr = PetscFree(merge->len_r);CHKERRQ(ierr);
3768     ierr = PetscFree(merge->bi);CHKERRQ(ierr);
3769     ierr = PetscFree(merge->bj);CHKERRQ(ierr);
3770     ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr);
3771     ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr);
3772     ierr = PetscFree(merge->coi);CHKERRQ(ierr);
3773     ierr = PetscFree(merge->coj);CHKERRQ(ierr);
3774     ierr = PetscFree(merge->owners_co);CHKERRQ(ierr);
3775     ierr = PetscFree(merge->rowmap.range);CHKERRQ(ierr);
3776 
3777     ierr = PetscContainerDestroy(container);CHKERRQ(ierr);
3778     ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr);
3779     ierr = PetscFree(merge);CHKERRQ(ierr);
3780   }
3781 
3782   ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr);
3783   PetscFunctionReturn(0);
3784 }
3785 
3786 #include "src/mat/utils/freespace.h"
3787 #include "petscbt.h"
3788 
3789 #undef __FUNCT__
3790 #define __FUNCT__ "MatMerge_SeqsToMPINumeric"
3791 /*@C
3792       MatMerge_SeqsToMPI - Creates an MPIAIJ matrix by adding sequential
3793                  matrices from each processor
3794 
3795     Collective on MPI_Comm
3796 
3797    Input Parameters:
3798 +    comm - the communicators the parallel matrix will live on
3799 .    seqmat - the input sequential matrices
3800 .    m - number of local rows (or PETSC_DECIDE)
3801 .    n - number of local columns (or PETSC_DECIDE)
3802 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
3803 
3804    Output Parameter:
3805 .    mpimat - the parallel matrix generated
3806 
3807     Level: advanced
3808 
3809    Notes:
3810      The dimensions of the sequential matrix in each processor MUST be the same.
3811      The input seqmat is stored in the container "Mat_Merge_SeqsToMPI", and will be
3812      destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat.
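
     A minimal usage sketch of the symbolic/numeric pair (assuming each process
     holds a SeqAIJ matrix seqmat of identical dimensions):

.vb
     Mat C;
     ierr = MatMerge_SeqsToMPISymbolic(PETSC_COMM_WORLD,seqmat,PETSC_DECIDE,PETSC_DECIDE,&C);CHKERRQ(ierr);
     ierr = MatMerge_SeqsToMPINumeric(seqmat,C);CHKERRQ(ierr);
.ve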
3813 @*/
3814 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPINumeric(Mat seqmat,Mat mpimat)
3815 {
3816   PetscErrorCode       ierr;
3817   MPI_Comm             comm=((PetscObject)mpimat)->comm;
3818   Mat_SeqAIJ           *a=(Mat_SeqAIJ*)seqmat->data;
3819   PetscMPIInt          size,rank,taga,*len_s;
3820   PetscInt             N=mpimat->cmap.N,i,j,*owners,*ai=a->i,*aj=a->j;
3821   PetscInt             proc,m;
3822   PetscInt             **buf_ri,**buf_rj;
3823   PetscInt             k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj;
3824   PetscInt             nrows,**buf_ri_k,**nextrow,**nextai;
3825   MPI_Request          *s_waits,*r_waits;
3826   MPI_Status           *status;
3827   MatScalar            *aa=a->a;
3828   MatScalar            **abuf_r,*ba_i;
3829   Mat_Merge_SeqsToMPI  *merge;
3830   PetscContainer       container;
3831 
3832   PetscFunctionBegin;
3833   ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
3834 
3835   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3836   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3837 
3838   ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr);
3839   if (container) {
3840     ierr  = PetscContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr);
3841   } else SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Matrix must be created with MatMerge_SeqsToMPISymbolic() first");
3842   bi     = merge->bi;
3843   bj     = merge->bj;
3844   buf_ri = merge->buf_ri;
3845   buf_rj = merge->buf_rj;
3846 
3847   ierr   = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr);
3848   owners = merge->rowmap.range;
3849   len_s  = merge->len_s;
3850 
3851   /* send and recv matrix values */
3852   /*-----------------------------*/
3853   ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr);
3854   ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr);
3855 
3856   ierr = PetscMalloc((merge->nsend+1)*sizeof(MPI_Request),&s_waits);CHKERRQ(ierr);
3857   for (proc=0,k=0; proc<size; proc++){
3858     if (!len_s[proc]) continue;
3859     i = owners[proc];
3860     ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr);
3861     k++;
3862   }
3863 
3864   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);}
3865   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);}
3866   ierr = PetscFree(status);CHKERRQ(ierr);
3867 
3868   ierr = PetscFree(s_waits);CHKERRQ(ierr);
3869   ierr = PetscFree(r_waits);CHKERRQ(ierr);
3870 
3871   /* insert mat values of mpimat */
3872   /*----------------------------*/
3873   ierr = PetscMalloc(N*sizeof(PetscScalar),&ba_i);CHKERRQ(ierr);
3874   ierr = PetscMalloc((3*merge->nrecv+1)*sizeof(PetscInt**),&buf_ri_k);CHKERRQ(ierr);
3875   nextrow = buf_ri_k + merge->nrecv;
3876   nextai  = nextrow + merge->nrecv;
3877 
3878   for (k=0; k<merge->nrecv; k++){
3879     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
3880     nrows = *(buf_ri_k[k]);
3881     nextrow[k]  = buf_ri_k[k]+1;  /* next row number of k-th recved i-structure */
3882     nextai[k]   = buf_ri_k[k] + (nrows + 1);/* points to the next i-structure of k-th recved i-structure  */
3883   }
3884 
3885   /* set values of ba */
3886   m = merge->rowmap.n;
3887   for (i=0; i<m; i++) {
3888     arow = owners[rank] + i;
3889     bj_i = bj+bi[i];  /* col indices of the i-th row of mpimat */
3890     bnzi = bi[i+1] - bi[i];
3891     ierr = PetscMemzero(ba_i,bnzi*sizeof(PetscScalar));CHKERRQ(ierr);
3892 
3893     /* add local non-zero vals of this proc's seqmat into ba */
3894     anzi = ai[arow+1] - ai[arow];
3895     aj   = a->j + ai[arow];
3896     aa   = a->a + ai[arow];
3897     nextaj = 0;
3898     for (j=0; nextaj<anzi; j++){
3899       if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */
3900         ba_i[j] += aa[nextaj++];
3901       }
3902     }
3903 
3904     /* add received vals into ba */
3905     for (k=0; k<merge->nrecv; k++){ /* k-th received message */
3906       /* i-th row */
3907       if (i == *nextrow[k]) {
3908         anzi = *(nextai[k]+1) - *nextai[k];
3909         aj   = buf_rj[k] + *(nextai[k]);
3910         aa   = abuf_r[k] + *(nextai[k]);
3911         nextaj = 0;
3912         for (j=0; nextaj<anzi; j++){
3913           if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */
3914             ba_i[j] += aa[nextaj++];
3915           }
3916         }
3917         nextrow[k]++; nextai[k]++;
3918       }
3919     }
3920     ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr);
3921   }
3922   ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3923   ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3924 
3925   ierr = PetscFree(abuf_r);CHKERRQ(ierr);
3926   ierr = PetscFree(ba_i);CHKERRQ(ierr);
3927   ierr = PetscFree(buf_ri_k);CHKERRQ(ierr);
3928   ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
3929   PetscFunctionReturn(0);
3930 }
3931 
3932 #undef __FUNCT__
3933 #define __FUNCT__ "MatMerge_SeqsToMPISymbolic"
3934 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPISymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat)
3935 {
3936   PetscErrorCode       ierr;
3937   Mat                  B_mpi;
3938   Mat_SeqAIJ           *a=(Mat_SeqAIJ*)seqmat->data;
3939   PetscMPIInt          size,rank,tagi,tagj,*len_s,*len_si,*len_ri;
3940   PetscInt             **buf_rj,**buf_ri,**buf_ri_k;
3941   PetscInt             M=seqmat->rmap.n,N=seqmat->cmap.n,i,*owners,*ai=a->i,*aj=a->j;
3942   PetscInt             len,proc,*dnz,*onz;
3943   PetscInt             k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0;
3944   PetscInt             nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai;
3945   MPI_Request          *si_waits,*sj_waits,*ri_waits,*rj_waits;
3946   MPI_Status           *status;
3947   PetscFreeSpaceList   free_space=PETSC_NULL,current_space=PETSC_NULL;
3948   PetscBT              lnkbt;
3949   Mat_Merge_SeqsToMPI  *merge;
3950   PetscContainer       container;
3951 
3952   PetscFunctionBegin;
3953   ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
3954 
3955   /* make sure it is a PETSc comm */
3956   ierr = PetscCommDuplicate(comm,&comm,PETSC_NULL);CHKERRQ(ierr);
3957   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3958   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3959 
3960   ierr = PetscNew(Mat_Merge_SeqsToMPI,&merge);CHKERRQ(ierr);
3961   ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr);
3962 
3963   /* determine row ownership */
3964   /*---------------------------------------------------------*/
3965   ierr = PetscMapInitialize(comm,&merge->rowmap);CHKERRQ(ierr);
3966   merge->rowmap.n = m;
3967   merge->rowmap.N = M;
3968   merge->rowmap.bs = 1;
3969   ierr = PetscMapSetUp(&merge->rowmap);CHKERRQ(ierr);
3970   ierr = PetscMalloc(size*sizeof(PetscMPIInt),&len_si);CHKERRQ(ierr);
3971   ierr = PetscMalloc(size*sizeof(PetscMPIInt),&merge->len_s);CHKERRQ(ierr);
3972 
3973   m      = merge->rowmap.n;
3974   M      = merge->rowmap.N;
3975   owners = merge->rowmap.range;
3976 
3977   /* determine the number of messages to send, their lengths */
3978   /*---------------------------------------------------------*/
3979   len_s  = merge->len_s;
3980 
3981   len = 0;  /* length of buf_si[] */
3982   merge->nsend = 0;
3983   for (proc=0; proc<size; proc++){
3984     len_si[proc] = 0;
3985     if (proc == rank){
3986       len_s[proc] = 0;
3987     } else {
3988       len_si[proc] = owners[proc+1] - owners[proc] + 1;
3989       len_s[proc] = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */
3990     }
3991     if (len_s[proc]) {
3992       merge->nsend++;
3993       nrows = 0;
3994       for (i=owners[proc]; i<owners[proc+1]; i++){
3995         if (ai[i+1] > ai[i]) nrows++;
3996       }
3997       len_si[proc] = 2*(nrows+1);
3998       len += len_si[proc];
3999     }
4000   }
4001 
4002   /* determine the number and length of messages to receive for ij-structure */
4003   /*-------------------------------------------------------------------------*/
4004   ierr = PetscGatherNumberOfMessages(comm,PETSC_NULL,len_s,&merge->nrecv);CHKERRQ(ierr);
4005   ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr);
4006 
4007   /* post the Irecv of j-structure */
4008   /*-------------------------------*/
4009   ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr);
4010   ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr);
4011 
4012   /* post the Isend of j-structure */
4013   /*--------------------------------*/
4014   ierr = PetscMalloc((2*merge->nsend+1)*sizeof(MPI_Request),&si_waits);CHKERRQ(ierr);
4015   sj_waits = si_waits + merge->nsend;
4016 
4017   for (proc=0, k=0; proc<size; proc++){
4018     if (!len_s[proc]) continue;
4019     i = owners[proc];
4020     ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr);
4021     k++;
4022   }
4023 
4024   /* receives and sends of j-structure are complete */
4025   /*------------------------------------------------*/
4026   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);}
4027   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);}
4028 
4029   /* send and recv i-structure */
4030   /*---------------------------*/
4031   ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr);
4032   ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr);
4033 
4034   ierr = PetscMalloc((len+1)*sizeof(PetscInt),&buf_s);CHKERRQ(ierr);
4035   buf_si = buf_s;  /* points to the beginning of k-th msg to be sent */
4036   for (proc=0,k=0; proc<size; proc++){
4037     if (!len_s[proc]) continue;
4038     /* form outgoing message for i-structure:
4039          buf_si[0]:                 nrows to be sent
4040                [1:nrows]:           row index (local to the receiving process)
4041                [nrows+1:2*nrows+1]: i-structure index
4042     */
4043     /*-------------------------------------------*/
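    /* Illustrative example (values are hypothetical, not taken from any particular run):
       if only local rows 3 and 5 of the slice destined for [proc] are nonempty, with 2 and
       4 nonzeros respectively, then nrows = 2 and the outgoing message is
           buf_si = {2, 3, 5, 0, 2, 6}
       i.e. the row count, the two local row indices, and the CSR-style offsets {0,2,6}. */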
4044     nrows = len_si[proc]/2 - 1;
4045     buf_si_i    = buf_si + nrows+1;
4046     buf_si[0]   = nrows;
4047     buf_si_i[0] = 0;
4048     nrows = 0;
4049     for (i=owners[proc]; i<owners[proc+1]; i++){
4050       anzi = ai[i+1] - ai[i];
4051       if (anzi) {
4052         buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */
4053         buf_si[nrows+1] = i-owners[proc]; /* local row index */
4054         nrows++;
4055       }
4056     }
4057     ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr);
4058     k++;
4059     buf_si += len_si[proc];
4060   }
4061 
4062   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);}
4063   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);}
4064 
4065   ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr);
4066   for (i=0; i<merge->nrecv; i++){
4067     ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr);
4068   }
4069 
4070   ierr = PetscFree(len_si);CHKERRQ(ierr);
4071   ierr = PetscFree(len_ri);CHKERRQ(ierr);
4072   ierr = PetscFree(rj_waits);CHKERRQ(ierr);
4073   ierr = PetscFree(si_waits);CHKERRQ(ierr);
4074   ierr = PetscFree(ri_waits);CHKERRQ(ierr);
4075   ierr = PetscFree(buf_s);CHKERRQ(ierr);
4076   ierr = PetscFree(status);CHKERRQ(ierr);
4077 
4078   /* compute a local seq matrix in each processor */
4079   /*----------------------------------------------*/
4080   /* allocate bi array and free space for accumulating nonzero column info */
4081   ierr = PetscMalloc((m+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr);
4082   bi[0] = 0;
4083 
4084   /* create and initialize a linked list */
4085   nlnk = N+1;
4086   ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4087 
4088   /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */
4089   len = 0;
4090   len  = ai[owners[rank+1]] - ai[owners[rank]];
4091   ierr = PetscFreeSpaceGet((PetscInt)(2*len+1),&free_space);CHKERRQ(ierr);
4092   current_space = free_space;
4093 
4094   /* determine symbolic info for each local row */
4095   ierr = PetscMalloc((3*merge->nrecv+1)*sizeof(PetscInt**),&buf_ri_k);CHKERRQ(ierr);
4096   nextrow = buf_ri_k + merge->nrecv;
4097   nextai  = nextrow + merge->nrecv;
4098   for (k=0; k<merge->nrecv; k++){
4099     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th received i-structure */
4100     nrows = *buf_ri_k[k];
4101     nextrow[k]  = buf_ri_k[k] + 1;  /* next row number of k-th received i-structure */
4102     nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of the k-th received i-structure */
4103   }
4104 
4105   ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
4106   len = 0;
4107   for (i=0;i<m;i++) {
4108     bnzi   = 0;
4109     /* add local non-zero cols of this proc's seqmat into lnk */
4110     arow   = owners[rank] + i;
4111     anzi   = ai[arow+1] - ai[arow];
4112     aj     = a->j + ai[arow];
4113     ierr = PetscLLAdd(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4114     bnzi += nlnk;
4115     /* add received col data into lnk */
4116     for (k=0; k<merge->nrecv; k++){ /* k-th received message */
4117       if (i == *nextrow[k]) { /* i-th row */
4118         anzi = *(nextai[k]+1) - *nextai[k];
4119         aj   = buf_rj[k] + *nextai[k];
4120         ierr = PetscLLAdd(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4121         bnzi += nlnk;
4122         nextrow[k]++; nextai[k]++;
4123       }
4124     }
4125     if (len < bnzi) len = bnzi;  /* =max(bnzi) */
4126 
4127     /* if free space is not available, make more free space */
4128     if (current_space->local_remaining<bnzi) {
4129       ierr = PetscFreeSpaceGet(current_space->total_array_size,&current_space);CHKERRQ(ierr);
4130       nspacedouble++;
4131     }
4132     /* copy data into free space, then initialize lnk */
4133     ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
4134     ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr);
4135 
4136     current_space->array           += bnzi;
4137     current_space->local_used      += bnzi;
4138     current_space->local_remaining -= bnzi;
4139 
4140     bi[i+1] = bi[i] + bnzi;
4141   }
4142 
4143   ierr = PetscFree(buf_ri_k);CHKERRQ(ierr);
4144 
4145   ierr = PetscMalloc((bi[m]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr);
4146   ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr);
4147   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
4148 
4149   /* create symbolic parallel matrix B_mpi */
4150   /*---------------------------------------*/
4151   ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr);
4152   if (n==PETSC_DECIDE) {
4153     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr);
4154   } else {
4155     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
4156   }
4157   ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr);
4158   ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr);
4159   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
4160 
4161   /* B_mpi is not ready for use - assembly will be done by MatMerge_SeqsToMPINumeric() */
4162   B_mpi->assembled     = PETSC_FALSE;
4163   B_mpi->ops->destroy  = MatDestroy_MPIAIJ_SeqsToMPI;
4164   merge->bi            = bi;
4165   merge->bj            = bj;
4166   merge->buf_ri        = buf_ri;
4167   merge->buf_rj        = buf_rj;
4168   merge->coi           = PETSC_NULL;
4169   merge->coj           = PETSC_NULL;
4170   merge->owners_co     = PETSC_NULL;
4171 
4172   /* attach the supporting struct to B_mpi for reuse */
4173   ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
4174   ierr = PetscContainerSetPointer(container,merge);CHKERRQ(ierr);
4175   ierr = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr);
4176   *mpimat = B_mpi;
4177 
4178   ierr = PetscCommDestroy(&comm);CHKERRQ(ierr);
4179   ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4180   PetscFunctionReturn(0);
4181 }
4182 
4183 #undef __FUNCT__
4184 #define __FUNCT__ "MatMerge_SeqsToMPI"
4185 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPI(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat)
4186 {
4187   PetscErrorCode   ierr;
4188 
4189   PetscFunctionBegin;
4190   ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4191   if (scall == MAT_INITIAL_MATRIX){
4192     ierr = MatMerge_SeqsToMPISymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr);
4193   }
4194   ierr = MatMerge_SeqsToMPINumeric(seqmat,*mpimat);CHKERRQ(ierr);
4195   ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4196   PetscFunctionReturn(0);
4197 }
4198 
4199 #undef __FUNCT__
4200 #define __FUNCT__ "MatGetLocalMat"
4201 /*@
4202      MatGetLocalMat - Creates a SeqAIJ matrix that contains all the local rows of a parallel MPIAIJ matrix
4203 
4204     Not Collective
4205 
4206    Input Parameters:
4207 +    A - the matrix
4208 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4209 
4210    Output Parameter:
4211 .    A_loc - the local sequential matrix generated
4212 
4213     Level: developer
4214 
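   Example usage (a minimal sketch, assuming A is an assembled MPIAIJ matrix; the
   variable names are illustrative only):
.vb
      Mat A_loc;
      ierr = MatGetLocalMat(A,MAT_INITIAL_MATRIX,&A_loc);CHKERRQ(ierr);
      /* ... use A_loc as an ordinary SeqAIJ matrix ... */
      ierr = MatGetLocalMat(A,MAT_REUSE_MATRIX,&A_loc);CHKERRQ(ierr); /* refill after A's values change */
      ierr = MatDestroy(A_loc);CHKERRQ(ierr);
.ve
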
4215 @*/
4216 PetscErrorCode PETSCMAT_DLLEXPORT MatGetLocalMat(Mat A,MatReuse scall,Mat *A_loc)
4217 {
4218   PetscErrorCode  ierr;
4219   Mat_MPIAIJ      *mpimat=(Mat_MPIAIJ*)A->data;
4220   Mat_SeqAIJ      *mat,*a=(Mat_SeqAIJ*)(mpimat->A)->data,*b=(Mat_SeqAIJ*)(mpimat->B)->data;
4221   PetscInt        *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*cmap=mpimat->garray;
4222   MatScalar       *aa=a->a,*ba=b->a,*cam;
4223   PetscScalar     *ca;
4224   PetscInt        am=A->rmap.n,i,j,k,cstart=A->cmap.rstart;
4225   PetscInt        *ci,*cj,col,ncols_d,ncols_o,jo;
4226 
4227   PetscFunctionBegin;
4228   ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
4229   if (scall == MAT_INITIAL_MATRIX){
4230     ierr = PetscMalloc((1+am)*sizeof(PetscInt),&ci);CHKERRQ(ierr);
4231     ci[0] = 0;
4232     for (i=0; i<am; i++){
4233       ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]);
4234     }
4235     ierr = PetscMalloc((1+ci[am])*sizeof(PetscInt),&cj);CHKERRQ(ierr);
4236     ierr = PetscMalloc((1+ci[am])*sizeof(PetscScalar),&ca);CHKERRQ(ierr);
4237     k = 0;
4238     for (i=0; i<am; i++) {
4239       ncols_o = bi[i+1] - bi[i];
4240       ncols_d = ai[i+1] - ai[i];
4241       /* off-diagonal portion of A (global columns before the diagonal block) */
4242       for (jo=0; jo<ncols_o; jo++) {
4243         col = cmap[*bj];
4244         if (col >= cstart) break;
4245         cj[k]   = col; bj++;
4246         ca[k++] = *ba++;
4247       }
4248       /* diagonal portion of A */
4249       for (j=0; j<ncols_d; j++) {
4250         cj[k]   = cstart + *aj++;
4251         ca[k++] = *aa++;
4252       }
4253       /* off-diagonal portion of A (global columns after the diagonal block) */
4254       for (j=jo; j<ncols_o; j++) {
4255         cj[k]   = cmap[*bj++];
4256         ca[k++] = *ba++;
4257       }
4258     }
4259     /* put together the new matrix */
4260     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap.N,ci,cj,ca,A_loc);CHKERRQ(ierr);
4261     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
4262     /* Since these are PETSc arrays, change flags to free them as necessary. */
4263     mat          = (Mat_SeqAIJ*)(*A_loc)->data;
4264     mat->free_a  = PETSC_TRUE;
4265     mat->free_ij = PETSC_TRUE;
4266     mat->nonew   = 0;
4267   } else if (scall == MAT_REUSE_MATRIX){
4268     mat=(Mat_SeqAIJ*)(*A_loc)->data;
4269     ci = mat->i; cj = mat->j; cam = mat->a;
4270     for (i=0; i<am; i++) {
4271       /* off-diagonal portion of A */
4272       ncols_o = bi[i+1] - bi[i];
4273       for (jo=0; jo<ncols_o; jo++) {
4274         col = cmap[*bj];
4275         if (col >= cstart) break;
4276         *cam++ = *ba++; bj++;
4277       }
4278       /* diagonal portion of A */
4279       ncols_d = ai[i+1] - ai[i];
4280       for (j=0; j<ncols_d; j++) *cam++ = *aa++;
4281       /* off-diagonal portion of A */
4282       for (j=jo; j<ncols_o; j++) {
4283         *cam++ = *ba++; bj++;
4284       }
4285     }
4286   } else {
4287     SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
4288   }
4289 
4290   ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
4291   PetscFunctionReturn(0);
4292 }
4293 
4294 #undef __FUNCT__
4295 #define __FUNCT__ "MatGetLocalMatCondensed"
4296 /*@C
4297      MatGetLocalMatCondensed - Creates a SeqAIJ matrix from the local rows of a parallel MPIAIJ matrix, keeping only the NON-ZERO columns
4298 
4299     Not Collective
4300 
4301    Input Parameters:
4302 +    A - the matrix
4303 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4304 -    row, col - index sets of rows and columns to extract (or PETSC_NULL)
4305 
4306    Output Parameter:
4307 .    A_loc - the local sequential matrix generated
4308 
4309     Level: developer
4310 
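   Example usage (a minimal sketch, assuming A is an assembled MPIAIJ matrix; passing
   PETSC_NULL for row and col lets the routine build the index sets itself):
.vb
      Mat A_loc;
      ierr = MatGetLocalMatCondensed(A,MAT_INITIAL_MATRIX,PETSC_NULL,PETSC_NULL,&A_loc);CHKERRQ(ierr);
      /* ... A_loc holds the local rows of A restricted to its nonzero columns ... */
      ierr = MatDestroy(A_loc);CHKERRQ(ierr);
.ve
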
4311 @*/
4312 PetscErrorCode PETSCMAT_DLLEXPORT MatGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc)
4313 {
4314   Mat_MPIAIJ        *a=(Mat_MPIAIJ*)A->data;
4315   PetscErrorCode    ierr;
4316   PetscInt          i,start,end,ncols,nzA,nzB,*cmap,imark,*idx;
4317   IS                isrowa,iscola;
4318   Mat               *aloc;
4319 
4320   PetscFunctionBegin;
4321   ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
4322   if (!row){
4323     start = A->rmap.rstart; end = A->rmap.rend;
4324     ierr = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr);
4325   } else {
4326     isrowa = *row;
4327   }
4328   if (!col){
4329     start = A->cmap.rstart;
4330     cmap  = a->garray;
4331     nzA   = a->A->cmap.n;
4332     nzB   = a->B->cmap.n;
4333     ierr  = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr);
4334     ncols = 0;
4335     for (i=0; i<nzB; i++) {
4336       if (cmap[i] < start) idx[ncols++] = cmap[i];
4337       else break;
4338     }
4339     imark = i;
4340     for (i=0; i<nzA; i++) idx[ncols++] = start + i;
4341     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i];
4342     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,&iscola);CHKERRQ(ierr);
4343     ierr = PetscFree(idx);CHKERRQ(ierr);
4344   } else {
4345     iscola = *col;
4346   }
4347   if (scall != MAT_INITIAL_MATRIX){
4348     ierr = PetscMalloc(sizeof(Mat),&aloc);CHKERRQ(ierr);
4349     aloc[0] = *A_loc;
4350   }
4351   ierr = MatGetSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr);
4352   *A_loc = aloc[0];
4353   ierr = PetscFree(aloc);CHKERRQ(ierr);
4354   if (!row){
4355     ierr = ISDestroy(isrowa);CHKERRQ(ierr);
4356   }
4357   if (!col){
4358     ierr = ISDestroy(iscola);CHKERRQ(ierr);
4359   }
4360   ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
4361   PetscFunctionReturn(0);
4362 }
4363 
4364 #undef __FUNCT__
4365 #define __FUNCT__ "MatGetBrowsOfAcols"
4366 /*@C
4367     MatGetBrowsOfAcols - Creates a SeqAIJ matrix from the rows of B that correspond to the nonzero columns of the local part of A
4368 
4369     Collective on Mat
4370 
4371    Input Parameters:
4372 +    A,B - the matrices in mpiaij format
4373 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4374 -    rowb, colb - index sets of rows and columns of B to extract (or PETSC_NULL)
4375 
4376    Output Parameters:
4377 +    rowb, colb - index sets of rows and columns of B to extract
4378 .    brstart - row index in B_seq starting at which the next B->rmap.n rows are B's own local rows
4379 -    B_seq - the sequential matrix generated
4380 
4381     Level: developer
4382 
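   Example usage (a minimal sketch, assuming A and B are assembled MPIAIJ matrices with
   compatible layouts; the index sets and B_seq from the first call are reused in the second):
.vb
      IS       rowb,colb;
      PetscInt brstart;
      Mat      B_seq;
      ierr = MatGetBrowsOfAcols(A,B,MAT_INITIAL_MATRIX,&rowb,&colb,&brstart,&B_seq);CHKERRQ(ierr);
      /* ... after the numerical values of B change ... */
      ierr = MatGetBrowsOfAcols(A,B,MAT_REUSE_MATRIX,&rowb,&colb,&brstart,&B_seq);CHKERRQ(ierr);
      ierr = ISDestroy(rowb);CHKERRQ(ierr);
      ierr = ISDestroy(colb);CHKERRQ(ierr);
      ierr = MatDestroy(B_seq);CHKERRQ(ierr);
.ve
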
4383 @*/
4384 PetscErrorCode PETSCMAT_DLLEXPORT MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,PetscInt *brstart,Mat *B_seq)
4385 {
4386   Mat_MPIAIJ        *a=(Mat_MPIAIJ*)A->data;
4387   PetscErrorCode    ierr;
4388   PetscInt          *idx,i,start,ncols,nzA,nzB,*cmap,imark;
4389   IS                isrowb,iscolb;
4390   Mat               *bseq;
4391 
4392   PetscFunctionBegin;
4393   if (A->cmap.rstart != B->rmap.rstart || A->cmap.rend != B->rmap.rend){
4394     SETERRQ4(PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap.rstart,A->cmap.rend,B->rmap.rstart,B->rmap.rend);
4395   }
4396   ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
4397 
4398   if (scall == MAT_INITIAL_MATRIX){
4399     start = A->cmap.rstart;
4400     cmap  = a->garray;
4401     nzA   = a->A->cmap.n;
4402     nzB   = a->B->cmap.n;
4403     ierr  = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr);
4404     ncols = 0;
4405     for (i=0; i<nzB; i++) {  /* row < local row index */
4406       if (cmap[i] < start) idx[ncols++] = cmap[i];
4407       else break;
4408     }
4409     imark = i;
4410     for (i=0; i<nzA; i++) idx[ncols++] = start + i;  /* local rows */
4411     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */
4412     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,&isrowb);CHKERRQ(ierr);
4413     ierr = PetscFree(idx);CHKERRQ(ierr);
4414     *brstart = imark;
4415     ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap.N,0,1,&iscolb);CHKERRQ(ierr);
4416   } else {
4417     if (!rowb || !colb) SETERRQ(PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX");
4418     isrowb = *rowb; iscolb = *colb;
4419     ierr = PetscMalloc(sizeof(Mat),&bseq);CHKERRQ(ierr);
4420     bseq[0] = *B_seq;
4421   }
4422   ierr = MatGetSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr);
4423   *B_seq = bseq[0];
4424   ierr = PetscFree(bseq);CHKERRQ(ierr);
4425   if (!rowb){
4426     ierr = ISDestroy(isrowb);CHKERRQ(ierr);
4427   } else {
4428     *rowb = isrowb;
4429   }
4430   if (!colb){
4431     ierr = ISDestroy(iscolb);CHKERRQ(ierr);
4432   } else {
4433     *colb = iscolb;
4434   }
4435   ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
4436   PetscFunctionReturn(0);
4437 }
4438 
4439 #undef __FUNCT__
4440 #define __FUNCT__ "MatGetBrowsOfAoCols"
4441 /*@C
4442     MatGetBrowsOfAoCols - Creates a SeqAIJ matrix from the rows of B that correspond to the nonzero columns
4443     of the OFF-DIAGONAL portion of the local part of A
4444 
4445     Collective on Mat
4446 
4447    Input Parameters:
4448 +    A,B - the matrices in mpiaij format
4449 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4450 .    startsj - starting point in B's sending and receiving j-arrays, saved for MAT_REUSE (or PETSC_NULL)
4451 -    bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or PETSC_NULL)
4452 
4453    Output Parameter:
4454 .    B_oth - the sequential matrix generated
4455 
4456     Level: developer
4457 
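   Example usage (a minimal sketch, assuming A and B are assembled MPIAIJ matrices with
   compatible layouts; startsj and bufa are kept so the MAT_REUSE_MATRIX call can skip
   the symbolic communication):
.vb
      PetscInt  *startsj = PETSC_NULL;
      MatScalar *bufa    = PETSC_NULL;
      Mat       B_oth;
      ierr = MatGetBrowsOfAoCols(A,B,MAT_INITIAL_MATRIX,&startsj,&bufa,&B_oth);CHKERRQ(ierr);
      /* ... after the numerical values of B change ... */
      ierr = MatGetBrowsOfAoCols(A,B,MAT_REUSE_MATRIX,&startsj,&bufa,&B_oth);CHKERRQ(ierr);
      ierr = PetscFree(startsj);CHKERRQ(ierr);
      ierr = PetscFree(bufa);CHKERRQ(ierr);
      ierr = MatDestroy(B_oth);CHKERRQ(ierr);
.ve
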
4458 @*/
4459 PetscErrorCode PETSCMAT_DLLEXPORT MatGetBrowsOfAoCols(Mat A,Mat B,MatReuse scall,PetscInt **startsj,MatScalar **bufa_ptr,Mat *B_oth)
4460 {
4461   VecScatter_MPI_General *gen_to,*gen_from;
4462   PetscErrorCode         ierr;
4463   Mat_MPIAIJ             *a=(Mat_MPIAIJ*)A->data;
4464   Mat_SeqAIJ             *b_oth;
4465   VecScatter             ctx=a->Mvctx;
4466   MPI_Comm               comm=((PetscObject)ctx)->comm;
4467   PetscMPIInt            *rprocs,*sprocs,tag=((PetscObject)ctx)->tag,rank;
4468   PetscInt               *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap.n,row,*b_othi,*b_othj;
4469   PetscScalar            *rvalues,*svalues;
4470   MatScalar              *b_otha,*bufa,*bufA;
4471   PetscInt               i,j,k,l,ll,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len;
4472   MPI_Request            *rwaits = PETSC_NULL,*swaits = PETSC_NULL;
4473   MPI_Status             *sstatus,rstatus;
4474   PetscMPIInt            jj;
4475   PetscInt               *cols,sbs,rbs;
4476   PetscScalar            *vals;
4477 
4478   PetscFunctionBegin;
4479   if (A->cmap.rstart != B->rmap.rstart || A->cmap.rend != B->rmap.rend){
4480     SETERRQ4(PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",A->cmap.rstart,A->cmap.rend,B->rmap.rstart,B->rmap.rend);
4481   }
4482   ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
4483   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4484 
4485   gen_to   = (VecScatter_MPI_General*)ctx->todata;
4486   gen_from = (VecScatter_MPI_General*)ctx->fromdata;
4487   rvalues  = gen_from->values; /* holds the length of receiving row */
4488   svalues  = gen_to->values;   /* holds the length of sending row */
4489   nrecvs   = gen_from->n;
4490   nsends   = gen_to->n;
4491 
4492   ierr = PetscMalloc2(nrecvs,MPI_Request,&rwaits,nsends,MPI_Request,&swaits);CHKERRQ(ierr);
4493   srow     = gen_to->indices;   /* local row index to be sent */
4494   sstarts  = gen_to->starts;
4495   sprocs   = gen_to->procs;
4496   sstatus  = gen_to->sstatus;
4497   sbs      = gen_to->bs;
4498   rstarts  = gen_from->starts;
4499   rprocs   = gen_from->procs;
4500   rbs      = gen_from->bs;
4501 
4502   if (!startsj || !bufa_ptr) scall = MAT_INITIAL_MATRIX;
4503   if (scall == MAT_INITIAL_MATRIX){
4504     /* i-array */
4505     /*---------*/
4506     /*  post receives */
4507     for (i=0; i<nrecvs; i++){
4508       rowlen = (PetscInt*)rvalues + rstarts[i]*rbs;
4509       nrows = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */
4510       ierr = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
4511     }
4512 
4513     /* pack the outgoing message */
4514     ierr = PetscMalloc((nsends+nrecvs+3)*sizeof(PetscInt),&sstartsj);CHKERRQ(ierr);
4515     rstartsj = sstartsj + nsends +1;
4516     sstartsj[0] = 0;  rstartsj[0] = 0;
4517     len = 0; /* total length of j or a array to be sent */
4518     k = 0;
4519     for (i=0; i<nsends; i++){
4520       rowlen = (PetscInt*)svalues + sstarts[i]*sbs;
4521       nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
4522       for (j=0; j<nrows; j++) {
4523         row = srow[k] + B->rmap.range[rank]; /* global row idx */
4524         for (l=0; l<sbs; l++){
4525           ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); /* rowlength */
4526           rowlen[j*sbs+l] = ncols;
4527           len += ncols;
4528           ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr);
4529         }
4530         k++;
4531       }
4532       ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
4533       sstartsj[i+1] = len;  /* starting point of (i+1)-th outgoing msg in bufj and bufa */
4534     }
4535     /* recvs and sends of i-array are completed */
4536     i = nrecvs;
4537     while (i--) {
4538       ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
4539     }
4540     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
4541 
4542     /* allocate buffers for sending j and a arrays */
4543     ierr = PetscMalloc((len+1)*sizeof(PetscInt),&bufj);CHKERRQ(ierr);
4544     ierr = PetscMalloc((len+1)*sizeof(PetscScalar),&bufa);CHKERRQ(ierr);
4545 
4546     /* create i-array of B_oth */
4547     ierr = PetscMalloc((aBn+2)*sizeof(PetscInt),&b_othi);CHKERRQ(ierr);
4548     b_othi[0] = 0;
4549     len = 0; /* total length of j or a array to be received */
4550     k = 0;
4551     for (i=0; i<nrecvs; i++){
4552       rowlen = (PetscInt*)rvalues + rstarts[i]*rbs;
4553       nrows = rbs*(rstarts[i+1]-rstarts[i]); /* num of rows to be received */
4554       for (j=0; j<nrows; j++) {
4555         b_othi[k+1] = b_othi[k] + rowlen[j];
4556         len += rowlen[j]; k++;
4557       }
4558       rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */
4559     }
4560 
4561     /* allocate space for j and a arrays of B_oth */
4562     ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(PetscInt),&b_othj);CHKERRQ(ierr);
4563     ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(MatScalar),&b_otha);CHKERRQ(ierr);
4564 
4565     /* j-array */
4566     /*---------*/
4567     /*  post receives of j-array */
4568     for (i=0; i<nrecvs; i++){
4569       nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
4570       ierr = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
4571     }
4572 
4573     /* pack the outgoing message j-array */
4574     k = 0;
4575     for (i=0; i<nsends; i++){
4576       nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
4577       bufJ = bufj+sstartsj[i];
4578       for (j=0; j<nrows; j++) {
4579         row  = srow[k++] + B->rmap.range[rank]; /* global row idx */
4580         for (ll=0; ll<sbs; ll++){
4581           ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr);
4582           for (l=0; l<ncols; l++){
4583             *bufJ++ = cols[l];
4584           }
4585           ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr);
4586         }
4587       }
4588       ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
4589     }
4590 
4591     /* recvs and sends of j-array are completed */
4592     i = nrecvs;
4593     while (i--) {
4594       ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
4595     }
4596     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
4597   } else if (scall == MAT_REUSE_MATRIX){
4598     sstartsj = *startsj;
4599     rstartsj = sstartsj + nsends +1;
4600     bufa     = *bufa_ptr;
4601     b_oth    = (Mat_SeqAIJ*)(*B_oth)->data;
4602     b_otha   = b_oth->a;
4603   } else {
4604     SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
4605   }
4606 
4607   /* a-array */
4608   /*---------*/
4609   /*  post receives of a-array */
4610   for (i=0; i<nrecvs; i++){
4611     nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
4612     ierr = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
4613   }
4614 
4615   /* pack the outgoing message a-array */
4616   k = 0;
4617   for (i=0; i<nsends; i++){
4618     nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
4619     bufA = bufa+sstartsj[i];
4620     for (j=0; j<nrows; j++) {
4621       row  = srow[k++] + B->rmap.range[rank]; /* global row idx */
4622       for (ll=0; ll<sbs; ll++){
4623         ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr);
4624         for (l=0; l<ncols; l++){
4625           *bufA++ = vals[l];
4626         }
4627         ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr);
4628       }
4629     }
4630     ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
4631   }
4632   /* recvs and sends of a-array are completed */
4633   i = nrecvs;
4634   while (i--) {
4635     ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
4636   }
4637   if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
4638   ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr);
4639 
4640   if (scall == MAT_INITIAL_MATRIX){
4641     /* put together the new matrix */
4642     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap.N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr);
4643 
4644     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
4645     /* Since these are PETSc arrays, change flags to free them as necessary. */
4646     b_oth          = (Mat_SeqAIJ *)(*B_oth)->data;
4647     b_oth->free_a  = PETSC_TRUE;
4648     b_oth->free_ij = PETSC_TRUE;
4649     b_oth->nonew   = 0;
4650 
4651     ierr = PetscFree(bufj);CHKERRQ(ierr);
4652     if (!startsj || !bufa_ptr){
4653       ierr = PetscFree(sstartsj);CHKERRQ(ierr);
4654       ierr = PetscFree(bufa);CHKERRQ(ierr); /* send buffer is not kept when the caller does not request it */
4655     } else {
4656       *startsj  = sstartsj;
4657       *bufa_ptr = bufa;
4658     }
4659   }
4660   ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
4661   PetscFunctionReturn(0);
4662 }
4663 
4664 #undef __FUNCT__
4665 #define __FUNCT__ "MatGetCommunicationStructs"
4666 /*@C
4667   MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication.
4668 
4669   Not Collective
4670 
4671   Input Parameter:
4672 . A - The matrix in mpiaij format
4673 
4674   Output Parameters:
4675 + lvec - The local vector holding off-process values from the argument to a matrix-vector product
4676 . colmap - A map from global column index to local index into lvec
4677 - multScatter - A scatter from the argument of a matrix-vector product to lvec
4678 
4679   Level: developer
4680 
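  Example usage (a minimal sketch, assuming A is an assembled MPIAIJ matrix and PETSc was
  configured without PETSC_USE_CTABLE, so colmap is an integer array):
.vb
      Vec        lvec;
      PetscInt   *colmap;
      VecScatter Mvctx;
      ierr = MatGetCommunicationStructs(A,&lvec,&colmap,&Mvctx);CHKERRQ(ierr);
      /* the returned objects are owned by A and must not be destroyed by the caller */
.ve
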
4681 @*/
4682 #if defined (PETSC_USE_CTABLE)
4683 PetscErrorCode PETSCMAT_DLLEXPORT MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter)
4684 #else
4685 PetscErrorCode PETSCMAT_DLLEXPORT MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter)
4686 #endif
4687 {
4688   Mat_MPIAIJ *a;
4689 
4690   PetscFunctionBegin;
4691   PetscValidHeaderSpecific(A, MAT_COOKIE, 1);
4692   PetscValidPointer(lvec, 2)
4693   PetscValidPointer(colmap, 3)
4694   PetscValidPointer(multScatter, 4)
4695   a = (Mat_MPIAIJ *) A->data;
4696   if (lvec) *lvec = a->lvec;
4697   if (colmap) *colmap = a->colmap;
4698   if (multScatter) *multScatter = a->Mvctx;
4699   PetscFunctionReturn(0);
4700 }
4701 
4702 EXTERN_C_BEGIN
4703 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_MPIAIJ_MPICRL(Mat,MatType,MatReuse,Mat*);
4704 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_MPIAIJ_MPICSRPERM(Mat,MatType,MatReuse,Mat*);
4705 EXTERN_C_END
4706 
4707 /*MC
4708    MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.
4709 
4710    Options Database Keys:
4711 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions()
4712 
4713   Level: beginner
4714 
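  Example usage (a minimal sketch; the sizes and preallocation values are illustrative):
.vb
      Mat      A;
      PetscInt M = 100,N = 100;            /* illustrative global sizes */
      ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
      ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,M,N);CHKERRQ(ierr);
      ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
      ierr = MatMPIAIJSetPreallocation(A,5,PETSC_NULL,2,PETSC_NULL);CHKERRQ(ierr);
      /* alternatively call MatSetFromOptions(A) and select -mat_type mpiaij at run time */
.ve
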
4715 .seealso: MatCreateMPIAIJ()
4716 M*/
4717 
4718 EXTERN_C_BEGIN
4719 #undef __FUNCT__
4720 #define __FUNCT__ "MatCreate_MPIAIJ"
4721 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_MPIAIJ(Mat B)
4722 {
4723   Mat_MPIAIJ     *b;
4724   PetscErrorCode ierr;
4725   PetscMPIInt    size;
4726 
4727   PetscFunctionBegin;
4728   ierr = MPI_Comm_size(((PetscObject)B)->comm,&size);CHKERRQ(ierr);
4729 
4730   ierr            = PetscNewLog(B,Mat_MPIAIJ,&b);CHKERRQ(ierr);
4731   B->data         = (void*)b;
4732   ierr            = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
4733   B->factor       = 0;
4734   B->rmap.bs      = 1;
4735   B->assembled    = PETSC_FALSE;
4736   B->mapping      = 0;
4737 
4738   B->insertmode      = NOT_SET_VALUES;
4739   b->size            = size;
4740   ierr = MPI_Comm_rank(((PetscObject)B)->comm,&b->rank);CHKERRQ(ierr);
4741 
4742   /* build cache for off array entries formed */
4743   ierr = MatStashCreate_Private(((PetscObject)B)->comm,1,&B->stash);CHKERRQ(ierr);
4744   b->donotstash  = PETSC_FALSE;
4745   b->colmap      = 0;
4746   b->garray      = 0;
4747   b->roworiented = PETSC_TRUE;
4748 
4749   /* stuff used for matrix vector multiply */
4750   b->lvec      = PETSC_NULL;
4751   b->Mvctx     = PETSC_NULL;
4752 
4753   /* stuff for MatGetRow() */
4754   b->rowindices   = 0;
4755   b->rowvalues    = 0;
4756   b->getrowactive = PETSC_FALSE;
4757 
4758 
4759   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
4760                                      "MatStoreValues_MPIAIJ",
4761                                      MatStoreValues_MPIAIJ);CHKERRQ(ierr);
4762   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
4763                                      "MatRetrieveValues_MPIAIJ",
4764                                      MatRetrieveValues_MPIAIJ);CHKERRQ(ierr);
4765   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
4766 				     "MatGetDiagonalBlock_MPIAIJ",
4767                                      MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr);
4768   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatIsTranspose_C",
4769 				     "MatIsTranspose_MPIAIJ",
4770 				     MatIsTranspose_MPIAIJ);CHKERRQ(ierr);
4771   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocation_C",
4772 				     "MatMPIAIJSetPreallocation_MPIAIJ",
4773 				     MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr);
4774   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",
4775 				     "MatMPIAIJSetPreallocationCSR_MPIAIJ",
4776 				     MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr);
4777   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C",
4778 				     "MatDiagonalScaleLocal_MPIAIJ",
4779 				     MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr);
4780   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpicsrperm_C",
4781                                      "MatConvert_MPIAIJ_MPICSRPERM",
4782                                       MatConvert_MPIAIJ_MPICSRPERM);CHKERRQ(ierr);
4783   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpicrl_C",
4784                                      "MatConvert_MPIAIJ_MPICRL",
4785                                       MatConvert_MPIAIJ_MPICRL);CHKERRQ(ierr);
4786   ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr);
4787   PetscFunctionReturn(0);
4788 }
4789 EXTERN_C_END
4790 
4791 #undef __FUNCT__
4792 #define __FUNCT__ "MatCreateMPIAIJWithSplitArrays"
4793 /*@
4794      MatCreateMPIAIJWithSplitArrays - creates an MPI AIJ matrix using arrays that contain the "diagonal"
4795          and "off-diagonal" parts of the matrix in CSR format.
4796 
4797    Collective on MPI_Comm
4798 
4799    Input Parameters:
4800 +  comm - MPI communicator
4801 .  m - number of local rows (Cannot be PETSC_DECIDE)
4802 .  n - This value should be the same as the local size used in creating the
4803        x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have it
4804        calculated if N is given). For square matrices n is almost always m.
4805 .  M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given)
4806 .  N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given)
4807 .   i - row indices for "diagonal" portion of matrix
4808 .   j - column indices
4809 .   a - matrix values
4810 .   oi - row indices for "off-diagonal" portion of matrix
4811 .   oj - column indices
4812 -   oa - matrix values
4813 
4814    Output Parameter:
4815 .   mat - the matrix
4816 
4817    Level: advanced
4818 
4819    Notes:
4820        The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc.
4821 
4822        The i and j indices are 0 based
4823 
4824        See MatCreateMPIAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix
4825 
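   Example usage (a minimal sketch on two MPI processes: each process owns one row of a
   2x2 matrix with 2 on the diagonal and -1 off the diagonal; the array names and values
   are illustrative, and the arrays must remain valid while the matrix is in use):
.vb
      PetscInt    i[2]  = {0,1}, j[1] = {0};     /* diagonal block: one entry at local column 0 */
      PetscInt    oi[2] = {0,1}, oj[1];          /* off-diagonal block: one entry, global column index */
      PetscScalar a[1]  = {2.0}, oa[1] = {-1.0};
      PetscMPIInt rank;
      Mat         A;
      ierr  = MPI_Comm_rank(PETSC_COMM_WORLD,&rank);CHKERRQ(ierr);
      oj[0] = 1 - rank;                          /* the column owned by the other process */
      ierr  = MatCreateMPIAIJWithSplitArrays(PETSC_COMM_WORLD,1,1,2,2,i,j,a,oi,oj,oa,&A);CHKERRQ(ierr);
.ve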
4826 
4827 .keywords: matrix, aij, compressed row, sparse, parallel
4828 
4829 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
4830           MPIAIJ, MatCreateMPIAIJ(), MatCreateMPIAIJWithArrays()
4831 @*/
4832 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],
4833 								PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat)
4834 {
4835   PetscErrorCode ierr;
4836   Mat_MPIAIJ     *maij;
4837 
4838   PetscFunctionBegin;
4839   if (m < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
4840   if (i[0]) {
4841     SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
4842   }
4843   if (oi[0]) {
4844     SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0");
4845   }
4846   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
4847   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
4848   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
4849   maij = (Mat_MPIAIJ*) (*mat)->data;
4850   maij->donotstash     = PETSC_TRUE;
4851   (*mat)->preallocated = PETSC_TRUE;
4852 
4853   (*mat)->rmap.bs = (*mat)->cmap.bs = 1;
4854   ierr = PetscMapSetUp(&(*mat)->rmap);CHKERRQ(ierr);
4855   ierr = PetscMapSetUp(&(*mat)->cmap);CHKERRQ(ierr);
4856 
4857   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr);
4858   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap.N,oi,oj,oa,&maij->B);CHKERRQ(ierr);
4859 
4860   ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4861   ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4862   ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4863   ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4864 
4865   ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4866   ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4867   PetscFunctionReturn(0);
4868 }
4869 
4870 /*
4871     Special version for direct calls from Fortran
4872 */
4873 #if defined(PETSC_HAVE_FORTRAN_CAPS)
4874 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ
4875 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
4876 #define matsetvaluesmpiaij_ matsetvaluesmpiaij
4877 #endif
4878 
4879 /* Change these macros so can be used in void function */
4880 #undef CHKERRQ
4881 #define CHKERRQ(ierr) CHKERRABORT(((PetscObject)mat)->comm,ierr)
4882 #undef SETERRQ2
4883 #define SETERRQ2(ierr,b,c,d) CHKERRABORT(((PetscObject)mat)->comm,ierr)
4884 #undef SETERRQ
4885 #define SETERRQ(ierr,b) CHKERRABORT(((PetscObject)mat)->comm,ierr)
4886 
4887 EXTERN_C_BEGIN
4888 #undef __FUNCT__
4889 #define __FUNCT__ "matsetvaluesmpiaij_"
4890 void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr)
4891 {
4892   Mat             mat = *mmat;
4893   PetscInt        m = *mm, n = *mn;
4894   InsertMode      addv = *maddv;
4895   Mat_MPIAIJ      *aij = (Mat_MPIAIJ*)mat->data;
4896   PetscScalar     value;
4897   PetscErrorCode  ierr;
4898 
4899   MatPreallocated(mat);
4900   if (mat->insertmode == NOT_SET_VALUES) {
4901     mat->insertmode = addv;
4902   }
4903 #if defined(PETSC_USE_DEBUG)
4904   else if (mat->insertmode != addv) {
4905     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
4906   }
4907 #endif
4908   {
4909   PetscInt        i,j,rstart = mat->rmap.rstart,rend = mat->rmap.rend;
4910   PetscInt        cstart = mat->cmap.rstart,cend = mat->cmap.rend,row,col;
4911   PetscTruth      roworiented = aij->roworiented;
4912 
4913   /* Some Variables required in the macro */
4914   Mat             A = aij->A;
4915   Mat_SeqAIJ      *a = (Mat_SeqAIJ*)A->data;
4916   PetscInt        *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
4917   MatScalar       *aa = a->a;
4918   PetscTruth      ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES))?PETSC_TRUE:PETSC_FALSE);
4919   Mat             B = aij->B;
4920   Mat_SeqAIJ      *b = (Mat_SeqAIJ*)B->data;
4921   PetscInt        *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap.n,am = aij->A->rmap.n;
4922   MatScalar       *ba = b->a;
4923 
4924   PetscInt        *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
4925   PetscInt        nonew = a->nonew;
4926   MatScalar       *ap1,*ap2;
4927 
4928   PetscFunctionBegin;
4929   for (i=0; i<m; i++) {
4930     if (im[i] < 0) continue;
4931 #if defined(PETSC_USE_DEBUG)
4932     if (im[i] >= mat->rmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap.N-1);
4933 #endif
4934     if (im[i] >= rstart && im[i] < rend) {
4935       row      = im[i] - rstart;
4936       lastcol1 = -1;
4937       rp1      = aj + ai[row];
4938       ap1      = aa + ai[row];
4939       rmax1    = aimax[row];
4940       nrow1    = ailen[row];
4941       low1     = 0;
4942       high1    = nrow1;
4943       lastcol2 = -1;
4944       rp2      = bj + bi[row];
4945       ap2      = ba + bi[row];
4946       rmax2    = bimax[row];
4947       nrow2    = bilen[row];
4948       low2     = 0;
4949       high2    = nrow2;
4950 
4951       for (j=0; j<n; j++) {
4952         if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
4953         if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
4954         if (in[j] >= cstart && in[j] < cend){
4955           col = in[j] - cstart;
4956           MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
4957         } else if (in[j] < 0) continue;
4958 #if defined(PETSC_USE_DEBUG)
4959         else if (in[j] >= mat->cmap.N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap.N-1);}
4960 #endif
4961         else {
4962           if (mat->was_assembled) {
4963             if (!aij->colmap) {
4964               ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
4965             }
4966 #if defined (PETSC_USE_CTABLE)
4967             ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
4968 	    col--;
4969 #else
4970             col = aij->colmap[in[j]] - 1;
4971 #endif
4972             if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
4973               ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
4974               col =  in[j];
4975               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
4976               B = aij->B;
4977               b = (Mat_SeqAIJ*)B->data;
4978               bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
4979               rp2      = bj + bi[row];
4980               ap2      = ba + bi[row];
4981               rmax2    = bimax[row];
4982               nrow2    = bilen[row];
4983               low2     = 0;
4984               high2    = nrow2;
4985               bm       = aij->B->rmap.n;
4986               ba = b->a;
4987             }
4988           } else col = in[j];
4989           MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
4990         }
4991       }
4992     } else {
4993       if (!aij->donotstash) {
4994         if (roworiented) {
4995           if (ignorezeroentries && v[i*n] == 0.0) continue;
4996           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr);
4997         } else {
4998           if (ignorezeroentries && v[i] == 0.0) continue;
4999           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr);
5000         }
5001       }
5002     }
5003   }}
5004   PetscFunctionReturnVoid();
5005 }
5006 EXTERN_C_END
5007 
5008