xref: /petsc/src/mat/impls/aij/mpi/mpiaij.c (revision 0c235cafc85a95aa91c4ceffa8a93606e4f90cfb)
1 #define PETSCMAT_DLL
2 
3 #include "../src/mat/impls/aij/mpi/mpiaij.h"   /*I "petscmat.h" I*/
4 #include "../src/inline/spops.h"
5 
6 #undef __FUNCT__
7 #define __FUNCT__ "MatDistribute_MPIAIJ"
8 /*
9     Distributes a SeqAIJ matrix across a set of processes. Code stolen from
10     MatLoad_MPIAIJ(). Horrible lack of reuse. Should be a routine for each matrix type.
11 
12     Only for square matrices
13 */
14 PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat)
15 {
16   PetscMPIInt    rank,size;
17   PetscInt       *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz,*gmataj,cnt,row,*ld;
18   PetscErrorCode ierr;
19   Mat            mat;
20   Mat_SeqAIJ     *gmata;
21   PetscMPIInt    tag;
22   MPI_Status     status;
23   PetscTruth     aij;
24   MatScalar      *gmataa,*ao,*ad,*gmataarestore=0;
25 
26   PetscFunctionBegin;
27   CHKMEMQ;
28   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
29   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
30   if (!rank) {
31     ierr = PetscTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);CHKERRQ(ierr);
32     if (!aij) SETERRQ1(PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name);
33   }
34   if (reuse == MAT_INITIAL_MATRIX) {
35     ierr = MatCreate(comm,&mat);CHKERRQ(ierr);
36     ierr = MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
37     ierr = MatSetType(mat,MATAIJ);CHKERRQ(ierr);
38     ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr);
39     ierr = PetscMalloc2(m,PetscInt,&dlens,m,PetscInt,&olens);CHKERRQ(ierr);
40     ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
41     rowners[0] = 0;
42     for (i=2; i<=size; i++) {
43       rowners[i] += rowners[i-1];
44     }
45     rstart = rowners[rank];
46     rend   = rowners[rank+1];
47     ierr   = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
48     if (!rank) {
49       gmata = (Mat_SeqAIJ*) gmat->data;
50       /* send row lengths to all processors */
51       for (i=0; i<m; i++) dlens[i] = gmata->ilen[i];
52       for (i=1; i<size; i++) {
53 	ierr = MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
54       }
55       /* determine number diagonal and off-diagonal counts */
56       ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
57       ierr = PetscMalloc(m*sizeof(PetscInt),&ld);CHKERRQ(ierr);
58       ierr = PetscMemzero(ld,m*sizeof(PetscInt));CHKERRQ(ierr);
59       jj = 0;
60       for (i=0; i<m; i++) {
61 	for (j=0; j<dlens[i]; j++) {
62           if (gmata->j[jj] < rstart) ld[i]++;
63 	  if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++;
64 	  jj++;
65 	}
66       }
67       /* send column indices to other processes */
68       for (i=1; i<size; i++) {
69 	nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
70 	ierr = MPI_Send(&nz,1,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
71 	ierr = MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
72       }
73 
74       /* send numerical values to other processes */
75       for (i=1; i<size; i++) {
76         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
77         ierr = MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
78       }
79       gmataa = gmata->a;
80       gmataj = gmata->j;
81 
82     } else {
83       /* receive row lengths */
84       ierr = MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
85       /* receive column indices */
86       ierr = MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
87       ierr = PetscMalloc2(nz,PetscScalar,&gmataa,nz,PetscInt,&gmataj);CHKERRQ(ierr);
88       ierr = MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
89       /* determine number diagonal and off-diagonal counts */
90       ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
91       ierr = PetscMalloc(m*sizeof(PetscInt),&ld);CHKERRQ(ierr);
92       ierr = PetscMemzero(ld,m*sizeof(PetscInt));CHKERRQ(ierr);
93       jj = 0;
94       for (i=0; i<m; i++) {
95 	for (j=0; j<dlens[i]; j++) {
96           if (gmataj[jj] < rstart) ld[i]++;
97 	  if (gmataj[jj] < rstart || gmataj[jj] >= rend) olens[i]++;
98 	  jj++;
99 	}
100       }
101       /* receive numerical values */
102       ierr = PetscMemzero(gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr);
103       ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
104     }
105     /* set preallocation */
106     for (i=0; i<m; i++) {
107       dlens[i] -= olens[i];
108     }
109     ierr = MatSeqAIJSetPreallocation(mat,0,dlens);CHKERRQ(ierr);
110     ierr = MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);CHKERRQ(ierr);
111 
112     for (i=0; i<m; i++) {
113       dlens[i] += olens[i];
114     }
115     cnt  = 0;
116     for (i=0; i<m; i++) {
117       row  = rstart + i;
118       ierr = MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);CHKERRQ(ierr);
119       cnt += dlens[i];
120     }
121     if (rank) {
122       ierr = PetscFree2(gmataa,gmataj);CHKERRQ(ierr);
123     }
124     ierr = PetscFree2(dlens,olens);CHKERRQ(ierr);
125     ierr = PetscFree(rowners);CHKERRQ(ierr);
126     ((Mat_MPIAIJ*)(mat->data))->ld = ld;
127     *inmat = mat;
128   } else {   /* column indices are already set; only need to move over numerical values from process 0 */
129     Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data;
130     Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data;
131     mat   = *inmat;
132     ierr  = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
133     if (!rank) {
134       /* send numerical values to other processes */
135       gmata = (Mat_SeqAIJ*) gmat->data;
136       ierr   = MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);CHKERRQ(ierr);
137       gmataa = gmata->a;
138       for (i=1; i<size; i++) {
139         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
140         ierr = MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
141       }
142       nz   = gmata->i[rowners[1]]-gmata->i[rowners[0]];
143     } else {
144       /* receive numerical values from process 0*/
145       nz   = Ad->nz + Ao->nz;
146       ierr = PetscMalloc(nz*sizeof(PetscScalar),&gmataa);CHKERRQ(ierr); gmataarestore = gmataa;
147       ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
148     }
149     /* transfer numerical values into the diagonal A and off diagonal B parts of mat */
150     ld = ((Mat_MPIAIJ*)(mat->data))->ld;
151     ad = Ad->a;
152     ao = Ao->a;
153     if (mat->rmap->n) {
154       i  = 0;
155       nz = ld[i];                                   ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
156       nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
157     }
158     for (i=1; i<mat->rmap->n; i++) {
159       nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
160       nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
161     }
162     i--;
163     if (mat->rmap->n) {
164       nz = Ao->i[i+1] - Ao->i[i] - ld[i];           ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
165     }
166     if (rank) {
167       ierr = PetscFree(gmataarestore);CHKERRQ(ierr);
168     }
169   }
170   ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
171   ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
172   CHKMEMQ;
173   PetscFunctionReturn(0);
174 }
175 
176 /*
177   Local utility routine that creates a mapping from the global column
178 number to the local number in the off-diagonal part of the local
179 storage of the matrix.  When PETSC_USE_CTABLE is used this is scalable at
180 a slightly higher hash table cost; without it it is not scalable (each processor
181 has an order N integer array but is fast to acess.
182 */
183 #undef __FUNCT__
184 #define __FUNCT__ "CreateColmap_MPIAIJ_Private"
185 PetscErrorCode CreateColmap_MPIAIJ_Private(Mat mat)
186 {
187   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
188   PetscErrorCode ierr;
189   PetscInt       n = aij->B->cmap->n,i;
190 
191   PetscFunctionBegin;
192 #if defined (PETSC_USE_CTABLE)
193   ierr = PetscTableCreate(n,&aij->colmap);CHKERRQ(ierr);
194   for (i=0; i<n; i++){
195     ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1);CHKERRQ(ierr);
196   }
197 #else
198   ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscInt),&aij->colmap);CHKERRQ(ierr);
199   ierr = PetscLogObjectMemory(mat,mat->cmap->N*sizeof(PetscInt));CHKERRQ(ierr);
200   ierr = PetscMemzero(aij->colmap,mat->cmap->N*sizeof(PetscInt));CHKERRQ(ierr);
201   for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
202 #endif
203   PetscFunctionReturn(0);
204 }
205 
206 
207 #define CHUNKSIZE   15
208 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \
209 { \
210     if (col <= lastcol1) low1 = 0; else high1 = nrow1; \
211     lastcol1 = col;\
212     while (high1-low1 > 5) { \
213       t = (low1+high1)/2; \
214       if (rp1[t] > col) high1 = t; \
215       else             low1  = t; \
216     } \
217       for (_i=low1; _i<high1; _i++) { \
218         if (rp1[_i] > col) break; \
219         if (rp1[_i] == col) { \
220           if (addv == ADD_VALUES) ap1[_i] += value;   \
221           else                    ap1[_i] = value; \
222           goto a_noinsert; \
223         } \
224       }  \
225       if (value == 0.0 && ignorezeroentries) {low1 = 0; high1 = nrow1;goto a_noinsert;} \
226       if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;}		\
227       if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
228       MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \
229       N = nrow1++ - 1; a->nz++; high1++; \
230       /* shift up all the later entries in this row */ \
231       for (ii=N; ii>=_i; ii--) { \
232         rp1[ii+1] = rp1[ii]; \
233         ap1[ii+1] = ap1[ii]; \
234       } \
235       rp1[_i] = col;  \
236       ap1[_i] = value;  \
237       a_noinsert: ; \
238       ailen[row] = nrow1; \
239 }
240 
241 
242 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \
243 { \
244     if (col <= lastcol2) low2 = 0; else high2 = nrow2; \
245     lastcol2 = col;\
246     while (high2-low2 > 5) { \
247       t = (low2+high2)/2; \
248       if (rp2[t] > col) high2 = t; \
249       else             low2  = t; \
250     } \
251     for (_i=low2; _i<high2; _i++) {		\
252       if (rp2[_i] > col) break;			\
253       if (rp2[_i] == col) {			      \
254 	if (addv == ADD_VALUES) ap2[_i] += value;     \
255 	else                    ap2[_i] = value;      \
256 	goto b_noinsert;			      \
257       }						      \
258     }							      \
259     if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
260     if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;}		\
261     if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
262     MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \
263     N = nrow2++ - 1; b->nz++; high2++;					\
264     /* shift up all the later entries in this row */			\
265     for (ii=N; ii>=_i; ii--) {						\
266       rp2[ii+1] = rp2[ii];						\
267       ap2[ii+1] = ap2[ii];						\
268     }									\
269     rp2[_i] = col;							\
270     ap2[_i] = value;							\
271     b_noinsert: ;								\
272     bilen[row] = nrow2;							\
273 }
274 
275 #undef __FUNCT__
276 #define __FUNCT__ "MatSetValuesRow_MPIAIJ"
277 PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[])
278 {
279   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)A->data;
280   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data;
281   PetscErrorCode ierr;
282   PetscInt       l,*garray = mat->garray,diag;
283 
284   PetscFunctionBegin;
285   /* code only works for square matrices A */
286 
287   /* find size of row to the left of the diagonal part */
288   ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr);
289   row  = row - diag;
290   for (l=0; l<b->i[row+1]-b->i[row]; l++) {
291     if (garray[b->j[b->i[row]+l]] > diag) break;
292   }
293   ierr = PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));CHKERRQ(ierr);
294 
295   /* diagonal part */
296   ierr = PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));CHKERRQ(ierr);
297 
298   /* right of diagonal part */
299   ierr = PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));CHKERRQ(ierr);
300   PetscFunctionReturn(0);
301 }
302 
303 #undef __FUNCT__
304 #define __FUNCT__ "MatSetValues_MPIAIJ"
305 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
306 {
307   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
308   PetscScalar    value;
309   PetscErrorCode ierr;
310   PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
311   PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
312   PetscTruth     roworiented = aij->roworiented;
313 
314   /* Some Variables required in the macro */
315   Mat            A = aij->A;
316   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data;
317   PetscInt       *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
318   MatScalar      *aa = a->a;
319   PetscTruth     ignorezeroentries = a->ignorezeroentries;
320   Mat            B = aij->B;
321   Mat_SeqAIJ     *b = (Mat_SeqAIJ*)B->data;
322   PetscInt       *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
323   MatScalar      *ba = b->a;
324 
325   PetscInt       *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
326   PetscInt       nonew = a->nonew;
327   MatScalar      *ap1,*ap2;
328 
329   PetscFunctionBegin;
330   for (i=0; i<m; i++) {
331     if (im[i] < 0) continue;
332 #if defined(PETSC_USE_DEBUG)
333     if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
334 #endif
335     if (im[i] >= rstart && im[i] < rend) {
336       row      = im[i] - rstart;
337       lastcol1 = -1;
338       rp1      = aj + ai[row];
339       ap1      = aa + ai[row];
340       rmax1    = aimax[row];
341       nrow1    = ailen[row];
342       low1     = 0;
343       high1    = nrow1;
344       lastcol2 = -1;
345       rp2      = bj + bi[row];
346       ap2      = ba + bi[row];
347       rmax2    = bimax[row];
348       nrow2    = bilen[row];
349       low2     = 0;
350       high2    = nrow2;
351 
352       for (j=0; j<n; j++) {
353         if (v) {if (roworiented) value = v[i*n+j]; else value = v[i+j*m];} else value = 0.0;
354         if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
355         if (in[j] >= cstart && in[j] < cend){
356           col = in[j] - cstart;
357           MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
358         } else if (in[j] < 0) continue;
359 #if defined(PETSC_USE_DEBUG)
360         else if (in[j] >= mat->cmap->N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);}
361 #endif
362         else {
363           if (mat->was_assembled) {
364             if (!aij->colmap) {
365               ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
366             }
367 #if defined (PETSC_USE_CTABLE)
368             ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
369 	    col--;
370 #else
371             col = aij->colmap[in[j]] - 1;
372 #endif
373             if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
374               ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
375               col =  in[j];
376               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
377               B = aij->B;
378               b = (Mat_SeqAIJ*)B->data;
379               bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a;
380               rp2      = bj + bi[row];
381               ap2      = ba + bi[row];
382               rmax2    = bimax[row];
383               nrow2    = bilen[row];
384               low2     = 0;
385               high2    = nrow2;
386               bm       = aij->B->rmap->n;
387               ba = b->a;
388             }
389           } else col = in[j];
390           MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
391         }
392       }
393     } else {
394       if (!aij->donotstash) {
395         if (roworiented) {
396           if (ignorezeroentries && v[i*n] == 0.0) continue;
397           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr);
398         } else {
399           if (ignorezeroentries && v[i] == 0.0) continue;
400           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr);
401         }
402       }
403     }
404   }
405   PetscFunctionReturn(0);
406 }
407 
408 #undef __FUNCT__
409 #define __FUNCT__ "MatGetValues_MPIAIJ"
410 PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
411 {
412   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
413   PetscErrorCode ierr;
414   PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
415   PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
416 
417   PetscFunctionBegin;
418   for (i=0; i<m; i++) {
419     if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/
420     if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
421     if (idxm[i] >= rstart && idxm[i] < rend) {
422       row = idxm[i] - rstart;
423       for (j=0; j<n; j++) {
424         if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */
425         if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
426         if (idxn[j] >= cstart && idxn[j] < cend){
427           col = idxn[j] - cstart;
428           ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
429         } else {
430           if (!aij->colmap) {
431             ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
432           }
433 #if defined (PETSC_USE_CTABLE)
434           ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
435           col --;
436 #else
437           col = aij->colmap[idxn[j]] - 1;
438 #endif
439           if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
440           else {
441             ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
442           }
443         }
444       }
445     } else {
446       SETERRQ(PETSC_ERR_SUP,"Only local values currently supported");
447     }
448   }
449   PetscFunctionReturn(0);
450 }
451 
452 #undef __FUNCT__
453 #define __FUNCT__ "MatAssemblyBegin_MPIAIJ"
454 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
455 {
456   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
457   PetscErrorCode ierr;
458   PetscInt       nstash,reallocs;
459   InsertMode     addv;
460 
461   PetscFunctionBegin;
462   if (aij->donotstash) {
463     PetscFunctionReturn(0);
464   }
465 
466   /* make sure all processors are either in INSERTMODE or ADDMODE */
467   ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,((PetscObject)mat)->comm);CHKERRQ(ierr);
468   if (addv == (ADD_VALUES|INSERT_VALUES)) {
469     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
470   }
471   mat->insertmode = addv; /* in case this processor had no cache */
472 
473   ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
474   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
475   ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
476   PetscFunctionReturn(0);
477 }
478 
479 #undef __FUNCT__
480 #define __FUNCT__ "MatAssemblyEnd_MPIAIJ"
481 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
482 {
483   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
484   Mat_SeqAIJ     *a=(Mat_SeqAIJ *)aij->A->data;
485   PetscErrorCode ierr;
486   PetscMPIInt    n;
487   PetscInt       i,j,rstart,ncols,flg;
488   PetscInt       *row,*col;
489   PetscTruth     other_disassembled;
490   PetscScalar    *val;
491   InsertMode     addv = mat->insertmode;
492 
493   /* do not use 'b = (Mat_SeqAIJ *)aij->B->data' as B can be reset in disassembly */
494   PetscFunctionBegin;
495   if (!aij->donotstash) {
496     while (1) {
497       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
498       if (!flg) break;
499 
500       for (i=0; i<n;) {
501         /* Now identify the consecutive vals belonging to the same row */
502         for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
503         if (j < n) ncols = j-i;
504         else       ncols = n-i;
505         /* Now assemble all these values with a single function call */
506         ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
507         i = j;
508       }
509     }
510     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
511   }
512   a->compressedrow.use     = PETSC_FALSE;
513   ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
514   ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);
515 
516   /* determine if any processor has disassembled, if so we must
517      also disassemble ourselfs, in order that we may reassemble. */
518   /*
519      if nonzero structure of submatrix B cannot change then we know that
520      no processor disassembled thus we can skip this stuff
521   */
522   if (!((Mat_SeqAIJ*)aij->B->data)->nonew)  {
523     ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,((PetscObject)mat)->comm);CHKERRQ(ierr);
524     if (mat->was_assembled && !other_disassembled) {
525       ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
526     }
527   }
528   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
529     ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
530   }
531   ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr);
532   ((Mat_SeqAIJ *)aij->B->data)->compressedrow.use = PETSC_TRUE; /* b->compressedrow.use */
533   ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
534   ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);
535 
536   ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr);
537   aij->rowvalues = 0;
538 
539   /* used by MatAXPY() */
540   a->xtoy = 0; ((Mat_SeqAIJ *)aij->B->data)->xtoy = 0;  /* b->xtoy = 0 */
541   a->XtoY = 0; ((Mat_SeqAIJ *)aij->B->data)->XtoY = 0;  /* b->XtoY = 0 */
542 
543   PetscFunctionReturn(0);
544 }
545 
546 #undef __FUNCT__
547 #define __FUNCT__ "MatZeroEntries_MPIAIJ"
548 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
549 {
550   Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
551   PetscErrorCode ierr;
552 
553   PetscFunctionBegin;
554   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
555   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
556   PetscFunctionReturn(0);
557 }
558 
559 #undef __FUNCT__
560 #define __FUNCT__ "MatZeroRows_MPIAIJ"
561 PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag)
562 {
563   Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
564   PetscErrorCode ierr;
565   PetscMPIInt    size = l->size,imdex,n,rank = l->rank,tag = ((PetscObject)A)->tag,lastidx = -1;
566   PetscInt       i,*owners = A->rmap->range;
567   PetscInt       *nprocs,j,idx,nsends,row;
568   PetscInt       nmax,*svalues,*starts,*owner,nrecvs;
569   PetscInt       *rvalues,count,base,slen,*source;
570   PetscInt       *lens,*lrows,*values,rstart=A->rmap->rstart;
571   MPI_Comm       comm = ((PetscObject)A)->comm;
572   MPI_Request    *send_waits,*recv_waits;
573   MPI_Status     recv_status,*send_status;
574 #if defined(PETSC_DEBUG)
575   PetscTruth     found = PETSC_FALSE;
576 #endif
577 
578   PetscFunctionBegin;
579   /*  first count number of contributors to each processor */
580   ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
581   ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
582   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/
583   j = 0;
584   for (i=0; i<N; i++) {
585     if (lastidx > (idx = rows[i])) j = 0;
586     lastidx = idx;
587     for (; j<size; j++) {
588       if (idx >= owners[j] && idx < owners[j+1]) {
589         nprocs[2*j]++;
590         nprocs[2*j+1] = 1;
591         owner[i] = j;
592 #if defined(PETSC_DEBUG)
593         found = PETSC_TRUE;
594 #endif
595         break;
596       }
597     }
598 #if defined(PETSC_DEBUG)
599     if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
600     found = PETSC_FALSE;
601 #endif
602   }
603   nsends = 0;  for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}
604 
605   /* inform other processors of number of messages and max length*/
606   ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);
607 
608   /* post receives:   */
609   ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr);
610   ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
611   for (i=0; i<nrecvs; i++) {
612     ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
613   }
614 
615   /* do sends:
616       1) starts[i] gives the starting index in svalues for stuff going to
617          the ith processor
618   */
619   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr);
620   ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
621   ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr);
622   starts[0] = 0;
623   for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
624   for (i=0; i<N; i++) {
625     svalues[starts[owner[i]]++] = rows[i];
626   }
627 
628   starts[0] = 0;
629   for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
630   count = 0;
631   for (i=0; i<size; i++) {
632     if (nprocs[2*i+1]) {
633       ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
634     }
635   }
636   ierr = PetscFree(starts);CHKERRQ(ierr);
637 
638   base = owners[rank];
639 
640   /*  wait on receives */
641   ierr   = PetscMalloc(2*(nrecvs+1)*sizeof(PetscInt),&lens);CHKERRQ(ierr);
642   source = lens + nrecvs;
643   count  = nrecvs; slen = 0;
644   while (count) {
645     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
646     /* unpack receives into our local space */
647     ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);
648     source[imdex]  = recv_status.MPI_SOURCE;
649     lens[imdex]    = n;
650     slen          += n;
651     count--;
652   }
653   ierr = PetscFree(recv_waits);CHKERRQ(ierr);
654 
655   /* move the data into the send scatter */
656   ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr);
657   count = 0;
658   for (i=0; i<nrecvs; i++) {
659     values = rvalues + i*nmax;
660     for (j=0; j<lens[i]; j++) {
661       lrows[count++] = values[j] - base;
662     }
663   }
664   ierr = PetscFree(rvalues);CHKERRQ(ierr);
665   ierr = PetscFree(lens);CHKERRQ(ierr);
666   ierr = PetscFree(owner);CHKERRQ(ierr);
667   ierr = PetscFree(nprocs);CHKERRQ(ierr);
668 
669   /* actually zap the local rows */
670   /*
671         Zero the required rows. If the "diagonal block" of the matrix
672      is square and the user wishes to set the diagonal we use separate
673      code so that MatSetValues() is not called for each diagonal allocating
674      new memory, thus calling lots of mallocs and slowing things down.
675 
676        Contributed by: Matthew Knepley
677   */
678   /* must zero l->B before l->A because the (diag) case below may put values into l->B*/
679   ierr = MatZeroRows(l->B,slen,lrows,0.0);CHKERRQ(ierr);
680   if ((diag != 0.0) && (l->A->rmap->N == l->A->cmap->N)) {
681     ierr      = MatZeroRows(l->A,slen,lrows,diag);CHKERRQ(ierr);
682   } else if (diag != 0.0) {
683     ierr = MatZeroRows(l->A,slen,lrows,0.0);CHKERRQ(ierr);
684     if (((Mat_SeqAIJ*)l->A->data)->nonew) {
685       SETERRQ(PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\n\
686 MAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
687     }
688     for (i = 0; i < slen; i++) {
689       row  = lrows[i] + rstart;
690       ierr = MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr);
691     }
692     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
693     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
694   } else {
695     ierr = MatZeroRows(l->A,slen,lrows,0.0);CHKERRQ(ierr);
696   }
697   ierr = PetscFree(lrows);CHKERRQ(ierr);
698 
699   /* wait on sends */
700   if (nsends) {
701     ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
702     ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
703     ierr = PetscFree(send_status);CHKERRQ(ierr);
704   }
705   ierr = PetscFree(send_waits);CHKERRQ(ierr);
706   ierr = PetscFree(svalues);CHKERRQ(ierr);
707 
708   PetscFunctionReturn(0);
709 }
710 
711 #undef __FUNCT__
712 #define __FUNCT__ "MatMult_MPIAIJ"
713 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
714 {
715   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
716   PetscErrorCode ierr;
717   PetscInt       nt;
718 
719   PetscFunctionBegin;
720   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
721   if (nt != A->cmap->n) {
722     SETERRQ2(PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt);
723   }
724   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
725   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
726   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
727   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
728   PetscFunctionReturn(0);
729 }
730 
731 #undef __FUNCT__
732 #define __FUNCT__ "MatMultAdd_MPIAIJ"
733 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
734 {
735   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
736   PetscErrorCode ierr;
737 
738   PetscFunctionBegin;
739   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
740   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
741   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
742   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
743   PetscFunctionReturn(0);
744 }
745 
746 #undef __FUNCT__
747 #define __FUNCT__ "MatMultTranspose_MPIAIJ"
748 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
749 {
750   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
751   PetscErrorCode ierr;
752   PetscTruth     merged;
753 
754   PetscFunctionBegin;
755   ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr);
756   /* do nondiagonal part */
757   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
758   if (!merged) {
759     /* send it on its way */
760     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
761     /* do local part */
762     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
763     /* receive remote parts: note this assumes the values are not actually */
764     /* added in yy until the next line, */
765     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
766   } else {
767     /* do local part */
768     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
769     /* send it on its way */
770     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
771     /* values actually were received in the Begin() but we need to call this nop */
772     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
773   }
774   PetscFunctionReturn(0);
775 }
776 
777 EXTERN_C_BEGIN
778 #undef __FUNCT__
779 #define __FUNCT__ "MatIsTranspose_MPIAIJ"
780 PetscErrorCode PETSCMAT_DLLEXPORT MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscTruth *f)
781 {
782   MPI_Comm       comm;
783   Mat_MPIAIJ     *Aij = (Mat_MPIAIJ *) Amat->data, *Bij;
784   Mat            Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs;
785   IS             Me,Notme;
786   PetscErrorCode ierr;
787   PetscInt       M,N,first,last,*notme,i;
788   PetscMPIInt    size;
789 
790   PetscFunctionBegin;
791 
792   /* Easy test: symmetric diagonal block */
793   Bij = (Mat_MPIAIJ *) Bmat->data; Bdia = Bij->A;
794   ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr);
795   if (!*f) PetscFunctionReturn(0);
796   ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr);
797   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
798   if (size == 1) PetscFunctionReturn(0);
799 
800   /* Hard test: off-diagonal block. This takes a MatGetSubMatrix. */
801   ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr);
802   ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr);
803   ierr = PetscMalloc((N-last+first)*sizeof(PetscInt),&notme);CHKERRQ(ierr);
804   for (i=0; i<first; i++) notme[i] = i;
805   for (i=last; i<M; i++) notme[i-last+first] = i;
806   ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,&Notme);CHKERRQ(ierr);
807   ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr);
808   ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr);
809   Aoff = Aoffs[0];
810   ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr);
811   Boff = Boffs[0];
812   ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr);
813   ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr);
814   ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr);
815   ierr = ISDestroy(Me);CHKERRQ(ierr);
816   ierr = ISDestroy(Notme);CHKERRQ(ierr);
817 
818   PetscFunctionReturn(0);
819 }
820 EXTERN_C_END
821 
822 #undef __FUNCT__
823 #define __FUNCT__ "MatMultTransposeAdd_MPIAIJ"
824 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
825 {
826   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
827   PetscErrorCode ierr;
828 
829   PetscFunctionBegin;
830   /* do nondiagonal part */
831   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
832   /* send it on its way */
833   ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
834   /* do local part */
835   ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
836   /* receive remote parts */
837   ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
838   PetscFunctionReturn(0);
839 }
840 
841 /*
842   This only works correctly for square matrices where the subblock A->A is the
843    diagonal block
844 */
845 #undef __FUNCT__
846 #define __FUNCT__ "MatGetDiagonal_MPIAIJ"
847 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v)
848 {
849   PetscErrorCode ierr;
850   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
851 
852   PetscFunctionBegin;
853   if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
854   if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) {
855     SETERRQ(PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
856   }
857   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
858   PetscFunctionReturn(0);
859 }
860 
861 #undef __FUNCT__
862 #define __FUNCT__ "MatScale_MPIAIJ"
863 PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa)
864 {
865   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
866   PetscErrorCode ierr;
867 
868   PetscFunctionBegin;
869   ierr = MatScale(a->A,aa);CHKERRQ(ierr);
870   ierr = MatScale(a->B,aa);CHKERRQ(ierr);
871   PetscFunctionReturn(0);
872 }
873 
874 #undef __FUNCT__
875 #define __FUNCT__ "MatDestroy_MPIAIJ"
876 PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
877 {
878   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
879   PetscErrorCode ierr;
880 
881   PetscFunctionBegin;
882 #if defined(PETSC_USE_LOG)
883   PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N);
884 #endif
885   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
886   ierr = MatDestroy(aij->A);CHKERRQ(ierr);
887   ierr = MatDestroy(aij->B);CHKERRQ(ierr);
888 #if defined (PETSC_USE_CTABLE)
889   if (aij->colmap) {ierr = PetscTableDestroy(aij->colmap);CHKERRQ(ierr);}
890 #else
891   ierr = PetscFree(aij->colmap);CHKERRQ(ierr);
892 #endif
893   ierr = PetscFree(aij->garray);CHKERRQ(ierr);
894   if (aij->lvec)   {ierr = VecDestroy(aij->lvec);CHKERRQ(ierr);}
895   if (aij->Mvctx)  {ierr = VecScatterDestroy(aij->Mvctx);CHKERRQ(ierr);}
896   ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr);
897   ierr = PetscFree(aij->ld);CHKERRQ(ierr);
898   ierr = PetscFree(aij);CHKERRQ(ierr);
899 
900   ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
901   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr);
902   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr);
903   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr);
904   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C","",PETSC_NULL);CHKERRQ(ierr);
905   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
906   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr);
907   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C","",PETSC_NULL);CHKERRQ(ierr);
908   PetscFunctionReturn(0);
909 }
910 
911 #undef __FUNCT__
912 #define __FUNCT__ "MatView_MPIAIJ_Binary"
913 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
914 {
915   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
916   Mat_SeqAIJ*       A = (Mat_SeqAIJ*)aij->A->data;
917   Mat_SeqAIJ*       B = (Mat_SeqAIJ*)aij->B->data;
918   PetscErrorCode    ierr;
919   PetscMPIInt       rank,size,tag = ((PetscObject)viewer)->tag;
920   int               fd;
921   PetscInt          nz,header[4],*row_lengths,*range=0,rlen,i;
922   PetscInt          nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap->rstart,rnz;
923   PetscScalar       *column_values;
924 
925   PetscFunctionBegin;
926   ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr);
927   ierr = MPI_Comm_size(((PetscObject)mat)->comm,&size);CHKERRQ(ierr);
928   nz   = A->nz + B->nz;
929   if (!rank) {
930     header[0] = MAT_FILE_COOKIE;
931     header[1] = mat->rmap->N;
932     header[2] = mat->cmap->N;
933     ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
934     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
935     ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
936     /* get largest number of rows any processor has */
937     rlen = mat->rmap->n;
938     range = mat->rmap->range;
939     for (i=1; i<size; i++) {
940       rlen = PetscMax(rlen,range[i+1] - range[i]);
941     }
942   } else {
943     ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
944     rlen = mat->rmap->n;
945   }
946 
947   /* load up the local row counts */
948   ierr = PetscMalloc((rlen+1)*sizeof(PetscInt),&row_lengths);CHKERRQ(ierr);
949   for (i=0; i<mat->rmap->n; i++) {
950     row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
951   }
952 
953   /* store the row lengths to the file */
954   if (!rank) {
955     MPI_Status status;
956     ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap->n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
957     for (i=1; i<size; i++) {
958       rlen = range[i+1] - range[i];
959       ierr = MPI_Recv(row_lengths,rlen,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
960       ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
961     }
962   } else {
963     ierr = MPI_Send(row_lengths,mat->rmap->n,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
964   }
965   ierr = PetscFree(row_lengths);CHKERRQ(ierr);
966 
967   /* load up the local column indices */
968   nzmax = nz; /* )th processor needs space a largest processor needs */
969   ierr = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
970   ierr = PetscMalloc((nzmax+1)*sizeof(PetscInt),&column_indices);CHKERRQ(ierr);
971   cnt  = 0;
972   for (i=0; i<mat->rmap->n; i++) {
973     for (j=B->i[i]; j<B->i[i+1]; j++) {
974       if ( (col = garray[B->j[j]]) > cstart) break;
975       column_indices[cnt++] = col;
976     }
977     for (k=A->i[i]; k<A->i[i+1]; k++) {
978       column_indices[cnt++] = A->j[k] + cstart;
979     }
980     for (; j<B->i[i+1]; j++) {
981       column_indices[cnt++] = garray[B->j[j]];
982     }
983   }
984   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
985 
986   /* store the column indices to the file */
987   if (!rank) {
988     MPI_Status status;
989     ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
990     for (i=1; i<size; i++) {
991       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
992       if (rnz > nzmax) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax);
993       ierr = MPI_Recv(column_indices,rnz,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
994       ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
995     }
996   } else {
997     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
998     ierr = MPI_Send(column_indices,nz,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
999   }
1000   ierr = PetscFree(column_indices);CHKERRQ(ierr);
1001 
1002   /* load up the local column values */
1003   ierr = PetscMalloc((nzmax+1)*sizeof(PetscScalar),&column_values);CHKERRQ(ierr);
1004   cnt  = 0;
1005   for (i=0; i<mat->rmap->n; i++) {
1006     for (j=B->i[i]; j<B->i[i+1]; j++) {
1007       if ( garray[B->j[j]] > cstart) break;
1008       column_values[cnt++] = B->a[j];
1009     }
1010     for (k=A->i[i]; k<A->i[i+1]; k++) {
1011       column_values[cnt++] = A->a[k];
1012     }
1013     for (; j<B->i[i+1]; j++) {
1014       column_values[cnt++] = B->a[j];
1015     }
1016   }
1017   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
1018 
1019   /* store the column values to the file */
1020   if (!rank) {
1021     MPI_Status status;
1022     ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1023     for (i=1; i<size; i++) {
1024       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
1025       if (rnz > nzmax) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax);
1026       ierr = MPI_Recv(column_values,rnz,MPIU_SCALAR,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
1027       ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1028     }
1029   } else {
1030     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1031     ierr = MPI_Send(column_values,nz,MPIU_SCALAR,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1032   }
1033   ierr = PetscFree(column_values);CHKERRQ(ierr);
1034   PetscFunctionReturn(0);
1035 }
1036 
1037 #undef __FUNCT__
1038 #define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket"
1039 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
1040 {
1041   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
1042   PetscErrorCode    ierr;
1043   PetscMPIInt       rank = aij->rank,size = aij->size;
1044   PetscTruth        isdraw,iascii,isbinary;
1045   PetscViewer       sviewer;
1046   PetscViewerFormat format;
1047 
1048   PetscFunctionBegin;
1049   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
1050   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
1051   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
1052   if (iascii) {
1053     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1054     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1055       MatInfo    info;
1056       PetscTruth inodes;
1057 
1058       ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr);
1059       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
1060       ierr = MatInodeGetInodeSizes(aij->A,PETSC_NULL,(PetscInt **)&inodes,PETSC_NULL);CHKERRQ(ierr);
1061       if (!inodes) {
1062         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, not using I-node routines\n",
1063 					      rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
1064       } else {
1065         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, using I-node routines\n",
1066 		    rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
1067       }
1068       ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
1069       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1070       ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
1071       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1072       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1073       ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
1074       ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr);
1075       PetscFunctionReturn(0);
1076     } else if (format == PETSC_VIEWER_ASCII_INFO) {
1077       PetscInt   inodecount,inodelimit,*inodes;
1078       ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr);
1079       if (inodes) {
1080         ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr);
1081       } else {
1082         ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr);
1083       }
1084       PetscFunctionReturn(0);
1085     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1086       PetscFunctionReturn(0);
1087     }
1088   } else if (isbinary) {
1089     if (size == 1) {
1090       ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1091       ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
1092     } else {
1093       ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr);
1094     }
1095     PetscFunctionReturn(0);
1096   } else if (isdraw) {
1097     PetscDraw  draw;
1098     PetscTruth isnull;
1099     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
1100     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
1101   }
1102 
1103   if (size == 1) {
1104     ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1105     ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
1106   } else {
1107     /* assemble the entire matrix onto first processor. */
1108     Mat         A;
1109     Mat_SeqAIJ  *Aloc;
1110     PetscInt    M = mat->rmap->N,N = mat->cmap->N,m,*ai,*aj,row,*cols,i,*ct;
1111     MatScalar   *a;
1112 
1113     if (mat->rmap->N > 1024) {
1114       PetscTruth flg = PETSC_FALSE;
1115 
1116       ierr = PetscOptionsGetTruth(((PetscObject) mat)->prefix, "-mat_ascii_output_large", &flg,PETSC_NULL);CHKERRQ(ierr);
1117       if (!flg) {
1118         SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"ASCII matrix output not allowed for matrices with more than 1024 rows, use binary format instead.\nYou can override this restriction using -mat_ascii_output_large.");
1119       }
1120     }
1121 
1122     ierr = MatCreate(((PetscObject)mat)->comm,&A);CHKERRQ(ierr);
1123     if (!rank) {
1124       ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
1125     } else {
1126       ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
1127     }
1128     /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */
1129     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
1130     ierr = MatMPIAIJSetPreallocation(A,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
1131     ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr);
1132 
1133     /* copy over the A part */
1134     Aloc = (Mat_SeqAIJ*)aij->A->data;
1135     m = aij->A->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1136     row = mat->rmap->rstart;
1137     for (i=0; i<ai[m]; i++) {aj[i] += mat->cmap->rstart ;}
1138     for (i=0; i<m; i++) {
1139       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
1140       row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
1141     }
1142     aj = Aloc->j;
1143     for (i=0; i<ai[m]; i++) {aj[i] -= mat->cmap->rstart;}
1144 
1145     /* copy over the B part */
1146     Aloc = (Mat_SeqAIJ*)aij->B->data;
1147     m    = aij->B->rmap->n;  ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1148     row  = mat->rmap->rstart;
1149     ierr = PetscMalloc((ai[m]+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr);
1150     ct   = cols;
1151     for (i=0; i<ai[m]; i++) {cols[i] = aij->garray[aj[i]];}
1152     for (i=0; i<m; i++) {
1153       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
1154       row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
1155     }
1156     ierr = PetscFree(ct);CHKERRQ(ierr);
1157     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1158     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1159     /*
1160        Everyone has to call to draw the matrix since the graphics waits are
1161        synchronized across all processors that share the PetscDraw object
1162     */
1163     ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
1164     if (!rank) {
1165       ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1166       ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
1167     }
1168     ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
1169     ierr = MatDestroy(A);CHKERRQ(ierr);
1170   }
1171   PetscFunctionReturn(0);
1172 }
1173 
1174 #undef __FUNCT__
1175 #define __FUNCT__ "MatView_MPIAIJ"
1176 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer)
1177 {
1178   PetscErrorCode ierr;
1179   PetscTruth     iascii,isdraw,issocket,isbinary;
1180 
1181   PetscFunctionBegin;
1182   ierr  = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
1183   ierr  = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
1184   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
1185   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr);
1186   if (iascii || isdraw || isbinary || issocket) {
1187     ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
1188   } else {
1189     SETERRQ1(PETSC_ERR_SUP,"Viewer type %s not supported by MPIAIJ matrices",((PetscObject)viewer)->type_name);
1190   }
1191   PetscFunctionReturn(0);
1192 }
1193 
1194 #undef __FUNCT__
1195 #define __FUNCT__ "MatRelax_MPIAIJ"
1196 PetscErrorCode MatRelax_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
1197 {
1198   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1199   PetscErrorCode ierr;
1200   Vec            bb1;
1201 
1202   PetscFunctionBegin;
1203   ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
1204 
1205   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
1206     if (flag & SOR_ZERO_INITIAL_GUESS) {
1207       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr);
1208       its--;
1209     }
1210 
1211     while (its--) {
1212       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1213       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1214 
1215       /* update rhs: bb1 = bb - B*x */
1216       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1217       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1218 
1219       /* local sweep */
1220       ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr);
1221     }
1222   } else if (flag & SOR_LOCAL_FORWARD_SWEEP){
1223     if (flag & SOR_ZERO_INITIAL_GUESS) {
1224       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr);
1225       its--;
1226     }
1227     while (its--) {
1228       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1229       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1230 
1231       /* update rhs: bb1 = bb - B*x */
1232       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1233       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1234 
1235       /* local sweep */
1236       ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr);
1237     }
1238   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){
1239     if (flag & SOR_ZERO_INITIAL_GUESS) {
1240       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr);
1241       its--;
1242     }
1243     while (its--) {
1244       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1245       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1246 
1247       /* update rhs: bb1 = bb - B*x */
1248       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1249       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1250 
1251       /* local sweep */
1252       ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr);
1253     }
1254   } else {
1255     SETERRQ(PETSC_ERR_SUP,"Parallel SOR not supported");
1256   }
1257 
1258   ierr = VecDestroy(bb1);CHKERRQ(ierr);
1259   PetscFunctionReturn(0);
1260 }
1261 
1262 #undef __FUNCT__
1263 #define __FUNCT__ "MatPermute_MPIAIJ"
1264 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B)
1265 {
1266   MPI_Comm       comm,pcomm;
1267   PetscInt       first,local_size,nrows;
1268   const PetscInt *rows;
1269   PetscMPIInt    size;
1270   IS             crowp,growp,irowp,lrowp,lcolp,icolp;
1271   PetscErrorCode ierr;
1272 
1273   PetscFunctionBegin;
1274   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
1275   /* make a collective version of 'rowp' */
1276   ierr = PetscObjectGetComm((PetscObject)rowp,&pcomm);CHKERRQ(ierr);
1277   if (pcomm==comm) {
1278     crowp = rowp;
1279   } else {
1280     ierr = ISGetSize(rowp,&nrows);CHKERRQ(ierr);
1281     ierr = ISGetIndices(rowp,&rows);CHKERRQ(ierr);
1282     ierr = ISCreateGeneral(comm,nrows,rows,&crowp);CHKERRQ(ierr);
1283     ierr = ISRestoreIndices(rowp,&rows);CHKERRQ(ierr);
1284   }
1285   /* collect the global row permutation and invert it */
1286   ierr = ISAllGather(crowp,&growp);CHKERRQ(ierr);
1287   ierr = ISSetPermutation(growp);CHKERRQ(ierr);
1288   if (pcomm!=comm) {
1289     ierr = ISDestroy(crowp);CHKERRQ(ierr);
1290   }
1291   ierr = ISInvertPermutation(growp,PETSC_DECIDE,&irowp);CHKERRQ(ierr);
1292   /* get the local target indices */
1293   ierr = MatGetOwnershipRange(A,&first,PETSC_NULL);CHKERRQ(ierr);
1294   ierr = MatGetLocalSize(A,&local_size,PETSC_NULL);CHKERRQ(ierr);
1295   ierr = ISGetIndices(irowp,&rows);CHKERRQ(ierr);
1296   ierr = ISCreateGeneral(MPI_COMM_SELF,local_size,rows+first,&lrowp);CHKERRQ(ierr);
1297   ierr = ISRestoreIndices(irowp,&rows);CHKERRQ(ierr);
1298   ierr = ISDestroy(irowp);CHKERRQ(ierr);
1299   /* the column permutation is simpler:
1300      make a local version of 'colp' and invert it */
1301   ierr = PetscObjectGetComm((PetscObject)colp,&pcomm);CHKERRQ(ierr);
1302   ierr = MPI_Comm_size(pcomm,&size);CHKERRQ(ierr);
1303   if (size==1) {
1304     lcolp = colp;
1305   } else {
1306     ierr = ISGetSize(colp,&nrows);CHKERRQ(ierr);
1307     ierr = ISGetIndices(colp,&rows);CHKERRQ(ierr);
1308     ierr = ISCreateGeneral(MPI_COMM_SELF,nrows,rows,&lcolp);CHKERRQ(ierr);
1309   }
1310   ierr = ISSetPermutation(lcolp);CHKERRQ(ierr);
1311   ierr = ISInvertPermutation(lcolp,PETSC_DECIDE,&icolp);CHKERRQ(ierr);
1312   ierr = ISSetPermutation(icolp);CHKERRQ(ierr);
1313   if (size>1) {
1314     ierr = ISRestoreIndices(colp,&rows);CHKERRQ(ierr);
1315     ierr = ISDestroy(lcolp);CHKERRQ(ierr);
1316   }
1317   /* now we just get the submatrix */
1318   ierr = MatGetSubMatrix(A,lrowp,icolp,local_size,MAT_INITIAL_MATRIX,B);CHKERRQ(ierr);
1319   /* clean up */
1320   ierr = ISDestroy(lrowp);CHKERRQ(ierr);
1321   ierr = ISDestroy(icolp);CHKERRQ(ierr);
1322   PetscFunctionReturn(0);
1323 }
1324 
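/*
   Usage sketch for MatPermute_MPIAIJ() (illustrative only; rowidx and colidx are
   hypothetical user arrays holding a permutation of 0..M-1):

      IS  rowp,colp;
      Mat B;
      ierr = ISCreateGeneral(PETSC_COMM_SELF,m,rowidx,&rowp);CHKERRQ(ierr);
      ierr = ISCreateGeneral(PETSC_COMM_SELF,m,colidx,&colp);CHKERRQ(ierr);
      ierr = MatPermute(A,rowp,colp,&B);CHKERRQ(ierr);

   The row permutation is gathered globally and inverted; the column permutation
   only needs a local inverse, as coded above.
*/
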
1325 #undef __FUNCT__
1326 #define __FUNCT__ "MatGetInfo_MPIAIJ"
1327 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1328 {
1329   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1330   Mat            A = mat->A,B = mat->B;
1331   PetscErrorCode ierr;
1332   PetscReal      isend[5],irecv[5];
1333 
1334   PetscFunctionBegin;
1335   info->block_size     = 1.0;
1336   ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1337   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1338   isend[3] = info->memory;  isend[4] = info->mallocs;
1339   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1340   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1341   isend[3] += info->memory;  isend[4] += info->mallocs;
1342   if (flag == MAT_LOCAL) {
1343     info->nz_used      = isend[0];
1344     info->nz_allocated = isend[1];
1345     info->nz_unneeded  = isend[2];
1346     info->memory       = isend[3];
1347     info->mallocs      = isend[4];
1348   } else if (flag == MAT_GLOBAL_MAX) {
1349     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,((PetscObject)matin)->comm);CHKERRQ(ierr);
1350     info->nz_used      = irecv[0];
1351     info->nz_allocated = irecv[1];
1352     info->nz_unneeded  = irecv[2];
1353     info->memory       = irecv[3];
1354     info->mallocs      = irecv[4];
1355   } else if (flag == MAT_GLOBAL_SUM) {
1356     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,((PetscObject)matin)->comm);CHKERRQ(ierr);
1357     info->nz_used      = irecv[0];
1358     info->nz_allocated = irecv[1];
1359     info->nz_unneeded  = irecv[2];
1360     info->memory       = irecv[3];
1361     info->mallocs      = irecv[4];
1362   }
1363   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1364   info->fill_ratio_needed = 0;
1365   info->factor_mallocs    = 0;
1366 
1367   PetscFunctionReturn(0);
1368 }
1369 
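/*
   Usage sketch (illustrative only): statistics from the diagonal (A) and
   off-diagonal (B) blocks are summed locally and then reduced with MPI_MAX or
   MPI_SUM depending on the flag:

      MatInfo info;
      ierr = MatGetInfo(A,MAT_GLOBAL_SUM,&info);CHKERRQ(ierr);
      ierr = PetscPrintf(PETSC_COMM_WORLD,"nz used %g, allocated %g\n",
                         (double)info.nz_used,(double)info.nz_allocated);CHKERRQ(ierr);
*/
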
1370 #undef __FUNCT__
1371 #define __FUNCT__ "MatSetOption_MPIAIJ"
1372 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscTruth flg)
1373 {
1374   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1375   PetscErrorCode ierr;
1376 
1377   PetscFunctionBegin;
1378   switch (op) {
1379   case MAT_NEW_NONZERO_LOCATIONS:
1380   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1381   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1382   case MAT_KEEP_ZEROED_ROWS:
1383   case MAT_NEW_NONZERO_LOCATION_ERR:
1384   case MAT_USE_INODES:
1385   case MAT_IGNORE_ZERO_ENTRIES:
1386     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1387     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1388     break;
1389   case MAT_ROW_ORIENTED:
1390     a->roworiented = flg;
1391     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1392     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1393     break;
1394   case MAT_NEW_DIAGONALS:
1395     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1396     break;
1397   case MAT_IGNORE_OFF_PROC_ENTRIES:
1398     a->donotstash = PETSC_TRUE;
1399     break;
1400   case MAT_SYMMETRIC:
1401     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1402     break;
1403   case MAT_STRUCTURALLY_SYMMETRIC:
1404   case MAT_HERMITIAN:
1405   case MAT_SYMMETRY_ETERNAL:
1406     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1407     break;
1408   default:
1409     SETERRQ1(PETSC_ERR_SUP,"unknown option %d",op);
1410   }
1411   PetscFunctionReturn(0);
1412 }
1413 
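/*
   Usage sketch (illustrative only): per-block options are forwarded to both
   sequential blocks by the switch above, e.g.

      ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
      ierr = MatSetOption(A,MAT_IGNORE_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr);

   The second call only sets donotstash, so off-process entries passed to
   MatSetValues() are silently dropped instead of being communicated.
*/
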
1414 #undef __FUNCT__
1415 #define __FUNCT__ "MatGetRow_MPIAIJ"
1416 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1417 {
1418   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1419   PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
1420   PetscErrorCode ierr;
1421   PetscInt       i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart;
1422   PetscInt       nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend;
1423   PetscInt       *cmap,*idx_p;
1424 
1425   PetscFunctionBegin;
1426   if (mat->getrowactive) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Already active");
1427   mat->getrowactive = PETSC_TRUE;
1428 
1429   if (!mat->rowvalues && (idx || v)) {
1430     /*
1431         allocate enough space to hold information from the longest row.
1432     */
1433     Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data;
1434     PetscInt     max = 1,tmp;
1435     for (i=0; i<matin->rmap->n; i++) {
1436       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1437       if (max < tmp) { max = tmp; }
1438     }
1439     ierr = PetscMalloc(max*(sizeof(PetscInt)+sizeof(PetscScalar)),&mat->rowvalues);CHKERRQ(ierr);
1440     mat->rowindices = (PetscInt*)(mat->rowvalues + max);
1441   }
1442 
1443   if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Only local rows");
1444   lrow = row - rstart;
1445 
1446   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1447   if (!v)   {pvA = 0; pvB = 0;}
1448   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1449   ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1450   ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1451   nztot = nzA + nzB;
1452 
1453   cmap  = mat->garray;
1454   if (v  || idx) {
1455     if (nztot) {
1456       /* Sort by increasing column numbers, assuming A and B already sorted */
1457       PetscInt imark = -1;
1458       if (v) {
1459         *v = v_p = mat->rowvalues;
1460         for (i=0; i<nzB; i++) {
1461           if (cmap[cworkB[i]] < cstart)   v_p[i] = vworkB[i];
1462           else break;
1463         }
1464         imark = i;
1465         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1466         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1467       }
1468       if (idx) {
1469         *idx = idx_p = mat->rowindices;
1470         if (imark > -1) {
1471           for (i=0; i<imark; i++) {
1472             idx_p[i] = cmap[cworkB[i]];
1473           }
1474         } else {
1475           for (i=0; i<nzB; i++) {
1476             if (cmap[cworkB[i]] < cstart)   idx_p[i] = cmap[cworkB[i]];
1477             else break;
1478           }
1479           imark = i;
1480         }
1481         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart + cworkA[i];
1482         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]];
1483       }
1484     } else {
1485       if (idx) *idx = 0;
1486       if (v)   *v   = 0;
1487     }
1488   }
1489   *nz = nztot;
1490   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1491   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1492   PetscFunctionReturn(0);
1493 }
1494 
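/*
   Usage sketch (illustrative only; assumes the const-qualified MatGetRow()
   interface of this revision): only locally owned rows may be queried, and each
   MatGetRow() must be paired with a MatRestoreRow() before the next call:

      PetscInt          rstart,rend,row,ncols;
      const PetscInt    *cols;
      const PetscScalar *vals;
      ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
      for (row=rstart; row<rend; row++) {
        ierr = MatGetRow(A,row,&ncols,&cols,&vals);CHKERRQ(ierr);
        -- cols/vals are globally numbered, sorted by column, merged from A and B --
        ierr = MatRestoreRow(A,row,&ncols,&cols,&vals);CHKERRQ(ierr);
      }
*/
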
1495 #undef __FUNCT__
1496 #define __FUNCT__ "MatRestoreRow_MPIAIJ"
1497 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1498 {
1499   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1500 
1501   PetscFunctionBegin;
1502   if (!aij->getrowactive) {
1503     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
1504   }
1505   aij->getrowactive = PETSC_FALSE;
1506   PetscFunctionReturn(0);
1507 }
1508 
1509 #undef __FUNCT__
1510 #define __FUNCT__ "MatNorm_MPIAIJ"
1511 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm)
1512 {
1513   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1514   Mat_SeqAIJ     *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data;
1515   PetscErrorCode ierr;
1516   PetscInt       i,j,cstart = mat->cmap->rstart;
1517   PetscReal      sum = 0.0;
1518   MatScalar      *v;
1519 
1520   PetscFunctionBegin;
1521   if (aij->size == 1) {
1522     ierr = MatNorm(aij->A,type,norm);CHKERRQ(ierr);
1523   } else {
1524     if (type == NORM_FROBENIUS) {
1525       v = amat->a;
1526       for (i=0; i<amat->nz; i++) {
1527 #if defined(PETSC_USE_COMPLEX)
1528         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1529 #else
1530         sum += (*v)*(*v); v++;
1531 #endif
1532       }
1533       v = bmat->a;
1534       for (i=0; i<bmat->nz; i++) {
1535 #if defined(PETSC_USE_COMPLEX)
1536         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1537 #else
1538         sum += (*v)*(*v); v++;
1539 #endif
1540       }
1541       ierr = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPI_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr);
1542       *norm = sqrt(*norm);
1543     } else if (type == NORM_1) { /* max column norm */
1544       PetscReal *tmp,*tmp2;
1545       PetscInt  *jj,*garray = aij->garray;
1546       ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr);
1547       ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscReal),&tmp2);CHKERRQ(ierr);
1548       ierr = PetscMemzero(tmp,mat->cmap->N*sizeof(PetscReal));CHKERRQ(ierr);
1549       *norm = 0.0;
1550       v = amat->a; jj = amat->j;
1551       for (j=0; j<amat->nz; j++) {
1552         tmp[cstart + *jj++ ] += PetscAbsScalar(*v);  v++;
1553       }
1554       v = bmat->a; jj = bmat->j;
1555       for (j=0; j<bmat->nz; j++) {
1556         tmp[garray[*jj++]] += PetscAbsScalar(*v); v++;
1557       }
1558       ierr = MPI_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPI_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr);
1559       for (j=0; j<mat->cmap->N; j++) {
1560         if (tmp2[j] > *norm) *norm = tmp2[j];
1561       }
1562       ierr = PetscFree(tmp);CHKERRQ(ierr);
1563       ierr = PetscFree(tmp2);CHKERRQ(ierr);
1564     } else if (type == NORM_INFINITY) { /* max row norm */
1565       PetscReal ntemp = 0.0;
1566       for (j=0; j<aij->A->rmap->n; j++) {
1567         v = amat->a + amat->i[j];
1568         sum = 0.0;
1569         for (i=0; i<amat->i[j+1]-amat->i[j]; i++) {
1570           sum += PetscAbsScalar(*v); v++;
1571         }
1572         v = bmat->a + bmat->i[j];
1573         for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) {
1574           sum += PetscAbsScalar(*v); v++;
1575         }
1576         if (sum > ntemp) ntemp = sum;
1577       }
1578       ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPI_MAX,((PetscObject)mat)->comm);CHKERRQ(ierr);
1579     } else {
1580       SETERRQ(PETSC_ERR_SUP,"No support for two norm");
1581     }
1582   }
1583   PetscFunctionReturn(0);
1584 }
1585 
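/*
   Usage sketch (illustrative only): the three supported norms, each costing one
   small reduction as coded above:

      PetscReal nrm;
      ierr = MatNorm(A,NORM_FROBENIUS,&nrm);CHKERRQ(ierr);   -- sqrt of the global sum of squares
      ierr = MatNorm(A,NORM_1,&nrm);CHKERRQ(ierr);           -- maximum column sum
      ierr = MatNorm(A,NORM_INFINITY,&nrm);CHKERRQ(ierr);    -- maximum row sum

   NORM_2 is not supported for this matrix type.
*/
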
1586 #undef __FUNCT__
1587 #define __FUNCT__ "MatTranspose_MPIAIJ"
1588 PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout)
1589 {
1590   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1591   Mat_SeqAIJ     *Aloc=(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data;
1592   PetscErrorCode ierr;
1593   PetscInt       M = A->rmap->N,N = A->cmap->N,ma,na,mb,*ai,*aj,*bi,*bj,row,*cols,*cols_tmp,i,*d_nnz;
1594   PetscInt       cstart=A->cmap->rstart,ncol;
1595   Mat            B;
1596   MatScalar      *array;
1597 
1598   PetscFunctionBegin;
1599   if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
1600 
1601   ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n;
1602   ai = Aloc->i; aj = Aloc->j;
1603   bi = Bloc->i; bj = Bloc->j;
1604   if (reuse == MAT_INITIAL_MATRIX || *matout == A) {
1605     /* compute d_nnz for preallocation; o_nnz is approximated by d_nnz to avoid communication */
1606     ierr = PetscMalloc((1+na)*sizeof(PetscInt),&d_nnz);CHKERRQ(ierr);
1607     ierr = PetscMemzero(d_nnz,(1+na)*sizeof(PetscInt));CHKERRQ(ierr);
1608     for (i=0; i<ai[ma]; i++){
1609       d_nnz[aj[i]] ++;
1610       aj[i] += cstart; /* global col index to be used by MatSetValues() */
1611     }
1612 
1613     ierr = MatCreate(((PetscObject)A)->comm,&B);CHKERRQ(ierr);
1614     ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
1615     ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
1616     ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,d_nnz);CHKERRQ(ierr);
1617     ierr = PetscFree(d_nnz);CHKERRQ(ierr);
1618   } else {
1619     B = *matout;
1620   }
1621 
1622   /* copy over the A part */
1623   array = Aloc->a;
1624   row = A->rmap->rstart;
1625   for (i=0; i<ma; i++) {
1626     ncol = ai[i+1]-ai[i];
1627     ierr = MatSetValues(B,ncol,aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1628     row++; array += ncol; aj += ncol;
1629   }
1630   aj = Aloc->j;
1631   for (i=0; i<ai[ma]; i++) aj[i] -= cstart; /* restore the local column indices */
1632 
1633   /* copy over the B part */
1634   ierr = PetscMalloc(bi[mb]*sizeof(PetscInt),&cols);CHKERRQ(ierr);
1635   ierr = PetscMemzero(cols,bi[mb]*sizeof(PetscInt));CHKERRQ(ierr);
1636   array = Bloc->a;
1637   row = A->rmap->rstart;
1638   for (i=0; i<bi[mb]; i++) {cols[i] = a->garray[bj[i]];}
1639   cols_tmp = cols;
1640   for (i=0; i<mb; i++) {
1641     ncol = bi[i+1]-bi[i];
1642     ierr = MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1643     row++; array += ncol; cols_tmp += ncol;
1644   }
1645   ierr = PetscFree(cols);CHKERRQ(ierr);
1646 
1647   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1648   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1649   if (reuse == MAT_INITIAL_MATRIX || *matout != A) {
1650     *matout = B;
1651   } else {
1652     ierr = MatHeaderCopy(A,B);CHKERRQ(ierr);
1653   }
1654   PetscFunctionReturn(0);
1655 }
1656 
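/*
   Usage sketch (illustrative only): out-of-place and in-place transposes; the
   in-place form requires a square matrix, as checked above:

      Mat At;
      ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr);
      ierr = MatTranspose(A,MAT_REUSE_MATRIX,&A);CHKERRQ(ierr);

   Preallocation of the result uses d_nnz for both blocks, so the off-diagonal
   estimate is only approximate; this avoids an extra round of communication.
*/
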
1657 #undef __FUNCT__
1658 #define __FUNCT__ "MatDiagonalScale_MPIAIJ"
1659 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
1660 {
1661   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1662   Mat            a = aij->A,b = aij->B;
1663   PetscErrorCode ierr;
1664   PetscInt       s1,s2,s3;
1665 
1666   PetscFunctionBegin;
1667   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
1668   if (rr) {
1669     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
1670     if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
1671     /* Overlap communication with computation. */
1672     ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1673   }
1674   if (ll) {
1675     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
1676     if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
1677     ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr);
1678   }
1679   /* scale the diagonal block */
1680   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
1681 
1682   if (rr) {
1683     /* Do a scatter end and then right scale the off-diagonal block */
1684     ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1685     ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr);
1686   }
1687 
1688   PetscFunctionReturn(0);
1689 }
1690 
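/*
   Usage sketch (illustrative only): computes A <- diag(ll) A diag(rr); the
   scatter of rr into the ghost vector is overlapped with the left scaling, as
   coded above:

      Vec l,r;    -- conforming row and column scaling vectors
      ierr = MatDiagonalScale(A,l,r);CHKERRQ(ierr);
      ierr = MatDiagonalScale(A,l,PETSC_NULL);CHKERRQ(ierr);   -- left scaling only
*/
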
1691 #undef __FUNCT__
1692 #define __FUNCT__ "MatSetBlockSize_MPIAIJ"
1693 PetscErrorCode MatSetBlockSize_MPIAIJ(Mat A,PetscInt bs)
1694 {
1695   Mat_MPIAIJ     *a   = (Mat_MPIAIJ*)A->data;
1696   PetscErrorCode ierr;
1697 
1698   PetscFunctionBegin;
1699   ierr = MatSetBlockSize(a->A,bs);CHKERRQ(ierr);
1700   ierr = MatSetBlockSize(a->B,bs);CHKERRQ(ierr);
1701   PetscFunctionReturn(0);
1702 }
1703 #undef __FUNCT__
1704 #define __FUNCT__ "MatSetUnfactored_MPIAIJ"
1705 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A)
1706 {
1707   Mat_MPIAIJ     *a   = (Mat_MPIAIJ*)A->data;
1708   PetscErrorCode ierr;
1709 
1710   PetscFunctionBegin;
1711   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
1712   PetscFunctionReturn(0);
1713 }
1714 
1715 #undef __FUNCT__
1716 #define __FUNCT__ "MatEqual_MPIAIJ"
1717 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscTruth *flag)
1718 {
1719   Mat_MPIAIJ     *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data;
1720   Mat            a,b,c,d;
1721   PetscTruth     flg;
1722   PetscErrorCode ierr;
1723 
1724   PetscFunctionBegin;
1725   a = matA->A; b = matA->B;
1726   c = matB->A; d = matB->B;
1727 
1728   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
1729   if (flg) {
1730     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
1731   }
1732   ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,((PetscObject)A)->comm);CHKERRQ(ierr);
1733   PetscFunctionReturn(0);
1734 }
1735 
1736 #undef __FUNCT__
1737 #define __FUNCT__ "MatCopy_MPIAIJ"
1738 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
1739 {
1740   PetscErrorCode ierr;
1741   Mat_MPIAIJ     *a = (Mat_MPIAIJ *)A->data;
1742   Mat_MPIAIJ     *b = (Mat_MPIAIJ *)B->data;
1743 
1744   PetscFunctionBegin;
1745   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
1746   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
1747     /* because of the column compression in the off-processor part of the matrix a->B,
1748        the number of columns in a->B and b->B may be different, hence we cannot call
1749        the MatCopy() directly on the two parts. If need be, we can provide a more
1750        efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices
1751        then copying the submatrices */
1752     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
1753   } else {
1754     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
1755     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
1756   }
1757   PetscFunctionReturn(0);
1758 }
1759 
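/*
   Usage sketch (illustrative only): the fast block-wise path requires identical
   parallel nonzero patterns and the same copy implementation; anything else
   falls back to MatCopy_Basic(), as explained above:

      ierr = MatCopy(A,B,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
      ierr = MatCopy(A,B,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);
*/
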
1760 #undef __FUNCT__
1761 #define __FUNCT__ "MatSetUpPreallocation_MPIAIJ"
1762 PetscErrorCode MatSetUpPreallocation_MPIAIJ(Mat A)
1763 {
1764   PetscErrorCode ierr;
1765 
1766   PetscFunctionBegin;
1767   ierr =  MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
1768   PetscFunctionReturn(0);
1769 }
1770 
1771 #include "petscblaslapack.h"
1772 #undef __FUNCT__
1773 #define __FUNCT__ "MatAXPY_MPIAIJ"
1774 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
1775 {
1776   PetscErrorCode ierr;
1777   PetscInt       i;
1778   Mat_MPIAIJ     *xx = (Mat_MPIAIJ *)X->data,*yy = (Mat_MPIAIJ *)Y->data;
1779   PetscBLASInt   bnz,one=1;
1780   Mat_SeqAIJ     *x,*y;
1781 
1782   PetscFunctionBegin;
1783   if (str == SAME_NONZERO_PATTERN) {
1784     PetscScalar alpha = a;
1785     x = (Mat_SeqAIJ *)xx->A->data;
1786     y = (Mat_SeqAIJ *)yy->A->data;
1787     bnz = PetscBLASIntCast(x->nz);
1788     BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
1789     x = (Mat_SeqAIJ *)xx->B->data;
1790     y = (Mat_SeqAIJ *)yy->B->data;
1791     bnz = PetscBLASIntCast(x->nz);
1792     BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
1793   } else if (str == SUBSET_NONZERO_PATTERN) {
1794     ierr = MatAXPY_SeqAIJ(yy->A,a,xx->A,str);CHKERRQ(ierr);
1795 
1796     x = (Mat_SeqAIJ *)xx->B->data;
1797     y = (Mat_SeqAIJ *)yy->B->data;
1798     if (y->xtoy && y->XtoY != xx->B) {
1799       ierr = PetscFree(y->xtoy);CHKERRQ(ierr);
1800       ierr = MatDestroy(y->XtoY);CHKERRQ(ierr);
1801     }
1802     if (!y->xtoy) { /* get xtoy */
1803       ierr = MatAXPYGetxtoy_Private(xx->B->rmap->n,x->i,x->j,xx->garray,y->i,y->j,yy->garray,&y->xtoy);CHKERRQ(ierr);
1804       y->XtoY = xx->B;
1805       ierr = PetscObjectReference((PetscObject)xx->B);CHKERRQ(ierr);
1806     }
1807     for (i=0; i<x->nz; i++) y->a[y->xtoy[i]] += a*(x->a[i]);
1808   } else {
1809     ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
1810   }
1811   PetscFunctionReturn(0);
1812 }
1813 
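/*
   Usage sketch (illustrative only): Y <- a*X + Y; SAME_NONZERO_PATTERN is the
   cheapest path (a raw BLAS axpy on both blocks), while SUBSET_NONZERO_PATTERN
   builds and caches the xtoy index map on first use:

      PetscScalar a = 2.0;
      ierr = MatAXPY(Y,a,X,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
      ierr = MatAXPY(Y,a,X,SUBSET_NONZERO_PATTERN);CHKERRQ(ierr);
*/
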
1814 EXTERN PetscErrorCode PETSCMAT_DLLEXPORT MatConjugate_SeqAIJ(Mat);
1815 
1816 #undef __FUNCT__
1817 #define __FUNCT__ "MatConjugate_MPIAIJ"
1818 PetscErrorCode PETSCMAT_DLLEXPORT MatConjugate_MPIAIJ(Mat mat)
1819 {
1820 #if defined(PETSC_USE_COMPLEX)
1821   PetscErrorCode ierr;
1822   Mat_MPIAIJ     *aij = (Mat_MPIAIJ *)mat->data;
1823 
1824   PetscFunctionBegin;
1825   ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr);
1826   ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr);
1827 #else
1828   PetscFunctionBegin;
1829 #endif
1830   PetscFunctionReturn(0);
1831 }
1832 
1833 #undef __FUNCT__
1834 #define __FUNCT__ "MatRealPart_MPIAIJ"
1835 PetscErrorCode MatRealPart_MPIAIJ(Mat A)
1836 {
1837   Mat_MPIAIJ   *a = (Mat_MPIAIJ*)A->data;
1838   PetscErrorCode ierr;
1839 
1840   PetscFunctionBegin;
1841   ierr = MatRealPart(a->A);CHKERRQ(ierr);
1842   ierr = MatRealPart(a->B);CHKERRQ(ierr);
1843   PetscFunctionReturn(0);
1844 }
1845 
1846 #undef __FUNCT__
1847 #define __FUNCT__ "MatImaginaryPart_MPIAIJ"
1848 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A)
1849 {
1850   Mat_MPIAIJ   *a = (Mat_MPIAIJ*)A->data;
1851   PetscErrorCode ierr;
1852 
1853   PetscFunctionBegin;
1854   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
1855   ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
1856   PetscFunctionReturn(0);
1857 }
1858 
1859 #ifdef PETSC_HAVE_PBGL
1860 
1861 #include <boost/parallel/mpi/bsp_process_group.hpp>
1862 #include <boost/graph/distributed/ilu_default_graph.hpp>
1863 #include <boost/graph/distributed/ilu_0_block.hpp>
1864 #include <boost/graph/distributed/ilu_preconditioner.hpp>
1865 #include <boost/graph/distributed/petsc/interface.hpp>
1866 #include <boost/multi_array.hpp>
1867 #include <boost/parallel/distributed_property_map.hpp>
1868 
1869 #undef __FUNCT__
1870 #define __FUNCT__ "MatILUFactorSymbolic_MPIAIJ"
1871 /*
1872   This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu>
1873 */
1874 PetscErrorCode MatILUFactorSymbolic_MPIAIJ(Mat fact,Mat A, IS isrow, IS iscol, const MatFactorInfo *info)
1875 {
1876   namespace petsc = boost::distributed::petsc;
1877 
1878   namespace graph_dist = boost::graph::distributed;
1879   using boost::graph::distributed::ilu_default::process_group_type;
1880   using boost::graph::ilu_permuted;
1881 
1882   PetscTruth      row_identity, col_identity;
1883   PetscContainer  c;
1884   PetscInt        m, n, M, N;
1885   PetscErrorCode  ierr;
1886 
1887   PetscFunctionBegin;
1888   if (info->levels != 0) SETERRQ(PETSC_ERR_SUP,"Only levels = 0 supported for parallel ilu");
1889   ierr = ISIdentity(isrow, &row_identity);CHKERRQ(ierr);
1890   ierr = ISIdentity(iscol, &col_identity);CHKERRQ(ierr);
1891   if (!row_identity || !col_identity) {
1892     SETERRQ(PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for parallel ILU");
1893   }
1894 
1895   process_group_type pg;
1896   typedef graph_dist::ilu_default::ilu_level_graph_type  lgraph_type;
1897   lgraph_type*   lgraph_p = new lgraph_type(petsc::num_global_vertices(A), pg, petsc::matrix_distribution(A, pg));
1898   lgraph_type&   level_graph = *lgraph_p;
1899   graph_dist::ilu_default::graph_type&            graph(level_graph.graph);
1900 
1901   petsc::read_matrix(A, graph, get(boost::edge_weight, graph));
1902   ilu_permuted(level_graph);
1903 
1904   /* put together the new matrix */
1905   ierr = MatCreate(((PetscObject)A)->comm, fact);CHKERRQ(ierr);
1906   ierr = MatGetLocalSize(A, &m, &n);CHKERRQ(ierr);
1907   ierr = MatGetSize(A, &M, &N);CHKERRQ(ierr);
1908   ierr = MatSetSizes(fact, m, n, M, N);CHKERRQ(ierr);
1909   ierr = MatSetType(fact, ((PetscObject)A)->type_name);CHKERRQ(ierr);
1910   ierr = MatAssemblyBegin(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1911   ierr = MatAssemblyEnd(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1912 
1913   ierr = PetscContainerCreate(((PetscObject)A)->comm, &c);CHKERRQ(ierr);
1914   ierr = PetscContainerSetPointer(c, lgraph_p);CHKERRQ(ierr);
1915   ierr = PetscObjectCompose((PetscObject) (fact), "graph", (PetscObject) c);CHKERRQ(ierr);
1916   PetscFunctionReturn(0);
1917 }
1918 
1919 #undef __FUNCT__
1920 #define __FUNCT__ "MatLUFactorNumeric_MPIAIJ"
1921 PetscErrorCode MatLUFactorNumeric_MPIAIJ(Mat B,Mat A, const MatFactorInfo *info)
1922 {
1923   PetscFunctionBegin;
1924   PetscFunctionReturn(0);
1925 }
1926 
1927 #undef __FUNCT__
1928 #define __FUNCT__ "MatSolve_MPIAIJ"
1929 /*
1930   This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu>
1931 */
1932 PetscErrorCode MatSolve_MPIAIJ(Mat A, Vec b, Vec x)
1933 {
1934   namespace graph_dist = boost::graph::distributed;
1935 
1936   typedef graph_dist::ilu_default::ilu_level_graph_type  lgraph_type;
1937   lgraph_type*   lgraph_p;
1938   PetscContainer c;
1939   PetscErrorCode ierr;
1940 
1941   PetscFunctionBegin;
1942   ierr = PetscObjectQuery((PetscObject) A, "graph", (PetscObject *) &c);CHKERRQ(ierr);
1943   ierr = PetscContainerGetPointer(c, (void **) &lgraph_p);CHKERRQ(ierr);
1944   ierr = VecCopy(b, x);CHKERRQ(ierr);
1945 
1946   PetscScalar* array_x;
1947   ierr = VecGetArray(x, &array_x);CHKERRQ(ierr);
1948   PetscInt sx;
1949   ierr = VecGetSize(x, &sx);CHKERRQ(ierr);
1950 
1951   PetscScalar* array_b;
1952   ierr = VecGetArray(b, &array_b);CHKERRQ(ierr);
1953   PetscInt sb;
1954   ierr = VecGetSize(b, &sb);CHKERRQ(ierr);
1955 
1956   lgraph_type&   level_graph = *lgraph_p;
1957   graph_dist::ilu_default::graph_type&            graph(level_graph.graph);
1958 
1959   typedef boost::multi_array_ref<PetscScalar, 1> array_ref_type;
1960   array_ref_type                                 ref_b(array_b, boost::extents[num_vertices(graph)]),
1961                                                  ref_x(array_x, boost::extents[num_vertices(graph)]);
1962 
1963   typedef boost::iterator_property_map<array_ref_type::iterator,
1964                                 boost::property_map<graph_dist::ilu_default::graph_type, boost::vertex_index_t>::type>  gvector_type;
1965   gvector_type                                   vector_b(ref_b.begin(), get(boost::vertex_index, graph)),
1966                                                  vector_x(ref_x.begin(), get(boost::vertex_index, graph));
1967 
1968   ilu_set_solve(*lgraph_p, vector_b, vector_x);
1969 
1970   PetscFunctionReturn(0);
1971 }
1972 #endif
1973 
1974 typedef struct { /* used by MatGetRedundantMatrix() for reusing matredundant */
1975   PetscInt       nzlocal,nsends,nrecvs;
1976   PetscMPIInt    *send_rank;
1977   PetscInt       *sbuf_nz,*sbuf_j,**rbuf_j;
1978   PetscScalar    *sbuf_a,**rbuf_a;
1979   PetscErrorCode (*MatDestroy)(Mat);
1980 } Mat_Redundant;
1981 
1982 #undef __FUNCT__
1983 #define __FUNCT__ "PetscContainerDestroy_MatRedundant"
1984 PetscErrorCode PetscContainerDestroy_MatRedundant(void *ptr)
1985 {
1986   PetscErrorCode       ierr;
1987   Mat_Redundant        *redund=(Mat_Redundant*)ptr;
1988   PetscInt             i;
1989 
1990   PetscFunctionBegin;
1991   ierr = PetscFree(redund->send_rank);CHKERRQ(ierr);
1992   ierr = PetscFree(redund->sbuf_j);CHKERRQ(ierr);
1993   ierr = PetscFree(redund->sbuf_a);CHKERRQ(ierr);
1994   for (i=0; i<redund->nrecvs; i++){
1995     ierr = PetscFree(redund->rbuf_j[i]);CHKERRQ(ierr);
1996     ierr = PetscFree(redund->rbuf_a[i]);CHKERRQ(ierr);
1997   }
1998   ierr = PetscFree3(redund->sbuf_nz,redund->rbuf_j,redund->rbuf_a);CHKERRQ(ierr);
1999   ierr = PetscFree(redund);CHKERRQ(ierr);
2000   PetscFunctionReturn(0);
2001 }
2002 
2003 #undef __FUNCT__
2004 #define __FUNCT__ "MatDestroy_MatRedundant"
2005 PetscErrorCode MatDestroy_MatRedundant(Mat A)
2006 {
2007   PetscErrorCode  ierr;
2008   PetscContainer  container;
2009   Mat_Redundant   *redund=PETSC_NULL;
2010 
2011   PetscFunctionBegin;
2012   ierr = PetscObjectQuery((PetscObject)A,"Mat_Redundant",(PetscObject *)&container);CHKERRQ(ierr);
2013   if (container) {
2014     ierr = PetscContainerGetPointer(container,(void **)&redund);CHKERRQ(ierr);
2015   } else {
2016     SETERRQ(PETSC_ERR_PLIB,"Container does not exit");
2017   }
2018   A->ops->destroy = redund->MatDestroy;
2019   ierr = PetscObjectCompose((PetscObject)A,"Mat_Redundant",0);CHKERRQ(ierr);
2020   ierr = (*A->ops->destroy)(A);CHKERRQ(ierr);
2021   ierr = PetscContainerDestroy(container);CHKERRQ(ierr);
2022   PetscFunctionReturn(0);
2023 }
2024 
2025 #undef __FUNCT__
2026 #define __FUNCT__ "MatGetRedundantMatrix_MPIAIJ"
2027 PetscErrorCode MatGetRedundantMatrix_MPIAIJ(Mat mat,PetscInt nsubcomm,MPI_Comm subcomm,PetscInt mlocal_sub,MatReuse reuse,Mat *matredundant)
2028 {
2029   PetscMPIInt    rank,size;
2030   MPI_Comm       comm=((PetscObject)mat)->comm;
2031   PetscErrorCode ierr;
2032   PetscInt       nsends=0,nrecvs=0,i,rownz_max=0;
2033   PetscMPIInt    *send_rank=PETSC_NULL,*recv_rank=PETSC_NULL;
2034   PetscInt       *rowrange=mat->rmap->range;
2035   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2036   Mat            A=aij->A,B=aij->B,C=*matredundant;
2037   Mat_SeqAIJ     *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data;
2038   PetscScalar    *sbuf_a;
2039   PetscInt       nzlocal=a->nz+b->nz;
2040   PetscInt       j,cstart=mat->cmap->rstart,cend=mat->cmap->rend,row,nzA,nzB,ncols,*cworkA,*cworkB;
2041   PetscInt       rstart=mat->rmap->rstart,rend=mat->rmap->rend,*bmap=aij->garray,M,N;
2042   PetscInt       *cols,ctmp,lwrite,*rptr,l,*sbuf_j;
2043   MatScalar      *aworkA,*aworkB;
2044   PetscScalar    *vals;
2045   PetscMPIInt    tag1,tag2,tag3,imdex;
2046   MPI_Request    *s_waits1=PETSC_NULL,*s_waits2=PETSC_NULL,*s_waits3=PETSC_NULL,
2047                  *r_waits1=PETSC_NULL,*r_waits2=PETSC_NULL,*r_waits3=PETSC_NULL;
2048   MPI_Status     recv_status,*send_status;
2049   PetscInt       *sbuf_nz=PETSC_NULL,*rbuf_nz=PETSC_NULL,count;
2050   PetscInt       **rbuf_j=PETSC_NULL;
2051   PetscScalar    **rbuf_a=PETSC_NULL;
2052   Mat_Redundant  *redund=PETSC_NULL;
2053   PetscContainer container;
2054 
2055   PetscFunctionBegin;
2056   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2057   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2058 
2059   if (reuse == MAT_REUSE_MATRIX) {
2060     ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr);
2061     if (M != N || M != mat->rmap->N) SETERRQ(PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong global size");
2062     ierr = MatGetLocalSize(C,&M,&N);CHKERRQ(ierr);
2063     if (M != N || M != mlocal_sub) SETERRQ(PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong local size");
2064     ierr = PetscObjectQuery((PetscObject)C,"Mat_Redundant",(PetscObject *)&container);CHKERRQ(ierr);
2065     if (container) {
2066       ierr = PetscContainerGetPointer(container,(void **)&redund);CHKERRQ(ierr);
2067     } else {
2068       SETERRQ(PETSC_ERR_PLIB,"Container does not exit");
2069     }
2070     if (nzlocal != redund->nzlocal) SETERRQ(PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong nzlocal");
2071 
2072     nsends    = redund->nsends;
2073     nrecvs    = redund->nrecvs;
2074     send_rank = redund->send_rank; recv_rank = send_rank + size;
2075     sbuf_nz   = redund->sbuf_nz;     rbuf_nz = sbuf_nz + nsends;
2076     sbuf_j    = redund->sbuf_j;
2077     sbuf_a    = redund->sbuf_a;
2078     rbuf_j    = redund->rbuf_j;
2079     rbuf_a    = redund->rbuf_a;
2080   }
2081 
2082   if (reuse == MAT_INITIAL_MATRIX){
2083     PetscMPIInt  subrank,subsize;
2084     PetscInt     nleftover,np_subcomm;
2085     /* get the destination processors' id send_rank, nsends and nrecvs */
2086     ierr = MPI_Comm_rank(subcomm,&subrank);CHKERRQ(ierr);
2087     ierr = MPI_Comm_size(subcomm,&subsize);CHKERRQ(ierr);
2088     ierr = PetscMalloc((2*size+1)*sizeof(PetscMPIInt),&send_rank);CHKERRQ(ierr);
2089     recv_rank = send_rank + size;
2090     np_subcomm = size/nsubcomm;
2091     nleftover  = size - nsubcomm*np_subcomm;
2092     nsends = 0; nrecvs = 0;
2093     for (i=0; i<size; i++){ /* i=rank*/
2094       if (subrank == i/nsubcomm && rank != i){ /* my_subrank == other's subrank */
2095         send_rank[nsends] = i; nsends++;
2096         recv_rank[nrecvs++] = i;
2097       }
2098     }
2099     if (rank >= size - nleftover){/* this proc is a leftover processor */
2100       i = size-nleftover-1;
2101       j = 0;
2102       while (j < nsubcomm - nleftover){
2103         send_rank[nsends++] = i;
2104         i--; j++;
2105       }
2106     }
2107 
2108     if (nleftover && subsize == size/nsubcomm && subrank==subsize-1){ /* this proc recvs from leftover processors */
2109       for (i=0; i<nleftover; i++){
2110         recv_rank[nrecvs++] = size-nleftover+i;
2111       }
2112     }
2113 
2114     /* allocate sbuf_j, sbuf_a */
2115     i = nzlocal + rowrange[rank+1] - rowrange[rank] + 2;
2116     ierr = PetscMalloc(i*sizeof(PetscInt),&sbuf_j);CHKERRQ(ierr);
2117     ierr = PetscMalloc((nzlocal+1)*sizeof(PetscScalar),&sbuf_a);CHKERRQ(ierr);
2118   } /* end of if (reuse == MAT_INITIAL_MATRIX) */
2119 
2120   /* copy mat's local entries into the buffers */
2121   if (reuse == MAT_INITIAL_MATRIX){
2122     rownz_max = 0;
2123     rptr = sbuf_j;
2124     cols = sbuf_j + rend-rstart + 1;
2125     vals = sbuf_a;
2126     rptr[0] = 0;
2127     for (i=0; i<rend-rstart; i++){
2128       row = i + rstart;
2129       nzA    = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i];
2130       ncols  = nzA + nzB;
2131       cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i];
2132       aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i];
2133       /* load the column indices for this row into cols */
2134       lwrite = 0;
2135       for (l=0; l<nzB; l++) {
2136         if ((ctmp = bmap[cworkB[l]]) < cstart){
2137           vals[lwrite]   = aworkB[l];
2138           cols[lwrite++] = ctmp;
2139         }
2140       }
2141       for (l=0; l<nzA; l++){
2142         vals[lwrite]   = aworkA[l];
2143         cols[lwrite++] = cstart + cworkA[l];
2144       }
2145       for (l=0; l<nzB; l++) {
2146         if ((ctmp = bmap[cworkB[l]]) >= cend){
2147           vals[lwrite]   = aworkB[l];
2148           cols[lwrite++] = ctmp;
2149         }
2150       }
2151       vals += ncols;
2152       cols += ncols;
2153       rptr[i+1] = rptr[i] + ncols;
2154       if (rownz_max < ncols) rownz_max = ncols;
2155     }
2156     if (rptr[rend-rstart] != a->nz + b->nz) SETERRQ4(PETSC_ERR_PLIB,"rptr[%d] %d != %d + %d",rend-rstart,rptr[rend-rstart],a->nz,b->nz);
2157   } else { /* only copy matrix values into sbuf_a */
2158     rptr = sbuf_j;
2159     vals = sbuf_a;
2160     rptr[0] = 0;
2161     for (i=0; i<rend-rstart; i++){
2162       row = i + rstart;
2163       nzA    = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i];
2164       ncols  = nzA + nzB;
2165       cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i];
2166       aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i];
2167       lwrite = 0;
2168       for (l=0; l<nzB; l++) {
2169         if ((ctmp = bmap[cworkB[l]]) < cstart) vals[lwrite++] = aworkB[l];
2170       }
2171       for (l=0; l<nzA; l++) vals[lwrite++] = aworkA[l];
2172       for (l=0; l<nzB; l++) {
2173         if ((ctmp = bmap[cworkB[l]]) >= cend) vals[lwrite++] = aworkB[l];
2174       }
2175       vals += ncols;
2176       rptr[i+1] = rptr[i] + ncols;
2177     }
2178   } /* end of if (reuse == MAT_INITIAL_MATRIX) */
2179 
2180   /* send nzlocal to others, and recv others' nzlocal */
2181   /*--------------------------------------------------*/
2182   if (reuse == MAT_INITIAL_MATRIX){
2183     ierr = PetscMalloc2(3*(nsends + nrecvs)+1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr);
2184     s_waits2 = s_waits3 + nsends;
2185     s_waits1 = s_waits2 + nsends;
2186     r_waits1 = s_waits1 + nsends;
2187     r_waits2 = r_waits1 + nrecvs;
2188     r_waits3 = r_waits2 + nrecvs;
2189   } else {
2190     ierr = PetscMalloc2(nsends + nrecvs +1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr);
2191     r_waits3 = s_waits3 + nsends;
2192   }
2193 
2194   ierr = PetscObjectGetNewTag((PetscObject)mat,&tag3);CHKERRQ(ierr);
2195   if (reuse == MAT_INITIAL_MATRIX){
2196     /* get new tags to keep the communication clean */
2197     ierr = PetscObjectGetNewTag((PetscObject)mat,&tag1);CHKERRQ(ierr);
2198     ierr = PetscObjectGetNewTag((PetscObject)mat,&tag2);CHKERRQ(ierr);
2199     ierr = PetscMalloc3(nsends+nrecvs+1,PetscInt,&sbuf_nz,nrecvs,PetscInt*,&rbuf_j,nrecvs,PetscScalar*,&rbuf_a);CHKERRQ(ierr);
2200     rbuf_nz = sbuf_nz + nsends;
2201 
2202     /* post receives of others' nzlocal */
2203     for (i=0; i<nrecvs; i++){
2204       ierr = MPI_Irecv(rbuf_nz+i,1,MPIU_INT,MPI_ANY_SOURCE,tag1,comm,r_waits1+i);CHKERRQ(ierr);
2205     }
2206     /* send nzlocal to others */
2207     for (i=0; i<nsends; i++){
2208       sbuf_nz[i] = nzlocal;
2209       ierr = MPI_Isend(sbuf_nz+i,1,MPIU_INT,send_rank[i],tag1,comm,s_waits1+i);CHKERRQ(ierr);
2210     }
2211     /* wait on receives of nzlocal; allocate space for rbuf_j, rbuf_a */
2212     count = nrecvs;
2213     while (count) {
2214       ierr = MPI_Waitany(nrecvs,r_waits1,&imdex,&recv_status);CHKERRQ(ierr);
2215       recv_rank[imdex] = recv_status.MPI_SOURCE;
2216       /* allocate rbuf_a and rbuf_j; then post receives of rbuf_j */
2217       ierr = PetscMalloc((rbuf_nz[imdex]+1)*sizeof(PetscScalar),&rbuf_a[imdex]);CHKERRQ(ierr);
2218 
2219       i = rowrange[recv_status.MPI_SOURCE+1] - rowrange[recv_status.MPI_SOURCE]; /* number of expected mat->i */
2220       rbuf_nz[imdex] += i + 2;
2221       ierr = PetscMalloc(rbuf_nz[imdex]*sizeof(PetscInt),&rbuf_j[imdex]);CHKERRQ(ierr);
2222       ierr = MPI_Irecv(rbuf_j[imdex],rbuf_nz[imdex],MPIU_INT,recv_status.MPI_SOURCE,tag2,comm,r_waits2+imdex);CHKERRQ(ierr);
2223       count--;
2224     }
2225     /* wait on sends of nzlocal */
2226     if (nsends) {ierr = MPI_Waitall(nsends,s_waits1,send_status);CHKERRQ(ierr);}
2227     /* send mat->i,j to others, and recv from others */
2228     /*------------------------------------------------*/
2229     for (i=0; i<nsends; i++){
2230       j = nzlocal + rowrange[rank+1] - rowrange[rank] + 1;
2231       ierr = MPI_Isend(sbuf_j,j,MPIU_INT,send_rank[i],tag2,comm,s_waits2+i);CHKERRQ(ierr);
2232     }
2233     /* wait on receives of mat->i,j */
2234     /*------------------------------*/
2235     count = nrecvs;
2236     while (count) {
2237       ierr = MPI_Waitany(nrecvs,r_waits2,&imdex,&recv_status);CHKERRQ(ierr);
2238       if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_ERR_PLIB,"recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE);
2239       count--;
2240     }
2241     /* wait on sends of mat->i,j */
2242     /*---------------------------*/
2243     if (nsends) {
2244       ierr = MPI_Waitall(nsends,s_waits2,send_status);CHKERRQ(ierr);
2245     }
2246   } /* end of if (reuse == MAT_INITIAL_MATRIX) */
2247 
2248   /* post receives, send and receive mat->a */
2249   /*----------------------------------------*/
2250   for (imdex=0; imdex<nrecvs; imdex++) {
2251     ierr = MPI_Irecv(rbuf_a[imdex],rbuf_nz[imdex],MPIU_SCALAR,recv_rank[imdex],tag3,comm,r_waits3+imdex);CHKERRQ(ierr);
2252   }
2253   for (i=0; i<nsends; i++){
2254     ierr = MPI_Isend(sbuf_a,nzlocal,MPIU_SCALAR,send_rank[i],tag3,comm,s_waits3+i);CHKERRQ(ierr);
2255   }
2256   count = nrecvs;
2257   while (count) {
2258     ierr = MPI_Waitany(nrecvs,r_waits3,&imdex,&recv_status);CHKERRQ(ierr);
2259     if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_ERR_PLIB,"recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE);
2260     count--;
2261   }
2262   if (nsends) {
2263     ierr = MPI_Waitall(nsends,s_waits3,send_status);CHKERRQ(ierr);
2264   }
2265 
2266   ierr = PetscFree2(s_waits3,send_status);CHKERRQ(ierr);
2267 
2268   /* create redundant matrix */
2269   /*-------------------------*/
2270   if (reuse == MAT_INITIAL_MATRIX){
2271     /* compute rownz_max for preallocation */
2272     for (imdex=0; imdex<nrecvs; imdex++){
2273       j = rowrange[recv_rank[imdex]+1] - rowrange[recv_rank[imdex]];
2274       rptr = rbuf_j[imdex];
2275       for (i=0; i<j; i++){
2276         ncols = rptr[i+1] - rptr[i];
2277         if (rownz_max < ncols) rownz_max = ncols;
2278       }
2279     }
2280 
2281     ierr = MatCreate(subcomm,&C);CHKERRQ(ierr);
2282     ierr = MatSetSizes(C,mlocal_sub,mlocal_sub,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr);
2283     ierr = MatSetFromOptions(C);CHKERRQ(ierr);
2284     ierr = MatSeqAIJSetPreallocation(C,rownz_max,PETSC_NULL);CHKERRQ(ierr);
2285     ierr = MatMPIAIJSetPreallocation(C,rownz_max,PETSC_NULL,rownz_max,PETSC_NULL);CHKERRQ(ierr);
2286   } else {
2287     C = *matredundant;
2288   }
2289 
2290   /* insert local matrix entries */
2291   rptr = sbuf_j;
2292   cols = sbuf_j + rend-rstart + 1;
2293   vals = sbuf_a;
2294   for (i=0; i<rend-rstart; i++){
2295     row   = i + rstart;
2296     ncols = rptr[i+1] - rptr[i];
2297     ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
2298     vals += ncols;
2299     cols += ncols;
2300   }
2301   /* insert received matrix entries */
2302   for (imdex=0; imdex<nrecvs; imdex++){
2303     rstart = rowrange[recv_rank[imdex]];
2304     rend   = rowrange[recv_rank[imdex]+1];
2305     rptr = rbuf_j[imdex];
2306     cols = rbuf_j[imdex] + rend-rstart + 1;
2307     vals = rbuf_a[imdex];
2308     for (i=0; i<rend-rstart; i++){
2309       row   = i + rstart;
2310       ncols = rptr[i+1] - rptr[i];
2311       ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
2312       vals += ncols;
2313       cols += ncols;
2314     }
2315   }
2316   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2317   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2318   ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr);
2319   if (M != mat->rmap->N || N != mat->cmap->N) SETERRQ2(PETSC_ERR_ARG_INCOMP,"redundant mat size %d != input mat size %d",M,mat->rmap->N);
2320   if (reuse == MAT_INITIAL_MATRIX){
2321     PetscContainer container;
2322     *matredundant = C;
2323     /* create a supporting struct and attach it to C for reuse */
2324     ierr = PetscNewLog(C,Mat_Redundant,&redund);CHKERRQ(ierr);
2325     ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
2326     ierr = PetscContainerSetPointer(container,redund);CHKERRQ(ierr);
2327     ierr = PetscObjectCompose((PetscObject)C,"Mat_Redundant",(PetscObject)container);CHKERRQ(ierr);
2328     ierr = PetscContainerSetUserDestroy(container,PetscContainerDestroy_MatRedundant);CHKERRQ(ierr);
2329 
2330     redund->nzlocal = nzlocal;
2331     redund->nsends  = nsends;
2332     redund->nrecvs  = nrecvs;
2333     redund->send_rank = send_rank;
2334     redund->sbuf_nz = sbuf_nz;
2335     redund->sbuf_j  = sbuf_j;
2336     redund->sbuf_a  = sbuf_a;
2337     redund->rbuf_j  = rbuf_j;
2338     redund->rbuf_a  = rbuf_a;
2339 
2340     redund->MatDestroy = C->ops->destroy;
2341     C->ops->destroy    = MatDestroy_MatRedundant;
2342   }
2343   PetscFunctionReturn(0);
2344 }
2345 
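/*
   Usage sketch (illustrative only; subcomm and mlocal_sub are hypothetical
   values the caller must have set up, e.g. by splitting mat's communicator into
   nsubcomm pieces):

      PetscInt nsubcomm = 2, mlocal_sub;   -- local rows of the redundant copy
      MPI_Comm subcomm;
      Mat      Cred;
      ierr = MatGetRedundantMatrix(mat,nsubcomm,subcomm,mlocal_sub,MAT_INITIAL_MATRIX,&Cred);CHKERRQ(ierr);
      ierr = MatGetRedundantMatrix(mat,nsubcomm,subcomm,mlocal_sub,MAT_REUSE_MATRIX,&Cred);CHKERRQ(ierr);

   The MAT_REUSE_MATRIX call skips all index communication and resends only the
   numerical values, using the buffers cached in the Mat_Redundant container.
*/
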
2346 #undef __FUNCT__
2347 #define __FUNCT__ "MatGetRowMaxAbs_MPIAIJ"
2348 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2349 {
2350   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2351   PetscErrorCode ierr;
2352   PetscInt       i,*idxb = 0;
2353   PetscScalar    *va,*vb;
2354   Vec            vtmp;
2355 
2356   PetscFunctionBegin;
2357   ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr);
2358   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2359   if (idx) {
2360     for (i=0; i<A->rmap->n; i++) {
2361       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2362     }
2363   }
2364 
2365   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
2366   if (idx) {
2367     ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr);
2368   }
2369   ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
2370   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
2371 
2372   for (i=0; i<A->rmap->n; i++){
2373     if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
2374       va[i] = vb[i];
2375       if (idx) idx[i] = a->garray[idxb[i]];
2376     }
2377   }
2378 
2379   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2380   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
2381   if (idxb) {
2382     ierr = PetscFree(idxb);CHKERRQ(ierr);
2383   }
2384   ierr = VecDestroy(vtmp);CHKERRQ(ierr);
2385   PetscFunctionReturn(0);
2386 }
2387 
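/*
   Usage sketch (illustrative only): v[i] receives the largest-magnitude entry of
   local row i, merged from the diagonal and off-diagonal blocks as above; idx
   may be PETSC_NULL when the global column positions are not needed:

      Vec v;   -- conforming vector with one entry per local row
      ierr = MatGetRowMaxAbs(A,v,PETSC_NULL);CHKERRQ(ierr);
*/
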
2388 #undef __FUNCT__
2389 #define __FUNCT__ "MatGetRowMinAbs_MPIAIJ"
2390 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2391 {
2392   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2393   PetscErrorCode ierr;
2394   PetscInt       i,*idxb = 0;
2395   PetscScalar    *va,*vb;
2396   Vec            vtmp;
2397 
2398   PetscFunctionBegin;
2399   ierr = MatGetRowMinAbs(a->A,v,idx);CHKERRQ(ierr);
2400   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2401   if (idx) {
2402     for (i=0; i<A->rmap->n; i++) {
2403       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2404     }
2405   }
2406 
2407   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
2408   if (idx) {
2409     ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr);
2410   }
2411   ierr = MatGetRowMinAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
2412   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
2413 
2414   for (i=0; i<A->rmap->n; i++){
2415     if (PetscAbsScalar(va[i]) > PetscAbsScalar(vb[i])) {
2416       va[i] = vb[i];
2417       if (idx) idx[i] = a->garray[idxb[i]];
2418     }
2419   }
2420 
2421   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2422   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
2423   if (idxb) {
2424     ierr = PetscFree(idxb);CHKERRQ(ierr);
2425   }
2426   ierr = VecDestroy(vtmp);CHKERRQ(ierr);
2427   PetscFunctionReturn(0);
2428 }
2429 
2430 #undef __FUNCT__
2431 #define __FUNCT__ "MatGetRowMin_MPIAIJ"
2432 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2433 {
2434   Mat_MPIAIJ    *mat    = (Mat_MPIAIJ *) A->data;
2435   PetscInt       n      = A->rmap->n;
2436   PetscInt       cstart = A->cmap->rstart;
2437   PetscInt      *cmap   = mat->garray;
2438   PetscInt      *diagIdx, *offdiagIdx;
2439   Vec            diagV, offdiagV;
2440   PetscScalar   *a, *diagA, *offdiagA;
2441   PetscInt       r;
2442   PetscErrorCode ierr;
2443 
2444   PetscFunctionBegin;
2445   ierr = PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);CHKERRQ(ierr);
2446   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr);
2447   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr);
2448   ierr = MatGetRowMin(mat->A, diagV,    diagIdx);CHKERRQ(ierr);
2449   ierr = MatGetRowMin(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
2450   ierr = VecGetArray(v,        &a);CHKERRQ(ierr);
2451   ierr = VecGetArray(diagV,    &diagA);CHKERRQ(ierr);
2452   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2453   for(r = 0; r < n; ++r) {
2454     if (PetscAbsScalar(diagA[r]) <= PetscAbsScalar(offdiagA[r])) {
2455       a[r]   = diagA[r];
2456       idx[r] = cstart + diagIdx[r];
2457     } else {
2458       a[r]   = offdiagA[r];
2459       idx[r] = cmap[offdiagIdx[r]];
2460     }
2461   }
2462   ierr = VecRestoreArray(v,        &a);CHKERRQ(ierr);
2463   ierr = VecRestoreArray(diagV,    &diagA);CHKERRQ(ierr);
2464   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2465   ierr = VecDestroy(diagV);CHKERRQ(ierr);
2466   ierr = VecDestroy(offdiagV);CHKERRQ(ierr);
2467   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
2468   PetscFunctionReturn(0);
2469 }
2470 
2471 #undef __FUNCT__
2472 #define __FUNCT__ "MatGetRowMax_MPIAIJ"
2473 PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2474 {
2475   Mat_MPIAIJ    *mat    = (Mat_MPIAIJ *) A->data;
2476   PetscInt       n      = A->rmap->n;
2477   PetscInt       cstart = A->cmap->rstart;
2478   PetscInt      *cmap   = mat->garray;
2479   PetscInt      *diagIdx, *offdiagIdx;
2480   Vec            diagV, offdiagV;
2481   PetscScalar   *a, *diagA, *offdiagA;
2482   PetscInt       r;
2483   PetscErrorCode ierr;
2484 
2485   PetscFunctionBegin;
2486   ierr = PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);CHKERRQ(ierr);
2487   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr);
2488   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr);
2489   ierr = MatGetRowMax(mat->A, diagV,    diagIdx);CHKERRQ(ierr);
2490   ierr = MatGetRowMax(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
2491   ierr = VecGetArray(v,        &a);CHKERRQ(ierr);
2492   ierr = VecGetArray(diagV,    &diagA);CHKERRQ(ierr);
2493   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2494   for(r = 0; r < n; ++r) {
2495     if (PetscAbsScalar(diagA[r]) >= PetscAbsScalar(offdiagA[r])) {
2496       a[r]   = diagA[r];
2497       idx[r] = cstart + diagIdx[r];
2498     } else {
2499       a[r]   = offdiagA[r];
2500       idx[r] = cmap[offdiagIdx[r]];
2501     }
2502   }
2503   ierr = VecRestoreArray(v,        &a);CHKERRQ(ierr);
2504   ierr = VecRestoreArray(diagV,    &diagA);CHKERRQ(ierr);
2505   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2506   ierr = VecDestroy(diagV);CHKERRQ(ierr);
2507   ierr = VecDestroy(offdiagV);CHKERRQ(ierr);
2508   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
2509   PetscFunctionReturn(0);
2510 }
2511 
2512 #undef __FUNCT__
2513 #define __FUNCT__ "MatGetSeqNonzerostructure_MPIAIJ"
2514 PetscErrorCode MatGetSeqNonzerostructure_MPIAIJ(Mat mat,Mat *newmat[])
2515 {
2516   PetscErrorCode ierr;
2517 
2518   PetscFunctionBegin;
2519   ierr = MatGetSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,newmat);CHKERRQ(ierr);
2520   PetscFunctionReturn(0);
2521 }
2522 
2523 /* -------------------------------------------------------------------*/
2524 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
2525        MatGetRow_MPIAIJ,
2526        MatRestoreRow_MPIAIJ,
2527        MatMult_MPIAIJ,
2528 /* 4*/ MatMultAdd_MPIAIJ,
2529        MatMultTranspose_MPIAIJ,
2530        MatMultTransposeAdd_MPIAIJ,
2531 #ifdef PETSC_HAVE_PBGL
2532        MatSolve_MPIAIJ,
2533 #else
2534        0,
2535 #endif
2536        0,
2537        0,
2538 /*10*/ 0,
2539        0,
2540        0,
2541        MatRelax_MPIAIJ,
2542        MatTranspose_MPIAIJ,
2543 /*15*/ MatGetInfo_MPIAIJ,
2544        MatEqual_MPIAIJ,
2545        MatGetDiagonal_MPIAIJ,
2546        MatDiagonalScale_MPIAIJ,
2547        MatNorm_MPIAIJ,
2548 /*20*/ MatAssemblyBegin_MPIAIJ,
2549        MatAssemblyEnd_MPIAIJ,
2550        0,
2551        MatSetOption_MPIAIJ,
2552        MatZeroEntries_MPIAIJ,
2553 /*25*/ MatZeroRows_MPIAIJ,
2554        0,
2555 #ifdef PETSC_HAVE_PBGL
2556        0,
2557 #else
2558        0,
2559 #endif
2560        0,
2561        0,
2562 /*30*/ MatSetUpPreallocation_MPIAIJ,
2563 #ifdef PETSC_HAVE_PBGL
2564        0,
2565 #else
2566        0,
2567 #endif
2568        0,
2569        0,
2570        0,
2571 /*35*/ MatDuplicate_MPIAIJ,
2572        0,
2573        0,
2574        0,
2575        0,
2576 /*40*/ MatAXPY_MPIAIJ,
2577        MatGetSubMatrices_MPIAIJ,
2578        MatIncreaseOverlap_MPIAIJ,
2579        MatGetValues_MPIAIJ,
2580        MatCopy_MPIAIJ,
2581 /*45*/ MatGetRowMax_MPIAIJ,
2582        MatScale_MPIAIJ,
2583        0,
2584        0,
2585        0,
2586 /*50*/ MatSetBlockSize_MPIAIJ,
2587        0,
2588        0,
2589        0,
2590        0,
2591 /*55*/ MatFDColoringCreate_MPIAIJ,
2592        0,
2593        MatSetUnfactored_MPIAIJ,
2594        MatPermute_MPIAIJ,
2595        0,
2596 /*60*/ MatGetSubMatrix_MPIAIJ,
2597        MatDestroy_MPIAIJ,
2598        MatView_MPIAIJ,
2599        0,
2600        0,
2601 /*65*/ 0,
2602        0,
2603        0,
2604        0,
2605        0,
2606 /*70*/ MatGetRowMaxAbs_MPIAIJ,
2607        MatGetRowMinAbs_MPIAIJ,
2608        0,
2609        MatSetColoring_MPIAIJ,
2610 #if defined(PETSC_HAVE_ADIC)
2611        MatSetValuesAdic_MPIAIJ,
2612 #else
2613        0,
2614 #endif
2615        MatSetValuesAdifor_MPIAIJ,
2616 /*75*/ 0,
2617        0,
2618        0,
2619        0,
2620        0,
2621 /*80*/ 0,
2622        0,
2623        0,
2624 /*84*/ MatLoad_MPIAIJ,
2625        0,
2626        0,
2627        0,
2628        0,
2629        0,
2630 /*90*/ MatMatMult_MPIAIJ_MPIAIJ,
2631        MatMatMultSymbolic_MPIAIJ_MPIAIJ,
2632        MatMatMultNumeric_MPIAIJ_MPIAIJ,
2633        MatPtAP_Basic,
2634        MatPtAPSymbolic_MPIAIJ,
2635 /*95*/ MatPtAPNumeric_MPIAIJ,
2636        0,
2637        0,
2638        0,
2639        0,
2640 /*100*/0,
2641        MatPtAPSymbolic_MPIAIJ_MPIAIJ,
2642        MatPtAPNumeric_MPIAIJ_MPIAIJ,
2643        MatConjugate_MPIAIJ,
2644        0,
2645 /*105*/MatSetValuesRow_MPIAIJ,
2646        MatRealPart_MPIAIJ,
2647        MatImaginaryPart_MPIAIJ,
2648        0,
2649        0,
2650 /*110*/0,
2651        MatGetRedundantMatrix_MPIAIJ,
2652        MatGetRowMin_MPIAIJ,
2653        0,
2654        0,
2655 /*115*/MatGetSeqNonzerostructure_MPIAIJ};
2656 
2657 /* ----------------------------------------------------------------------------------------*/
2658 
2659 EXTERN_C_BEGIN
2660 #undef __FUNCT__
2661 #define __FUNCT__ "MatStoreValues_MPIAIJ"
2662 PetscErrorCode PETSCMAT_DLLEXPORT MatStoreValues_MPIAIJ(Mat mat)
2663 {
2664   Mat_MPIAIJ     *aij = (Mat_MPIAIJ *)mat->data;
2665   PetscErrorCode ierr;
2666 
2667   PetscFunctionBegin;
2668   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
2669   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
2670   PetscFunctionReturn(0);
2671 }
2672 EXTERN_C_END
2673 
2674 EXTERN_C_BEGIN
2675 #undef __FUNCT__
2676 #define __FUNCT__ "MatRetrieveValues_MPIAIJ"
2677 PetscErrorCode PETSCMAT_DLLEXPORT MatRetrieveValues_MPIAIJ(Mat mat)
2678 {
2679   Mat_MPIAIJ     *aij = (Mat_MPIAIJ *)mat->data;
2680   PetscErrorCode ierr;
2681 
2682   PetscFunctionBegin;
2683   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
2684   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
2685   PetscFunctionReturn(0);
2686 }
2687 EXTERN_C_END
2688 
2689 #include "petscpc.h"
2690 EXTERN_C_BEGIN
2691 #undef __FUNCT__
2692 #define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ"
2693 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
2694 {
2695   Mat_MPIAIJ     *b;
2696   PetscErrorCode ierr;
2697   PetscInt       i;
2698 
2699   PetscFunctionBegin;
2700   B->preallocated = PETSC_TRUE;
2701   if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5;
2702   if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2;
2703   if (d_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz);
2704   if (o_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz);
2705 
2706   ierr = PetscMapSetBlockSize(B->rmap,1);CHKERRQ(ierr);
2707   ierr = PetscMapSetBlockSize(B->cmap,1);CHKERRQ(ierr);
2708   ierr = PetscMapSetUp(B->rmap);CHKERRQ(ierr);
2709   ierr = PetscMapSetUp(B->cmap);CHKERRQ(ierr);
2710   if (d_nnz) {
2711     for (i=0; i<B->rmap->n; i++) {
2712       if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %D value %D",i,d_nnz[i]);
2713     }
2714   }
2715   if (o_nnz) {
2716     for (i=0; i<B->rmap->n; i++) {
2717       if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %D value %D",i,o_nnz[i]);
2718     }
2719   }
2720   b = (Mat_MPIAIJ*)B->data;
2721 
2722   /* Explicitly create 2 MATSEQAIJ matrices. */
2723   ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
2724   ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr);
2725   ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr);
2726   ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr);
2727   ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
2728   ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr);
2729   ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr);
2730   ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr);
2731 
2732   ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr);
2733   ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr);
2734 
2735   PetscFunctionReturn(0);
2736 }
2737 EXTERN_C_END
2738 
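/*
   Usage sketch (illustrative only): preallocate the diagonal block (columns
   owned by this process) and the off-diagonal block separately; exact per-row
   counts in d_nnz/o_nnz avoid mallocs during assembly:

      Mat B;
      ierr = MatCreate(PETSC_COMM_WORLD,&B);CHKERRQ(ierr);
      ierr = MatSetSizes(B,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
      ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr);
      ierr = MatMPIAIJSetPreallocation(B,5,PETSC_NULL,2,PETSC_NULL);CHKERRQ(ierr);
*/
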
2739 #undef __FUNCT__
2740 #define __FUNCT__ "MatDuplicate_MPIAIJ"
2741 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
2742 {
2743   Mat            mat;
2744   Mat_MPIAIJ     *a,*oldmat = (Mat_MPIAIJ*)matin->data;
2745   PetscErrorCode ierr;
2746 
2747   PetscFunctionBegin;
2748   *newmat       = 0;
2749   ierr = MatCreate(((PetscObject)matin)->comm,&mat);CHKERRQ(ierr);
2750   ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr);
2751   ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr);
2752   ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
2753   a    = (Mat_MPIAIJ*)mat->data;
2754 
2755   mat->factor       = matin->factor;
2756   mat->rmap->bs      = matin->rmap->bs;
2757   mat->assembled    = PETSC_TRUE;
2758   mat->insertmode   = NOT_SET_VALUES;
2759   mat->preallocated = PETSC_TRUE;
2760 
2761   a->size           = oldmat->size;
2762   a->rank           = oldmat->rank;
2763   a->donotstash     = oldmat->donotstash;
2764   a->roworiented    = oldmat->roworiented;
2765   a->rowindices     = 0;
2766   a->rowvalues      = 0;
2767   a->getrowactive   = PETSC_FALSE;
2768 
2769   ierr = PetscMapCopy(((PetscObject)mat)->comm,matin->rmap,mat->rmap);CHKERRQ(ierr);
2770   ierr = PetscMapCopy(((PetscObject)mat)->comm,matin->cmap,mat->cmap);CHKERRQ(ierr);
2771 
2772   ierr = MatStashCreate_Private(((PetscObject)matin)->comm,1,&mat->stash);CHKERRQ(ierr);
2773   if (oldmat->colmap) {
2774 #if defined (PETSC_USE_CTABLE)
2775     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
2776 #else
2777     ierr = PetscMalloc((mat->cmap->N)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr);
2778     ierr = PetscLogObjectMemory(mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
2779     ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
2780 #endif
2781   } else a->colmap = 0;
2782   if (oldmat->garray) {
2783     PetscInt len;
2784     len  = oldmat->B->cmap->n;
2785     ierr = PetscMalloc((len+1)*sizeof(PetscInt),&a->garray);CHKERRQ(ierr);
2786     ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr);
2787     if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); }
2788   } else a->garray = 0;
2789 
2790   ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
2791   ierr = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr);
2792   ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
2793   ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr);
2794   ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
2795   ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr);
2796   ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
2797   ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr);
2798   ierr = PetscFListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr);
2799   *newmat = mat;
2800   PetscFunctionReturn(0);
2801 }
2802 
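/*
   The duplication below is normally reached through the public interface; a
   minimal sketch (A is any assembled MPIAIJ matrix, B receives a full copy
   including the local diagonal and off-diagonal blocks):

.vb
      Mat B;
      ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr);
.ve
*/
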
2803 #include "petscsys.h"
2804 
2805 #undef __FUNCT__
2806 #define __FUNCT__ "MatLoad_MPIAIJ"
2807 PetscErrorCode MatLoad_MPIAIJ(PetscViewer viewer, const MatType type,Mat *newmat)
2808 {
2809   Mat            A;
2810   PetscScalar    *vals,*svals;
2811   MPI_Comm       comm = ((PetscObject)viewer)->comm;
2812   MPI_Status     status;
2813   PetscErrorCode ierr;
2814   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag,mpicnt,mpimaxnz;
2815   PetscInt       i,nz,j,rstart,rend,mmax,maxnz;
2816   PetscInt       header[4],*rowlengths = 0,M,N,m,*cols;
2817   PetscInt       *ourlens = PETSC_NULL,*procsnz = PETSC_NULL,*offlens = PETSC_NULL,jj,*mycols,*smycols;
2818   PetscInt       cend,cstart,n,*rowners;
2819   int            fd;
2820 
2821   PetscFunctionBegin;
2822   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2823   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2824   if (!rank) {
2825     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
2826     ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
2827     if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
2828   }
2829 
2830   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
2831   M = header[1]; N = header[2];
2832   /* determine ownership of all rows */
2833   m    = M/size + ((M % size) > rank);
2834   ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr);
2835   ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
2836 
2837   /* First process needs enough room for process with most rows */
2838   if (!rank) {
2839     mmax       = rowners[1];
2840     for (i=2; i<=size; i++) {
2841       mmax = PetscMax(mmax,rowners[i]);
2842     }
2843   } else mmax = m;
2844 
2845   rowners[0] = 0;
2846   for (i=2; i<=size; i++) {
2847     rowners[i] += rowners[i-1];
2848   }
2849   rstart = rowners[rank];
2850   rend   = rowners[rank+1];
2851 
2852   /* distribute row lengths to all processors */
2853   ierr    = PetscMalloc2(mmax,PetscInt,&ourlens,mmax,PetscInt,&offlens);CHKERRQ(ierr);
2854   if (!rank) {
2855     ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr);
2856     ierr = PetscMalloc(m*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
2857     ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr);
2858     ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr);
2859     for (j=0; j<m; j++) {
2860       procsnz[0] += ourlens[j];
2861     }
2862     for (i=1; i<size; i++) {
2863       ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr);
2864       /* calculate the number of nonzeros on each processor */
2865       for (j=0; j<rowners[i+1]-rowners[i]; j++) {
2866         procsnz[i] += rowlengths[j];
2867       }
2868       mpicnt = PetscMPIIntCast(rowners[i+1]-rowners[i]);
2869       ierr   = MPI_Send(rowlengths,mpicnt,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
2870     }
2871     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
2872   } else {
2873     mpicnt = PetscMPIIntCast(m);
2874     ierr   = MPI_Recv(ourlens,mpicnt,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
2875   }
2876 
2877   if (!rank) {
2878     /* determine max buffer needed and allocate it */
2879     maxnz = 0;
2880     for (i=0; i<size; i++) {
2881       maxnz = PetscMax(maxnz,procsnz[i]);
2882     }
2883     ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr);
2884 
2885     /* read in my part of the matrix column indices  */
2886     nz   = procsnz[0];
2887     ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr);
2888     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
2889 
2890     /* read in everyone else's parts and ship them off */
2891     for (i=1; i<size; i++) {
2892       nz     = procsnz[i];
2893       ierr   = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
2894       mpicnt = PetscMPIIntCast(nz);
2895       ierr   = MPI_Send(cols,mpicnt,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
2896     }
2897     ierr = PetscFree(cols);CHKERRQ(ierr);
2898   } else {
2899     /* determine buffer space needed for message */
2900     nz = 0;
2901     for (i=0; i<m; i++) {
2902       nz += ourlens[i];
2903     }
2904     ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr);
2905 
2906     /* receive message of column indices */
2907     mpicnt = PetscMPIIntCast(nz);
2908     ierr = MPI_Recv(mycols,mpicnt,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
2909     ierr = MPI_Get_count(&status,MPIU_INT,&mpimaxnz);CHKERRQ(ierr);
2910     if (mpimaxnz == MPI_UNDEFINED) {SETERRQ1(PETSC_ERR_LIB,"MPI_Get_count() returned MPI_UNDEFINED, expected %d",mpicnt);}
2911     else if (mpimaxnz < 0) {SETERRQ2(PETSC_ERR_LIB,"MPI_Get_count() returned impossible negative value %d, expected %d",mpimaxnz,mpicnt);}
2912     else if (mpimaxnz != mpicnt) {SETERRQ2(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file: expected %d received %d",mpicnt,mpimaxnz);}
2913   }
2914 
2915   /* determine column ownership if matrix is not square */
2916   if (N != M) {
2917     n      = N/size + ((N % size) > rank);
2918     ierr   = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
2919     cstart = cend - n;
2920   } else {
2921     cstart = rstart;
2922     cend   = rend;
2923     n      = cend - cstart;
2924   }
2925 
2926   /* loop over local rows, determining number of off diagonal entries */
2927   ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr);
2928   jj = 0;
2929   for (i=0; i<m; i++) {
2930     for (j=0; j<ourlens[i]; j++) {
2931       if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++;
2932       jj++;
2933     }
2934   }
2935 
2936   /* create our matrix */
2937   for (i=0; i<m; i++) {
2938     ourlens[i] -= offlens[i];
2939   }
2940   ierr = MatCreate(comm,&A);CHKERRQ(ierr);
2941   ierr = MatSetSizes(A,m,n,M,N);CHKERRQ(ierr);
2942   ierr = MatSetType(A,type);CHKERRQ(ierr);
2943   ierr = MatMPIAIJSetPreallocation(A,0,ourlens,0,offlens);CHKERRQ(ierr);
2944 
2945   for (i=0; i<m; i++) {
2946     ourlens[i] += offlens[i];
2947   }
2948 
2949   if (!rank) {
2950     ierr = PetscMalloc((maxnz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
2951 
2952     /* read in my part of the matrix numerical values  */
2953     nz   = procsnz[0];
2954     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2955 
2956     /* insert into matrix */
2957     jj      = rstart;
2958     smycols = mycols;
2959     svals   = vals;
2960     for (i=0; i<m; i++) {
2961       ierr = MatSetValues_MPIAIJ(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
2962       smycols += ourlens[i];
2963       svals   += ourlens[i];
2964       jj++;
2965     }
2966 
2967     /* read in other processors and ship out */
2968     for (i=1; i<size; i++) {
2969       nz     = procsnz[i];
2970       ierr   = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2971       mpicnt = PetscMPIIntCast(nz);
2972       ierr   = MPI_Send(vals,mpicnt,MPIU_SCALAR,i,((PetscObject)A)->tag,comm);CHKERRQ(ierr);
2973     }
2974     ierr = PetscFree(procsnz);CHKERRQ(ierr);
2975   } else {
2976     /* receive numeric values */
2977     ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
2978 
2979     /* receive message of values */
2980     mpicnt = PetscMPIIntCast(nz);
2981     ierr   = MPI_Recv(vals,mpicnt,MPIU_SCALAR,0,((PetscObject)A)->tag,comm,&status);CHKERRQ(ierr);
2982     ierr   = MPI_Get_count(&status,MPIU_SCALAR,&mpimaxnz);CHKERRQ(ierr);
2983     if (mpimaxnz == MPI_UNDEFINED) {SETERRQ1(PETSC_ERR_LIB,"MPI_Get_count() returned MPI_UNDEFINED, expected %d",mpicnt);}
2984     else if (mpimaxnz < 0) {SETERRQ2(PETSC_ERR_LIB,"MPI_Get_count() returned impossible negative value %d, expected %d",mpimaxnz,mpicnt);}
2985     else if (mpimaxnz != mpicnt) {SETERRQ2(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file: expected %d received %d",mpicnt,mpimaxnz);}
2986 
2987     /* insert into matrix */
2988     jj      = rstart;
2989     smycols = mycols;
2990     svals   = vals;
2991     for (i=0; i<m; i++) {
2992       ierr     = MatSetValues_MPIAIJ(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
2993       smycols += ourlens[i];
2994       svals   += ourlens[i];
2995       jj++;
2996     }
2997   }
2998   ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr);
2999   ierr = PetscFree(vals);CHKERRQ(ierr);
3000   ierr = PetscFree(mycols);CHKERRQ(ierr);
3001   ierr = PetscFree(rowners);CHKERRQ(ierr);
3002 
3003   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3004   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3005   *newmat = A;
3006   PetscFunctionReturn(0);
3007 }
3008 
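/*
   A typical call sequence that ends up in MatLoad_MPIAIJ() above; a sketch in
   which "matrix.dat" is a hypothetical file previously written with MatView()
   on a binary viewer:

.vb
      PetscViewer viewer;
      Mat         A;
      ierr = PetscViewerBinaryOpen(PETSC_COMM_WORLD,"matrix.dat",FILE_MODE_READ,&viewer);CHKERRQ(ierr);
      ierr = MatLoad(viewer,MATMPIAIJ,&A);CHKERRQ(ierr);
      ierr = PetscViewerDestroy(viewer);CHKERRQ(ierr);
.ve
*/
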
3009 #undef __FUNCT__
3010 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ"
3011 /*
3012     Not great since it makes two copies of the submatrix: first a local SeqAIJ
3013   copy, then the end result formed by concatenating the local matrices.
3014   Writing it directly would be much like MatGetSubMatrices_MPIAIJ().
3015 */
3016 PetscErrorCode MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat)
3017 {
3018   PetscErrorCode ierr;
3019   PetscMPIInt    rank,size;
3020   PetscInt       i,m,n,rstart,row,rend,nz,*cwork,j;
3021   PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal;
3022   Mat            *local,M,Mreuse;
3023   MatScalar      *vwork,*aa;
3024   MPI_Comm       comm = ((PetscObject)mat)->comm;
3025   Mat_SeqAIJ     *aij;
3026 
3027 
3028   PetscFunctionBegin;
3029   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3030   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3031 
3032   if (call ==  MAT_REUSE_MATRIX) {
3033     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr);
3034     if (!Mreuse) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3035     local = &Mreuse;
3036     ierr  = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);CHKERRQ(ierr);
3037   } else {
3038     ierr   = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr);
3039     Mreuse = *local;
3040     ierr   = PetscFree(local);CHKERRQ(ierr);
3041   }
3042 
3043   /*
3044       m - number of local rows
3045       n - number of columns (same on all processors)
3046       rstart - first row in new global matrix generated
3047   */
3048   ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
3049   if (call == MAT_INITIAL_MATRIX) {
3050     aij = (Mat_SeqAIJ*)(Mreuse)->data;
3051     ii  = aij->i;
3052     jj  = aij->j;
3053 
3054     /*
3055         Determine the number of non-zeros in the diagonal and off-diagonal
3056         portions of the matrix in order to do correct preallocation
3057     */
3058 
3059     /* first get start and end of "diagonal" columns */
3060     if (csize == PETSC_DECIDE) {
3061       ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
3062       if (mglobal == n) { /* square matrix */
3063 	nlocal = m;
3064       } else {
3065         nlocal = n/size + ((n % size) > rank);
3066       }
3067     } else {
3068       nlocal = csize;
3069     }
3070     ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3071     rstart = rend - nlocal;
3072     if (rank == size - 1 && rend != n) {
3073       SETERRQ2(PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n);
3074     }
3075 
3076     /* next, compute all the lengths */
3077     ierr  = PetscMalloc((2*m+1)*sizeof(PetscInt),&dlens);CHKERRQ(ierr);
3078     olens = dlens + m;
3079     for (i=0; i<m; i++) {
3080       jend = ii[i+1] - ii[i];
3081       olen = 0;
3082       dlen = 0;
3083       for (j=0; j<jend; j++) {
3084         if (*jj < rstart || *jj >= rend) olen++;
3085         else dlen++;
3086         jj++;
3087       }
3088       olens[i] = olen;
3089       dlens[i] = dlen;
3090     }
3091     ierr = MatCreate(comm,&M);CHKERRQ(ierr);
3092     ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr);
3093     ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr);
3094     ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr);
3095     ierr = PetscFree(dlens);CHKERRQ(ierr);
3096   } else {
3097     PetscInt ml,nl;
3098 
3099     M = *newmat;
3100     ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
3101     if (ml != m) SETERRQ(PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
3102     ierr = MatZeroEntries(M);CHKERRQ(ierr);
3103     /*
3104          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
3105        rather than the slower MatSetValues().
3106     */
3107     M->was_assembled = PETSC_TRUE;
3108     M->assembled     = PETSC_FALSE;
3109   }
3110   ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
3111   aij = (Mat_SeqAIJ*)(Mreuse)->data;
3112   ii  = aij->i;
3113   jj  = aij->j;
3114   aa  = aij->a;
3115   for (i=0; i<m; i++) {
3116     row   = rstart + i;
3117     nz    = ii[i+1] - ii[i];
3118     cwork = jj;     jj += nz;
3119     vwork = aa;     aa += nz;
3120     ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
3121   }
3122 
3123   ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3124   ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3125   *newmat = M;
3126 
3127   /* save submatrix used in processor for next request */
3128   if (call ==  MAT_INITIAL_MATRIX) {
3129     ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
3130     ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr);
3131   }
3132 
3133   PetscFunctionReturn(0);
3134 }
3135 
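/*
   Sketch of driving the routine above through MatGetSubMatrix(), including the
   reuse path (isrow/iscol are parallel index sets of the wanted global rows and
   columns; all names here are illustrative):

.vb
      ierr = MatGetSubMatrix(A,isrow,iscol,PETSC_DECIDE,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr);
      ....  A changes numerically but keeps the same nonzero pattern  ....
      ierr = MatGetSubMatrix(A,isrow,iscol,PETSC_DECIDE,MAT_REUSE_MATRIX,&B);CHKERRQ(ierr);
.ve
*/
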
3136 EXTERN_C_BEGIN
3137 #undef __FUNCT__
3138 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR_MPIAIJ"
3139 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[])
3140 {
3141   PetscInt       m,cstart, cend,j,nnz,i,d;
3142   PetscInt       *d_nnz,*o_nnz,nnz_max = 0,rstart,ii;
3143   const PetscInt *JJ;
3144   PetscScalar    *values;
3145   PetscErrorCode ierr;
3146 
3147   PetscFunctionBegin;
3148   if (Ii[0]) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0, it is %D",Ii[0]);
3149 
3150   ierr = PetscMapSetBlockSize(B->rmap,1);CHKERRQ(ierr);
3151   ierr = PetscMapSetBlockSize(B->cmap,1);CHKERRQ(ierr);
3152   ierr = PetscMapSetUp(B->rmap);CHKERRQ(ierr);
3153   ierr = PetscMapSetUp(B->cmap);CHKERRQ(ierr);
3154   m      = B->rmap->n;
3155   cstart = B->cmap->rstart;
3156   cend   = B->cmap->rend;
3157   rstart = B->rmap->rstart;
3158 
3159   ierr  = PetscMalloc((2*m+1)*sizeof(PetscInt),&d_nnz);CHKERRQ(ierr);
3160   o_nnz = d_nnz + m;
3161 
3162 #if defined(PETSC_USE_DEBUG)
3163   for (i=0; i<m; i++) {
3164     nnz     = Ii[i+1]- Ii[i];
3165     JJ      = J + Ii[i];
3166     if (nnz < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative number of columns %D",i,nnz);
3167     if (nnz && (JJ[0] < 0)) SETERRQ2(PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index %D",i,JJ[0]);
3168     if (nnz && (JJ[nnz-1] >= B->cmap->N)) SETERRQ3(PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N);
3169     for (j=1; j<nnz; j++) {
3170       if (JJ[j] <= JJ[j-1]) SETERRQ2(PETSC_ERR_ARG_WRONGSTATE,"Row %D has unsorted column index at location %D",i,j);
3171     }
3172   }
3173 #endif
3174 
3175   for (i=0; i<m; i++) {
3176     nnz     = Ii[i+1]- Ii[i];
3177     JJ      = J + Ii[i];
3178     nnz_max = PetscMax(nnz_max,nnz);
3179     for (j=0; j<nnz; j++) {
3180       if (*JJ >= cstart) break;
3181       JJ++;
3182     }
3183     d = 0;
3184     for (; j<nnz; j++) {
3185       if (*JJ++ >= cend) break;
3186       d++;
3187     }
3188     d_nnz[i] = d;
3189     o_nnz[i] = nnz - d;
3190   }
3191   ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
3192   ierr = PetscFree(d_nnz);CHKERRQ(ierr);
3193 
3194   if (v) values = (PetscScalar*)v;
3195   else {
3196     ierr = PetscMalloc((nnz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr);
3197     ierr = PetscMemzero(values,nnz_max*sizeof(PetscScalar));CHKERRQ(ierr);
3198   }
3199 
3200   for (i=0; i<m; i++) {
3201     ii   = i + rstart;
3202     nnz  = Ii[i+1]- Ii[i];
3203     ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ? Ii[i] : 0),INSERT_VALUES);CHKERRQ(ierr);
3204   }
3205   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3206   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3207 
3208   if (!v) {
3209     ierr = PetscFree(values);CHKERRQ(ierr);
3210   }
3211   PetscFunctionReturn(0);
3212 }
3213 EXTERN_C_END
3214 
3215 #undef __FUNCT__
3216 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR"
3217 /*@
3218    MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format
3219    (the default parallel PETSc format).
3220 
3221    Collective on MPI_Comm
3222 
3223    Input Parameters:
3224 +  B - the matrix
3225 .  i - the indices into j for the start of each local row (starts with zero)
3226 .  j - the column indices for each local row (starts with zero) these must be sorted for each row
3227 -  v - optional values in the matrix
3228 
3229    Level: developer
3230 
3231    Notes:
3232        The i, j, and v arrays ARE copied by this routine into the internal format used by PETSc;
3233      thus you CANNOT change the matrix entries by changing the values of v[] after you have
3234      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3235 
3236        The i and j indices are 0 based, and i indices are indices corresponding to the local j array.
3237 
3238        The format used for the sparse matrix input is equivalent to a
3239     row-major ordering, i.e., for the following matrix the expected input data is
3240     as shown:
3241 
3242         1 0 0
3243         2 0 3     P0
3244        -------
3245         4 5 6     P1
3246 
3247      Process0 [P0]: rows_owned=[0,1]
3248         i =  {0,1,3}  [size = nrow+1  = 2+1]
3249         j =  {0,0,2}  [size = nz = 3]
3250         v =  {1,2,3}  [size = nz = 3]
3251 
3252      Process1 [P1]: rows_owned=[2]
3253         i =  {0,3}    [size = nrow+1  = 1+1]
3254         j =  {0,1,2}  [size = nz = 3]
3255         v =  {4,5,6}  [size = nz = 3]
3256 
3257       The column indices for each row MUST be sorted.
3258 
3259 .keywords: matrix, aij, compressed row, sparse, parallel
3260 
3261 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateMPIAIJ(), MPIAIJ,
3262           MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays()
3263 @*/
3264 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
3265 {
3266   PetscErrorCode ierr,(*f)(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]);
3267 
3268   PetscFunctionBegin;
3269   ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",(void (**)(void))&f);CHKERRQ(ierr);
3270   if (f) {
3271     ierr = (*f)(B,i,j,v);CHKERRQ(ierr);
3272   }
3273   PetscFunctionReturn(0);
3274 }
3275 
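/*
   Sketch of the collective call sequence for the two-process example in the
   manual page above; each rank passes only its local CSR arrays (m is 2 on
   rank 0 and 1 on rank 1):

.vb
      rank 0:  PetscInt i[] = {0,1,3}, j[] = {0,0,2};  PetscScalar v[] = {1,2,3};
      rank 1:  PetscInt i[] = {0,3},   j[] = {0,1,2};  PetscScalar v[] = {4,5,6};

      ierr = MatCreate(comm,&B);CHKERRQ(ierr);
      ierr = MatSetSizes(B,m,PETSC_DECIDE,3,3);CHKERRQ(ierr);
      ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr);
      ierr = MatMPIAIJSetPreallocationCSR(B,i,j,v);CHKERRQ(ierr);
.ve
*/
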
3276 #undef __FUNCT__
3277 #define __FUNCT__ "MatMPIAIJSetPreallocation"
3278 /*@C
3279    MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format
3280    (the default parallel PETSc format).  For good matrix assembly performance
3281    the user should preallocate the matrix storage by setting the parameters
3282    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3283    performance can be increased by more than a factor of 50.
3284 
3285    Collective on MPI_Comm
3286 
3287    Input Parameters:
3288 +  A - the matrix
3289 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
3290            (same value is used for all local rows)
3291 .  d_nnz - array containing the number of nonzeros in the various rows of the
3292            DIAGONAL portion of the local submatrix (possibly different for each row)
3293            or PETSC_NULL, if d_nz is used to specify the nonzero structure.
3294            The size of this array is equal to the number of local rows, i.e 'm'.
3295            You must leave room for the diagonal entry even if it is zero.
3296 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
3297            submatrix (same value is used for all local rows).
3298 -  o_nnz - array containing the number of nonzeros in the various rows of the
3299            OFF-DIAGONAL portion of the local submatrix (possibly different for
3300            each row) or PETSC_NULL, if o_nz is used to specify the nonzero
3301            structure. The size of this array is equal to the number
3302            of local rows, i.e 'm'.
3303 
3304    If the *_nnz parameter is given then the *_nz parameter is ignored
3305 
3306    The AIJ format (also called the Yale sparse matrix format or
3307    compressed row storage (CSR)), is fully compatible with standard Fortran 77
3308    storage.  The stored row and column indices begin with zero.  See the users manual for details.
3309 
3310    The parallel matrix is partitioned such that the first m0 rows belong to
3311    process 0, the next m1 rows belong to process 1, the next m2 rows belong
3312    to process 2, etc., where m0,m1,m2,... are given by the input parameter 'm'.
3313 
3314    The DIAGONAL portion of the local submatrix of a processor can be defined
3315    as the submatrix which is obtained by extracting the part corresponding
3316    to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the
3317    first row that belongs to the processor, and r2 is the last row belonging
3318    to this processor. This is a square m x m matrix. The remaining portion of
3319    the local submatrix, of size m x (N-n), constitutes the OFF-DIAGONAL portion.
3320 
3321    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
3322 
3323    You can call MatGetInfo() to get information on how effective the preallocation was;
3324    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
3325    You can also run with the option -info and look for messages with the string
3326    malloc in them to see if additional memory allocation was needed.
3327 
3328    Example usage:
3329 
3330    Consider the following 8x8 matrix with 34 non-zero values, that is
3331    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
3332    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
3333    as follows:
3334 
3335 .vb
3336             1  2  0  |  0  3  0  |  0  4
3337     Proc0   0  5  6  |  7  0  0  |  8  0
3338             9  0 10  | 11  0  0  | 12  0
3339     -------------------------------------
3340            13  0 14  | 15 16 17  |  0  0
3341     Proc1   0 18  0  | 19 20 21  |  0  0
3342             0  0  0  | 22 23  0  | 24  0
3343     -------------------------------------
3344     Proc2  25 26 27  |  0  0 28  | 29  0
3345            30  0  0  | 31 32 33  |  0 34
3346 .ve
3347 
3348    This can be represented as a collection of submatrices as:
3349 
3350 .vb
3351       A B C
3352       D E F
3353       G H I
3354 .ve
3355 
3356    Where the submatrices A,B,C are owned by proc0, D,E,F are
3357    owned by proc1, G,H,I are owned by proc2.
3358 
3359    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3360    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3361    The 'M','N' parameters are 8,8, and have the same values on all procs.
3362 
3363    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
3364    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
3365    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
3366    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
3367    part as SeqAIJ matrices. For example, proc1 will store [E] as one SeqAIJ
3368    matrix and [DF] as another SeqAIJ matrix.
3369 
3370    When d_nz, o_nz parameters are specified, d_nz storage elements are
3371    allocated for every row of the local diagonal submatrix, and o_nz
3372    storage locations are allocated for every row of the OFF-DIAGONAL submat.
3373    One way to choose d_nz and o_nz is to use the maximum number of nonzeros
3374    per local row of the local DIAGONAL and OFF-DIAGONAL submatrices, respectively.
3375    In this case, the values of d_nz,o_nz are:
3376 .vb
3377      proc0 : dnz = 2, o_nz = 2
3378      proc1 : dnz = 3, o_nz = 2
3379      proc2 : dnz = 1, o_nz = 4
3380 .ve
3381    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
3382    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
3383    for proc2, i.e., we are using 12+15+10=37 storage locations to store
3384    34 values.
3385 
3386    When d_nnz, o_nnz parameters are specified, the storage is specified
3387    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
3388    In the above case the values for d_nnz,o_nnz are:
3389 .vb
3390      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
3391      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
3392      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
3393 .ve
3394    Here the space allocated is sum of all the above values i.e 34, and
3395    hence pre-allocation is perfect.
3396 
3397    Level: intermediate
3398 
3399 .keywords: matrix, aij, compressed row, sparse, parallel
3400 
3401 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIAIJ(), MatMPIAIJSetPreallocationCSR(),
3402           MPIAIJ, MatGetInfo()
3403 @*/
3404 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
3405 {
3406   PetscErrorCode ierr,(*f)(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]);
3407 
3408   PetscFunctionBegin;
3409   ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr);
3410   if (f) {
3411     ierr = (*f)(B,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
3412   }
3413   PetscFunctionReturn(0);
3414 }
3415 
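/*
   Sketch for the 8x8 example in the manual page above as issued on proc0; the
   other ranks make the same collective calls with their own local sizes and
   d_nnz/o_nnz arrays:

.vb
      PetscInt d_nnz[] = {2,2,2},o_nnz[] = {2,2,2};
      ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
      ierr = MatSetSizes(A,3,3,8,8);CHKERRQ(ierr);
      ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
      ierr = MatMPIAIJSetPreallocation(A,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
.ve
*/
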
3416 #undef __FUNCT__
3417 #define __FUNCT__ "MatCreateMPIAIJWithArrays"
3418 /*@
3419      MatCreateMPIAIJWithArrays - creates a MPI AIJ matrix using arrays that contain in standard
3420          CSR format the local rows.
3421 
3422    Collective on MPI_Comm
3423 
3424    Input Parameters:
3425 +  comm - MPI communicator
3426 .  m - number of local rows (Cannot be PETSC_DECIDE)
3427 .  n - This value should be the same as the local size used in creating the
3428        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
3429        calculated if N is given) For square matrices n is almost always m.
3430 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
3431 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
3432 .   i - row indices
3433 .   j - column indices
3434 -   a - matrix values
3435 
3436    Output Parameter:
3437 .   mat - the matrix
3438 
3439    Level: intermediate
3440 
3441    Notes:
3442        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
3443      thus you CANNOT change the matrix entries by changing the values of a[] after you have
3444      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3445 
3446        The i and j indices are 0 based, and i indices are indices corresponding to the local j array.
3447 
3448        The format used for the sparse matrix input is equivalent to a
3449     row-major ordering, i.e., for the following matrix the expected input data is
3450     as shown:
3451 
3452         1 0 0
3453         2 0 3     P0
3454        -------
3455         4 5 6     P1
3456 
3457      Process0 [P0]: rows_owned=[0,1]
3458         i =  {0,1,3}  [size = nrow+1  = 2+1]
3459         j =  {0,0,2}  [size = nz = 3]
3460         v =  {1,2,3}  [size = nz = 3]
3461 
3462      Process1 [P1]: rows_owned=[2]
3463         i =  {0,3}    [size = nrow+1  = 1+1]
3464         j =  {0,1,2}  [size = nz = 3]
3465         v =  {4,5,6}  [size = nz = 3]
3466 
3467 .keywords: matrix, aij, compressed row, sparse, parallel
3468 
3469 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
3470           MPIAIJ, MatCreateMPIAIJ(), MatCreateMPIAIJWithSplitArrays()
3471 @*/
3472 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
3473 {
3474   PetscErrorCode ierr;
3475 
3476   PetscFunctionBegin;
3477   if (i[0]) {
3478     SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
3479   }
3480   if (m < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
3481   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
3482   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
3483   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
3484   ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr);
3485   PetscFunctionReturn(0);
3486 }
3487 
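/*
   The routine above collapses the MatCreate()/MatSetSizes()/MatSetType()/
   MatMPIAIJSetPreallocationCSR() sequence into a single call; with the same
   two-process data as in the manual page the sketch reduces to:

.vb
      ierr = MatCreateMPIAIJWithArrays(comm,m,PETSC_DECIDE,PETSC_DETERMINE,3,i,j,v,&A);CHKERRQ(ierr);
.ve
*/
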
3488 #undef __FUNCT__
3489 #define __FUNCT__ "MatCreateMPIAIJ"
3490 /*@C
3491    MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format
3492    (the default parallel PETSc format).  For good matrix assembly performance
3493    the user should preallocate the matrix storage by setting the parameters
3494    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3495    performance can be increased by more than a factor of 50.
3496 
3497    Collective on MPI_Comm
3498 
3499    Input Parameters:
3500 +  comm - MPI communicator
3501 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
3502            This value should be the same as the local size used in creating the
3503            y vector for the matrix-vector product y = Ax.
3504 .  n - This value should be the same as the local size used in creating the
3505        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
3506        calculated if N is given) For square matrices n is almost always m.
3507 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
3508 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
3509 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
3510            (same value is used for all local rows)
3511 .  d_nnz - array containing the number of nonzeros in the various rows of the
3512            DIAGONAL portion of the local submatrix (possibly different for each row)
3513            or PETSC_NULL, if d_nz is used to specify the nonzero structure.
3514            The size of this array is equal to the number of local rows, i.e 'm'.
3515            You must leave room for the diagonal entry even if it is zero.
3516 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
3517            submatrix (same value is used for all local rows).
3518 -  o_nnz - array containing the number of nonzeros in the various rows of the
3519            OFF-DIAGONAL portion of the local submatrix (possibly different for
3520            each row) or PETSC_NULL, if o_nz is used to specify the nonzero
3521            structure. The size of this array is equal to the number
3522            of local rows, i.e 'm'.
3523 
3524    Output Parameter:
3525 .  A - the matrix
3526 
3527    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
3528    MatXXXXSetPreallocation() paradigm instead of this routine directly.
3529    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
3530 
3531    Notes:
3532    If the *_nnz parameter is given then the *_nz parameter is ignored
3533 
3534    m,n,M,N parameters specify the size of the matrix, and its partitioning across
3535    processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
3536    storage requirements for this matrix.
3537 
3538    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one
3539    processor then it must be used on all processors that share the object for
3540    that argument.
3541 
3542    The user MUST specify either the local or global matrix dimensions
3543    (possibly both).
3544 
3545    The parallel matrix is partitioned across processors such that the
3546    first m0 rows belong to process 0, the next m1 rows belong to
3547    process 1, the next m2 rows belong to process 2, etc., where
3548    m0,m1,m2,... are given by the input parameter 'm', i.e., each processor stores
3549    values corresponding to an [m x N] submatrix.
3550 
3551    The columns are logically partitioned with the n0 columns belonging
3552    to 0th partition, the next n1 columns belonging to the next
3553    partition, etc., where n0,n1,n2,... are given by the input parameter 'n'.
3554 
3555    The DIAGONAL portion of the local submatrix on any given processor
3556    is the submatrix corresponding to the rows and columns m,n
3557    corresponding to the given processor. i.e diagonal matrix on
3558    process 0 is [m0 x n0], diagonal matrix on process 1 is [m1 x n1]
3559    etc. The remaining portion of the local submatrix [m x (N-n)]
3560    constitute the OFF-DIAGONAL portion. The example below better
3561    illustrates this concept.
3562 
3563    For a square global matrix we define each processor's diagonal portion
3564    to be its local rows and the corresponding columns (a square submatrix);
3565    each processor's off-diagonal portion encompasses the remainder of the
3566    local matrix (a rectangular submatrix).
3567 
3568    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
3569 
3570    When calling this routine with a single process communicator, a matrix of
3571    type SEQAIJ is returned.  If a matrix of type MPIAIJ is desired for this
3572    type of communicator, use the construction mechanism:
3573      MatCreate(...,&A); MatSetType(A,MPIAIJ); MatMPIAIJSetPreallocation(A,...);
3574 
3575    By default, this format uses inodes (identical nodes) when possible.
3576    We search for consecutive rows with the same nonzero structure, thereby
3577    reusing matrix information to achieve increased efficiency.
3578 
3579    Options Database Keys:
3580 +  -mat_no_inode  - Do not use inodes
3581 .  -mat_inode_limit <limit> - Sets inode limit (max limit=5)
3582 -  -mat_aij_oneindex - Internally use indexing starting at 1
3583         rather than 0.  Note that when calling MatSetValues(),
3584         the user still MUST index entries starting at 0!
3585 
3586 
3587    Example usage:
3588 
3589    Consider the following 8x8 matrix with 34 non-zero values, that is
3590    assembled across 3 processors. Lets assume that proc0 owns 3 rows,
3591    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
3592    as follows:
3593 
3594 .vb
3595             1  2  0  |  0  3  0  |  0  4
3596     Proc0   0  5  6  |  7  0  0  |  8  0
3597             9  0 10  | 11  0  0  | 12  0
3598     -------------------------------------
3599            13  0 14  | 15 16 17  |  0  0
3600     Proc1   0 18  0  | 19 20 21  |  0  0
3601             0  0  0  | 22 23  0  | 24  0
3602     -------------------------------------
3603     Proc2  25 26 27  |  0  0 28  | 29  0
3604            30  0  0  | 31 32 33  |  0 34
3605 .ve
3606 
3607    This can be represented as a collection of submatrices as:
3608 
3609 .vb
3610       A B C
3611       D E F
3612       G H I
3613 .ve
3614 
3615    Where the submatrices A,B,C are owned by proc0, D,E,F are
3616    owned by proc1, G,H,I are owned by proc2.
3617 
3618    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3619    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3620    The 'M','N' parameters are 8,8, and have the same values on all procs.
3621 
3622    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
3623    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
3624    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
3625    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
3626    part as SeqAIJ matrices. For example, proc1 will store [E] as one SeqAIJ
3627    matrix and [DF] as another SeqAIJ matrix.
3628 
3629    When d_nz, o_nz parameters are specified, d_nz storage elements are
3630    allocated for every row of the local diagonal submatrix, and o_nz
3631    storage locations are allocated for every row of the OFF-DIAGONAL submat.
3632    One way to choose d_nz and o_nz is to use the maximum number of nonzeros
3633    per local row of the local DIAGONAL and OFF-DIAGONAL submatrices, respectively.
3634    In this case, the values of d_nz,o_nz are:
3635 .vb
3636      proc0 : dnz = 2, o_nz = 2
3637      proc1 : dnz = 3, o_nz = 2
3638      proc2 : dnz = 1, o_nz = 4
3639 .ve
3640    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
3641    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
3642    for proc2, i.e., we are using 12+15+10=37 storage locations to store
3643    34 values.
3644 
3645    When d_nnz, o_nnz parameters are specified, the storage is specified
3646    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
3647    In the above case the values for d_nnz,o_nnz are:
3648 .vb
3649      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
3650      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
3651      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
3652 .ve
3653    Here the space allocated is sum of all the above values i.e 34, and
3654    hence pre-allocation is perfect.
3655 
3656    Level: intermediate
3657 
3658 .keywords: matrix, aij, compressed row, sparse, parallel
3659 
3660 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
3661           MPIAIJ, MatCreateMPIAIJWithArrays()
3662 @*/
3663 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
3664 {
3665   PetscErrorCode ierr;
3666   PetscMPIInt    size;
3667 
3668   PetscFunctionBegin;
3669   ierr = MatCreate(comm,A);CHKERRQ(ierr);
3670   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
3671   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3672   if (size > 1) {
3673     ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr);
3674     ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
3675   } else {
3676     ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr);
3677     ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr);
3678   }
3679   PetscFunctionReturn(0);
3680 }
3681 
3682 #undef __FUNCT__
3683 #define __FUNCT__ "MatMPIAIJGetSeqAIJ"
3684 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,PetscInt *colmap[])
3685 {
3686   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
3687 
3688   PetscFunctionBegin;
3689   *Ad     = a->A;
3690   *Ao     = a->B;
3691   *colmap = a->garray;
3692   PetscFunctionReturn(0);
3693 }
3694 
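/*
   Sketch of inspecting the two local blocks through the accessor above (A must
   be of type MPIAIJ; colmap[i] is the global column index of column i of the
   off-diagonal block Ao, i.e. the garray mapping):

.vb
      Mat      Ad,Ao;
      PetscInt *colmap;
      ierr = MatMPIAIJGetSeqAIJ(A,&Ad,&Ao,&colmap);CHKERRQ(ierr);
.ve
*/
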
3695 #undef __FUNCT__
3696 #define __FUNCT__ "MatSetColoring_MPIAIJ"
3697 PetscErrorCode MatSetColoring_MPIAIJ(Mat A,ISColoring coloring)
3698 {
3699   PetscErrorCode ierr;
3700   PetscInt       i;
3701   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
3702 
3703   PetscFunctionBegin;
3704   if (coloring->ctype == IS_COLORING_GLOBAL) {
3705     ISColoringValue *allcolors,*colors;
3706     ISColoring      ocoloring;
3707 
3708     /* set coloring for diagonal portion */
3709     ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr);
3710 
3711     /* set coloring for off-diagonal portion */
3712     ierr = ISAllGatherColors(((PetscObject)A)->comm,coloring->n,coloring->colors,PETSC_NULL,&allcolors);CHKERRQ(ierr);
3713     ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
3714     for (i=0; i<a->B->cmap->n; i++) {
3715       colors[i] = allcolors[a->garray[i]];
3716     }
3717     ierr = PetscFree(allcolors);CHKERRQ(ierr);
3718     ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
3719     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
3720     ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
3721   } else if (coloring->ctype == IS_COLORING_GHOSTED) {
3722     ISColoringValue *colors;
3723     PetscInt        *larray;
3724     ISColoring      ocoloring;
3725 
3726     /* set coloring for diagonal portion */
3727     ierr = PetscMalloc((a->A->cmap->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr);
3728     for (i=0; i<a->A->cmap->n; i++) {
3729       larray[i] = i + A->cmap->rstart;
3730     }
3731     ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->A->cmap->n,larray,PETSC_NULL,larray);CHKERRQ(ierr);
3732     ierr = PetscMalloc((a->A->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
3733     for (i=0; i<a->A->cmap->n; i++) {
3734       colors[i] = coloring->colors[larray[i]];
3735     }
3736     ierr = PetscFree(larray);CHKERRQ(ierr);
3737     ierr = ISColoringCreate(PETSC_COMM_SELF,coloring->n,a->A->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
3738     ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr);
3739     ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
3740 
3741     /* set coloring for off-diagonal portion */
3742     ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr);
3743     ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->B->cmap->n,a->garray,PETSC_NULL,larray);CHKERRQ(ierr);
3744     ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
3745     for (i=0; i<a->B->cmap->n; i++) {
3746       colors[i] = coloring->colors[larray[i]];
3747     }
3748     ierr = PetscFree(larray);CHKERRQ(ierr);
3749     ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
3750     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
3751     ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
3752   } else {
3753     SETERRQ1(PETSC_ERR_SUP,"No support ISColoringType %d",(int)coloring->ctype);
3754   }
3755 
3756   PetscFunctionReturn(0);
3757 }
3758 
3759 #if defined(PETSC_HAVE_ADIC)
3760 #undef __FUNCT__
3761 #define __FUNCT__ "MatSetValuesAdic_MPIAIJ"
3762 PetscErrorCode MatSetValuesAdic_MPIAIJ(Mat A,void *advalues)
3763 {
3764   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
3765   PetscErrorCode ierr;
3766 
3767   PetscFunctionBegin;
3768   ierr = MatSetValuesAdic_SeqAIJ(a->A,advalues);CHKERRQ(ierr);
3769   ierr = MatSetValuesAdic_SeqAIJ(a->B,advalues);CHKERRQ(ierr);
3770   PetscFunctionReturn(0);
3771 }
3772 #endif
3773 
3774 #undef __FUNCT__
3775 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ"
3776 PetscErrorCode MatSetValuesAdifor_MPIAIJ(Mat A,PetscInt nl,void *advalues)
3777 {
3778   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
3779   PetscErrorCode ierr;
3780 
3781   PetscFunctionBegin;
3782   ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr);
3783   ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr);
3784   PetscFunctionReturn(0);
3785 }
3786 
3787 #undef __FUNCT__
3788 #define __FUNCT__ "MatMerge"
3789 /*@
3790       MatMerge - Creates a single large PETSc matrix by concatenating sequential
3791                  matrices from each processor
3792 
3793     Collective on MPI_Comm
3794 
3795    Input Parameters:
3796 +    comm - the communicator the parallel matrix will live on
3797 .    inmat - the input sequential matrix (one per process)
3798 .    n - number of local columns (or PETSC_DECIDE)
3799 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
3800 
3801    Output Parameter:
3802 .    outmat - the parallel matrix generated
3803 
3804     Level: advanced
3805 
3806    Notes: The number of columns of the matrix in EACH processor MUST be the same.
3807 
3808 @*/
3809 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
3810 {
3811   PetscErrorCode ierr;
3812   PetscInt       m,N,i,rstart,nnz,Ii,*dnz,*onz;
3813   PetscInt       *indx;
3814   PetscScalar    *values;
3815 
3816   PetscFunctionBegin;
3817   ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
3818   if (scall == MAT_INITIAL_MATRIX){
3819     /* count nonzeros in each row, for diagonal and off diagonal portion of matrix */
3820     if (n == PETSC_DECIDE){
3821       ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr);
3822     }
3823     ierr = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3824     rstart -= m;
3825 
3826     ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
3827     for (i=0;i<m;i++) {
3828       ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr);
3829       ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr);
3830       ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr);
3831     }
3832     /* This routine will ONLY return MPIAIJ type matrix */
3833     ierr = MatCreate(comm,outmat);CHKERRQ(ierr);
3834     ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
3835     ierr = MatSetType(*outmat,MATMPIAIJ);CHKERRQ(ierr);
3836     ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr);
3837     ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
3838 
3839   } else if (scall == MAT_REUSE_MATRIX){
3840     ierr = MatGetOwnershipRange(*outmat,&rstart,PETSC_NULL);CHKERRQ(ierr);
3841   } else {
3842     SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
3843   }
3844 
3845   for (i=0;i<m;i++) {
3846     ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
3847     Ii    = i + rstart;
3848     ierr = MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
3849     ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
3850   }
3851   ierr = MatDestroy(inmat);CHKERRQ(ierr);
3852   ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3853   ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3854 
3855   PetscFunctionReturn(0);
3856 }
3857 
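/*
   Sketch of using MatMerge(); each process contributes one SeqAIJ matrix with
   the same number of columns, and note that the input matrix is destroyed by
   the call (see the MatDestroy(inmat) above):

.vb
      Mat seq,par;
      ....  each process assembles its own SeqAIJ matrix seq  ....
      ierr = MatMerge(PETSC_COMM_WORLD,seq,PETSC_DECIDE,MAT_INITIAL_MATRIX,&par);CHKERRQ(ierr);
.ve
*/
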
3858 #undef __FUNCT__
3859 #define __FUNCT__ "MatFileSplit"
3860 PetscErrorCode MatFileSplit(Mat A,char *outfile)
3861 {
3862   PetscErrorCode    ierr;
3863   PetscMPIInt       rank;
3864   PetscInt          m,N,i,rstart,nnz;
3865   size_t            len;
3866   const PetscInt    *indx;
3867   PetscViewer       out;
3868   char              *name;
3869   Mat               B;
3870   const PetscScalar *values;
3871 
3872   PetscFunctionBegin;
3873   ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr);
3874   ierr = MatGetSize(A,0,&N);CHKERRQ(ierr);
3875   /* Should this be the type of the diagonal block of A? */
3876   ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr);
3877   ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr);
3878   ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr);
3879   ierr = MatSeqAIJSetPreallocation(B,0,PETSC_NULL);CHKERRQ(ierr);
3880   ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr);
3881   for (i=0;i<m;i++) {
3882     ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
3883     ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
3884     ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
3885   }
3886   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3887   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3888 
3889   ierr = MPI_Comm_rank(((PetscObject)A)->comm,&rank);CHKERRQ(ierr);
3890   ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr);
3891   ierr = PetscMalloc((len+12)*sizeof(char),&name);CHKERRQ(ierr); /* room for "." plus the rank digits */
3892   sprintf(name,"%s.%d",outfile,rank);
3893   ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr);
3894   ierr = PetscFree(name);CHKERRQ(ierr);
3895   ierr = MatView(B,out);CHKERRQ(ierr);
3896   ierr = PetscViewerDestroy(out);CHKERRQ(ierr);
3897   ierr = MatDestroy(B);CHKERRQ(ierr);
3898   PetscFunctionReturn(0);
3899 }
3900 
3901 EXTERN PetscErrorCode MatDestroy_MPIAIJ(Mat);
3902 #undef __FUNCT__
3903 #define __FUNCT__ "MatDestroy_MPIAIJ_SeqsToMPI"
3904 PetscErrorCode PETSCMAT_DLLEXPORT MatDestroy_MPIAIJ_SeqsToMPI(Mat A)
3905 {
3906   PetscErrorCode       ierr;
3907   Mat_Merge_SeqsToMPI  *merge;
3908   PetscContainer       container;
3909 
3910   PetscFunctionBegin;
3911   ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr);
3912   if (container) {
3913     ierr = PetscContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr);
3914     ierr = PetscFree(merge->id_r);CHKERRQ(ierr);
3915     ierr = PetscFree(merge->len_s);CHKERRQ(ierr);
3916     ierr = PetscFree(merge->len_r);CHKERRQ(ierr);
3917     ierr = PetscFree(merge->bi);CHKERRQ(ierr);
3918     ierr = PetscFree(merge->bj);CHKERRQ(ierr);
3919     ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr);
3920     ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr);
3921     ierr = PetscFree(merge->coi);CHKERRQ(ierr);
3922     ierr = PetscFree(merge->coj);CHKERRQ(ierr);
3923     ierr = PetscFree(merge->owners_co);CHKERRQ(ierr);
3924     ierr = PetscFree(merge->rowmap.range);CHKERRQ(ierr);
3925 
3926     ierr = PetscContainerDestroy(container);CHKERRQ(ierr);
3927     ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr);
3928     ierr = PetscFree(merge);CHKERRQ(ierr); /* merge is only valid when the container was found */
3929   }
3930 
3931   ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr);
3932   PetscFunctionReturn(0);
3933 }
3934 
3935 #include "../src/mat/utils/freespace.h"
3936 #include "petscbt.h"
3937 
3938 #undef __FUNCT__
3939 #define __FUNCT__ "MatMerge_SeqsToMPINumeric"
3940 /*@C
3941       MatMerge_SeqsToMPI - Creates an MPIAIJ matrix by adding sequential
3942                  matrices from each processor
3943 
3944     Collective on MPI_Comm
3945 
3946    Input Parameters:
3947 +    comm - the communicator the parallel matrix will live on
3948 .    seqmat - the input sequential matrix (one per process)
3949 .    m - number of local rows (or PETSC_DECIDE)
3950 .    n - number of local columns (or PETSC_DECIDE)
3951 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
3952 
3953    Output Parameter:
3954 .    mpimat - the parallel matrix generated
3955 
3956     Level: advanced
3957 
3958    Notes:
3959      The dimensions of the sequential matrix in each processor MUST be the same.
3960      The input seqmat is included in the container "Mat_Merge_SeqsToMPI", and will be
3961      destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat.
3962 @*/
3963 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPINumeric(Mat seqmat,Mat mpimat)
3964 {
3965   PetscErrorCode       ierr;
3966   MPI_Comm             comm=((PetscObject)mpimat)->comm;
3967   Mat_SeqAIJ           *a=(Mat_SeqAIJ*)seqmat->data;
3968   PetscMPIInt          size,rank,taga,*len_s;
3969   PetscInt             N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj=a->j;
3970   PetscInt             proc,m;
3971   PetscInt             **buf_ri,**buf_rj;
3972   PetscInt             k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj;
3973   PetscInt             nrows,**buf_ri_k,**nextrow,**nextai;
3974   MPI_Request          *s_waits,*r_waits;
3975   MPI_Status           *status;
3976   MatScalar            *aa=a->a;
3977   MatScalar            **abuf_r,*ba_i;
3978   Mat_Merge_SeqsToMPI  *merge;
3979   PetscContainer       container;
3980 
3981   PetscFunctionBegin;
3982   ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
3983 
3984   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3985   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3986 
3987   ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr);
3988   if (!container) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Matrix was not created by MatMerge_SeqsToMPISymbolic()");
3989   ierr = PetscContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr);
3991   bi     = merge->bi;
3992   bj     = merge->bj;
3993   buf_ri = merge->buf_ri;
3994   buf_rj = merge->buf_rj;
3995 
3996   ierr   = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr);
3997   owners = merge->rowmap.range;
3998   len_s  = merge->len_s;
3999 
4000   /* send and recv matrix values */
4001   /*-----------------------------*/
4002   ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr);
4003   ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr);
4004 
4005   ierr = PetscMalloc((merge->nsend+1)*sizeof(MPI_Request),&s_waits);CHKERRQ(ierr);
4006   for (proc=0,k=0; proc<size; proc++){
4007     if (!len_s[proc]) continue;
4008     i = owners[proc];
4009     ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr);
4010     k++;
4011   }
4012 
4013   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);}
4014   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);}
4015   ierr = PetscFree(status);CHKERRQ(ierr);
4016 
4017   ierr = PetscFree(s_waits);CHKERRQ(ierr);
4018   ierr = PetscFree(r_waits);CHKERRQ(ierr);
4019 
4020   /* insert mat values of mpimat */
4021   /*----------------------------*/
4022   ierr = PetscMalloc(N*sizeof(PetscScalar),&ba_i);CHKERRQ(ierr);
4023   ierr = PetscMalloc((3*merge->nrecv+1)*sizeof(PetscInt**),&buf_ri_k);CHKERRQ(ierr);
4024   nextrow = buf_ri_k + merge->nrecv;
4025   nextai  = nextrow + merge->nrecv;
4026 
4027   for (k=0; k<merge->nrecv; k++){
4028     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th received i-structure */
4029     nrows = *(buf_ri_k[k]);
4030     nextrow[k]  = buf_ri_k[k]+1;  /* next row number of k-th received i-structure */
4031     nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th received i-structure */
4032   }
4033 
4034   /* set values of ba */
4035   m = merge->rowmap.n;
4036   for (i=0; i<m; i++) {
4037     arow = owners[rank] + i;
4038     bj_i = bj+bi[i];  /* col indices of the i-th row of mpimat */
4039     bnzi = bi[i+1] - bi[i];
4040     ierr = PetscMemzero(ba_i,bnzi*sizeof(MatScalar));CHKERRQ(ierr);
4041 
4042     /* add local non-zero vals of this proc's seqmat into ba */
4043     anzi = ai[arow+1] - ai[arow];
4044     aj   = a->j + ai[arow];
4045     aa   = a->a + ai[arow];
4046     nextaj = 0;
4047     for (j=0; nextaj<anzi; j++){
4048       if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */
4049         ba_i[j] += aa[nextaj++];
4050       }
4051     }
4052 
4053     /* add received vals into ba */
4054     for (k=0; k<merge->nrecv; k++){ /* k-th received message */
4055       /* i-th row */
4056       if (i == *nextrow[k]) {
4057         anzi = *(nextai[k]+1) - *nextai[k];
4058         aj   = buf_rj[k] + *(nextai[k]);
4059         aa   = abuf_r[k] + *(nextai[k]);
4060         nextaj = 0;
4061         for (j=0; nextaj<anzi; j++){
4062           if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */
4063             ba_i[j] += aa[nextaj++];
4064           }
4065         }
4066         nextrow[k]++; nextai[k]++;
4067       }
4068     }
4069     ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr);
4070   }
4071   ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4072   ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4073 
4074   ierr = PetscFree(abuf_r);CHKERRQ(ierr);
4075   ierr = PetscFree(ba_i);CHKERRQ(ierr);
4076   ierr = PetscFree(buf_ri_k);CHKERRQ(ierr);
4077   ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
4078   PetscFunctionReturn(0);
4079 }
4080 
4081 #undef __FUNCT__
4082 #define __FUNCT__ "MatMerge_SeqsToMPISymbolic"
4083 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPISymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat)
4084 {
4085   PetscErrorCode       ierr;
4086   Mat                  B_mpi;
4087   Mat_SeqAIJ           *a=(Mat_SeqAIJ*)seqmat->data;
4088   PetscMPIInt          size,rank,tagi,tagj,*len_s,*len_si,*len_ri;
4089   PetscInt             **buf_rj,**buf_ri,**buf_ri_k;
4090   PetscInt             M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j;
4091   PetscInt             len,proc,*dnz,*onz;
4092   PetscInt             k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0;
4093   PetscInt             nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai;
4094   MPI_Request          *si_waits,*sj_waits,*ri_waits,*rj_waits;
4095   MPI_Status           *status;
4096   PetscFreeSpaceList   free_space=PETSC_NULL,current_space=PETSC_NULL;
4097   PetscBT              lnkbt;
4098   Mat_Merge_SeqsToMPI  *merge;
4099   PetscContainer       container;
4100 
4101   PetscFunctionBegin;
4102   ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4103 
4104   /* make sure it is a PETSc comm */
4105   ierr = PetscCommDuplicate(comm,&comm,PETSC_NULL);CHKERRQ(ierr);
4106   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4107   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4108 
4109   ierr = PetscNew(Mat_Merge_SeqsToMPI,&merge);CHKERRQ(ierr);
4110   ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr);
4111 
4112   /* determine row ownership */
4113   /*---------------------------------------------------------*/
4114   ierr = PetscMapInitialize(comm,&merge->rowmap);CHKERRQ(ierr);
4115   merge->rowmap.n = m;
4116   merge->rowmap.N = M;
4117   merge->rowmap.bs = 1;
4118   ierr = PetscMapSetUp(&merge->rowmap);CHKERRQ(ierr);
4119   ierr = PetscMalloc(size*sizeof(PetscMPIInt),&len_si);CHKERRQ(ierr);
4120   ierr = PetscMalloc(size*sizeof(PetscMPIInt),&merge->len_s);CHKERRQ(ierr);
4121 
4122   m      = merge->rowmap.n;
4123   M      = merge->rowmap.N;
4124   owners = merge->rowmap.range;
4125 
4126   /* determine the number of messages to send, their lengths */
4127   /*---------------------------------------------------------*/
4128   len_s  = merge->len_s;
4129 
4130   len = 0;  /* length of buf_si[] */
4131   merge->nsend = 0;
4132   for (proc=0; proc<size; proc++){
4133     len_si[proc] = 0;
4134     if (proc == rank){
4135       len_s[proc] = 0;
4136     } else {
4137       len_si[proc] = owners[proc+1] - owners[proc] + 1;
4138       len_s[proc] = ai[owners[proc+1]] - ai[owners[proc]]; /* num of nonzeros to be sent to [proc] */
4139     }
4140     if (len_s[proc]) {
4141       merge->nsend++;
4142       nrows = 0;
4143       for (i=owners[proc]; i<owners[proc+1]; i++){
4144         if (ai[i+1] > ai[i]) nrows++;
4145       }
4146       len_si[proc] = 2*(nrows+1);
4147       len += len_si[proc];
4148     }
4149   }
4150 
4151   /* determine the number and length of messages to receive for ij-structure */
4152   /*-------------------------------------------------------------------------*/
4153   ierr = PetscGatherNumberOfMessages(comm,PETSC_NULL,len_s,&merge->nrecv);CHKERRQ(ierr);
4154   ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr);
4155 
4156   /* post the Irecv of j-structure */
4157   /*-------------------------------*/
4158   ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr);
4159   ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr);
4160 
4161   /* post the Isend of j-structure */
4162   /*--------------------------------*/
4163   ierr = PetscMalloc((2*merge->nsend+1)*sizeof(MPI_Request),&si_waits);CHKERRQ(ierr);
4164   sj_waits = si_waits + merge->nsend;
4165 
4166   for (proc=0, k=0; proc<size; proc++){
4167     if (!len_s[proc]) continue;
4168     i = owners[proc];
4169     ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr);
4170     k++;
4171   }
4172 
4173   /* receives and sends of j-structure are complete */
4174   /*------------------------------------------------*/
4175   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);}
4176   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);}
4177 
4178   /* send and recv i-structure */
4179   /*---------------------------*/
4180   ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr);
4181   ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr);
4182 
4183   ierr = PetscMalloc((len+1)*sizeof(PetscInt),&buf_s);CHKERRQ(ierr);
4184   buf_si = buf_s;  /* points to the beginning of k-th msg to be sent */
4185   for (proc=0,k=0; proc<size; proc++){
4186     if (!len_s[proc]) continue;
4187     /* form outgoing message for i-structure:
4188          buf_si[0]:                 nrows to be sent
4189                [1:nrows]:           row index (local to the receiving process)
4190                [nrows+1:2*nrows+1]: i-structure index
4191     */
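    /* e.g., two nonempty local rows 3 and 7 with 2 and 4 nonzeros each would give
       buf_si = {2, 3, 7, 0, 2, 6}  (hypothetical numbers, for illustration only) */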
4192     /*-------------------------------------------*/
4193     nrows = len_si[proc]/2 - 1;
4194     buf_si_i    = buf_si + nrows+1;
4195     buf_si[0]   = nrows;
4196     buf_si_i[0] = 0;
4197     nrows = 0;
4198     for (i=owners[proc]; i<owners[proc+1]; i++){
4199       anzi = ai[i+1] - ai[i];
4200       if (anzi) {
4201         buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */
4202         buf_si[nrows+1] = i-owners[proc]; /* local row index */
4203         nrows++;
4204       }
4205     }
4206     ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr);
4207     k++;
4208     buf_si += len_si[proc];
4209   }
4210 
4211   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);}
4212   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);}
4213 
4214   ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr);
4215   for (i=0; i<merge->nrecv; i++){
4216     ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr);
4217   }
4218 
4219   ierr = PetscFree(len_si);CHKERRQ(ierr);
4220   ierr = PetscFree(len_ri);CHKERRQ(ierr);
4221   ierr = PetscFree(rj_waits);CHKERRQ(ierr);
4222   ierr = PetscFree(si_waits);CHKERRQ(ierr);
4223   ierr = PetscFree(ri_waits);CHKERRQ(ierr);
4224   ierr = PetscFree(buf_s);CHKERRQ(ierr);
4225   ierr = PetscFree(status);CHKERRQ(ierr);
4226 
4227   /* compute a local seq matrix in each processor */
4228   /*----------------------------------------------*/
4229   /* allocate bi array and free space for accumulating nonzero column info */
4230   ierr = PetscMalloc((m+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr);
4231   bi[0] = 0;
4232 
4233   /* create and initialize a linked list */
4234   nlnk = N+1;
4235   ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4236 
4237   /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */
4238   len  = ai[owners[rank+1]] - ai[owners[rank]];
4240   ierr = PetscFreeSpaceGet((PetscInt)(2*len+1),&free_space);CHKERRQ(ierr);
4241   current_space = free_space;
4242 
4243   /* determine symbolic info for each local row */
4244   ierr = PetscMalloc((3*merge->nrecv+1)*sizeof(PetscInt*),&buf_ri_k);CHKERRQ(ierr);
4245   nextrow = buf_ri_k + merge->nrecv;
4246   nextai  = nextrow + merge->nrecv;
4247   for (k=0; k<merge->nrecv; k++){
4248     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th received i-structure */
4249     nrows = *buf_ri_k[k];
4250     nextrow[k]  = buf_ri_k[k] + 1;  /* next row number of k-th received i-structure */
4251     nextai[k]   = buf_ri_k[k] + (nrows + 1);/* points to the next i-structure of k-th received i-structure */
4252   }
4253 
4254   ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
4255   len = 0;
4256   for (i=0;i<m;i++) {
4257     bnzi   = 0;
4258     /* add local non-zero cols of this proc's seqmat into lnk */
4259     arow   = owners[rank] + i;
4260     anzi   = ai[arow+1] - ai[arow];
4261     aj     = a->j + ai[arow];
4262     ierr = PetscLLAdd(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4263     bnzi += nlnk;
4264     /* add received col data into lnk */
4265     for (k=0; k<merge->nrecv; k++){ /* k-th received message */
4266       if (i == *nextrow[k]) { /* i-th row */
4267         anzi = *(nextai[k]+1) - *nextai[k];
4268         aj   = buf_rj[k] + *nextai[k];
4269         ierr = PetscLLAdd(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4270         bnzi += nlnk;
4271         nextrow[k]++; nextai[k]++;
4272       }
4273     }
4274     if (len < bnzi) len = bnzi;  /* =max(bnzi) */
4275 
4276     /* if free space is not available, make more free space */
4277     if (current_space->local_remaining<bnzi) {
4278       ierr = PetscFreeSpaceGet(bnzi+current_space->total_array_size,&current_space);CHKERRQ(ierr);
4279       nspacedouble++;
4280     }
4281     /* copy data into free space, then initialize lnk */
4282     ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
4283     ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr);
4284 
4285     current_space->array           += bnzi;
4286     current_space->local_used      += bnzi;
4287     current_space->local_remaining -= bnzi;
4288 
4289     bi[i+1] = bi[i] + bnzi;
4290   }
4291 
4292   ierr = PetscFree(buf_ri_k);CHKERRQ(ierr);
4293 
4294   ierr = PetscMalloc((bi[m]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr);
4295   ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr);
4296   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
4297 
4298   /* create symbolic parallel matrix B_mpi */
4299   /*---------------------------------------*/
4300   ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr);
4301   if (n==PETSC_DECIDE) {
4302     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr);
4303   } else {
4304     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
4305   }
4306   ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr);
4307   ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr);
4308   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
4309 
4310   /* B_mpi is not ready for use - assembly will be done by MatMerge_SeqsToMPINumeric() */
4311   B_mpi->assembled     = PETSC_FALSE;
4312   B_mpi->ops->destroy  = MatDestroy_MPIAIJ_SeqsToMPI;
4313   merge->bi            = bi;
4314   merge->bj            = bj;
4315   merge->buf_ri        = buf_ri;
4316   merge->buf_rj        = buf_rj;
4317   merge->coi           = PETSC_NULL;
4318   merge->coj           = PETSC_NULL;
4319   merge->owners_co     = PETSC_NULL;
4320 
4321   /* attach the supporting struct to B_mpi for reuse */
4322   ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
4323   ierr = PetscContainerSetPointer(container,merge);CHKERRQ(ierr);
4324   ierr = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr);
4325   *mpimat = B_mpi;
4326 
4327   ierr = PetscCommDestroy(&comm);CHKERRQ(ierr);
4328   ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4329   PetscFunctionReturn(0);
4330 }
4331 
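/*
    MatMerge_SeqsToMPI - merges the SeqAIJ matrix owned by each process into a single
    parallel MPIAIJ matrix. The symbolic phase runs only for MAT_INITIAL_MATRIX; the
    numeric phase runs on every call, so the parallel matrix can be refilled cheaply
    when the nonzero pattern of seqmat is unchanged.

    A hypothetical usage sketch (variable names assumed, not taken from the PETSc tests):

      Mat C;
      ierr = MatMerge_SeqsToMPI(comm,Aseq,m,n,MAT_INITIAL_MATRIX,&C);CHKERRQ(ierr);
      /* ... update the values of Aseq, keeping its nonzero pattern ... */
      ierr = MatMerge_SeqsToMPI(comm,Aseq,m,n,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr);
*/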
4332 #undef __FUNCT__
4333 #define __FUNCT__ "MatMerge_SeqsToMPI"
4334 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPI(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat)
4335 {
4336   PetscErrorCode   ierr;
4337 
4338   PetscFunctionBegin;
4339   ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4340   if (scall == MAT_INITIAL_MATRIX){
4341     ierr = MatMerge_SeqsToMPISymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr);
4342   }
4343   ierr = MatMerge_SeqsToMPINumeric(seqmat,*mpimat);CHKERRQ(ierr);
4344   ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4345   PetscFunctionReturn(0);
4346 }
4347 
4348 #undef __FUNCT__
4349 #define __FUNCT__ "MatGetLocalMat"
4350 /*@
4351      MatGetLocalMat - Creates a SeqAIJ matrix from the local rows of a parallel matrix
4352 
4353     Not Collective
4354 
4355    Input Parameters:
4356 +    A - the matrix
4357 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4358 
4359    Output Parameter:
4360 .    A_loc - the local sequential matrix generated
4361 
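   Example usage (a minimal sketch; surrounding declarations and error handling assumed):

.vb
      Mat A_loc;
      ierr = MatGetLocalMat(A,MAT_INITIAL_MATRIX,&A_loc);CHKERRQ(ierr);
      /* ... change the numerical values of A, then refresh the local copy ... */
      ierr = MatGetLocalMat(A,MAT_REUSE_MATRIX,&A_loc);CHKERRQ(ierr);
      ierr = MatDestroy(A_loc);CHKERRQ(ierr);
.ve
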
4362     Level: developer
4363 
4364 @*/
4365 PetscErrorCode PETSCMAT_DLLEXPORT MatGetLocalMat(Mat A,MatReuse scall,Mat *A_loc)
4366 {
4367   PetscErrorCode  ierr;
4368   Mat_MPIAIJ      *mpimat=(Mat_MPIAIJ*)A->data;
4369   Mat_SeqAIJ      *mat,*a=(Mat_SeqAIJ*)(mpimat->A)->data,*b=(Mat_SeqAIJ*)(mpimat->B)->data;
4370   PetscInt        *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*cmap=mpimat->garray;
4371   MatScalar       *aa=a->a,*ba=b->a,*cam;
4372   PetscScalar     *ca;
4373   PetscInt        am=A->rmap->n,i,j,k,cstart=A->cmap->rstart;
4374   PetscInt        *ci,*cj,col,ncols_d,ncols_o,jo;
4375 
4376   PetscFunctionBegin;
4377   ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
4378   if (scall == MAT_INITIAL_MATRIX){
4379     ierr = PetscMalloc((1+am)*sizeof(PetscInt),&ci);CHKERRQ(ierr);
4380     ci[0] = 0;
4381     for (i=0; i<am; i++){
4382       ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]);
4383     }
4384     ierr = PetscMalloc((1+ci[am])*sizeof(PetscInt),&cj);CHKERRQ(ierr);
4385     ierr = PetscMalloc((1+ci[am])*sizeof(PetscScalar),&ca);CHKERRQ(ierr);
4386     k = 0;
4387     for (i=0; i<am; i++) {
4388       ncols_o = bi[i+1] - bi[i];
4389       ncols_d = ai[i+1] - ai[i];
4390       /* off-diagonal portion of A */
4391       for (jo=0; jo<ncols_o; jo++) {
4392         col = cmap[*bj];
4393         if (col >= cstart) break;
4394         cj[k]   = col; bj++;
4395         ca[k++] = *ba++;
4396       }
4397       /* diagonal portion of A */
4398       for (j=0; j<ncols_d; j++) {
4399         cj[k]   = cstart + *aj++;
4400         ca[k++] = *aa++;
4401       }
4402       /* off-diagonal portion of A */
4403       for (j=jo; j<ncols_o; j++) {
4404         cj[k]   = cmap[*bj++];
4405         ca[k++] = *ba++;
4406       }
4407     }
4408     /* put together the new matrix */
4409     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr);
4410     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
4411     /* Since these are PETSc arrays, change flags to free them as necessary. */
4412     mat          = (Mat_SeqAIJ*)(*A_loc)->data;
4413     mat->free_a  = PETSC_TRUE;
4414     mat->free_ij = PETSC_TRUE;
4415     mat->nonew   = 0;
4416   } else if (scall == MAT_REUSE_MATRIX){
4417     mat=(Mat_SeqAIJ*)(*A_loc)->data;
4418     ci = mat->i; cj = mat->j; cam = mat->a;
4419     for (i=0; i<am; i++) {
4420       /* off-diagonal portion of A */
4421       ncols_o = bi[i+1] - bi[i];
4422       for (jo=0; jo<ncols_o; jo++) {
4423         col = cmap[*bj];
4424         if (col >= cstart) break;
4425         *cam++ = *ba++; bj++;
4426       }
4427       /* diagonal portion of A */
4428       ncols_d = ai[i+1] - ai[i];
4429       for (j=0; j<ncols_d; j++) *cam++ = *aa++;
4430       /* off-diagonal portion of A */
4431       for (j=jo; j<ncols_o; j++) {
4432         *cam++ = *ba++; bj++;
4433       }
4434     }
4435   } else {
4436     SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
4437   }
4438 
4439   ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
4440   PetscFunctionReturn(0);
4441 }
4442 
4443 #undef __FUNCT__
4444 #define __FUNCT__ "MatGetLocalMatCondensed"
4445 /*@C
4446      MatGetLocalMatCondensed - Creates a SeqAIJ matrix from the local rows of a parallel matrix, keeping only the NON-ZERO columns
4447 
4448     Not Collective
4449 
4450    Input Parameters:
4451 +    A - the matrix
4452 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4453 -    row, col - index sets of rows and columns to extract (or PETSC_NULL)
4454 
4455    Output Parameter:
4456 .    A_loc - the local sequential matrix generated
4457 
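   Example usage (a minimal sketch; passing PETSC_NULL lets the routine build the row
   and nonzero-column index sets itself):

.vb
      Mat A_loc;
      ierr = MatGetLocalMatCondensed(A,MAT_INITIAL_MATRIX,PETSC_NULL,PETSC_NULL,&A_loc);CHKERRQ(ierr);
      /* ... use A_loc ... */
      ierr = MatDestroy(A_loc);CHKERRQ(ierr);
.ve
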
4458     Level: developer
4459 
4460 @*/
4461 PetscErrorCode PETSCMAT_DLLEXPORT MatGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc)
4462 {
4463   Mat_MPIAIJ        *a=(Mat_MPIAIJ*)A->data;
4464   PetscErrorCode    ierr;
4465   PetscInt          i,start,end,ncols,nzA,nzB,*cmap,imark,*idx;
4466   IS                isrowa,iscola;
4467   Mat               *aloc;
4468 
4469   PetscFunctionBegin;
4470   ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
4471   if (!row){
4472     start = A->rmap->rstart; end = A->rmap->rend;
4473     ierr = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr);
4474   } else {
4475     isrowa = *row;
4476   }
4477   if (!col){
4478     start = A->cmap->rstart;
4479     cmap  = a->garray;
4480     nzA   = a->A->cmap->n;
4481     nzB   = a->B->cmap->n;
4482     ierr  = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr);
4483     ncols = 0;
4484     for (i=0; i<nzB; i++) {
4485       if (cmap[i] < start) idx[ncols++] = cmap[i];
4486       else break;
4487     }
4488     imark = i;
4489     for (i=0; i<nzA; i++) idx[ncols++] = start + i;
4490     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i];
4491     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,&iscola);CHKERRQ(ierr);
4492     ierr = PetscFree(idx);CHKERRQ(ierr);
4493   } else {
4494     iscola = *col;
4495   }
4496   if (scall != MAT_INITIAL_MATRIX){
4497     ierr = PetscMalloc(sizeof(Mat),&aloc);CHKERRQ(ierr);
4498     aloc[0] = *A_loc;
4499   }
4500   ierr = MatGetSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr);
4501   *A_loc = aloc[0];
4502   ierr = PetscFree(aloc);CHKERRQ(ierr);
4503   if (!row){
4504     ierr = ISDestroy(isrowa);CHKERRQ(ierr);
4505   }
4506   if (!col){
4507     ierr = ISDestroy(iscola);CHKERRQ(ierr);
4508   }
4509   ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
4510   PetscFunctionReturn(0);
4511 }
4512 
4513 #undef __FUNCT__
4514 #define __FUNCT__ "MatGetBrowsOfAcols"
4515 /*@C
4516     MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns of the local part of A
4517 
4518     Collective on Mat
4519 
4520    Input Parameters:
4521 +    A,B - the matrices in mpiaij format
4522 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4523 -    rowb, colb - index sets of rows and columns of B to extract (or PETSC_NULL)
4524 
4525    Output Parameter:
4526 +    rowb, colb - index sets of rows and columns of B to extract
4527 .    brstart - row index of B_seq from which next B->rmap->n rows are taken from B's local rows
4528 -    B_seq - the sequential matrix generated
4529 
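   Example usage (a minimal sketch; rowb and colb are created on the first call and
   must be passed back unchanged for MAT_REUSE_MATRIX):

.vb
      IS       rowb,colb;
      PetscInt brstart;
      Mat      B_seq;
      ierr = MatGetBrowsOfAcols(A,B,MAT_INITIAL_MATRIX,&rowb,&colb,&brstart,&B_seq);CHKERRQ(ierr);
      /* ... update the numerical values of B ... */
      ierr = MatGetBrowsOfAcols(A,B,MAT_REUSE_MATRIX,&rowb,&colb,&brstart,&B_seq);CHKERRQ(ierr);
.ve
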
4530     Level: developer
4531 
4532 @*/
4533 PetscErrorCode PETSCMAT_DLLEXPORT MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,PetscInt *brstart,Mat *B_seq)
4534 {
4535   Mat_MPIAIJ        *a=(Mat_MPIAIJ*)A->data;
4536   PetscErrorCode    ierr;
4537   PetscInt          *idx,i,start,ncols,nzA,nzB,*cmap,imark;
4538   IS                isrowb,iscolb;
4539   Mat               *bseq;
4540 
4541   PetscFunctionBegin;
4542   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend){
4543     SETERRQ4(PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
4544   }
4545   ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
4546 
4547   if (scall == MAT_INITIAL_MATRIX){
4548     start = A->cmap->rstart;
4549     cmap  = a->garray;
4550     nzA   = a->A->cmap->n;
4551     nzB   = a->B->cmap->n;
4552     ierr  = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr);
4553     ncols = 0;
4554     for (i=0; i<nzB; i++) {  /* row < local row index */
4555       if (cmap[i] < start) idx[ncols++] = cmap[i];
4556       else break;
4557     }
4558     imark = i;
4559     for (i=0; i<nzA; i++) idx[ncols++] = start + i;  /* local rows */
4560     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */
4561     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,&isrowb);CHKERRQ(ierr);
4562     ierr = PetscFree(idx);CHKERRQ(ierr);
4563     *brstart = imark;
4564     ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr);
4565   } else {
4566     if (!rowb || !colb) SETERRQ(PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX");
4567     isrowb = *rowb; iscolb = *colb;
4568     ierr = PetscMalloc(sizeof(Mat),&bseq);CHKERRQ(ierr);
4569     bseq[0] = *B_seq;
4570   }
4571   ierr = MatGetSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr);
4572   *B_seq = bseq[0];
4573   ierr = PetscFree(bseq);CHKERRQ(ierr);
4574   if (!rowb){
4575     ierr = ISDestroy(isrowb);CHKERRQ(ierr);
4576   } else {
4577     *rowb = isrowb;
4578   }
4579   if (!colb){
4580     ierr = ISDestroy(iscolb);CHKERRQ(ierr);
4581   } else {
4582     *colb = iscolb;
4583   }
4584   ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
4585   PetscFunctionReturn(0);
4586 }
4587 
4588 #undef __FUNCT__
4589 #define __FUNCT__ "MatGetBrowsOfAoCols"
4590 /*@C
4591     MatGetBrowsOfAoCols - Creates a SeqAIJ matrix by taking the rows of B that correspond to the
4592     nonzero columns of the OFF-DIAGONAL portion of the local part of A
4593 
4594     Collective on Mat
4595 
4596    Input Parameters:
4597 +    A,B - the matrices in mpiaij format
4598 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4599 .    startsj - starting point in B's sending and receiving j-arrays, saved for MAT_REUSE (or PETSC_NULL)
4600 -    bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or PETSC_NULL)
4601 
4602    Output Parameter:
4603 .    B_oth - the sequential matrix generated
4604 
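   Example usage (a minimal sketch; startsj and bufa_ptr hold communication state that
   is created on the first call and reused afterwards):

.vb
      PetscInt  *startsj = PETSC_NULL;
      MatScalar *bufa    = PETSC_NULL;
      Mat       B_oth;
      ierr = MatGetBrowsOfAoCols(A,B,MAT_INITIAL_MATRIX,&startsj,&bufa,&B_oth);CHKERRQ(ierr);
      /* ... update the numerical values of B ... */
      ierr = MatGetBrowsOfAoCols(A,B,MAT_REUSE_MATRIX,&startsj,&bufa,&B_oth);CHKERRQ(ierr);
.ve
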
4605     Level: developer
4606 
4607 @*/
4608 PetscErrorCode PETSCMAT_DLLEXPORT MatGetBrowsOfAoCols(Mat A,Mat B,MatReuse scall,PetscInt **startsj,MatScalar **bufa_ptr,Mat *B_oth)
4609 {
4610   VecScatter_MPI_General *gen_to,*gen_from;
4611   PetscErrorCode         ierr;
4612   Mat_MPIAIJ             *a=(Mat_MPIAIJ*)A->data;
4613   Mat_SeqAIJ             *b_oth;
4614   VecScatter             ctx=a->Mvctx;
4615   MPI_Comm               comm=((PetscObject)ctx)->comm;
4616   PetscMPIInt            *rprocs,*sprocs,tag=((PetscObject)ctx)->tag,rank;
4617   PetscInt               *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap->n,row,*b_othi,*b_othj;
4618   PetscScalar            *rvalues,*svalues;
4619   MatScalar              *b_otha,*bufa,*bufA;
4620   PetscInt               i,j,k,l,ll,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len;
4621   MPI_Request            *rwaits = PETSC_NULL,*swaits = PETSC_NULL;
4622   MPI_Status             *sstatus,rstatus;
4623   PetscMPIInt            jj;
4624   PetscInt               *cols,sbs,rbs;
4625   PetscScalar            *vals;
4626 
4627   PetscFunctionBegin;
4628   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend){
4629     SETERRQ4(PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
4630   }
4631   ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
4632   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4633 
4634   gen_to   = (VecScatter_MPI_General*)ctx->todata;
4635   gen_from = (VecScatter_MPI_General*)ctx->fromdata;
4636   rvalues  = gen_from->values; /* holds the length of receiving row */
4637   svalues  = gen_to->values;   /* holds the length of sending row */
4638   nrecvs   = gen_from->n;
4639   nsends   = gen_to->n;
4640 
4641   ierr = PetscMalloc2(nrecvs,MPI_Request,&rwaits,nsends,MPI_Request,&swaits);CHKERRQ(ierr);
4642   srow     = gen_to->indices;   /* local row index to be sent */
4643   sstarts  = gen_to->starts;
4644   sprocs   = gen_to->procs;
4645   sstatus  = gen_to->sstatus;
4646   sbs      = gen_to->bs;
4647   rstarts  = gen_from->starts;
4648   rprocs   = gen_from->procs;
4649   rbs      = gen_from->bs;
4650 
4651   if (!startsj || !bufa_ptr) scall = MAT_INITIAL_MATRIX;
4652   if (scall == MAT_INITIAL_MATRIX){
4653     /* i-array */
4654     /*---------*/
4655     /*  post receives */
4656     for (i=0; i<nrecvs; i++){
4657       rowlen = (PetscInt*)rvalues + rstarts[i]*rbs;
4658       nrows = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */
4659       ierr = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
4660     }
4661 
4662     /* pack the outgoing message */
4663     ierr = PetscMalloc((nsends+nrecvs+3)*sizeof(PetscInt),&sstartsj);CHKERRQ(ierr);
4664     rstartsj = sstartsj + nsends +1;
4665     sstartsj[0] = 0;  rstartsj[0] = 0;
4666     len = 0; /* total length of j or a array to be sent */
4667     k = 0;
4668     for (i=0; i<nsends; i++){
4669       rowlen = (PetscInt*)svalues + sstarts[i]*sbs;
4670       nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
4671       for (j=0; j<nrows; j++) {
4672         row = srow[k] + B->rmap->range[rank]; /* global row idx */
4673         for (l=0; l<sbs; l++){
4674           ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); /* rowlength */
4675           rowlen[j*sbs+l] = ncols;
4676           len += ncols;
4677           ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr);
4678         }
4679         k++;
4680       }
4681       ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
4682       sstartsj[i+1] = len;  /* starting point of (i+1)-th outgoing msg in bufj and bufa */
4683     }
4684     /* recvs and sends of i-array are completed */
4685     i = nrecvs;
4686     while (i--) {
4687       ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
4688     }
4689     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
4690 
4691     /* allocate buffers for sending j and a arrays */
4692     ierr = PetscMalloc((len+1)*sizeof(PetscInt),&bufj);CHKERRQ(ierr);
4693     ierr = PetscMalloc((len+1)*sizeof(PetscScalar),&bufa);CHKERRQ(ierr);
4694 
4695     /* create i-array of B_oth */
4696     ierr = PetscMalloc((aBn+2)*sizeof(PetscInt),&b_othi);CHKERRQ(ierr);
4697     b_othi[0] = 0;
4698     len = 0; /* total length of j or a array to be received */
4699     k = 0;
4700     for (i=0; i<nrecvs; i++){
4701       rowlen = (PetscInt*)rvalues + rstarts[i]*rbs;
4702       nrows = rbs*(rstarts[i+1]-rstarts[i]); /* num of rows to be received */
4703       for (j=0; j<nrows; j++) {
4704         b_othi[k+1] = b_othi[k] + rowlen[j];
4705         len += rowlen[j]; k++;
4706       }
4707       rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */
4708     }
4709 
4710     /* allocate space for j and a arrays of B_oth */
4711     ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(PetscInt),&b_othj);CHKERRQ(ierr);
4712     ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(MatScalar),&b_otha);CHKERRQ(ierr);
4713 
4714     /* j-array */
4715     /*---------*/
4716     /*  post receives of j-array */
4717     for (i=0; i<nrecvs; i++){
4718       nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
4719       ierr = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
4720     }
4721 
4722     /* pack the outgoing message j-array */
4723     k = 0;
4724     for (i=0; i<nsends; i++){
4725       nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
4726       bufJ = bufj+sstartsj[i];
4727       for (j=0; j<nrows; j++) {
4728         row  = srow[k++] + B->rmap->range[rank]; /* global row idx */
4729         for (ll=0; ll<sbs; ll++){
4730           ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr);
4731           for (l=0; l<ncols; l++){
4732             *bufJ++ = cols[l];
4733           }
4734           ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr);
4735         }
4736       }
4737       ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
4738     }
4739 
4740     /* recvs and sends of j-array are completed */
4741     i = nrecvs;
4742     while (i--) {
4743       ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
4744     }
4745     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
4746   } else if (scall == MAT_REUSE_MATRIX){
4747     sstartsj = *startsj;
4748     rstartsj = sstartsj + nsends +1;
4749     bufa     = *bufa_ptr;
4750     b_oth    = (Mat_SeqAIJ*)(*B_oth)->data;
4751     b_otha   = b_oth->a;
4752   } else {
4753     SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
4754   }
4755 
4756   /* a-array */
4757   /*---------*/
4758   /*  post receives of a-array */
4759   for (i=0; i<nrecvs; i++){
4760     nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
4761     ierr = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
4762   }
4763 
4764   /* pack the outgoing message a-array */
4765   k = 0;
4766   for (i=0; i<nsends; i++){
4767     nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
4768     bufA = bufa+sstartsj[i];
4769     for (j=0; j<nrows; j++) {
4770       row  = srow[k++] + B->rmap->range[rank]; /* global row idx */
4771       for (ll=0; ll<sbs; ll++){
4772         ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr);
4773         for (l=0; l<ncols; l++){
4774           *bufA++ = vals[l];
4775         }
4776         ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr);
4777       }
4778     }
4779     ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
4780   }
4781   /* recvs and sends of a-array are completed */
4782   i = nrecvs;
4783   while (i--) {
4784     ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
4785   }
4786   if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
4787   ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr);
4788 
4789   if (scall == MAT_INITIAL_MATRIX){
4790     /* put together the new matrix */
4791     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr);
4792 
4793     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
4794     /* Since these are PETSc arrays, change flags to free them as necessary. */
4795     b_oth          = (Mat_SeqAIJ *)(*B_oth)->data;
4796     b_oth->free_a  = PETSC_TRUE;
4797     b_oth->free_ij = PETSC_TRUE;
4798     b_oth->nonew   = 0;
4799 
4800     ierr = PetscFree(bufj);CHKERRQ(ierr);
4801     if (!startsj || !bufa_ptr){
4802       ierr = PetscFree(sstartsj);CHKERRQ(ierr);
4803       ierr = PetscFree(bufa);CHKERRQ(ierr);
4804     } else {
4805       *startsj  = sstartsj;
4806       *bufa_ptr = bufa;
4807     }
4808   }
4809   ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
4810   PetscFunctionReturn(0);
4811 }
4812 
4813 #undef __FUNCT__
4814 #define __FUNCT__ "MatGetCommunicationStructs"
4815 /*@C
4816   MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication.
4817 
4818   Not Collective
4819 
4820   Input Parameters:
4821 . A - The matrix in mpiaij format
4822 
4823   Output Parameter:
4824 + lvec - The local vector holding off-process values from the argument to a matrix-vector product
4825 . colmap - A map from global column index to local index into lvec
4826 - multScatter - A scatter from the argument of a matrix-vector product to lvec
4827 
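  Example usage (a minimal sketch; all three output pointers must be valid):

.vb
      Vec        lvec;
      VecScatter Mvctx;
#if defined(PETSC_USE_CTABLE)
      PetscTable colmap;
#else
      PetscInt   *colmap;
#endif
      ierr = MatGetCommunicationStructs(A,&lvec,&colmap,&Mvctx);CHKERRQ(ierr);
.ve
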
4828   Level: developer
4829 
4830 @*/
4831 #if defined (PETSC_USE_CTABLE)
4832 PetscErrorCode PETSCMAT_DLLEXPORT MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter)
4833 #else
4834 PetscErrorCode PETSCMAT_DLLEXPORT MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter)
4835 #endif
4836 {
4837   Mat_MPIAIJ *a;
4838 
4839   PetscFunctionBegin;
4840   PetscValidHeaderSpecific(A, MAT_COOKIE, 1);
4841   PetscValidPointer(lvec, 2);
4842   PetscValidPointer(colmap, 3);
4843   PetscValidPointer(multScatter, 4);
4844   a = (Mat_MPIAIJ *) A->data;
4845   if (lvec) *lvec = a->lvec;
4846   if (colmap) *colmap = a->colmap;
4847   if (multScatter) *multScatter = a->Mvctx;
4848   PetscFunctionReturn(0);
4849 }
4850 
4851 EXTERN_C_BEGIN
4852 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_MPIAIJ_MPICRL(Mat,const MatType,MatReuse,Mat*);
4853 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_MPIAIJ_MPICSRPERM(Mat,const MatType,MatReuse,Mat*);
4854 EXTERN_C_END
4855 
4856 #include "../src/mat/impls/dense/mpi/mpidense.h"
4857 
4858 #undef __FUNCT__
4859 #define __FUNCT__ "MatMatMultNumeric_MPIDense_MPIAIJ"
4860 /*
4861     Computes C = A*B as (B'*A')' since computing A*B directly is untenable
4862 
4863                n                       p                          p
4864         (              )       (              )         (                  )
4865       m (      A       )  *  n (       B      )   =   m (         C        )
4866         (              )       (              )         (                  )
4867 
4868 */
4869 PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C)
4870 {
4871   PetscErrorCode     ierr;
4872   Mat                At,Bt,Ct;
4873 
4874   PetscFunctionBegin;
4875   ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr);
4876   ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr);
4877   ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,1.0,&Ct);CHKERRQ(ierr);
4878   ierr = MatDestroy(At);CHKERRQ(ierr);
4879   ierr = MatDestroy(Bt);CHKERRQ(ierr);
4880   ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr);
4881   ierr = MatDestroy(Ct);CHKERRQ(ierr);
4882   PetscFunctionReturn(0);
4883 }
4884 
4885 #undef __FUNCT__
4886 #define __FUNCT__ "MatMatMultSymbolic_MPIDense_MPIAIJ"
4887 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat *C)
4888 {
4889   PetscErrorCode ierr;
4890   PetscInt       m=A->rmap->n,n=B->cmap->n;
4891   Mat            Cmat;
4892 
4893   PetscFunctionBegin;
4894   if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_ERR_ARG_SIZ,"A->cmap->n %D != B->rmap->n %D",A->cmap->n,B->rmap->n);
4895   ierr = MatCreate(((PetscObject)A)->comm,&Cmat);CHKERRQ(ierr);
4896   ierr = MatSetSizes(Cmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
4897   ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr);
4898   ierr = MatMPIDenseSetPreallocation(Cmat,PETSC_NULL);CHKERRQ(ierr);
4899   ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4900   ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4901   *C   = Cmat;
4902   PetscFunctionReturn(0);
4903 }
4904 
4905 /* ----------------------------------------------------------------*/
4906 #undef __FUNCT__
4907 #define __FUNCT__ "MatMatMult_MPIDense_MPIAIJ"
4908 PetscErrorCode MatMatMult_MPIDense_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C)
4909 {
4910   PetscErrorCode ierr;
4911 
4912   PetscFunctionBegin;
4913   if (scall == MAT_INITIAL_MATRIX){
4914     ierr = MatMatMultSymbolic_MPIDense_MPIAIJ(A,B,fill,C);CHKERRQ(ierr);
4915   }
4916   ierr = MatMatMultNumeric_MPIDense_MPIAIJ(A,B,*C);CHKERRQ(ierr);
4917   PetscFunctionReturn(0);
4918 }
4919 
4920 EXTERN_C_BEGIN
4921 #if defined(PETSC_HAVE_MUMPS)
4922 extern PetscErrorCode MatGetFactor_mpiaij_mumps(Mat,MatFactorType,Mat*);
4923 #endif
4924 #if defined(PETSC_HAVE_PASTIX)
4925 extern PetscErrorCode MatGetFactor_mpiaij_pastix(Mat,MatFactorType,Mat*);
4926 #endif
4927 #if defined(PETSC_HAVE_SUPERLU_DIST)
4928 extern PetscErrorCode MatGetFactor_mpiaij_superlu_dist(Mat,MatFactorType,Mat*);
4929 #endif
4930 #if defined(PETSC_HAVE_SPOOLES)
4931 extern PetscErrorCode MatGetFactor_mpiaij_spooles(Mat,MatFactorType,Mat*);
4932 #endif
4933 EXTERN_C_END
4934 
4935 /*MC
4936    MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.
4937 
4938    Options Database Keys:
4939 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions()
4940 
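  Example usage (a minimal creation sketch; m,n,M,N and the preallocation counts
  d_nz,o_nz are assumed to be defined by the caller):

.vb
      Mat A;
      ierr = MatCreate(comm,&A);CHKERRQ(ierr);
      ierr = MatSetSizes(A,m,n,M,N);CHKERRQ(ierr);
      ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
      ierr = MatMPIAIJSetPreallocation(A,d_nz,PETSC_NULL,o_nz,PETSC_NULL);CHKERRQ(ierr);
.ve
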
4941   Level: beginner
4942 
4943 .seealso: MatCreateMPIAIJ()
4944 M*/
4945 
4946 EXTERN_C_BEGIN
4947 #undef __FUNCT__
4948 #define __FUNCT__ "MatCreate_MPIAIJ"
4949 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_MPIAIJ(Mat B)
4950 {
4951   Mat_MPIAIJ     *b;
4952   PetscErrorCode ierr;
4953   PetscMPIInt    size;
4954 
4955   PetscFunctionBegin;
4956   ierr = MPI_Comm_size(((PetscObject)B)->comm,&size);CHKERRQ(ierr);
4957 
4958   ierr            = PetscNewLog(B,Mat_MPIAIJ,&b);CHKERRQ(ierr);
4959   B->data         = (void*)b;
4960   ierr            = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
4961   B->rmap->bs     = 1;
4962   B->assembled    = PETSC_FALSE;
4963   B->mapping      = 0;
4964 
4965   B->insertmode      = NOT_SET_VALUES;
4966   b->size            = size;
4967   ierr = MPI_Comm_rank(((PetscObject)B)->comm,&b->rank);CHKERRQ(ierr);
4968 
4969   /* build cache for off array entries formed */
4970   ierr = MatStashCreate_Private(((PetscObject)B)->comm,1,&B->stash);CHKERRQ(ierr);
4971   b->donotstash  = PETSC_FALSE;
4972   b->colmap      = 0;
4973   b->garray      = 0;
4974   b->roworiented = PETSC_TRUE;
4975 
4976   /* stuff used for matrix vector multiply */
4977   b->lvec      = PETSC_NULL;
4978   b->Mvctx     = PETSC_NULL;
4979 
4980   /* stuff for MatGetRow() */
4981   b->rowindices   = 0;
4982   b->rowvalues    = 0;
4983   b->getrowactive = PETSC_FALSE;
4984 
4985 #if defined(PETSC_HAVE_SPOOLES)
4986   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_mpiaij_spooles_C",
4987                                      "MatGetFactor_mpiaij_spooles",
4988                                      MatGetFactor_mpiaij_spooles);CHKERRQ(ierr);
4989 #endif
4990 #if defined(PETSC_HAVE_MUMPS)
4991   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_mpiaij_mumps_C",
4992                                      "MatGetFactor_mpiaij_mumps",
4993                                      MatGetFactor_mpiaij_mumps);CHKERRQ(ierr);
4994 #endif
4995 #if defined(PETSC_HAVE_PASTIX)
4996   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_mpiaij_pastix_C",
4997 					   "MatGetFactor_mpiaij_pastix",
4998 					   MatGetFactor_mpiaij_pastix);CHKERRQ(ierr);
4999 #endif
5000 #if defined(PETSC_HAVE_SUPERLU_DIST)
5001   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_mpiaij_superlu_dist_C",
5002                                      "MatGetFactor_mpiaij_superlu_dist",
5003                                      MatGetFactor_mpiaij_superlu_dist);CHKERRQ(ierr);
5004 #endif
5005   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
5006                                      "MatStoreValues_MPIAIJ",
5007                                      MatStoreValues_MPIAIJ);CHKERRQ(ierr);
5008   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
5009                                      "MatRetrieveValues_MPIAIJ",
5010                                      MatRetrieveValues_MPIAIJ);CHKERRQ(ierr);
5011   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
5012 				     "MatGetDiagonalBlock_MPIAIJ",
5013                                      MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr);
5014   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatIsTranspose_C",
5015 				     "MatIsTranspose_MPIAIJ",
5016 				     MatIsTranspose_MPIAIJ);CHKERRQ(ierr);
5017   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocation_C",
5018 				     "MatMPIAIJSetPreallocation_MPIAIJ",
5019 				     MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr);
5020   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",
5021 				     "MatMPIAIJSetPreallocationCSR_MPIAIJ",
5022 				     MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr);
5023   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C",
5024 				     "MatDiagonalScaleLocal_MPIAIJ",
5025 				     MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr);
5026   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpicsrperm_C",
5027                                      "MatConvert_MPIAIJ_MPICSRPERM",
5028                                       MatConvert_MPIAIJ_MPICSRPERM);CHKERRQ(ierr);
5029   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpicrl_C",
5030                                      "MatConvert_MPIAIJ_MPICRL",
5031                                       MatConvert_MPIAIJ_MPICRL);CHKERRQ(ierr);
5032   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMult_mpidense_mpiaij_C",
5033                                      "MatMatMult_MPIDense_MPIAIJ",
5034                                       MatMatMult_MPIDense_MPIAIJ);CHKERRQ(ierr);
5035   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMultSymbolic_mpidense_mpiaij_C",
5036                                      "MatMatMultSymbolic_MPIDense_MPIAIJ",
5037                                       MatMatMultSymbolic_MPIDense_MPIAIJ);CHKERRQ(ierr);
5038   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMultNumeric_mpidense_mpiaij_C",
5039                                      "MatMatMultNumeric_MPIDense_MPIAIJ",
5040                                       MatMatMultNumeric_MPIDense_MPIAIJ);CHKERRQ(ierr);
5041   ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr);
5042   PetscFunctionReturn(0);
5043 }
5044 EXTERN_C_END
5045 
5046 #undef __FUNCT__
5047 #define __FUNCT__ "MatCreateMPIAIJWithSplitArrays"
5048 /*@
5049      MatCreateMPIAIJWithSplitArrays - creates a MPI AIJ matrix using arrays that contain the "diagonal"
5050          and "off-diagonal" part of the matrix in CSR format.
5051 
5052    Collective on MPI_Comm
5053 
5054    Input Parameters:
5055 +  comm - MPI communicator
5056 .  m - number of local rows (Cannot be PETSC_DECIDE)
5057 .  n - This value should be the same as the local size used in creating the
5058        x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have it
5059        calculated if N is given). For square matrices n is almost always m.
5060 .  M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given)
5061 .  N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given)
5062 .   i - row indices for "diagonal" portion of matrix
5063 .   j - column indices
5064 .   a - matrix values
5065 .   oi - row indices for "off-diagonal" portion of matrix
5066 .   oj - column indices
5067 -   oa - matrix values
5068 
5069    Output Parameter:
5070 .   mat - the matrix
5071 
5072    Level: advanced
5073 
5074    Notes:
5075        The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc.
5076 
5077        The i and j indices are 0 based
5078 
5079        See MatCreateMPIAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix
5080 
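       Example (a hypothetical sketch assuming a run on exactly two processes, with rank
       obtained from MPI_Comm_rank(); it builds the 2x2 matrix [[2,1],[1,2]] with one row
       per process; since the arrays are not copied they must outlive the matrix):

.vb
      PetscInt    i[2]  = {0,1},j[1]  = {0};   /* diagonal block: one entry, local column 0 */
      PetscInt    oi[2] = {0,1},oj[1];         /* off-diagonal block: one entry, global column */
      PetscScalar a[1]  = {2.0},oa[1] = {1.0};
      Mat         A;
      oj[0] = rank ? 0 : 1;                    /* the column owned by the other process */
      ierr  = MatCreateMPIAIJWithSplitArrays(comm,1,1,PETSC_DETERMINE,PETSC_DETERMINE,i,j,a,oi,oj,oa,&A);CHKERRQ(ierr);
.ve
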
5081 
5082 .keywords: matrix, aij, compressed row, sparse, parallel
5083 
5084 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
5085           MPIAIJ, MatCreateMPIAIJ(), MatCreateMPIAIJWithArrays()
5086 @*/
5087 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],
5088 								PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat)
5089 {
5090   PetscErrorCode ierr;
5091   Mat_MPIAIJ     *maij;
5092 
5093   PetscFunctionBegin;
5094   if (m < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
5095   if (i[0]) {
5096     SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
5097   }
5098   if (oi[0]) {
5099     SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0");
5100   }
5101   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
5102   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
5103   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
5104   maij = (Mat_MPIAIJ*) (*mat)->data;
5105   maij->donotstash     = PETSC_TRUE;
5106   (*mat)->preallocated = PETSC_TRUE;
5107 
5108   ierr = PetscMapSetBlockSize((*mat)->rmap,1);CHKERRQ(ierr);
5109   ierr = PetscMapSetBlockSize((*mat)->cmap,1);CHKERRQ(ierr);
5110   ierr = PetscMapSetUp((*mat)->rmap);CHKERRQ(ierr);
5111   ierr = PetscMapSetUp((*mat)->cmap);CHKERRQ(ierr);
5112 
5113   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr);
5114   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr);
5115 
5116   ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5117   ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5118   ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5119   ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5120 
5121   ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5122   ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5123   PetscFunctionReturn(0);
5124 }
5125 
5126 /*
5127     Special version for direct calls from Fortran
5128 */
5129 #if defined(PETSC_HAVE_FORTRAN_CAPS)
5130 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ
5131 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
5132 #define matsetvaluesmpiaij_ matsetvaluesmpiaij
5133 #endif
5134 
5135 /* Change these macros so they can be used in a void function */
5136 #undef CHKERRQ
5137 #define CHKERRQ(ierr) CHKERRABORT(((PetscObject)mat)->comm,ierr)
5138 #undef SETERRQ2
5139 #define SETERRQ2(ierr,b,c,d) CHKERRABORT(((PetscObject)mat)->comm,ierr)
5140 #undef SETERRQ
5141 #define SETERRQ(ierr,b) CHKERRABORT(((PetscObject)mat)->comm,ierr)
5142 
5143 EXTERN_C_BEGIN
5144 #undef __FUNCT__
5145 #define __FUNCT__ "matsetvaluesmpiaij_"
5146 void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr)
5147 {
5148   Mat             mat = *mmat;
5149   PetscInt        m = *mm, n = *mn;
5150   InsertMode      addv = *maddv;
5151   Mat_MPIAIJ      *aij = (Mat_MPIAIJ*)mat->data;
5152   PetscScalar     value;
5153   PetscErrorCode  ierr;
5154 
5155   ierr = MatPreallocated(mat);CHKERRQ(ierr);
5156   if (mat->insertmode == NOT_SET_VALUES) {
5157     mat->insertmode = addv;
5158   }
5159 #if defined(PETSC_USE_DEBUG)
5160   else if (mat->insertmode != addv) {
5161     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
5162   }
5163 #endif
5164   {
5165   PetscInt        i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
5166   PetscInt        cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
5167   PetscTruth      roworiented = aij->roworiented;
5168 
5169   /* Some Variables required in the macro */
5170   Mat             A = aij->A;
5171   Mat_SeqAIJ      *a = (Mat_SeqAIJ*)A->data;
5172   PetscInt        *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
5173   MatScalar       *aa = a->a;
5174   PetscTruth      ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES))?PETSC_TRUE:PETSC_FALSE);
5175   Mat             B = aij->B;
5176   Mat_SeqAIJ      *b = (Mat_SeqAIJ*)B->data;
5177   PetscInt        *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
5178   MatScalar       *ba = b->a;
5179 
5180   PetscInt        *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
5181   PetscInt        nonew = a->nonew;
5182   MatScalar       *ap1,*ap2;
5183 
5184   PetscFunctionBegin;
5185   for (i=0; i<m; i++) {
5186     if (im[i] < 0) continue;
5187 #if defined(PETSC_USE_DEBUG)
5188     if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
5189 #endif
5190     if (im[i] >= rstart && im[i] < rend) {
5191       row      = im[i] - rstart;
5192       lastcol1 = -1;
5193       rp1      = aj + ai[row];
5194       ap1      = aa + ai[row];
5195       rmax1    = aimax[row];
5196       nrow1    = ailen[row];
5197       low1     = 0;
5198       high1    = nrow1;
5199       lastcol2 = -1;
5200       rp2      = bj + bi[row];
5201       ap2      = ba + bi[row];
5202       rmax2    = bimax[row];
5203       nrow2    = bilen[row];
5204       low2     = 0;
5205       high2    = nrow2;
5206 
5207       for (j=0; j<n; j++) {
5208         if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
5209         if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
5210         if (in[j] >= cstart && in[j] < cend){
5211           col = in[j] - cstart;
5212           MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
5213         } else if (in[j] < 0) continue;
5214 #if defined(PETSC_USE_DEBUG)
5215         else if (in[j] >= mat->cmap->N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);}
5216 #endif
5217         else {
5218           if (mat->was_assembled) {
5219             if (!aij->colmap) {
5220               ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
5221             }
5222 #if defined (PETSC_USE_CTABLE)
5223             ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
5224 	    col--;
5225 #else
5226             col = aij->colmap[in[j]] - 1;
5227 #endif
5228             if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
5229               ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
5230               col =  in[j];
5231               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
5232               B = aij->B;
5233               b = (Mat_SeqAIJ*)B->data;
5234               bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
5235               rp2      = bj + bi[row];
5236               ap2      = ba + bi[row];
5237               rmax2    = bimax[row];
5238               nrow2    = bilen[row];
5239               low2     = 0;
5240               high2    = nrow2;
5241               bm       = aij->B->rmap->n;
5242               ba = b->a;
5243             }
5244           } else col = in[j];
5245           MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
5246         }
5247       }
5248     } else {
5249       if (!aij->donotstash) {
5250         if (roworiented) {
5251           if (ignorezeroentries && v[i*n] == 0.0) continue;
5252           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr);
5253         } else {
5254           if (ignorezeroentries && v[i] == 0.0) continue;
5255           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr);
5256         }
5257       }
5258     }
5259   }}
5260   PetscFunctionReturnVoid();
5261 }
5262 EXTERN_C_END
5263 
5264