xref: /petsc/src/mat/impls/aij/mpi/mpiaij.c (revision 719d5645761d844e4357b7ee00a3296dffe0b787)
1 #define PETSCMAT_DLL
2 
3 #include "src/mat/impls/aij/mpi/mpiaij.h"   /*I "petscmat.h" I*/
4 #include "src/inline/spops.h"
5 
6 #undef __FUNCT__
7 #define __FUNCT__ "MatDistribute_MPIAIJ"
8 /*
9     Distributes a SeqAIJ matrix across a set of processes. The code is copied from
10     MatLoad_MPIAIJ(); this duplication should eventually be replaced by a routine for each matrix type.
11 
12     Only for square matrices.
13 */
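/*
   A minimal usage sketch (hypothetical caller, not part of this file): gmat is a MATSEQAIJ
   matrix that exists on process 0 and m is the number of rows this process is to own.

      Mat dmat;
      ierr = MatDistribute_MPIAIJ(PETSC_COMM_WORLD,gmat,m,MAT_INITIAL_MATRIX,&dmat);CHKERRQ(ierr);
      ... update the numerical values of gmat on process 0, keeping its nonzero pattern ...
      ierr = MatDistribute_MPIAIJ(PETSC_COMM_WORLD,gmat,m,MAT_REUSE_MATRIX,&dmat);CHKERRQ(ierr);

   With MAT_REUSE_MATRIX only the numerical values are shipped from process 0; the parallel
   nonzero structure built by the MAT_INITIAL_MATRIX call is reused.
*/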
14 PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat)
15 {
16   PetscMPIInt    rank,size;
17   PetscInt       *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz,*gmataj,cnt,row,*ld;
18   PetscErrorCode ierr;
19   Mat            mat;
20   Mat_SeqAIJ     *gmata;
21   PetscMPIInt    tag;
22   MPI_Status     status;
23   PetscTruth     aij;
24   MatScalar      *gmataa,*ao,*ad,*gmataarestore=0;
25 
26   PetscFunctionBegin;
27   CHKMEMQ;
28   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
29   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
30   if (!rank) {
31     ierr = PetscTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);CHKERRQ(ierr);
32     if (!aij) SETERRQ1(PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name);
33   }
34   if (reuse == MAT_INITIAL_MATRIX) {
35     ierr = MatCreate(comm,&mat);CHKERRQ(ierr);
36     ierr = MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
37     ierr = MatSetType(mat,MATAIJ);CHKERRQ(ierr);
38     ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr);
39     ierr = PetscMalloc2(m,PetscInt,&dlens,m,PetscInt,&olens);CHKERRQ(ierr);
40     ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
41     rowners[0] = 0;
42     for (i=2; i<=size; i++) {
43       rowners[i] += rowners[i-1];
44     }
45     rstart = rowners[rank];
46     rend   = rowners[rank+1];
47     ierr   = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
48     if (!rank) {
49       gmata = (Mat_SeqAIJ*) gmat->data;
50       /* send row lengths to all processors */
51       for (i=0; i<m; i++) dlens[i] = gmata->ilen[i];
52       for (i=1; i<size; i++) {
53 	ierr = MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
54       }
55       /* determine the number of diagonal and off-diagonal nonzeros in each row */
56       ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
57       ierr = PetscMalloc(m*sizeof(PetscInt),&ld);CHKERRQ(ierr);
58       ierr = PetscMemzero(ld,m*sizeof(PetscInt));CHKERRQ(ierr);
59       jj = 0;
60       for (i=0; i<m; i++) {
61 	for (j=0; j<dlens[i]; j++) {
62           if (gmata->j[jj] < rstart) ld[i]++;
63 	  if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++;
64 	  jj++;
65 	}
66       }
67       /* send column indices to other processes */
68       for (i=1; i<size; i++) {
69 	nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
70 	ierr = MPI_Send(&nz,1,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
71 	ierr = MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
72       }
73 
74       /* send numerical values to other processes */
75       for (i=1; i<size; i++) {
76         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
77         ierr = MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
78       }
79       gmataa = gmata->a;
80       gmataj = gmata->j;
81 
82     } else {
83       /* receive row lengths */
84       ierr = MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
85       /* receive column indices */
86       ierr = MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
87       ierr = PetscMalloc2(nz,PetscScalar,&gmataa,nz,PetscInt,&gmataj);CHKERRQ(ierr);
88       ierr = MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
89       /* determine the number of diagonal and off-diagonal nonzeros in each row */
90       ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
91       ierr = PetscMalloc(m*sizeof(PetscInt),&ld);CHKERRQ(ierr);
92       ierr = PetscMemzero(ld,m*sizeof(PetscInt));CHKERRQ(ierr);
93       jj = 0;
94       for (i=0; i<m; i++) {
95 	for (j=0; j<dlens[i]; j++) {
96           if (gmataj[jj] < rstart) ld[i]++;
97 	  if (gmataj[jj] < rstart || gmataj[jj] >= rend) olens[i]++;
98 	  jj++;
99 	}
100       }
101       /* receive numerical values */
102       ierr = PetscMemzero(gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr);
103       ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
104     }
105     /* set preallocation */
106     for (i=0; i<m; i++) {
107       dlens[i] -= olens[i];
108     }
109     ierr = MatSeqAIJSetPreallocation(mat,0,dlens);CHKERRQ(ierr);
110     ierr = MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);CHKERRQ(ierr);
111 
112     for (i=0; i<m; i++) {
113       dlens[i] += olens[i];
114     }
115     cnt  = 0;
116     for (i=0; i<m; i++) {
117       row  = rstart + i;
118       ierr = MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);CHKERRQ(ierr);
119       cnt += dlens[i];
120     }
121     if (rank) {
122       ierr = PetscFree2(gmataa,gmataj);CHKERRQ(ierr);
123     }
124     ierr = PetscFree2(dlens,olens);CHKERRQ(ierr);
125     ierr = PetscFree(rowners);CHKERRQ(ierr);
126     ((Mat_MPIAIJ*)(mat->data))->ld = ld;
127     *inmat = mat;
128   } else {   /* column indices are already set; only need to move over numerical values from process 0 */
129     Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data;
130     Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data;
131     mat   = *inmat;
132     ierr  = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
133     if (!rank) {
134       /* send numerical values to other processes */
135       gmata = (Mat_SeqAIJ*) gmat->data;
136       ierr   = MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);CHKERRQ(ierr);
137       gmataa = gmata->a;
138       for (i=1; i<size; i++) {
139         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
140         ierr = MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
141       }
142       nz   = gmata->i[rowners[1]]-gmata->i[rowners[0]];
143     } else {
144       /* receive numerical values from process 0 */
145       nz   = Ad->nz + Ao->nz;
146       ierr = PetscMalloc(nz*sizeof(PetscScalar),&gmataa);CHKERRQ(ierr); gmataarestore = gmataa;
147       ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
148     }
149     /* transfer numerical values into the diagonal A and off diagonal B parts of mat */
150     ld = ((Mat_MPIAIJ*)(mat->data))->ld;
151     ad = Ad->a;
152     ao = Ao->a;
153     if (mat->rmap->n) {
154       i  = 0;
155       nz = ld[i];                                   ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
156       nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
157     }
158     for (i=1; i<mat->rmap->n; i++) {
159       nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
160       nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
161     }
162     i--;
163     if (mat->rmap->n) {
164       nz = Ao->i[i+1] - Ao->i[i] - ld[i];           ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
165     }
166     if (rank) {
167       ierr = PetscFree(gmataarestore);CHKERRQ(ierr);
168     }
169   }
170   ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
171   ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
172   CHKMEMQ;
173   PetscFunctionReturn(0);
174 }
175 
176 /*
177   Local utility routine that creates a mapping from the global column
178 number to the local number in the off-diagonal part of the local
179 storage of the matrix.  When PETSC_USE_CTABLE is used this is scalable at
180 a slightly higher hash table cost; without it, it is not scalable (each process
181 has an order-N integer array) but is fast to access.
182 */
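/*
   Lookup sketch (mirrors the use in MatSetValues_MPIAIJ() below): given a global column
   gcol that lies outside this process's diagonal block,

      #if defined (PETSC_USE_CTABLE)
        ierr = PetscTableFind(aij->colmap,gcol+1,&lcol);CHKERRQ(ierr); lcol--;
      #else
        lcol = aij->colmap[gcol] - 1;
      #endif

   yields the local column lcol in the off-diagonal part B, or a negative value if gcol does
   not yet appear in B (the +1/-1 shift lets 0 mean "not found").
*/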
183 #undef __FUNCT__
184 #define __FUNCT__ "CreateColmap_MPIAIJ_Private"
185 PetscErrorCode CreateColmap_MPIAIJ_Private(Mat mat)
186 {
187   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
188   PetscErrorCode ierr;
189   PetscInt       n = aij->B->cmap->n,i;
190 
191   PetscFunctionBegin;
192 #if defined (PETSC_USE_CTABLE)
193   ierr = PetscTableCreate(n,&aij->colmap);CHKERRQ(ierr);
194   for (i=0; i<n; i++){
195     ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1);CHKERRQ(ierr);
196   }
197 #else
198   ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscInt),&aij->colmap);CHKERRQ(ierr);
199   ierr = PetscLogObjectMemory(mat,mat->cmap->N*sizeof(PetscInt));CHKERRQ(ierr);
200   ierr = PetscMemzero(aij->colmap,mat->cmap->N*sizeof(PetscInt));CHKERRQ(ierr);
201   for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
202 #endif
203   PetscFunctionReturn(0);
204 }
205 
206 
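/*
   The two macros below insert (INSERT_VALUES) or accumulate (ADD_VALUES) a single entry into
   the diagonal block A or the off-diagonal block B of the local rows. They first narrow the
   search window [low,high) by bisection on the sorted column indices of the row, then scan for
   the column; if it is not present (and new nonzeros are permitted) the later entries of the
   row are shifted up to make room, with MatSeqXAIJReallocateAIJ() growing the row storage by
   CHUNKSIZE entries when the row is full.
*/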
207 #define CHUNKSIZE   15
208 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \
209 { \
210     if (col <= lastcol1) low1 = 0; else high1 = nrow1; \
211     lastcol1 = col;\
212     while (high1-low1 > 5) { \
213       t = (low1+high1)/2; \
214       if (rp1[t] > col) high1 = t; \
215       else             low1  = t; \
216     } \
217       for (_i=low1; _i<high1; _i++) { \
218         if (rp1[_i] > col) break; \
219         if (rp1[_i] == col) { \
220           if (addv == ADD_VALUES) ap1[_i] += value;   \
221           else                    ap1[_i] = value; \
222           goto a_noinsert; \
223         } \
224       }  \
225       if (value == 0.0 && ignorezeroentries) {low1 = 0; high1 = nrow1;goto a_noinsert;} \
226       if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;}		\
227       if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
228       MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \
229       N = nrow1++ - 1; a->nz++; high1++; \
230       /* shift up all the later entries in this row */ \
231       for (ii=N; ii>=_i; ii--) { \
232         rp1[ii+1] = rp1[ii]; \
233         ap1[ii+1] = ap1[ii]; \
234       } \
235       rp1[_i] = col;  \
236       ap1[_i] = value;  \
237       a_noinsert: ; \
238       ailen[row] = nrow1; \
239 }
240 
241 
242 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \
243 { \
244     if (col <= lastcol2) low2 = 0; else high2 = nrow2; \
245     lastcol2 = col;\
246     while (high2-low2 > 5) { \
247       t = (low2+high2)/2; \
248       if (rp2[t] > col) high2 = t; \
249       else             low2  = t; \
250     } \
251     for (_i=low2; _i<high2; _i++) {		\
252       if (rp2[_i] > col) break;			\
253       if (rp2[_i] == col) {			      \
254 	if (addv == ADD_VALUES) ap2[_i] += value;     \
255 	else                    ap2[_i] = value;      \
256 	goto b_noinsert;			      \
257       }						      \
258     }							      \
259     if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
260     if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;}		\
261     if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
262     MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \
263     N = nrow2++ - 1; b->nz++; high2++;					\
264     /* shift up all the later entries in this row */			\
265     for (ii=N; ii>=_i; ii--) {						\
266       rp2[ii+1] = rp2[ii];						\
267       ap2[ii+1] = ap2[ii];						\
268     }									\
269     rp2[_i] = col;							\
270     ap2[_i] = value;							\
271     b_noinsert: ;								\
272     bilen[row] = nrow2;							\
273 }
274 
275 #undef __FUNCT__
276 #define __FUNCT__ "MatSetValuesRow_MPIAIJ"
277 PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[])
278 {
279   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)A->data;
280   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data;
281   PetscErrorCode ierr;
282   PetscInt       l,*garray = mat->garray,diag;
283 
284   PetscFunctionBegin;
285   /* code only works for square matrices A */
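  /* v[] is assumed to hold the complete local row in increasing global column order:
     the off-diagonal entries whose global columns lie left of the diagonal block come first,
     then the diagonal-block entries, then the remaining off-diagonal entries; the three
     PetscMemcpy() calls below split v accordingly */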
286 
287   /* find size of row to the left of the diagonal part */
288   ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr);
289   row  = row - diag;
290   for (l=0; l<b->i[row+1]-b->i[row]; l++) {
291     if (garray[b->j[b->i[row]+l]] > diag) break;
292   }
293   ierr = PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));CHKERRQ(ierr);
294 
295   /* diagonal part */
296   ierr = PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));CHKERRQ(ierr);
297 
298   /* right of diagonal part */
299   ierr = PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));CHKERRQ(ierr);
300   PetscFunctionReturn(0);
301 }
302 
303 #undef __FUNCT__
304 #define __FUNCT__ "MatSetValues_MPIAIJ"
305 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
306 {
307   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
308   PetscScalar    value;
309   PetscErrorCode ierr;
310   PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
311   PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
312   PetscTruth     roworiented = aij->roworiented;
313 
314   /* Some Variables required in the macro */
315   Mat            A = aij->A;
316   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data;
317   PetscInt       *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
318   MatScalar      *aa = a->a;
319   PetscTruth     ignorezeroentries = a->ignorezeroentries;
320   Mat            B = aij->B;
321   Mat_SeqAIJ     *b = (Mat_SeqAIJ*)B->data;
322   PetscInt       *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
323   MatScalar      *ba = b->a;
324 
325   PetscInt       *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
326   PetscInt       nonew = a->nonew;
327   MatScalar      *ap1,*ap2;
328 
329   PetscFunctionBegin;
330   for (i=0; i<m; i++) {
331     if (im[i] < 0) continue;
332 #if defined(PETSC_USE_DEBUG)
333     if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
334 #endif
335     if (im[i] >= rstart && im[i] < rend) {
336       row      = im[i] - rstart;
337       lastcol1 = -1;
338       rp1      = aj + ai[row];
339       ap1      = aa + ai[row];
340       rmax1    = aimax[row];
341       nrow1    = ailen[row];
342       low1     = 0;
343       high1    = nrow1;
344       lastcol2 = -1;
345       rp2      = bj + bi[row];
346       ap2      = ba + bi[row];
347       rmax2    = bimax[row];
348       nrow2    = bilen[row];
349       low2     = 0;
350       high2    = nrow2;
351 
352       for (j=0; j<n; j++) {
353         if (v) {if (roworiented) value = v[i*n+j]; else value = v[i+j*m];} else value = 0.0;
354         if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
355         if (in[j] >= cstart && in[j] < cend){
356           col = in[j] - cstart;
357           MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
358         } else if (in[j] < 0) continue;
359 #if defined(PETSC_USE_DEBUG)
360         else if (in[j] >= mat->cmap->N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);}
361 #endif
362         else {
363           if (mat->was_assembled) {
364             if (!aij->colmap) {
365               ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
366             }
367 #if defined (PETSC_USE_CTABLE)
368             ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
369 	    col--;
370 #else
371             col = aij->colmap[in[j]] - 1;
372 #endif
373             if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
374               ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
375               col =  in[j];
376               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
377               B = aij->B;
378               b = (Mat_SeqAIJ*)B->data;
379               bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a;
380               rp2      = bj + bi[row];
381               ap2      = ba + bi[row];
382               rmax2    = bimax[row];
383               nrow2    = bilen[row];
384               low2     = 0;
385               high2    = nrow2;
386               bm       = aij->B->rmap->n;
387               ba = b->a;
388             }
389           } else col = in[j];
390           MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
391         }
392       }
393     } else {
394       if (!aij->donotstash) {
395         if (roworiented) {
396           if (ignorezeroentries && v[i*n] == 0.0) continue;
397           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr);
398         } else {
399           if (ignorezeroentries && v[i] == 0.0) continue;
400           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr);
401         }
402       }
403     }
404   }
405   PetscFunctionReturn(0);
406 }
407 
408 #undef __FUNCT__
409 #define __FUNCT__ "MatGetValues_MPIAIJ"
410 PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
411 {
412   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
413   PetscErrorCode ierr;
414   PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
415   PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
416 
417   PetscFunctionBegin;
418   for (i=0; i<m; i++) {
419     if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/
420     if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
421     if (idxm[i] >= rstart && idxm[i] < rend) {
422       row = idxm[i] - rstart;
423       for (j=0; j<n; j++) {
424         if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */
425         if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
426         if (idxn[j] >= cstart && idxn[j] < cend){
427           col = idxn[j] - cstart;
428           ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
429         } else {
430           if (!aij->colmap) {
431             ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
432           }
433 #if defined (PETSC_USE_CTABLE)
434           ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
435           col --;
436 #else
437           col = aij->colmap[idxn[j]] - 1;
438 #endif
439           if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
440           else {
441             ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
442           }
443         }
444       }
445     } else {
446       SETERRQ(PETSC_ERR_SUP,"Only local values currently supported");
447     }
448   }
449   PetscFunctionReturn(0);
450 }
451 
452 #undef __FUNCT__
453 #define __FUNCT__ "MatAssemblyBegin_MPIAIJ"
454 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
455 {
456   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
457   PetscErrorCode ierr;
458   PetscInt       nstash,reallocs;
459   InsertMode     addv;
460 
461   PetscFunctionBegin;
462   if (aij->donotstash) {
463     PetscFunctionReturn(0);
464   }
465 
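  /* Values destined for rows owned by other processes were stashed by MatSetValues_MPIAIJ();
     here the stash communication is started, and the messages are received and applied in
     MatAssemblyEnd_MPIAIJ(). */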
466   /* make sure all processors are either in INSERTMODE or ADDMODE */
467   ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,((PetscObject)mat)->comm);CHKERRQ(ierr);
468   if (addv == (ADD_VALUES|INSERT_VALUES)) {
469     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
470   }
471   mat->insertmode = addv; /* in case this processor had no cache */
472 
473   ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
474   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
475   ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
476   PetscFunctionReturn(0);
477 }
478 
479 #undef __FUNCT__
480 #define __FUNCT__ "MatAssemblyEnd_MPIAIJ"
481 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
482 {
483   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
484   Mat_SeqAIJ     *a=(Mat_SeqAIJ *)aij->A->data;
485   PetscErrorCode ierr;
486   PetscMPIInt    n;
487   PetscInt       i,j,rstart,ncols,flg;
488   PetscInt       *row,*col;
489   PetscTruth     other_disassembled;
490   PetscScalar    *val;
491   InsertMode     addv = mat->insertmode;
492 
493   /* do not use 'b = (Mat_SeqAIJ *)aij->B->data' as B can be reset in disassembly */
494   PetscFunctionBegin;
495   if (!aij->donotstash) {
496     while (1) {
497       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
498       if (!flg) break;
499 
500       for (i=0; i<n;) {
501         /* Now identify the consecutive vals belonging to the same row */
502         for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
503         if (j < n) ncols = j-i;
504         else       ncols = n-i;
505         /* Now assemble all these values with a single function call */
506         ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
507         i = j;
508       }
509     }
510     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
511   }
512   a->compressedrow.use     = PETSC_FALSE;
513   ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
514   ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);
515 
516   /* determine if any processor has disassembled, if so we must
517      also disassemble ourselves, in order that we may reassemble. */
518   /*
519      if nonzero structure of submatrix B cannot change then we know that
520      no processor disassembled thus we can skip this stuff
521   */
522   if (!((Mat_SeqAIJ*)aij->B->data)->nonew)  {
523     ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,((PetscObject)mat)->comm);CHKERRQ(ierr);
524     if (mat->was_assembled && !other_disassembled) {
525       ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
526     }
527   }
528   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
529     ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
530   }
531   ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr);
532   ((Mat_SeqAIJ *)aij->B->data)->compressedrow.use = PETSC_TRUE; /* b->compressedrow.use */
533   ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
534   ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);
535 
536   ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr);
537   aij->rowvalues = 0;
538 
539   /* used by MatAXPY() */
540   a->xtoy = 0; ((Mat_SeqAIJ *)aij->B->data)->xtoy = 0;  /* b->xtoy = 0 */
541   a->XtoY = 0; ((Mat_SeqAIJ *)aij->B->data)->XtoY = 0;  /* b->XtoY = 0 */
542 
543   PetscFunctionReturn(0);
544 }
545 
546 #undef __FUNCT__
547 #define __FUNCT__ "MatZeroEntries_MPIAIJ"
548 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
549 {
550   Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
551   PetscErrorCode ierr;
552 
553   PetscFunctionBegin;
554   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
555   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
556   PetscFunctionReturn(0);
557 }
558 
559 #undef __FUNCT__
560 #define __FUNCT__ "MatZeroRows_MPIAIJ"
561 PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag)
562 {
563   Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
564   PetscErrorCode ierr;
565   PetscMPIInt    size = l->size,imdex,n,rank = l->rank,tag = ((PetscObject)A)->tag,lastidx = -1;
566   PetscInt       i,*owners = A->rmap->range;
567   PetscInt       *nprocs,j,idx,nsends,row;
568   PetscInt       nmax,*svalues,*starts,*owner,nrecvs;
569   PetscInt       *rvalues,count,base,slen,*source;
570   PetscInt       *lens,*lrows,*values,rstart=A->rmap->rstart;
571   MPI_Comm       comm = ((PetscObject)A)->comm;
572   MPI_Request    *send_waits,*recv_waits;
573   MPI_Status     recv_status,*send_status;
574 #if defined(PETSC_USE_DEBUG)
575   PetscTruth     found = PETSC_FALSE;
576 #endif
577 
578   PetscFunctionBegin;
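  /* The global indices in rows[] may refer to rows owned by any process. The code below
     determines the owner of each index, sends every index to its owner (message counts and
     lengths are agreed on with PetscMaxSum()), gathers the locally owned rows into lrows[],
     and only then zeros them. */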
579   /*  first count number of contributors to each processor */
580   ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
581   ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
582   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/
583   j = 0;
584   for (i=0; i<N; i++) {
585     if (lastidx > (idx = rows[i])) j = 0;
586     lastidx = idx;
587     for (; j<size; j++) {
588       if (idx >= owners[j] && idx < owners[j+1]) {
589         nprocs[2*j]++;
590         nprocs[2*j+1] = 1;
591         owner[i] = j;
592 #if defined(PETSC_USE_DEBUG)
593         found = PETSC_TRUE;
594 #endif
595         break;
596       }
597     }
598 #if defined(PETSC_USE_DEBUG)
599     if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
600     found = PETSC_FALSE;
601 #endif
602   }
603   nsends = 0;  for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}
604 
605   /* inform other processors of number of messages and max length*/
606   ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);
607 
608   /* post receives:   */
609   ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr);
610   ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
611   for (i=0; i<nrecvs; i++) {
612     ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
613   }
614 
615   /* do sends:
616       1) starts[i] gives the starting index in svalues for stuff going to
617          the ith processor
618   */
619   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr);
620   ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
621   ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr);
622   starts[0] = 0;
623   for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
624   for (i=0; i<N; i++) {
625     svalues[starts[owner[i]]++] = rows[i];
626   }
627 
628   starts[0] = 0;
629   for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
630   count = 0;
631   for (i=0; i<size; i++) {
632     if (nprocs[2*i+1]) {
633       ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
634     }
635   }
636   ierr = PetscFree(starts);CHKERRQ(ierr);
637 
638   base = owners[rank];
639 
640   /*  wait on receives */
641   ierr   = PetscMalloc(2*(nrecvs+1)*sizeof(PetscInt),&lens);CHKERRQ(ierr);
642   source = lens + nrecvs;
643   count  = nrecvs; slen = 0;
644   while (count) {
645     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
646     /* unpack receives into our local space */
647     ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);
648     source[imdex]  = recv_status.MPI_SOURCE;
649     lens[imdex]    = n;
650     slen          += n;
651     count--;
652   }
653   ierr = PetscFree(recv_waits);CHKERRQ(ierr);
654 
655   /* move the data into the send scatter */
656   ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr);
657   count = 0;
658   for (i=0; i<nrecvs; i++) {
659     values = rvalues + i*nmax;
660     for (j=0; j<lens[i]; j++) {
661       lrows[count++] = values[j] - base;
662     }
663   }
664   ierr = PetscFree(rvalues);CHKERRQ(ierr);
665   ierr = PetscFree(lens);CHKERRQ(ierr);
666   ierr = PetscFree(owner);CHKERRQ(ierr);
667   ierr = PetscFree(nprocs);CHKERRQ(ierr);
668 
669   /* actually zap the local rows */
670   /*
671         Zero the required rows. If the "diagonal block" of the matrix
672      is square and the user wishes to set the diagonal we use separate
673      code so that MatSetValues() is not called for each diagonal entry, which
674      would allocate new memory each time (many mallocs) and slow things down.
675 
676        Contributed by: Matthew Knepley
677   */
678   /* must zero l->B before l->A because the (diag) case below may put values into l->B */
679   ierr = MatZeroRows(l->B,slen,lrows,0.0);CHKERRQ(ierr);
680   if ((diag != 0.0) && (l->A->rmap->N == l->A->cmap->N)) {
681     ierr      = MatZeroRows(l->A,slen,lrows,diag);CHKERRQ(ierr);
682   } else if (diag != 0.0) {
683     ierr = MatZeroRows(l->A,slen,lrows,0.0);CHKERRQ(ierr);
684     if (((Mat_SeqAIJ*)l->A->data)->nonew) {
685       SETERRQ(PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\n\
686 MAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
687     }
688     for (i = 0; i < slen; i++) {
689       row  = lrows[i] + rstart;
690       ierr = MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr);
691     }
692     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
693     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
694   } else {
695     ierr = MatZeroRows(l->A,slen,lrows,0.0);CHKERRQ(ierr);
696   }
697   ierr = PetscFree(lrows);CHKERRQ(ierr);
698 
699   /* wait on sends */
700   if (nsends) {
701     ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
702     ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
703     ierr = PetscFree(send_status);CHKERRQ(ierr);
704   }
705   ierr = PetscFree(send_waits);CHKERRQ(ierr);
706   ierr = PetscFree(svalues);CHKERRQ(ierr);
707 
708   PetscFunctionReturn(0);
709 }
710 
711 #undef __FUNCT__
712 #define __FUNCT__ "MatMult_MPIAIJ"
713 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
714 {
715   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
716   PetscErrorCode ierr;
717   PetscInt       nt;
718 
719   PetscFunctionBegin;
720   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
721   if (nt != A->cmap->n) {
722     SETERRQ2(PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt);
723   }
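  /* Overlap communication with computation: start gathering the ghost entries of xx into
     a->lvec, apply the diagonal block A while the messages are in transit, then complete the
     scatter and add the off-diagonal contribution, i.e. yy = A*xx + B*lvec. */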
724   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
725   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
726   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
727   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
728   PetscFunctionReturn(0);
729 }
730 
731 #undef __FUNCT__
732 #define __FUNCT__ "MatMultAdd_MPIAIJ"
733 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
734 {
735   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
736   PetscErrorCode ierr;
737 
738   PetscFunctionBegin;
739   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
740   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
741   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
742   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
743   PetscFunctionReturn(0);
744 }
745 
746 #undef __FUNCT__
747 #define __FUNCT__ "MatMultTranspose_MPIAIJ"
748 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
749 {
750   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
751   PetscErrorCode ierr;
752   PetscTruth     merged;
753 
754   PetscFunctionBegin;
755   ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr);
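  /* When the scatter is "merged" the communication already completes inside VecScatterBegin(),
     so the local multiply is ordered differently below; otherwise the usual begin/compute/end
     overlap is used. */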
756   /* do nondiagonal part */
757   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
758   if (!merged) {
759     /* send it on its way */
760     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
761     /* do local part */
762     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
763     /* receive remote parts: note this assumes the values are not actually
764        added into yy until the VecScatterEnd() below completes */
765     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
766   } else {
767     /* do local part */
768     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
769     /* send it on its way */
770     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
771     /* values actually were received in the Begin() but we need to call this nop */
772     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
773   }
774   PetscFunctionReturn(0);
775 }
776 
777 EXTERN_C_BEGIN
778 #undef __FUNCT__
779 #define __FUNCT__ "MatIsTranspose_MPIAIJ"
780 PetscErrorCode PETSCMAT_DLLEXPORT MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscTruth *f)
781 {
782   MPI_Comm       comm;
783   Mat_MPIAIJ     *Aij = (Mat_MPIAIJ *) Amat->data, *Bij;
784   Mat            Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs;
785   IS             Me,Notme;
786   PetscErrorCode ierr;
787   PetscInt       M,N,first,last,*notme,i;
788   PetscMPIInt    size;
789 
790   PetscFunctionBegin;
791 
792   /* Easy test: symmetric diagonal block */
793   Bij = (Mat_MPIAIJ *) Bmat->data; Bdia = Bij->A;
794   ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr);
795   if (!*f) PetscFunctionReturn(0);
796   ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr);
797   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
798   if (size == 1) PetscFunctionReturn(0);
799 
800   /* Hard test: off-diagonal block. This takes a MatGetSubMatrix. */
801   ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr);
802   ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr);
803   ierr = PetscMalloc((N-last+first)*sizeof(PetscInt),&notme);CHKERRQ(ierr);
804   for (i=0; i<first; i++) notme[i] = i;
805   for (i=last; i<M; i++) notme[i-last+first] = i;
806   ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,&Notme);CHKERRQ(ierr);
807   ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr);
808   ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr);
809   Aoff = Aoffs[0];
810   ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr);
811   Boff = Boffs[0];
812   ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr);
813   ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr);
814   ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr);
815   ierr = ISDestroy(Me);CHKERRQ(ierr);
816   ierr = ISDestroy(Notme);CHKERRQ(ierr);
817 
818   PetscFunctionReturn(0);
819 }
820 EXTERN_C_END
821 
822 #undef __FUNCT__
823 #define __FUNCT__ "MatMultTransposeAdd_MPIAIJ"
824 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
825 {
826   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
827   PetscErrorCode ierr;
828 
829   PetscFunctionBegin;
830   /* do nondiagonal part */
831   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
832   /* send it on its way */
833   ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
834   /* do local part */
835   ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
836   /* receive remote parts */
837   ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
838   PetscFunctionReturn(0);
839 }
840 
841 /*
842   This only works correctly for square matrices where the subblock A->A is the
843    diagonal block
844 */
845 #undef __FUNCT__
846 #define __FUNCT__ "MatGetDiagonal_MPIAIJ"
847 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v)
848 {
849   PetscErrorCode ierr;
850   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
851 
852   PetscFunctionBegin;
853   if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
854   if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) {
855     SETERRQ(PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
856   }
857   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
858   PetscFunctionReturn(0);
859 }
860 
861 #undef __FUNCT__
862 #define __FUNCT__ "MatScale_MPIAIJ"
863 PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa)
864 {
865   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
866   PetscErrorCode ierr;
867 
868   PetscFunctionBegin;
869   ierr = MatScale(a->A,aa);CHKERRQ(ierr);
870   ierr = MatScale(a->B,aa);CHKERRQ(ierr);
871   PetscFunctionReturn(0);
872 }
873 
874 #undef __FUNCT__
875 #define __FUNCT__ "MatDestroy_MPIAIJ"
876 PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
877 {
878   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
879   PetscErrorCode ierr;
880 
881   PetscFunctionBegin;
882 #if defined(PETSC_USE_LOG)
883   PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N);
884 #endif
885   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
886   ierr = MatDestroy(aij->A);CHKERRQ(ierr);
887   ierr = MatDestroy(aij->B);CHKERRQ(ierr);
888 #if defined (PETSC_USE_CTABLE)
889   if (aij->colmap) {ierr = PetscTableDestroy(aij->colmap);CHKERRQ(ierr);}
890 #else
891   ierr = PetscFree(aij->colmap);CHKERRQ(ierr);
892 #endif
893   ierr = PetscFree(aij->garray);CHKERRQ(ierr);
894   if (aij->lvec)   {ierr = VecDestroy(aij->lvec);CHKERRQ(ierr);}
895   if (aij->Mvctx)  {ierr = VecScatterDestroy(aij->Mvctx);CHKERRQ(ierr);}
896   ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr);
897   ierr = PetscFree(aij->ld);CHKERRQ(ierr);
898   ierr = PetscFree(aij);CHKERRQ(ierr);
899 
900   ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
901   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr);
902   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr);
903   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr);
904   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C","",PETSC_NULL);CHKERRQ(ierr);
905   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
906   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr);
907   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C","",PETSC_NULL);CHKERRQ(ierr);
908   PetscFunctionReturn(0);
909 }
910 
911 #undef __FUNCT__
912 #define __FUNCT__ "MatView_MPIAIJ_Binary"
913 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
914 {
915   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
916   Mat_SeqAIJ*       A = (Mat_SeqAIJ*)aij->A->data;
917   Mat_SeqAIJ*       B = (Mat_SeqAIJ*)aij->B->data;
918   PetscErrorCode    ierr;
919   PetscMPIInt       rank,size,tag = ((PetscObject)viewer)->tag;
920   int               fd;
921   PetscInt          nz,header[4],*row_lengths,*range=0,rlen,i;
922   PetscInt          nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap->rstart,rnz;
923   PetscScalar       *column_values;
924 
925   PetscFunctionBegin;
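  /* Layout written to the binary file: a header [MAT_FILE_COOKIE, M, N, total nonzeros],
     then the row lengths of every row, then all column indices, then all numerical values.
     Process 0 writes its own data and receives each other process's contribution in turn. */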
926   ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr);
927   ierr = MPI_Comm_size(((PetscObject)mat)->comm,&size);CHKERRQ(ierr);
928   nz   = A->nz + B->nz;
929   if (!rank) {
930     header[0] = MAT_FILE_COOKIE;
931     header[1] = mat->rmap->N;
932     header[2] = mat->cmap->N;
933     ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
934     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
935     ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
936     /* get largest number of rows any processor has */
937     rlen = mat->rmap->n;
938     range = mat->rmap->range;
939     for (i=1; i<size; i++) {
940       rlen = PetscMax(rlen,range[i+1] - range[i]);
941     }
942   } else {
943     ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
944     rlen = mat->rmap->n;
945   }
946 
947   /* load up the local row counts */
948   ierr = PetscMalloc((rlen+1)*sizeof(PetscInt),&row_lengths);CHKERRQ(ierr);
949   for (i=0; i<mat->rmap->n; i++) {
950     row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
951   }
952 
953   /* store the row lengths to the file */
954   if (!rank) {
955     MPI_Status status;
956     ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap->n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
957     for (i=1; i<size; i++) {
958       rlen = range[i+1] - range[i];
959       ierr = MPI_Recv(row_lengths,rlen,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
960       ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
961     }
962   } else {
963     ierr = MPI_Send(row_lengths,mat->rmap->n,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
964   }
965   ierr = PetscFree(row_lengths);CHKERRQ(ierr);
966 
967   /* load up the local column indices */
968   nzmax = nz; /* process 0 needs as much space as the largest processor needs */
969   ierr = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
970   ierr = PetscMalloc((nzmax+1)*sizeof(PetscInt),&column_indices);CHKERRQ(ierr);
971   cnt  = 0;
972   for (i=0; i<mat->rmap->n; i++) {
973     for (j=B->i[i]; j<B->i[i+1]; j++) {
974       if ( (col = garray[B->j[j]]) > cstart) break;
975       column_indices[cnt++] = col;
976     }
977     for (k=A->i[i]; k<A->i[i+1]; k++) {
978       column_indices[cnt++] = A->j[k] + cstart;
979     }
980     for (; j<B->i[i+1]; j++) {
981       column_indices[cnt++] = garray[B->j[j]];
982     }
983   }
984   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
985 
986   /* store the column indices to the file */
987   if (!rank) {
988     MPI_Status status;
989     ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
990     for (i=1; i<size; i++) {
991       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
992       if (rnz > nzmax) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: rnz = %D nzmax = %D",rnz,nzmax);
993       ierr = MPI_Recv(column_indices,rnz,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
994       ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
995     }
996   } else {
997     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
998     ierr = MPI_Send(column_indices,nz,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
999   }
1000   ierr = PetscFree(column_indices);CHKERRQ(ierr);
1001 
1002   /* load up the local column values */
1003   ierr = PetscMalloc((nzmax+1)*sizeof(PetscScalar),&column_values);CHKERRQ(ierr);
1004   cnt  = 0;
1005   for (i=0; i<mat->rmap->n; i++) {
1006     for (j=B->i[i]; j<B->i[i+1]; j++) {
1007       if ( garray[B->j[j]] > cstart) break;
1008       column_values[cnt++] = B->a[j];
1009     }
1010     for (k=A->i[i]; k<A->i[i+1]; k++) {
1011       column_values[cnt++] = A->a[k];
1012     }
1013     for (; j<B->i[i+1]; j++) {
1014       column_values[cnt++] = B->a[j];
1015     }
1016   }
1017   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
1018 
1019   /* store the column values to the file */
1020   if (!rank) {
1021     MPI_Status status;
1022     ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1023     for (i=1; i<size; i++) {
1024       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
1025       if (rnz > nzmax) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: rnz = %D nzmax = %D",rnz,nzmax);
1026       ierr = MPI_Recv(column_values,rnz,MPIU_SCALAR,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
1027       ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1028     }
1029   } else {
1030     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1031     ierr = MPI_Send(column_values,nz,MPIU_SCALAR,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1032   }
1033   ierr = PetscFree(column_values);CHKERRQ(ierr);
1034   PetscFunctionReturn(0);
1035 }
1036 
1037 #undef __FUNCT__
1038 #define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket"
1039 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
1040 {
1041   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
1042   PetscErrorCode    ierr;
1043   PetscMPIInt       rank = aij->rank,size = aij->size;
1044   PetscTruth        isdraw,iascii,isbinary;
1045   PetscViewer       sviewer;
1046   PetscViewerFormat format;
1047 
1048   PetscFunctionBegin;
1049   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
1050   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
1051   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
1052   if (iascii) {
1053     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1054     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1055       MatInfo    info;
1056       PetscTruth inodes;
1057 
1058       ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr);
1059       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
1060       ierr = MatInodeGetInodeSizes(aij->A,PETSC_NULL,(PetscInt **)&inodes,PETSC_NULL);CHKERRQ(ierr);
1061       if (!inodes) {
1062         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, not using I-node routines\n",
1063 					      rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
1064       } else {
1065         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, using I-node routines\n",
1066 		    rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
1067       }
1068       ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
1069       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1070       ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
1071       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1072       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1073       ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
1074       ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr);
1075       PetscFunctionReturn(0);
1076     } else if (format == PETSC_VIEWER_ASCII_INFO) {
1077       PetscInt   inodecount,inodelimit,*inodes;
1078       ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr);
1079       if (inodes) {
1080         ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr);
1081       } else {
1082         ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr);
1083       }
1084       PetscFunctionReturn(0);
1085     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1086       PetscFunctionReturn(0);
1087     }
1088   } else if (isbinary) {
1089     if (size == 1) {
1090       ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1091       ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
1092     } else {
1093       ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr);
1094     }
1095     PetscFunctionReturn(0);
1096   } else if (isdraw) {
1097     PetscDraw  draw;
1098     PetscTruth isnull;
1099     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
1100     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
1101   }
1102 
1103   if (size == 1) {
1104     ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1105     ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
1106   } else {
1107     /* assemble the entire matrix onto first processor. */
1108     Mat         A;
1109     Mat_SeqAIJ  *Aloc;
1110     PetscInt    M = mat->rmap->N,N = mat->cmap->N,m,*ai,*aj,row,*cols,i,*ct;
1111     MatScalar   *a;
1112 
1113     if (mat->rmap->N > 1024) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"ASCII matrix output not allowed for matrices with more than 1024 rows, use binary format instead");
1114 
1115     ierr = MatCreate(((PetscObject)mat)->comm,&A);CHKERRQ(ierr);
1116     if (!rank) {
1117       ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
1118     } else {
1119       ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
1120     }
1121     /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */
1122     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
1123     ierr = MatMPIAIJSetPreallocation(A,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
1124     ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr);
1125 
1126     /* copy over the A part */
1127     Aloc = (Mat_SeqAIJ*)aij->A->data;
1128     m = aij->A->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1129     row = mat->rmap->rstart;
1130     for (i=0; i<ai[m]; i++) {aj[i] += mat->cmap->rstart ;}
1131     for (i=0; i<m; i++) {
1132       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
1133       row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
1134     }
1135     aj = Aloc->j;
1136     for (i=0; i<ai[m]; i++) {aj[i] -= mat->cmap->rstart;}
1137 
1138     /* copy over the B part */
1139     Aloc = (Mat_SeqAIJ*)aij->B->data;
1140     m    = aij->B->rmap->n;  ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1141     row  = mat->rmap->rstart;
1142     ierr = PetscMalloc((ai[m]+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr);
1143     ct   = cols;
1144     for (i=0; i<ai[m]; i++) {cols[i] = aij->garray[aj[i]];}
1145     for (i=0; i<m; i++) {
1146       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
1147       row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
1148     }
1149     ierr = PetscFree(ct);CHKERRQ(ierr);
1150     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1151     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1152     /*
1153        Everyone has to call to draw the matrix since the graphics waits are
1154        synchronized across all processors that share the PetscDraw object
1155     */
1156     ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
1157     if (!rank) {
1158       ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1159       ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
1160     }
1161     ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
1162     ierr = MatDestroy(A);CHKERRQ(ierr);
1163   }
1164   PetscFunctionReturn(0);
1165 }
1166 
1167 #undef __FUNCT__
1168 #define __FUNCT__ "MatView_MPIAIJ"
1169 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer)
1170 {
1171   PetscErrorCode ierr;
1172   PetscTruth     iascii,isdraw,issocket,isbinary;
1173 
1174   PetscFunctionBegin;
1175   ierr  = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
1176   ierr  = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
1177   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
1178   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr);
1179   if (iascii || isdraw || isbinary || issocket) {
1180     ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
1181   } else {
1182     SETERRQ1(PETSC_ERR_SUP,"Viewer type %s not supported by MPIAIJ matrices",((PetscObject)viewer)->type_name);
1183   }
1184   PetscFunctionReturn(0);
1185 }
1186 
1187 #undef __FUNCT__
1188 #define __FUNCT__ "MatRelax_MPIAIJ"
1189 PetscErrorCode MatRelax_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
1190 {
1191   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1192   PetscErrorCode ierr;
1193   Vec            bb1;
1194 
1195   PetscFunctionBegin;
1196   ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
1197 
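  /* This is processor-local SOR: each outer iteration refreshes the ghost values of xx,
     forms the modified right-hand side bb1 = bb - B*x_ghost, and then runs lits local sweeps
     of SOR on the diagonal block A; a true parallel SOR is not supported. */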
1198   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
1199     if (flag & SOR_ZERO_INITIAL_GUESS) {
1200       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr);
1201       its--;
1202     }
1203 
1204     while (its--) {
1205       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1206       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1207 
1208       /* update rhs: bb1 = bb - B*x */
1209       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1210       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1211 
1212       /* local sweep */
1213       ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr);
1214     }
1215   } else if (flag & SOR_LOCAL_FORWARD_SWEEP){
1216     if (flag & SOR_ZERO_INITIAL_GUESS) {
1217       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr);
1218       its--;
1219     }
1220     while (its--) {
1221       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1222       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1223 
1224       /* update rhs: bb1 = bb - B*x */
1225       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1226       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1227 
1228       /* local sweep */
1229       ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr);
1230     }
1231   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){
1232     if (flag & SOR_ZERO_INITIAL_GUESS) {
1233       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr);
1234       its--;
1235     }
1236     while (its--) {
1237       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1238       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1239 
1240       /* update rhs: bb1 = bb - B*x */
1241       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1242       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1243 
1244       /* local sweep */
1245       ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr);
1246     }
1247   } else {
1248     SETERRQ(PETSC_ERR_SUP,"Parallel SOR not supported");
1249   }
1250 
1251   ierr = VecDestroy(bb1);CHKERRQ(ierr);
1252   PetscFunctionReturn(0);
1253 }
1254 
1255 #undef __FUNCT__
1256 #define __FUNCT__ "MatPermute_MPIAIJ"
1257 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B)
1258 {
1259   MPI_Comm       comm,pcomm;
1260   PetscInt       first,local_size,nrows,*rows;
1261   int            ntids;
1262   IS             crowp,growp,irowp,lrowp,lcolp,icolp;
1263   PetscErrorCode ierr;
1264 
1265   PetscFunctionBegin;
1266   ierr = PetscObjectGetComm((PetscObject)A,&comm); CHKERRQ(ierr);
1267   /* make a collective version of 'rowp' */
1268   ierr = PetscObjectGetComm((PetscObject)rowp,&pcomm); CHKERRQ(ierr);
1269   if (pcomm==comm) {
1270     crowp = rowp;
1271   } else {
1272     ierr = ISGetSize(rowp,&nrows); CHKERRQ(ierr);
1273     ierr = ISGetIndices(rowp,&rows); CHKERRQ(ierr);
1274     ierr = ISCreateGeneral(comm,nrows,rows,&crowp); CHKERRQ(ierr);
1275     ierr = ISRestoreIndices(rowp,&rows); CHKERRQ(ierr);
1276   }
1277   /* collect the global row permutation and invert it */
1278   ierr = ISAllGather(crowp,&growp); CHKERRQ(ierr);
1279   ierr = ISSetPermutation(growp); CHKERRQ(ierr);
1280   if (pcomm!=comm) {
1281     ierr = ISDestroy(crowp); CHKERRQ(ierr);
1282   }
1283   ierr = ISInvertPermutation(growp,PETSC_DECIDE,&irowp);CHKERRQ(ierr);
1284   /* get the local target indices */
1285   ierr = MatGetOwnershipRange(A,&first,PETSC_NULL); CHKERRQ(ierr);
1286   ierr = MatGetLocalSize(A,&local_size,PETSC_NULL); CHKERRQ(ierr);
1287   ierr = ISGetIndices(irowp,&rows); CHKERRQ(ierr);
1288   ierr = ISCreateGeneral(MPI_COMM_SELF,local_size,rows+first,&lrowp); CHKERRQ(ierr);
1289   ierr = ISRestoreIndices(irowp,&rows); CHKERRQ(ierr);
1290   ierr = ISDestroy(irowp); CHKERRQ(ierr);
1291   /* the column permutation is simpler;
1292      make a local version of 'colp' and invert it */
1293   ierr = PetscObjectGetComm((PetscObject)colp,&pcomm); CHKERRQ(ierr);
1294   ierr = MPI_Comm_size(pcomm,&ntids); CHKERRQ(ierr);
1295   if (ntids==1) {
1296     lcolp = colp;
1297   } else {
1298     ierr = ISGetSize(colp,&nrows); CHKERRQ(ierr);
1299     ierr = ISGetIndices(colp,&rows); CHKERRQ(ierr);
1300     ierr = ISCreateGeneral(MPI_COMM_SELF,nrows,rows,&lcolp); CHKERRQ(ierr);
1301   }
1302   ierr = ISSetPermutation(lcolp); CHKERRQ(ierr);
1303   ierr = ISInvertPermutation(lcolp,PETSC_DECIDE,&icolp); CHKERRQ(ierr);
1304   if (ntids>1) {
1305     ierr = ISRestoreIndices(colp,&rows); CHKERRQ(ierr);
1306     ierr = ISDestroy(lcolp); CHKERRQ(ierr);
1307   }
1308   /* now we just get the submatrix */
1309   ierr = MatGetSubMatrix(A,lrowp,icolp,local_size,MAT_INITIAL_MATRIX,B); CHKERRQ(ierr);
1310   /* clean up */
1311   ierr = ISDestroy(lrowp); CHKERRQ(ierr);
1312   ierr = ISDestroy(icolp); CHKERRQ(ierr);
1313   PetscFunctionReturn(0);
1314 }
1315 
1316 #undef __FUNCT__
1317 #define __FUNCT__ "MatGetInfo_MPIAIJ"
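/*
    Sums the statistics of the diagonal (A) and off-diagonal (B) blocks; for
    MAT_GLOBAL_MAX and MAT_GLOBAL_SUM the per-process totals are reduced over
    the matrix communicator with MPI_Allreduce().
*/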
1318 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1319 {
1320   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1321   Mat            A = mat->A,B = mat->B;
1322   PetscErrorCode ierr;
1323   PetscReal      isend[5],irecv[5];
1324 
1325   PetscFunctionBegin;
1326   info->block_size     = 1.0;
1327   ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1328   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1329   isend[3] = info->memory;  isend[4] = info->mallocs;
1330   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1331   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1332   isend[3] += info->memory;  isend[4] += info->mallocs;
1333   if (flag == MAT_LOCAL) {
1334     info->nz_used      = isend[0];
1335     info->nz_allocated = isend[1];
1336     info->nz_unneeded  = isend[2];
1337     info->memory       = isend[3];
1338     info->mallocs      = isend[4];
1339   } else if (flag == MAT_GLOBAL_MAX) {
1340     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,((PetscObject)matin)->comm);CHKERRQ(ierr);
1341     info->nz_used      = irecv[0];
1342     info->nz_allocated = irecv[1];
1343     info->nz_unneeded  = irecv[2];
1344     info->memory       = irecv[3];
1345     info->mallocs      = irecv[4];
1346   } else if (flag == MAT_GLOBAL_SUM) {
1347     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,((PetscObject)matin)->comm);CHKERRQ(ierr);
1348     info->nz_used      = irecv[0];
1349     info->nz_allocated = irecv[1];
1350     info->nz_unneeded  = irecv[2];
1351     info->memory       = irecv[3];
1352     info->mallocs      = irecv[4];
1353   }
1354   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1355   info->fill_ratio_needed = 0;
1356   info->factor_mallocs    = 0;
1357   info->rows_global       = (double)matin->rmap->N;
1358   info->columns_global    = (double)matin->cmap->N;
1359   info->rows_local        = (double)matin->rmap->n;
1360   info->columns_local     = (double)matin->cmap->N;
1361 
1362   PetscFunctionReturn(0);
1363 }
1364 
1365 #undef __FUNCT__
1366 #define __FUNCT__ "MatSetOption_MPIAIJ"
1367 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscTruth flg)
1368 {
1369   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1370   PetscErrorCode ierr;
1371 
1372   PetscFunctionBegin;
1373   switch (op) {
1374   case MAT_NEW_NONZERO_LOCATIONS:
1375   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1376   case MAT_KEEP_ZEROED_ROWS:
1377   case MAT_NEW_NONZERO_LOCATION_ERR:
1378   case MAT_USE_INODES:
1379   case MAT_IGNORE_ZERO_ENTRIES:
1380     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1381     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1382     break;
1383   case MAT_ROW_ORIENTED:
1384     a->roworiented = flg;
1385     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1386     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1387     break;
1388   case MAT_NEW_DIAGONALS:
1389     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1390     break;
1391   case MAT_IGNORE_OFF_PROC_ENTRIES:
1392     a->donotstash = PETSC_TRUE;
1393     break;
1394   case MAT_SYMMETRIC:
1395     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1396     break;
1397   case MAT_STRUCTURALLY_SYMMETRIC:
1398   case MAT_HERMITIAN:
1399   case MAT_SYMMETRY_ETERNAL:
1400     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1401     break;
1402   default:
1403     SETERRQ1(PETSC_ERR_SUP,"unknown option %d",op);
1404   }
1405   PetscFunctionReturn(0);
1406 }
1407 
1408 #undef __FUNCT__
1409 #define __FUNCT__ "MatGetRow_MPIAIJ"
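/*
    Returns a row of the parallel matrix by merging the corresponding rows of
    the diagonal (A) and off-diagonal (B) blocks into a single list with global,
    increasing column indices. Only rows owned by this process may be requested.
*/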
1410 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1411 {
1412   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1413   PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
1414   PetscErrorCode ierr;
1415   PetscInt       i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart;
1416   PetscInt       nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend;
1417   PetscInt       *cmap,*idx_p;
1418 
1419   PetscFunctionBegin;
1420   if (mat->getrowactive) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Already active");
1421   mat->getrowactive = PETSC_TRUE;
1422 
1423   if (!mat->rowvalues && (idx || v)) {
1424     /*
1425         allocate enough space to hold information from the longest row.
1426     */
1427     Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data;
1428     PetscInt     max = 1,tmp;
1429     for (i=0; i<matin->rmap->n; i++) {
1430       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1431       if (max < tmp) { max = tmp; }
1432     }
1433     ierr = PetscMalloc(max*(sizeof(PetscInt)+sizeof(PetscScalar)),&mat->rowvalues);CHKERRQ(ierr);
1434     mat->rowindices = (PetscInt*)(mat->rowvalues + max);
1435   }
1436 
1437   if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Only local rows");
1438   lrow = row - rstart;
1439 
1440   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1441   if (!v)   {pvA = 0; pvB = 0;}
1442   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1443   ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1444   ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1445   nztot = nzA + nzB;
1446 
1447   cmap  = mat->garray;
1448   if (v  || idx) {
1449     if (nztot) {
1450       /* Sort by increasing column numbers, assuming A and B already sorted */
1451       PetscInt imark = -1;
1452       if (v) {
1453         *v = v_p = mat->rowvalues;
1454         for (i=0; i<nzB; i++) {
1455           if (cmap[cworkB[i]] < cstart)   v_p[i] = vworkB[i];
1456           else break;
1457         }
1458         imark = i;
1459         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1460         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1461       }
1462       if (idx) {
1463         *idx = idx_p = mat->rowindices;
1464         if (imark > -1) {
1465           for (i=0; i<imark; i++) {
1466             idx_p[i] = cmap[cworkB[i]];
1467           }
1468         } else {
1469           for (i=0; i<nzB; i++) {
1470             if (cmap[cworkB[i]] < cstart)   idx_p[i] = cmap[cworkB[i]];
1471             else break;
1472           }
1473           imark = i;
1474         }
1475         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart + cworkA[i];
1476         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]];
1477       }
1478     } else {
1479       if (idx) *idx = 0;
1480       if (v)   *v   = 0;
1481     }
1482   }
1483   *nz = nztot;
1484   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1485   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1486   PetscFunctionReturn(0);
1487 }
1488 
1489 #undef __FUNCT__
1490 #define __FUNCT__ "MatRestoreRow_MPIAIJ"
1491 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1492 {
1493   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1494 
1495   PetscFunctionBegin;
1496   if (!aij->getrowactive) {
1497     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
1498   }
1499   aij->getrowactive = PETSC_FALSE;
1500   PetscFunctionReturn(0);
1501 }
1502 
1503 #undef __FUNCT__
1504 #define __FUNCT__ "MatNorm_MPIAIJ"
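/*
    On a single process the norm of the diagonal block is returned directly.
    Otherwise:
      NORM_FROBENIUS: sum of squares of all local entries, reduced with MPI_SUM.
      NORM_1:         per-column absolute sums, reduced with MPI_SUM, then the maximum is taken.
      NORM_INFINITY:  per-row absolute sums, reduced with MPI_MAX.
    The two norm is not supported.
*/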
1505 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm)
1506 {
1507   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1508   Mat_SeqAIJ     *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data;
1509   PetscErrorCode ierr;
1510   PetscInt       i,j,cstart = mat->cmap->rstart;
1511   PetscReal      sum = 0.0;
1512   MatScalar      *v;
1513 
1514   PetscFunctionBegin;
1515   if (aij->size == 1) {
1516     ierr =  MatNorm(aij->A,type,norm);CHKERRQ(ierr);
1517   } else {
1518     if (type == NORM_FROBENIUS) {
1519       v = amat->a;
1520       for (i=0; i<amat->nz; i++) {
1521 #if defined(PETSC_USE_COMPLEX)
1522         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1523 #else
1524         sum += (*v)*(*v); v++;
1525 #endif
1526       }
1527       v = bmat->a;
1528       for (i=0; i<bmat->nz; i++) {
1529 #if defined(PETSC_USE_COMPLEX)
1530         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1531 #else
1532         sum += (*v)*(*v); v++;
1533 #endif
1534       }
1535       ierr = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPI_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr);
1536       *norm = sqrt(*norm);
1537     } else if (type == NORM_1) { /* max column norm */
1538       PetscReal *tmp,*tmp2;
1539       PetscInt  *jj,*garray = aij->garray;
1540       ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr);
1541       ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscReal),&tmp2);CHKERRQ(ierr);
1542       ierr = PetscMemzero(tmp,mat->cmap->N*sizeof(PetscReal));CHKERRQ(ierr);
1543       *norm = 0.0;
1544       v = amat->a; jj = amat->j;
1545       for (j=0; j<amat->nz; j++) {
1546         tmp[cstart + *jj++ ] += PetscAbsScalar(*v);  v++;
1547       }
1548       v = bmat->a; jj = bmat->j;
1549       for (j=0; j<bmat->nz; j++) {
1550         tmp[garray[*jj++]] += PetscAbsScalar(*v); v++;
1551       }
1552       ierr = MPI_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPI_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr);
1553       for (j=0; j<mat->cmap->N; j++) {
1554         if (tmp2[j] > *norm) *norm = tmp2[j];
1555       }
1556       ierr = PetscFree(tmp);CHKERRQ(ierr);
1557       ierr = PetscFree(tmp2);CHKERRQ(ierr);
1558     } else if (type == NORM_INFINITY) { /* max row norm */
1559       PetscReal ntemp = 0.0;
1560       for (j=0; j<aij->A->rmap->n; j++) {
1561         v = amat->a + amat->i[j];
1562         sum = 0.0;
1563         for (i=0; i<amat->i[j+1]-amat->i[j]; i++) {
1564           sum += PetscAbsScalar(*v); v++;
1565         }
1566         v = bmat->a + bmat->i[j];
1567         for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) {
1568           sum += PetscAbsScalar(*v); v++;
1569         }
1570         if (sum > ntemp) ntemp = sum;
1571       }
1572       ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPI_MAX,((PetscObject)mat)->comm);CHKERRQ(ierr);
1573     } else {
1574       SETERRQ(PETSC_ERR_SUP,"No support for two norm");
1575     }
1576   }
1577   PetscFunctionReturn(0);
1578 }
1579 
1580 #undef __FUNCT__
1581 #define __FUNCT__ "MatTranspose_MPIAIJ"
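/*
    Builds the transpose by preallocating with the column counts of the local
    diagonal block (the off-diagonal counts are approximated by the same values
    to avoid communication) and then inserting the rows of the A and B parts as
    columns of the result with MatSetValues(). For an in-place transpose the
    result replaces A via MatHeaderCopy().
*/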
1582 PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout)
1583 {
1584   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1585   Mat_SeqAIJ     *Aloc=(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data;
1586   PetscErrorCode ierr;
1587   PetscInt       M = A->rmap->N,N = A->cmap->N,ma,na,mb,*ai,*aj,*bi,*bj,row,*cols,*cols_tmp,i,*d_nnz;
1588   PetscInt       cstart=A->cmap->rstart,ncol;
1589   Mat            B;
1590   MatScalar      *array;
1591 
1592   PetscFunctionBegin;
1593   if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
1594 
1595   ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n;
1596   ai = Aloc->i; aj = Aloc->j;
1597   bi = Bloc->i; bj = Bloc->j;
1598   if (reuse == MAT_INITIAL_MATRIX || *matout == A) {
1599     /* compute d_nnz for preallocation; o_nnz is approximated by d_nnz to avoid communication */
1600     ierr = PetscMalloc((1+na)*sizeof(PetscInt),&d_nnz);CHKERRQ(ierr);
1601     ierr = PetscMemzero(d_nnz,(1+na)*sizeof(PetscInt));CHKERRQ(ierr);
1602     for (i=0; i<ai[ma]; i++){
1603       d_nnz[aj[i]] ++;
1604       aj[i] += cstart; /* global col index to be used by MatSetValues() */
1605     }
1606 
1607     ierr = MatCreate(((PetscObject)A)->comm,&B);CHKERRQ(ierr);
1608     ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
1609     ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
1610     ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,d_nnz);CHKERRQ(ierr);
1611     ierr = PetscFree(d_nnz);CHKERRQ(ierr);
1612   } else {
1613     B = *matout;
1614   }
1615 
1616   /* copy over the A part */
1617   array = Aloc->a;
1618   row = A->rmap->rstart;
1619   for (i=0; i<ma; i++) {
1620     ncol = ai[i+1]-ai[i];
1621     ierr = MatSetValues(B,ncol,aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1622     row++; array += ncol; aj += ncol;
1623   }
1624   aj = Aloc->j;
1625   for (i=0; i<ai[ma]; i++) aj[i] -= cstart; /* restore the local col index */
1626 
1627   /* copy over the B part */
1628   ierr = PetscMalloc(bi[mb]*sizeof(PetscInt),&cols);CHKERRQ(ierr);
1629   ierr = PetscMemzero(cols,bi[mb]*sizeof(PetscInt));CHKERRQ(ierr);
1630   array = Bloc->a;
1631   row = A->rmap->rstart;
1632   for (i=0; i<bi[mb]; i++) {cols[i] = a->garray[bj[i]];}
1633   cols_tmp = cols;
1634   for (i=0; i<mb; i++) {
1635     ncol = bi[i+1]-bi[i];
1636     ierr = MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1637     row++; array += ncol; cols_tmp += ncol;
1638   }
1639   ierr = PetscFree(cols);CHKERRQ(ierr);
1640 
1641   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1642   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1643   if (reuse == MAT_INITIAL_MATRIX || *matout != A) {
1644     *matout = B;
1645   } else {
1646     ierr = MatHeaderCopy(A,B);CHKERRQ(ierr);
1647   }
1648   PetscFunctionReturn(0);
1649 }
1650 
1651 #undef __FUNCT__
1652 #define __FUNCT__ "MatDiagonalScale_MPIAIJ"
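/*
    Computes mat = diag(ll)*mat*diag(rr). The scatter of rr into lvec is
    overlapped with the left scaling of the off-diagonal block and the scaling
    of the diagonal block; the right scaling of the off-diagonal block is done
    once the scatter has completed.
*/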
1653 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
1654 {
1655   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1656   Mat            a = aij->A,b = aij->B;
1657   PetscErrorCode ierr;
1658   PetscInt       s1,s2,s3;
1659 
1660   PetscFunctionBegin;
1661   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
1662   if (rr) {
1663     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
1664     if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
1665     /* Overlap communication with computation. */
1666     ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1667   }
1668   if (ll) {
1669     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
1670     if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
1671     ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr);
1672   }
1673   /* scale  the diagonal block */
1674   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
1675 
1676   if (rr) {
1677     /* Do a scatter end and then right scale the off-diagonal block */
1678     ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1679     ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr);
1680   }
1681 
1682   PetscFunctionReturn(0);
1683 }
1684 
1685 #undef __FUNCT__
1686 #define __FUNCT__ "MatSetBlockSize_MPIAIJ"
1687 PetscErrorCode MatSetBlockSize_MPIAIJ(Mat A,PetscInt bs)
1688 {
1689   Mat_MPIAIJ     *a   = (Mat_MPIAIJ*)A->data;
1690   PetscErrorCode ierr;
1691 
1692   PetscFunctionBegin;
1693   ierr = MatSetBlockSize(a->A,bs);CHKERRQ(ierr);
1694   ierr = MatSetBlockSize(a->B,bs);CHKERRQ(ierr);
1695   PetscFunctionReturn(0);
1696 }
1697 #undef __FUNCT__
1698 #define __FUNCT__ "MatSetUnfactored_MPIAIJ"
1699 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A)
1700 {
1701   Mat_MPIAIJ     *a   = (Mat_MPIAIJ*)A->data;
1702   PetscErrorCode ierr;
1703 
1704   PetscFunctionBegin;
1705   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
1706   PetscFunctionReturn(0);
1707 }
1708 
1709 #undef __FUNCT__
1710 #define __FUNCT__ "MatEqual_MPIAIJ"
1711 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscTruth *flag)
1712 {
1713   Mat_MPIAIJ     *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data;
1714   Mat            a,b,c,d;
1715   PetscTruth     flg;
1716   PetscErrorCode ierr;
1717 
1718   PetscFunctionBegin;
1719   a = matA->A; b = matA->B;
1720   c = matB->A; d = matB->B;
1721 
1722   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
1723   if (flg) {
1724     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
1725   }
1726   ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,((PetscObject)A)->comm);CHKERRQ(ierr);
1727   PetscFunctionReturn(0);
1728 }
1729 
1730 #undef __FUNCT__
1731 #define __FUNCT__ "MatCopy_MPIAIJ"
1732 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
1733 {
1734   PetscErrorCode ierr;
1735   Mat_MPIAIJ     *a = (Mat_MPIAIJ *)A->data;
1736   Mat_MPIAIJ     *b = (Mat_MPIAIJ *)B->data;
1737 
1738   PetscFunctionBegin;
1739   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
1740   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
1741     /* because of the column compression in the off-processor part of the matrix a->B,
1742        the number of columns in a->B and b->B may be different, hence we cannot call
1743        the MatCopy() directly on the two parts. If need be, we can provide a more
1744        efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices
1745        then copying the submatrices */
1746     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
1747   } else {
1748     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
1749     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
1750   }
1751   PetscFunctionReturn(0);
1752 }
1753 
1754 #undef __FUNCT__
1755 #define __FUNCT__ "MatSetUpPreallocation_MPIAIJ"
1756 PetscErrorCode MatSetUpPreallocation_MPIAIJ(Mat A)
1757 {
1758   PetscErrorCode ierr;
1759 
1760   PetscFunctionBegin;
1761   ierr =  MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
1762   PetscFunctionReturn(0);
1763 }
1764 
1765 #include "petscblaslapack.h"
1766 #undef __FUNCT__
1767 #define __FUNCT__ "MatAXPY_MPIAIJ"
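/*
    Y = a*X + Y. With SAME_NONZERO_PATTERN the stored values of both blocks are
    combined directly with BLAS axpy; with SUBSET_NONZERO_PATTERN a map from the
    nonzeros of X's off-diagonal block to those of Y's off-diagonal block is built
    and cached on Y so that repeated calls avoid the search; otherwise the generic
    MatAXPY_Basic() is used.
*/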
1768 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
1769 {
1770   PetscErrorCode ierr;
1771   PetscInt       i;
1772   Mat_MPIAIJ     *xx = (Mat_MPIAIJ *)X->data,*yy = (Mat_MPIAIJ *)Y->data;
1773   PetscBLASInt   bnz,one=1;
1774   Mat_SeqAIJ     *x,*y;
1775 
1776   PetscFunctionBegin;
1777   if (str == SAME_NONZERO_PATTERN) {
1778     PetscScalar alpha = a;
1779     x = (Mat_SeqAIJ *)xx->A->data;
1780     y = (Mat_SeqAIJ *)yy->A->data;
1781     bnz = PetscBLASIntCast(x->nz);
1782     BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
1783     x = (Mat_SeqAIJ *)xx->B->data;
1784     y = (Mat_SeqAIJ *)yy->B->data;
1785     bnz = PetscBLASIntCast(x->nz);
1786     BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
1787   } else if (str == SUBSET_NONZERO_PATTERN) {
1788     ierr = MatAXPY_SeqAIJ(yy->A,a,xx->A,str);CHKERRQ(ierr);
1789 
1790     x = (Mat_SeqAIJ *)xx->B->data;
1791     y = (Mat_SeqAIJ *)yy->B->data;
1792     if (y->xtoy && y->XtoY != xx->B) {
1793       ierr = PetscFree(y->xtoy);CHKERRQ(ierr);
1794       ierr = MatDestroy(y->XtoY);CHKERRQ(ierr);
1795     }
1796     if (!y->xtoy) { /* get xtoy */
1797       ierr = MatAXPYGetxtoy_Private(xx->B->rmap->n,x->i,x->j,xx->garray,y->i,y->j,yy->garray,&y->xtoy);CHKERRQ(ierr);
1798       y->XtoY = xx->B;
1799       ierr = PetscObjectReference((PetscObject)xx->B);CHKERRQ(ierr);
1800     }
1801     for (i=0; i<x->nz; i++) y->a[y->xtoy[i]] += a*(x->a[i]);
1802   } else {
1803     ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
1804   }
1805   PetscFunctionReturn(0);
1806 }
1807 
1808 EXTERN PetscErrorCode PETSCMAT_DLLEXPORT MatConjugate_SeqAIJ(Mat);
1809 
1810 #undef __FUNCT__
1811 #define __FUNCT__ "MatConjugate_MPIAIJ"
1812 PetscErrorCode PETSCMAT_DLLEXPORT MatConjugate_MPIAIJ(Mat mat)
1813 {
1814 #if defined(PETSC_USE_COMPLEX)
1815   PetscErrorCode ierr;
1816   Mat_MPIAIJ     *aij = (Mat_MPIAIJ *)mat->data;
1817 
1818   PetscFunctionBegin;
1819   ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr);
1820   ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr);
1821 #else
1822   PetscFunctionBegin;
1823 #endif
1824   PetscFunctionReturn(0);
1825 }
1826 
1827 #undef __FUNCT__
1828 #define __FUNCT__ "MatRealPart_MPIAIJ"
1829 PetscErrorCode MatRealPart_MPIAIJ(Mat A)
1830 {
1831   Mat_MPIAIJ   *a = (Mat_MPIAIJ*)A->data;
1832   PetscErrorCode ierr;
1833 
1834   PetscFunctionBegin;
1835   ierr = MatRealPart(a->A);CHKERRQ(ierr);
1836   ierr = MatRealPart(a->B);CHKERRQ(ierr);
1837   PetscFunctionReturn(0);
1838 }
1839 
1840 #undef __FUNCT__
1841 #define __FUNCT__ "MatImaginaryPart_MPIAIJ"
1842 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A)
1843 {
1844   Mat_MPIAIJ   *a = (Mat_MPIAIJ*)A->data;
1845   PetscErrorCode ierr;
1846 
1847   PetscFunctionBegin;
1848   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
1849   ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
1850   PetscFunctionReturn(0);
1851 }
1852 
1853 #ifdef PETSC_HAVE_PBGL
1854 
1855 #include <boost/parallel/mpi/bsp_process_group.hpp>
1856 #include <boost/graph/distributed/ilu_default_graph.hpp>
1857 #include <boost/graph/distributed/ilu_0_block.hpp>
1858 #include <boost/graph/distributed/ilu_preconditioner.hpp>
1859 #include <boost/graph/distributed/petsc/interface.hpp>
1860 #include <boost/multi_array.hpp>
1861 #include <boost/parallel/distributed_property_map.hpp>
1862 
1863 #undef __FUNCT__
1864 #define __FUNCT__ "MatILUFactorSymbolic_MPIAIJ"
1865 /*
1866   This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu>
1867 */
1868 PetscErrorCode MatILUFactorSymbolic_MPIAIJ(Mat fact,Mat A, IS isrow, IS iscol, MatFactorInfo *info)
1869 {
1870   namespace petsc = boost::distributed::petsc;
1871 
1872   namespace graph_dist = boost::graph::distributed;
1873   using boost::graph::distributed::ilu_default::process_group_type;
1874   using boost::graph::ilu_permuted;
1875 
1876   PetscTruth      row_identity, col_identity;
1877   PetscContainer  c;
1878   PetscInt        m, n, M, N;
1879   PetscErrorCode  ierr;
1880 
1881   PetscFunctionBegin;
1882   if (info->levels != 0) SETERRQ(PETSC_ERR_SUP,"Only levels = 0 supported for parallel ilu");
1883   ierr = ISIdentity(isrow, &row_identity);CHKERRQ(ierr);
1884   ierr = ISIdentity(iscol, &col_identity);CHKERRQ(ierr);
1885   if (!row_identity || !col_identity) {
1886     SETERRQ(PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for parallel ILU");
1887   }
1888 
1889   process_group_type pg;
1890   typedef graph_dist::ilu_default::ilu_level_graph_type  lgraph_type;
1891   lgraph_type*   lgraph_p = new lgraph_type(petsc::num_global_vertices(A), pg, petsc::matrix_distribution(A, pg));
1892   lgraph_type&   level_graph = *lgraph_p;
1893   graph_dist::ilu_default::graph_type&            graph(level_graph.graph);
1894 
1895   petsc::read_matrix(A, graph, get(boost::edge_weight, graph));
1896   ilu_permuted(level_graph);
1897 
1898   /* put together the new matrix */
1899   ierr = MatCreate(((PetscObject)A)->comm, fact);CHKERRQ(ierr);
1900   ierr = MatGetLocalSize(A, &m, &n);CHKERRQ(ierr);
1901   ierr = MatGetSize(A, &M, &N);CHKERRQ(ierr);
1902   ierr = MatSetSizes(fact, m, n, M, N);CHKERRQ(ierr);
1903   ierr = MatSetType(fact, ((PetscObject)A)->type_name);CHKERRQ(ierr);
1904   ierr = MatAssemblyBegin(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1905   ierr = MatAssemblyEnd(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1906 
1907   ierr = PetscContainerCreate(((PetscObject)A)->comm, &c);CHKERRQ(ierr);
1908   ierr = PetscContainerSetPointer(c, lgraph_p);CHKERRQ(ierr);
1909   ierr = PetscObjectCompose((PetscObject) (fact), "graph", (PetscObject) c);CHKERRQ(ierr);
1910   PetscFunctionReturn(0);
1911 }
1912 
1913 #undef __FUNCT__
1914 #define __FUNCT__ "MatLUFactorNumeric_MPIAIJ"
1915 PetscErrorCode MatLUFactorNumeric_MPIAIJ(Mat B,Mat A, MatFactorInfo *info)
1916 {
1917   PetscFunctionBegin;
1918   PetscFunctionReturn(0);
1919 }
1920 
1921 #undef __FUNCT__
1922 #define __FUNCT__ "MatSolve_MPIAIJ"
1923 /*
1924   This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu>
1925 */
1926 PetscErrorCode MatSolve_MPIAIJ(Mat A, Vec b, Vec x)
1927 {
1928   namespace graph_dist = boost::graph::distributed;
1929 
1930   typedef graph_dist::ilu_default::ilu_level_graph_type  lgraph_type;
1931   lgraph_type*   lgraph_p;
1932   PetscContainer c;
1933   PetscErrorCode ierr;
1934 
1935   PetscFunctionBegin;
1936   ierr = PetscObjectQuery((PetscObject) A, "graph", (PetscObject *) &c);CHKERRQ(ierr);
1937   ierr = PetscContainerGetPointer(c, (void **) &lgraph_p);CHKERRQ(ierr);
1938   ierr = VecCopy(b, x); CHKERRQ(ierr);
1939 
1940   PetscScalar* array_x;
1941   ierr = VecGetArray(x, &array_x);CHKERRQ(ierr);
1942   PetscInt sx;
1943   ierr = VecGetSize(x, &sx);CHKERRQ(ierr);
1944 
1945   PetscScalar* array_b;
1946   ierr = VecGetArray(b, &array_b);CHKERRQ(ierr);
1947   PetscInt sb;
1948   ierr = VecGetSize(b, &sb);CHKERRQ(ierr);
1949 
1950   lgraph_type&   level_graph = *lgraph_p;
1951   graph_dist::ilu_default::graph_type&            graph(level_graph.graph);
1952 
1953   typedef boost::multi_array_ref<PetscScalar, 1> array_ref_type;
1954   array_ref_type                                 ref_b(array_b, boost::extents[num_vertices(graph)]),
1955                                                  ref_x(array_x, boost::extents[num_vertices(graph)]);
1956 
1957   typedef boost::iterator_property_map<array_ref_type::iterator,
1958                                 boost::property_map<graph_dist::ilu_default::graph_type, boost::vertex_index_t>::type>  gvector_type;
1959   gvector_type                                   vector_b(ref_b.begin(), get(boost::vertex_index, graph)),
1960                                                  vector_x(ref_x.begin(), get(boost::vertex_index, graph));
1961 
1962   ilu_set_solve(*lgraph_p, vector_b, vector_x);
1963 
1964   PetscFunctionReturn(0);
1965 }
1966 #endif
1967 
1968 typedef struct { /* used by MatGetRedundantMatrix() for reusing matredundant */
1969   PetscInt       nzlocal,nsends,nrecvs;
1970   PetscMPIInt    *send_rank;
1971   PetscInt       *sbuf_nz,*sbuf_j,**rbuf_j;
1972   PetscScalar    *sbuf_a,**rbuf_a;
1973   PetscErrorCode (*MatDestroy)(Mat);
1974 } Mat_Redundant;
1975 
1976 #undef __FUNCT__
1977 #define __FUNCT__ "PetscContainerDestroy_MatRedundant"
1978 PetscErrorCode PetscContainerDestroy_MatRedundant(void *ptr)
1979 {
1980   PetscErrorCode       ierr;
1981   Mat_Redundant        *redund=(Mat_Redundant*)ptr;
1982   PetscInt             i;
1983 
1984   PetscFunctionBegin;
1985   ierr = PetscFree(redund->send_rank);CHKERRQ(ierr);
1986   ierr = PetscFree(redund->sbuf_j);CHKERRQ(ierr);
1987   ierr = PetscFree(redund->sbuf_a);CHKERRQ(ierr);
1988   for (i=0; i<redund->nrecvs; i++){
1989     ierr = PetscFree(redund->rbuf_j[i]);CHKERRQ(ierr);
1990     ierr = PetscFree(redund->rbuf_a[i]);CHKERRQ(ierr);
1991   }
1992   ierr = PetscFree3(redund->sbuf_nz,redund->rbuf_j,redund->rbuf_a);CHKERRQ(ierr);
1993   ierr = PetscFree(redund);CHKERRQ(ierr);
1994   PetscFunctionReturn(0);
1995 }
1996 
1997 #undef __FUNCT__
1998 #define __FUNCT__ "MatDestroy_MatRedundant"
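/*
    Restores the original destroy routine of the redundant matrix, detaches the
    Mat_Redundant container (whose user-destroy routine frees the cached
    communication buffers), and then destroys the matrix itself.
*/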
1999 PetscErrorCode MatDestroy_MatRedundant(Mat A)
2000 {
2001   PetscErrorCode  ierr;
2002   PetscContainer  container;
2003   Mat_Redundant   *redund=PETSC_NULL;
2004 
2005   PetscFunctionBegin;
2006   ierr = PetscObjectQuery((PetscObject)A,"Mat_Redundant",(PetscObject *)&container);CHKERRQ(ierr);
2007   if (container) {
2008     ierr = PetscContainerGetPointer(container,(void **)&redund);CHKERRQ(ierr);
2009   } else {
2010     SETERRQ(PETSC_ERR_PLIB,"Container does not exist");
2011   }
2012   A->ops->destroy = redund->MatDestroy;
2013   ierr = PetscObjectCompose((PetscObject)A,"Mat_Redundant",0);CHKERRQ(ierr);
2014   ierr = (*A->ops->destroy)(A);CHKERRQ(ierr);
2015   ierr = PetscContainerDestroy(container);CHKERRQ(ierr);
2016   PetscFunctionReturn(0);
2017 }
2018 
2019 #undef __FUNCT__
2020 #define __FUNCT__ "MatGetRedundantMatrix_MPIAIJ"
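/*
    Builds, on each subcommunicator, a complete (redundant) copy of the parallel
    matrix. The local rows are packed into row-pointer/column/value buffers,
    exchanged with the matching processes of the other subcommunicators using
    nonblocking sends and receives (separate tags for the sizes, the indices,
    and the values), and assembled into the redundant matrix. For MAT_REUSE_MATRIX
    the communication pattern and buffers cached in the attached Mat_Redundant
    container are reused and only the numerical values are shipped.
*/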
2021 PetscErrorCode MatGetRedundantMatrix_MPIAIJ(Mat mat,PetscInt nsubcomm,MPI_Comm subcomm,PetscInt mlocal_sub,MatReuse reuse,Mat *matredundant)
2022 {
2023   PetscMPIInt    rank,size;
2024   MPI_Comm       comm=((PetscObject)mat)->comm;
2025   PetscErrorCode ierr;
2026   PetscInt       nsends=0,nrecvs=0,i,rownz_max=0;
2027   PetscMPIInt    *send_rank=PETSC_NULL,*recv_rank=PETSC_NULL;
2028   PetscInt       *rowrange=mat->rmap->range;
2029   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2030   Mat            A=aij->A,B=aij->B,C=*matredundant;
2031   Mat_SeqAIJ     *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data;
2032   PetscScalar    *sbuf_a;
2033   PetscInt       nzlocal=a->nz+b->nz;
2034   PetscInt       j,cstart=mat->cmap->rstart,cend=mat->cmap->rend,row,nzA,nzB,ncols,*cworkA,*cworkB;
2035   PetscInt       rstart=mat->rmap->rstart,rend=mat->rmap->rend,*bmap=aij->garray,M,N;
2036   PetscInt       *cols,ctmp,lwrite,*rptr,l,*sbuf_j;
2037   MatScalar      *aworkA,*aworkB;
2038   PetscScalar    *vals;
2039   PetscMPIInt    tag1,tag2,tag3,imdex;
2040   MPI_Request    *s_waits1=PETSC_NULL,*s_waits2=PETSC_NULL,*s_waits3=PETSC_NULL,
2041                  *r_waits1=PETSC_NULL,*r_waits2=PETSC_NULL,*r_waits3=PETSC_NULL;
2042   MPI_Status     recv_status,*send_status;
2043   PetscInt       *sbuf_nz=PETSC_NULL,*rbuf_nz=PETSC_NULL,count;
2044   PetscInt       **rbuf_j=PETSC_NULL;
2045   PetscScalar    **rbuf_a=PETSC_NULL;
2046   Mat_Redundant  *redund=PETSC_NULL;
2047   PetscContainer container;
2048 
2049   PetscFunctionBegin;
2050   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2051   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2052 
2053   if (reuse == MAT_REUSE_MATRIX) {
2054     ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr);
2055     if (M != N || M != mat->rmap->N) SETERRQ(PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong global size");
2056     ierr = MatGetLocalSize(C,&M,&N);CHKERRQ(ierr);
2057     if (M != N || M != mlocal_sub) SETERRQ(PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong local size");
2058     ierr = PetscObjectQuery((PetscObject)C,"Mat_Redundant",(PetscObject *)&container);CHKERRQ(ierr);
2059     if (container) {
2060       ierr = PetscContainerGetPointer(container,(void **)&redund);CHKERRQ(ierr);
2061     } else {
2062       SETERRQ(PETSC_ERR_PLIB,"Container does not exist");
2063     }
2064     if (nzlocal != redund->nzlocal) SETERRQ(PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong nzlocal");
2065 
2066     nsends    = redund->nsends;
2067     nrecvs    = redund->nrecvs;
2068     send_rank = redund->send_rank; recv_rank = send_rank + size;
2069     sbuf_nz   = redund->sbuf_nz;     rbuf_nz = sbuf_nz + nsends;
2070     sbuf_j    = redund->sbuf_j;
2071     sbuf_a    = redund->sbuf_a;
2072     rbuf_j    = redund->rbuf_j;
2073     rbuf_a    = redund->rbuf_a;
2074   }
2075 
2076   if (reuse == MAT_INITIAL_MATRIX){
2077     PetscMPIInt  subrank,subsize;
2078     PetscInt     nleftover,np_subcomm;
2079     /* get the destination processors' id send_rank, nsends and nrecvs */
2080     ierr = MPI_Comm_rank(subcomm,&subrank);CHKERRQ(ierr);
2081     ierr = MPI_Comm_size(subcomm,&subsize);CHKERRQ(ierr);
2082     ierr = PetscMalloc((2*size+1)*sizeof(PetscMPIInt),&send_rank);CHKERRQ(ierr);
2083     recv_rank = send_rank + size;
2084     np_subcomm = size/nsubcomm;
2085     nleftover  = size - nsubcomm*np_subcomm;
2086     nsends = 0; nrecvs = 0;
2087     for (i=0; i<size; i++){ /* i=rank*/
2088       if (subrank == i/nsubcomm && rank != i){ /* my_subrank == other's subrank */
2089         send_rank[nsends] = i; nsends++;
2090         recv_rank[nrecvs++] = i;
2091       }
2092     }
2093     if (rank >= size - nleftover){/* this proc is a leftover processor */
2094       i = size-nleftover-1;
2095       j = 0;
2096       while (j < nsubcomm - nleftover){
2097         send_rank[nsends++] = i;
2098         i--; j++;
2099       }
2100     }
2101 
2102     if (nleftover && subsize == size/nsubcomm && subrank==subsize-1){ /* this proc recvs from leftover processors */
2103       for (i=0; i<nleftover; i++){
2104         recv_rank[nrecvs++] = size-nleftover+i;
2105       }
2106     }
2107 
2108     /* allocate sbuf_j, sbuf_a */
2109     i = nzlocal + rowrange[rank+1] - rowrange[rank] + 2;
2110     ierr = PetscMalloc(i*sizeof(PetscInt),&sbuf_j);CHKERRQ(ierr);
2111     ierr = PetscMalloc((nzlocal+1)*sizeof(PetscScalar),&sbuf_a);CHKERRQ(ierr);
2112   } /* end of if (reuse == MAT_INITIAL_MATRIX) */
2113 
2114   /* copy mat's local entries into the buffers */
2115   if (reuse == MAT_INITIAL_MATRIX){
2116     rownz_max = 0;
2117     rptr = sbuf_j;
2118     cols = sbuf_j + rend-rstart + 1;
2119     vals = sbuf_a;
2120     rptr[0] = 0;
2121     for (i=0; i<rend-rstart; i++){
2122       row = i + rstart;
2123       nzA    = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i];
2124       ncols  = nzA + nzB;
2125       cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i];
2126       aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i];
2127       /* load the column indices for this row into cols */
2128       lwrite = 0;
2129       for (l=0; l<nzB; l++) {
2130         if ((ctmp = bmap[cworkB[l]]) < cstart){
2131           vals[lwrite]   = aworkB[l];
2132           cols[lwrite++] = ctmp;
2133         }
2134       }
2135       for (l=0; l<nzA; l++){
2136         vals[lwrite]   = aworkA[l];
2137         cols[lwrite++] = cstart + cworkA[l];
2138       }
2139       for (l=0; l<nzB; l++) {
2140         if ((ctmp = bmap[cworkB[l]]) >= cend){
2141           vals[lwrite]   = aworkB[l];
2142           cols[lwrite++] = ctmp;
2143         }
2144       }
2145       vals += ncols;
2146       cols += ncols;
2147       rptr[i+1] = rptr[i] + ncols;
2148       if (rownz_max < ncols) rownz_max = ncols;
2149     }
2150     if (rptr[rend-rstart] != a->nz + b->nz) SETERRQ4(1, "rptr[%d] %d != %d + %d",rend-rstart,rptr[rend-rstart],a->nz,b->nz);
2151   } else { /* only copy matrix values into sbuf_a */
2152     rptr = sbuf_j;
2153     vals = sbuf_a;
2154     rptr[0] = 0;
2155     for (i=0; i<rend-rstart; i++){
2156       row = i + rstart;
2157       nzA    = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i];
2158       ncols  = nzA + nzB;
2159       cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i];
2160       aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i];
2161       lwrite = 0;
2162       for (l=0; l<nzB; l++) {
2163         if ((ctmp = bmap[cworkB[l]]) < cstart) vals[lwrite++] = aworkB[l];
2164       }
2165       for (l=0; l<nzA; l++) vals[lwrite++] = aworkA[l];
2166       for (l=0; l<nzB; l++) {
2167         if ((ctmp = bmap[cworkB[l]]) >= cend) vals[lwrite++] = aworkB[l];
2168       }
2169       vals += ncols;
2170       rptr[i+1] = rptr[i] + ncols;
2171     }
2172   } /* end of if (reuse == MAT_INITIAL_MATRIX) */
2173 
2174   /* send nzlocal to others, and recv other's nzlocal */
2175   /*--------------------------------------------------*/
2176   if (reuse == MAT_INITIAL_MATRIX){
2177     ierr = PetscMalloc2(3*(nsends + nrecvs)+1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr);
2178     s_waits2 = s_waits3 + nsends;
2179     s_waits1 = s_waits2 + nsends;
2180     r_waits1 = s_waits1 + nsends;
2181     r_waits2 = r_waits1 + nrecvs;
2182     r_waits3 = r_waits2 + nrecvs;
2183   } else {
2184     ierr = PetscMalloc2(nsends + nrecvs +1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr);
2185     r_waits3 = s_waits3 + nsends;
2186   }
2187 
2188   ierr = PetscObjectGetNewTag((PetscObject)mat,&tag3);CHKERRQ(ierr);
2189   if (reuse == MAT_INITIAL_MATRIX){
2190     /* get new tags to keep the communication clean */
2191     ierr = PetscObjectGetNewTag((PetscObject)mat,&tag1);CHKERRQ(ierr);
2192     ierr = PetscObjectGetNewTag((PetscObject)mat,&tag2);CHKERRQ(ierr);
2193     ierr = PetscMalloc3(nsends+nrecvs+1,PetscInt,&sbuf_nz,nrecvs,PetscInt*,&rbuf_j,nrecvs,PetscScalar*,&rbuf_a);CHKERRQ(ierr);
2194     rbuf_nz = sbuf_nz + nsends;
2195 
2196     /* post receives of other's nzlocal */
2197     for (i=0; i<nrecvs; i++){
2198       ierr = MPI_Irecv(rbuf_nz+i,1,MPIU_INT,MPI_ANY_SOURCE,tag1,comm,r_waits1+i);CHKERRQ(ierr);
2199     }
2200     /* send nzlocal to others */
2201     for (i=0; i<nsends; i++){
2202       sbuf_nz[i] = nzlocal;
2203       ierr = MPI_Isend(sbuf_nz+i,1,MPIU_INT,send_rank[i],tag1,comm,s_waits1+i);CHKERRQ(ierr);
2204     }
2205     /* wait on receives of nzlocal; allocate space for rbuf_j, rbuf_a */
2206     count = nrecvs;
2207     while (count) {
2208       ierr = MPI_Waitany(nrecvs,r_waits1,&imdex,&recv_status);CHKERRQ(ierr);
2209       recv_rank[imdex] = recv_status.MPI_SOURCE;
2210       /* allocate rbuf_a and rbuf_j; then post receives of rbuf_j */
2211       ierr = PetscMalloc((rbuf_nz[imdex]+1)*sizeof(PetscScalar),&rbuf_a[imdex]);CHKERRQ(ierr);
2212 
2213       i = rowrange[recv_status.MPI_SOURCE+1] - rowrange[recv_status.MPI_SOURCE]; /* number of expected mat->i */
2214       rbuf_nz[imdex] += i + 2;
2215       ierr = PetscMalloc(rbuf_nz[imdex]*sizeof(PetscInt),&rbuf_j[imdex]);CHKERRQ(ierr);
2216       ierr = MPI_Irecv(rbuf_j[imdex],rbuf_nz[imdex],MPIU_INT,recv_status.MPI_SOURCE,tag2,comm,r_waits2+imdex);CHKERRQ(ierr);
2217       count--;
2218     }
2219     /* wait on sends of nzlocal */
2220     if (nsends) {ierr = MPI_Waitall(nsends,s_waits1,send_status);CHKERRQ(ierr);}
2221     /* send mat->i,j to others, and recv from the others */
2222     /*------------------------------------------------*/
2223     for (i=0; i<nsends; i++){
2224       j = nzlocal + rowrange[rank+1] - rowrange[rank] + 1;
2225       ierr = MPI_Isend(sbuf_j,j,MPIU_INT,send_rank[i],tag2,comm,s_waits2+i);CHKERRQ(ierr);
2226     }
2227     /* wait on receives of mat->i,j */
2228     /*------------------------------*/
2229     count = nrecvs;
2230     while (count) {
2231       ierr = MPI_Waitany(nrecvs,r_waits2,&imdex,&recv_status);CHKERRQ(ierr);
2232       if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE);
2233       count--;
2234     }
2235     /* wait on sends of mat->i,j */
2236     /*---------------------------*/
2237     if (nsends) {
2238       ierr = MPI_Waitall(nsends,s_waits2,send_status);CHKERRQ(ierr);
2239     }
2240   } /* end of if (reuse == MAT_INITIAL_MATRIX) */
2241 
2242   /* post receives, send and receive mat->a */
2243   /*----------------------------------------*/
2244   for (imdex=0; imdex<nrecvs; imdex++) {
2245     ierr = MPI_Irecv(rbuf_a[imdex],rbuf_nz[imdex],MPIU_SCALAR,recv_rank[imdex],tag3,comm,r_waits3+imdex);CHKERRQ(ierr);
2246   }
2247   for (i=0; i<nsends; i++){
2248     ierr = MPI_Isend(sbuf_a,nzlocal,MPIU_SCALAR,send_rank[i],tag3,comm,s_waits3+i);CHKERRQ(ierr);
2249   }
2250   count = nrecvs;
2251   while (count) {
2252     ierr = MPI_Waitany(nrecvs,r_waits3,&imdex,&recv_status);CHKERRQ(ierr);
2253     if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE);
2254     count--;
2255   }
2256   if (nsends) {
2257     ierr = MPI_Waitall(nsends,s_waits3,send_status);CHKERRQ(ierr);
2258   }
2259 
2260   ierr = PetscFree2(s_waits3,send_status);CHKERRQ(ierr);
2261 
2262   /* create redundant matrix */
2263   /*-------------------------*/
2264   if (reuse == MAT_INITIAL_MATRIX){
2265     /* compute rownz_max for preallocation */
2266     for (imdex=0; imdex<nrecvs; imdex++){
2267       j = rowrange[recv_rank[imdex]+1] - rowrange[recv_rank[imdex]];
2268       rptr = rbuf_j[imdex];
2269       for (i=0; i<j; i++){
2270         ncols = rptr[i+1] - rptr[i];
2271         if (rownz_max < ncols) rownz_max = ncols;
2272       }
2273     }
2274 
2275     ierr = MatCreate(subcomm,&C);CHKERRQ(ierr);
2276     ierr = MatSetSizes(C,mlocal_sub,mlocal_sub,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr);
2277     ierr = MatSetFromOptions(C);CHKERRQ(ierr);
2278     ierr = MatSeqAIJSetPreallocation(C,rownz_max,PETSC_NULL);CHKERRQ(ierr);
2279     ierr = MatMPIAIJSetPreallocation(C,rownz_max,PETSC_NULL,rownz_max,PETSC_NULL);CHKERRQ(ierr);
2280   } else {
2281     C = *matredundant;
2282   }
2283 
2284   /* insert local matrix entries */
2285   rptr = sbuf_j;
2286   cols = sbuf_j + rend-rstart + 1;
2287   vals = sbuf_a;
2288   for (i=0; i<rend-rstart; i++){
2289     row   = i + rstart;
2290     ncols = rptr[i+1] - rptr[i];
2291     ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
2292     vals += ncols;
2293     cols += ncols;
2294   }
2295   /* insert received matrix entries */
2296   for (imdex=0; imdex<nrecvs; imdex++){
2297     rstart = rowrange[recv_rank[imdex]];
2298     rend   = rowrange[recv_rank[imdex]+1];
2299     rptr = rbuf_j[imdex];
2300     cols = rbuf_j[imdex] + rend-rstart + 1;
2301     vals = rbuf_a[imdex];
2302     for (i=0; i<rend-rstart; i++){
2303       row   = i + rstart;
2304       ncols = rptr[i+1] - rptr[i];
2305       ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
2306       vals += ncols;
2307       cols += ncols;
2308     }
2309   }
2310   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2311   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2312   ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr);
2313   if (M != mat->rmap->N || N != mat->cmap->N) SETERRQ2(PETSC_ERR_ARG_INCOMP,"redundant mat size %d != input mat size %d",M,mat->rmap->N);
2314   if (reuse == MAT_INITIAL_MATRIX){
2315     PetscContainer container;
2316     *matredundant = C;
2317     /* create a supporting struct and attach it to C for reuse */
2318     ierr = PetscNewLog(C,Mat_Redundant,&redund);CHKERRQ(ierr);
2319     ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
2320     ierr = PetscContainerSetPointer(container,redund);CHKERRQ(ierr);
2321     ierr = PetscObjectCompose((PetscObject)C,"Mat_Redundant",(PetscObject)container);CHKERRQ(ierr);
2322     ierr = PetscContainerSetUserDestroy(container,PetscContainerDestroy_MatRedundant);CHKERRQ(ierr);
2323 
2324     redund->nzlocal = nzlocal;
2325     redund->nsends  = nsends;
2326     redund->nrecvs  = nrecvs;
2327     redund->send_rank = send_rank;
2328     redund->sbuf_nz = sbuf_nz;
2329     redund->sbuf_j  = sbuf_j;
2330     redund->sbuf_a  = sbuf_a;
2331     redund->rbuf_j  = rbuf_j;
2332     redund->rbuf_a  = rbuf_a;
2333 
2334     redund->MatDestroy = C->ops->destroy;
2335     C->ops->destroy    = MatDestroy_MatRedundant;
2336   }
2337   PetscFunctionReturn(0);
2338 }
2339 
2340 #undef __FUNCT__
2341 #define __FUNCT__ "MatGetRowMaxAbs_MPIAIJ"
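/*
    For each local row takes the entry of largest absolute value from the
    diagonal block and from the off-diagonal block and keeps the larger of the
    two; off-diagonal column indices are mapped back to global indices through
    garray.
*/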
2342 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2343 {
2344   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2345   PetscErrorCode ierr;
2346   PetscInt       i,*idxb = 0;
2347   PetscScalar    *va,*vb;
2348   Vec            vtmp;
2349 
2350   PetscFunctionBegin;
2351   ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr);
2352   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2353   if (idx) {
2354     for (i=0; i<A->rmap->n; i++) {
2355       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2356     }
2357   }
2358 
2359   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
2360   if (idx) {
2361     ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr);
2362   }
2363   ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
2364   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
2365 
2366   for (i=0; i<A->rmap->n; i++){
2367     if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
2368       va[i] = vb[i];
2369       if (idx) idx[i] = a->garray[idxb[i]];
2370     }
2371   }
2372 
2373   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2374   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
2375   if (idxb) {
2376     ierr = PetscFree(idxb);CHKERRQ(ierr);
2377   }
2378   ierr = VecDestroy(vtmp);CHKERRQ(ierr);
2379   PetscFunctionReturn(0);
2380 }
2381 
2382 #undef __FUNCT__
2383 #define __FUNCT__ "MatGetRowMin_MPIAIJ"
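/*
    For each local row computes the row minimum of the diagonal and off-diagonal
    blocks separately and keeps the one of smaller absolute value, recording the
    global column index of the chosen entry in idx[].
*/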
2384 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2385 {
2386   Mat_MPIAIJ    *mat    = (Mat_MPIAIJ *) A->data;
2387   PetscInt       n      = A->rmap->n;
2388   PetscInt       cstart = A->cmap->rstart;
2389   PetscInt      *cmap   = mat->garray;
2390   PetscInt      *diagIdx, *offdiagIdx;
2391   Vec            diagV, offdiagV;
2392   PetscScalar   *a, *diagA, *offdiagA;
2393   PetscInt       r;
2394   PetscErrorCode ierr;
2395 
2396   PetscFunctionBegin;
2397   ierr = PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);CHKERRQ(ierr);
2398   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr);
2399   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr);
2400   ierr = MatGetRowMin(mat->A, diagV,    diagIdx);CHKERRQ(ierr);
2401   ierr = MatGetRowMin(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
2402   ierr = VecGetArray(v,        &a);CHKERRQ(ierr);
2403   ierr = VecGetArray(diagV,    &diagA);CHKERRQ(ierr);
2404   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2405   for(r = 0; r < n; ++r) {
2406     if (PetscAbsScalar(diagA[r]) <= PetscAbsScalar(offdiagA[r])) {
2407       a[r]   = diagA[r];
2408       idx[r] = cstart + diagIdx[r];
2409     } else {
2410       a[r]   = offdiagA[r];
2411       idx[r] = cmap[offdiagIdx[r]];
2412     }
2413   }
2414   ierr = VecRestoreArray(v,        &a);CHKERRQ(ierr);
2415   ierr = VecRestoreArray(diagV,    &diagA);CHKERRQ(ierr);
2416   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2417   ierr = VecDestroy(diagV);CHKERRQ(ierr);
2418   ierr = VecDestroy(offdiagV);CHKERRQ(ierr);
2419   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
2420   PetscFunctionReturn(0);
2421 }
2422 
2423 #undef __FUNCT__
2424 #define __FUNCT__ "MatGetSeqNonzerostructure_MPIAIJ"
2425 PetscErrorCode MatGetSeqNonzerostructure_MPIAIJ(Mat mat,Mat *newmat[])
2426 {
2427   PetscErrorCode ierr;
2428 
2429   PetscFunctionBegin;
2430   ierr = MatGetSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,newmat);CHKERRQ(ierr);
2431   PetscFunctionReturn(0);
2432 }
2433 
2434 /* -------------------------------------------------------------------*/
2435 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
2436        MatGetRow_MPIAIJ,
2437        MatRestoreRow_MPIAIJ,
2438        MatMult_MPIAIJ,
2439 /* 4*/ MatMultAdd_MPIAIJ,
2440        MatMultTranspose_MPIAIJ,
2441        MatMultTransposeAdd_MPIAIJ,
2442 #ifdef PETSC_HAVE_PBGL
2443        MatSolve_MPIAIJ,
2444 #else
2445        0,
2446 #endif
2447        0,
2448        0,
2449 /*10*/ 0,
2450        0,
2451        0,
2452        MatRelax_MPIAIJ,
2453        MatTranspose_MPIAIJ,
2454 /*15*/ MatGetInfo_MPIAIJ,
2455        MatEqual_MPIAIJ,
2456        MatGetDiagonal_MPIAIJ,
2457        MatDiagonalScale_MPIAIJ,
2458        MatNorm_MPIAIJ,
2459 /*20*/ MatAssemblyBegin_MPIAIJ,
2460        MatAssemblyEnd_MPIAIJ,
2461        0,
2462        MatSetOption_MPIAIJ,
2463        MatZeroEntries_MPIAIJ,
2464 /*25*/ MatZeroRows_MPIAIJ,
2465        0,
2466 #ifdef PETSC_HAVE_PBGL
2467        0,
2468 #else
2469        0,
2470 #endif
2471        0,
2472        0,
2473 /*30*/ MatSetUpPreallocation_MPIAIJ,
2474 #ifdef PETSC_HAVE_PBGL
2475        0,
2476 #else
2477        0,
2478 #endif
2479        0,
2480        0,
2481        0,
2482 /*35*/ MatDuplicate_MPIAIJ,
2483        0,
2484        0,
2485        0,
2486        0,
2487 /*40*/ MatAXPY_MPIAIJ,
2488        MatGetSubMatrices_MPIAIJ,
2489        MatIncreaseOverlap_MPIAIJ,
2490        MatGetValues_MPIAIJ,
2491        MatCopy_MPIAIJ,
2492 /*45*/ 0,
2493        MatScale_MPIAIJ,
2494        0,
2495        0,
2496        0,
2497 /*50*/ MatSetBlockSize_MPIAIJ,
2498        0,
2499        0,
2500        0,
2501        0,
2502 /*55*/ MatFDColoringCreate_MPIAIJ,
2503        0,
2504        MatSetUnfactored_MPIAIJ,
2505        MatPermute_MPIAIJ,
2506        0,
2507 /*60*/ MatGetSubMatrix_MPIAIJ,
2508        MatDestroy_MPIAIJ,
2509        MatView_MPIAIJ,
2510        0,
2511        0,
2512 /*65*/ 0,
2513        0,
2514        0,
2515        0,
2516        0,
2517 /*70*/ MatGetRowMaxAbs_MPIAIJ,
2518        0,
2519        MatSetColoring_MPIAIJ,
2520 #if defined(PETSC_HAVE_ADIC)
2521        MatSetValuesAdic_MPIAIJ,
2522 #else
2523        0,
2524 #endif
2525        MatSetValuesAdifor_MPIAIJ,
2526 /*75*/ 0,
2527        0,
2528        0,
2529        0,
2530        0,
2531 /*80*/ 0,
2532        0,
2533        0,
2534        0,
2535 /*84*/ MatLoad_MPIAIJ,
2536        0,
2537        0,
2538        0,
2539        0,
2540        0,
2541 /*90*/ MatMatMult_MPIAIJ_MPIAIJ,
2542        MatMatMultSymbolic_MPIAIJ_MPIAIJ,
2543        MatMatMultNumeric_MPIAIJ_MPIAIJ,
2544        MatPtAP_Basic,
2545        MatPtAPSymbolic_MPIAIJ,
2546 /*95*/ MatPtAPNumeric_MPIAIJ,
2547        0,
2548        0,
2549        0,
2550        0,
2551 /*100*/0,
2552        MatPtAPSymbolic_MPIAIJ_MPIAIJ,
2553        MatPtAPNumeric_MPIAIJ_MPIAIJ,
2554        MatConjugate_MPIAIJ,
2555        0,
2556 /*105*/MatSetValuesRow_MPIAIJ,
2557        MatRealPart_MPIAIJ,
2558        MatImaginaryPart_MPIAIJ,
2559        0,
2560        0,
2561 /*110*/0,
2562        MatGetRedundantMatrix_MPIAIJ,
2563        MatGetRowMin_MPIAIJ,
2564        0,
2565        0,
2566 /*115*/MatGetSeqNonzerostructure_MPIAIJ};
2567 
2568 /* ----------------------------------------------------------------------------------------*/
2569 
2570 EXTERN_C_BEGIN
2571 #undef __FUNCT__
2572 #define __FUNCT__ "MatStoreValues_MPIAIJ"
2573 PetscErrorCode PETSCMAT_DLLEXPORT MatStoreValues_MPIAIJ(Mat mat)
2574 {
2575   Mat_MPIAIJ     *aij = (Mat_MPIAIJ *)mat->data;
2576   PetscErrorCode ierr;
2577 
2578   PetscFunctionBegin;
2579   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
2580   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
2581   PetscFunctionReturn(0);
2582 }
2583 EXTERN_C_END
2584 
2585 EXTERN_C_BEGIN
2586 #undef __FUNCT__
2587 #define __FUNCT__ "MatRetrieveValues_MPIAIJ"
2588 PetscErrorCode PETSCMAT_DLLEXPORT MatRetrieveValues_MPIAIJ(Mat mat)
2589 {
2590   Mat_MPIAIJ     *aij = (Mat_MPIAIJ *)mat->data;
2591   PetscErrorCode ierr;
2592 
2593   PetscFunctionBegin;
2594   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
2595   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
2596   PetscFunctionReturn(0);
2597 }
2598 EXTERN_C_END
2599 
2600 #include "petscpc.h"
2601 EXTERN_C_BEGIN
2602 #undef __FUNCT__
2603 #define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ"
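/*
    Checks the requested preallocation (defaults of 5 and 2 nonzeros per row are
    used when PETSC_DEFAULT is passed), sets up the row and column maps, and
    creates the two sequential AIJ blocks: b->A holds the diagonal portion
    (columns owned by this process) and b->B the off-diagonal portion, each
    preallocated with the corresponding nz/nnz arguments.
*/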
2604 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
2605 {
2606   Mat_MPIAIJ     *b;
2607   PetscErrorCode ierr;
2608   PetscInt       i;
2609 
2610   PetscFunctionBegin;
2611   B->preallocated = PETSC_TRUE;
2612   if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5;
2613   if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2;
2614   if (d_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz);
2615   if (o_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz);
2616 
2617   B->rmap->bs = B->cmap->bs = 1;
2618   ierr = PetscMapSetUp(B->rmap);CHKERRQ(ierr);
2619   ierr = PetscMapSetUp(B->cmap);CHKERRQ(ierr);
2620   if (d_nnz) {
2621     for (i=0; i<B->rmap->n; i++) {
2622       if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %D value %D",i,d_nnz[i]);
2623     }
2624   }
2625   if (o_nnz) {
2626     for (i=0; i<B->rmap->n; i++) {
2627       if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %D value %D",i,o_nnz[i]);
2628     }
2629   }
2630   b = (Mat_MPIAIJ*)B->data;
2631 
2632   /* Explicitly create 2 MATSEQAIJ matrices. */
2633   ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
2634   ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr);
2635   ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr);
2636   ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr);
2637   ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
2638   ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr);
2639   ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr);
2640   ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr);
2641 
2642   ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr);
2643   ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr);
2644 
2645   PetscFunctionReturn(0);
2646 }
2647 EXTERN_C_END
2648 
2649 #undef __FUNCT__
2650 #define __FUNCT__ "MatDuplicate_MPIAIJ"
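/*
    Creates a new MPIAIJ matrix with the same parallel layout, copies the column
    map and garray, duplicates the local-to-ghost scatter and its work vector,
    and duplicates the two sequential blocks (copying values only when cpvalues
    requests it).
*/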
2651 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
2652 {
2653   Mat            mat;
2654   Mat_MPIAIJ     *a,*oldmat = (Mat_MPIAIJ*)matin->data;
2655   PetscErrorCode ierr;
2656 
2657   PetscFunctionBegin;
2658   *newmat       = 0;
2659   ierr = MatCreate(((PetscObject)matin)->comm,&mat);CHKERRQ(ierr);
2660   ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr);
2661   ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr);
2662   ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
2663   a    = (Mat_MPIAIJ*)mat->data;
2664 
2665   mat->factor       = matin->factor;
2666   mat->rmap->bs      = matin->rmap->bs;
2667   mat->assembled    = PETSC_TRUE;
2668   mat->insertmode   = NOT_SET_VALUES;
2669   mat->preallocated = PETSC_TRUE;
2670 
2671   a->size           = oldmat->size;
2672   a->rank           = oldmat->rank;
2673   a->donotstash     = oldmat->donotstash;
2674   a->roworiented    = oldmat->roworiented;
2675   a->rowindices     = 0;
2676   a->rowvalues      = 0;
2677   a->getrowactive   = PETSC_FALSE;
2678 
2679   ierr = PetscMapCopy(((PetscObject)mat)->comm,matin->rmap,mat->rmap);CHKERRQ(ierr);
2680   ierr = PetscMapCopy(((PetscObject)mat)->comm,matin->cmap,mat->cmap);CHKERRQ(ierr);
2681 
2682   ierr = MatStashCreate_Private(((PetscObject)matin)->comm,1,&mat->stash);CHKERRQ(ierr);
2683   if (oldmat->colmap) {
2684 #if defined (PETSC_USE_CTABLE)
2685     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
2686 #else
2687     ierr = PetscMalloc((mat->cmap->N)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr);
2688     ierr = PetscLogObjectMemory(mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
2689     ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
2690 #endif
2691   } else a->colmap = 0;
2692   if (oldmat->garray) {
2693     PetscInt len;
2694     len  = oldmat->B->cmap->n;
2695     ierr = PetscMalloc((len+1)*sizeof(PetscInt),&a->garray);CHKERRQ(ierr);
2696     ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr);
2697     if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); }
2698   } else a->garray = 0;
2699 
2700   ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
2701   ierr = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr);
2702   ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
2703   ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr);
2704   ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
2705   ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr);
2706   ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
2707   ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr);
2708   ierr = PetscFListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr);
2709   *newmat = mat;
2710   PetscFunctionReturn(0);
2711 }
2712 
2713 #include "petscsys.h"
2714 
2715 #undef __FUNCT__
2716 #define __FUNCT__ "MatLoad_MPIAIJ"
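/*
    Loads a matrix from a PETSc binary viewer: process 0 reads and checks the
    header, the row distribution is computed so each process gets roughly M/size
    rows, and process 0 then reads the row lengths and column indices in
    per-process chunks and ships each chunk to its owner.
*/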
2717 PetscErrorCode MatLoad_MPIAIJ(PetscViewer viewer, const MatType type,Mat *newmat)
2718 {
2719   Mat            A;
2720   PetscScalar    *vals,*svals;
2721   MPI_Comm       comm = ((PetscObject)viewer)->comm;
2722   MPI_Status     status;
2723   PetscErrorCode ierr;
2724   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag,maxnz;
2725   PetscInt       i,nz,j,rstart,rend,mmax;
2726   PetscInt       header[4],*rowlengths = 0,M,N,m,*cols;
2727   PetscInt       *ourlens = PETSC_NULL,*procsnz = PETSC_NULL,*offlens = PETSC_NULL,jj,*mycols,*smycols;
2728   PetscInt       cend,cstart,n,*rowners;
2729   int            fd;
2730 
2731   PetscFunctionBegin;
2732   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2733   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2734   if (!rank) {
2735     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
2736     ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
2737     if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not a matrix object");
2738   }
2739 
2740   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
2741   M = header[1]; N = header[2];
2742   /* determine ownership of all rows */
2743   m    = M/size + ((M % size) > rank);
2744   ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr);
2745   ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
2746 
2747   /* First process needs enough room for process with most rows */
2748   if (!rank) {
2749     mmax       = rowners[1];
2750     for (i=2; i<=size; i++) {
2751       mmax = PetscMax(mmax,rowners[i]);
2752     }
2753   } else mmax = m;
2754 
2755   rowners[0] = 0;
2756   for (i=2; i<=size; i++) {
2757     rowners[i] += rowners[i-1];
2758   }
2759   rstart = rowners[rank];
2760   rend   = rowners[rank+1];
2761 
2762   /* distribute row lengths to all processors */
2763   ierr    = PetscMalloc2(mmax,PetscInt,&ourlens,mmax,PetscInt,&offlens);CHKERRQ(ierr);
2764   if (!rank) {
2765     ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr);
2766     ierr = PetscMalloc(m*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
2767     ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr);
2768     ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr);
2769     for (j=0; j<m; j++) {
2770       procsnz[0] += ourlens[j];
2771     }
2772     for (i=1; i<size; i++) {
2773       ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr);
2774       /* calculate the number of nonzeros on each processor */
2775       for (j=0; j<rowners[i+1]-rowners[i]; j++) {
2776         procsnz[i] += rowlengths[j];
2777       }
2778       ierr = MPI_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
2779     }
2780     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
2781   } else {
2782     ierr = MPI_Recv(ourlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
2783   }
2784 
2785   if (!rank) {
2786     /* determine max buffer needed and allocate it */
2787     maxnz = 0;
2788     for (i=0; i<size; i++) {
2789       maxnz = PetscMax(maxnz,procsnz[i]);
2790     }
2791     ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr);
2792 
2793     /* read in my part of the matrix column indices  */
2794     nz   = procsnz[0];
2795     ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr);
2796     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
2797 
2798     /* read in everyone else's part and ship it off */
2799     for (i=1; i<size; i++) {
2800       nz   = procsnz[i];
2801       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
2802       ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
2803     }
2804     ierr = PetscFree(cols);CHKERRQ(ierr);
2805   } else {
2806     /* determine buffer space needed for message */
2807     nz = 0;
2808     for (i=0; i<m; i++) {
2809       nz += ourlens[i];
2810     }
2811     ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr);
2812 
2813     /* receive message of column indices*/
2814     ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
2815     ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr);
2816     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with the file");
2817   }
2818 
2819   /* determine column ownership if matrix is not square */
2820   if (N != M) {
2821     n      = N/size + ((N % size) > rank);
2822     ierr   = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
2823     cstart = cend - n;
2824   } else {
2825     cstart = rstart;
2826     cend   = rend;
2827     n      = cend - cstart;
2828   }
2829 
2830   /* loop over local rows, determining number of off diagonal entries */
2831   ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr);
2832   jj = 0;
2833   for (i=0; i<m; i++) {
2834     for (j=0; j<ourlens[i]; j++) {
2835       if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++;
2836       jj++;
2837     }
2838   }
2839 
2840   /* create our matrix */
2841   for (i=0; i<m; i++) {
2842     ourlens[i] -= offlens[i];
2843   }
2844   ierr = MatCreate(comm,&A);CHKERRQ(ierr);
2845   ierr = MatSetSizes(A,m,n,M,N);CHKERRQ(ierr);
2846   ierr = MatSetType(A,type);CHKERRQ(ierr);
2847   ierr = MatMPIAIJSetPreallocation(A,0,ourlens,0,offlens);CHKERRQ(ierr);
2848 
2849   for (i=0; i<m; i++) {
2850     ourlens[i] += offlens[i];
2851   }
2852 
2853   if (!rank) {
2854     ierr = PetscMalloc((maxnz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
2855 
2856     /* read in my part of the matrix numerical values  */
2857     nz   = procsnz[0];
2858     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2859 
2860     /* insert into matrix */
2861     jj      = rstart;
2862     smycols = mycols;
2863     svals   = vals;
2864     for (i=0; i<m; i++) {
2865       ierr = MatSetValues_MPIAIJ(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
2866       smycols += ourlens[i];
2867       svals   += ourlens[i];
2868       jj++;
2869     }
2870 
2871     /* read in other processors and ship out */
2872     for (i=1; i<size; i++) {
2873       nz   = procsnz[i];
2874       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2875       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)A)->tag,comm);CHKERRQ(ierr);
2876     }
2877     ierr = PetscFree(procsnz);CHKERRQ(ierr);
2878   } else {
2879     /* receive numeric values */
2880     ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
2881 
2882     /* receive message of values*/
2883     ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)A)->tag,comm,&status);CHKERRQ(ierr);
2884     ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
2885     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with the file");
2886 
2887     /* insert into matrix */
2888     jj      = rstart;
2889     smycols = mycols;
2890     svals   = vals;
2891     for (i=0; i<m; i++) {
2892       ierr     = MatSetValues_MPIAIJ(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
2893       smycols += ourlens[i];
2894       svals   += ourlens[i];
2895       jj++;
2896     }
2897   }
2898   ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr);
2899   ierr = PetscFree(vals);CHKERRQ(ierr);
2900   ierr = PetscFree(mycols);CHKERRQ(ierr);
2901   ierr = PetscFree(rowners);CHKERRQ(ierr);
2902 
2903   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2904   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2905   *newmat = A;
2906   PetscFunctionReturn(0);
2907 }
2908 
2909 #undef __FUNCT__
2910 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ"
2911 /*
2912     Not great since it makes two copies of the submatrix: first a local SeqAIJ
2913   copy, and then the end result obtained by concatenating the local matrices.
2914   Writing it directly would be much like MatGetSubMatrices_MPIAIJ().
2915 */
2916 PetscErrorCode MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat)
2917 {
2918   PetscErrorCode ierr;
2919   PetscMPIInt    rank,size;
2920   PetscInt       i,m,n,rstart,row,rend,nz,*cwork,j;
2921   PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal;
2922   Mat            *local,M,Mreuse;
2923   MatScalar      *vwork,*aa;
2924   MPI_Comm       comm = ((PetscObject)mat)->comm;
2925   Mat_SeqAIJ     *aij;
2926 
2927 
2928   PetscFunctionBegin;
2929   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2930   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2931 
2932   if (call ==  MAT_REUSE_MATRIX) {
2933     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr);
2934     if (!Mreuse) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
2935     local = &Mreuse;
2936     ierr  = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);CHKERRQ(ierr);
2937   } else {
2938     ierr   = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr);
2939     Mreuse = *local;
2940     ierr   = PetscFree(local);CHKERRQ(ierr);
2941   }
2942 
2943   /*
2944       m - number of local rows
2945       n - number of columns (same on all processors)
2946       rstart - first row in new global matrix generated
2947   */
2948   ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
2949   if (call == MAT_INITIAL_MATRIX) {
2950     aij = (Mat_SeqAIJ*)(Mreuse)->data;
2951     ii  = aij->i;
2952     jj  = aij->j;
2953 
2954     /*
2955         Determine the number of non-zeros in the diagonal and off-diagonal
2956         portions of the matrix in order to do correct preallocation
2957     */
2958 
2959     /* first get start and end of "diagonal" columns */
2960     if (csize == PETSC_DECIDE) {
2961       ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
2962       if (mglobal == n) { /* square matrix */
2963 	nlocal = m;
2964       } else {
2965         nlocal = n/size + ((n % size) > rank);
2966       }
2967     } else {
2968       nlocal = csize;
2969     }
2970     ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
2971     rstart = rend - nlocal;
2972     if (rank == size - 1 && rend != n) {
2973       SETERRQ2(PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n);
2974     }
2975 
2976     /* next, compute all the lengths */
2977     ierr  = PetscMalloc((2*m+1)*sizeof(PetscInt),&dlens);CHKERRQ(ierr);
2978     olens = dlens + m;
2979     for (i=0; i<m; i++) {
2980       jend = ii[i+1] - ii[i];
2981       olen = 0;
2982       dlen = 0;
2983       for (j=0; j<jend; j++) {
2984         if (*jj < rstart || *jj >= rend) olen++;
2985         else dlen++;
2986         jj++;
2987       }
2988       olens[i] = olen;
2989       dlens[i] = dlen;
2990     }
2991     ierr = MatCreate(comm,&M);CHKERRQ(ierr);
2992     ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr);
2993     ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr);
2994     ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr);
2995     ierr = PetscFree(dlens);CHKERRQ(ierr);
2996   } else {
2997     PetscInt ml,nl;
2998 
2999     M = *newmat;
3000     ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
3001     if (ml != m) SETERRQ(PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
3002     ierr = MatZeroEntries(M);CHKERRQ(ierr);
3003     /*
3004          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
3005        rather than the slower MatSetValues().
3006     */
3007     M->was_assembled = PETSC_TRUE;
3008     M->assembled     = PETSC_FALSE;
3009   }
3010   ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
3011   aij = (Mat_SeqAIJ*)(Mreuse)->data;
3012   ii  = aij->i;
3013   jj  = aij->j;
3014   aa  = aij->a;
3015   for (i=0; i<m; i++) {
3016     row   = rstart + i;
3017     nz    = ii[i+1] - ii[i];
3018     cwork = jj;     jj += nz;
3019     vwork = aa;     aa += nz;
3020     ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
3021   }
3022 
3023   ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3024   ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3025   *newmat = M;
3026 
3027   /* save submatrix used in processor for next request */
3028   if (call ==  MAT_INITIAL_MATRIX) {
3029     ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
3030     ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr);
3031   }
3032 
3033   PetscFunctionReturn(0);
3034 }
3035 
3036 EXTERN_C_BEGIN
3037 #undef __FUNCT__
3038 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR_MPIAIJ"
3039 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[])
3040 {
3041   PetscInt       m,cstart, cend,j,nnz,i,d;
3042   PetscInt       *d_nnz,*o_nnz,nnz_max = 0,rstart,ii;
3043   const PetscInt *JJ;
3044   PetscScalar    *values;
3045   PetscErrorCode ierr;
3046 
3047   PetscFunctionBegin;
3048   if (Ii[0]) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0, it is %D",Ii[0]);
3049 
3050   B->rmap->bs = B->cmap->bs = 1;
3051   ierr = PetscMapSetUp(B->rmap);CHKERRQ(ierr);
3052   ierr = PetscMapSetUp(B->cmap);CHKERRQ(ierr);
3053   m      = B->rmap->n;
3054   cstart = B->cmap->rstart;
3055   cend   = B->cmap->rend;
3056   rstart = B->rmap->rstart;
3057 
3058   ierr  = PetscMalloc((2*m+1)*sizeof(PetscInt),&d_nnz);CHKERRQ(ierr);
3059   o_nnz = d_nnz + m;
3060 
3061 #if defined(PETSC_USE_DEBUG)
3062   for (i=0; i<m; i++) {
3063     nnz     = Ii[i+1]- Ii[i];
3064     JJ      = J + Ii[i];
3065     if (nnz < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative number of columns %D",i,nnz);
3066     if (nnz && (JJ[0] < 0)) SETERRQ1(PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with a negative column index",i);
3067     if (nnz && (JJ[nnz-1] >= B->cmap->N)) SETERRQ3(PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N);
3068     for (j=1; j<nnz; j++) {
3069       if (JJ[j] <= JJ[j-1]) SETERRQ2(PETSC_ERR_ARG_WRONGSTATE,"Row %D has an unsorted column index at position %D",i,j);
3070     }
3071   }
3072 #endif
3073 
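  /* For each local row count how many column indices fall inside the diagonal block
     [cstart,cend) (d_nnz[i]) and how many fall outside it (o_nnz[i]); J is sorted per row */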
3074   for (i=0; i<m; i++) {
3075     nnz     = Ii[i+1]- Ii[i];
3076     JJ      = J + Ii[i];
3077     nnz_max = PetscMax(nnz_max,nnz);
3078     for (j=0; j<nnz; j++) {
3079       if (*JJ >= cstart) break;
3080       JJ++;
3081     }
3082     d = 0;
3083     for (; j<nnz; j++) {
3084       if (*JJ++ >= cend) break;
3085       d++;
3086     }
3087     d_nnz[i] = d;
3088     o_nnz[i] = nnz - d;
3089   }
3090   ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
3091   ierr = PetscFree(d_nnz);CHKERRQ(ierr);
3092 
3093   if (v) values = (PetscScalar*)v;
3094   else {
3095     ierr = PetscMalloc((nnz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr);
3096     ierr = PetscMemzero(values,nnz_max*sizeof(PetscScalar));CHKERRQ(ierr);
3097   }
3098 
3099   for (i=0; i<m; i++) {
3100     ii   = i + rstart;
3101     nnz  = Ii[i+1]- Ii[i];
3102     ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ? Ii[i] : 0),INSERT_VALUES);CHKERRQ(ierr);
3103   }
3104   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3105   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3106 
3107   if (!v) {
3108     ierr = PetscFree(values);CHKERRQ(ierr);
3109   }
3110   PetscFunctionReturn(0);
3111 }
3112 EXTERN_C_END
3113 
3114 #undef __FUNCT__
3115 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR"
3116 /*@
3117    MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format
3118    (the default parallel PETSc format).
3119 
3120    Collective on MPI_Comm
3121 
3122    Input Parameters:
3123 +  B - the matrix
3124 .  i - the indices into j for the start of each local row (starts with zero)
3125 .  j - the column indices for each local row (starts with zero) these must be sorted for each row
3126 -  v - optional values in the matrix
3127 
3128    Level: developer
3129 
3130    Notes:
3131        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
3132      thus you CANNOT change the matrix entries by changing the values of a[] after you have
3133      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3134 
3135        The i and j indices are 0-based, and the i indices are offsets into the local j array.
3136 
3137        The format which is used for the sparse matrix input, is equivalent to a
3138     row-major ordering, i.e. for the following matrix, the input data expected is
3139     as shown:
3140 
3141         1 0 0
3142         2 0 3     P0
3143        -------
3144         4 5 6     P1
3145 
3146      Process0 [P0]: rows_owned=[0,1]
3147         i =  {0,1,3}  [size = nrow+1  = 2+1]
3148         j =  {0,0,2}  [size = nz = 3]
3149         v =  {1,2,3}  [size = nz = 3]
3150 
3151      Process1 [P1]: rows_owned=[2]
3152         i =  {0,3}    [size = nrow+1  = 1+1]
3153         j =  {0,1,2}  [size = nz = 3]
3154         v =  {4,5,6}  [size = nz = 3]
3155 
3156       The column indices for each row MUST be sorted.
3157 
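      As a minimal calling sketch (illustrative only; ia, ja and va here stand for the
      local CSR arrays described above, and m, n for the local sizes):
.vb
      MatCreate(comm,&B);
      MatSetSizes(B,m,n,PETSC_DETERMINE,PETSC_DETERMINE);
      MatSetType(B,MATMPIAIJ);
      MatMPIAIJSetPreallocationCSR(B,ia,ja,va);
.ve
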
3158 .keywords: matrix, aij, compressed row, sparse, parallel
3159 
3160 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateMPIAIJ(), MPIAIJ,
3161           MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays()
3162 @*/
3163 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
3164 {
3165   PetscErrorCode ierr,(*f)(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]);
3166 
3167   PetscFunctionBegin;
3168   ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",(void (**)(void))&f);CHKERRQ(ierr);
3169   if (f) {
3170     ierr = (*f)(B,i,j,v);CHKERRQ(ierr);
3171   }
3172   PetscFunctionReturn(0);
3173 }
3174 
3175 #undef __FUNCT__
3176 #define __FUNCT__ "MatMPIAIJSetPreallocation"
3177 /*@C
3178    MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format
3179    (the default parallel PETSc format).  For good matrix assembly performance
3180    the user should preallocate the matrix storage by setting the parameters
3181    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3182    performance can be increased by more than a factor of 50.
3183 
3184    Collective on MPI_Comm
3185 
3186    Input Parameters:
3187 +  A - the matrix
3188 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
3189            (same value is used for all local rows)
3190 .  d_nnz - array containing the number of nonzeros in the various rows of the
3191            DIAGONAL portion of the local submatrix (possibly different for each row)
3192            or PETSC_NULL, if d_nz is used to specify the nonzero structure.
3193            The size of this array is equal to the number of local rows, i.e 'm'.
3194            You must leave room for the diagonal entry even if it is zero.
3195 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
3196            submatrix (same value is used for all local rows).
3197 -  o_nnz - array containing the number of nonzeros in the various rows of the
3198            OFF-DIAGONAL portion of the local submatrix (possibly different for
3199            each row) or PETSC_NULL, if o_nz is used to specify the nonzero
3200            structure. The size of this array is equal to the number
3201            of local rows, i.e 'm'.
3202 
3203    If the *_nnz parameter is given then the *_nz parameter is ignored
3204 
3205    The AIJ format (also called the Yale sparse matrix format or
3206    compressed row storage (CSR)), is fully compatible with standard Fortran 77
3207    storage.  The stored row and column indices begin with zero.  See the users manual for details.
3208 
3209    The parallel matrix is partitioned such that the first m0 rows belong to
3210    process 0, the next m1 rows belong to process 1, the next m2 rows belong
3211    to process 2, etc., where m0,m1,m2... are the input parameter 'm'.
3212 
3213    The DIAGONAL portion of the local submatrix of a processor can be defined
3214    as the submatrix which is obtained by extracting the part corresponding
3215    to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the
3216    first row that belongs to the processor, and r2 is the last row belonging
3217    to this processor. This is a square mxm matrix. The remaining portion
3218    of the local submatrix (mxN) constitutes the OFF-DIAGONAL portion.
3219 
3220    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
3221 
3222    You can call MatGetInfo() to get information on how effective the preallocation was;
3223    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
3224    You can also run with the option -info and look for messages with the string
3225    malloc in them to see if additional memory allocation was needed.
3226 
3227    Example usage:
3228 
3229    Consider the following 8x8 matrix with 34 non-zero values, that is
3230    assembled across 3 processors. Lets assume that proc0 owns 3 rows,
3231    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
3232    as follows:
3233 
3234 .vb
3235             1  2  0  |  0  3  0  |  0  4
3236     Proc0   0  5  6  |  7  0  0  |  8  0
3237             9  0 10  | 11  0  0  | 12  0
3238     -------------------------------------
3239            13  0 14  | 15 16 17  |  0  0
3240     Proc1   0 18  0  | 19 20 21  |  0  0
3241             0  0  0  | 22 23  0  | 24  0
3242     -------------------------------------
3243     Proc2  25 26 27  |  0  0 28  | 29  0
3244            30  0  0  | 31 32 33  |  0 34
3245 .ve
3246 
3247    This can be represented as a collection of submatrices as:
3248 
3249 .vb
3250       A B C
3251       D E F
3252       G H I
3253 .ve
3254 
3255    Where the submatrices A,B,C are owned by proc0, D,E,F are
3256    owned by proc1, G,H,I are owned by proc2.
3257 
3258    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3259    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3260    The 'M','N' parameters are 8,8, and have the same values on all procs.
3261 
3262    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
3263    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
3264    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
3265    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
3266    part as SeqAIJ matrices, e.g., proc1 will store [E] as a SeqAIJ
3267    matrix, and [DF] as another SeqAIJ matrix.
3268 
3269    When d_nz, o_nz parameters are specified, d_nz storage elements are
3270    allocated for every row of the local diagonal submatrix, and o_nz
3271    storage locations are allocated for every row of the OFF-DIAGONAL submat.
3272    One way to choose d_nz and o_nz is to use the maximum number of nonzeros per
3273    local row for each of the local DIAGONAL and the OFF-DIAGONAL submatrices.
3274    In this case, the values of d_nz,o_nz are:
3275 .vb
3276      proc0 : d_nz = 2, o_nz = 2
3277      proc1 : d_nz = 3, o_nz = 2
3278      proc2 : d_nz = 1, o_nz = 4
3279 .ve
3280    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
3281    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
3282    for proc2, i.e. we are using 12+15+10=37 storage locations to store
3283    34 values.
3284 
3285    When d_nnz, o_nnz parameters are specified, the storage is specified
3286    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
3287    In the above case the values for d_nnz,o_nnz are:
3288 .vb
3289      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
3290      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
3291      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
3292 .ve
3293    Here the space allocated is the sum of all the above values, i.e. 34, and
3294    hence pre-allocation is perfect.
3295 
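   As a minimal sketch of a typical call (illustrative only; d_nnz and o_nnz are per-row
   arrays such as those listed above, or PETSC_NULL when d_nz/o_nz are used instead):
.vb
     MatCreate(comm,&A);
     MatSetSizes(A,m,n,M,N);
     MatSetType(A,MATMPIAIJ);
     MatMPIAIJSetPreallocation(A,0,d_nnz,0,o_nnz);
.ve
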
3296    Level: intermediate
3297 
3298 .keywords: matrix, aij, compressed row, sparse, parallel
3299 
3300 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIAIJ(), MatMPIAIJSetPreallocationCSR(),
3301           MPIAIJ, MatGetInfo()
3302 @*/
3303 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
3304 {
3305   PetscErrorCode ierr,(*f)(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]);
3306 
3307   PetscFunctionBegin;
3308   ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr);
3309   if (f) {
3310     ierr = (*f)(B,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
3311   }
3312   PetscFunctionReturn(0);
3313 }
3314 
3315 #undef __FUNCT__
3316 #define __FUNCT__ "MatCreateMPIAIJWithArrays"
3317 /*@
3318      MatCreateMPIAIJWithArrays - creates a MPI AIJ matrix using arrays that contain in standard
3319          CSR format the local rows.
3320 
3321    Collective on MPI_Comm
3322 
3323    Input Parameters:
3324 +  comm - MPI communicator
3325 .  m - number of local rows (Cannot be PETSC_DECIDE)
3326 .  n - This value should be the same as the local size used in creating the
3327        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
3328        calculated if N is given) For square matrices n is almost always m.
3329 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
3330 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
3331 .   i - row indices
3332 .   j - column indices
3333 -   a - matrix values
3334 
3335    Output Parameter:
3336 .   mat - the matrix
3337 
3338    Level: intermediate
3339 
3340    Notes:
3341        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
3342      thus you CANNOT change the matrix entries by changing the values of a[] after you have
3343      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3344 
3345        The i and j indices are 0-based, and the i indices are offsets into the local j array.
3346 
3347        The format which is used for the sparse matrix input, is equivalent to a
3348     row-major ordering, i.e. for the following matrix, the input data expected is
3349     as shown:
3350 
3351         1 0 0
3352         2 0 3     P0
3353        -------
3354         4 5 6     P1
3355 
3356      Process0 [P0]: rows_owned=[0,1]
3357         i =  {0,1,3}  [size = nrow+1  = 2+1]
3358         j =  {0,0,2}  [size = nz = 3]
3359         v =  {1,2,3}  [size = nz = 3]
3360 
3361      Process1 [P1]: rows_owned=[2]
3362         i =  {0,3}    [size = nrow+1  = 1+1]
3363         j =  {0,1,2}  [size = nz = 3]
3364         v =  {4,5,6}  [size = nz = 3]
3365 
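      A minimal calling sketch (illustrative only; i, j and a are the local CSR arrays
      shown above, m and n the local row and column sizes):
.vb
      MatCreateMPIAIJWithArrays(comm,m,n,PETSC_DETERMINE,PETSC_DETERMINE,i,j,a,&A);
.ve
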
3366 .keywords: matrix, aij, compressed row, sparse, parallel
3367 
3368 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
3369           MPIAIJ, MatCreateMPIAIJ(), MatCreateMPIAIJWithSplitArrays()
3370 @*/
3371 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
3372 {
3373   PetscErrorCode ierr;
3374 
3375   PetscFunctionBegin;
3376   if (i[0]) {
3377     SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
3378   }
3379   if (m < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
3380   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
3381   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
3382   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
3383   ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr);
3384   PetscFunctionReturn(0);
3385 }
3386 
3387 #undef __FUNCT__
3388 #define __FUNCT__ "MatCreateMPIAIJ"
3389 /*@C
3390    MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format
3391    (the default parallel PETSc format).  For good matrix assembly performance
3392    the user should preallocate the matrix storage by setting the parameters
3393    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3394    performance can be increased by more than a factor of 50.
3395 
3396    Collective on MPI_Comm
3397 
3398    Input Parameters:
3399 +  comm - MPI communicator
3400 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
3401            This value should be the same as the local size used in creating the
3402            y vector for the matrix-vector product y = Ax.
3403 .  n - This value should be the same as the local size used in creating the
3404        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
3405        calculated if N is given) For square matrices n is almost always m.
3406 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
3407 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
3408 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
3409            (same value is used for all local rows)
3410 .  d_nnz - array containing the number of nonzeros in the various rows of the
3411            DIAGONAL portion of the local submatrix (possibly different for each row)
3412            or PETSC_NULL, if d_nz is used to specify the nonzero structure.
3413            The size of this array is equal to the number of local rows, i.e 'm'.
3414            You must leave room for the diagonal entry even if it is zero.
3415 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
3416            submatrix (same value is used for all local rows).
3417 -  o_nnz - array containing the number of nonzeros in the various rows of the
3418            OFF-DIAGONAL portion of the local submatrix (possibly different for
3419            each row) or PETSC_NULL, if o_nz is used to specify the nonzero
3420            structure. The size of this array is equal to the number
3421            of local rows, i.e 'm'.
3422 
3423    Output Parameter:
3424 .  A - the matrix
3425 
3426    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
3427    MatXXXXSetPreallocation() paradigm instead of calling this routine directly. This is definitely
3428    true if you plan to use the external direct solvers such as SuperLU, MUMPS or Spooles.
3429    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
3430 
3431    Notes:
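   A sketch of that recommended paradigm (illustrative only; each preallocation routine is a
   no-op for matrix types it does not apply to, so both may be called):
.vb
     MatCreate(comm,&A);
     MatSetSizes(A,m,n,M,N);
     MatSetType(A,MATAIJ);
     MatSetFromOptions(A);
     MatMPIAIJSetPreallocation(A,d_nz,d_nnz,o_nz,o_nnz);
     MatSeqAIJSetPreallocation(A,d_nz,d_nnz);
.ve
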
3432    If the *_nnz parameter is given then the *_nz parameter is ignored
3433 
3434    m,n,M,N parameters specify the size of the matrix, and its partitioning across
3435    processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
3436    storage requirements for this matrix.
3437 
3438    If PETSC_DECIDE or  PETSC_DETERMINE is used for a particular argument on one
3439    processor than it must be used on all processors that share the object for
3440    that argument.
3441 
3442    The user MUST specify either the local or global matrix dimensions
3443    (possibly both).
3444 
3445    The parallel matrix is partitioned across processors such that the
3446    first m0 rows belong to process 0, the next m1 rows belong to
3447    process 1, the next m2 rows belong to process 2 etc.. where
3448    m0,m1,m2,.. are the input parameter 'm'. i.e each processor stores
3449    values corresponding to [m x N] submatrix.
3450 
3451    The columns are logically partitioned with the n0 columns belonging
3452    to the 0th partition, the next n1 columns belonging to the next
3453    partition, etc., where n0,n1,n2... are the input parameter 'n'.
3454 
3455    The DIAGONAL portion of the local submatrix on any given processor
3456    is the submatrix corresponding to the rows and columns m,n
3457    corresponding to the given processor, i.e. the diagonal matrix on
3458    process 0 is [m0 x n0], diagonal matrix on process 1 is [m1 x n1]
3459    etc. The remaining portion of the local submatrix [m x (N-n)]
3460    constitute the OFF-DIAGONAL portion. The example below better
3461    illustrates this concept.
3462 
3463    For a square global matrix we define each processor's diagonal portion
3464    to be its local rows and the corresponding columns (a square submatrix);
3465    each processor's off-diagonal portion encompasses the remainder of the
3466    local matrix (a rectangular submatrix).
3467 
3468    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
3469 
3470    When calling this routine with a single process communicator, a matrix of
3471    type SEQAIJ is returned.  If a matrix of type MPIAIJ is desired for this
3472    type of communicator, use the construction mechanism:
3473      MatCreate(...,&A); MatSetType(A,MPIAIJ); MatMPIAIJSetPreallocation(A,...);
3474 
3475    By default, this format uses inodes (identical nodes) when possible.
3476    We search for consecutive rows with the same nonzero structure, thereby
3477    reusing matrix information to achieve increased efficiency.
3478 
3479    Options Database Keys:
3480 +  -mat_no_inode  - Do not use inodes
3481 .  -mat_inode_limit <limit> - Sets inode limit (max limit=5)
3482 -  -mat_aij_oneindex - Internally use indexing starting at 1
3483         rather than 0.  Note that when calling MatSetValues(),
3484         the user still MUST index entries starting at 0!
3485 
3486 
3487    Example usage:
3488 
3489    Consider the following 8x8 matrix with 34 non-zero values, that is
3490    assembled across 3 processors. Lets assume that proc0 owns 3 rows,
3491    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
3492    as follows:
3493 
3494 .vb
3495             1  2  0  |  0  3  0  |  0  4
3496     Proc0   0  5  6  |  7  0  0  |  8  0
3497             9  0 10  | 11  0  0  | 12  0
3498     -------------------------------------
3499            13  0 14  | 15 16 17  |  0  0
3500     Proc1   0 18  0  | 19 20 21  |  0  0
3501             0  0  0  | 22 23  0  | 24  0
3502     -------------------------------------
3503     Proc2  25 26 27  |  0  0 28  | 29  0
3504            30  0  0  | 31 32 33  |  0 34
3505 .ve
3506 
3507    This can be represented as a collection of submatrices as:
3508 
3509 .vb
3510       A B C
3511       D E F
3512       G H I
3513 .ve
3514 
3515    Where the submatrices A,B,C are owned by proc0, D,E,F are
3516    owned by proc1, G,H,I are owned by proc2.
3517 
3518    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3519    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3520    The 'M','N' parameters are 8,8, and have the same values on all procs.
3521 
3522    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
3523    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
3524    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
3525    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
3526    part as SeqAIJ matrices, e.g., proc1 will store [E] as a SeqAIJ
3527    matrix, and [DF] as another SeqAIJ matrix.
3528 
3529    When d_nz, o_nz parameters are specified, d_nz storage elements are
3530    allocated for every row of the local diagonal submatrix, and o_nz
3531    storage locations are allocated for every row of the OFF-DIAGONAL submat.
3532    One way to choose d_nz and o_nz is to use the maximum number of nonzeros per
3533    local row for each of the local DIAGONAL and the OFF-DIAGONAL submatrices.
3534    In this case, the values of d_nz,o_nz are:
3535 .vb
3536      proc0 : d_nz = 2, o_nz = 2
3537      proc1 : d_nz = 3, o_nz = 2
3538      proc2 : d_nz = 1, o_nz = 4
3539 .ve
3540    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
3541    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
3542    for proc2, i.e. we are using 12+15+10=37 storage locations to store
3543    34 values.
3544 
3545    When d_nnz, o_nnz parameters are specified, the storage is specified
3546    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
3547    In the above case the values for d_nnz,o_nnz are:
3548 .vb
3549      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
3550      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
3551      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
3552 .ve
3553    Here the space allocated is the sum of all the above values, i.e. 34, and
3554    hence pre-allocation is perfect.
3555 
3556    Level: intermediate
3557 
3558 .keywords: matrix, aij, compressed row, sparse, parallel
3559 
3560 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
3561           MPIAIJ, MatCreateMPIAIJWithArrays()
3562 @*/
3563 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
3564 {
3565   PetscErrorCode ierr;
3566   PetscMPIInt    size;
3567 
3568   PetscFunctionBegin;
3569   ierr = MatCreate(comm,A);CHKERRQ(ierr);
3570   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
3571   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3572   if (size > 1) {
3573     ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr);
3574     ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
3575   } else {
3576     ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr);
3577     ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr);
3578   }
3579   PetscFunctionReturn(0);
3580 }
3581 
3582 #undef __FUNCT__
3583 #define __FUNCT__ "MatMPIAIJGetSeqAIJ"
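/*
    MatMPIAIJGetSeqAIJ - gives access to the pieces that make up an MPIAIJ matrix: Ad is the
  local "diagonal" SeqAIJ block, Ao is the local off-diagonal SeqAIJ block, and colmap is the
  garray mapping Ao's local column numbers to global column numbers.
*/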
3584 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,PetscInt *colmap[])
3585 {
3586   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
3587 
3588   PetscFunctionBegin;
3589   *Ad     = a->A;
3590   *Ao     = a->B;
3591   *colmap = a->garray;
3592   PetscFunctionReturn(0);
3593 }
3594 
3595 #undef __FUNCT__
3596 #define __FUNCT__ "MatSetColoring_MPIAIJ"
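/*
    MatSetColoring_MPIAIJ - passes a coloring down to the diagonal (A) and off-diagonal (B)
  SeqAIJ blocks.  For IS_COLORING_GLOBAL the colors are gathered from all processes so that
  B's columns (indexed through garray) can be colored; for IS_COLORING_GHOSTED the colors are
  looked up through the matrix's local-to-global mapping.
*/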
3597 PetscErrorCode MatSetColoring_MPIAIJ(Mat A,ISColoring coloring)
3598 {
3599   PetscErrorCode ierr;
3600   PetscInt       i;
3601   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
3602 
3603   PetscFunctionBegin;
3604   if (coloring->ctype == IS_COLORING_GLOBAL) {
3605     ISColoringValue *allcolors,*colors;
3606     ISColoring      ocoloring;
3607 
3608     /* set coloring for diagonal portion */
3609     ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr);
3610 
3611     /* set coloring for off-diagonal portion */
3612     ierr = ISAllGatherColors(((PetscObject)A)->comm,coloring->n,coloring->colors,PETSC_NULL,&allcolors);CHKERRQ(ierr);
3613     ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
3614     for (i=0; i<a->B->cmap->n; i++) {
3615       colors[i] = allcolors[a->garray[i]];
3616     }
3617     ierr = PetscFree(allcolors);CHKERRQ(ierr);
3618     ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
3619     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
3620     ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
3621   } else if (coloring->ctype == IS_COLORING_GHOSTED) {
3622     ISColoringValue *colors;
3623     PetscInt        *larray;
3624     ISColoring      ocoloring;
3625 
3626     /* set coloring for diagonal portion */
3627     ierr = PetscMalloc((a->A->cmap->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr);
3628     for (i=0; i<a->A->cmap->n; i++) {
3629       larray[i] = i + A->cmap->rstart;
3630     }
3631     ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->A->cmap->n,larray,PETSC_NULL,larray);CHKERRQ(ierr);
3632     ierr = PetscMalloc((a->A->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
3633     for (i=0; i<a->A->cmap->n; i++) {
3634       colors[i] = coloring->colors[larray[i]];
3635     }
3636     ierr = PetscFree(larray);CHKERRQ(ierr);
3637     ierr = ISColoringCreate(PETSC_COMM_SELF,coloring->n,a->A->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
3638     ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr);
3639     ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
3640 
3641     /* set coloring for off-diagonal portion */
3642     ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr);
3643     ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->B->cmap->n,a->garray,PETSC_NULL,larray);CHKERRQ(ierr);
3644     ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
3645     for (i=0; i<a->B->cmap->n; i++) {
3646       colors[i] = coloring->colors[larray[i]];
3647     }
3648     ierr = PetscFree(larray);CHKERRQ(ierr);
3649     ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
3650     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
3651     ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
3652   } else {
3653     SETERRQ1(PETSC_ERR_SUP,"No support ISColoringType %d",(int)coloring->ctype);
3654   }
3655 
3656   PetscFunctionReturn(0);
3657 }
3658 
3659 #if defined(PETSC_HAVE_ADIC)
3660 #undef __FUNCT__
3661 #define __FUNCT__ "MatSetValuesAdic_MPIAIJ"
3662 PetscErrorCode MatSetValuesAdic_MPIAIJ(Mat A,void *advalues)
3663 {
3664   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
3665   PetscErrorCode ierr;
3666 
3667   PetscFunctionBegin;
3668   ierr = MatSetValuesAdic_SeqAIJ(a->A,advalues);CHKERRQ(ierr);
3669   ierr = MatSetValuesAdic_SeqAIJ(a->B,advalues);CHKERRQ(ierr);
3670   PetscFunctionReturn(0);
3671 }
3672 #endif
3673 
3674 #undef __FUNCT__
3675 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ"
3676 PetscErrorCode MatSetValuesAdifor_MPIAIJ(Mat A,PetscInt nl,void *advalues)
3677 {
3678   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
3679   PetscErrorCode ierr;
3680 
3681   PetscFunctionBegin;
3682   ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr);
3683   ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr);
3684   PetscFunctionReturn(0);
3685 }
3686 
3687 #undef __FUNCT__
3688 #define __FUNCT__ "MatMerge"
3689 /*@
3690       MatMerge - Creates a single large PETSc matrix by concatenating sequential
3691                  matrices from each processor
3692 
3693     Collective on MPI_Comm
3694 
3695    Input Parameters:
3696 +    comm - the communicator the parallel matrix will live on
3697 .    inmat - the input sequential matrices
3698 .    n - number of local columns (or PETSC_DECIDE)
3699 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
3700 
3701    Output Parameter:
3702 .    outmat - the parallel matrix generated
3703 
3704     Level: advanced
3705 
3706    Notes: The number of columns of the matrix in EACH processor MUST be the same.
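
   A sketch of typical use (illustrative only; seqA is this process's sequential matrix; note
   that the input matrix is destroyed by this routine):
.vb
     MatMerge(comm,seqA,PETSC_DECIDE,MAT_INITIAL_MATRIX,&C);
.ve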
3707 
3708 @*/
3709 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
3710 {
3711   PetscErrorCode ierr;
3712   PetscInt       m,N,i,rstart,nnz,Ii,*dnz,*onz;
3713   PetscInt       *indx;
3714   PetscScalar    *values;
3715 
3716   PetscFunctionBegin;
3717   ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
3718   if (scall == MAT_INITIAL_MATRIX){
3719     /* count nonzeros in each row, for diagonal and off diagonal portion of matrix */
3720     if (n == PETSC_DECIDE){
3721       ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr);
3722     }
3723     ierr = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3724     rstart -= m;
3725 
3726     ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
3727     for (i=0;i<m;i++) {
3728       ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr);
3729       ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr);
3730       ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr);
3731     }
3732     /* This routine will ONLY return MPIAIJ type matrix */
3733     ierr = MatCreate(comm,outmat);CHKERRQ(ierr);
3734     ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
3735     ierr = MatSetType(*outmat,MATMPIAIJ);CHKERRQ(ierr);
3736     ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr);
3737     ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
3738 
3739   } else if (scall == MAT_REUSE_MATRIX){
3740     ierr = MatGetOwnershipRange(*outmat,&rstart,PETSC_NULL);CHKERRQ(ierr);
3741   } else {
3742     SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
3743   }
3744 
3745   for (i=0;i<m;i++) {
3746     ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
3747     Ii    = i + rstart;
3748     ierr = MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
3749     ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
3750   }
3751   ierr = MatDestroy(inmat);CHKERRQ(ierr);
3752   ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3753   ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3754 
3755   PetscFunctionReturn(0);
3756 }
3757 
3758 #undef __FUNCT__
3759 #define __FUNCT__ "MatFileSplit"
3760 PetscErrorCode MatFileSplit(Mat A,char *outfile)
3761 {
3762   PetscErrorCode    ierr;
3763   PetscMPIInt       rank;
3764   PetscInt          m,N,i,rstart,nnz;
3765   size_t            len;
3766   const PetscInt    *indx;
3767   PetscViewer       out;
3768   char              *name;
3769   Mat               B;
3770   const PetscScalar *values;
3771 
3772   PetscFunctionBegin;
3773   ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr);
3774   ierr = MatGetSize(A,0,&N);CHKERRQ(ierr);
3775   /* Should this be the type of the diagonal block of A? */
3776   ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr);
3777   ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr);
3778   ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr);
3779   ierr = MatSeqAIJSetPreallocation(B,0,PETSC_NULL);CHKERRQ(ierr);
3780   ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr);
3781   for (i=0;i<m;i++) {
3782     ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
3783     ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
3784     ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
3785   }
3786   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3787   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3788 
3789   ierr = MPI_Comm_rank(((PetscObject)A)->comm,&rank);CHKERRQ(ierr);
3790   ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr);
3791   ierr = PetscMalloc((len+5)*sizeof(char),&name);CHKERRQ(ierr);
3792   sprintf(name,"%s.%d",outfile,rank);
3793   ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr);
3794   ierr = PetscFree(name);CHKERRQ(ierr);
3795   ierr = MatView(B,out);CHKERRQ(ierr);
3796   ierr = PetscViewerDestroy(out);CHKERRQ(ierr);
3797   ierr = MatDestroy(B);CHKERRQ(ierr);
3798   PetscFunctionReturn(0);
3799 }
3800 
3801 EXTERN PetscErrorCode MatDestroy_MPIAIJ(Mat);
3802 #undef __FUNCT__
3803 #define __FUNCT__ "MatDestroy_MPIAIJ_SeqsToMPI"
3804 PetscErrorCode PETSCMAT_DLLEXPORT MatDestroy_MPIAIJ_SeqsToMPI(Mat A)
3805 {
3806   PetscErrorCode       ierr;
3807   Mat_Merge_SeqsToMPI  *merge;
3808   PetscContainer       container;
3809 
3810   PetscFunctionBegin;
3811   ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr);
3812   if (container) {
3813     ierr = PetscContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr);
3814     ierr = PetscFree(merge->id_r);CHKERRQ(ierr);
3815     ierr = PetscFree(merge->len_s);CHKERRQ(ierr);
3816     ierr = PetscFree(merge->len_r);CHKERRQ(ierr);
3817     ierr = PetscFree(merge->bi);CHKERRQ(ierr);
3818     ierr = PetscFree(merge->bj);CHKERRQ(ierr);
3819     ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr);
3820     ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr);
3821     ierr = PetscFree(merge->coi);CHKERRQ(ierr);
3822     ierr = PetscFree(merge->coj);CHKERRQ(ierr);
3823     ierr = PetscFree(merge->owners_co);CHKERRQ(ierr);
3824     ierr = PetscFree(merge->rowmap.range);CHKERRQ(ierr);
3825 
3826     ierr = PetscContainerDestroy(container);CHKERRQ(ierr);
3827     ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr);
3828     ierr = PetscFree(merge);CHKERRQ(ierr);
3829   }
3830 
3831   ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr);
3832   PetscFunctionReturn(0);
3833 }
3834 
3835 #include "src/mat/utils/freespace.h"
3836 #include "petscbt.h"
3837 
3838 #undef __FUNCT__
3839 #define __FUNCT__ "MatMerge_SeqsToMPINumeric"
3840 /*@C
3841       MatMerge_SeqsToMPI - Creates an MPIAIJ matrix by adding sequential
3842                  matrices from each processor
3843 
3844     Collective on MPI_Comm
3845 
3846    Input Parameters:
3847 +    comm - the communicator the parallel matrix will live on
3848 .    seqmat - the input sequential matrices
3849 .    m - number of local rows (or PETSC_DECIDE)
3850 .    n - number of local columns (or PETSC_DECIDE)
3851 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
3852 
3853    Output Parameter:
3854 .    mpimat - the parallel matrix generated
3855 
3856     Level: advanced
3857 
3858    Notes:
3859      The dimensions of the sequential matrix in each processor MUST be the same.
3860      The input seqmat is stored inside the container "Mat_Merge_SeqsToMPI", and will be
3861      destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat.
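
     A sketch of the intended calling sequence (illustrative only; the symbolic routine builds
     the parallel matrix structure, the numeric routine fills in the values):
.vb
     MatMerge_SeqsToMPISymbolic(comm,seqmat,m,n,&mpimat);
     MatMerge_SeqsToMPINumeric(seqmat,mpimat);
.ve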
3862 @*/
3863 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPINumeric(Mat seqmat,Mat mpimat)
3864 {
3865   PetscErrorCode       ierr;
3866   MPI_Comm             comm=((PetscObject)mpimat)->comm;
3867   Mat_SeqAIJ           *a=(Mat_SeqAIJ*)seqmat->data;
3868   PetscMPIInt          size,rank,taga,*len_s;
3869   PetscInt             N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj=a->j;
3870   PetscInt             proc,m;
3871   PetscInt             **buf_ri,**buf_rj;
3872   PetscInt             k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj;
3873   PetscInt             nrows,**buf_ri_k,**nextrow,**nextai;
3874   MPI_Request          *s_waits,*r_waits;
3875   MPI_Status           *status;
3876   MatScalar            *aa=a->a;
3877   MatScalar            **abuf_r,*ba_i;
3878   Mat_Merge_SeqsToMPI  *merge;
3879   PetscContainer       container;
3880 
3881   PetscFunctionBegin;
3882   ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
3883 
3884   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3885   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3886 
3887   ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr);
3888   if (container) {
3889     ierr  = PetscContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr);
3890   } else SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Matrix was not created with MatMerge_SeqsToMPISymbolic()");
3891   bi     = merge->bi;
3892   bj     = merge->bj;
3893   buf_ri = merge->buf_ri;
3894   buf_rj = merge->buf_rj;
3895 
3896   ierr   = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr);
3897   owners = merge->rowmap.range;
3898   len_s  = merge->len_s;
3899 
3900   /* send and recv matrix values */
3901   /*-----------------------------*/
3902   ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr);
3903   ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr);
3904 
3905   ierr = PetscMalloc((merge->nsend+1)*sizeof(MPI_Request),&s_waits);CHKERRQ(ierr);
3906   for (proc=0,k=0; proc<size; proc++){
3907     if (!len_s[proc]) continue;
3908     i = owners[proc];
3909     ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr);
3910     k++;
3911   }
3912 
3913   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);}
3914   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);}
3915   ierr = PetscFree(status);CHKERRQ(ierr);
3916 
3917   ierr = PetscFree(s_waits);CHKERRQ(ierr);
3918   ierr = PetscFree(r_waits);CHKERRQ(ierr);
3919 
3920   /* insert mat values of mpimat */
3921   /*----------------------------*/
3922   ierr = PetscMalloc(N*sizeof(PetscScalar),&ba_i);CHKERRQ(ierr);
3923   ierr = PetscMalloc((3*merge->nrecv+1)*sizeof(PetscInt**),&buf_ri_k);CHKERRQ(ierr);
3924   nextrow = buf_ri_k + merge->nrecv;
3925   nextai  = nextrow + merge->nrecv;
3926 
3927   for (k=0; k<merge->nrecv; k++){
3928     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th received i-structure */
3929     nrows = *(buf_ri_k[k]);
3930     nextrow[k]  = buf_ri_k[k]+1;  /* next row number of k-th received i-structure */
3931     nextai[k]   = buf_ri_k[k] + (nrows + 1);/* points to the next i-structure of k-th received i-structure */
3932   }
3933 
3934   /* set values of ba */
3935   m = merge->rowmap.n;
3936   for (i=0; i<m; i++) {
3937     arow = owners[rank] + i;
3938     bj_i = bj+bi[i];  /* col indices of the i-th row of mpimat */
3939     bnzi = bi[i+1] - bi[i];
3940     ierr = PetscMemzero(ba_i,bnzi*sizeof(PetscScalar));CHKERRQ(ierr);
3941 
3942     /* add local non-zero vals of this proc's seqmat into ba */
3943     anzi = ai[arow+1] - ai[arow];
3944     aj   = a->j + ai[arow];
3945     aa   = a->a + ai[arow];
3946     nextaj = 0;
3947     for (j=0; nextaj<anzi; j++){
3948       if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */
3949         ba_i[j] += aa[nextaj++];
3950       }
3951     }
3952 
3953     /* add received vals into ba */
3954     for (k=0; k<merge->nrecv; k++){ /* k-th received message */
3955       /* i-th row */
3956       if (i == *nextrow[k]) {
3957         anzi = *(nextai[k]+1) - *nextai[k];
3958         aj   = buf_rj[k] + *(nextai[k]);
3959         aa   = abuf_r[k] + *(nextai[k]);
3960         nextaj = 0;
3961         for (j=0; nextaj<anzi; j++){
3962           if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */
3963             ba_i[j] += aa[nextaj++];
3964           }
3965         }
3966         nextrow[k]++; nextai[k]++;
3967       }
3968     }
3969     ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr);
3970   }
3971   ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3972   ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3973 
3974   ierr = PetscFree(abuf_r);CHKERRQ(ierr);
3975   ierr = PetscFree(ba_i);CHKERRQ(ierr);
3976   ierr = PetscFree(buf_ri_k);CHKERRQ(ierr);
3977   ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
3978   PetscFunctionReturn(0);
3979 }
3980 
3981 #undef __FUNCT__
3982 #define __FUNCT__ "MatMerge_SeqsToMPISymbolic"
3983 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPISymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat)
3984 {
3985   PetscErrorCode       ierr;
3986   Mat                  B_mpi;
3987   Mat_SeqAIJ           *a=(Mat_SeqAIJ*)seqmat->data;
3988   PetscMPIInt          size,rank,tagi,tagj,*len_s,*len_si,*len_ri;
3989   PetscInt             **buf_rj,**buf_ri,**buf_ri_k;
3990   PetscInt             M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j;
3991   PetscInt             len,proc,*dnz,*onz;
3992   PetscInt             k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0;
3993   PetscInt             nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai;
3994   MPI_Request          *si_waits,*sj_waits,*ri_waits,*rj_waits;
3995   MPI_Status           *status;
3996   PetscFreeSpaceList   free_space=PETSC_NULL,current_space=PETSC_NULL;
3997   PetscBT              lnkbt;
3998   Mat_Merge_SeqsToMPI  *merge;
3999   PetscContainer       container;
4000 
4001   PetscFunctionBegin;
4002   ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4003 
4004   /* make sure it is a PETSc comm */
4005   ierr = PetscCommDuplicate(comm,&comm,PETSC_NULL);CHKERRQ(ierr);
4006   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4007   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4008 
4009   ierr = PetscNew(Mat_Merge_SeqsToMPI,&merge);CHKERRQ(ierr);
4010   ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr);
4011 
4012   /* determine row ownership */
4013   /*---------------------------------------------------------*/
4014   ierr = PetscMapInitialize(comm,&merge->rowmap);CHKERRQ(ierr);
4015   merge->rowmap.n = m;
4016   merge->rowmap.N = M;
4017   merge->rowmap.bs = 1;
4018   ierr = PetscMapSetUp(&merge->rowmap);CHKERRQ(ierr);
4019   ierr = PetscMalloc(size*sizeof(PetscMPIInt),&len_si);CHKERRQ(ierr);
4020   ierr = PetscMalloc(size*sizeof(PetscMPIInt),&merge->len_s);CHKERRQ(ierr);
4021 
4022   m      = merge->rowmap.n;
4023   M      = merge->rowmap.N;
4024   owners = merge->rowmap.range;
4025 
4026   /* determine the number of messages to send, their lengths */
4027   /*---------------------------------------------------------*/
4028   len_s  = merge->len_s;
4029 
4030   len = 0;  /* length of buf_si[] */
4031   merge->nsend = 0;
4032   for (proc=0; proc<size; proc++){
4033     len_si[proc] = 0;
4034     if (proc == rank){
4035       len_s[proc] = 0;
4036     } else {
4037       len_si[proc] = owners[proc+1] - owners[proc] + 1;
4038       len_s[proc] = ai[owners[proc+1]] - ai[owners[proc]]; /* number of nonzeros in the rows to be sent to [proc] */
4039     }
4040     if (len_s[proc]) {
4041       merge->nsend++;
4042       nrows = 0;
4043       for (i=owners[proc]; i<owners[proc+1]; i++){
4044         if (ai[i+1] > ai[i]) nrows++;
4045       }
4046       len_si[proc] = 2*(nrows+1);
4047       len += len_si[proc];
4048     }
4049   }
4050 
4051   /* determine the number and length of messages to receive for ij-structure */
4052   /*-------------------------------------------------------------------------*/
4053   ierr = PetscGatherNumberOfMessages(comm,PETSC_NULL,len_s,&merge->nrecv);CHKERRQ(ierr);
4054   ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr);
4055 
4056   /* post the Irecv of j-structure */
4057   /*-------------------------------*/
4058   ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr);
4059   ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr);
4060 
4061   /* post the Isend of j-structure */
4062   /*--------------------------------*/
4063   ierr = PetscMalloc((2*merge->nsend+1)*sizeof(MPI_Request),&si_waits);CHKERRQ(ierr);
4064   sj_waits = si_waits + merge->nsend;
4065 
4066   for (proc=0, k=0; proc<size; proc++){
4067     if (!len_s[proc]) continue;
4068     i = owners[proc];
4069     ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr);
4070     k++;
4071   }
4072 
4073   /* receives and sends of j-structure are complete */
4074   /*------------------------------------------------*/
4075   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);}
4076   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);}
4077 
4078   /* send and recv i-structure */
4079   /*---------------------------*/
4080   ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr);
4081   ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr);
4082 
4083   ierr = PetscMalloc((len+1)*sizeof(PetscInt),&buf_s);CHKERRQ(ierr);
4084   buf_si = buf_s;  /* points to the beginning of k-th msg to be sent */
4085   for (proc=0,k=0; proc<size; proc++){
4086     if (!len_s[proc]) continue;
4087     /* form outgoing message for i-structure:
4088          buf_si[0]:                 nrows to be sent
4089                [1:nrows]:           row index (local to the receiving process)
4090                [nrows+1:2*nrows+1]: i-structure index
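               e.g. (hypothetical illustration) sending two rows with local indices 2 and 5
               that have 3 and 4 nonzeros respectively packs buf_si = {2, 2,5, 0,3,7}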
4091     */
4092     /*-------------------------------------------*/
4093     nrows = len_si[proc]/2 - 1;
4094     buf_si_i    = buf_si + nrows+1;
4095     buf_si[0]   = nrows;
4096     buf_si_i[0] = 0;
4097     nrows = 0;
4098     for (i=owners[proc]; i<owners[proc+1]; i++){
4099       anzi = ai[i+1] - ai[i];
4100       if (anzi) {
4101         buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */
4102         buf_si[nrows+1] = i-owners[proc]; /* local row index */
4103         nrows++;
4104       }
4105     }
4106     ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr);
4107     k++;
4108     buf_si += len_si[proc];
4109   }
4110 
4111   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);}
4112   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);}
4113 
4114   ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr);
4115   for (i=0; i<merge->nrecv; i++){
4116     ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr);
4117   }
4118 
4119   ierr = PetscFree(len_si);CHKERRQ(ierr);
4120   ierr = PetscFree(len_ri);CHKERRQ(ierr);
4121   ierr = PetscFree(rj_waits);CHKERRQ(ierr);
4122   ierr = PetscFree(si_waits);CHKERRQ(ierr);
4123   ierr = PetscFree(ri_waits);CHKERRQ(ierr);
4124   ierr = PetscFree(buf_s);CHKERRQ(ierr);
4125   ierr = PetscFree(status);CHKERRQ(ierr);
4126 
4127   /* compute a local seq matrix in each processor */
4128   /*----------------------------------------------*/
4129   /* allocate bi array and free space for accumulating nonzero column info */
4130   ierr = PetscMalloc((m+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr);
4131   bi[0] = 0;
4132 
4133   /* create and initialize a linked list */
4134   nlnk = N+1;
4135   ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4136 
4137   /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */
4138   len = 0;
4139   len  = ai[owners[rank+1]] - ai[owners[rank]];
4140   ierr = PetscFreeSpaceGet((PetscInt)(2*len+1),&free_space);CHKERRQ(ierr);
4141   current_space = free_space;
4142 
4143   /* determine symbolic info for each local row */
4144   ierr = PetscMalloc((3*merge->nrecv+1)*sizeof(PetscInt**),&buf_ri_k);CHKERRQ(ierr);
4145   nextrow = buf_ri_k + merge->nrecv;
4146   nextai  = nextrow + merge->nrecv;
4147   for (k=0; k<merge->nrecv; k++){
4148     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
4149     nrows = *buf_ri_k[k];
4150     nextrow[k]  = buf_ri_k[k] + 1;  /* next row number of k-th recved i-structure */
4151     nextai[k]   = buf_ri_k[k] + (nrows + 1);/* points to the next i-structure of k-th recved i-structure  */
4152   }
4153 
4154   ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
4155   len = 0;
4156   for (i=0;i<m;i++) {
4157     bnzi   = 0;
4158     /* add local non-zero cols of this proc's seqmat into lnk */
4159     arow   = owners[rank] + i;
4160     anzi   = ai[arow+1] - ai[arow];
4161     aj     = a->j + ai[arow];
4162     ierr = PetscLLAdd(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4163     bnzi += nlnk;
4164     /* add received col data into lnk */
4165     for (k=0; k<merge->nrecv; k++){ /* k-th received message */
4166       if (i == *nextrow[k]) { /* i-th row */
4167         anzi = *(nextai[k]+1) - *nextai[k];
4168         aj   = buf_rj[k] + *nextai[k];
4169         ierr = PetscLLAdd(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4170         bnzi += nlnk;
4171         nextrow[k]++; nextai[k]++;
4172       }
4173     }
4174     if (len < bnzi) len = bnzi;  /* =max(bnzi) */
4175 
4176     /* if free space is not available, make more free space */
4177     if (current_space->local_remaining<bnzi) {
4178       ierr = PetscFreeSpaceGet(bnzi+current_space->total_array_size,&current_space);CHKERRQ(ierr);
4179       nspacedouble++;
4180     }
4181     /* copy data into free space, then initialize lnk */
4182     ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
4183     ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr);
4184 
4185     current_space->array           += bnzi;
4186     current_space->local_used      += bnzi;
4187     current_space->local_remaining -= bnzi;
4188 
4189     bi[i+1] = bi[i] + bnzi;
4190   }
4191 
4192   ierr = PetscFree(buf_ri_k);CHKERRQ(ierr);
4193 
4194   ierr = PetscMalloc((bi[m]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr);
4195   ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr);
4196   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
4197 
4198   /* create symbolic parallel matrix B_mpi */
4199   /*---------------------------------------*/
4200   ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr);
4201   if (n==PETSC_DECIDE) {
4202     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr);
4203   } else {
4204     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
4205   }
4206   ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr);
4207   ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr);
4208   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
4209 
4210   /* B_mpi is not ready for use - assembly will be done by MatMerge_SeqsToMPINumeric() */
4211   B_mpi->assembled     = PETSC_FALSE;
4212   B_mpi->ops->destroy  = MatDestroy_MPIAIJ_SeqsToMPI;
4213   merge->bi            = bi;
4214   merge->bj            = bj;
4215   merge->buf_ri        = buf_ri;
4216   merge->buf_rj        = buf_rj;
4217   merge->coi           = PETSC_NULL;
4218   merge->coj           = PETSC_NULL;
4219   merge->owners_co     = PETSC_NULL;
4220 
4221   /* attach the supporting struct to B_mpi for reuse */
4222   ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
4223   ierr = PetscContainerSetPointer(container,merge);CHKERRQ(ierr);
4224   ierr = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr);
4225   *mpimat = B_mpi;
4226 
4227   ierr = PetscCommDestroy(&comm);CHKERRQ(ierr);
4228   ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4229   PetscFunctionReturn(0);
4230 }
4231 
4232 #undef __FUNCT__
4233 #define __FUNCT__ "MatMerge_SeqsToMPI"
4234 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPI(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat)
4235 {
4236   PetscErrorCode   ierr;
4237 
4238   PetscFunctionBegin;
4239   ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4240   if (scall == MAT_INITIAL_MATRIX){
4241     ierr = MatMerge_SeqsToMPISymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr);
4242   }
4243   ierr = MatMerge_SeqsToMPINumeric(seqmat,*mpimat);CHKERRQ(ierr);
4244   ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4245   PetscFunctionReturn(0);
4246 }
4247 
4248 #undef __FUNCT__
4249 #define __FUNCT__ "MatGetLocalMat"
4250 /*@
4251      MatGetLocalMat - Creates a SeqAIJ matrix that contains all the local rows of a parallel MPIAIJ matrix
4252 
4253     Not Collective
4254 
4255    Input Parameters:
4256 +    A - the matrix
4257 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4258 
4259    Output Parameter:
4260 .    A_loc - the local sequential matrix generated
4261 
4262     Level: developer
4263 
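   Example usage (a minimal sketch; A is assumed to be an assembled MPIAIJ matrix and
   the variable names are illustrative only):
$     Mat A_loc;
$     ierr = MatGetLocalMat(A,MAT_INITIAL_MATRIX,&A_loc);CHKERRQ(ierr);
$     ierr = MatGetLocalMat(A,MAT_REUSE_MATRIX,&A_loc);CHKERRQ(ierr);
$     ierr = MatDestroy(A_loc);CHKERRQ(ierr);
   The MAT_REUSE_MATRIX call refreshes the numerical values after the entries of A change
   while its nonzero pattern stays the same.
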
4264 @*/
4265 PetscErrorCode PETSCMAT_DLLEXPORT MatGetLocalMat(Mat A,MatReuse scall,Mat *A_loc)
4266 {
4267   PetscErrorCode  ierr;
4268   Mat_MPIAIJ      *mpimat=(Mat_MPIAIJ*)A->data;
4269   Mat_SeqAIJ      *mat,*a=(Mat_SeqAIJ*)(mpimat->A)->data,*b=(Mat_SeqAIJ*)(mpimat->B)->data;
4270   PetscInt        *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*cmap=mpimat->garray;
4271   MatScalar       *aa=a->a,*ba=b->a,*cam;
4272   PetscScalar     *ca;
4273   PetscInt        am=A->rmap->n,i,j,k,cstart=A->cmap->rstart;
4274   PetscInt        *ci,*cj,col,ncols_d,ncols_o,jo;
4275 
4276   PetscFunctionBegin;
4277   ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
4278   if (scall == MAT_INITIAL_MATRIX){
4279     ierr = PetscMalloc((1+am)*sizeof(PetscInt),&ci);CHKERRQ(ierr);
4280     ci[0] = 0;
4281     for (i=0; i<am; i++){
4282       ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]);
4283     }
4284     ierr = PetscMalloc((1+ci[am])*sizeof(PetscInt),&cj);CHKERRQ(ierr);
4285     ierr = PetscMalloc((1+ci[am])*sizeof(PetscScalar),&ca);CHKERRQ(ierr);
4286     k = 0;
4287     for (i=0; i<am; i++) {
4288       ncols_o = bi[i+1] - bi[i];
4289       ncols_d = ai[i+1] - ai[i];
4290       /* off-diagonal portion of A: columns before the diagonal block */
4291       for (jo=0; jo<ncols_o; jo++) {
4292         col = cmap[*bj];
4293         if (col >= cstart) break;
4294         cj[k]   = col; bj++;
4295         ca[k++] = *ba++;
4296       }
4297       /* diagonal portion of A */
4298       for (j=0; j<ncols_d; j++) {
4299         cj[k]   = cstart + *aj++;
4300         ca[k++] = *aa++;
4301       }
4302       /* off-diagonal portion of A: columns after the diagonal block */
4303       for (j=jo; j<ncols_o; j++) {
4304         cj[k]   = cmap[*bj++];
4305         ca[k++] = *ba++;
4306       }
4307     }
4308     /* put together the new matrix */
4309     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr);
4310     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
4311     /* Since these are PETSc arrays, change flags to free them as necessary. */
4312     mat          = (Mat_SeqAIJ*)(*A_loc)->data;
4313     mat->free_a  = PETSC_TRUE;
4314     mat->free_ij = PETSC_TRUE;
4315     mat->nonew   = 0;
4316   } else if (scall == MAT_REUSE_MATRIX){
4317     mat=(Mat_SeqAIJ*)(*A_loc)->data;
4318     ci = mat->i; cj = mat->j; cam = mat->a;
4319     for (i=0; i<am; i++) {
4320       /* off-diagonal portion of A: columns before the diagonal block */
4321       ncols_o = bi[i+1] - bi[i];
4322       for (jo=0; jo<ncols_o; jo++) {
4323         col = cmap[*bj];
4324         if (col >= cstart) break;
4325         *cam++ = *ba++; bj++;
4326       }
4327       /* diagonal portion of A */
4328       ncols_d = ai[i+1] - ai[i];
4329       for (j=0; j<ncols_d; j++) *cam++ = *aa++;
4330       /* off-diagonal portion of A: columns after the diagonal block */
4331       for (j=jo; j<ncols_o; j++) {
4332         *cam++ = *ba++; bj++;
4333       }
4334     }
4335   } else {
4336     SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
4337   }
4338 
4339   ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
4340   PetscFunctionReturn(0);
4341 }
4342 
4343 #undef __FUNCT__
4344 #define __FUNCT__ "MatGetLocalMatCondensed"
4345 /*@C
4346      MatGetLocalMatCondensed - Creates a SeqAIJ matrix from all the local rows of a parallel MPIAIJ matrix, keeping only the NON-ZERO columns
4347 
4348     Not Collective
4349 
4350    Input Parameters:
4351 +    A - the matrix
4352 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4353 -    row, col - index sets of rows and columns to extract (or PETSC_NULL)
4354 
4355    Output Parameter:
4356 .    A_loc - the local sequential matrix generated
4357 
4358     Level: developer
4359 
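   Example usage (a minimal sketch; A is assumed to be an assembled MPIAIJ matrix and the
   row and column index sets are left to the routine by passing PETSC_NULL):
$     Mat A_loc;
$     ierr = MatGetLocalMatCondensed(A,MAT_INITIAL_MATRIX,PETSC_NULL,PETSC_NULL,&A_loc);CHKERRQ(ierr);
$     ierr = MatDestroy(A_loc);CHKERRQ(ierr);
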
4360 @*/
4361 PetscErrorCode PETSCMAT_DLLEXPORT MatGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc)
4362 {
4363   Mat_MPIAIJ        *a=(Mat_MPIAIJ*)A->data;
4364   PetscErrorCode    ierr;
4365   PetscInt          i,start,end,ncols,nzA,nzB,*cmap,imark,*idx;
4366   IS                isrowa,iscola;
4367   Mat               *aloc;
4368 
4369   PetscFunctionBegin;
4370   ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
4371   if (!row){
4372     start = A->rmap->rstart; end = A->rmap->rend;
4373     ierr = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr);
4374   } else {
4375     isrowa = *row;
4376   }
4377   if (!col){
4378     start = A->cmap->rstart;
4379     cmap  = a->garray;
4380     nzA   = a->A->cmap->n;
4381     nzB   = a->B->cmap->n;
4382     ierr  = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr);
4383     ncols = 0;
4384     for (i=0; i<nzB; i++) {
4385       if (cmap[i] < start) idx[ncols++] = cmap[i];
4386       else break;
4387     }
4388     imark = i;
4389     for (i=0; i<nzA; i++) idx[ncols++] = start + i;
4390     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i];
4391     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,&iscola);CHKERRQ(ierr);
4392     ierr = PetscFree(idx);CHKERRQ(ierr);
4393   } else {
4394     iscola = *col;
4395   }
4396   if (scall != MAT_INITIAL_MATRIX){
4397     ierr = PetscMalloc(sizeof(Mat),&aloc);CHKERRQ(ierr);
4398     aloc[0] = *A_loc;
4399   }
4400   ierr = MatGetSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr);
4401   *A_loc = aloc[0];
4402   ierr = PetscFree(aloc);CHKERRQ(ierr);
4403   if (!row){
4404     ierr = ISDestroy(isrowa);CHKERRQ(ierr);
4405   }
4406   if (!col){
4407     ierr = ISDestroy(iscola);CHKERRQ(ierr);
4408   }
4409   ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
4410   PetscFunctionReturn(0);
4411 }
4412 
4413 #undef __FUNCT__
4414 #define __FUNCT__ "MatGetBrowsOfAcols"
4415 /*@C
4416     MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns of the local part of A
4417 
4418     Collective on Mat
4419 
4420    Input Parameters:
4421 +    A,B - the matrices in mpiaij format
4422 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4423 -    rowb, colb - index sets of rows and columns of B to extract (or PETSC_NULL)
4424 
4425    Output Parameter:
4426 +    rowb, colb - index sets of rows and columns of B to extract
4427 .    brstart - row index of B_seq at which B's own local rows begin (the next B->rmap->n rows of B_seq are taken from B's local rows)
4428 -    B_seq - the sequential matrix generated
4429 
4430     Level: developer
4431 
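   Example usage (a minimal sketch; A and B are assumed to be assembled MPIAIJ matrices with
   compatible layouts, and the index sets are not kept by passing PETSC_NULL):
$     Mat      B_seq;
$     PetscInt brstart;
$     ierr = MatGetBrowsOfAcols(A,B,MAT_INITIAL_MATRIX,PETSC_NULL,PETSC_NULL,&brstart,&B_seq);CHKERRQ(ierr);
$     ierr = MatDestroy(B_seq);CHKERRQ(ierr);
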
4432 @*/
4433 PetscErrorCode PETSCMAT_DLLEXPORT MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,PetscInt *brstart,Mat *B_seq)
4434 {
4435   Mat_MPIAIJ        *a=(Mat_MPIAIJ*)A->data;
4436   PetscErrorCode    ierr;
4437   PetscInt          *idx,i,start,ncols,nzA,nzB,*cmap,imark;
4438   IS                isrowb,iscolb;
4439   Mat               *bseq;
4440 
4441   PetscFunctionBegin;
4442   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend){
4443     SETERRQ4(PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
4444   }
4445   ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
4446 
4447   if (scall == MAT_INITIAL_MATRIX){
4448     start = A->cmap->rstart;
4449     cmap  = a->garray;
4450     nzA   = a->A->cmap->n;
4451     nzB   = a->B->cmap->n;
4452     ierr  = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr);
4453     ncols = 0;
4454     for (i=0; i<nzB; i++) {  /* global rows of B below this process's row range */
4455       if (cmap[i] < start) idx[ncols++] = cmap[i];
4456       else break;
4457     }
4458     imark = i;
4459     for (i=0; i<nzA; i++) idx[ncols++] = start + i;  /* local rows */
4460     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* global rows of B above this process's row range */
4461     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,&isrowb);CHKERRQ(ierr);
4462     ierr = PetscFree(idx);CHKERRQ(ierr);
4463     *brstart = imark;
4464     ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr);
4465   } else {
4466     if (!rowb || !colb) SETERRQ(PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX");
4467     isrowb = *rowb; iscolb = *colb;
4468     ierr = PetscMalloc(sizeof(Mat),&bseq);CHKERRQ(ierr);
4469     bseq[0] = *B_seq;
4470   }
4471   ierr = MatGetSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr);
4472   *B_seq = bseq[0];
4473   ierr = PetscFree(bseq);CHKERRQ(ierr);
4474   if (!rowb){
4475     ierr = ISDestroy(isrowb);CHKERRQ(ierr);
4476   } else {
4477     *rowb = isrowb;
4478   }
4479   if (!colb){
4480     ierr = ISDestroy(iscolb);CHKERRQ(ierr);
4481   } else {
4482     *colb = iscolb;
4483   }
4484   ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
4485   PetscFunctionReturn(0);
4486 }
4487 
4488 #undef __FUNCT__
4489 #define __FUNCT__ "MatGetBrowsOfAoCols"
4490 /*@C
4491     MatGetBrowsOfAoCols - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns
4492     of the OFF-DIAGONAL portion of the local part of A
4493 
4494     Collective on Mat
4495 
4496    Input Parameters:
4497 +    A,B - the matrices in mpiaij format
4498 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4499 .    startsj - starting point in B's sending and receiving j-arrays, saved for MAT_REUSE (or PETSC_NULL)
4500 -    bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or PETSC_NULL)
4501 
4502    Output Parameter:
4503 .    B_oth - the sequential matrix generated
4504 
4505     Level: developer
4506 
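   Example usage (a minimal sketch; A and B are assumed to be assembled MPIAIJ matrices with
   compatible layouts, and the communication buffers are not kept by passing PETSC_NULL):
$     Mat B_oth;
$     ierr = MatGetBrowsOfAoCols(A,B,MAT_INITIAL_MATRIX,PETSC_NULL,PETSC_NULL,&B_oth);CHKERRQ(ierr);
$     ierr = MatDestroy(B_oth);CHKERRQ(ierr);
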
4507 @*/
4508 PetscErrorCode PETSCMAT_DLLEXPORT MatGetBrowsOfAoCols(Mat A,Mat B,MatReuse scall,PetscInt **startsj,MatScalar **bufa_ptr,Mat *B_oth)
4509 {
4510   VecScatter_MPI_General *gen_to,*gen_from;
4511   PetscErrorCode         ierr;
4512   Mat_MPIAIJ             *a=(Mat_MPIAIJ*)A->data;
4513   Mat_SeqAIJ             *b_oth;
4514   VecScatter             ctx=a->Mvctx;
4515   MPI_Comm               comm=((PetscObject)ctx)->comm;
4516   PetscMPIInt            *rprocs,*sprocs,tag=((PetscObject)ctx)->tag,rank;
4517   PetscInt               *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap->n,row,*b_othi,*b_othj;
4518   PetscScalar            *rvalues,*svalues;
4519   MatScalar              *b_otha,*bufa,*bufA;
4520   PetscInt               i,j,k,l,ll,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len;
4521   MPI_Request            *rwaits = PETSC_NULL,*swaits = PETSC_NULL;
4522   MPI_Status             *sstatus,rstatus;
4523   PetscMPIInt            jj;
4524   PetscInt               *cols,sbs,rbs;
4525   PetscScalar            *vals;
4526 
4527   PetscFunctionBegin;
4528   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend){
4529     SETERRQ4(PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
4530   }
4531   ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
4532   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4533 
4534   gen_to   = (VecScatter_MPI_General*)ctx->todata;
4535   gen_from = (VecScatter_MPI_General*)ctx->fromdata;
4536   rvalues  = gen_from->values; /* reused to hold the lengths of the rows to be received */
4537   svalues  = gen_to->values;   /* reused to hold the lengths of the rows to be sent */
4538   nrecvs   = gen_from->n;
4539   nsends   = gen_to->n;
4540 
4541   ierr = PetscMalloc2(nrecvs,MPI_Request,&rwaits,nsends,MPI_Request,&swaits);CHKERRQ(ierr);
4542   srow     = gen_to->indices;   /* local row index to be sent */
4543   sstarts  = gen_to->starts;
4544   sprocs   = gen_to->procs;
4545   sstatus  = gen_to->sstatus;
4546   sbs      = gen_to->bs;
4547   rstarts  = gen_from->starts;
4548   rprocs   = gen_from->procs;
4549   rbs      = gen_from->bs;
4550 
4551   if (!startsj || !bufa_ptr) scall = MAT_INITIAL_MATRIX;
4552   if (scall == MAT_INITIAL_MATRIX){
4553     /* i-array */
4554     /*---------*/
4555     /*  post receives */
4556     for (i=0; i<nrecvs; i++){
4557       rowlen = (PetscInt*)rvalues + rstarts[i]*rbs;
4558       nrows = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */
4559       ierr = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
4560     }
4561 
4562     /* pack the outgoing message */
4563     ierr = PetscMalloc((nsends+nrecvs+3)*sizeof(PetscInt),&sstartsj);CHKERRQ(ierr);
4564     rstartsj = sstartsj + nsends +1;
4565     sstartsj[0] = 0;  rstartsj[0] = 0;
4566     len = 0; /* total length of j or a array to be sent */
4567     k = 0;
4568     for (i=0; i<nsends; i++){
4569       rowlen = (PetscInt*)svalues + sstarts[i]*sbs;
4570       nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
4571       for (j=0; j<nrows; j++) {
4572         row = srow[k] + B->rmap->range[rank]; /* global row idx */
4573         for (l=0; l<sbs; l++){
4574           ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); /* rowlength */
4575           rowlen[j*sbs+l] = ncols;
4576           len += ncols;
4577           ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr);
4578         }
4579         k++;
4580       }
4581       ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
4582       sstartsj[i+1] = len;  /* starting point of (i+1)-th outgoing msg in bufj and bufa */
4583     }
4584     /* recvs and sends of i-array are completed */
4585     i = nrecvs;
4586     while (i--) {
4587       ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
4588     }
4589     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
4590 
4591     /* allocate buffers for sending j and a arrays */
4592     ierr = PetscMalloc((len+1)*sizeof(PetscInt),&bufj);CHKERRQ(ierr);
4593     ierr = PetscMalloc((len+1)*sizeof(PetscScalar),&bufa);CHKERRQ(ierr);
4594 
4595     /* create i-array of B_oth */
4596     ierr = PetscMalloc((aBn+2)*sizeof(PetscInt),&b_othi);CHKERRQ(ierr);
4597     b_othi[0] = 0;
4598     len = 0; /* total length of j or a array to be received */
4599     k = 0;
4600     for (i=0; i<nrecvs; i++){
4601       rowlen = (PetscInt*)rvalues + rstarts[i]*rbs;
4602       nrows = rbs*(rstarts[i+1]-rstarts[i]); /* num of rows to be received */
4603       for (j=0; j<nrows; j++) {
4604         b_othi[k+1] = b_othi[k] + rowlen[j];
4605         len += rowlen[j]; k++;
4606       }
4607       rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */
4608     }
4609 
4610     /* allocate space for j and a arrays of B_oth */
4611     ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(PetscInt),&b_othj);CHKERRQ(ierr);
4612     ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(MatScalar),&b_otha);CHKERRQ(ierr);
4613 
4614     /* j-array */
4615     /*---------*/
4616     /*  post receives of j-array */
4617     for (i=0; i<nrecvs; i++){
4618       nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
4619       ierr = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
4620     }
4621 
4622     /* pack the outgoing message j-array */
4623     k = 0;
4624     for (i=0; i<nsends; i++){
4625       nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
4626       bufJ = bufj+sstartsj[i];
4627       for (j=0; j<nrows; j++) {
4628         row  = srow[k++] + B->rmap->range[rank]; /* global row idx */
4629         for (ll=0; ll<sbs; ll++){
4630           ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr);
4631           for (l=0; l<ncols; l++){
4632             *bufJ++ = cols[l];
4633           }
4634           ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr);
4635         }
4636       }
4637       ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
4638     }
4639 
4640     /* recvs and sends of j-array are completed */
4641     i = nrecvs;
4642     while (i--) {
4643       ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
4644     }
4645     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
4646   } else if (scall == MAT_REUSE_MATRIX){
4647     sstartsj = *startsj;
4648     rstartsj = sstartsj + nsends +1;
4649     bufa     = *bufa_ptr;
4650     b_oth    = (Mat_SeqAIJ*)(*B_oth)->data;
4651     b_otha   = b_oth->a;
4652   } else {
4653     SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
4654   }
4655 
4656   /* a-array */
4657   /*---------*/
4658   /*  post receives of a-array */
4659   for (i=0; i<nrecvs; i++){
4660     nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
4661     ierr = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
4662   }
4663 
4664   /* pack the outgoing message a-array */
4665   k = 0;
4666   for (i=0; i<nsends; i++){
4667     nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
4668     bufA = bufa+sstartsj[i];
4669     for (j=0; j<nrows; j++) {
4670       row  = srow[k++] + B->rmap->range[rank]; /* global row idx */
4671       for (ll=0; ll<sbs; ll++){
4672         ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr);
4673         for (l=0; l<ncols; l++){
4674           *bufA++ = vals[l];
4675         }
4676         ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr);
4677       }
4678     }
4679     ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
4680   }
4681   /* recvs and sends of a-array are completed */
4682   i = nrecvs;
4683   while (i--) {
4684     ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
4685   }
4686   if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
4687   ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr);
4688 
4689   if (scall == MAT_INITIAL_MATRIX){
4690     /* put together the new matrix */
4691     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr);
4692 
4693     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
4694     /* Since these are PETSc arrays, change flags to free them as necessary. */
4695     b_oth          = (Mat_SeqAIJ *)(*B_oth)->data;
4696     b_oth->free_a  = PETSC_TRUE;
4697     b_oth->free_ij = PETSC_TRUE;
4698     b_oth->nonew   = 0;
4699 
4700     ierr = PetscFree(bufj);CHKERRQ(ierr);
4701     if (!startsj || !bufa_ptr){
4702       ierr = PetscFree(sstartsj);CHKERRQ(ierr);
4703       ierr = PetscFree(bufa);CHKERRQ(ierr);
4704     } else {
4705       *startsj  = sstartsj;
4706       *bufa_ptr = bufa;
4707     }
4708   }
4709   ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
4710   PetscFunctionReturn(0);
4711 }
4712 
4713 #undef __FUNCT__
4714 #define __FUNCT__ "MatGetCommunicationStructs"
4715 /*@C
4716   MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication.
4717 
4718   Not Collective
4719 
4720   Input Parameters:
4721 . A - The matrix in mpiaij format
4722 
4723   Output Parameter:
4724 + lvec - The local vector holding off-process values from the argument to a matrix-vector product
4725 . colmap - A map from global column index to local index into lvec
4726 - multScatter - A scatter from the argument of a matrix-vector product to lvec
4727 
4728   Level: developer
4729 
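  Example usage (a minimal sketch for a build without PETSC_USE_CTABLE; A is assumed to be an
  assembled MPIAIJ matrix and all three output pointers must be valid):
$     Vec        lvec;
$     PetscInt   *colmap;
$     VecScatter Mvctx;
$     ierr = MatGetCommunicationStructs(A,&lvec,&colmap,&Mvctx);CHKERRQ(ierr);
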
4730 @*/
4731 #if defined (PETSC_USE_CTABLE)
4732 PetscErrorCode PETSCMAT_DLLEXPORT MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter)
4733 #else
4734 PetscErrorCode PETSCMAT_DLLEXPORT MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter)
4735 #endif
4736 {
4737   Mat_MPIAIJ *a;
4738 
4739   PetscFunctionBegin;
4740   PetscValidHeaderSpecific(A, MAT_COOKIE, 1);
4741   PetscValidPointer(lvec, 2);
4742   PetscValidPointer(colmap, 3);
4743   PetscValidPointer(multScatter, 4);
4744   a = (Mat_MPIAIJ *) A->data;
4745   if (lvec) *lvec = a->lvec;
4746   if (colmap) *colmap = a->colmap;
4747   if (multScatter) *multScatter = a->Mvctx;
4748   PetscFunctionReturn(0);
4749 }
4750 
4751 EXTERN_C_BEGIN
4752 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_MPIAIJ_MPICRL(Mat,const MatType,MatReuse,Mat*);
4753 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_MPIAIJ_MPICSRPERM(Mat,const MatType,MatReuse,Mat*);
4754 EXTERN_C_END
4755 
4756 #include "src/mat/impls/dense/mpi/mpidense.h"
4757 
4758 #undef __FUNCT__
4759 #define __FUNCT__ "MatMatMultNumeric_MPIDense_MPIAIJ"
4760 /*
4761     Computes (B'*A')' since computing B*A directly is untenable
4762 
4763                n                       p                          p
4764         (              )       (              )         (                  )
4765       m (      A       )  *  n (       B      )   =   m (         C        )
4766         (              )       (              )         (                  )
4767 
4768 */
4769 PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C)
4770 {
4771   PetscErrorCode     ierr;
4772   Mat                At,Bt,Ct;
4773 
4774   PetscFunctionBegin;
4775   ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr);
4776   ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr);
4777   ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,1.0,&Ct);CHKERRQ(ierr);
4778   ierr = MatDestroy(At);CHKERRQ(ierr);
4779   ierr = MatDestroy(Bt);CHKERRQ(ierr);
4780   ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr);
4781   ierr = MatDestroy(Ct);CHKERRQ(ierr);
4782   PetscFunctionReturn(0);
4783 }
4784 
4785 #undef __FUNCT__
4786 #define __FUNCT__ "MatMatMultSymbolic_MPIDense_MPIAIJ"
4787 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat *C)
4788 {
4789   PetscErrorCode ierr;
4790   PetscInt       m=A->rmap->n,n=B->cmap->n;
4791   Mat            Cmat;
4792 
4793   PetscFunctionBegin;
4794   if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_ERR_ARG_SIZ,"A->cmap->n %d != B->rmap->n %d\n",A->cmap->n,B->rmap->n);
4795   ierr = MatCreate(((PetscObject)A)->comm,&Cmat);CHKERRQ(ierr);
4796   ierr = MatSetSizes(Cmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
4797   ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr);
4798   ierr = MatMPIDenseSetPreallocation(Cmat,PETSC_NULL);CHKERRQ(ierr);
4799   ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4800   ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4801   *C   = Cmat;
4802   PetscFunctionReturn(0);
4803 }
4804 
4805 /* ----------------------------------------------------------------*/
4806 #undef __FUNCT__
4807 #define __FUNCT__ "MatMatMult_MPIDense_MPIAIJ"
4808 PetscErrorCode MatMatMult_MPIDense_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C)
4809 {
4810   PetscErrorCode ierr;
4811 
4812   PetscFunctionBegin;
4813   if (scall == MAT_INITIAL_MATRIX){
4814     ierr = MatMatMultSymbolic_MPIDense_MPIAIJ(A,B,fill,C);CHKERRQ(ierr);
4815   }
4816   ierr = MatMatMultNumeric_MPIDense_MPIAIJ(A,B,*C);CHKERRQ(ierr);
4817   PetscFunctionReturn(0);
4818 }
4819 
4820 EXTERN_C_BEGIN
4821 #if defined(PETSC_HAVE_MUMPS)
4822 extern PetscErrorCode MatGetFactor_mpiaij_mumps(Mat,MatFactorType,Mat*);
4823 #endif
4824 #if defined(PETSC_HAVE_SUPERLU_DIST)
4825 extern PetscErrorCode MatGetFactor_mpiaij_superlu_dist(Mat,MatFactorType,Mat*);
4826 #endif
4827 #if defined(PETSC_HAVE_SPOOLES)
4828 extern PetscErrorCode MatGetFactor_mpiaij_spooles(Mat,MatFactorType,Mat*);
4829 #endif
4830 EXTERN_C_END
4831 
4832 /*MC
4833    MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.
4834 
4835    Options Database Keys:
4836 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions()
4837 
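  Example usage (a minimal sketch of selecting this type programmatically; comm, m, n, d_nz
  and o_nz are assumed to be supplied by the caller):
$     Mat A;
$     ierr = MatCreate(comm,&A);CHKERRQ(ierr);
$     ierr = MatSetSizes(A,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
$     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
$     ierr = MatMPIAIJSetPreallocation(A,d_nz,PETSC_NULL,o_nz,PETSC_NULL);CHKERRQ(ierr);
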
4838   Level: beginner
4839 
4840 .seealso: MatCreateMPIAIJ()
4841 M*/
4842 
4843 EXTERN_C_BEGIN
4844 #undef __FUNCT__
4845 #define __FUNCT__ "MatCreate_MPIAIJ"
4846 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_MPIAIJ(Mat B)
4847 {
4848   Mat_MPIAIJ     *b;
4849   PetscErrorCode ierr;
4850   PetscMPIInt    size;
4851 
4852   PetscFunctionBegin;
4853   ierr = MPI_Comm_size(((PetscObject)B)->comm,&size);CHKERRQ(ierr);
4854 
4855   ierr            = PetscNewLog(B,Mat_MPIAIJ,&b);CHKERRQ(ierr);
4856   B->data         = (void*)b;
4857   ierr            = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
4858   B->rmap->bs      = 1;
4859   B->assembled    = PETSC_FALSE;
4860   B->mapping      = 0;
4861 
4862   B->insertmode      = NOT_SET_VALUES;
4863   b->size            = size;
4864   ierr = MPI_Comm_rank(((PetscObject)B)->comm,&b->rank);CHKERRQ(ierr);
4865 
4866   /* build cache for off array entries formed */
4867   ierr = MatStashCreate_Private(((PetscObject)B)->comm,1,&B->stash);CHKERRQ(ierr);
4868   b->donotstash  = PETSC_FALSE;
4869   b->colmap      = 0;
4870   b->garray      = 0;
4871   b->roworiented = PETSC_TRUE;
4872 
4873   /* stuff used for matrix vector multiply */
4874   b->lvec      = PETSC_NULL;
4875   b->Mvctx     = PETSC_NULL;
4876 
4877   /* stuff for MatGetRow() */
4878   b->rowindices   = 0;
4879   b->rowvalues    = 0;
4880   b->getrowactive = PETSC_FALSE;
4881 
4882 #if defined(PETSC_HAVE_SPOOLES)
4883   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_mpiaij_spooles_C",
4884                                      "MatGetFactor_mpiaij_spooles",
4885                                      MatGetFactor_mpiaij_spooles);CHKERRQ(ierr);
4886 #endif
4887 #if defined(PETSC_HAVE_MUMPS)
4888   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_mpiaij_mumps_C",
4889                                      "MatGetFactor_mpiaij_mumps",
4890                                      MatGetFactor_mpiaij_mumps);CHKERRQ(ierr);
4891 #endif
4892 #if defined(PETSC_HAVE_SUPERLU_DIST)
4893   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_mpiaij_superlu_dist_C",
4894                                      "MatGetFactor_mpiaij_superlu_dist",
4895                                      MatGetFactor_mpiaij_superlu_dist);CHKERRQ(ierr);
4896 #endif
4897   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
4898                                      "MatStoreValues_MPIAIJ",
4899                                      MatStoreValues_MPIAIJ);CHKERRQ(ierr);
4900   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
4901                                      "MatRetrieveValues_MPIAIJ",
4902                                      MatRetrieveValues_MPIAIJ);CHKERRQ(ierr);
4903   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
4904 				     "MatGetDiagonalBlock_MPIAIJ",
4905                                      MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr);
4906   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatIsTranspose_C",
4907 				     "MatIsTranspose_MPIAIJ",
4908 				     MatIsTranspose_MPIAIJ);CHKERRQ(ierr);
4909   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocation_C",
4910 				     "MatMPIAIJSetPreallocation_MPIAIJ",
4911 				     MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr);
4912   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",
4913 				     "MatMPIAIJSetPreallocationCSR_MPIAIJ",
4914 				     MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr);
4915   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C",
4916 				     "MatDiagonalScaleLocal_MPIAIJ",
4917 				     MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr);
4918   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpicsrperm_C",
4919                                      "MatConvert_MPIAIJ_MPICSRPERM",
4920                                       MatConvert_MPIAIJ_MPICSRPERM);CHKERRQ(ierr);
4921   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpicrl_C",
4922                                      "MatConvert_MPIAIJ_MPICRL",
4923                                       MatConvert_MPIAIJ_MPICRL);CHKERRQ(ierr);
4924   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMult_mpidense_mpiaij_C",
4925                                      "MatMatMult_MPIDense_MPIAIJ",
4926                                       MatMatMult_MPIDense_MPIAIJ);CHKERRQ(ierr);
4927   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMultSymbolic_mpidense_mpiaij_C",
4928                                      "MatMatMultSymbolic_MPIDense_MPIAIJ",
4929                                       MatMatMultSymbolic_MPIDense_MPIAIJ);CHKERRQ(ierr);
4930   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMultNumeric_mpidense_mpiaij_C",
4931                                      "MatMatMultNumeric_MPIDense_MPIAIJ",
4932                                       MatMatMultNumeric_MPIDense_MPIAIJ);CHKERRQ(ierr);
4933   ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr);
4934   PetscFunctionReturn(0);
4935 }
4936 EXTERN_C_END
4937 
4938 #undef __FUNCT__
4939 #define __FUNCT__ "MatCreateMPIAIJWithSplitArrays"
4940 /*@
4941      MatCreateMPIAIJWithSplitArrays - creates a MPI AIJ matrix using arrays that contain the "diagonal"
4942          and "off-diagonal" part of the matrix in CSR format.
4943 
4944    Collective on MPI_Comm
4945 
4946    Input Parameters:
4947 +  comm - MPI communicator
4948 .  m - number of local rows (Cannot be PETSC_DECIDE)
4949 .  n - This value should be the same as the local size used in creating the
4950        x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have
4951        it calculated if N is given). For square matrices n is almost always m.
4952 .  M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given)
4953 .  N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given)
4954 .   i - row indices for "diagonal" portion of matrix
4955 .   j - column indices
4956 .   a - matrix values
4957 .   oi - row indices for "off-diagonal" portion of matrix
4958 .   oj - column indices
4959 -   oa - matrix values
4960 
4961    Output Parameter:
4962 .   mat - the matrix
4963 
4964    Level: advanced
4965 
4966    Notes:
4967        The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc.
4968 
4969        The i and j indices are 0 based
4970 
4971        See MatCreateMPIAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix
4972 
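       Example usage (a minimal sketch; the CSR arrays i,j,a and oi,oj,oa are assumed to be
       set up by the caller as described above):
$     Mat A;
$     ierr = MatCreateMPIAIJWithSplitArrays(comm,m,n,PETSC_DETERMINE,PETSC_DETERMINE,i,j,a,oi,oj,oa,&A);CHKERRQ(ierr);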
4973 
4974 .keywords: matrix, aij, compressed row, sparse, parallel
4975 
4976 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
4977           MPIAIJ, MatCreateMPIAIJ(), MatCreateMPIAIJWithArrays()
4978 @*/
4979 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],
4980 								PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat)
4981 {
4982   PetscErrorCode ierr;
4983   Mat_MPIAIJ     *maij;
4984 
4985   PetscFunctionBegin;
4986   if (m < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
4987   if (i[0]) {
4988     SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
4989   }
4990   if (oi[0]) {
4991     SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0");
4992   }
4993   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
4994   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
4995   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
4996   maij = (Mat_MPIAIJ*) (*mat)->data;
4997   maij->donotstash     = PETSC_TRUE;
4998   (*mat)->preallocated = PETSC_TRUE;
4999 
5000   (*mat)->rmap->bs = (*mat)->cmap->bs = 1;
5001   ierr = PetscMapSetUp((*mat)->rmap);CHKERRQ(ierr);
5002   ierr = PetscMapSetUp((*mat)->cmap);CHKERRQ(ierr);
5003 
5004   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr);
5005   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr);
5006 
5007   ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5008   ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5009   ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5010   ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5011 
5012   ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5013   ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5014   PetscFunctionReturn(0);
5015 }
5016 
5017 /*
5018     Special version for direct calls from Fortran
5019 */
5020 #if defined(PETSC_HAVE_FORTRAN_CAPS)
5021 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ
5022 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
5023 #define matsetvaluesmpiaij_ matsetvaluesmpiaij
5024 #endif
5025 
5026 /* Change these macros so they can be used in a void function */
5027 #undef CHKERRQ
5028 #define CHKERRQ(ierr) CHKERRABORT(((PetscObject)mat)->comm,ierr)
5029 #undef SETERRQ2
5030 #define SETERRQ2(ierr,b,c,d) CHKERRABORT(((PetscObject)mat)->comm,ierr)
5031 #undef SETERRQ
5032 #define SETERRQ(ierr,b) CHKERRABORT(((PetscObject)mat)->comm,ierr)
5033 
5034 EXTERN_C_BEGIN
5035 #undef __FUNCT__
5036 #define __FUNCT__ "matsetvaluesmpiaij_"
5037 void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr)
5038 {
5039   Mat             mat = *mmat;
5040   PetscInt        m = *mm, n = *mn;
5041   InsertMode      addv = *maddv;
5042   Mat_MPIAIJ      *aij = (Mat_MPIAIJ*)mat->data;
5043   PetscScalar     value;
5044   PetscErrorCode  ierr;
5045 
5046   MatPreallocated(mat);
5047   if (mat->insertmode == NOT_SET_VALUES) {
5048     mat->insertmode = addv;
5049   }
5050 #if defined(PETSC_USE_DEBUG)
5051   else if (mat->insertmode != addv) {
5052     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
5053   }
5054 #endif
5055   {
5056   PetscInt        i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
5057   PetscInt        cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
5058   PetscTruth      roworiented = aij->roworiented;
5059 
5060   /* Some Variables required in the macro */
5061   Mat             A = aij->A;
5062   Mat_SeqAIJ      *a = (Mat_SeqAIJ*)A->data;
5063   PetscInt        *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
5064   MatScalar       *aa = a->a;
5065   PetscTruth      ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES))?PETSC_TRUE:PETSC_FALSE);
5066   Mat             B = aij->B;
5067   Mat_SeqAIJ      *b = (Mat_SeqAIJ*)B->data;
5068   PetscInt        *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
5069   MatScalar       *ba = b->a;
5070 
5071   PetscInt        *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
5072   PetscInt        nonew = a->nonew;
5073   MatScalar       *ap1,*ap2;
5074 
5075   PetscFunctionBegin;
5076   for (i=0; i<m; i++) {
5077     if (im[i] < 0) continue;
5078 #if defined(PETSC_USE_DEBUG)
5079     if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
5080 #endif
5081     if (im[i] >= rstart && im[i] < rend) {
5082       row      = im[i] - rstart;
5083       lastcol1 = -1;
5084       rp1      = aj + ai[row];
5085       ap1      = aa + ai[row];
5086       rmax1    = aimax[row];
5087       nrow1    = ailen[row];
5088       low1     = 0;
5089       high1    = nrow1;
5090       lastcol2 = -1;
5091       rp2      = bj + bi[row];
5092       ap2      = ba + bi[row];
5093       rmax2    = bimax[row];
5094       nrow2    = bilen[row];
5095       low2     = 0;
5096       high2    = nrow2;
5097 
5098       for (j=0; j<n; j++) {
5099         if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
5100         if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
5101         if (in[j] >= cstart && in[j] < cend){
5102           col = in[j] - cstart;
5103           MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
5104         } else if (in[j] < 0) continue;
5105 #if defined(PETSC_USE_DEBUG)
5106         else if (in[j] >= mat->cmap->N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);}
5107 #endif
5108         else {
5109           if (mat->was_assembled) {
5110             if (!aij->colmap) {
5111               ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
5112             }
5113 #if defined (PETSC_USE_CTABLE)
5114             ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
5115 	    col--;
5116 #else
5117             col = aij->colmap[in[j]] - 1;
5118 #endif
5119             if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
5120               ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
5121               col =  in[j];
5122               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
5123               B = aij->B;
5124               b = (Mat_SeqAIJ*)B->data;
5125               bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
5126               rp2      = bj + bi[row];
5127               ap2      = ba + bi[row];
5128               rmax2    = bimax[row];
5129               nrow2    = bilen[row];
5130               low2     = 0;
5131               high2    = nrow2;
5132               bm       = aij->B->rmap->n;
5133               ba = b->a;
5134             }
5135           } else col = in[j];
5136           MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
5137         }
5138       }
5139     } else {
5140       if (!aij->donotstash) {
5141         if (roworiented) {
5142           if (ignorezeroentries && v[i*n] == 0.0) continue;
5143           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr);
5144         } else {
5145           if (ignorezeroentries && v[i] == 0.0) continue;
5146           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr);
5147         }
5148       }
5149     }
5150   }}
5151   PetscFunctionReturnVoid();
5152 }
5153 EXTERN_C_END
5154 
5155