xref: /petsc/src/mat/impls/aij/mpi/mpiaij.c (revision d736bfeb4d37a01fcbdf00fe73fb60d6f0ba2142)
1 #define PETSCMAT_DLL
2 
3 #include "../src/mat/impls/aij/mpi/mpiaij.h"   /*I "petscmat.h" I*/
4 #include "petscblaslapack.h"
5 
6 #undef __FUNCT__
7 #define __FUNCT__ "MatDistribute_MPIAIJ"
8 /*
9     Distributes a SeqAIJ matrix across a set of processes. Code stolen from
10     MatLoad_MPIAIJ(). Horrible lack of reuse. Should be a routine for each matrix type.
11 
12     Only for square matrices
13 */
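/*
   A rough usage sketch (illustrative only, names such as mlocal are placeholders and
   not part of any PETSc example): rank 0 holds the global SeqAIJ matrix, every rank
   passes its desired local row count, and the routine creates or refills the
   distributed matrix.

     Mat gmat = PETSC_NULL,dmat;
     if (!rank) {  ... assemble gmat as a SeqAIJ on PETSC_COMM_SELF ...  }
     ierr = MatDistribute_MPIAIJ(comm,gmat,mlocal,MAT_INITIAL_MATRIX,&dmat);CHKERRQ(ierr);
     ... change the numerical values (not the nonzero pattern) of gmat on rank 0 ...
     ierr = MatDistribute_MPIAIJ(comm,gmat,mlocal,MAT_REUSE_MATRIX,&dmat);CHKERRQ(ierr);
*/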
14 PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat)
15 {
16   PetscMPIInt    rank,size;
17   PetscInt       *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz,*gmataj,cnt,row,*ld;
18   PetscErrorCode ierr;
19   Mat            mat;
20   Mat_SeqAIJ     *gmata;
21   PetscMPIInt    tag;
22   MPI_Status     status;
23   PetscTruth     aij;
24   MatScalar      *gmataa,*ao,*ad,*gmataarestore=0;
25 
26   PetscFunctionBegin;
27   CHKMEMQ;
28   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
29   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
30   if (!rank) {
31     ierr = PetscTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);CHKERRQ(ierr);
32     if (!aij) SETERRQ1(((PetscObject)gmat)->comm,PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name);
33   }
34   if (reuse == MAT_INITIAL_MATRIX) {
35     ierr = MatCreate(comm,&mat);CHKERRQ(ierr);
36     ierr = MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
37     ierr = MatSetType(mat,MATAIJ);CHKERRQ(ierr);
38     ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr);
39     ierr = PetscMalloc2(m,PetscInt,&dlens,m,PetscInt,&olens);CHKERRQ(ierr);
40     ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
41     rowners[0] = 0;
42     for (i=2; i<=size; i++) {
43       rowners[i] += rowners[i-1];
44     }
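    /* rowners[] is now a prefix sum of the local row counts, i.e. the ownership ranges;
       for example with local sizes {3,2,4} on three processes rowners = {0,3,5,9}, so
       process p owns global rows rowners[p] through rowners[p+1]-1 */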
45     rstart = rowners[rank];
46     rend   = rowners[rank+1];
47     ierr   = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
48     if (!rank) {
49       gmata = (Mat_SeqAIJ*) gmat->data;
50       /* send row lengths to all processors */
51       for (i=0; i<m; i++) dlens[i] = gmata->ilen[i];
52       for (i=1; i<size; i++) {
53 	ierr = MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
54       }
55       /* determine the number of diagonal and off-diagonal nonzeros in each row */
56       ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
57       ierr = PetscMalloc(m*sizeof(PetscInt),&ld);CHKERRQ(ierr);
58       ierr = PetscMemzero(ld,m*sizeof(PetscInt));CHKERRQ(ierr);
59       jj = 0;
60       for (i=0; i<m; i++) {
61 	for (j=0; j<dlens[i]; j++) {
62           if (gmata->j[jj] < rstart) ld[i]++;
63 	  if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++;
64 	  jj++;
65 	}
66       }
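      /* at this point ld[i] counts the nonzeros of local row i that lie strictly to the
         left of the diagonal block (global column < rstart), while olens[i] counts all
         nonzeros outside the diagonal block (column < rstart or column >= rend); the
         receiving ranks repeat the same bookkeeping below */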
67       /* send column indices to other processes */
68       for (i=1; i<size; i++) {
69 	nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
70 	ierr = MPI_Send(&nz,1,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
71 	ierr = MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
72       }
73 
74       /* send numerical values to other processes */
75       for (i=1; i<size; i++) {
76         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
77         ierr = MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
78       }
79       gmataa = gmata->a;
80       gmataj = gmata->j;
81 
82     } else {
83       /* receive row lengths */
84       ierr = MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
85       /* receive column indices */
86       ierr = MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
87       ierr = PetscMalloc2(nz,PetscScalar,&gmataa,nz,PetscInt,&gmataj);CHKERRQ(ierr);
88       ierr = MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
89       /* determine the number of diagonal and off-diagonal nonzeros in each row */
90       ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
91       ierr = PetscMalloc(m*sizeof(PetscInt),&ld);CHKERRQ(ierr);
92       ierr = PetscMemzero(ld,m*sizeof(PetscInt));CHKERRQ(ierr);
93       jj = 0;
94       for (i=0; i<m; i++) {
95 	for (j=0; j<dlens[i]; j++) {
96           if (gmataj[jj] < rstart) ld[i]++;
97 	  if (gmataj[jj] < rstart || gmataj[jj] >= rend) olens[i]++;
98 	  jj++;
99 	}
100       }
101       /* receive numerical values */
102       ierr = PetscMemzero(gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr);
103       ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
104     }
105     /* set preallocation */
106     for (i=0; i<m; i++) {
107       dlens[i] -= olens[i];
108     }
109     ierr = MatSeqAIJSetPreallocation(mat,0,dlens);CHKERRQ(ierr);
110     ierr = MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);CHKERRQ(ierr);
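    /* both preallocation routines are called because mat was created with MatSetType(mat,MATAIJ),
       which resolves to SeqAIJ on one process and MPIAIJ on several; only the call that matches
       the actual type takes effect (stated here as a reading aid, not enforced by this routine) */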
111 
112     for (i=0; i<m; i++) {
113       dlens[i] += olens[i];
114     }
115     cnt  = 0;
116     for (i=0; i<m; i++) {
117       row  = rstart + i;
118       ierr = MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);CHKERRQ(ierr);
119       cnt += dlens[i];
120     }
121     if (rank) {
122       ierr = PetscFree2(gmataa,gmataj);CHKERRQ(ierr);
123     }
124     ierr = PetscFree2(dlens,olens);CHKERRQ(ierr);
125     ierr = PetscFree(rowners);CHKERRQ(ierr);
126     ((Mat_MPIAIJ*)(mat->data))->ld = ld;
127     *inmat = mat;
128   } else {   /* column indices are already set; only need to move over numerical values from process 0 */
129     Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data;
130     Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data;
131     mat   = *inmat;
132     ierr  = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
133     if (!rank) {
134       /* send numerical values to other processes */
135       gmata = (Mat_SeqAIJ*) gmat->data;
136       ierr   = MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);CHKERRQ(ierr);
137       gmataa = gmata->a;
138       for (i=1; i<size; i++) {
139         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
140         ierr = MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
141       }
142       nz   = gmata->i[rowners[1]]-gmata->i[rowners[0]];
143     } else {
144       /* receive numerical values from process 0 */
145       nz   = Ad->nz + Ao->nz;
146       ierr = PetscMalloc(nz*sizeof(PetscScalar),&gmataa);CHKERRQ(ierr); gmataarestore = gmataa;
147       ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
148     }
149     /* transfer numerical values into the diagonal A and off-diagonal B parts of mat */
150     ld = ((Mat_MPIAIJ*)(mat->data))->ld;
151     ad = Ad->a;
152     ao = Ao->a;
153     if (mat->rmap->n) {
154       i  = 0;
155       nz = ld[i];                                   ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
156       nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
157     }
158     for (i=1; i<mat->rmap->n; i++) {
159       nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
160       nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
161     }
162     i--;
163     if (mat->rmap->n) {
164       nz = Ao->i[i+1] - Ao->i[i] - ld[i];           ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
165     }
166     if (rank) {
167       ierr = PetscFree(gmataarestore);CHKERRQ(ierr);
168     }
169   }
170   ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
171   ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
172   CHKMEMQ;
173   PetscFunctionReturn(0);
174 }
175 
176 /*
177   Local utility routine that creates a mapping from the global column
178 number to the local number in the off-diagonal part of the local
179 storage of the matrix.  When PETSC_USE_CTABLE is used this is scalable at
180 a slightly higher hash-table cost; without it, it is not scalable (each process
181 has an order-N integer array) but access is fast.
182 */
183 #undef __FUNCT__
184 #define __FUNCT__ "CreateColmap_MPIAIJ_Private"
185 PetscErrorCode CreateColmap_MPIAIJ_Private(Mat mat)
186 {
187   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
188   PetscErrorCode ierr;
189   PetscInt       n = aij->B->cmap->n,i;
190 
191   PetscFunctionBegin;
192 #if defined (PETSC_USE_CTABLE)
193   ierr = PetscTableCreate(n,&aij->colmap);CHKERRQ(ierr);
194   for (i=0; i<n; i++){
195     ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1);CHKERRQ(ierr);
196   }
197 #else
198   ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscInt),&aij->colmap);CHKERRQ(ierr);
199   ierr = PetscLogObjectMemory(mat,mat->cmap->N*sizeof(PetscInt));CHKERRQ(ierr);
200   ierr = PetscMemzero(aij->colmap,mat->cmap->N*sizeof(PetscInt));CHKERRQ(ierr);
201   for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
202 #endif
203   PetscFunctionReturn(0);
204 }
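/*
   Reading aid with illustrative values: garray[] maps a local column index of the
   off-diagonal block B to its global column number, and colmap built above is the
   inverse map.  For example, if garray = {3,7,12} then with PETSC_USE_CTABLE the table
   holds the pairs (4,1),(8,2),(13,3), key and value each shifted by one so that a
   lookup result of 0 can mean "not present"; without it colmap[3] = 1, colmap[7] = 2,
   colmap[12] = 3 and every other entry is 0.
*/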
205 
206 
207 #define CHUNKSIZE   15
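/*
   The two macros below insert (row,col,value) into the diagonal block A and the
   off-diagonal block B respectively.  In outline: binary-search the current row for col;
   if found, add to or overwrite the stored value according to addv; if not found, either
   drop the entry (zero value with ignorezeroentries, or nonew == 1), raise an error
   (nonew == -1), or make room, reallocating through MatSeqXAIJReallocateAIJ() when the
   row is full, and shift the later entries of the row up by one slot.
*/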
208 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \
209 { \
210     if (col <= lastcol1) low1 = 0; else high1 = nrow1; \
211     lastcol1 = col;\
212     while (high1-low1 > 5) { \
213       t = (low1+high1)/2; \
214       if (rp1[t] > col) high1 = t; \
215       else             low1  = t; \
216     } \
217       for (_i=low1; _i<high1; _i++) { \
218         if (rp1[_i] > col) break; \
219         if (rp1[_i] == col) { \
220           if (addv == ADD_VALUES) ap1[_i] += value;   \
221           else                    ap1[_i] = value; \
222           goto a_noinsert; \
223         } \
224       }  \
225       if (value == 0.0 && ignorezeroentries) {low1 = 0; high1 = nrow1;goto a_noinsert;} \
226       if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;}		\
227       if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
228       MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \
229       N = nrow1++ - 1; a->nz++; high1++; \
230       /* shift up all the later entries in this row */ \
231       for (ii=N; ii>=_i; ii--) { \
232         rp1[ii+1] = rp1[ii]; \
233         ap1[ii+1] = ap1[ii]; \
234       } \
235       rp1[_i] = col;  \
236       ap1[_i] = value;  \
237       a_noinsert: ; \
238       ailen[row] = nrow1; \
239 }
240 
241 
242 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \
243 { \
244     if (col <= lastcol2) low2 = 0; else high2 = nrow2; \
245     lastcol2 = col;\
246     while (high2-low2 > 5) { \
247       t = (low2+high2)/2; \
248       if (rp2[t] > col) high2 = t; \
249       else             low2  = t; \
250     } \
251     for (_i=low2; _i<high2; _i++) {		\
252       if (rp2[_i] > col) break;			\
253       if (rp2[_i] == col) {			      \
254 	if (addv == ADD_VALUES) ap2[_i] += value;     \
255 	else                    ap2[_i] = value;      \
256 	goto b_noinsert;			      \
257       }						      \
258     }							      \
259     if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
260     if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;}		\
261     if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
262     MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \
263     N = nrow2++ - 1; b->nz++; high2++;					\
264     /* shift up all the later entries in this row */			\
265     for (ii=N; ii>=_i; ii--) {						\
266       rp2[ii+1] = rp2[ii];						\
267       ap2[ii+1] = ap2[ii];						\
268     }									\
269     rp2[_i] = col;							\
270     ap2[_i] = value;							\
271     b_noinsert: ;								\
272     bilen[row] = nrow2;							\
273 }
274 
275 #undef __FUNCT__
276 #define __FUNCT__ "MatSetValuesRow_MPIAIJ"
277 PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[])
278 {
279   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)A->data;
280   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data;
281   PetscErrorCode ierr;
282   PetscInt       l,*garray = mat->garray,diag;
283 
284   PetscFunctionBegin;
285   /* code only works for square matrices A */
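  /* v[] is expected to hold the entire local row in ascending global-column order:
     first the entries whose global column lies below the diagonal block (they go into B),
     then the diagonal-block entries (into A), then the remaining entries (back into B),
     which is exactly how the three PetscMemcpy() calls below slice it */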
286 
287   /* find size of row to the left of the diagonal part */
288   ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr);
289   row  = row - diag;
290   for (l=0; l<b->i[row+1]-b->i[row]; l++) {
291     if (garray[b->j[b->i[row]+l]] > diag) break;
292   }
293   ierr = PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));CHKERRQ(ierr);
294 
295   /* diagonal part */
296   ierr = PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));CHKERRQ(ierr);
297 
298   /* right of diagonal part */
299   ierr = PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));CHKERRQ(ierr);
300   PetscFunctionReturn(0);
301 }
302 
303 #undef __FUNCT__
304 #define __FUNCT__ "MatSetValues_MPIAIJ"
305 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
306 {
307   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
308   PetscScalar    value;
309   PetscErrorCode ierr;
310   PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
311   PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
312   PetscTruth     roworiented = aij->roworiented;
313 
314   /* Some variables required by the MatSetValues_SeqAIJ_{A,B}_Private() macros */
315   Mat            A = aij->A;
316   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data;
317   PetscInt       *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
318   MatScalar      *aa = a->a;
319   PetscTruth     ignorezeroentries = a->ignorezeroentries;
320   Mat            B = aij->B;
321   Mat_SeqAIJ     *b = (Mat_SeqAIJ*)B->data;
322   PetscInt       *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
323   MatScalar      *ba = b->a;
324 
325   PetscInt       *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
326   PetscInt       nonew = a->nonew;
327   MatScalar      *ap1,*ap2;
328 
329   PetscFunctionBegin;
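  /* overall flow, as a reading aid: rows owned by this process are inserted directly,
     with columns inside [cstart,cend) going into the diagonal block A and all others into
     the off-diagonal block B (global columns translated through colmap once the matrix
     has been assembled); rows owned elsewhere are stashed, unless donotstash is set, and
     communicated during MatAssemblyBegin/End() */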
330   if (v) PetscValidScalarPointer(v,6);
331   for (i=0; i<m; i++) {
332     if (im[i] < 0) continue;
333 #if defined(PETSC_USE_DEBUG)
334     if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
335 #endif
336     if (im[i] >= rstart && im[i] < rend) {
337       row      = im[i] - rstart;
338       lastcol1 = -1;
339       rp1      = aj + ai[row];
340       ap1      = aa + ai[row];
341       rmax1    = aimax[row];
342       nrow1    = ailen[row];
343       low1     = 0;
344       high1    = nrow1;
345       lastcol2 = -1;
346       rp2      = bj + bi[row];
347       ap2      = ba + bi[row];
348       rmax2    = bimax[row];
349       nrow2    = bilen[row];
350       low2     = 0;
351       high2    = nrow2;
352 
353       for (j=0; j<n; j++) {
354         if (v) {if (roworiented) value = v[i*n+j]; else value = v[i+j*m];} else value = 0.0;
355         if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
356         if (in[j] >= cstart && in[j] < cend){
357           col = in[j] - cstart;
358           MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
359         } else if (in[j] < 0) continue;
360 #if defined(PETSC_USE_DEBUG)
361         else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
362 #endif
363         else {
364           if (mat->was_assembled) {
365             if (!aij->colmap) {
366               ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
367             }
368 #if defined (PETSC_USE_CTABLE)
369             ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
370 	    col--;
371 #else
372             col = aij->colmap[in[j]] - 1;
373 #endif
374             if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
375               ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
376               col =  in[j];
377               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
378               B = aij->B;
379               b = (Mat_SeqAIJ*)B->data;
380               bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a;
381               rp2      = bj + bi[row];
382               ap2      = ba + bi[row];
383               rmax2    = bimax[row];
384               nrow2    = bilen[row];
385               low2     = 0;
386               high2    = nrow2;
387               bm       = aij->B->rmap->n;
388               ba = b->a;
389             }
390           } else col = in[j];
391           MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
392         }
393       }
394     } else {
395       if (!aij->donotstash) {
396         if (roworiented) {
397           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscTruth)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
398         } else {
399           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscTruth)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
400         }
401       }
402     }
403   }
404   PetscFunctionReturn(0);
405 }
406 
407 #undef __FUNCT__
408 #define __FUNCT__ "MatGetValues_MPIAIJ"
409 PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
410 {
411   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
412   PetscErrorCode ierr;
413   PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
414   PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
415 
416   PetscFunctionBegin;
417   for (i=0; i<m; i++) {
418     if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/
419     if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
420     if (idxm[i] >= rstart && idxm[i] < rend) {
421       row = idxm[i] - rstart;
422       for (j=0; j<n; j++) {
423         if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */
424         if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
425         if (idxn[j] >= cstart && idxn[j] < cend){
426           col = idxn[j] - cstart;
427           ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
428         } else {
429           if (!aij->colmap) {
430             ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
431           }
432 #if defined (PETSC_USE_CTABLE)
433           ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
434           col --;
435 #else
436           col = aij->colmap[idxn[j]] - 1;
437 #endif
438           if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
439           else {
440             ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
441           }
442         }
443       }
444     } else {
445       SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
446     }
447   }
448   PetscFunctionReturn(0);
449 }
450 
451 extern PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat,Vec,Vec);
452 
453 #undef __FUNCT__
454 #define __FUNCT__ "MatAssemblyBegin_MPIAIJ"
455 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
456 {
457   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
458   PetscErrorCode ierr;
459   PetscInt       nstash,reallocs;
460   InsertMode     addv;
461 
462   PetscFunctionBegin;
463   if (aij->donotstash) {
464     PetscFunctionReturn(0);
465   }
466 
467   /* make sure all processes are using either INSERT_VALUES or ADD_VALUES */
468   ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,((PetscObject)mat)->comm);CHKERRQ(ierr);
469   if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
470   mat->insertmode = addv; /* in case this processor had no cache */
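  /* note: INSERT_VALUES and ADD_VALUES are distinct nonzero values, so the bitwise OR
     reduced above equals (ADD_VALUES|INSERT_VALUES) exactly when some process inserted
     while another added (an assumption about the InsertMode encoding, stated for clarity) */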
471 
472   ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
473   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
474   ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
475   PetscFunctionReturn(0);
476 }
477 
478 #undef __FUNCT__
479 #define __FUNCT__ "MatAssemblyEnd_MPIAIJ"
480 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
481 {
482   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
483   Mat_SeqAIJ     *a=(Mat_SeqAIJ *)aij->A->data;
484   PetscErrorCode ierr;
485   PetscMPIInt    n;
486   PetscInt       i,j,rstart,ncols,flg;
487   PetscInt       *row,*col;
488   PetscTruth     other_disassembled;
489   PetscScalar    *val;
490   InsertMode     addv = mat->insertmode;
491 
492   /* do not use 'b = (Mat_SeqAIJ *)aij->B->data' as B can be reset in disassembly */
493   PetscFunctionBegin;
494   if (!aij->donotstash) {
495     while (1) {
496       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
497       if (!flg) break;
498 
499       for (i=0; i<n;) {
500         /* Now identify the consecutive vals belonging to the same row */
501         for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
502         if (j < n) ncols = j-i;
503         else       ncols = n-i;
504         /* Now assemble all these values with a single function call */
505         ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
506         i = j;
507       }
508     }
509     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
510   }
511   a->compressedrow.use     = PETSC_FALSE;
512   ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
513   ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);
514 
515   /* determine if any processor has disassembled, if so we must
516      also disassemble ourselves, in order that we may reassemble. */
517   /*
518      if nonzero structure of submatrix B cannot change then we know that
519      no processor disassembled thus we can skip this stuff
520   */
521   if (!((Mat_SeqAIJ*)aij->B->data)->nonew)  {
522     ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,((PetscObject)mat)->comm);CHKERRQ(ierr);
523     if (mat->was_assembled && !other_disassembled) {
524       ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
525     }
526   }
527   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
528     ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
529   }
530   ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr);
531   ((Mat_SeqAIJ *)aij->B->data)->compressedrow.use = PETSC_TRUE; /* b->compressedrow.use */
532   ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
533   ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);
534 
535   ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
536   aij->rowvalues = 0;
537 
538   /* used by MatAXPY() */
539   a->xtoy = 0; ((Mat_SeqAIJ *)aij->B->data)->xtoy = 0;  /* b->xtoy = 0 */
540   a->XtoY = 0; ((Mat_SeqAIJ *)aij->B->data)->XtoY = 0;  /* b->XtoY = 0 */
541 
542   if (aij->diag) {ierr = VecDestroy(aij->diag);CHKERRQ(ierr);aij->diag = 0;}
543   if (a->inode.size) mat->ops->multdiagonalblock = MatMultDiagonalBlock_MPIAIJ;
544   PetscFunctionReturn(0);
545 }
546 
547 #undef __FUNCT__
548 #define __FUNCT__ "MatZeroEntries_MPIAIJ"
549 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
550 {
551   Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
552   PetscErrorCode ierr;
553 
554   PetscFunctionBegin;
555   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
556   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
557   PetscFunctionReturn(0);
558 }
559 
560 #undef __FUNCT__
561 #define __FUNCT__ "MatZeroRows_MPIAIJ"
562 PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag)
563 {
564   Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
565   PetscErrorCode ierr;
566   PetscMPIInt    size = l->size,imdex,n,rank = l->rank,tag = ((PetscObject)A)->tag,lastidx = -1;
567   PetscInt       i,*owners = A->rmap->range;
568   PetscInt       *nprocs,j,idx,nsends,row;
569   PetscInt       nmax,*svalues,*starts,*owner,nrecvs;
570   PetscInt       *rvalues,count,base,slen,*source;
571   PetscInt       *lens,*lrows,*values,rstart=A->rmap->rstart;
572   MPI_Comm       comm = ((PetscObject)A)->comm;
573   MPI_Request    *send_waits,*recv_waits;
574   MPI_Status     recv_status,*send_status;
575 #if defined(PETSC_DEBUG)
576   PetscTruth     found = PETSC_FALSE;
577 #endif
578 
579   PetscFunctionBegin;
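  /* overall flow, as a reading aid: every process determines which process owns each global
     row index it was handed, ships those indices to their owners (message counts and sizes
     negotiated through PetscMaxSum(), data moved with nonblocking sends and receives), and
     each owner then zeros its local rows, B before A, optionally reinstalling the diagonal */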
580   /*  first count number of contributors to each processor */
581   ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
582   ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
583   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/
584   j = 0;
585   for (i=0; i<N; i++) {
586     if (lastidx > (idx = rows[i])) j = 0;
587     lastidx = idx;
588     for (; j<size; j++) {
589       if (idx >= owners[j] && idx < owners[j+1]) {
590         nprocs[2*j]++;
591         nprocs[2*j+1] = 1;
592         owner[i] = j;
593 #if defined(PETSC_DEBUG)
594         found = PETSC_TRUE;
595 #endif
596         break;
597       }
598     }
599 #if defined(PETSC_DEBUG)
600     if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
601     found = PETSC_FALSE;
602 #endif
603   }
604   nsends = 0;  for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}
605 
606   /* inform other processes of the number of messages and the max length */
607   ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);
608 
609   /* post receives:   */
610   ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr);
611   ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
612   for (i=0; i<nrecvs; i++) {
613     ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
614   }
615 
616   /* do sends:
617       1) starts[i] gives the starting index in svalues for stuff going to
618          the ith processor
619   */
620   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr);
621   ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
622   ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr);
623   starts[0] = 0;
624   for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
625   for (i=0; i<N; i++) {
626     svalues[starts[owner[i]]++] = rows[i];
627   }
628 
629   starts[0] = 0;
630   for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
631   count = 0;
632   for (i=0; i<size; i++) {
633     if (nprocs[2*i+1]) {
634       ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
635     }
636   }
637   ierr = PetscFree(starts);CHKERRQ(ierr);
638 
639   base = owners[rank];
640 
641   /*  wait on receives */
642   ierr   = PetscMalloc2(nrecvs,PetscInt,&lens,nrecvs,PetscInt,&source);CHKERRQ(ierr);
643   count  = nrecvs; slen = 0;
644   while (count) {
645     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
646     /* unpack receives into our local space */
647     ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);
648     source[imdex]  = recv_status.MPI_SOURCE;
649     lens[imdex]    = n;
650     slen          += n;
651     count--;
652   }
653   ierr = PetscFree(recv_waits);CHKERRQ(ierr);
654 
655   /* move the data into the send scatter */
656   ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr);
657   count = 0;
658   for (i=0; i<nrecvs; i++) {
659     values = rvalues + i*nmax;
660     for (j=0; j<lens[i]; j++) {
661       lrows[count++] = values[j] - base;
662     }
663   }
664   ierr = PetscFree(rvalues);CHKERRQ(ierr);
665   ierr = PetscFree2(lens,source);CHKERRQ(ierr);
666   ierr = PetscFree(owner);CHKERRQ(ierr);
667   ierr = PetscFree(nprocs);CHKERRQ(ierr);
668 
669   /* actually zap the local rows */
670   /*
671         Zero the required rows. If the "diagonal block" of the matrix
672      is square and the user wishes to set the diagonal we use separate
673      code so that MatSetValues() is not called for each diagonal entry (which would
674      allocate new memory, triggering many mallocs and slowing things down).
675 
676   */
677   /* must zero l->B before l->A because the (diag) case below may put values into l->B*/
678   ierr = MatZeroRows(l->B,slen,lrows,0.0);CHKERRQ(ierr);
679   if ((diag != 0.0) && (l->A->rmap->N == l->A->cmap->N)) {
680     ierr      = MatZeroRows(l->A,slen,lrows,diag);CHKERRQ(ierr);
681   } else if (diag != 0.0) {
682     ierr = MatZeroRows(l->A,slen,lrows,0.0);CHKERRQ(ierr);
683     if (((Mat_SeqAIJ*)l->A->data)->nonew) {
684       SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\n\
685 MAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
686     }
687     for (i = 0; i < slen; i++) {
688       row  = lrows[i] + rstart;
689       ierr = MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr);
690     }
691     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
692     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
693   } else {
694     ierr = MatZeroRows(l->A,slen,lrows,0.0);CHKERRQ(ierr);
695   }
696   ierr = PetscFree(lrows);CHKERRQ(ierr);
697 
698   /* wait on sends */
699   if (nsends) {
700     ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
701     ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
702     ierr = PetscFree(send_status);CHKERRQ(ierr);
703   }
704   ierr = PetscFree(send_waits);CHKERRQ(ierr);
705   ierr = PetscFree(svalues);CHKERRQ(ierr);
706 
707   PetscFunctionReturn(0);
708 }
709 
710 #undef __FUNCT__
711 #define __FUNCT__ "MatMult_MPIAIJ"
712 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
713 {
714   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
715   PetscErrorCode ierr;
716   PetscInt       nt;
717 
718   PetscFunctionBegin;
719   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
720   if (nt != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt);
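  /* y = A_diag*x_local is computed while the ghost values of x are still in flight; the
     off-diagonal contribution B*lvec is added once the scatter completes */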
721   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
722   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
723   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
724   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
725   PetscFunctionReturn(0);
726 }
727 
728 #undef __FUNCT__
729 #define __FUNCT__ "MatMultDiagonalBlock_MPIAIJ"
730 PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx)
731 {
732   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
733   PetscErrorCode ierr;
734 
735   PetscFunctionBegin;
736   ierr = MatMultDiagonalBlock(a->A,bb,xx);CHKERRQ(ierr);
737   PetscFunctionReturn(0);
738 }
739 
740 #undef __FUNCT__
741 #define __FUNCT__ "MatMultAdd_MPIAIJ"
742 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
743 {
744   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
745   PetscErrorCode ierr;
746 
747   PetscFunctionBegin;
748   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
749   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
750   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
751   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
752   PetscFunctionReturn(0);
753 }
754 
755 #undef __FUNCT__
756 #define __FUNCT__ "MatMultTranspose_MPIAIJ"
757 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
758 {
759   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
760   PetscErrorCode ierr;
761   PetscTruth     merged;
762 
763   PetscFunctionBegin;
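  /* y = A^T x: the local block contributes A_diag^T x directly, while B_off^T x is
     accumulated in lvec and scattered back (SCATTER_REVERSE with ADD_VALUES) to the
     processes owning those rows of y; the two orderings below differ only in whether the
     scatter begin/end pair can be overlapped with the local product */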
764   ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr);
765   /* do nondiagonal part */
766   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
767   if (!merged) {
768     /* send it on its way */
769     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
770     /* do local part */
771     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
772     /* receive remote parts: note this assumes the values are not actually */
773     /* added into yy until the VecScatterEnd() on the next line */
774     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
775   } else {
776     /* do local part */
777     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
778     /* send it on its way */
779     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
780     /* values were actually received in the Begin() but we still need to call this no-op */
781     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
782   }
783   PetscFunctionReturn(0);
784 }
785 
786 EXTERN_C_BEGIN
787 #undef __FUNCT__
788 #define __FUNCT__ "MatIsTranspose_MPIAIJ"
789 PetscErrorCode PETSCMAT_DLLEXPORT MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscTruth *f)
790 {
791   MPI_Comm       comm;
792   Mat_MPIAIJ     *Aij = (Mat_MPIAIJ *) Amat->data, *Bij;
793   Mat            Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs;
794   IS             Me,Notme;
795   PetscErrorCode ierr;
796   PetscInt       M,N,first,last,*notme,i;
797   PetscMPIInt    size;
798 
799   PetscFunctionBegin;
800 
801   /* Easy test: symmetric diagonal block */
802   Bij = (Mat_MPIAIJ *) Bmat->data; Bdia = Bij->A;
803   ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr);
804   if (!*f) PetscFunctionReturn(0);
805   ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr);
806   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
807   if (size == 1) PetscFunctionReturn(0);
808 
809   /* Hard test: off-diagonal block. This takes a MatGetSubMatrices() call. */
810   ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr);
811   ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr);
812   ierr = PetscMalloc((N-last+first)*sizeof(PetscInt),&notme);CHKERRQ(ierr);
813   for (i=0; i<first; i++) notme[i] = i;
814   for (i=last; i<M; i++) notme[i-last+first] = i;
815   ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,&Notme);CHKERRQ(ierr);
816   ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr);
817   ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr);
818   Aoff = Aoffs[0];
819   ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr);
820   Boff = Boffs[0];
821   ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr);
822   ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr);
823   ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr);
824   ierr = ISDestroy(Me);CHKERRQ(ierr);
825   ierr = ISDestroy(Notme);CHKERRQ(ierr);
826 
827   PetscFunctionReturn(0);
828 }
829 EXTERN_C_END
830 
831 #undef __FUNCT__
832 #define __FUNCT__ "MatMultTransposeAdd_MPIAIJ"
833 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
834 {
835   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
836   PetscErrorCode ierr;
837 
838   PetscFunctionBegin;
839   /* do nondiagonal part */
840   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
841   /* send it on its way */
842   ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
843   /* do local part */
844   ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
845   /* receive remote parts */
846   ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
847   PetscFunctionReturn(0);
848 }
849 
850 /*
851   This only works correctly for square matrices where the subblock A->A is the
852    diagonal block
853 */
854 #undef __FUNCT__
855 #define __FUNCT__ "MatGetDiagonal_MPIAIJ"
856 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v)
857 {
858   PetscErrorCode ierr;
859   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
860 
861   PetscFunctionBegin;
862   if (A->rmap->N != A->cmap->N) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
863   if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
864   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
865   PetscFunctionReturn(0);
866 }
867 
868 #undef __FUNCT__
869 #define __FUNCT__ "MatScale_MPIAIJ"
870 PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa)
871 {
872   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
873   PetscErrorCode ierr;
874 
875   PetscFunctionBegin;
876   ierr = MatScale(a->A,aa);CHKERRQ(ierr);
877   ierr = MatScale(a->B,aa);CHKERRQ(ierr);
878   PetscFunctionReturn(0);
879 }
880 
881 #undef __FUNCT__
882 #define __FUNCT__ "MatDestroy_MPIAIJ"
883 PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
884 {
885   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
886   PetscErrorCode ierr;
887 
888   PetscFunctionBegin;
889 #if defined(PETSC_USE_LOG)
890   PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N);
891 #endif
892   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
893   if (aij->diag) {ierr = VecDestroy(aij->diag);CHKERRQ(ierr);}
894   ierr = MatDestroy(aij->A);CHKERRQ(ierr);
895   ierr = MatDestroy(aij->B);CHKERRQ(ierr);
896 #if defined (PETSC_USE_CTABLE)
897   if (aij->colmap) {ierr = PetscTableDestroy(aij->colmap);CHKERRQ(ierr);}
898 #else
899   ierr = PetscFree(aij->colmap);CHKERRQ(ierr);
900 #endif
901   ierr = PetscFree(aij->garray);CHKERRQ(ierr);
902   if (aij->lvec)   {ierr = VecDestroy(aij->lvec);CHKERRQ(ierr);}
903   if (aij->Mvctx)  {ierr = VecScatterDestroy(aij->Mvctx);CHKERRQ(ierr);}
904   ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
905   ierr = PetscFree(aij->ld);CHKERRQ(ierr);
906   ierr = PetscFree(aij);CHKERRQ(ierr);
907 
908   ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
909   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr);
910   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr);
911   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr);
912   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C","",PETSC_NULL);CHKERRQ(ierr);
913   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
914   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr);
915   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C","",PETSC_NULL);CHKERRQ(ierr);
916   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C","",PETSC_NULL);CHKERRQ(ierr);
917   PetscFunctionReturn(0);
918 }
919 
920 #undef __FUNCT__
921 #define __FUNCT__ "MatView_MPIAIJ_Binary"
922 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
923 {
924   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
925   Mat_SeqAIJ*       A = (Mat_SeqAIJ*)aij->A->data;
926   Mat_SeqAIJ*       B = (Mat_SeqAIJ*)aij->B->data;
927   PetscErrorCode    ierr;
928   PetscMPIInt       rank,size,tag = ((PetscObject)viewer)->tag;
929   int               fd;
930   PetscInt          nz,header[4],*row_lengths,*range=0,rlen,i;
931   PetscInt          nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap->rstart,rnz;
932   PetscScalar       *column_values;
933 
934   PetscFunctionBegin;
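  /* layout written below, summarizing this routine rather than specifying the format:
     header[4] = {MAT_FILE_CLASSID, M, N, global nz}, then every row length, then every
     row's global column indices (left-of-diagonal B entries, then diagonal A entries,
     then remaining B entries), then the matching numerical values; rank 0 writes its own
     data first and then receives and writes each other rank's data in turn */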
935   ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr);
936   ierr = MPI_Comm_size(((PetscObject)mat)->comm,&size);CHKERRQ(ierr);
937   nz   = A->nz + B->nz;
938   if (!rank) {
939     header[0] = MAT_FILE_CLASSID;
940     header[1] = mat->rmap->N;
941     header[2] = mat->cmap->N;
942     ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
943     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
944     ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
945     /* get largest number of rows any processor has */
946     rlen = mat->rmap->n;
947     range = mat->rmap->range;
948     for (i=1; i<size; i++) {
949       rlen = PetscMax(rlen,range[i+1] - range[i]);
950     }
951   } else {
952     ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
953     rlen = mat->rmap->n;
954   }
955 
956   /* load up the local row counts */
957   ierr = PetscMalloc((rlen+1)*sizeof(PetscInt),&row_lengths);CHKERRQ(ierr);
958   for (i=0; i<mat->rmap->n; i++) {
959     row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
960   }
961 
962   /* store the row lengths to the file */
963   if (!rank) {
964     MPI_Status status;
965     ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap->n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
966     for (i=1; i<size; i++) {
967       rlen = range[i+1] - range[i];
968       ierr = MPI_Recv(row_lengths,rlen,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
969       ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
970     }
971   } else {
972     ierr = MPI_Send(row_lengths,mat->rmap->n,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
973   }
974   ierr = PetscFree(row_lengths);CHKERRQ(ierr);
975 
976   /* load up the local column indices */
977   nzmax = nz; /* 0th process needs space as large as the largest process needs */
978   ierr = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
979   ierr = PetscMalloc((nzmax+1)*sizeof(PetscInt),&column_indices);CHKERRQ(ierr);
980   cnt  = 0;
981   for (i=0; i<mat->rmap->n; i++) {
982     for (j=B->i[i]; j<B->i[i+1]; j++) {
983       if ( (col = garray[B->j[j]]) > cstart) break;
984       column_indices[cnt++] = col;
985     }
986     for (k=A->i[i]; k<A->i[i+1]; k++) {
987       column_indices[cnt++] = A->j[k] + cstart;
988     }
989     for (; j<B->i[i+1]; j++) {
990       column_indices[cnt++] = garray[B->j[j]];
991     }
992   }
993   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
994 
995   /* store the column indices to the file */
996   if (!rank) {
997     MPI_Status status;
998     ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
999     for (i=1; i<size; i++) {
1000       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
1001       if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: rnz = %D nzmax = %D",rnz,nzmax);
1002       ierr = MPI_Recv(column_indices,rnz,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
1003       ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1004     }
1005   } else {
1006     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1007     ierr = MPI_Send(column_indices,nz,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1008   }
1009   ierr = PetscFree(column_indices);CHKERRQ(ierr);
1010 
1011   /* load up the local column values */
1012   ierr = PetscMalloc((nzmax+1)*sizeof(PetscScalar),&column_values);CHKERRQ(ierr);
1013   cnt  = 0;
1014   for (i=0; i<mat->rmap->n; i++) {
1015     for (j=B->i[i]; j<B->i[i+1]; j++) {
1016       if ( garray[B->j[j]] > cstart) break;
1017       column_values[cnt++] = B->a[j];
1018     }
1019     for (k=A->i[i]; k<A->i[i+1]; k++) {
1020       column_values[cnt++] = A->a[k];
1021     }
1022     for (; j<B->i[i+1]; j++) {
1023       column_values[cnt++] = B->a[j];
1024     }
1025   }
1026   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
1027 
1028   /* store the column values to the file */
1029   if (!rank) {
1030     MPI_Status status;
1031     ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1032     for (i=1; i<size; i++) {
1033       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
1034       if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: rnz = %D nzmax = %D",rnz,nzmax);
1035       ierr = MPI_Recv(column_values,rnz,MPIU_SCALAR,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
1036       ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1037     }
1038   } else {
1039     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1040     ierr = MPI_Send(column_values,nz,MPIU_SCALAR,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1041   }
1042   ierr = PetscFree(column_values);CHKERRQ(ierr);
1043   PetscFunctionReturn(0);
1044 }
1045 
1046 #undef __FUNCT__
1047 #define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket"
1048 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
1049 {
1050   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
1051   PetscErrorCode    ierr;
1052   PetscMPIInt       rank = aij->rank,size = aij->size;
1053   PetscTruth        isdraw,iascii,isbinary;
1054   PetscViewer       sviewer;
1055   PetscViewerFormat format;
1056 
1057   PetscFunctionBegin;
1058   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
1059   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
1060   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
1061   if (iascii) {
1062     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1063     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1064       MatInfo    info;
1065       PetscTruth inodes;
1066 
1067       ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr);
1068       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
1069       ierr = MatInodeGetInodeSizes(aij->A,PETSC_NULL,(PetscInt **)&inodes,PETSC_NULL);CHKERRQ(ierr);
1070       if (!inodes) {
1071         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, not using I-node routines\n",
1072 					      rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
1073       } else {
1074         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, using I-node routines\n",
1075 		    rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
1076       }
1077       ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
1078       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1079       ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
1080       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1081       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1082       ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
1083       ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr);
1084       PetscFunctionReturn(0);
1085     } else if (format == PETSC_VIEWER_ASCII_INFO) {
1086       PetscInt   inodecount,inodelimit,*inodes;
1087       ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr);
1088       if (inodes) {
1089         ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr);
1090       } else {
1091         ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr);
1092       }
1093       PetscFunctionReturn(0);
1094     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1095       PetscFunctionReturn(0);
1096     }
1097   } else if (isbinary) {
1098     if (size == 1) {
1099       ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1100       ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
1101     } else {
1102       ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr);
1103     }
1104     PetscFunctionReturn(0);
1105   } else if (isdraw) {
1106     PetscDraw  draw;
1107     PetscTruth isnull;
1108     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
1109     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
1110   }
1111 
1112   if (size == 1) {
1113     ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1114     ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
1115   } else {
1116     /* assemble the entire matrix onto first processor. */
1117     Mat         A;
1118     Mat_SeqAIJ  *Aloc;
1119     PetscInt    M = mat->rmap->N,N = mat->cmap->N,m,*ai,*aj,row,*cols,i,*ct;
1120     MatScalar   *a;
1121 
1122     if (mat->rmap->N > 1024) {
1123       PetscTruth flg = PETSC_FALSE;
1124 
1125       ierr = PetscOptionsGetTruth(((PetscObject) mat)->prefix, "-mat_ascii_output_large", &flg,PETSC_NULL);CHKERRQ(ierr);
1126       if (!flg) {
1127         SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_OUTOFRANGE,"ASCII matrix output not allowed for matrices with more than 1024 rows, use binary format instead.\nYou can override this restriction using -mat_ascii_output_large.");
1128       }
1129     }
1130 
1131     ierr = MatCreate(((PetscObject)mat)->comm,&A);CHKERRQ(ierr);
1132     if (!rank) {
1133       ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
1134     } else {
1135       ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
1136     }
1137     /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */
1138     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
1139     ierr = MatMPIAIJSetPreallocation(A,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
1140     ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr);
1141 
1142     /* copy over the A part */
1143     Aloc = (Mat_SeqAIJ*)aij->A->data;
1144     m = aij->A->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1145     row = mat->rmap->rstart;
1146     for (i=0; i<ai[m]; i++) {aj[i] += mat->cmap->rstart ;}
1147     for (i=0; i<m; i++) {
1148       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
1149       row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
1150     }
1151     aj = Aloc->j;
1152     for (i=0; i<ai[m]; i++) {aj[i] -= mat->cmap->rstart;}
1153 
1154     /* copy over the B part */
1155     Aloc = (Mat_SeqAIJ*)aij->B->data;
1156     m    = aij->B->rmap->n;  ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1157     row  = mat->rmap->rstart;
1158     ierr = PetscMalloc((ai[m]+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr);
1159     ct   = cols;
1160     for (i=0; i<ai[m]; i++) {cols[i] = aij->garray[aj[i]];}
1161     for (i=0; i<m; i++) {
1162       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
1163       row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
1164     }
1165     ierr = PetscFree(ct);CHKERRQ(ierr);
1166     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1167     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1168     /*
1169        Everyone has to participate in drawing the matrix since the graphics waits are
1170        synchronized across all processes that share the PetscDraw object
1171     */
1172     ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
1173     if (!rank) {
1174       ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1175       ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
1176     }
1177     ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
1178     ierr = MatDestroy(A);CHKERRQ(ierr);
1179   }
1180   PetscFunctionReturn(0);
1181 }
1182 
1183 #undef __FUNCT__
1184 #define __FUNCT__ "MatView_MPIAIJ"
1185 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer)
1186 {
1187   PetscErrorCode ierr;
1188   PetscTruth     iascii,isdraw,issocket,isbinary;
1189 
1190   PetscFunctionBegin;
1191   ierr  = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
1192   ierr  = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
1193   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
1194   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr);
1195   if (iascii || isdraw || isbinary || issocket) {
1196     ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
1197   } else {
1198     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Viewer type %s not supported by MPIAIJ matrices",((PetscObject)viewer)->type_name);
1199   }
1200   PetscFunctionReturn(0);
1201 }
1202 
1203 #undef __FUNCT__
1204 #define __FUNCT__ "MatSOR_MPIAIJ"
1205 PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
1206 {
1207   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1208   PetscErrorCode ierr;
1209   Vec            bb1 = 0;
1210   PetscTruth     hasop;
1211 
1212   PetscFunctionBegin;
1213   if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) {
1214     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
1215   }
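  /* the SOR_LOCAL_* branches below implement a block Jacobi outer iteration with SOR as
     the local solver: each pass scatters the current x to the ghost values, forms
     bb1 = bb - B*lvec, and runs the requested sweep of the diagonal block A on bb1 */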
1216 
1217   if (flag == SOR_APPLY_UPPER) {
1218     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1219     PetscFunctionReturn(0);
1220   }
1221 
1222   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
1223     if (flag & SOR_ZERO_INITIAL_GUESS) {
1224       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1225       its--;
1226     }
1227 
1228     while (its--) {
1229       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1230       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1231 
1232       /* update rhs: bb1 = bb - B*x */
1233       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1234       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1235 
1236       /* local sweep */
1237       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1238     }
1239   } else if (flag & SOR_LOCAL_FORWARD_SWEEP){
1240     if (flag & SOR_ZERO_INITIAL_GUESS) {
1241       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1242       its--;
1243     }
1244     while (its--) {
1245       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1246       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1247 
1248       /* update rhs: bb1 = bb - B*x */
1249       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1250       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1251 
1252       /* local sweep */
1253       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1254     }
1255   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){
1256     if (flag & SOR_ZERO_INITIAL_GUESS) {
1257       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1258       its--;
1259     }
1260     while (its--) {
1261       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1262       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1263 
1264       /* update rhs: bb1 = bb - B*x */
1265       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1266       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1267 
1268       /* local sweep */
1269       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1270     }
1271   }  else if (flag & SOR_EISENSTAT) {
1272     Vec         xx1;
1273 
1274     ierr = VecDuplicate(bb,&xx1);CHKERRQ(ierr);
1275     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr);
1276 
1277     ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1278     ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1279     if (!mat->diag) {
1280       ierr = MatGetVecs(matin,&mat->diag,PETSC_NULL);CHKERRQ(ierr);
1281       ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr);
1282     }
1283     ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr);
1284     if (hasop) {
1285       ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr);
1286     } else {
1287       ierr = VecPointwiseMult(bb1,mat->diag,xx);CHKERRQ(ierr);
1288     }
1289     ierr = VecAYPX(bb1,(omega-2.0)/omega,bb);CHKERRQ(ierr);
1290 
1291     ierr = MatMultAdd(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr);
1292 
1293     /* local sweep */
1294     ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr);
1295     ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr);
1296     ierr = VecDestroy(xx1);CHKERRQ(ierr);
1297   } else {
1298     SETERRQ(((PetscObject)matin)->comm,PETSC_ERR_SUP,"Parallel SOR not supported");
1299   }
1300 
1301   if (bb1) {ierr = VecDestroy(bb1);CHKERRQ(ierr);}
1302   PetscFunctionReturn(0);
1303 }
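
/*
   Editorial usage sketch (not part of the original source): how the kernel above is
   typically driven through the MatSOR() interface, e.g. as a parallel smoother.  The
   assembled MPIAIJ matrix A and conforming vectors b,x are assumed to exist.

     PetscErrorCode ierr;
     PetscInt       its = 2,lits = 1;
     ierr = VecSet(x,0.0);CHKERRQ(ierr);
     ierr = MatSOR(A,b,1.0,(MatSORType)(SOR_LOCAL_SYMMETRIC_SWEEP | SOR_ZERO_INITIAL_GUESS),
                   0.0,its,lits,x);CHKERRQ(ierr);

   Each outer iteration scatters x into mat->lvec, forms bb1 = b - B*x_offproc, and
   applies the sequential SOR of the diagonal block A.
*/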
1304 
1305 #undef __FUNCT__
1306 #define __FUNCT__ "MatPermute_MPIAIJ"
1307 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B)
1308 {
1309   MPI_Comm       comm,pcomm;
1310   PetscInt       first,local_size,nrows;
1311   const PetscInt *rows;
1312   PetscMPIInt    size;
1313   IS             crowp,growp,irowp,lrowp,lcolp,icolp;
1314   PetscErrorCode ierr;
1315 
1316   PetscFunctionBegin;
1317   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
1318   /* make a collective version of 'rowp' */
1319   ierr = PetscObjectGetComm((PetscObject)rowp,&pcomm);CHKERRQ(ierr);
1320   if (pcomm==comm) {
1321     crowp = rowp;
1322   } else {
1323     ierr = ISGetSize(rowp,&nrows);CHKERRQ(ierr);
1324     ierr = ISGetIndices(rowp,&rows);CHKERRQ(ierr);
1325     ierr = ISCreateGeneral(comm,nrows,rows,&crowp);CHKERRQ(ierr);
1326     ierr = ISRestoreIndices(rowp,&rows);CHKERRQ(ierr);
1327   }
1328   /* collect the global row permutation and invert it */
1329   ierr = ISAllGather(crowp,&growp);CHKERRQ(ierr);
1330   ierr = ISSetPermutation(growp);CHKERRQ(ierr);
1331   if (pcomm!=comm) {
1332     ierr = ISDestroy(crowp);CHKERRQ(ierr);
1333   }
1334   ierr = ISInvertPermutation(growp,PETSC_DECIDE,&irowp);CHKERRQ(ierr);
1335   /* get the local target indices */
1336   ierr = MatGetOwnershipRange(A,&first,PETSC_NULL);CHKERRQ(ierr);
1337   ierr = MatGetLocalSize(A,&local_size,PETSC_NULL);CHKERRQ(ierr);
1338   ierr = ISGetIndices(irowp,&rows);CHKERRQ(ierr);
1339   ierr = ISCreateGeneral(MPI_COMM_SELF,local_size,rows+first,&lrowp);CHKERRQ(ierr);
1340   ierr = ISRestoreIndices(irowp,&rows);CHKERRQ(ierr);
1341   ierr = ISDestroy(irowp);CHKERRQ(ierr);
1342   /* the column permutation is simpler: build a purely local
1343      version of 'colp' and invert it */
1344   ierr = PetscObjectGetComm((PetscObject)colp,&pcomm);CHKERRQ(ierr);
1345   ierr = MPI_Comm_size(pcomm,&size);CHKERRQ(ierr);
1346   if (size==1) {
1347     lcolp = colp;
1348   } else {
1349     ierr = ISGetSize(colp,&nrows);CHKERRQ(ierr);
1350     ierr = ISGetIndices(colp,&rows);CHKERRQ(ierr);
1351     ierr = ISCreateGeneral(MPI_COMM_SELF,nrows,rows,&lcolp);CHKERRQ(ierr);
1352   }
1353   ierr = ISSetPermutation(lcolp);CHKERRQ(ierr);
1354   ierr = ISInvertPermutation(lcolp,PETSC_DECIDE,&icolp);CHKERRQ(ierr);
1355   ierr = ISSetPermutation(icolp);CHKERRQ(ierr);
1356   if (size>1) {
1357     ierr = ISRestoreIndices(colp,&rows);CHKERRQ(ierr);
1358     ierr = ISDestroy(lcolp);CHKERRQ(ierr);
1359   }
1360   /* now we just get the submatrix */
1361   ierr = MatGetSubMatrix_MPIAIJ_Private(A,lrowp,icolp,local_size,MAT_INITIAL_MATRIX,B);CHKERRQ(ierr);
1362   /* clean up */
1363   ierr = ISDestroy(lrowp);CHKERRQ(ierr);
1364   ierr = ISDestroy(icolp);CHKERRQ(ierr);
1365   PetscFunctionReturn(0);
1366 }
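
/*
   Editorial usage sketch (not part of the original source): calling the routine above
   through MatPermute(); here the same permutation is applied to rows and columns.
   The permutation data (nlocal, permidx) are hypothetical and must describe this
   process's share of a global permutation.

     IS  rowp;
     Mat Aperm;
     ierr = ISCreateGeneral(((PetscObject)A)->comm,nlocal,permidx,&rowp);CHKERRQ(ierr);
     ierr = MatPermute(A,rowp,rowp,&Aperm);CHKERRQ(ierr);
     ierr = ISDestroy(rowp);CHKERRQ(ierr);
     ierr = MatDestroy(Aperm);CHKERRQ(ierr);
*/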
1367 
1368 #undef __FUNCT__
1369 #define __FUNCT__ "MatGetInfo_MPIAIJ"
1370 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1371 {
1372   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1373   Mat            A = mat->A,B = mat->B;
1374   PetscErrorCode ierr;
1375   PetscReal      isend[5],irecv[5];
1376 
1377   PetscFunctionBegin;
1378   info->block_size     = 1.0;
1379   ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1380   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1381   isend[3] = info->memory;  isend[4] = info->mallocs;
1382   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1383   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1384   isend[3] += info->memory;  isend[4] += info->mallocs;
1385   if (flag == MAT_LOCAL) {
1386     info->nz_used      = isend[0];
1387     info->nz_allocated = isend[1];
1388     info->nz_unneeded  = isend[2];
1389     info->memory       = isend[3];
1390     info->mallocs      = isend[4];
1391   } else if (flag == MAT_GLOBAL_MAX) {
1392     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,((PetscObject)matin)->comm);CHKERRQ(ierr);
1393     info->nz_used      = irecv[0];
1394     info->nz_allocated = irecv[1];
1395     info->nz_unneeded  = irecv[2];
1396     info->memory       = irecv[3];
1397     info->mallocs      = irecv[4];
1398   } else if (flag == MAT_GLOBAL_SUM) {
1399     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,((PetscObject)matin)->comm);CHKERRQ(ierr);
1400     info->nz_used      = irecv[0];
1401     info->nz_allocated = irecv[1];
1402     info->nz_unneeded  = irecv[2];
1403     info->memory       = irecv[3];
1404     info->mallocs      = irecv[4];
1405   }
1406   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1407   info->fill_ratio_needed = 0;
1408   info->factor_mallocs    = 0;
1409 
1410   PetscFunctionReturn(0);
1411 }
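
/*
   Editorial usage sketch (not part of the original source): querying the statistics
   aggregated above; A is an assembled MPIAIJ matrix.  MAT_LOCAL, MAT_GLOBAL_MAX and
   MAT_GLOBAL_SUM select the reduction performed over the communicator.

     MatInfo info;
     ierr = MatGetInfo(A,MAT_GLOBAL_SUM,&info);CHKERRQ(ierr);
     ierr = PetscPrintf(((PetscObject)A)->comm,"nz used %g  nz allocated %g  mallocs %g\n",
                        info.nz_used,info.nz_allocated,info.mallocs);CHKERRQ(ierr);
*/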
1412 
1413 #undef __FUNCT__
1414 #define __FUNCT__ "MatSetOption_MPIAIJ"
1415 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscTruth flg)
1416 {
1417   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1418   PetscErrorCode ierr;
1419 
1420   PetscFunctionBegin;
1421   switch (op) {
1422   case MAT_NEW_NONZERO_LOCATIONS:
1423   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1424   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1425   case MAT_KEEP_NONZERO_PATTERN:
1426   case MAT_NEW_NONZERO_LOCATION_ERR:
1427   case MAT_USE_INODES:
1428   case MAT_IGNORE_ZERO_ENTRIES:
1429     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1430     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1431     break;
1432   case MAT_ROW_ORIENTED:
1433     a->roworiented = flg;
1434     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1435     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1436     break;
1437   case MAT_NEW_DIAGONALS:
1438     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1439     break;
1440   case MAT_IGNORE_OFF_PROC_ENTRIES:
1441     a->donotstash = PETSC_TRUE;
1442     break;
1443   case MAT_SYMMETRIC:
1444     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1445     break;
1446   case MAT_STRUCTURALLY_SYMMETRIC:
1447     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1448     break;
1449   case MAT_HERMITIAN:
1450     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1451     break;
1452   case MAT_SYMMETRY_ETERNAL:
1453     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1454     break;
1455   default:
1456     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op);
1457   }
1458   PetscFunctionReturn(0);
1459 }
1460 
1461 #undef __FUNCT__
1462 #define __FUNCT__ "MatGetRow_MPIAIJ"
1463 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1464 {
1465   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1466   PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
1467   PetscErrorCode ierr;
1468   PetscInt       i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart;
1469   PetscInt       nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend;
1470   PetscInt       *cmap,*idx_p;
1471 
1472   PetscFunctionBegin;
1473   if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
1474   mat->getrowactive = PETSC_TRUE;
1475 
1476   if (!mat->rowvalues && (idx || v)) {
1477     /*
1478         allocate enough space to hold information from the longest row.
1479     */
1480     Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data;
1481     PetscInt   max = 1,tmp;
1482     for (i=0; i<matin->rmap->n; i++) {
1483       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1484       if (max < tmp) { max = tmp; }
1485     }
1486     ierr = PetscMalloc2(max,PetscScalar,&mat->rowvalues,max,PetscInt,&mat->rowindices);CHKERRQ(ierr);
1487   }
1488 
1489   if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows");
1490   lrow = row - rstart;
1491 
1492   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1493   if (!v)   {pvA = 0; pvB = 0;}
1494   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1495   ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1496   ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1497   nztot = nzA + nzB;
1498 
1499   cmap  = mat->garray;
1500   if (v  || idx) {
1501     if (nztot) {
1502       /* Sort by increasing column numbers, assuming A and B already sorted */
1503       PetscInt imark = -1;
1504       if (v) {
1505         *v = v_p = mat->rowvalues;
1506         for (i=0; i<nzB; i++) {
1507           if (cmap[cworkB[i]] < cstart)   v_p[i] = vworkB[i];
1508           else break;
1509         }
1510         imark = i;
1511         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1512         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1513       }
1514       if (idx) {
1515         *idx = idx_p = mat->rowindices;
1516         if (imark > -1) {
1517           for (i=0; i<imark; i++) {
1518             idx_p[i] = cmap[cworkB[i]];
1519           }
1520         } else {
1521           for (i=0; i<nzB; i++) {
1522             if (cmap[cworkB[i]] < cstart)   idx_p[i] = cmap[cworkB[i]];
1523             else break;
1524           }
1525           imark = i;
1526         }
1527         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart + cworkA[i];
1528         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]];
1529       }
1530     } else {
1531       if (idx) *idx = 0;
1532       if (v)   *v   = 0;
1533     }
1534   }
1535   *nz = nztot;
1536   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1537   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1538   PetscFunctionReturn(0);
1539 }
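
/*
   Editorial usage sketch (not part of the original source): the access pattern the
   routines above support.  Only locally owned rows may be requested, and each
   MatGetRow() must be matched by MatRestoreRow() before the next row is fetched.

     PetscInt          row,rstart,rend,ncols;
     const PetscInt    *cols;
     const PetscScalar *vals;
     ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
     for (row=rstart; row<rend; row++) {
       ierr = MatGetRow(A,row,&ncols,&cols,&vals);CHKERRQ(ierr);
       ierr = PetscSynchronizedPrintf(PETSC_COMM_WORLD,"row %D: %D nonzeros\n",row,ncols);CHKERRQ(ierr);
       ierr = MatRestoreRow(A,row,&ncols,&cols,&vals);CHKERRQ(ierr);
     }
     ierr = PetscSynchronizedFlush(PETSC_COMM_WORLD);CHKERRQ(ierr);
*/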
1540 
1541 #undef __FUNCT__
1542 #define __FUNCT__ "MatRestoreRow_MPIAIJ"
1543 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1544 {
1545   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1546 
1547   PetscFunctionBegin;
1548   if (!aij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
1549   aij->getrowactive = PETSC_FALSE;
1550   PetscFunctionReturn(0);
1551 }
1552 
1553 #undef __FUNCT__
1554 #define __FUNCT__ "MatNorm_MPIAIJ"
1555 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm)
1556 {
1557   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1558   Mat_SeqAIJ     *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data;
1559   PetscErrorCode ierr;
1560   PetscInt       i,j,cstart = mat->cmap->rstart;
1561   PetscReal      sum = 0.0;
1562   MatScalar      *v;
1563 
1564   PetscFunctionBegin;
1565   if (aij->size == 1) {
1566     ierr =  MatNorm(aij->A,type,norm);CHKERRQ(ierr);
1567   } else {
1568     if (type == NORM_FROBENIUS) {
1569       v = amat->a;
1570       for (i=0; i<amat->nz; i++) {
1571 #if defined(PETSC_USE_COMPLEX)
1572         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1573 #else
1574         sum += (*v)*(*v); v++;
1575 #endif
1576       }
1577       v = bmat->a;
1578       for (i=0; i<bmat->nz; i++) {
1579 #if defined(PETSC_USE_COMPLEX)
1580         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1581 #else
1582         sum += (*v)*(*v); v++;
1583 #endif
1584       }
1585       ierr = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPI_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr);
1586       *norm = sqrt(*norm);
1587     } else if (type == NORM_1) { /* max column norm */
1588       PetscReal *tmp,*tmp2;
1589       PetscInt  *jj,*garray = aij->garray;
1590       ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr);
1591       ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscReal),&tmp2);CHKERRQ(ierr);
1592       ierr = PetscMemzero(tmp,mat->cmap->N*sizeof(PetscReal));CHKERRQ(ierr);
1593       *norm = 0.0;
1594       v = amat->a; jj = amat->j;
1595       for (j=0; j<amat->nz; j++) {
1596         tmp[cstart + *jj++ ] += PetscAbsScalar(*v);  v++;
1597       }
1598       v = bmat->a; jj = bmat->j;
1599       for (j=0; j<bmat->nz; j++) {
1600         tmp[garray[*jj++]] += PetscAbsScalar(*v); v++;
1601       }
1602       ierr = MPI_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPI_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr);
1603       for (j=0; j<mat->cmap->N; j++) {
1604         if (tmp2[j] > *norm) *norm = tmp2[j];
1605       }
1606       ierr = PetscFree(tmp);CHKERRQ(ierr);
1607       ierr = PetscFree(tmp2);CHKERRQ(ierr);
1608     } else if (type == NORM_INFINITY) { /* max row norm */
1609       PetscReal ntemp = 0.0;
1610       for (j=0; j<aij->A->rmap->n; j++) {
1611         v = amat->a + amat->i[j];
1612         sum = 0.0;
1613         for (i=0; i<amat->i[j+1]-amat->i[j]; i++) {
1614           sum += PetscAbsScalar(*v); v++;
1615         }
1616         v = bmat->a + bmat->i[j];
1617         for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) {
1618           sum += PetscAbsScalar(*v); v++;
1619         }
1620         if (sum > ntemp) ntemp = sum;
1621       }
1622       ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPI_MAX,((PetscObject)mat)->comm);CHKERRQ(ierr);
1623     } else {
1624       SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_SUP,"No support for two norm");
1625     }
1626   }
1627   PetscFunctionReturn(0);
1628 }
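
/*
   Editorial usage sketch (not part of the original source): the norms the routine above
   supports for parallel AIJ.  NORM_2 is not available (see the error branch); the
   Frobenius, 1- (max column sum) and infinity (max row sum) norms are.

     PetscReal nrm;
     ierr = MatNorm(A,NORM_FROBENIUS,&nrm);CHKERRQ(ierr);
     ierr = MatNorm(A,NORM_1,&nrm);CHKERRQ(ierr);
     ierr = MatNorm(A,NORM_INFINITY,&nrm);CHKERRQ(ierr);
*/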
1629 
1630 #undef __FUNCT__
1631 #define __FUNCT__ "MatTranspose_MPIAIJ"
1632 PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout)
1633 {
1634   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1635   Mat_SeqAIJ     *Aloc=(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data;
1636   PetscErrorCode ierr;
1637   PetscInt       M = A->rmap->N,N = A->cmap->N,ma,na,mb,*ai,*aj,*bi,*bj,row,*cols,*cols_tmp,i,*d_nnz;
1638   PetscInt       cstart=A->cmap->rstart,ncol;
1639   Mat            B;
1640   MatScalar      *array;
1641 
1642   PetscFunctionBegin;
1643   if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
1644 
1645   ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n;
1646   ai = Aloc->i; aj = Aloc->j;
1647   bi = Bloc->i; bj = Bloc->j;
1648   if (reuse == MAT_INITIAL_MATRIX || *matout == A) {
1649     /* compute d_nnz for preallocation; o_nnz is approximated by d_nnz to avoid communication */
1650     ierr = PetscMalloc((1+na)*sizeof(PetscInt),&d_nnz);CHKERRQ(ierr);
1651     ierr = PetscMemzero(d_nnz,(1+na)*sizeof(PetscInt));CHKERRQ(ierr);
1652     for (i=0; i<ai[ma]; i++){
1653       d_nnz[aj[i]] ++;
1654       aj[i] += cstart; /* global col index to be used by MatSetValues() */
1655     }
1656 
1657     ierr = MatCreate(((PetscObject)A)->comm,&B);CHKERRQ(ierr);
1658     ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
1659     ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
1660     ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,d_nnz);CHKERRQ(ierr);
1661     ierr = PetscFree(d_nnz);CHKERRQ(ierr);
1662   } else {
1663     B = *matout;
1664   }
1665 
1666   /* copy over the A part */
1667   array = Aloc->a;
1668   row = A->rmap->rstart;
1669   for (i=0; i<ma; i++) {
1670     ncol = ai[i+1]-ai[i];
1671     ierr = MatSetValues(B,ncol,aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1672     row++; array += ncol; aj += ncol;
1673   }
1674   aj = Aloc->j;
1675   for (i=0; i<ai[ma]; i++) aj[i] -= cstart; /* restore local col index */
1676 
1677   /* copy over the B part */
1678   ierr = PetscMalloc(bi[mb]*sizeof(PetscInt),&cols);CHKERRQ(ierr);
1679   ierr = PetscMemzero(cols,bi[mb]*sizeof(PetscInt));CHKERRQ(ierr);
1680   array = Bloc->a;
1681   row = A->rmap->rstart;
1682   for (i=0; i<bi[mb]; i++) {cols[i] = a->garray[bj[i]];}
1683   cols_tmp = cols;
1684   for (i=0; i<mb; i++) {
1685     ncol = bi[i+1]-bi[i];
1686     ierr = MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1687     row++; array += ncol; cols_tmp += ncol;
1688   }
1689   ierr = PetscFree(cols);CHKERRQ(ierr);
1690 
1691   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1692   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1693   if (reuse == MAT_INITIAL_MATRIX || *matout != A) {
1694     *matout = B;
1695   } else {
1696     ierr = MatHeaderCopy(A,B);CHKERRQ(ierr);
1697   }
1698   PetscFunctionReturn(0);
1699 }
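
/*
   Editorial usage sketch (not part of the original source): out-of-place and in-place
   use of the transpose above.  In-place reuse (passing &A back) is only permitted for
   square matrices, as checked at the top of MatTranspose_MPIAIJ().

     Mat At;
     ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr);
     ierr = MatTranspose(A,MAT_REUSE_MATRIX,&A);CHKERRQ(ierr);
     ierr = MatDestroy(At);CHKERRQ(ierr);
*/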
1700 
1701 #undef __FUNCT__
1702 #define __FUNCT__ "MatDiagonalScale_MPIAIJ"
1703 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
1704 {
1705   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1706   Mat            a = aij->A,b = aij->B;
1707   PetscErrorCode ierr;
1708   PetscInt       s1,s2,s3;
1709 
1710   PetscFunctionBegin;
1711   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
1712   if (rr) {
1713     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
1714     if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
1715     /* Overlap communication with computation. */
1716     ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1717   }
1718   if (ll) {
1719     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
1720     if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
1721     ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr);
1722   }
1723   /* scale the diagonal block */
1724   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
1725 
1726   if (rr) {
1727     /* Do a scatter end and then right scale the off-diagonal block */
1728     ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1729     ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr);
1730   }
1731 
1732   PetscFunctionReturn(0);
1733 }
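
/*
   Editorial usage sketch (not part of the original source): computing L*A*R with the
   routine above.  l must conform to the row layout of A and r to its column layout;
   either argument may be PETSC_NULL to scale on one side only.

     Vec l,r;
     ierr = MatGetVecs(A,&r,&l);CHKERRQ(ierr);
     ierr = VecSet(l,2.0);CHKERRQ(ierr);
     ierr = VecSet(r,0.5);CHKERRQ(ierr);
     ierr = MatDiagonalScale(A,l,r);CHKERRQ(ierr);
     ierr = VecDestroy(l);CHKERRQ(ierr);
     ierr = VecDestroy(r);CHKERRQ(ierr);
*/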
1734 
1735 #undef __FUNCT__
1736 #define __FUNCT__ "MatSetBlockSize_MPIAIJ"
1737 PetscErrorCode MatSetBlockSize_MPIAIJ(Mat A,PetscInt bs)
1738 {
1739   Mat_MPIAIJ     *a   = (Mat_MPIAIJ*)A->data;
1740   PetscErrorCode ierr;
1741 
1742   PetscFunctionBegin;
1743   ierr = MatSetBlockSize(a->A,bs);CHKERRQ(ierr);
1744   ierr = MatSetBlockSize(a->B,bs);CHKERRQ(ierr);
1745   ierr = PetscLayoutSetBlockSize(A->rmap,bs);CHKERRQ(ierr);
1746   ierr = PetscLayoutSetBlockSize(A->cmap,bs);CHKERRQ(ierr);
1747   PetscFunctionReturn(0);
1748 }
1749 #undef __FUNCT__
1750 #define __FUNCT__ "MatSetUnfactored_MPIAIJ"
1751 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A)
1752 {
1753   Mat_MPIAIJ     *a   = (Mat_MPIAIJ*)A->data;
1754   PetscErrorCode ierr;
1755 
1756   PetscFunctionBegin;
1757   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
1758   PetscFunctionReturn(0);
1759 }
1760 
1761 #undef __FUNCT__
1762 #define __FUNCT__ "MatEqual_MPIAIJ"
1763 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscTruth *flag)
1764 {
1765   Mat_MPIAIJ     *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data;
1766   Mat            a,b,c,d;
1767   PetscTruth     flg;
1768   PetscErrorCode ierr;
1769 
1770   PetscFunctionBegin;
1771   a = matA->A; b = matA->B;
1772   c = matB->A; d = matB->B;
1773 
1774   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
1775   if (flg) {
1776     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
1777   }
1778   ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,((PetscObject)A)->comm);CHKERRQ(ierr);
1779   PetscFunctionReturn(0);
1780 }
1781 
1782 #undef __FUNCT__
1783 #define __FUNCT__ "MatCopy_MPIAIJ"
1784 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
1785 {
1786   PetscErrorCode ierr;
1787   Mat_MPIAIJ     *a = (Mat_MPIAIJ *)A->data;
1788   Mat_MPIAIJ     *b = (Mat_MPIAIJ *)B->data;
1789 
1790   PetscFunctionBegin;
1791   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
1792   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
1793     /* because of the column compression in the off-processor part of the matrix a->B,
1794        the number of columns in a->B and b->B may differ, so MatCopy() cannot be called
1795        directly on the two parts. If needed, a copy more efficient than MatCopy_Basic()
1796        could be provided by first uncompressing the a->B matrices and then copying the
1797        submatrices */
1798     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
1799   } else {
1800     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
1801     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
1802   }
1803   PetscFunctionReturn(0);
1804 }
1805 
1806 #undef __FUNCT__
1807 #define __FUNCT__ "MatSetUpPreallocation_MPIAIJ"
1808 PetscErrorCode MatSetUpPreallocation_MPIAIJ(Mat A)
1809 {
1810   PetscErrorCode ierr;
1811 
1812   PetscFunctionBegin;
1813   ierr =  MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
1814   PetscFunctionReturn(0);
1815 }
1816 
1817 #undef __FUNCT__
1818 #define __FUNCT__ "MatAXPY_MPIAIJ"
1819 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
1820 {
1821   PetscErrorCode ierr;
1822   PetscInt       i;
1823   Mat_MPIAIJ     *xx = (Mat_MPIAIJ *)X->data,*yy = (Mat_MPIAIJ *)Y->data;
1824   PetscBLASInt   bnz,one=1;
1825   Mat_SeqAIJ     *x,*y;
1826 
1827   PetscFunctionBegin;
1828   if (str == SAME_NONZERO_PATTERN) {
1829     PetscScalar alpha = a;
1830     x = (Mat_SeqAIJ *)xx->A->data;
1831     y = (Mat_SeqAIJ *)yy->A->data;
1832     bnz = PetscBLASIntCast(x->nz);
1833     BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
1834     x = (Mat_SeqAIJ *)xx->B->data;
1835     y = (Mat_SeqAIJ *)yy->B->data;
1836     bnz = PetscBLASIntCast(x->nz);
1837     BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
1838   } else if (str == SUBSET_NONZERO_PATTERN) {
1839     ierr = MatAXPY_SeqAIJ(yy->A,a,xx->A,str);CHKERRQ(ierr);
1840 
1841     x = (Mat_SeqAIJ *)xx->B->data;
1842     y = (Mat_SeqAIJ *)yy->B->data;
1843     if (y->xtoy && y->XtoY != xx->B) {
1844       ierr = PetscFree(y->xtoy);CHKERRQ(ierr);
1845       ierr = MatDestroy(y->XtoY);CHKERRQ(ierr);
1846     }
1847     if (!y->xtoy) { /* get xtoy */
1848       ierr = MatAXPYGetxtoy_Private(xx->B->rmap->n,x->i,x->j,xx->garray,y->i,y->j,yy->garray,&y->xtoy);CHKERRQ(ierr);
1849       y->XtoY = xx->B;
1850       ierr = PetscObjectReference((PetscObject)xx->B);CHKERRQ(ierr);
1851     }
1852     for (i=0; i<x->nz; i++) y->a[y->xtoy[i]] += a*(x->a[i]);
1853   } else {
1854     ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
1855   }
1856   PetscFunctionReturn(0);
1857 }
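
/*
   Editorial usage sketch (not part of the original source): the three branches above
   are selected by the MatStructure argument of MatAXPY().  SAME_NONZERO_PATTERN is
   the cheapest, reducing to BLAS axpy on the stored values of the A and B parts.

     ierr = MatAXPY(Y,2.0,X,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
     ierr = MatAXPY(Y,2.0,X,SUBSET_NONZERO_PATTERN);CHKERRQ(ierr);
     ierr = MatAXPY(Y,2.0,X,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);
*/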
1858 
1859 EXTERN PetscErrorCode PETSCMAT_DLLEXPORT MatConjugate_SeqAIJ(Mat);
1860 
1861 #undef __FUNCT__
1862 #define __FUNCT__ "MatConjugate_MPIAIJ"
1863 PetscErrorCode PETSCMAT_DLLEXPORT MatConjugate_MPIAIJ(Mat mat)
1864 {
1865 #if defined(PETSC_USE_COMPLEX)
1866   PetscErrorCode ierr;
1867   Mat_MPIAIJ     *aij = (Mat_MPIAIJ *)mat->data;
1868 
1869   PetscFunctionBegin;
1870   ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr);
1871   ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr);
1872 #else
1873   PetscFunctionBegin;
1874 #endif
1875   PetscFunctionReturn(0);
1876 }
1877 
1878 #undef __FUNCT__
1879 #define __FUNCT__ "MatRealPart_MPIAIJ"
1880 PetscErrorCode MatRealPart_MPIAIJ(Mat A)
1881 {
1882   Mat_MPIAIJ   *a = (Mat_MPIAIJ*)A->data;
1883   PetscErrorCode ierr;
1884 
1885   PetscFunctionBegin;
1886   ierr = MatRealPart(a->A);CHKERRQ(ierr);
1887   ierr = MatRealPart(a->B);CHKERRQ(ierr);
1888   PetscFunctionReturn(0);
1889 }
1890 
1891 #undef __FUNCT__
1892 #define __FUNCT__ "MatImaginaryPart_MPIAIJ"
1893 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A)
1894 {
1895   Mat_MPIAIJ   *a = (Mat_MPIAIJ*)A->data;
1896   PetscErrorCode ierr;
1897 
1898   PetscFunctionBegin;
1899   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
1900   ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
1901   PetscFunctionReturn(0);
1902 }
1903 
1904 #ifdef PETSC_HAVE_PBGL
1905 
1906 #include <boost/parallel/mpi/bsp_process_group.hpp>
1907 #include <boost/graph/distributed/ilu_default_graph.hpp>
1908 #include <boost/graph/distributed/ilu_0_block.hpp>
1909 #include <boost/graph/distributed/ilu_preconditioner.hpp>
1910 #include <boost/graph/distributed/petsc/interface.hpp>
1911 #include <boost/multi_array.hpp>
1912 #include <boost/parallel/distributed_property_map.hpp>
1913 
1914 #undef __FUNCT__
1915 #define __FUNCT__ "MatILUFactorSymbolic_MPIAIJ"
1916 /*
1917   This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu>
1918 */
1919 PetscErrorCode MatILUFactorSymbolic_MPIAIJ(Mat fact,Mat A, IS isrow, IS iscol, const MatFactorInfo *info)
1920 {
1921   namespace petsc = boost::distributed::petsc;
1922 
1923   namespace graph_dist = boost::graph::distributed;
1924   using boost::graph::distributed::ilu_default::process_group_type;
1925   using boost::graph::ilu_permuted;
1926 
1927   PetscTruth      row_identity, col_identity;
1928   PetscContainer  c;
1929   PetscInt        m, n, M, N;
1930   PetscErrorCode  ierr;
1931 
1932   PetscFunctionBegin;
1933   if (info->levels != 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only levels = 0 supported for parallel ILU");
1934   ierr = ISIdentity(isrow, &row_identity);CHKERRQ(ierr);
1935   ierr = ISIdentity(iscol, &col_identity);CHKERRQ(ierr);
1936   if (!row_identity || !col_identity) {
1937     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for parallel ILU");
1938   }
1939 
1940   process_group_type pg;
1941   typedef graph_dist::ilu_default::ilu_level_graph_type  lgraph_type;
1942   lgraph_type*   lgraph_p = new lgraph_type(petsc::num_global_vertices(A), pg, petsc::matrix_distribution(A, pg));
1943   lgraph_type&   level_graph = *lgraph_p;
1944   graph_dist::ilu_default::graph_type&            graph(level_graph.graph);
1945 
1946   petsc::read_matrix(A, graph, get(boost::edge_weight, graph));
1947   ilu_permuted(level_graph);
1948 
1949   /* put together the new matrix */
1950   ierr = MatCreate(((PetscObject)A)->comm, fact);CHKERRQ(ierr);
1951   ierr = MatGetLocalSize(A, &m, &n);CHKERRQ(ierr);
1952   ierr = MatGetSize(A, &M, &N);CHKERRQ(ierr);
1953   ierr = MatSetSizes(fact, m, n, M, N);CHKERRQ(ierr);
1954   ierr = MatSetType(fact, ((PetscObject)A)->type_name);CHKERRQ(ierr);
1955   ierr = MatAssemblyBegin(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1956   ierr = MatAssemblyEnd(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1957 
1958   ierr = PetscContainerCreate(((PetscObject)A)->comm, &c);CHKERRQ(ierr);
1959   ierr = PetscContainerSetPointer(c, lgraph_p);CHKERRQ(ierr);
1960   ierr = PetscObjectCompose((PetscObject) (fact), "graph", (PetscObject) c);CHKERRQ(ierr);
1961   PetscFunctionReturn(0);
1962 }
1963 
1964 #undef __FUNCT__
1965 #define __FUNCT__ "MatLUFactorNumeric_MPIAIJ"
1966 PetscErrorCode MatLUFactorNumeric_MPIAIJ(Mat B,Mat A, const MatFactorInfo *info)
1967 {
1968   PetscFunctionBegin;
1969   PetscFunctionReturn(0);
1970 }
1971 
1972 #undef __FUNCT__
1973 #define __FUNCT__ "MatSolve_MPIAIJ"
1974 /*
1975   This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu>
1976 */
1977 PetscErrorCode MatSolve_MPIAIJ(Mat A, Vec b, Vec x)
1978 {
1979   namespace graph_dist = boost::graph::distributed;
1980 
1981   typedef graph_dist::ilu_default::ilu_level_graph_type  lgraph_type;
1982   lgraph_type*   lgraph_p;
1983   PetscContainer c;
1984   PetscErrorCode ierr;
1985 
1986   PetscFunctionBegin;
1987   ierr = PetscObjectQuery((PetscObject) A, "graph", (PetscObject *) &c);CHKERRQ(ierr);
1988   ierr = PetscContainerGetPointer(c, (void **) &lgraph_p);CHKERRQ(ierr);
1989   ierr = VecCopy(b, x);CHKERRQ(ierr);
1990 
1991   PetscScalar* array_x;
1992   ierr = VecGetArray(x, &array_x);CHKERRQ(ierr);
1993   PetscInt sx;
1994   ierr = VecGetSize(x, &sx);CHKERRQ(ierr);
1995 
1996   PetscScalar* array_b;
1997   ierr = VecGetArray(b, &array_b);CHKERRQ(ierr);
1998   PetscInt sb;
1999   ierr = VecGetSize(b, &sb);CHKERRQ(ierr);
2000 
2001   lgraph_type&   level_graph = *lgraph_p;
2002   graph_dist::ilu_default::graph_type&            graph(level_graph.graph);
2003 
2004   typedef boost::multi_array_ref<PetscScalar, 1> array_ref_type;
2005   array_ref_type                                 ref_b(array_b, boost::extents[num_vertices(graph)]),
2006                                                  ref_x(array_x, boost::extents[num_vertices(graph)]);
2007 
2008   typedef boost::iterator_property_map<array_ref_type::iterator,
2009                                 boost::property_map<graph_dist::ilu_default::graph_type, boost::vertex_index_t>::type>  gvector_type;
2010   gvector_type                                   vector_b(ref_b.begin(), get(boost::vertex_index, graph)),
2011                                                  vector_x(ref_x.begin(), get(boost::vertex_index, graph));
2012 
2013   ilu_set_solve(*lgraph_p, vector_b, vector_x);
2014 
2015   PetscFunctionReturn(0);
2016 }
2017 #endif
2018 
2019 typedef struct { /* used by MatGetRedundantMatrix() for reusing matredundant */
2020   PetscInt       nzlocal,nsends,nrecvs;
2021   PetscMPIInt    *send_rank,*recv_rank;
2022   PetscInt       *sbuf_nz,*rbuf_nz,*sbuf_j,**rbuf_j;
2023   PetscScalar    *sbuf_a,**rbuf_a;
2024   PetscErrorCode (*MatDestroy)(Mat);
2025 } Mat_Redundant;
2026 
2027 #undef __FUNCT__
2028 #define __FUNCT__ "PetscContainerDestroy_MatRedundant"
2029 PetscErrorCode PetscContainerDestroy_MatRedundant(void *ptr)
2030 {
2031   PetscErrorCode       ierr;
2032   Mat_Redundant        *redund=(Mat_Redundant*)ptr;
2033   PetscInt             i;
2034 
2035   PetscFunctionBegin;
2036   ierr = PetscFree2(redund->send_rank,redund->recv_rank);CHKERRQ(ierr);
2037   ierr = PetscFree(redund->sbuf_j);CHKERRQ(ierr);
2038   ierr = PetscFree(redund->sbuf_a);CHKERRQ(ierr);
2039   for (i=0; i<redund->nrecvs; i++){
2040     ierr = PetscFree(redund->rbuf_j[i]);CHKERRQ(ierr);
2041     ierr = PetscFree(redund->rbuf_a[i]);CHKERRQ(ierr);
2042   }
2043   ierr = PetscFree4(redund->sbuf_nz,redund->rbuf_nz,redund->rbuf_j,redund->rbuf_a);CHKERRQ(ierr);
2044   ierr = PetscFree(redund);CHKERRQ(ierr);
2045   PetscFunctionReturn(0);
2046 }
2047 
2048 #undef __FUNCT__
2049 #define __FUNCT__ "MatDestroy_MatRedundant"
2050 PetscErrorCode MatDestroy_MatRedundant(Mat A)
2051 {
2052   PetscErrorCode  ierr;
2053   PetscContainer  container;
2054   Mat_Redundant   *redund=PETSC_NULL;
2055 
2056   PetscFunctionBegin;
2057   ierr = PetscObjectQuery((PetscObject)A,"Mat_Redundant",(PetscObject *)&container);CHKERRQ(ierr);
2058   if (container) {
2059     ierr = PetscContainerGetPointer(container,(void **)&redund);CHKERRQ(ierr);
2060   } else {
2061     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Container does not exist");
2062   }
2063   A->ops->destroy = redund->MatDestroy;
2064   ierr = PetscObjectCompose((PetscObject)A,"Mat_Redundant",0);CHKERRQ(ierr);
2065   ierr = (*A->ops->destroy)(A);CHKERRQ(ierr);
2066   ierr = PetscContainerDestroy(container);CHKERRQ(ierr);
2067   PetscFunctionReturn(0);
2068 }
2069 
2070 #undef __FUNCT__
2071 #define __FUNCT__ "MatGetRedundantMatrix_MPIAIJ"
2072 PetscErrorCode MatGetRedundantMatrix_MPIAIJ(Mat mat,PetscInt nsubcomm,MPI_Comm subcomm,PetscInt mlocal_sub,MatReuse reuse,Mat *matredundant)
2073 {
2074   PetscMPIInt    rank,size;
2075   MPI_Comm       comm=((PetscObject)mat)->comm;
2076   PetscErrorCode ierr;
2077   PetscInt       nsends=0,nrecvs=0,i,rownz_max=0;
2078   PetscMPIInt    *send_rank=PETSC_NULL,*recv_rank=PETSC_NULL;
2079   PetscInt       *rowrange=mat->rmap->range;
2080   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2081   Mat            A=aij->A,B=aij->B,C=*matredundant;
2082   Mat_SeqAIJ     *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data;
2083   PetscScalar    *sbuf_a;
2084   PetscInt       nzlocal=a->nz+b->nz;
2085   PetscInt       j,cstart=mat->cmap->rstart,cend=mat->cmap->rend,row,nzA,nzB,ncols,*cworkA,*cworkB;
2086   PetscInt       rstart=mat->rmap->rstart,rend=mat->rmap->rend,*bmap=aij->garray,M,N;
2087   PetscInt       *cols,ctmp,lwrite,*rptr,l,*sbuf_j;
2088   MatScalar      *aworkA,*aworkB;
2089   PetscScalar    *vals;
2090   PetscMPIInt    tag1,tag2,tag3,imdex;
2091   MPI_Request    *s_waits1=PETSC_NULL,*s_waits2=PETSC_NULL,*s_waits3=PETSC_NULL,
2092                  *r_waits1=PETSC_NULL,*r_waits2=PETSC_NULL,*r_waits3=PETSC_NULL;
2093   MPI_Status     recv_status,*send_status;
2094   PetscInt       *sbuf_nz=PETSC_NULL,*rbuf_nz=PETSC_NULL,count;
2095   PetscInt       **rbuf_j=PETSC_NULL;
2096   PetscScalar    **rbuf_a=PETSC_NULL;
2097   Mat_Redundant  *redund=PETSC_NULL;
2098   PetscContainer container;
2099 
2100   PetscFunctionBegin;
2101   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2102   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2103 
2104   if (reuse == MAT_REUSE_MATRIX) {
2105     ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr);
2106     if (M != N || M != mat->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong global size");
2107     ierr = MatGetLocalSize(C,&M,&N);CHKERRQ(ierr);
2108     if (M != N || M != mlocal_sub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong local size");
2109     ierr = PetscObjectQuery((PetscObject)C,"Mat_Redundant",(PetscObject *)&container);CHKERRQ(ierr);
2110     if (container) {
2111       ierr = PetscContainerGetPointer(container,(void **)&redund);CHKERRQ(ierr);
2112     } else {
2113       SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Container does not exist");
2114     }
2115     if (nzlocal != redund->nzlocal) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong nzlocal");
2116 
2117     nsends    = redund->nsends;
2118     nrecvs    = redund->nrecvs;
2119     send_rank = redund->send_rank;
2120     recv_rank = redund->recv_rank;
2121     sbuf_nz   = redund->sbuf_nz;
2122     rbuf_nz   = redund->rbuf_nz;
2123     sbuf_j    = redund->sbuf_j;
2124     sbuf_a    = redund->sbuf_a;
2125     rbuf_j    = redund->rbuf_j;
2126     rbuf_a    = redund->rbuf_a;
2127   }
2128 
2129   if (reuse == MAT_INITIAL_MATRIX){
2130     PetscMPIInt  subrank,subsize;
2131     PetscInt     nleftover,np_subcomm;
2132     /* determine the destination ranks (send_rank) and the counts nsends and nrecvs */
2133     ierr = MPI_Comm_rank(subcomm,&subrank);CHKERRQ(ierr);
2134     ierr = MPI_Comm_size(subcomm,&subsize);CHKERRQ(ierr);
2135     ierr = PetscMalloc2(size,PetscMPIInt,&send_rank,size,PetscMPIInt,&recv_rank);CHKERRQ(ierr);
2136     np_subcomm = size/nsubcomm;
2137     nleftover  = size - nsubcomm*np_subcomm;
2138     nsends = 0; nrecvs = 0;
2139     for (i=0; i<size; i++){ /* i=rank*/
2140       if (subrank == i/nsubcomm && rank != i){ /* my_subrank == other's subrank */
2141         send_rank[nsends] = i; nsends++;
2142         recv_rank[nrecvs++] = i;
2143       }
2144     }
2145     if (rank >= size - nleftover){/* this proc is a leftover processor */
2146       i = size-nleftover-1;
2147       j = 0;
2148       while (j < nsubcomm - nleftover){
2149         send_rank[nsends++] = i;
2150         i--; j++;
2151       }
2152     }
2153 
2154     if (nleftover && subsize == size/nsubcomm && subrank==subsize-1){ /* this proc recvs from leftover processors */
2155       for (i=0; i<nleftover; i++){
2156         recv_rank[nrecvs++] = size-nleftover+i;
2157       }
2158     }
2159 
2160     /* allocate sbuf_j, sbuf_a */
2161     i = nzlocal + rowrange[rank+1] - rowrange[rank] + 2;
2162     ierr = PetscMalloc(i*sizeof(PetscInt),&sbuf_j);CHKERRQ(ierr);
2163     ierr = PetscMalloc((nzlocal+1)*sizeof(PetscScalar),&sbuf_a);CHKERRQ(ierr);
2164   } /* end of if (reuse == MAT_INITIAL_MATRIX) */
2165 
2166   /* copy mat's local entries into the buffers */
2167   if (reuse == MAT_INITIAL_MATRIX){
2168     rownz_max = 0;
2169     rptr = sbuf_j;
2170     cols = sbuf_j + rend-rstart + 1;
2171     vals = sbuf_a;
2172     rptr[0] = 0;
2173     for (i=0; i<rend-rstart; i++){
2174       row = i + rstart;
2175       nzA    = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i];
2176       ncols  = nzA + nzB;
2177       cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i];
2178       aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i];
2179       /* load the column indices for this row into cols */
2180       lwrite = 0;
2181       for (l=0; l<nzB; l++) {
2182         if ((ctmp = bmap[cworkB[l]]) < cstart){
2183           vals[lwrite]   = aworkB[l];
2184           cols[lwrite++] = ctmp;
2185         }
2186       }
2187       for (l=0; l<nzA; l++){
2188         vals[lwrite]   = aworkA[l];
2189         cols[lwrite++] = cstart + cworkA[l];
2190       }
2191       for (l=0; l<nzB; l++) {
2192         if ((ctmp = bmap[cworkB[l]]) >= cend){
2193           vals[lwrite]   = aworkB[l];
2194           cols[lwrite++] = ctmp;
2195         }
2196       }
2197       vals += ncols;
2198       cols += ncols;
2199       rptr[i+1] = rptr[i] + ncols;
2200       if (rownz_max < ncols) rownz_max = ncols;
2201     }
2202     if (rptr[rend-rstart] != a->nz + b->nz) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB, "rptr[%d] %d != %d + %d",rend-rstart,rptr[rend-rstart],a->nz,b->nz);
2203   } else { /* only copy matrix values into sbuf_a */
2204     rptr = sbuf_j;
2205     vals = sbuf_a;
2206     rptr[0] = 0;
2207     for (i=0; i<rend-rstart; i++){
2208       row = i + rstart;
2209       nzA    = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i];
2210       ncols  = nzA + nzB;
2211       cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i];
2212       aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i];
2213       lwrite = 0;
2214       for (l=0; l<nzB; l++) {
2215         if ((ctmp = bmap[cworkB[l]]) < cstart) vals[lwrite++] = aworkB[l];
2216       }
2217       for (l=0; l<nzA; l++) vals[lwrite++] = aworkA[l];
2218       for (l=0; l<nzB; l++) {
2219         if ((ctmp = bmap[cworkB[l]]) >= cend) vals[lwrite++] = aworkB[l];
2220       }
2221       vals += ncols;
2222       rptr[i+1] = rptr[i] + ncols;
2223     }
2224   } /* end of if (reuse == MAT_INITIAL_MATRIX) */
2225 
2226   /* send nzlocal to others, and recv others' nzlocal */
2227   /*--------------------------------------------------*/
2228   if (reuse == MAT_INITIAL_MATRIX){
2229     ierr = PetscMalloc2(3*(nsends + nrecvs)+1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr);
2230     s_waits2 = s_waits3 + nsends;
2231     s_waits1 = s_waits2 + nsends;
2232     r_waits1 = s_waits1 + nsends;
2233     r_waits2 = r_waits1 + nrecvs;
2234     r_waits3 = r_waits2 + nrecvs;
2235   } else {
2236     ierr = PetscMalloc2(nsends + nrecvs +1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr);
2237     r_waits3 = s_waits3 + nsends;
2238   }
2239 
2240   ierr = PetscObjectGetNewTag((PetscObject)mat,&tag3);CHKERRQ(ierr);
2241   if (reuse == MAT_INITIAL_MATRIX){
2242     /* get new tags to keep the communication clean */
2243     ierr = PetscObjectGetNewTag((PetscObject)mat,&tag1);CHKERRQ(ierr);
2244     ierr = PetscObjectGetNewTag((PetscObject)mat,&tag2);CHKERRQ(ierr);
2245     ierr = PetscMalloc4(nsends,PetscInt,&sbuf_nz,nrecvs,PetscInt,&rbuf_nz,nrecvs,PetscInt*,&rbuf_j,nrecvs,PetscScalar*,&rbuf_a);CHKERRQ(ierr);
2246 
2247     /* post receives of other's nzlocal */
2248     for (i=0; i<nrecvs; i++){
2249       ierr = MPI_Irecv(rbuf_nz+i,1,MPIU_INT,MPI_ANY_SOURCE,tag1,comm,r_waits1+i);CHKERRQ(ierr);
2250     }
2251     /* send nzlocal to others */
2252     for (i=0; i<nsends; i++){
2253       sbuf_nz[i] = nzlocal;
2254       ierr = MPI_Isend(sbuf_nz+i,1,MPIU_INT,send_rank[i],tag1,comm,s_waits1+i);CHKERRQ(ierr);
2255     }
2256     /* wait on receives of nzlocal; allocate space for rbuf_j, rbuf_a */
2257     count = nrecvs;
2258     while (count) {
2259       ierr = MPI_Waitany(nrecvs,r_waits1,&imdex,&recv_status);CHKERRQ(ierr);
2260       recv_rank[imdex] = recv_status.MPI_SOURCE;
2261       /* allocate rbuf_a and rbuf_j; then post receives of rbuf_j */
2262       ierr = PetscMalloc((rbuf_nz[imdex]+1)*sizeof(PetscScalar),&rbuf_a[imdex]);CHKERRQ(ierr);
2263 
2264       i = rowrange[recv_status.MPI_SOURCE+1] - rowrange[recv_status.MPI_SOURCE]; /* number of expected mat->i */
2265       rbuf_nz[imdex] += i + 2;
2266       ierr = PetscMalloc(rbuf_nz[imdex]*sizeof(PetscInt),&rbuf_j[imdex]);CHKERRQ(ierr);
2267       ierr = MPI_Irecv(rbuf_j[imdex],rbuf_nz[imdex],MPIU_INT,recv_status.MPI_SOURCE,tag2,comm,r_waits2+imdex);CHKERRQ(ierr);
2268       count--;
2269     }
2270     /* wait on sends of nzlocal */
2271     if (nsends) {ierr = MPI_Waitall(nsends,s_waits1,send_status);CHKERRQ(ierr);}
2272     /* send mat->i,j to others, and recv from others */
2273     /*------------------------------------------------*/
2274     for (i=0; i<nsends; i++){
2275       j = nzlocal + rowrange[rank+1] - rowrange[rank] + 1;
2276       ierr = MPI_Isend(sbuf_j,j,MPIU_INT,send_rank[i],tag2,comm,s_waits2+i);CHKERRQ(ierr);
2277     }
2278     /* wait on receives of mat->i,j */
2279     /*------------------------------*/
2280     count = nrecvs;
2281     while (count) {
2282       ierr = MPI_Waitany(nrecvs,r_waits2,&imdex,&recv_status);CHKERRQ(ierr);
2283       if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE);
2284       count--;
2285     }
2286     /* wait on sends of mat->i,j */
2287     /*---------------------------*/
2288     if (nsends) {
2289       ierr = MPI_Waitall(nsends,s_waits2,send_status);CHKERRQ(ierr);
2290     }
2291   } /* end of if (reuse == MAT_INITIAL_MATRIX) */
2292 
2293   /* post receives, send and receive mat->a */
2294   /*----------------------------------------*/
2295   for (imdex=0; imdex<nrecvs; imdex++) {
2296     ierr = MPI_Irecv(rbuf_a[imdex],rbuf_nz[imdex],MPIU_SCALAR,recv_rank[imdex],tag3,comm,r_waits3+imdex);CHKERRQ(ierr);
2297   }
2298   for (i=0; i<nsends; i++){
2299     ierr = MPI_Isend(sbuf_a,nzlocal,MPIU_SCALAR,send_rank[i],tag3,comm,s_waits3+i);CHKERRQ(ierr);
2300   }
2301   count = nrecvs;
2302   while (count) {
2303     ierr = MPI_Waitany(nrecvs,r_waits3,&imdex,&recv_status);CHKERRQ(ierr);
2304     if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE);
2305     count--;
2306   }
2307   if (nsends) {
2308     ierr = MPI_Waitall(nsends,s_waits3,send_status);CHKERRQ(ierr);
2309   }
2310 
2311   ierr = PetscFree2(s_waits3,send_status);CHKERRQ(ierr);
2312 
2313   /* create redundant matrix */
2314   /*-------------------------*/
2315   if (reuse == MAT_INITIAL_MATRIX){
2316     /* compute rownz_max for preallocation */
2317     for (imdex=0; imdex<nrecvs; imdex++){
2318       j = rowrange[recv_rank[imdex]+1] - rowrange[recv_rank[imdex]];
2319       rptr = rbuf_j[imdex];
2320       for (i=0; i<j; i++){
2321         ncols = rptr[i+1] - rptr[i];
2322         if (rownz_max < ncols) rownz_max = ncols;
2323       }
2324     }
2325 
2326     ierr = MatCreate(subcomm,&C);CHKERRQ(ierr);
2327     ierr = MatSetSizes(C,mlocal_sub,mlocal_sub,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr);
2328     ierr = MatSetFromOptions(C);CHKERRQ(ierr);
2329     ierr = MatSeqAIJSetPreallocation(C,rownz_max,PETSC_NULL);CHKERRQ(ierr);
2330     ierr = MatMPIAIJSetPreallocation(C,rownz_max,PETSC_NULL,rownz_max,PETSC_NULL);CHKERRQ(ierr);
2331   } else {
2332     C = *matredundant;
2333   }
2334 
2335   /* insert local matrix entries */
2336   rptr = sbuf_j;
2337   cols = sbuf_j + rend-rstart + 1;
2338   vals = sbuf_a;
2339   for (i=0; i<rend-rstart; i++){
2340     row   = i + rstart;
2341     ncols = rptr[i+1] - rptr[i];
2342     ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
2343     vals += ncols;
2344     cols += ncols;
2345   }
2346   /* insert received matrix entries */
2347   for (imdex=0; imdex<nrecvs; imdex++){
2348     rstart = rowrange[recv_rank[imdex]];
2349     rend   = rowrange[recv_rank[imdex]+1];
2350     rptr = rbuf_j[imdex];
2351     cols = rbuf_j[imdex] + rend-rstart + 1;
2352     vals = rbuf_a[imdex];
2353     for (i=0; i<rend-rstart; i++){
2354       row   = i + rstart;
2355       ncols = rptr[i+1] - rptr[i];
2356       ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
2357       vals += ncols;
2358       cols += ncols;
2359     }
2360   }
2361   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2362   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2363   ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr);
2364   if (M != mat->rmap->N || N != mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"redundant mat size %d != input mat size %d",M,mat->rmap->N);
2365   if (reuse == MAT_INITIAL_MATRIX){
2366     PetscContainer container;
2367     *matredundant = C;
2368     /* create a supporting struct and attach it to C for reuse */
2369     ierr = PetscNewLog(C,Mat_Redundant,&redund);CHKERRQ(ierr);
2370     ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
2371     ierr = PetscContainerSetPointer(container,redund);CHKERRQ(ierr);
2372     ierr = PetscObjectCompose((PetscObject)C,"Mat_Redundant",(PetscObject)container);CHKERRQ(ierr);
2373     ierr = PetscContainerSetUserDestroy(container,PetscContainerDestroy_MatRedundant);CHKERRQ(ierr);
2374 
2375     redund->nzlocal = nzlocal;
2376     redund->nsends  = nsends;
2377     redund->nrecvs  = nrecvs;
2378     redund->send_rank = send_rank;
2379     redund->recv_rank = recv_rank;
2380     redund->sbuf_nz = sbuf_nz;
2381     redund->rbuf_nz = rbuf_nz;
2382     redund->sbuf_j  = sbuf_j;
2383     redund->sbuf_a  = sbuf_a;
2384     redund->rbuf_j  = rbuf_j;
2385     redund->rbuf_a  = rbuf_a;
2386 
2387     redund->MatDestroy = C->ops->destroy;
2388     C->ops->destroy    = MatDestroy_MatRedundant;
2389   }
2390   PetscFunctionReturn(0);
2391 }
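
/*
   Editorial usage sketch (not part of the original source): gathering redundant copies
   of a parallel matrix with the routine above, e.g. for a redundant coarse solve
   (PCREDUNDANT does this internally).  nsubcomm, subcomm and mlocal_sub are assumed to
   have been set up by the caller; the second call reuses C after A's values changed.

     Mat C = PETSC_NULL;
     ierr = MatGetRedundantMatrix(A,nsubcomm,subcomm,mlocal_sub,MAT_INITIAL_MATRIX,&C);CHKERRQ(ierr);
     ierr = MatGetRedundantMatrix(A,nsubcomm,subcomm,mlocal_sub,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr);
     ierr = MatDestroy(C);CHKERRQ(ierr);
*/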
2392 
2393 #undef __FUNCT__
2394 #define __FUNCT__ "MatGetRowMaxAbs_MPIAIJ"
2395 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2396 {
2397   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2398   PetscErrorCode ierr;
2399   PetscInt       i,*idxb = 0;
2400   PetscScalar    *va,*vb;
2401   Vec            vtmp;
2402 
2403   PetscFunctionBegin;
2404   ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr);
2405   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2406   if (idx) {
2407     for (i=0; i<A->rmap->n; i++) {
2408       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2409     }
2410   }
2411 
2412   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
2413   if (idx) {
2414     ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr);
2415   }
2416   ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
2417   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
2418 
2419   for (i=0; i<A->rmap->n; i++){
2420     if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
2421       va[i] = vb[i];
2422       if (idx) idx[i] = a->garray[idxb[i]];
2423     }
2424   }
2425 
2426   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2427   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
2428   if (idxb) {
2429     ierr = PetscFree(idxb);CHKERRQ(ierr);
2430   }
2431   ierr = VecDestroy(vtmp);CHKERRQ(ierr);
2432   PetscFunctionReturn(0);
2433 }
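
/*
   Editorial usage sketch (not part of the original source): obtaining, for every local
   row, its largest entry in absolute value and the global column where it occurs.

     Vec      rmax;
     PetscInt *loc,nlocal;
     ierr = MatGetVecs(A,PETSC_NULL,&rmax);CHKERRQ(ierr);
     ierr = MatGetLocalSize(A,&nlocal,PETSC_NULL);CHKERRQ(ierr);
     ierr = PetscMalloc(nlocal*sizeof(PetscInt),&loc);CHKERRQ(ierr);
     ierr = MatGetRowMaxAbs(A,rmax,loc);CHKERRQ(ierr);
     ierr = PetscFree(loc);CHKERRQ(ierr);
     ierr = VecDestroy(rmax);CHKERRQ(ierr);
*/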
2434 
2435 #undef __FUNCT__
2436 #define __FUNCT__ "MatGetRowMinAbs_MPIAIJ"
2437 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2438 {
2439   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2440   PetscErrorCode ierr;
2441   PetscInt       i,*idxb = 0;
2442   PetscScalar    *va,*vb;
2443   Vec            vtmp;
2444 
2445   PetscFunctionBegin;
2446   ierr = MatGetRowMinAbs(a->A,v,idx);CHKERRQ(ierr);
2447   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2448   if (idx) {
2449     for (i=0; i<A->rmap->n; i++) {
2450       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2451     }
2452   }
2453 
2454   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
2455   if (idx) {
2456     ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr);
2457   }
2458   ierr = MatGetRowMinAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
2459   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
2460 
2461   for (i=0; i<A->rmap->n; i++){
2462     if (PetscAbsScalar(va[i]) > PetscAbsScalar(vb[i])) {
2463       va[i] = vb[i];
2464       if (idx) idx[i] = a->garray[idxb[i]];
2465     }
2466   }
2467 
2468   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2469   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
2470   if (idxb) {
2471     ierr = PetscFree(idxb);CHKERRQ(ierr);
2472   }
2473   ierr = VecDestroy(vtmp);CHKERRQ(ierr);
2474   PetscFunctionReturn(0);
2475 }
2476 
2477 #undef __FUNCT__
2478 #define __FUNCT__ "MatGetRowMin_MPIAIJ"
2479 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2480 {
2481   Mat_MPIAIJ    *mat    = (Mat_MPIAIJ *) A->data;
2482   PetscInt       n      = A->rmap->n;
2483   PetscInt       cstart = A->cmap->rstart;
2484   PetscInt      *cmap   = mat->garray;
2485   PetscInt      *diagIdx, *offdiagIdx;
2486   Vec            diagV, offdiagV;
2487   PetscScalar   *a, *diagA, *offdiagA;
2488   PetscInt       r;
2489   PetscErrorCode ierr;
2490 
2491   PetscFunctionBegin;
2492   ierr = PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);CHKERRQ(ierr);
2493   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr);
2494   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr);
2495   ierr = MatGetRowMin(mat->A, diagV,    diagIdx);CHKERRQ(ierr);
2496   ierr = MatGetRowMin(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
2497   ierr = VecGetArray(v,        &a);CHKERRQ(ierr);
2498   ierr = VecGetArray(diagV,    &diagA);CHKERRQ(ierr);
2499   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2500   for(r = 0; r < n; ++r) {
2501     if (PetscAbsScalar(diagA[r]) <= PetscAbsScalar(offdiagA[r])) {
2502       a[r]   = diagA[r];
2503       idx[r] = cstart + diagIdx[r];
2504     } else {
2505       a[r]   = offdiagA[r];
2506       idx[r] = cmap[offdiagIdx[r]];
2507     }
2508   }
2509   ierr = VecRestoreArray(v,        &a);CHKERRQ(ierr);
2510   ierr = VecRestoreArray(diagV,    &diagA);CHKERRQ(ierr);
2511   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2512   ierr = VecDestroy(diagV);CHKERRQ(ierr);
2513   ierr = VecDestroy(offdiagV);CHKERRQ(ierr);
2514   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
2515   PetscFunctionReturn(0);
2516 }
2517 
2518 #undef __FUNCT__
2519 #define __FUNCT__ "MatGetRowMax_MPIAIJ"
2520 PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2521 {
2522   Mat_MPIAIJ    *mat    = (Mat_MPIAIJ *) A->data;
2523   PetscInt       n      = A->rmap->n;
2524   PetscInt       cstart = A->cmap->rstart;
2525   PetscInt      *cmap   = mat->garray;
2526   PetscInt      *diagIdx, *offdiagIdx;
2527   Vec            diagV, offdiagV;
2528   PetscScalar   *a, *diagA, *offdiagA;
2529   PetscInt       r;
2530   PetscErrorCode ierr;
2531 
2532   PetscFunctionBegin;
2533   ierr = PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);CHKERRQ(ierr);
2534   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr);
2535   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr);
2536   ierr = MatGetRowMax(mat->A, diagV,    diagIdx);CHKERRQ(ierr);
2537   ierr = MatGetRowMax(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
2538   ierr = VecGetArray(v,        &a);CHKERRQ(ierr);
2539   ierr = VecGetArray(diagV,    &diagA);CHKERRQ(ierr);
2540   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2541   for(r = 0; r < n; ++r) {
2542     if (PetscAbsScalar(diagA[r]) >= PetscAbsScalar(offdiagA[r])) {
2543       a[r]   = diagA[r];
2544       idx[r] = cstart + diagIdx[r];
2545     } else {
2546       a[r]   = offdiagA[r];
2547       idx[r] = cmap[offdiagIdx[r]];
2548     }
2549   }
2550   ierr = VecRestoreArray(v,        &a);CHKERRQ(ierr);
2551   ierr = VecRestoreArray(diagV,    &diagA);CHKERRQ(ierr);
2552   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2553   ierr = VecDestroy(diagV);CHKERRQ(ierr);
2554   ierr = VecDestroy(offdiagV);CHKERRQ(ierr);
2555   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
2556   PetscFunctionReturn(0);
2557 }
2558 
2559 #undef __FUNCT__
2560 #define __FUNCT__ "MatGetSeqNonzerostructure_MPIAIJ"
2561 PetscErrorCode MatGetSeqNonzerostructure_MPIAIJ(Mat mat,Mat *newmat)
2562 {
2563   PetscErrorCode ierr;
2564   Mat            *dummy;
2565 
2566   PetscFunctionBegin;
2567   ierr = MatGetSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy);CHKERRQ(ierr);
2568   *newmat = *dummy;
2569   ierr = PetscFree(dummy);CHKERRQ(ierr);
2570   PetscFunctionReturn(0);
2571 }
2572 
2573 extern PetscErrorCode PETSCMAT_DLLEXPORT MatFDColoringApply_AIJ(Mat,MatFDColoring,Vec,MatStructure*,void*);
2574 /* -------------------------------------------------------------------*/
2575 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
2576        MatGetRow_MPIAIJ,
2577        MatRestoreRow_MPIAIJ,
2578        MatMult_MPIAIJ,
2579 /* 4*/ MatMultAdd_MPIAIJ,
2580        MatMultTranspose_MPIAIJ,
2581        MatMultTransposeAdd_MPIAIJ,
2582 #ifdef PETSC_HAVE_PBGL
2583        MatSolve_MPIAIJ,
2584 #else
2585        0,
2586 #endif
2587        0,
2588        0,
2589 /*10*/ 0,
2590        0,
2591        0,
2592        MatSOR_MPIAIJ,
2593        MatTranspose_MPIAIJ,
2594 /*15*/ MatGetInfo_MPIAIJ,
2595        MatEqual_MPIAIJ,
2596        MatGetDiagonal_MPIAIJ,
2597        MatDiagonalScale_MPIAIJ,
2598        MatNorm_MPIAIJ,
2599 /*20*/ MatAssemblyBegin_MPIAIJ,
2600        MatAssemblyEnd_MPIAIJ,
2601        MatSetOption_MPIAIJ,
2602        MatZeroEntries_MPIAIJ,
2603 /*24*/ MatZeroRows_MPIAIJ,
2604        0,
2605 #ifdef PETSC_HAVE_PBGL
2606        0,
2607 #else
2608        0,
2609 #endif
2610        0,
2611        0,
2612 /*29*/ MatSetUpPreallocation_MPIAIJ,
2613 #ifdef PETSC_HAVE_PBGL
2614        0,
2615 #else
2616        0,
2617 #endif
2618        0,
2619        0,
2620        0,
2621 /*34*/ MatDuplicate_MPIAIJ,
2622        0,
2623        0,
2624        0,
2625        0,
2626 /*39*/ MatAXPY_MPIAIJ,
2627        MatGetSubMatrices_MPIAIJ,
2628        MatIncreaseOverlap_MPIAIJ,
2629        MatGetValues_MPIAIJ,
2630        MatCopy_MPIAIJ,
2631 /*44*/ MatGetRowMax_MPIAIJ,
2632        MatScale_MPIAIJ,
2633        0,
2634        0,
2635        0,
2636 /*49*/ MatSetBlockSize_MPIAIJ,
2637        0,
2638        0,
2639        0,
2640        0,
2641 /*54*/ MatFDColoringCreate_MPIAIJ,
2642        0,
2643        MatSetUnfactored_MPIAIJ,
2644        MatPermute_MPIAIJ,
2645        0,
2646 /*59*/ MatGetSubMatrix_MPIAIJ,
2647        MatDestroy_MPIAIJ,
2648        MatView_MPIAIJ,
2649        0,
2650        0,
2651 /*64*/ 0,
2652        0,
2653        0,
2654        0,
2655        0,
2656 /*69*/ MatGetRowMaxAbs_MPIAIJ,
2657        MatGetRowMinAbs_MPIAIJ,
2658        0,
2659        MatSetColoring_MPIAIJ,
2660 #if defined(PETSC_HAVE_ADIC)
2661        MatSetValuesAdic_MPIAIJ,
2662 #else
2663        0,
2664 #endif
2665        MatSetValuesAdifor_MPIAIJ,
2666 /*75*/ MatFDColoringApply_AIJ,
2667        0,
2668        0,
2669        0,
2670        0,
2671 /*80*/ 0,
2672        0,
2673        0,
2674 /*83*/ MatLoad_MPIAIJ,
2675        0,
2676        0,
2677        0,
2678        0,
2679        0,
2680 /*89*/ MatMatMult_MPIAIJ_MPIAIJ,
2681        MatMatMultSymbolic_MPIAIJ_MPIAIJ,
2682        MatMatMultNumeric_MPIAIJ_MPIAIJ,
2683        MatPtAP_Basic,
2684        MatPtAPSymbolic_MPIAIJ,
2685 /*94*/ MatPtAPNumeric_MPIAIJ,
2686        0,
2687        0,
2688        0,
2689        0,
2690 /*99*/ 0,
2691        MatPtAPSymbolic_MPIAIJ_MPIAIJ,
2692        MatPtAPNumeric_MPIAIJ_MPIAIJ,
2693        MatConjugate_MPIAIJ,
2694        0,
2695 /*104*/MatSetValuesRow_MPIAIJ,
2696        MatRealPart_MPIAIJ,
2697        MatImaginaryPart_MPIAIJ,
2698        0,
2699        0,
2700 /*109*/0,
2701        MatGetRedundantMatrix_MPIAIJ,
2702        MatGetRowMin_MPIAIJ,
2703        0,
2704        0,
2705 /*114*/MatGetSeqNonzerostructure_MPIAIJ,
2706        0,
2707        0,
2708        0,
2709        0,
2710        0
2711 };
2712 
2713 /* ----------------------------------------------------------------------------------------*/
2714 
2715 EXTERN_C_BEGIN
2716 #undef __FUNCT__
2717 #define __FUNCT__ "MatStoreValues_MPIAIJ"
2718 PetscErrorCode PETSCMAT_DLLEXPORT MatStoreValues_MPIAIJ(Mat mat)
2719 {
2720   Mat_MPIAIJ     *aij = (Mat_MPIAIJ *)mat->data;
2721   PetscErrorCode ierr;
2722 
2723   PetscFunctionBegin;
2724   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
2725   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
2726   PetscFunctionReturn(0);
2727 }
2728 EXTERN_C_END
2729 
2730 EXTERN_C_BEGIN
2731 #undef __FUNCT__
2732 #define __FUNCT__ "MatRetrieveValues_MPIAIJ"
2733 PetscErrorCode PETSCMAT_DLLEXPORT MatRetrieveValues_MPIAIJ(Mat mat)
2734 {
2735   Mat_MPIAIJ     *aij = (Mat_MPIAIJ *)mat->data;
2736   PetscErrorCode ierr;
2737 
2738   PetscFunctionBegin;
2739   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
2740   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
2741   PetscFunctionReturn(0);
2742 }
2743 EXTERN_C_END
2744 
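/*
   Illustrative usage sketch (not part of this source file): MatStoreValues()/MatRetrieveValues()
   simply forward to the diagonal (A) and off-diagonal (B) SeqAIJ blocks above.  The sketch assumes
   the matrix J has already been assembled and that its nonzero pattern stays fixed.
*/
#if 0
static PetscErrorCode StoreRetrieveSketch(Mat J)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* the nonzero pattern must be frozen before values can be stored */
  ierr = MatSetOption(J,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);CHKERRQ(ierr);
  /* save the current numerical values of the assembled matrix */
  ierr = MatStoreValues(J);CHKERRQ(ierr);
  /* ... overwrite the values, e.g. with MatZeroEntries(J) or an in-place factorization ... */
  /* restore the previously saved values; the nonzero pattern must not have changed */
  ierr = MatRetrieveValues(J);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#endif
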
2745 #include "petscpc.h"
2746 EXTERN_C_BEGIN
2747 #undef __FUNCT__
2748 #define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ"
2749 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
2750 {
2751   Mat_MPIAIJ     *b;
2752   PetscErrorCode ierr;
2753   PetscInt       i;
2754 
2755   PetscFunctionBegin;
2756   if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5;
2757   if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2;
2758   if (d_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz);
2759   if (o_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz);
2760 
2761   ierr = PetscLayoutSetBlockSize(B->rmap,1);CHKERRQ(ierr);
2762   ierr = PetscLayoutSetBlockSize(B->cmap,1);CHKERRQ(ierr);
2763   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
2764   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
2765   if (d_nnz) {
2766     for (i=0; i<B->rmap->n; i++) {
2767       if (d_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %D value %D",i,d_nnz[i]);
2768     }
2769   }
2770   if (o_nnz) {
2771     for (i=0; i<B->rmap->n; i++) {
2772       if (o_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %D value %D",i,o_nnz[i]);
2773     }
2774   }
2775   b = (Mat_MPIAIJ*)B->data;
2776 
2777   if (!B->preallocated) {
2778     /* Explicitly create 2 MATSEQAIJ matrices. */
2779     ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
2780     ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr);
2781     ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr);
2782     ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr);
2783     ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
2784     ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr);
2785     ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr);
2786     ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr);
2787   }
2788 
2789   ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr);
2790   ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr);
2791   B->preallocated = PETSC_TRUE;
2792   PetscFunctionReturn(0);
2793 }
2794 EXTERN_C_END
2795 
2796 #undef __FUNCT__
2797 #define __FUNCT__ "MatDuplicate_MPIAIJ"
2798 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
2799 {
2800   Mat            mat;
2801   Mat_MPIAIJ     *a,*oldmat = (Mat_MPIAIJ*)matin->data;
2802   PetscErrorCode ierr;
2803 
2804   PetscFunctionBegin;
2805   *newmat       = 0;
2806   ierr = MatCreate(((PetscObject)matin)->comm,&mat);CHKERRQ(ierr);
2807   ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr);
2808   ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr);
2809   ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
2810   a    = (Mat_MPIAIJ*)mat->data;
2811 
2812   mat->factortype   = matin->factortype;
2813   mat->rmap->bs     = matin->rmap->bs;
2814   mat->assembled    = PETSC_TRUE;
2815   mat->insertmode   = NOT_SET_VALUES;
2816   mat->preallocated = PETSC_TRUE;
2817 
2818   a->size           = oldmat->size;
2819   a->rank           = oldmat->rank;
2820   a->donotstash     = oldmat->donotstash;
2821   a->roworiented    = oldmat->roworiented;
2822   a->rowindices     = 0;
2823   a->rowvalues      = 0;
2824   a->getrowactive   = PETSC_FALSE;
2825 
2826   ierr = PetscLayoutCopy(matin->rmap,&mat->rmap);CHKERRQ(ierr);
2827   ierr = PetscLayoutCopy(matin->cmap,&mat->cmap);CHKERRQ(ierr);
2828 
2829   if (oldmat->colmap) {
2830 #if defined (PETSC_USE_CTABLE)
2831     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
2832 #else
2833     ierr = PetscMalloc((mat->cmap->N)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr);
2834     ierr = PetscLogObjectMemory(mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
2835     ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
2836 #endif
2837   } else a->colmap = 0;
2838   if (oldmat->garray) {
2839     PetscInt len;
2840     len  = oldmat->B->cmap->n;
2841     ierr = PetscMalloc((len+1)*sizeof(PetscInt),&a->garray);CHKERRQ(ierr);
2842     ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr);
2843     if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); }
2844   } else a->garray = 0;
2845 
2846   ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
2847   ierr = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr);
2848   ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
2849   ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr);
2850   ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
2851   ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr);
2852   ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
2853   ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr);
2854   ierr = PetscFListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr);
2855   *newmat = mat;
2856   PetscFunctionReturn(0);
2857 }
2858 
2859 #undef __FUNCT__
2860 #define __FUNCT__ "MatLoad_MPIAIJ"
2861 PetscErrorCode MatLoad_MPIAIJ(PetscViewer viewer, const MatType type,Mat *newmat)
2862 {
2863   Mat            A;
2864   PetscScalar    *vals,*svals;
2865   MPI_Comm       comm = ((PetscObject)viewer)->comm;
2866   MPI_Status     status;
2867   PetscErrorCode ierr;
2868   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag,mpicnt,mpimaxnz;
2869   PetscInt       i,nz,j,rstart,rend,mmax,maxnz = 0;
2870   PetscInt       header[4],*rowlengths = 0,M,N,m,*cols;
2871   PetscInt       *ourlens = PETSC_NULL,*procsnz = PETSC_NULL,*offlens = PETSC_NULL,jj,*mycols,*smycols;
2872   PetscInt       cend,cstart,n,*rowners;
2873   int            fd;
2874 
2875   PetscFunctionBegin;
2876   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2877   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2878   if (!rank) {
2879     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
2880     ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
2881     if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
2882   }
2883 
2884   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
2885   M = header[1]; N = header[2];
2886   /* determine ownership of all rows */
2887   m    = M/size + ((M % size) > rank);
2888   ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr);
2889   ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
2890 
2891   /* First process needs enough room for process with most rows */
2892   if (!rank) {
2893     mmax       = rowners[1];
2894     for (i=2; i<size; i++) {
2895       mmax = PetscMax(mmax,rowners[i]);
2896     }
2897   } else mmax = m;
2898 
2899   rowners[0] = 0;
2900   for (i=2; i<=size; i++) {
2901     rowners[i] += rowners[i-1];
2902   }
2903   rstart = rowners[rank];
2904   rend   = rowners[rank+1];
2905 
2906   /* distribute row lengths to all processors */
2907   ierr    = PetscMalloc2(mmax,PetscInt,&ourlens,mmax,PetscInt,&offlens);CHKERRQ(ierr);
2908   if (!rank) {
2909     ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr);
2910     ierr = PetscMalloc(m*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
2911     ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr);
2912     ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr);
2913     for (j=0; j<m; j++) {
2914       procsnz[0] += ourlens[j];
2915     }
2916     for (i=1; i<size; i++) {
2917       ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr);
2918       /* calculate the number of nonzeros on each processor */
2919       for (j=0; j<rowners[i+1]-rowners[i]; j++) {
2920         procsnz[i] += rowlengths[j];
2921       }
2922       mpicnt = PetscMPIIntCast(rowners[i+1]-rowners[i]);
2923       ierr   = MPI_Send(rowlengths,mpicnt,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
2924     }
2925     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
2926   } else {
2927     mpicnt = PetscMPIIntCast(m);
2928     ierr   = MPI_Recv(ourlens,mpicnt,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
2929   }
2930 
2931   if (!rank) {
2932     /* determine max buffer needed and allocate it */
2933     maxnz = 0;
2934     for (i=0; i<size; i++) {
2935       maxnz = PetscMax(maxnz,procsnz[i]);
2936     }
2937     ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr);
2938 
2939     /* read in my part of the matrix column indices  */
2940     nz   = procsnz[0];
2941     ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr);
2942     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
2943 
2944     /* read in everyone else's and ship off */
2945     for (i=1; i<size; i++) {
2946       nz     = procsnz[i];
2947       ierr   = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
2948       mpicnt = PetscMPIIntCast(nz);
2949       ierr   = MPI_Send(cols,mpicnt,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
2950     }
2951     ierr = PetscFree(cols);CHKERRQ(ierr);
2952   } else {
2953     /* determine buffer space needed for message */
2954     nz = 0;
2955     for (i=0; i<m; i++) {
2956       nz += ourlens[i];
2957     }
2958     ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr);
2959 
2960     /* receive message of column indices */
2961     mpicnt = PetscMPIIntCast(nz);
2962     ierr = MPI_Recv(mycols,mpicnt,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
2963     ierr = MPI_Get_count(&status,MPIU_INT,&mpimaxnz);CHKERRQ(ierr);
2964     if (mpimaxnz == MPI_UNDEFINED) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"MPI_Get_count() returned MPI_UNDEFINED, expected %d",mpicnt);
2965     else if (mpimaxnz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"MPI_Get_count() returned impossible negative value %d, expected %d",mpimaxnz,mpicnt);
2966     else if (mpimaxnz != mpicnt) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file: expected %d received %d",mpicnt,mpimaxnz);
2967   }
2968 
2969   /* determine column ownership if matrix is not square */
2970   if (N != M) {
2971     n      = N/size + ((N % size) > rank);
2972     ierr   = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
2973     cstart = cend - n;
2974   } else {
2975     cstart = rstart;
2976     cend   = rend;
2977     n      = cend - cstart;
2978   }
2979 
2980   /* loop over local rows, determining number of off diagonal entries */
2981   ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr);
2982   jj = 0;
2983   for (i=0; i<m; i++) {
2984     for (j=0; j<ourlens[i]; j++) {
2985       if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++;
2986       jj++;
2987     }
2988   }
2989 
2990   /* create our matrix */
2991   for (i=0; i<m; i++) {
2992     ourlens[i] -= offlens[i];
2993   }
2994   ierr = MatCreate(comm,&A);CHKERRQ(ierr);
2995   ierr = MatSetSizes(A,m,n,M,N);CHKERRQ(ierr);
2996   ierr = MatSetType(A,type);CHKERRQ(ierr);
2997   ierr = MatMPIAIJSetPreallocation(A,0,ourlens,0,offlens);CHKERRQ(ierr);
2998 
2999   for (i=0; i<m; i++) {
3000     ourlens[i] += offlens[i];
3001   }
3002 
3003   if (!rank) {
3004     ierr = PetscMalloc((maxnz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
3005 
3006     /* read in my part of the matrix numerical values  */
3007     nz   = procsnz[0];
3008     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3009 
3010     /* insert into matrix */
3011     jj      = rstart;
3012     smycols = mycols;
3013     svals   = vals;
3014     for (i=0; i<m; i++) {
3015       ierr = MatSetValues_MPIAIJ(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
3016       smycols += ourlens[i];
3017       svals   += ourlens[i];
3018       jj++;
3019     }
3020 
3021     /* read in other processors and ship out */
3022     for (i=1; i<size; i++) {
3023       nz     = procsnz[i];
3024       ierr   = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3025       mpicnt = PetscMPIIntCast(nz);
3026       ierr   = MPI_Send(vals,mpicnt,MPIU_SCALAR,i,((PetscObject)A)->tag,comm);CHKERRQ(ierr);
3027     }
3028     ierr = PetscFree(procsnz);CHKERRQ(ierr);
3029   } else {
3030     /* receive numeric values */
3031     ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
3032 
3033     /* receive message of values*/
3034     mpicnt = PetscMPIIntCast(nz);
3035     ierr   = MPI_Recv(vals,mpicnt,MPIU_SCALAR,0,((PetscObject)A)->tag,comm,&status);CHKERRQ(ierr);
3036     ierr   = MPI_Get_count(&status,MPIU_SCALAR,&mpimaxnz);CHKERRQ(ierr);
3037     if (mpimaxnz == MPI_UNDEFINED) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"MPI_Get_count() returned MPI_UNDEFINED, expected %d",mpicnt);
3038     else if (mpimaxnz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"MPI_Get_count() returned impossible negative value %d, expected %d",mpimaxnz,mpicnt);
3039     else if (mpimaxnz != mpicnt) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file: expected %d received %d",mpicnt,mpimaxnz);
3040 
3041     /* insert into matrix */
3042     jj      = rstart;
3043     smycols = mycols;
3044     svals   = vals;
3045     for (i=0; i<m; i++) {
3046       ierr     = MatSetValues_MPIAIJ(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
3047       smycols += ourlens[i];
3048       svals   += ourlens[i];
3049       jj++;
3050     }
3051   }
3052   ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr);
3053   ierr = PetscFree(vals);CHKERRQ(ierr);
3054   ierr = PetscFree(mycols);CHKERRQ(ierr);
3055   ierr = PetscFree(rowners);CHKERRQ(ierr);
3056 
3057   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3058   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3059   *newmat = A;
3060   PetscFunctionReturn(0);
3061 }
3062 
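/*
   Illustrative sketch (not part of this source file) of how MatLoad_MPIAIJ() is reached from user
   code, assuming the binary-viewer based MatLoad() calling sequence of this PETSc version and a
   hypothetical file name "matrix.dat" previously written with MatView() on a binary viewer.
*/
#if 0
static PetscErrorCode LoadSketch(MPI_Comm comm,Mat *A)
{
  PetscErrorCode ierr;
  PetscViewer    viewer;

  PetscFunctionBegin;
  ierr = PetscViewerBinaryOpen(comm,"matrix.dat",FILE_MODE_READ,&viewer);CHKERRQ(ierr);
  /* MatLoad() dispatches to MatLoad_MPIAIJ() when the requested type is MATMPIAIJ */
  ierr = MatLoad(viewer,MATMPIAIJ,A);CHKERRQ(ierr);
  ierr = PetscViewerDestroy(viewer);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#endif
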
3063 #undef __FUNCT__
3064 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ"
3065 PetscErrorCode MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat)
3066 {
3067   PetscErrorCode ierr;
3068   IS             iscol_local;
3069   PetscInt       csize;
3070 
3071   PetscFunctionBegin;
3072   ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
3073   if (call == MAT_REUSE_MATRIX) {
3074     ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr);
3075     if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3076   } else {
3077     ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
3078   }
3079   ierr = MatGetSubMatrix_MPIAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr);
3080   if (call == MAT_INITIAL_MATRIX) {
3081     ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
3082     ierr = ISDestroy(iscol_local);CHKERRQ(ierr);
3083   }
3084   PetscFunctionReturn(0);
3085 }
3086 
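/*
   Illustrative sketch (not part of this source file) of extracting a parallel submatrix through
   the routine above.  It assumes the MatGetSubMatrix() interface of this revision (row IS, column
   IS, MatReuse flag) and uses stride index sets purely as an example; the names and arguments of
   the sketch itself are hypothetical.
*/
#if 0
static PetscErrorCode SubMatrixSketch(Mat A,PetscInt nrows,PetscInt rfirst,PetscInt ncols,PetscInt cfirst,Mat *Asub)
{
  PetscErrorCode ierr;
  IS             isrow,iscol;

  PetscFunctionBegin;
  /* index sets selecting the rows/columns this process wants in the submatrix */
  ierr = ISCreateStride(((PetscObject)A)->comm,nrows,rfirst,1,&isrow);CHKERRQ(ierr);
  ierr = ISCreateStride(((PetscObject)A)->comm,ncols,cfirst,1,&iscol);CHKERRQ(ierr);
  ierr = MatGetSubMatrix(A,isrow,iscol,MAT_INITIAL_MATRIX,Asub);CHKERRQ(ierr);
  ierr = ISDestroy(isrow);CHKERRQ(ierr);
  ierr = ISDestroy(iscol);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#endif
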
3087 #undef __FUNCT__
3088 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ_Private"
3089 /*
3090     Not great since it makes two copies of the submatrix: first a local SeqAIJ,
3091   and then the end result obtained by concatenating the local matrices.
3092   Writing it directly would be much like MatGetSubMatrices_MPIAIJ().
3093 
3094   Note: This requires a sequential iscol with all indices.
3095 */
3096 PetscErrorCode MatGetSubMatrix_MPIAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat)
3097 {
3098   PetscErrorCode ierr;
3099   PetscMPIInt    rank,size;
3100   PetscInt       i,m,n,rstart,row,rend,nz,*cwork,j;
3101   PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal;
3102   Mat            *local,M,Mreuse;
3103   MatScalar      *vwork,*aa;
3104   MPI_Comm       comm = ((PetscObject)mat)->comm;
3105   Mat_SeqAIJ     *aij;
3106 
3107 
3108   PetscFunctionBegin;
3109   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3110   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3111 
3112   if (call ==  MAT_REUSE_MATRIX) {
3113     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr);
3114     if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3115     local = &Mreuse;
3116     ierr  = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);CHKERRQ(ierr);
3117   } else {
3118     ierr   = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr);
3119     Mreuse = *local;
3120     ierr   = PetscFree(local);CHKERRQ(ierr);
3121   }
3122 
3123   /*
3124       m - number of local rows
3125       n - number of columns (same on all processors)
3126       rstart - first row in new global matrix generated
3127   */
3128   ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
3129   if (call == MAT_INITIAL_MATRIX) {
3130     aij = (Mat_SeqAIJ*)(Mreuse)->data;
3131     ii  = aij->i;
3132     jj  = aij->j;
3133 
3134     /*
3135         Determine the number of non-zeros in the diagonal and off-diagonal
3136         portions of the matrix in order to do correct preallocation
3137     */
3138 
3139     /* first get start and end of "diagonal" columns */
3140     if (csize == PETSC_DECIDE) {
3141       ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
3142       if (mglobal == n) { /* square matrix */
3143 	nlocal = m;
3144       } else {
3145         nlocal = n/size + ((n % size) > rank);
3146       }
3147     } else {
3148       nlocal = csize;
3149     }
3150     ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3151     rstart = rend - nlocal;
3152     if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n);
3153 
3154     /* next, compute all the lengths */
3155     ierr  = PetscMalloc((2*m+1)*sizeof(PetscInt),&dlens);CHKERRQ(ierr);
3156     olens = dlens + m;
3157     for (i=0; i<m; i++) {
3158       jend = ii[i+1] - ii[i];
3159       olen = 0;
3160       dlen = 0;
3161       for (j=0; j<jend; j++) {
3162         if (*jj < rstart || *jj >= rend) olen++;
3163         else dlen++;
3164         jj++;
3165       }
3166       olens[i] = olen;
3167       dlens[i] = dlen;
3168     }
3169     ierr = MatCreate(comm,&M);CHKERRQ(ierr);
3170     ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr);
3171     ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr);
3172     ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr);
3173     ierr = PetscFree(dlens);CHKERRQ(ierr);
3174   } else {
3175     PetscInt ml,nl;
3176 
3177     M = *newmat;
3178     ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
3179     if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
3180     ierr = MatZeroEntries(M);CHKERRQ(ierr);
3181     /*
3182          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
3183        rather than the slower MatSetValues().
3184     */
3185     M->was_assembled = PETSC_TRUE;
3186     M->assembled     = PETSC_FALSE;
3187   }
3188   ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
3189   aij = (Mat_SeqAIJ*)(Mreuse)->data;
3190   ii  = aij->i;
3191   jj  = aij->j;
3192   aa  = aij->a;
3193   for (i=0; i<m; i++) {
3194     row   = rstart + i;
3195     nz    = ii[i+1] - ii[i];
3196     cwork = jj;     jj += nz;
3197     vwork = aa;     aa += nz;
3198     ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
3199   }
3200 
3201   ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3202   ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3203   *newmat = M;
3204 
3205   /* save submatrix used in processor for next request */
3206   if (call ==  MAT_INITIAL_MATRIX) {
3207     ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
3208     ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr);
3209   }
3210 
3211   PetscFunctionReturn(0);
3212 }
3213 
3214 EXTERN_C_BEGIN
3215 #undef __FUNCT__
3216 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR_MPIAIJ"
3217 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[])
3218 {
3219   PetscInt       m,cstart, cend,j,nnz,i,d;
3220   PetscInt       *d_nnz,*o_nnz,nnz_max = 0,rstart,ii;
3221   const PetscInt *JJ;
3222   PetscScalar    *values;
3223   PetscErrorCode ierr;
3224 
3225   PetscFunctionBegin;
3226   if (Ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0 it is %D",Ii[0]);
3227 
3228   ierr = PetscLayoutSetBlockSize(B->rmap,1);CHKERRQ(ierr);
3229   ierr = PetscLayoutSetBlockSize(B->cmap,1);CHKERRQ(ierr);
3230   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3231   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3232   m      = B->rmap->n;
3233   cstart = B->cmap->rstart;
3234   cend   = B->cmap->rend;
3235   rstart = B->rmap->rstart;
3236 
3237   ierr  = PetscMalloc2(m,PetscInt,&d_nnz,m,PetscInt,&o_nnz);CHKERRQ(ierr);
3238 
3239 #if defined(PETSC_USE_DEBUG)
3240   for (i=0; i<m; i++) {
3241     nnz     = Ii[i+1] - Ii[i];
3242     JJ      = J + Ii[i];
3243     if (nnz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative number of columns %D",i,nnz);
3244     if (nnz && (JJ[0] < 0)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index %D",i,JJ[0]);
3245     if (nnz && (JJ[nnz-1] >= B->cmap->N)) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N);
3246   }
3247 #endif
3248 
3249   for (i=0; i<m; i++) {
3250     nnz     = Ii[i+1]- Ii[i];
3251     JJ      = J + Ii[i];
3252     nnz_max = PetscMax(nnz_max,nnz);
3253     d       = 0;
3254     for (j=0; j<nnz; j++) {
3255       if (cstart <= JJ[j] && JJ[j] < cend) d++;
3256     }
3257     d_nnz[i] = d;
3258     o_nnz[i] = nnz - d;
3259   }
3260   ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
3261   ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);
3262 
3263   if (v) values = (PetscScalar*)v;
3264   else {
3265     ierr = PetscMalloc((nnz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr);
3266     ierr = PetscMemzero(values,nnz_max*sizeof(PetscScalar));CHKERRQ(ierr);
3267   }
3268 
3269   for (i=0; i<m; i++) {
3270     ii   = i + rstart;
3271     nnz  = Ii[i+1]- Ii[i];
3272     ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ? Ii[i] : 0),INSERT_VALUES);CHKERRQ(ierr);
3273   }
3274   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3275   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3276 
3277   if (!v) {
3278     ierr = PetscFree(values);CHKERRQ(ierr);
3279   }
3280   PetscFunctionReturn(0);
3281 }
3282 EXTERN_C_END
3283 
3284 #undef __FUNCT__
3285 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR"
3286 /*@
3287    MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format
3288    (the default parallel PETSc format).
3289 
3290    Collective on MPI_Comm
3291 
3292    Input Parameters:
3293 +  B - the matrix
3294 .  i - the indices into j for the start of each local row (starts with zero)
3295 .  j - the column indices for each local row (starts with zero)
3296 -  v - optional values in the matrix
3297 
3298    Level: developer
3299 
3300    Notes:
3301        The i, j, and v arrays ARE copied by this routine into the internal format used by PETSc;
3302      thus you CANNOT change the matrix entries by changing the values of v[] after you have
3303      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3304 
3305        The i and j indices are 0 based, and the i indices are offsets into the local j (and v) arrays.
3306 
3307        The format used for the sparse matrix input is equivalent to a
3308     row-major ordering, i.e. for the following matrix, the expected input data is
3309     as shown:
3310 
3311         1 0 0
3312         2 0 3     P0
3313        -------
3314         4 5 6     P1
3315 
3316      Process0 [P0]: rows_owned=[0,1]
3317         i =  {0,1,3}  [size = nrow+1  = 2+1]
3318         j =  {0,0,2}  [size = nz = 3]
3319         v =  {1,2,3}  [size = nz = 3]
3320 
3321      Process1 [P1]: rows_owned=[2]
3322         i =  {0,3}    [size = nrow+1  = 1+1]
3323         j =  {0,1,2}  [size = nz = 3]
3324         v =  {4,5,6}  [size = nz = 3]
3325 
3326 .keywords: matrix, aij, compressed row, sparse, parallel
3327 
3328 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateMPIAIJ(), MPIAIJ,
3329           MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays()
3330 @*/
3331 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
3332 {
3333   PetscErrorCode ierr,(*f)(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]);
3334 
3335   PetscFunctionBegin;
3336   ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",(void (**)(void))&f);CHKERRQ(ierr);
3337   if (f) {
3338     ierr = (*f)(B,i,j,v);CHKERRQ(ierr);
3339   }
3340   PetscFunctionReturn(0);
3341 }
3342 
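/*
   Illustrative sketch (not part of this source file) matching the two-process 3x3 example in the
   manual page above.  It assumes exactly two MPI processes; the hard-coded arrays are the rank-0
   and rank-1 CSR data from that example and would normally come from the application.
*/
#if 0
static PetscErrorCode PreallocationCSRSketch(MPI_Comm comm,Mat *A)
{
  PetscErrorCode    ierr;
  PetscMPIInt       rank;
  const PetscInt    i0[] = {0,1,3},j0[] = {0,0,2};   /* rank 0 owns rows 0 and 1 */
  const PetscScalar v0[] = {1,2,3};
  const PetscInt    i1[] = {0,3},j1[] = {0,1,2};     /* rank 1 owns row 2 */
  const PetscScalar v1[] = {4,5,6};

  PetscFunctionBegin;
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  ierr = MatCreate(comm,A);CHKERRQ(ierr);
  ierr = MatSetSizes(*A,rank ? 1 : 2,PETSC_DECIDE,3,3);CHKERRQ(ierr);
  ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr);
  /* preallocates and inserts the values; the matrix is assembled on return */
  if (!rank) {ierr = MatMPIAIJSetPreallocationCSR(*A,i0,j0,v0);CHKERRQ(ierr);}
  else       {ierr = MatMPIAIJSetPreallocationCSR(*A,i1,j1,v1);CHKERRQ(ierr);}
  PetscFunctionReturn(0);
}
#endif
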
3343 #undef __FUNCT__
3344 #define __FUNCT__ "MatMPIAIJSetPreallocation"
3345 /*@C
3346    MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format
3347    (the default parallel PETSc format).  For good matrix assembly performance
3348    the user should preallocate the matrix storage by setting the parameters
3349    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3350    performance can be increased by more than a factor of 50.
3351 
3352    Collective on MPI_Comm
3353 
3354    Input Parameters:
3355 +  A - the matrix
3356 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
3357            (same value is used for all local rows)
3358 .  d_nnz - array containing the number of nonzeros in the various rows of the
3359            DIAGONAL portion of the local submatrix (possibly different for each row)
3360            or PETSC_NULL, if d_nz is used to specify the nonzero structure.
3361            The size of this array is equal to the number of local rows, i.e 'm'.
3362            You must leave room for the diagonal entry even if it is zero.
3363 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
3364            submatrix (same value is used for all local rows).
3365 -  o_nnz - array containing the number of nonzeros in the various rows of the
3366            OFF-DIAGONAL portion of the local submatrix (possibly different for
3367            each row) or PETSC_NULL, if o_nz is used to specify the nonzero
3368            structure. The size of this array is equal to the number
3369            of local rows, i.e 'm'.
3370 
3371    If the *_nnz parameter is given then the *_nz parameter is ignored
3372 
3373    The AIJ format (also called the Yale sparse matrix format or
3374    compressed row storage (CSR)), is fully compatible with standard Fortran 77
3375    storage.  The stored row and column indices begin with zero.  See the users manual for details.
3376 
3377    The parallel matrix is partitioned such that the first m0 rows belong to
3378    process 0, the next m1 rows belong to process 1, the next m2 rows belong
3379    to process 2, etc., where m0,m1,m2,... are the input parameter 'm'.
3380 
3381    The DIAGONAL portion of the local submatrix of a processor can be defined
3382    as the submatrix which is obtained by extracting the part corresponding
3383    to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the
3384    first row that belongs to the processor, and r2 is the last row belonging
3385    to this processor. This is a square mxm matrix. The remaining portion
3386    of the local submatrix (mxN) constitutes the OFF-DIAGONAL portion.
3387 
3388    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
3389 
3390    You can call MatGetInfo() to get information on how effective the preallocation was;
3391    for example the fields mallocs, nz_allocated, nz_used, and nz_unneeded.
3392    You can also run with the option -info and look for messages with the string
3393    malloc in them to see if additional memory allocation was needed.
3394 
3395    Example usage:
3396 
3397    Consider the following 8x8 matrix with 34 non-zero values, that is
3398    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
3399    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
3400    as follows:
3401 
3402 .vb
3403             1  2  0  |  0  3  0  |  0  4
3404     Proc0   0  5  6  |  7  0  0  |  8  0
3405             9  0 10  | 11  0  0  | 12  0
3406     -------------------------------------
3407            13  0 14  | 15 16 17  |  0  0
3408     Proc1   0 18  0  | 19 20 21  |  0  0
3409             0  0  0  | 22 23  0  | 24  0
3410     -------------------------------------
3411     Proc2  25 26 27  |  0  0 28  | 29  0
3412            30  0  0  | 31 32 33  |  0 34
3413 .ve
3414 
3415    This can be represented as a collection of submatrices as:
3416 
3417 .vb
3418       A B C
3419       D E F
3420       G H I
3421 .ve
3422 
3423    Where the submatrices A,B,C are owned by proc0, D,E,F are
3424    owned by proc1, G,H,I are owned by proc2.
3425 
3426    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3427    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3428    The 'M','N' parameters are 8,8, and have the same values on all procs.
3429 
3430    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
3431    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
3432    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
3433    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
3434    part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
3435    matrix, and [DF] as another SeqAIJ matrix.
3436 
3437    When d_nz, o_nz parameters are specified, d_nz storage elements are
3438    allocated for every row of the local diagonal submatrix, and o_nz
3439    storage locations are allocated for every row of the OFF-DIAGONAL submat.
3440    One way to choose d_nz and o_nz is to use the maximum number of nonzeros over
3441    the local rows of the DIAGONAL and the OFF-DIAGONAL submatrices, respectively.
3442    In this case, the values of d_nz,o_nz are:
3443 .vb
3444      proc0 : dnz = 2, o_nz = 2
3445      proc1 : dnz = 3, o_nz = 2
3446      proc2 : dnz = 1, o_nz = 4
3447 .ve
3448    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
3449    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
3450    for proc2, i.e. we are using 12+15+10=37 storage locations to store
3451    34 values.
3452 
3453    When d_nnz, o_nnz parameters are specified, the storage is specified
3454    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
3455    In the above case the values for d_nnz,o_nnz are:
3456 .vb
3457      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
3458      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
3459      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
3460 .ve
3461    Here the space allocated is the sum of all the above values, i.e. 34, and
3462    hence the preallocation is perfect.
3463 
3464    Level: intermediate
3465 
3466 .keywords: matrix, aij, compressed row, sparse, parallel
3467 
3468 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIAIJ(), MatMPIAIJSetPreallocationCSR(),
3469           MPIAIJ, MatGetInfo()
3470 @*/
3471 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
3472 {
3473   PetscErrorCode ierr,(*f)(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]);
3474 
3475   PetscFunctionBegin;
3476   ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr);
3477   if (f) {
3478     ierr = (*f)(B,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
3479   }
3480   PetscFunctionReturn(0);
3481 }
3482 
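/*
   Illustrative sketch (not part of this source file) of the per-row preallocation described in the
   manual page above.  It shows only the calls rank 0 would make in the three-process 8x8 example
   (d_nnz and o_nnz are the proc0 values listed there); the other ranks would use their own arrays.
*/
#if 0
static PetscErrorCode PreallocationSketch(MPI_Comm comm,Mat *A)
{
  PetscErrorCode ierr;
  const PetscInt d_nnz[] = {2,2,2},o_nnz[] = {2,2,2};  /* per-row counts for proc0's three rows */

  PetscFunctionBegin;
  ierr = MatCreate(comm,A);CHKERRQ(ierr);
  ierr = MatSetSizes(*A,3,3,8,8);CHKERRQ(ierr);
  ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr);
  ierr = MatMPIAIJSetPreallocation(*A,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
  /* insert entries with MatSetValues(), then MatAssemblyBegin()/MatAssemblyEnd();
     MatGetInfo() can afterwards report mallocs/nz_allocated/nz_used to check the preallocation */
  PetscFunctionReturn(0);
}
#endif
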
3483 #undef __FUNCT__
3484 #define __FUNCT__ "MatCreateMPIAIJWithArrays"
3485 /*@
3486      MatCreateMPIAIJWithArrays - creates a MPI AIJ matrix using arrays that contain in standard
3487          CSR format the local rows.
3488 
3489    Collective on MPI_Comm
3490 
3491    Input Parameters:
3492 +  comm - MPI communicator
3493 .  m - number of local rows (Cannot be PETSC_DECIDE)
3494 .  n - This value should be the same as the local size used in creating the
3495        x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have it
3496        calculated if N is given). For square matrices n is almost always m.
3497 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
3498 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
3499 .   i - row indices
3500 .   j - column indices
3501 -   a - matrix values
3502 
3503    Output Parameter:
3504 .   mat - the matrix
3505 
3506    Level: intermediate
3507 
3508    Notes:
3509        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
3510      thus you CANNOT change the matrix entries by changing the values of a[] after you have
3511      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3512 
3513        The i and j indices are 0 based, and the i indices are offsets into the local j (and a) arrays.
3514 
3515        The format used for the sparse matrix input is equivalent to a
3516     row-major ordering, i.e. for the following matrix, the expected input data is
3517     as shown:
3518 
3519         1 0 0
3520         2 0 3     P0
3521        -------
3522         4 5 6     P1
3523 
3524      Process0 [P0]: rows_owned=[0,1]
3525         i =  {0,1,3}  [size = nrow+1  = 2+1]
3526         j =  {0,0,2}  [size = nz = 3]
3527         v =  {1,2,3}  [size = nz = 3]
3528 
3529      Process1 [P1]: rows_owned=[2]
3530         i =  {0,3}    [size = nrow+1  = 1+1]
3531         j =  {0,1,2}  [size = nz = 3]
3532         v =  {4,5,6}  [size = nz = 3]
3533 
3534 .keywords: matrix, aij, compressed row, sparse, parallel
3535 
3536 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
3537           MPIAIJ, MatCreateMPIAIJ(), MatCreateMPIAIJWithSplitArrays()
3538 @*/
3539 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
3540 {
3541   PetscErrorCode ierr;
3542 
3543   PetscFunctionBegin;
3544   if (i[0]) {
3545     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
3546   }
3547   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
3548   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
3549   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
3550   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
3551   ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr);
3552   PetscFunctionReturn(0);
3553 }
3554 
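/*
   Illustrative sketch (not part of this source file) of wrapping existing 0-based CSR data with
   the routine above; mlocal is the number of locally owned rows and N the global column count,
   both supplied by the caller.  As the manual page notes, the arrays are copied internally.
*/
#if 0
static PetscErrorCode CreateWithArraysSketch(MPI_Comm comm,PetscInt mlocal,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *A)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* local column size and global row count are derived from N and mlocal respectively */
  ierr = MatCreateMPIAIJWithArrays(comm,mlocal,PETSC_DECIDE,PETSC_DETERMINE,N,i,j,a,A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#endif
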
3555 #undef __FUNCT__
3556 #define __FUNCT__ "MatCreateMPIAIJ"
3557 /*@C
3558    MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format
3559    (the default parallel PETSc format).  For good matrix assembly performance
3560    the user should preallocate the matrix storage by setting the parameters
3561    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3562    performance can be increased by more than a factor of 50.
3563 
3564    Collective on MPI_Comm
3565 
3566    Input Parameters:
3567 +  comm - MPI communicator
3568 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
3569            This value should be the same as the local size used in creating the
3570            y vector for the matrix-vector product y = Ax.
3571 .  n - This value should be the same as the local size used in creating the
3572        x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have it
3573        calculated if N is given). For square matrices n is almost always m.
3574 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
3575 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
3576 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
3577            (same value is used for all local rows)
3578 .  d_nnz - array containing the number of nonzeros in the various rows of the
3579            DIAGONAL portion of the local submatrix (possibly different for each row)
3580            or PETSC_NULL, if d_nz is used to specify the nonzero structure.
3581            The size of this array is equal to the number of local rows, i.e 'm'.
3582            You must leave room for the diagonal entry even if it is zero.
3583 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
3584            submatrix (same value is used for all local rows).
3585 -  o_nnz - array containing the number of nonzeros in the various rows of the
3586            OFF-DIAGONAL portion of the local submatrix (possibly different for
3587            each row) or PETSC_NULL, if o_nz is used to specify the nonzero
3588            structure. The size of this array is equal to the number
3589            of local rows, i.e 'm'.
3590 
3591    Output Parameter:
3592 .  A - the matrix
3593 
3594    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
3595    MatXXXXSetPreallocation() paradigm instead of this routine directly.
3596    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
3597 
3598    Notes:
3599    If the *_nnz parameter is given then the *_nz parameter is ignored
3600 
3601    m,n,M,N parameters specify the size of the matrix, and its partitioning across
3602    processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
3603    storage requirements for this matrix.
3604 
3605    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one
3606    processor then it must be used on all processors that share the object for
3607    that argument.
3608 
3609    The user MUST specify either the local or global matrix dimensions
3610    (possibly both).
3611 
3612    The parallel matrix is partitioned across processors such that the
3613    first m0 rows belong to process 0, the next m1 rows belong to
3614    process 1, the next m2 rows belong to process 2, etc., where
3615    m0,m1,m2,... are the input parameter 'm', i.e. each processor stores
3616    values corresponding to an [m x N] submatrix.
3617 
3618    The columns are logically partitioned with the n0 columns belonging
3619    to 0th partition, the next n1 columns belonging to the next
3620    partition, etc., where n0,n1,n2,... are the input parameter 'n'.
3621 
3622    The DIAGONAL portion of the local submatrix on any given processor
3623    is the submatrix corresponding to the rows and columns m,n
3624    corresponding to the given processor. i.e diagonal matrix on
3625    process 0 is [m0 x n0], diagonal matrix on process 1 is [m1 x n1]
3626    etc. The remaining portion of the local submatrix [m x (N-n)]
3627    constitute the OFF-DIAGONAL portion. The example below better
3628    illustrates this concept.
3629 
3630    For a square global matrix we define each processor's diagonal portion
3631    to be its local rows and the corresponding columns (a square submatrix);
3632    each processor's off-diagonal portion encompasses the remainder of the
3633    local matrix (a rectangular submatrix).
3634 
3635    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
3636 
3637    When calling this routine with a single process communicator, a matrix of
3638    type SEQAIJ is returned.  If a matrix of type MPIAIJ is desired for this
3639    type of communicator, use the construction mechanism:
3640      MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatSetSizes(A, m,n,M,N); MatMPIAIJSetPreallocation(A,...);
3641 
3642    By default, this format uses inodes (identical nodes) when possible.
3643    We search for consecutive rows with the same nonzero structure, thereby
3644    reusing matrix information to achieve increased efficiency.
3645 
3646    Options Database Keys:
3647 +  -mat_no_inode  - Do not use inodes
3648 .  -mat_inode_limit <limit> - Sets inode limit (max limit=5)
3649 -  -mat_aij_oneindex - Internally use indexing starting at 1
3650         rather than 0.  Note that when calling MatSetValues(),
3651         the user still MUST index entries starting at 0!
3652 
3653 
3654    Example usage:
3655 
3656    Consider the following 8x8 matrix with 34 non-zero values, that is
3657    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
3658    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
3659    as follows:
3660 
3661 .vb
3662             1  2  0  |  0  3  0  |  0  4
3663     Proc0   0  5  6  |  7  0  0  |  8  0
3664             9  0 10  | 11  0  0  | 12  0
3665     -------------------------------------
3666            13  0 14  | 15 16 17  |  0  0
3667     Proc1   0 18  0  | 19 20 21  |  0  0
3668             0  0  0  | 22 23  0  | 24  0
3669     -------------------------------------
3670     Proc2  25 26 27  |  0  0 28  | 29  0
3671            30  0  0  | 31 32 33  |  0 34
3672 .ve
3673 
3674    This can be represented as a collection of submatrices as:
3675 
3676 .vb
3677       A B C
3678       D E F
3679       G H I
3680 .ve
3681 
3682    Where the submatrices A,B,C are owned by proc0, D,E,F are
3683    owned by proc1, G,H,I are owned by proc2.
3684 
3685    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3686    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3687    The 'M','N' parameters are 8,8, and have the same values on all procs.
3688 
3689    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
3690    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
3691    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
3692    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
3693    part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
3694    matrix, and [DF] as another SeqAIJ matrix.
3695 
3696    When d_nz, o_nz parameters are specified, d_nz storage elements are
3697    allocated for every row of the local diagonal submatrix, and o_nz
3698    storage locations are allocated for every row of the OFF-DIAGONAL submat.
3699    One way to choose d_nz and o_nz is to use the maximum number of nonzeros over
3700    the local rows of the DIAGONAL and the OFF-DIAGONAL submatrices, respectively.
3701    In this case, the values of d_nz,o_nz are:
3702 .vb
3703      proc0 : dnz = 2, o_nz = 2
3704      proc1 : dnz = 3, o_nz = 2
3705      proc2 : dnz = 1, o_nz = 4
3706 .ve
3707    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
3708    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
3709    for proc2, i.e. we are using 12+15+10=37 storage locations to store
3710    34 values.
3711 
3712    When d_nnz, o_nnz parameters are specified, the storage is specified
3713    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
3714    In the above case the values for d_nnz,o_nnz are:
3715 .vb
3716      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
3717      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
3718      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
3719 .ve
3720    Here the space allocated is the sum of all the above values, i.e. 34, and
3721    hence the preallocation is perfect.
3722 
3723    Level: intermediate
3724 
3725 .keywords: matrix, aij, compressed row, sparse, parallel
3726 
3727 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
3728           MPIAIJ, MatCreateMPIAIJWithArrays()
3729 @*/
3730 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
3731 {
3732   PetscErrorCode ierr;
3733   PetscMPIInt    size;
3734 
3735   PetscFunctionBegin;
3736   ierr = MatCreate(comm,A);CHKERRQ(ierr);
3737   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
3738   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3739   if (size > 1) {
3740     ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr);
3741     ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
3742   } else {
3743     ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr);
3744     ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr);
3745   }
3746   PetscFunctionReturn(0);
3747 }
3748 
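/*
   Illustrative sketch (not part of this source file) of the MatCreate()/MatSetType()/
   MatXXXXSetPreallocation() paradigm recommended in the manual page above.  mlocal, nlocal, dnz
   and onz are caller-supplied; both preallocation calls are safe because each wrapper is a no-op
   for the matrix type it does not apply to, so the same code works on one or many processes.
*/
#if 0
static PetscErrorCode CreateAIJSketch(MPI_Comm comm,PetscInt mlocal,PetscInt nlocal,PetscInt dnz,PetscInt onz,Mat *A)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatCreate(comm,A);CHKERRQ(ierr);
  ierr = MatSetSizes(*A,mlocal,nlocal,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
  ierr = MatSetType(*A,MATAIJ);CHKERRQ(ierr);   /* resolves to MATSEQAIJ or MATMPIAIJ from the communicator size */
  ierr = MatSetFromOptions(*A);CHKERRQ(ierr);
  ierr = MatMPIAIJSetPreallocation(*A,dnz,PETSC_NULL,onz,PETSC_NULL);CHKERRQ(ierr);
  ierr = MatSeqAIJSetPreallocation(*A,dnz,PETSC_NULL);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#endif
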
3749 #undef __FUNCT__
3750 #define __FUNCT__ "MatMPIAIJGetSeqAIJ"
3751 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,PetscInt *colmap[])
3752 {
3753   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
3754 
3755   PetscFunctionBegin;
3756   *Ad     = a->A;
3757   *Ao     = a->B;
3758   *colmap = a->garray;
3759   PetscFunctionReturn(0);
3760 }
3761 
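/*
   Illustrative sketch (not part of this source file) of inspecting the two local blocks through
   the accessor above.  After assembly, Ad is the local diagonal block, Ao the off-diagonal block,
   and colmap[k] is the global column index of local column k of Ao (the garray set up in this file).
*/
#if 0
static PetscErrorCode InspectBlocksSketch(Mat A)
{
  PetscErrorCode ierr;
  Mat            Ad,Ao;
  PetscInt       *colmap,nAo;

  PetscFunctionBegin;
  ierr = MatMPIAIJGetSeqAIJ(A,&Ad,&Ao,&colmap);CHKERRQ(ierr);
  ierr = MatGetSize(Ao,0,&nAo);CHKERRQ(ierr);   /* number of columns of Ao equals the length of colmap */
  ierr = PetscPrintf(PETSC_COMM_SELF,"off-diagonal block has %D columns\n",nAo);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#endif
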
3762 #undef __FUNCT__
3763 #define __FUNCT__ "MatSetColoring_MPIAIJ"
3764 PetscErrorCode MatSetColoring_MPIAIJ(Mat A,ISColoring coloring)
3765 {
3766   PetscErrorCode ierr;
3767   PetscInt       i;
3768   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
3769 
3770   PetscFunctionBegin;
3771   if (coloring->ctype == IS_COLORING_GLOBAL) {
3772     ISColoringValue *allcolors,*colors;
3773     ISColoring      ocoloring;
3774 
3775     /* set coloring for diagonal portion */
3776     ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr);
3777 
3778     /* set coloring for off-diagonal portion */
3779     ierr = ISAllGatherColors(((PetscObject)A)->comm,coloring->n,coloring->colors,PETSC_NULL,&allcolors);CHKERRQ(ierr);
3780     ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
3781     for (i=0; i<a->B->cmap->n; i++) {
3782       colors[i] = allcolors[a->garray[i]];
3783     }
3784     ierr = PetscFree(allcolors);CHKERRQ(ierr);
3785     ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
3786     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
3787     ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
3788   } else if (coloring->ctype == IS_COLORING_GHOSTED) {
3789     ISColoringValue *colors;
3790     PetscInt        *larray;
3791     ISColoring      ocoloring;
3792 
3793     /* set coloring for diagonal portion */
3794     ierr = PetscMalloc((a->A->cmap->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr);
3795     for (i=0; i<a->A->cmap->n; i++) {
3796       larray[i] = i + A->cmap->rstart;
3797     }
3798     ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->A->cmap->n,larray,PETSC_NULL,larray);CHKERRQ(ierr);
3799     ierr = PetscMalloc((a->A->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
3800     for (i=0; i<a->A->cmap->n; i++) {
3801       colors[i] = coloring->colors[larray[i]];
3802     }
3803     ierr = PetscFree(larray);CHKERRQ(ierr);
3804     ierr = ISColoringCreate(PETSC_COMM_SELF,coloring->n,a->A->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
3805     ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr);
3806     ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
3807 
3808     /* set coloring for off-diagonal portion */
3809     ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr);
3810     ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->B->cmap->n,a->garray,PETSC_NULL,larray);CHKERRQ(ierr);
3811     ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
3812     for (i=0; i<a->B->cmap->n; i++) {
3813       colors[i] = coloring->colors[larray[i]];
3814     }
3815     ierr = PetscFree(larray);CHKERRQ(ierr);
3816     ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
3817     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
3818     ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
3819   } else {
3820     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support for ISColoringType %d",(int)coloring->ctype);
3821   }
3822 
3823   PetscFunctionReturn(0);
3824 }
3825 
3826 #if defined(PETSC_HAVE_ADIC)
3827 #undef __FUNCT__
3828 #define __FUNCT__ "MatSetValuesAdic_MPIAIJ"
3829 PetscErrorCode MatSetValuesAdic_MPIAIJ(Mat A,void *advalues)
3830 {
3831   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
3832   PetscErrorCode ierr;
3833 
3834   PetscFunctionBegin;
3835   ierr = MatSetValuesAdic_SeqAIJ(a->A,advalues);CHKERRQ(ierr);
3836   ierr = MatSetValuesAdic_SeqAIJ(a->B,advalues);CHKERRQ(ierr);
3837   PetscFunctionReturn(0);
3838 }
3839 #endif
3840 
3841 #undef __FUNCT__
3842 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ"
3843 PetscErrorCode MatSetValuesAdifor_MPIAIJ(Mat A,PetscInt nl,void *advalues)
3844 {
3845   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
3846   PetscErrorCode ierr;
3847 
3848   PetscFunctionBegin;
3849   ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr);
3850   ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr);
3851   PetscFunctionReturn(0);
3852 }
3853 
3854 #undef __FUNCT__
3855 #define __FUNCT__ "MatMerge"
3856 /*@
3857       MatMerge - Creates a single large PETSc matrix by concatenating sequential
3858                  matrices from each processor
3859 
3860     Collective on MPI_Comm
3861 
3862    Input Parameters:
3863 +    comm - the communicator the parallel matrix will live on
3864 .    inmat - the input sequential matrix (one per process)
3865 .    n - number of local columns (or PETSC_DECIDE)
3866 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
3867 
3868    Output Parameter:
3869 .    outmat - the parallel matrix generated
3870 
3871     Level: advanced
3872 
3873    Notes: The number of columns of the matrix in EACH processor MUST be the same.
3874 
3875 @*/
3876 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
3877 {
3878   PetscErrorCode ierr;
3879   PetscInt       m,N,i,rstart,nnz,Ii,*dnz,*onz;
3880   PetscInt       *indx;
3881   PetscScalar    *values;
3882 
3883   PetscFunctionBegin;
3884   ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
3885   if (scall == MAT_INITIAL_MATRIX){
3886     /* count nonzeros in each row, for diagonal and off diagonal portion of matrix */
3887     if (n == PETSC_DECIDE){
3888       ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr);
3889     }
3890     ierr = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3891     rstart -= m;
3892 
3893     ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
3894     for (i=0;i<m;i++) {
3895       ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr);
3896       ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr);
3897       ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr);
3898     }
3899     /* This routine will ONLY return MPIAIJ type matrix */
3900     ierr = MatCreate(comm,outmat);CHKERRQ(ierr);
3901     ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
3902     ierr = MatSetType(*outmat,MATMPIAIJ);CHKERRQ(ierr);
3903     ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr);
3904     ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
3905 
3906   } else if (scall == MAT_REUSE_MATRIX){
3907     ierr = MatGetOwnershipRange(*outmat,&rstart,PETSC_NULL);CHKERRQ(ierr);
3908   } else {
3909     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
3910   }
3911 
3912   for (i=0;i<m;i++) {
3913     ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
3914     Ii    = i + rstart;
3915     ierr = MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
3916     ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
3917   }
3918   ierr = MatDestroy(inmat);CHKERRQ(ierr);
3919   ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3920   ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3921 
3922   PetscFunctionReturn(0);
3923 }
3924 
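/*
   Illustrative sketch (not part of this source file) of calling MatMerge(): each process supplies
   its own sequential matrix (all with the same number of columns) and the per-process rows are
   stacked into one parallel MPIAIJ matrix.  Note from the code above that MatMerge() destroys
   inmat, so the caller must not destroy localmat again.
*/
#if 0
static PetscErrorCode MergeSketch(MPI_Comm comm,Mat localmat,Mat *global)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* localmat is this process's SeqAIJ contribution; PETSC_DECIDE lets PETSc pick the local column size */
  ierr = MatMerge(comm,localmat,PETSC_DECIDE,MAT_INITIAL_MATRIX,global);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#endif
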
3925 #undef __FUNCT__
3926 #define __FUNCT__ "MatFileSplit"
3927 PetscErrorCode MatFileSplit(Mat A,char *outfile)
3928 {
3929   PetscErrorCode    ierr;
3930   PetscMPIInt       rank;
3931   PetscInt          m,N,i,rstart,nnz;
3932   size_t            len;
3933   const PetscInt    *indx;
3934   PetscViewer       out;
3935   char              *name;
3936   Mat               B;
3937   const PetscScalar *values;
3938 
3939   PetscFunctionBegin;
3940   ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr);
3941   ierr = MatGetSize(A,0,&N);CHKERRQ(ierr);
3942   /* Should this be the type of the diagonal block of A? */
3943   ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr);
3944   ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr);
3945   ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr);
3946   ierr = MatSeqAIJSetPreallocation(B,0,PETSC_NULL);CHKERRQ(ierr);
3947   ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr);
3948   for (i=0;i<m;i++) {
3949     ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
3950     ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
3951     ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
3952   }
3953   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3954   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3955 
3956   ierr = MPI_Comm_rank(((PetscObject)A)->comm,&rank);CHKERRQ(ierr);
3957   ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr);
3958   ierr = PetscMalloc((len+5)*sizeof(char),&name);CHKERRQ(ierr);
3959   sprintf(name,"%s.%d",outfile,rank);
3960   ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr);
3961   ierr = PetscFree(name);CHKERRQ(ierr);
3962   ierr = MatView(B,out);CHKERRQ(ierr);
3963   ierr = PetscViewerDestroy(out);CHKERRQ(ierr);
3964   ierr = MatDestroy(B);CHKERRQ(ierr);
3965   PetscFunctionReturn(0);
3966 }
3967 
3968 EXTERN PetscErrorCode MatDestroy_MPIAIJ(Mat);
3969 #undef __FUNCT__
3970 #define __FUNCT__ "MatDestroy_MPIAIJ_SeqsToMPI"
3971 PetscErrorCode PETSCMAT_DLLEXPORT MatDestroy_MPIAIJ_SeqsToMPI(Mat A)
3972 {
3973   PetscErrorCode       ierr;
3974   Mat_Merge_SeqsToMPI  *merge;
3975   PetscContainer       container;
3976 
3977   PetscFunctionBegin;
3978   ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr);
3979   if (container) {
3980     ierr = PetscContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr);
3981     ierr = PetscFree(merge->id_r);CHKERRQ(ierr);
3982     ierr = PetscFree(merge->len_s);CHKERRQ(ierr);
3983     ierr = PetscFree(merge->len_r);CHKERRQ(ierr);
3984     ierr = PetscFree(merge->bi);CHKERRQ(ierr);
3985     ierr = PetscFree(merge->bj);CHKERRQ(ierr);
3986     ierr = PetscFree(merge->buf_ri[0]);CHKERRQ(ierr);
3987     ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr);
3988     ierr = PetscFree(merge->buf_rj[0]);CHKERRQ(ierr);
3989     ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr);
3990     ierr = PetscFree(merge->coi);CHKERRQ(ierr);
3991     ierr = PetscFree(merge->coj);CHKERRQ(ierr);
3992     ierr = PetscFree(merge->owners_co);CHKERRQ(ierr);
3993     ierr = PetscLayoutDestroy(merge->rowmap);CHKERRQ(ierr);
3994 
3995     ierr = PetscContainerDestroy(container);CHKERRQ(ierr);
3996     ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr);
3997     ierr = PetscFree(merge);CHKERRQ(ierr); /* merge is only valid when the container exists */
3998   }
3999 
4000   ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr);
4001   PetscFunctionReturn(0);
4002 }
4003 
4004 #include "../src/mat/utils/freespace.h"
4005 #include "petscbt.h"
4006 
4007 #undef __FUNCT__
4008 #define __FUNCT__ "MatMerge_SeqsToMPINumeric"
4009 /*@C
4010       MatMerge_SeqsToMPI - Creates an MPIAIJ matrix by adding together sequential
4011                  matrices from each processor
4012 
4013     Collective on MPI_Comm
4014 
4015    Input Parameters:
4016 +    comm - the communicator the parallel matrix will live on
4017 .    seqmat - the input sequential matrices
4018 .    m - number of local rows (or PETSC_DECIDE)
4019 .    n - number of local columns (or PETSC_DECIDE)
4020 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4021 
4022    Output Parameter:
4023 .    mpimat - the parallel matrix generated
4024 
4025     Level: advanced
4026 
4027    Notes:
4028      The dimensions of the sequential matrix in each processor MUST be the same.
4029      The input seqmat is included in the container "Mat_Merge_SeqsToMPI", and will be
4030      destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat.
4031 @*/
4032 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPINumeric(Mat seqmat,Mat mpimat)
4033 {
4034   PetscErrorCode       ierr;
4035   MPI_Comm             comm=((PetscObject)mpimat)->comm;
4036   Mat_SeqAIJ           *a=(Mat_SeqAIJ*)seqmat->data;
4037   PetscMPIInt          size,rank,taga,*len_s;
4038   PetscInt             N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj=a->j;
4039   PetscInt             proc,m;
4040   PetscInt             **buf_ri,**buf_rj;
4041   PetscInt             k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj;
4042   PetscInt             nrows,**buf_ri_k,**nextrow,**nextai;
4043   MPI_Request          *s_waits,*r_waits;
4044   MPI_Status           *status;
4045   MatScalar            *aa=a->a;
4046   MatScalar            **abuf_r,*ba_i;
4047   Mat_Merge_SeqsToMPI  *merge;
4048   PetscContainer       container;
4049 
4050   PetscFunctionBegin;
4051   ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
4052 
4053   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4054   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4055 
4056   ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr);
4057   if (container) {
4058     ierr  = PetscContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr);
4059   } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Matrix mpimat does not have an attached MatMergeSeqsToMPI container; call MatMerge_SeqsToMPISymbolic() first");
4060   bi     = merge->bi;
4061   bj     = merge->bj;
4062   buf_ri = merge->buf_ri;
4063   buf_rj = merge->buf_rj;
4064 
4065   ierr   = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr);
4066   owners = merge->rowmap->range;
4067   len_s  = merge->len_s;
4068 
4069   /* send and recv matrix values */
4070   /*-----------------------------*/
4071   ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr);
4072   ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr);
4073 
4074   ierr = PetscMalloc((merge->nsend+1)*sizeof(MPI_Request),&s_waits);CHKERRQ(ierr);
4075   for (proc=0,k=0; proc<size; proc++){
4076     if (!len_s[proc]) continue;
4077     i = owners[proc];
4078     ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr);
4079     k++;
4080   }
4081 
4082   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);}
4083   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);}
4084   ierr = PetscFree(status);CHKERRQ(ierr);
4085 
4086   ierr = PetscFree(s_waits);CHKERRQ(ierr);
4087   ierr = PetscFree(r_waits);CHKERRQ(ierr);
4088 
4089   /* insert mat values of mpimat */
4090   /*----------------------------*/
4091   ierr = PetscMalloc(N*sizeof(PetscScalar),&ba_i);CHKERRQ(ierr);
4092   ierr = PetscMalloc3(merge->nrecv,PetscInt*,&buf_ri_k,merge->nrecv,PetscInt*,&nextrow,merge->nrecv,PetscInt*,&nextai);CHKERRQ(ierr);
4093 
4094   for (k=0; k<merge->nrecv; k++){
4095     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
4096     nrows = *(buf_ri_k[k]);
4097     nextrow[k]  = buf_ri_k[k]+1;  /* next row number of k-th recved i-structure */
4098     nextai[k]   = buf_ri_k[k] + (nrows + 1);/* points to the next i-structure of k-th recved i-structure  */
4099   }
4100 
4101   /* set values of ba */
4102   m = merge->rowmap->n;
4103   for (i=0; i<m; i++) {
4104     arow = owners[rank] + i;
4105     bj_i = bj+bi[i];  /* col indices of the i-th row of mpimat */
4106     bnzi = bi[i+1] - bi[i];
4107     ierr = PetscMemzero(ba_i,bnzi*sizeof(PetscScalar));CHKERRQ(ierr);
4108 
4109     /* add local non-zero vals of this proc's seqmat into ba */
4110     anzi = ai[arow+1] - ai[arow];
4111     aj   = a->j + ai[arow];
4112     aa   = a->a + ai[arow];
4113     nextaj = 0;
4114     for (j=0; nextaj<anzi; j++){
4115       if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */
4116         ba_i[j] += aa[nextaj++];
4117       }
4118     }
4119 
4120     /* add received vals into ba */
4121     for (k=0; k<merge->nrecv; k++){ /* k-th received message */
4122       /* i-th row */
4123       if (i == *nextrow[k]) {
4124         anzi = *(nextai[k]+1) - *nextai[k];
4125         aj   = buf_rj[k] + *(nextai[k]);
4126         aa   = abuf_r[k] + *(nextai[k]);
4127         nextaj = 0;
4128         for (j=0; nextaj<anzi; j++){
4129           if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */
4130             ba_i[j] += aa[nextaj++];
4131           }
4132         }
4133         nextrow[k]++; nextai[k]++;
4134       }
4135     }
4136     ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr);
4137   }
4138   ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4139   ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4140 
4141   ierr = PetscFree(abuf_r[0]);CHKERRQ(ierr);
4142   ierr = PetscFree(abuf_r);CHKERRQ(ierr);
4143   ierr = PetscFree(ba_i);CHKERRQ(ierr);
4144   ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
4145   ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
4146   PetscFunctionReturn(0);
4147 }
4148 
4149 #undef __FUNCT__
4150 #define __FUNCT__ "MatMerge_SeqsToMPISymbolic"
4151 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPISymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat)
4152 {
4153   PetscErrorCode       ierr;
4154   Mat                  B_mpi;
4155   Mat_SeqAIJ           *a=(Mat_SeqAIJ*)seqmat->data;
4156   PetscMPIInt          size,rank,tagi,tagj,*len_s,*len_si,*len_ri;
4157   PetscInt             **buf_rj,**buf_ri,**buf_ri_k;
4158   PetscInt             M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j;
4159   PetscInt             len,proc,*dnz,*onz;
4160   PetscInt             k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0;
4161   PetscInt             nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai;
4162   MPI_Request          *si_waits,*sj_waits,*ri_waits,*rj_waits;
4163   MPI_Status           *status;
4164   PetscFreeSpaceList   free_space=PETSC_NULL,current_space=PETSC_NULL;
4165   PetscBT              lnkbt;
4166   Mat_Merge_SeqsToMPI  *merge;
4167   PetscContainer       container;
4168 
4169   PetscFunctionBegin;
4170   ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4171 
4172   /* make sure it is a PETSc comm */
4173   ierr = PetscCommDuplicate(comm,&comm,PETSC_NULL);CHKERRQ(ierr);
4174   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4175   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4176 
4177   ierr = PetscNew(Mat_Merge_SeqsToMPI,&merge);CHKERRQ(ierr);
4178   ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr);
4179 
4180   /* determine row ownership */
4181   /*---------------------------------------------------------*/
4182   ierr = PetscLayoutCreate(comm,&merge->rowmap);CHKERRQ(ierr);
4183   ierr = PetscLayoutSetLocalSize(merge->rowmap,m);CHKERRQ(ierr);
4184   ierr = PetscLayoutSetSize(merge->rowmap,M);CHKERRQ(ierr);
4185   ierr = PetscLayoutSetBlockSize(merge->rowmap,1);CHKERRQ(ierr);
4186   ierr = PetscLayoutSetUp(merge->rowmap);CHKERRQ(ierr);
4187   ierr = PetscMalloc(size*sizeof(PetscMPIInt),&len_si);CHKERRQ(ierr);
4188   ierr = PetscMalloc(size*sizeof(PetscMPIInt),&merge->len_s);CHKERRQ(ierr);
4189 
4190   m      = merge->rowmap->n;
4191   M      = merge->rowmap->N;
4192   owners = merge->rowmap->range;
4193 
4194   /* determine the number of messages to send, their lengths */
4195   /*---------------------------------------------------------*/
4196   len_s  = merge->len_s;
4197 
4198   len = 0;  /* length of buf_si[] */
4199   merge->nsend = 0;
4200   for (proc=0; proc<size; proc++){
4201     len_si[proc] = 0;
4202     if (proc == rank){
4203       len_s[proc] = 0;
4204     } else {
4205       len_si[proc] = owners[proc+1] - owners[proc] + 1;
4206       len_s[proc] = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */
4207     }
4208     if (len_s[proc]) {
4209       merge->nsend++;
4210       nrows = 0;
4211       for (i=owners[proc]; i<owners[proc+1]; i++){
4212         if (ai[i+1] > ai[i]) nrows++;
4213       }
4214       len_si[proc] = 2*(nrows+1);
4215       len += len_si[proc];
4216     }
4217   }
4218 
4219   /* determine the number and length of messages to receive for ij-structure */
4220   /*-------------------------------------------------------------------------*/
4221   ierr = PetscGatherNumberOfMessages(comm,PETSC_NULL,len_s,&merge->nrecv);CHKERRQ(ierr);
4222   ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr);
4223 
4224   /* post the Irecv of j-structure */
4225   /*-------------------------------*/
4226   ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr);
4227   ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr);
4228 
4229   /* post the Isend of j-structure */
4230   /*--------------------------------*/
4231   ierr = PetscMalloc2(merge->nsend,MPI_Request,&si_waits,merge->nsend,MPI_Request,&sj_waits);CHKERRQ(ierr);
4232 
4233   for (proc=0, k=0; proc<size; proc++){
4234     if (!len_s[proc]) continue;
4235     i = owners[proc];
4236     ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr);
4237     k++;
4238   }
4239 
4240   /* receives and sends of j-structure are complete */
4241   /*------------------------------------------------*/
4242   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);}
4243   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);}
4244 
4245   /* send and recv i-structure */
4246   /*---------------------------*/
4247   ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr);
4248   ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr);
4249 
4250   ierr = PetscMalloc((len+1)*sizeof(PetscInt),&buf_s);CHKERRQ(ierr);
4251   buf_si = buf_s;  /* points to the beginning of k-th msg to be sent */
4252   for (proc=0,k=0; proc<size; proc++){
4253     if (!len_s[proc]) continue;
4254     /* form outgoing message for i-structure:
4255          buf_si[0]:                 nrows to be sent
4256                [1:nrows]:           row index (global)
4257                [nrows+1:2*nrows+1]: i-structure index
4258     */
4259     /*-------------------------------------------*/
4260     nrows = len_si[proc]/2 - 1;
4261     buf_si_i    = buf_si + nrows+1;
4262     buf_si[0]   = nrows;
4263     buf_si_i[0] = 0;
4264     nrows = 0;
4265     for (i=owners[proc]; i<owners[proc+1]; i++){
4266       anzi = ai[i+1] - ai[i];
4267       if (anzi) {
4268         buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */
4269         buf_si[nrows+1] = i-owners[proc]; /* local row index */
4270         nrows++;
4271       }
4272     }
4273     ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr);
4274     k++;
4275     buf_si += len_si[proc];
4276   }
4277 
4278   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);}
4279   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);}
4280 
4281   ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr);
4282   for (i=0; i<merge->nrecv; i++){
4283     ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr);
4284   }
4285 
4286   ierr = PetscFree(len_si);CHKERRQ(ierr);
4287   ierr = PetscFree(len_ri);CHKERRQ(ierr);
4288   ierr = PetscFree(rj_waits);CHKERRQ(ierr);
4289   ierr = PetscFree2(si_waits,sj_waits);CHKERRQ(ierr);
4290   ierr = PetscFree(ri_waits);CHKERRQ(ierr);
4291   ierr = PetscFree(buf_s);CHKERRQ(ierr);
4292   ierr = PetscFree(status);CHKERRQ(ierr);
4293 
4294   /* compute a local seq matrix in each processor */
4295   /*----------------------------------------------*/
4296   /* allocate bi array and free space for accumulating nonzero column info */
4297   ierr = PetscMalloc((m+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr);
4298   bi[0] = 0;
4299 
4300   /* create and initialize a linked list */
4301   nlnk = N+1;
4302   ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4303 
4304   /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */
4305   len  = ai[owners[rank+1]] - ai[owners[rank]];
4307   ierr = PetscFreeSpaceGet((PetscInt)(2*len+1),&free_space);CHKERRQ(ierr);
4308   current_space = free_space;
4309 
4310   /* determine symbolic info for each local row */
4311   ierr = PetscMalloc3(merge->nrecv,PetscInt*,&buf_ri_k,merge->nrecv,PetscInt*,&nextrow,merge->nrecv,PetscInt*,&nextai);CHKERRQ(ierr);
4312 
4313   for (k=0; k<merge->nrecv; k++){
4314     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
4315     nrows = *buf_ri_k[k];
4316     nextrow[k]  = buf_ri_k[k] + 1;  /* next row number of k-th recved i-structure */
4317     nextai[k]   = buf_ri_k[k] + (nrows + 1);/* points to the next i-structure of k-th recved i-structure  */
4318   }
4319 
4320   ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
4321   len = 0;
4322   for (i=0;i<m;i++) {
4323     bnzi   = 0;
4324     /* add local non-zero cols of this proc's seqmat into lnk */
4325     arow   = owners[rank] + i;
4326     anzi   = ai[arow+1] - ai[arow];
4327     aj     = a->j + ai[arow];
4328     ierr = PetscLLAdd(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4329     bnzi += nlnk;
4330     /* add received col data into lnk */
4331     for (k=0; k<merge->nrecv; k++){ /* k-th received message */
4332       if (i == *nextrow[k]) { /* i-th row */
4333         anzi = *(nextai[k]+1) - *nextai[k];
4334         aj   = buf_rj[k] + *nextai[k];
4335         ierr = PetscLLAdd(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4336         bnzi += nlnk;
4337         nextrow[k]++; nextai[k]++;
4338       }
4339     }
4340     if (len < bnzi) len = bnzi;  /* =max(bnzi) */
4341 
4342     /* if free space is not available, make more free space */
4343     if (current_space->local_remaining<bnzi) {
4344       ierr = PetscFreeSpaceGet(bnzi+current_space->total_array_size,&current_space);CHKERRQ(ierr);
4345       nspacedouble++;
4346     }
4347     /* copy data into free space, then initialize lnk */
4348     ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
4349     ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr);
4350 
4351     current_space->array           += bnzi;
4352     current_space->local_used      += bnzi;
4353     current_space->local_remaining -= bnzi;
4354 
4355     bi[i+1] = bi[i] + bnzi;
4356   }
4357 
4358   ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
4359 
4360   ierr = PetscMalloc((bi[m]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr);
4361   ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr);
4362   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
4363 
4364   /* create symbolic parallel matrix B_mpi */
4365   /*---------------------------------------*/
4366   ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr);
4367   if (n==PETSC_DECIDE) {
4368     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr);
4369   } else {
4370     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
4371   }
4372   ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr);
4373   ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr);
4374   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
4375 
4376   /* B_mpi is not ready for use - assembly will be done by MatMerge_SeqsToMPINumeric() */
4377   B_mpi->assembled     = PETSC_FALSE;
4378   B_mpi->ops->destroy  = MatDestroy_MPIAIJ_SeqsToMPI;
4379   merge->bi            = bi;
4380   merge->bj            = bj;
4381   merge->buf_ri        = buf_ri;
4382   merge->buf_rj        = buf_rj;
4383   merge->coi           = PETSC_NULL;
4384   merge->coj           = PETSC_NULL;
4385   merge->owners_co     = PETSC_NULL;
4386 
4387   /* attach the supporting struct to B_mpi for reuse */
4388   ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
4389   ierr = PetscContainerSetPointer(container,merge);CHKERRQ(ierr);
4390   ierr = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr);
4391   *mpimat = B_mpi;
4392 
4393   ierr = PetscCommDestroy(&comm);CHKERRQ(ierr);
4394   ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4395   PetscFunctionReturn(0);
4396 }
4397 
4398 #undef __FUNCT__
4399 #define __FUNCT__ "MatMerge_SeqsToMPI"
4400 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPI(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat)
4401 {
4402   PetscErrorCode   ierr;
4403 
4404   PetscFunctionBegin;
4405   ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4406   if (scall == MAT_INITIAL_MATRIX){
4407     ierr = MatMerge_SeqsToMPISymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr);
4408   }
4409   ierr = MatMerge_SeqsToMPINumeric(seqmat,*mpimat);CHKERRQ(ierr);
4410   ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4411   PetscFunctionReturn(0);
4412 }
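
/*
   A minimal usage sketch of MatMerge_SeqsToMPI() (illustrative only): every process holds a
   SeqAIJ matrix seqmat with identical global dimensions, and the contributions are added into
   one parallel MPIAIJ matrix.  seqmat and m are placeholders set up by the caller.

     Mat            C;
     PetscErrorCode ierr;
     ierr = MatMerge_SeqsToMPI(PETSC_COMM_WORLD,seqmat,m,PETSC_DECIDE,MAT_INITIAL_MATRIX,&C);CHKERRQ(ierr);
     ... change the numerical values of seqmat, keeping its nonzero pattern ...
     ierr = MatMerge_SeqsToMPI(PETSC_COMM_WORLD,seqmat,m,PETSC_DECIDE,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr);
     ierr = MatDestroy(C);CHKERRQ(ierr);
*/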
4413 
4414 #undef __FUNCT__
4415 #define __FUNCT__ "MatGetLocalMat"
4416 /*@
4417      MatGetLocalMat - Creates a SeqAIJ matrix that contains all the local rows of a parallel (MPIAIJ) matrix
4418 
4419     Not Collective
4420 
4421    Input Parameters:
4422 +    A - the matrix
4423 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4424 
4425    Output Parameter:
4426 .    A_loc - the local sequential matrix generated
4427 
4428     Level: developer
4429 
4430 @*/
4431 PetscErrorCode PETSCMAT_DLLEXPORT MatGetLocalMat(Mat A,MatReuse scall,Mat *A_loc)
4432 {
4433   PetscErrorCode  ierr;
4434   Mat_MPIAIJ      *mpimat=(Mat_MPIAIJ*)A->data;
4435   Mat_SeqAIJ      *mat,*a=(Mat_SeqAIJ*)(mpimat->A)->data,*b=(Mat_SeqAIJ*)(mpimat->B)->data;
4436   PetscInt        *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*cmap=mpimat->garray;
4437   MatScalar       *aa=a->a,*ba=b->a,*cam;
4438   PetscScalar     *ca;
4439   PetscInt        am=A->rmap->n,i,j,k,cstart=A->cmap->rstart;
4440   PetscInt        *ci,*cj,col,ncols_d,ncols_o,jo;
4441 
4442   PetscFunctionBegin;
4443   ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
4444   if (scall == MAT_INITIAL_MATRIX){
4445     ierr = PetscMalloc((1+am)*sizeof(PetscInt),&ci);CHKERRQ(ierr);
4446     ci[0] = 0;
4447     for (i=0; i<am; i++){
4448       ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]);
4449     }
4450     ierr = PetscMalloc((1+ci[am])*sizeof(PetscInt),&cj);CHKERRQ(ierr);
4451     ierr = PetscMalloc((1+ci[am])*sizeof(PetscScalar),&ca);CHKERRQ(ierr);
4452     k = 0;
4453     for (i=0; i<am; i++) {
4454       ncols_o = bi[i+1] - bi[i];
4455       ncols_d = ai[i+1] - ai[i];
4456       /* off-diagonal portion of A */
4457       for (jo=0; jo<ncols_o; jo++) {
4458         col = cmap[*bj];
4459         if (col >= cstart) break;
4460         cj[k]   = col; bj++;
4461         ca[k++] = *ba++;
4462       }
4463       /* diagonal portion of A */
4464       for (j=0; j<ncols_d; j++) {
4465         cj[k]   = cstart + *aj++;
4466         ca[k++] = *aa++;
4467       }
4468       /* off-diagonal portion of A */
4469       for (j=jo; j<ncols_o; j++) {
4470         cj[k]   = cmap[*bj++];
4471         ca[k++] = *ba++;
4472       }
4473     }
4474     /* put together the new matrix */
4475     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr);
4476     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
4477     /* Since these are PETSc arrays, change flags to free them as necessary. */
4478     mat          = (Mat_SeqAIJ*)(*A_loc)->data;
4479     mat->free_a  = PETSC_TRUE;
4480     mat->free_ij = PETSC_TRUE;
4481     mat->nonew   = 0;
4482   } else if (scall == MAT_REUSE_MATRIX){
4483     mat=(Mat_SeqAIJ*)(*A_loc)->data;
4484     ci = mat->i; cj = mat->j; cam = mat->a;
4485     for (i=0; i<am; i++) {
4486       /* off-diagonal portion of A */
4487       ncols_o = bi[i+1] - bi[i];
4488       for (jo=0; jo<ncols_o; jo++) {
4489         col = cmap[*bj];
4490         if (col >= cstart) break;
4491         *cam++ = *ba++; bj++;
4492       }
4493       /* diagonal portion of A */
4494       ncols_d = ai[i+1] - ai[i];
4495       for (j=0; j<ncols_d; j++) *cam++ = *aa++;
4496       /* off-diagonal portion of A */
4497       for (j=jo; j<ncols_o; j++) {
4498         *cam++ = *ba++; bj++;
4499       }
4500     }
4501   } else {
4502     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
4503   }
4504 
4505   ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
4506   PetscFunctionReturn(0);
4507 }
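
/*
   A minimal usage sketch of MatGetLocalMat() (illustrative only): pull the local rows of a
   parallel MPIAIJ matrix into a sequential matrix, then refresh the values after the parallel
   matrix has changed.  A and A_loc are placeholders managed by the caller.

     Mat            A_loc;
     PetscErrorCode ierr;
     ierr = MatGetLocalMat(A,MAT_INITIAL_MATRIX,&A_loc);CHKERRQ(ierr);
     ... A gets new numerical values, same nonzero pattern ...
     ierr = MatGetLocalMat(A,MAT_REUSE_MATRIX,&A_loc);CHKERRQ(ierr);
     ierr = MatDestroy(A_loc);CHKERRQ(ierr);
*/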
4508 
4509 #undef __FUNCT__
4510 #define __FUNCT__ "MatGetLocalMatCondensed"
4511 /*@C
4512      MatGetLocalMatCondensed - Creates a SeqAIJ matrix from all the local rows and the NON-ZERO columns of a parallel (MPIAIJ) matrix
4513 
4514     Not Collective
4515 
4516    Input Parameters:
4517 +    A - the matrix
4518 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4519 -    row, col - index sets of rows and columns to extract (or PETSC_NULL)
4520 
4521    Output Parameter:
4522 .    A_loc - the local sequential matrix generated
4523 
4524     Level: developer
4525 
4526 @*/
4527 PetscErrorCode PETSCMAT_DLLEXPORT MatGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc)
4528 {
4529   Mat_MPIAIJ        *a=(Mat_MPIAIJ*)A->data;
4530   PetscErrorCode    ierr;
4531   PetscInt          i,start,end,ncols,nzA,nzB,*cmap,imark,*idx;
4532   IS                isrowa,iscola;
4533   Mat               *aloc;
4534 
4535   PetscFunctionBegin;
4536   ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
4537   if (!row){
4538     start = A->rmap->rstart; end = A->rmap->rend;
4539     ierr = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr);
4540   } else {
4541     isrowa = *row;
4542   }
4543   if (!col){
4544     start = A->cmap->rstart;
4545     cmap  = a->garray;
4546     nzA   = a->A->cmap->n;
4547     nzB   = a->B->cmap->n;
4548     ierr  = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr);
4549     ncols = 0;
4550     for (i=0; i<nzB; i++) {
4551       if (cmap[i] < start) idx[ncols++] = cmap[i];
4552       else break;
4553     }
4554     imark = i;
4555     for (i=0; i<nzA; i++) idx[ncols++] = start + i;
4556     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i];
4557     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,&iscola);CHKERRQ(ierr);
4558     ierr = PetscFree(idx);CHKERRQ(ierr);
4559   } else {
4560     iscola = *col;
4561   }
4562   if (scall != MAT_INITIAL_MATRIX){
4563     ierr = PetscMalloc(sizeof(Mat),&aloc);CHKERRQ(ierr);
4564     aloc[0] = *A_loc;
4565   }
4566   ierr = MatGetSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr);
4567   *A_loc = aloc[0];
4568   ierr = PetscFree(aloc);CHKERRQ(ierr);
4569   if (!row){
4570     ierr = ISDestroy(isrowa);CHKERRQ(ierr);
4571   }
4572   if (!col){
4573     ierr = ISDestroy(iscola);CHKERRQ(ierr);
4574   }
4575   ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
4576   PetscFunctionReturn(0);
4577 }
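
/*
   A minimal usage sketch of MatGetLocalMatCondensed() (illustrative only): extract the local
   rows and only the nonzero columns of A; passing PETSC_NULL for row and col lets the routine
   build the index sets itself.  A and A_loc are placeholders.

     Mat            A_loc;
     PetscErrorCode ierr;
     ierr = MatGetLocalMatCondensed(A,MAT_INITIAL_MATRIX,PETSC_NULL,PETSC_NULL,&A_loc);CHKERRQ(ierr);
     ierr = MatDestroy(A_loc);CHKERRQ(ierr);
*/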
4578 
4579 #undef __FUNCT__
4580 #define __FUNCT__ "MatGetBrowsOfAcols"
4581 /*@C
4582     MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns of the local part of A
4583 
4584     Collective on Mat
4585 
4586    Input Parameters:
4587 +    A,B - the matrices in mpiaij format
4588 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4589 -    rowb, colb - index sets of rows and columns of B to extract (or PETSC_NULL)
4590 
4591    Output Parameters:
4592 +    rowb, colb - index sets of rows and columns of B to extract
4593 .    brstart - row index of B_seq from which next B->rmap->n rows are taken from B's local rows
4594 -    B_seq - the sequential matrix generated
4595 
4596     Level: developer
4597 
4598 @*/
4599 PetscErrorCode PETSCMAT_DLLEXPORT MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,PetscInt *brstart,Mat *B_seq)
4600 {
4601   Mat_MPIAIJ        *a=(Mat_MPIAIJ*)A->data;
4602   PetscErrorCode    ierr;
4603   PetscInt          *idx,i,start,ncols,nzA,nzB,*cmap,imark;
4604   IS                isrowb,iscolb;
4605   Mat               *bseq;
4606 
4607   PetscFunctionBegin;
4608   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend){
4609     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
4610   }
4611   ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
4612 
4613   if (scall == MAT_INITIAL_MATRIX){
4614     start = A->cmap->rstart;
4615     cmap  = a->garray;
4616     nzA   = a->A->cmap->n;
4617     nzB   = a->B->cmap->n;
4618     ierr  = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr);
4619     ncols = 0;
4620     for (i=0; i<nzB; i++) {  /* row < local row index */
4621       if (cmap[i] < start) idx[ncols++] = cmap[i];
4622       else break;
4623     }
4624     imark = i;
4625     for (i=0; i<nzA; i++) idx[ncols++] = start + i;  /* local rows */
4626     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */
4627     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,&isrowb);CHKERRQ(ierr);
4628     ierr = PetscFree(idx);CHKERRQ(ierr);
4629     *brstart = imark;
4630     ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr);
4631   } else {
4632     if (!rowb || !colb) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX");
4633     isrowb = *rowb; iscolb = *colb;
4634     ierr = PetscMalloc(sizeof(Mat),&bseq);CHKERRQ(ierr);
4635     bseq[0] = *B_seq;
4636   }
4637   ierr = MatGetSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr);
4638   *B_seq = bseq[0];
4639   ierr = PetscFree(bseq);CHKERRQ(ierr);
4640   if (!rowb){
4641     ierr = ISDestroy(isrowb);CHKERRQ(ierr);
4642   } else {
4643     *rowb = isrowb;
4644   }
4645   if (!colb){
4646     ierr = ISDestroy(iscolb);CHKERRQ(ierr);
4647   } else {
4648     *colb = iscolb;
4649   }
4650   ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
4651   PetscFunctionReturn(0);
4652 }
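
/*
   A minimal usage sketch of MatGetBrowsOfAcols() (illustrative only): gather the rows of B
   matching the nonzero columns of the local part of A into a sequential matrix, keeping the
   index sets so a later call can reuse them.  A, B and the local variables are placeholders.

     Mat            B_seq;
     IS             rowb = PETSC_NULL,colb = PETSC_NULL;
     PetscInt       brstart;
     PetscErrorCode ierr;
     ierr = MatGetBrowsOfAcols(A,B,MAT_INITIAL_MATRIX,&rowb,&colb,&brstart,&B_seq);CHKERRQ(ierr);
     ... B gets new numerical values, same nonzero pattern ...
     ierr = MatGetBrowsOfAcols(A,B,MAT_REUSE_MATRIX,&rowb,&colb,&brstart,&B_seq);CHKERRQ(ierr);
     ierr = ISDestroy(rowb);CHKERRQ(ierr);
     ierr = ISDestroy(colb);CHKERRQ(ierr);
     ierr = MatDestroy(B_seq);CHKERRQ(ierr);
*/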
4653 
4654 #undef __FUNCT__
4655 #define __FUNCT__ "MatGetBrowsOfAoCols"
4656 /*@C
4657     MatGetBrowsOfAoCols - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns
4658     of the OFF-DIAGONAL portion of the local part of A
4659 
4660     Collective on Mat
4661 
4662    Input Parameters:
4663 +    A,B - the matrices in mpiaij format
4664 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4665 .    startsj - starting point in B's sending and receiving j-arrays, saved for MAT_REUSE (or PETSC_NULL)
4666 .    startsj_r - similar to startsj for receives
4667 -    bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or PETSC_NULL)
4668 
4669    Output Parameter:
4670 .    B_oth - the sequential matrix generated
4671 
4672     Level: developer
4673 
4674 @*/
4675 PetscErrorCode PETSCMAT_DLLEXPORT MatGetBrowsOfAoCols(Mat A,Mat B,MatReuse scall,PetscInt **startsj,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth)
4676 {
4677   VecScatter_MPI_General *gen_to,*gen_from;
4678   PetscErrorCode         ierr;
4679   Mat_MPIAIJ             *a=(Mat_MPIAIJ*)A->data;
4680   Mat_SeqAIJ             *b_oth;
4681   VecScatter             ctx=a->Mvctx;
4682   MPI_Comm               comm=((PetscObject)ctx)->comm;
4683   PetscMPIInt            *rprocs,*sprocs,tag=((PetscObject)ctx)->tag,rank;
4684   PetscInt               *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap->n,row,*b_othi,*b_othj;
4685   PetscScalar            *rvalues,*svalues;
4686   MatScalar              *b_otha,*bufa,*bufA;
4687   PetscInt               i,j,k,l,ll,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len;
4688   MPI_Request            *rwaits = PETSC_NULL,*swaits = PETSC_NULL;
4689   MPI_Status             *sstatus,rstatus;
4690   PetscMPIInt            jj;
4691   PetscInt               *cols,sbs,rbs;
4692   PetscScalar            *vals;
4693 
4694   PetscFunctionBegin;
4695   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend){
4696     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
4697   }
4698   ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
4699   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4700 
4701   gen_to   = (VecScatter_MPI_General*)ctx->todata;
4702   gen_from = (VecScatter_MPI_General*)ctx->fromdata;
4703   rvalues  = gen_from->values; /* holds the length of receiving row */
4704   svalues  = gen_to->values;   /* holds the length of sending row */
4705   nrecvs   = gen_from->n;
4706   nsends   = gen_to->n;
4707 
4708   ierr = PetscMalloc2(nrecvs,MPI_Request,&rwaits,nsends,MPI_Request,&swaits);CHKERRQ(ierr);
4709   srow     = gen_to->indices;   /* local row index to be sent */
4710   sstarts  = gen_to->starts;
4711   sprocs   = gen_to->procs;
4712   sstatus  = gen_to->sstatus;
4713   sbs      = gen_to->bs;
4714   rstarts  = gen_from->starts;
4715   rprocs   = gen_from->procs;
4716   rbs      = gen_from->bs;
4717 
4718   if (!startsj || !bufa_ptr) scall = MAT_INITIAL_MATRIX;
4719   if (scall == MAT_INITIAL_MATRIX){
4720     /* i-array */
4721     /*---------*/
4722     /*  post receives */
4723     for (i=0; i<nrecvs; i++){
4724       rowlen = (PetscInt*)rvalues + rstarts[i]*rbs;
4725       nrows = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */
4726       ierr = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
4727     }
4728 
4729     /* pack the outgoing message */
4730     ierr = PetscMalloc2(nsends+1,PetscInt,&sstartsj,nrecvs+1,PetscInt,&rstartsj);CHKERRQ(ierr);
4731     sstartsj[0] = 0;  rstartsj[0] = 0;
4732     len = 0; /* total length of j or a array to be sent */
4733     k = 0;
4734     for (i=0; i<nsends; i++){
4735       rowlen = (PetscInt*)svalues + sstarts[i]*sbs;
4736       nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
4737       for (j=0; j<nrows; j++) {
4738         row = srow[k] + B->rmap->range[rank]; /* global row idx */
4739         for (l=0; l<sbs; l++){
4740           ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); /* rowlength */
4741           rowlen[j*sbs+l] = ncols;
4742           len += ncols;
4743           ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr);
4744         }
4745         k++;
4746       }
4747       ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
4748       sstartsj[i+1] = len;  /* starting point of (i+1)-th outgoing msg in bufj and bufa */
4749     }
4750     /* recvs and sends of i-array are completed */
4751     i = nrecvs;
4752     while (i--) {
4753       ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
4754     }
4755     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
4756 
4757     /* allocate buffers for sending j and a arrays */
4758     ierr = PetscMalloc((len+1)*sizeof(PetscInt),&bufj);CHKERRQ(ierr);
4759     ierr = PetscMalloc((len+1)*sizeof(PetscScalar),&bufa);CHKERRQ(ierr);
4760 
4761     /* create i-array of B_oth */
4762     ierr = PetscMalloc((aBn+2)*sizeof(PetscInt),&b_othi);CHKERRQ(ierr);
4763     b_othi[0] = 0;
4764     len = 0; /* total length of j or a array to be received */
4765     k = 0;
4766     for (i=0; i<nrecvs; i++){
4767       rowlen = (PetscInt*)rvalues + rstarts[i]*rbs;
4768       nrows = rbs*(rstarts[i+1]-rstarts[i]); /* num of rows to be received */
4769       for (j=0; j<nrows; j++) {
4770         b_othi[k+1] = b_othi[k] + rowlen[j];
4771         len += rowlen[j]; k++;
4772       }
4773       rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */
4774     }
4775 
4776     /* allocate space for j and a arrays of B_oth */
4777     ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(PetscInt),&b_othj);CHKERRQ(ierr);
4778     ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(MatScalar),&b_otha);CHKERRQ(ierr);
4779 
4780     /* j-array */
4781     /*---------*/
4782     /*  post receives of j-array */
4783     for (i=0; i<nrecvs; i++){
4784       nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
4785       ierr = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
4786     }
4787 
4788     /* pack the outgoing message j-array */
4789     k = 0;
4790     for (i=0; i<nsends; i++){
4791       nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
4792       bufJ = bufj+sstartsj[i];
4793       for (j=0; j<nrows; j++) {
4794         row  = srow[k++] + B->rmap->range[rank]; /* global row idx */
4795         for (ll=0; ll<sbs; ll++){
4796           ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr);
4797           for (l=0; l<ncols; l++){
4798             *bufJ++ = cols[l];
4799           }
4800           ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr);
4801         }
4802       }
4803       ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
4804     }
4805 
4806     /* recvs and sends of j-array are completed */
4807     i = nrecvs;
4808     while (i--) {
4809       ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
4810     }
4811     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
4812   } else if (scall == MAT_REUSE_MATRIX){
4813     sstartsj = *startsj;
4814     rstartsj = *startsj_r;
4815     bufa     = *bufa_ptr;
4816     b_oth    = (Mat_SeqAIJ*)(*B_oth)->data;
4817     b_otha   = b_oth->a;
4818   } else {
4819     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
4820   }
4821 
4822   /* a-array */
4823   /*---------*/
4824   /*  post receives of a-array */
4825   for (i=0; i<nrecvs; i++){
4826     nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
4827     ierr = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
4828   }
4829 
4830   /* pack the outgoing message a-array */
4831   k = 0;
4832   for (i=0; i<nsends; i++){
4833     nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
4834     bufA = bufa+sstartsj[i];
4835     for (j=0; j<nrows; j++) {
4836       row  = srow[k++] + B->rmap->range[rank]; /* global row idx */
4837       for (ll=0; ll<sbs; ll++){
4838         ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr);
4839         for (l=0; l<ncols; l++){
4840           *bufA++ = vals[l];
4841         }
4842         ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr);
4843       }
4844     }
4845     ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
4846   }
4847   /* recvs and sends of a-array are completed */
4848   i = nrecvs;
4849   while (i--) {
4850     ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
4851   }
4852   if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
4853   ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr);
4854 
4855   if (scall == MAT_INITIAL_MATRIX){
4856     /* put together the new matrix */
4857     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr);
4858 
4859     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
4860     /* Since these are PETSc arrays, change flags to free them as necessary. */
4861     b_oth          = (Mat_SeqAIJ *)(*B_oth)->data;
4862     b_oth->free_a  = PETSC_TRUE;
4863     b_oth->free_ij = PETSC_TRUE;
4864     b_oth->nonew   = 0;
4865 
4866     ierr = PetscFree(bufj);CHKERRQ(ierr);
4867     if (!startsj || !bufa_ptr){
4868       ierr = PetscFree2(sstartsj,rstartsj);CHKERRQ(ierr);
4869       ierr = PetscFree(bufa);CHKERRQ(ierr); /* send buffer is not returned to the caller, so free it here */
4870     } else {
4871       *startsj   = sstartsj;
4872       *startsj_r = rstartsj;
4873       *bufa_ptr  = bufa;
4874     }
4875   }
4876   ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
4877   PetscFunctionReturn(0);
4878 }
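
/*
   A minimal usage sketch of MatGetBrowsOfAoCols() (illustrative only): gather the rows of B
   matching the nonzero columns of the off-diagonal part of the local A.  Keeping the startsj,
   startsj_r and bufa arrays from the first call allows MAT_REUSE_MATRIX on later calls;
   passing PETSC_NULL for them forces a fresh setup each time.  A, B and B_oth are placeholders.

     Mat            B_oth;
     PetscInt       *startsj = PETSC_NULL,*startsj_r = PETSC_NULL;
     MatScalar      *bufa = PETSC_NULL;
     PetscErrorCode ierr;
     ierr = MatGetBrowsOfAoCols(A,B,MAT_INITIAL_MATRIX,&startsj,&startsj_r,&bufa,&B_oth);CHKERRQ(ierr);
     ... B gets new numerical values, same nonzero pattern ...
     ierr = MatGetBrowsOfAoCols(A,B,MAT_REUSE_MATRIX,&startsj,&startsj_r,&bufa,&B_oth);CHKERRQ(ierr);
     ierr = PetscFree2(startsj,startsj_r);CHKERRQ(ierr);
     ierr = PetscFree(bufa);CHKERRQ(ierr);
     ierr = MatDestroy(B_oth);CHKERRQ(ierr);
*/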
4879 
4880 #undef __FUNCT__
4881 #define __FUNCT__ "MatGetCommunicationStructs"
4882 /*@C
4883   MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication.
4884 
4885   Not Collective
4886 
4887   Input Parameter:
4888 . A - The matrix in mpiaij format
4889 
4890   Output Parameters:
4891 + lvec - The local vector holding off-process values from the argument to a matrix-vector product
4892 . colmap - A map from global column index to local index into lvec
4893 - multScatter - A scatter from the argument of a matrix-vector product to lvec
4894 
4895   Level: developer
4896 
4897 @*/
4898 #if defined (PETSC_USE_CTABLE)
4899 PetscErrorCode PETSCMAT_DLLEXPORT MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter)
4900 #else
4901 PetscErrorCode PETSCMAT_DLLEXPORT MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter)
4902 #endif
4903 {
4904   Mat_MPIAIJ *a;
4905 
4906   PetscFunctionBegin;
4907   PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
4908   PetscValidPointer(lvec, 2);
4909   PetscValidPointer(colmap, 3);
4910   PetscValidPointer(multScatter, 4);
4911   a = (Mat_MPIAIJ *) A->data;
4912   if (lvec) *lvec = a->lvec;
4913   if (colmap) *colmap = a->colmap;
4914   if (multScatter) *multScatter = a->Mvctx;
4915   PetscFunctionReturn(0);
4916 }
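
/*
   A minimal usage sketch of MatGetCommunicationStructs() (illustrative only): peek at the
   scatter machinery an assembled MPIAIJ matrix uses for MatMult().  The returned objects are
   owned by the matrix and must not be destroyed by the caller.  A is a placeholder.

     Vec            lvec;
     VecScatter     Mvctx;
     PetscInt       *colmap;      use PetscTable colmap instead when PETSC_USE_CTABLE is defined
     PetscErrorCode ierr;
     ierr = MatGetCommunicationStructs(A,&lvec,&colmap,&Mvctx);CHKERRQ(ierr);
*/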
4917 
4918 EXTERN_C_BEGIN
4919 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_MPIAIJ_MPICRL(Mat,const MatType,MatReuse,Mat*);
4920 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_MPIAIJ_MPICSRPERM(Mat,const MatType,MatReuse,Mat*);
4921 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_MPIAIJ_MPISBAIJ(Mat,const MatType,MatReuse,Mat*);
4922 EXTERN_C_END
4923 
4924 #undef __FUNCT__
4925 #define __FUNCT__ "MatMatMultNumeric_MPIDense_MPIAIJ"
4926 /*
4927     Computes (B'*A')' since computing A*B directly is untenable
4928 
4929                n                       p                          p
4930         (              )       (              )         (                  )
4931       m (      A       )  *  n (       B      )   =   m (         C        )
4932         (              )       (              )         (                  )
4933 
4934 */
4935 PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C)
4936 {
4937   PetscErrorCode     ierr;
4938   Mat                At,Bt,Ct;
4939 
4940   PetscFunctionBegin;
4941   ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr);
4942   ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr);
4943   ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,1.0,&Ct);CHKERRQ(ierr);
4944   ierr = MatDestroy(At);CHKERRQ(ierr);
4945   ierr = MatDestroy(Bt);CHKERRQ(ierr);
4946   ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr);
4947   ierr = MatDestroy(Ct);CHKERRQ(ierr);
4948   PetscFunctionReturn(0);
4949 }
4950 
4951 #undef __FUNCT__
4952 #define __FUNCT__ "MatMatMultSymbolic_MPIDense_MPIAIJ"
4953 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat *C)
4954 {
4955   PetscErrorCode ierr;
4956   PetscInt       m=A->rmap->n,n=B->cmap->n;
4957   Mat            Cmat;
4958 
4959   PetscFunctionBegin;
4960   if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %D != B->rmap->n %D",A->cmap->n,B->rmap->n);
4961   ierr = MatCreate(((PetscObject)A)->comm,&Cmat);CHKERRQ(ierr);
4962   ierr = MatSetSizes(Cmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
4963   ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr);
4964   ierr = MatMPIDenseSetPreallocation(Cmat,PETSC_NULL);CHKERRQ(ierr);
4965   ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4966   ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4967   *C   = Cmat;
4968   PetscFunctionReturn(0);
4969 }
4970 
4971 /* ----------------------------------------------------------------*/
4972 #undef __FUNCT__
4973 #define __FUNCT__ "MatMatMult_MPIDense_MPIAIJ"
4974 PetscErrorCode MatMatMult_MPIDense_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C)
4975 {
4976   PetscErrorCode ierr;
4977 
4978   PetscFunctionBegin;
4979   if (scall == MAT_INITIAL_MATRIX){
4980     ierr = MatMatMultSymbolic_MPIDense_MPIAIJ(A,B,fill,C);CHKERRQ(ierr);
4981   }
4982   ierr = MatMatMultNumeric_MPIDense_MPIAIJ(A,B,*C);CHKERRQ(ierr);
4983   PetscFunctionReturn(0);
4984 }
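
/*
   A minimal usage sketch (illustrative only): with A of type MPIDENSE and B of type MPIAIJ, a
   call to MatMatMult() is expected to dispatch to MatMatMult_MPIDense_MPIAIJ() above via the
   function composed in MatCreate_MPIAIJ() below.  A and B are placeholders for assembled
   matrices with compatible dimensions.

     Mat            C;
     PetscErrorCode ierr;
     ierr = MatMatMult(A,B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&C);CHKERRQ(ierr);
     ierr = MatDestroy(C);CHKERRQ(ierr);
*/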
4985 
4986 EXTERN_C_BEGIN
4987 #if defined(PETSC_HAVE_MUMPS)
4988 extern PetscErrorCode MatGetFactor_mpiaij_mumps(Mat,MatFactorType,Mat*);
4989 #endif
4990 #if defined(PETSC_HAVE_PASTIX)
4991 extern PetscErrorCode MatGetFactor_mpiaij_pastix(Mat,MatFactorType,Mat*);
4992 #endif
4993 #if defined(PETSC_HAVE_SUPERLU_DIST)
4994 extern PetscErrorCode MatGetFactor_mpiaij_superlu_dist(Mat,MatFactorType,Mat*);
4995 #endif
4996 #if defined(PETSC_HAVE_SPOOLES)
4997 extern PetscErrorCode MatGetFactor_mpiaij_spooles(Mat,MatFactorType,Mat*);
4998 #endif
4999 EXTERN_C_END
5000 
5001 /*MC
5002    MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.
5003 
5004    Options Database Keys:
5005 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions()
5006 
5007   Level: beginner
5008 
5009 .seealso: MatCreateMPIAIJ()
5010 M*/
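
/*
   A minimal creation sketch for the "mpiaij" type described above (illustrative only; M and N
   are placeholder global dimensions and the preallocation counts are made up):

     Mat            A;
     PetscErrorCode ierr;
     ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
     ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,M,N);CHKERRQ(ierr);
     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
     ierr = MatMPIAIJSetPreallocation(A,5,PETSC_NULL,2,PETSC_NULL);CHKERRQ(ierr);

   Alternatively, MatSetFromOptions(A) together with the -mat_type mpiaij option key can be
   used in place of the explicit MatSetType() call.
*/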
5011 
5012 EXTERN_C_BEGIN
5013 #undef __FUNCT__
5014 #define __FUNCT__ "MatCreate_MPIAIJ"
5015 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_MPIAIJ(Mat B)
5016 {
5017   Mat_MPIAIJ     *b;
5018   PetscErrorCode ierr;
5019   PetscMPIInt    size;
5020 
5021   PetscFunctionBegin;
5022   ierr = MPI_Comm_size(((PetscObject)B)->comm,&size);CHKERRQ(ierr);
5023 
5024   ierr            = PetscNewLog(B,Mat_MPIAIJ,&b);CHKERRQ(ierr);
5025   B->data         = (void*)b;
5026   ierr            = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
5027   B->rmap->bs     = 1;
5028   B->assembled    = PETSC_FALSE;
5029   B->mapping      = 0;
5030 
5031   B->insertmode   = NOT_SET_VALUES;
5032   b->size         = size;
5033   ierr = MPI_Comm_rank(((PetscObject)B)->comm,&b->rank);CHKERRQ(ierr);
5034 
5035   /* build cache for off array entries formed */
5036   ierr = MatStashCreate_Private(((PetscObject)B)->comm,1,&B->stash);CHKERRQ(ierr);
5037   b->donotstash  = PETSC_FALSE;
5038   b->colmap      = 0;
5039   b->garray      = 0;
5040   b->roworiented = PETSC_TRUE;
5041 
5042   /* stuff used for matrix vector multiply */
5043   b->lvec      = PETSC_NULL;
5044   b->Mvctx     = PETSC_NULL;
5045 
5046   /* stuff for MatGetRow() */
5047   b->rowindices   = 0;
5048   b->rowvalues    = 0;
5049   b->getrowactive = PETSC_FALSE;
5050 
5051 #if defined(PETSC_HAVE_SPOOLES)
5052   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_spooles_C",
5053                                      "MatGetFactor_mpiaij_spooles",
5054                                      MatGetFactor_mpiaij_spooles);CHKERRQ(ierr);
5055 #endif
5056 #if defined(PETSC_HAVE_MUMPS)
5057   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_mumps_C",
5058                                      "MatGetFactor_mpiaij_mumps",
5059                                      MatGetFactor_mpiaij_mumps);CHKERRQ(ierr);
5060 #endif
5061 #if defined(PETSC_HAVE_PASTIX)
5062   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_pastix_C",
5063 					   "MatGetFactor_mpiaij_pastix",
5064 					   MatGetFactor_mpiaij_pastix);CHKERRQ(ierr);
5065 #endif
5066 #if defined(PETSC_HAVE_SUPERLU_DIST)
5067   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_superlu_dist_C",
5068                                      "MatGetFactor_mpiaij_superlu_dist",
5069                                      MatGetFactor_mpiaij_superlu_dist);CHKERRQ(ierr);
5070 #endif
5071   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
5072                                      "MatStoreValues_MPIAIJ",
5073                                      MatStoreValues_MPIAIJ);CHKERRQ(ierr);
5074   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
5075                                      "MatRetrieveValues_MPIAIJ",
5076                                      MatRetrieveValues_MPIAIJ);CHKERRQ(ierr);
5077   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
5078 				     "MatGetDiagonalBlock_MPIAIJ",
5079                                      MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr);
5080   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatIsTranspose_C",
5081 				     "MatIsTranspose_MPIAIJ",
5082 				     MatIsTranspose_MPIAIJ);CHKERRQ(ierr);
5083   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocation_C",
5084 				     "MatMPIAIJSetPreallocation_MPIAIJ",
5085 				     MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr);
5086   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",
5087 				     "MatMPIAIJSetPreallocationCSR_MPIAIJ",
5088 				     MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr);
5089   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C",
5090 				     "MatDiagonalScaleLocal_MPIAIJ",
5091 				     MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr);
5092   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpicsrperm_C",
5093                                      "MatConvert_MPIAIJ_MPICSRPERM",
5094                                       MatConvert_MPIAIJ_MPICSRPERM);CHKERRQ(ierr);
5095   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpicrl_C",
5096                                      "MatConvert_MPIAIJ_MPICRL",
5097                                       MatConvert_MPIAIJ_MPICRL);CHKERRQ(ierr);
5098   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C",
5099                                      "MatConvert_MPIAIJ_MPISBAIJ",
5100                                       MatConvert_MPIAIJ_MPISBAIJ);CHKERRQ(ierr);
5101   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMult_mpidense_mpiaij_C",
5102                                      "MatMatMult_MPIDense_MPIAIJ",
5103                                       MatMatMult_MPIDense_MPIAIJ);CHKERRQ(ierr);
5104   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMultSymbolic_mpidense_mpiaij_C",
5105                                      "MatMatMultSymbolic_MPIDense_MPIAIJ",
5106                                       MatMatMultSymbolic_MPIDense_MPIAIJ);CHKERRQ(ierr);
5107   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMultNumeric_mpidense_mpiaij_C",
5108                                      "MatMatMultNumeric_MPIDense_MPIAIJ",
5109                                       MatMatMultNumeric_MPIDense_MPIAIJ);CHKERRQ(ierr);
5110   ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr);
5111   PetscFunctionReturn(0);
5112 }
5113 EXTERN_C_END
5114 
5115 #undef __FUNCT__
5116 #define __FUNCT__ "MatCreateMPIAIJWithSplitArrays"
5117 /*@
5118      MatCreateMPIAIJWithSplitArrays - creates an MPIAIJ matrix using arrays that contain the "diagonal"
5119          and "off-diagonal" part of the matrix in CSR format.
5120 
5121    Collective on MPI_Comm
5122 
5123    Input Parameters:
5124 +  comm - MPI communicator
5125 .  m - number of local rows (Cannot be PETSC_DECIDE)
5126 .  n - This value should be the same as the local size used in creating the
5127        x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have it
5128        calculated if N is given). For square matrices n is almost always m.
5129 .  M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given)
5130 .  N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given)
5131 .   i - row indices for "diagonal" portion of matrix
5132 .   j - column indices
5133 .   a - matrix values
5134 .   oi - row indices for "off-diagonal" portion of matrix
5135 .   oj - column indices
5136 -   oa - matrix values
5137 
5138    Output Parameter:
5139 .   mat - the matrix
5140 
5141    Level: advanced
5142 
5143    Notes:
5144        The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc.
5145 
5146        The i and j indices are 0 based
5147 
5148        See MatCreateMPIAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix
5149 
5150        This sets local rows and cannot be used to set off-processor values.
5151 
5152        You cannot later use MatSetValues() to change values in this matrix.
5153 
5154 .keywords: matrix, aij, compressed row, sparse, parallel
5155 
5156 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
5157           MPIAIJ, MatCreateMPIAIJ(), MatCreateMPIAIJWithArrays()
5158 @*/
5159 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],
5160 								PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat)
5161 {
5162   PetscErrorCode ierr;
5163   Mat_MPIAIJ     *maij;
5164 
5165   PetscFunctionBegin;
5166   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
5167   if (i[0]) {
5168     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
5169   }
5170   if (oi[0]) {
5171     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0");
5172   }
5173   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
5174   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
5175   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
5176   maij = (Mat_MPIAIJ*) (*mat)->data;
5177   maij->donotstash     = PETSC_TRUE;
5178   (*mat)->preallocated = PETSC_TRUE;
5179 
5180   ierr = PetscLayoutSetBlockSize((*mat)->rmap,1);CHKERRQ(ierr);
5181   ierr = PetscLayoutSetBlockSize((*mat)->cmap,1);CHKERRQ(ierr);
5182   ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr);
5183   ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr);
5184 
5185   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr);
5186   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr);
5187 
5188   ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5189   ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5190   ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5191   ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5192 
5193   ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5194   ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5195   PetscFunctionReturn(0);
5196 }
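
/*
   A minimal usage sketch of MatCreateMPIAIJWithSplitArrays() (illustrative only, intended to
   be run on two or more processes): each process contributes one local row with one entry in
   its own "diagonal" block and one entry in the column owned by the next process.  The index
   and value arrays are made up for illustration and, since they are not copied, must stay
   valid for the life of the matrix; note that as used by this routine the off-diagonal j
   array (oj) holds global column indices.

     PetscMPIInt    rank,size;
     PetscInt       i[2]  = {0,1},j[1]  = {0};
     PetscInt       oi[2] = {0,1},oj[1];
     PetscScalar    a[1]  = {2.0},oa[1] = {-1.0};
     Mat            A;
     PetscErrorCode ierr;
     ierr  = MPI_Comm_rank(PETSC_COMM_WORLD,&rank);CHKERRQ(ierr);
     ierr  = MPI_Comm_size(PETSC_COMM_WORLD,&size);CHKERRQ(ierr);
     oj[0] = (rank+1)%size;   global column owned by the next process
     ierr  = MatCreateMPIAIJWithSplitArrays(PETSC_COMM_WORLD,1,1,PETSC_DETERMINE,PETSC_DETERMINE,i,j,a,oi,oj,oa,&A);CHKERRQ(ierr);
*/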
5197 
5198 /*
5199     Special version for direct calls from Fortran
5200 */
5201 #if defined(PETSC_HAVE_FORTRAN_CAPS)
5202 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ
5203 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
5204 #define matsetvaluesmpiaij_ matsetvaluesmpiaij
5205 #endif
5206 
5207 /* Change these macros so can be used in void function */
5208 #undef CHKERRQ
5209 #define CHKERRQ(ierr) CHKERRABORT(PETSC_COMM_WORLD,ierr)
5210 #undef SETERRQ2
5211 #define SETERRQ2(comm,ierr,b,c,d) CHKERRABORT(comm,ierr)
5212 #undef SETERRQ
5213 #define SETERRQ(c,ierr,b) CHKERRABORT(c,ierr)
5214 
5215 EXTERN_C_BEGIN
5216 #undef __FUNCT__
5217 #define __FUNCT__ "matsetvaluesmpiaij_"
5218 void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr)
5219 {
5220   Mat             mat = *mmat;
5221   PetscInt        m = *mm, n = *mn;
5222   InsertMode      addv = *maddv;
5223   Mat_MPIAIJ      *aij = (Mat_MPIAIJ*)mat->data;
5224   PetscScalar     value;
5225   PetscErrorCode  ierr;
5226 
5227   ierr = MatPreallocated(mat);CHKERRQ(ierr);
5228   if (mat->insertmode == NOT_SET_VALUES) {
5229     mat->insertmode = addv;
5230   }
5231 #if defined(PETSC_USE_DEBUG)
5232   else if (mat->insertmode != addv) {
5233     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
5234   }
5235 #endif
5236   {
5237   PetscInt        i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
5238   PetscInt        cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
5239   PetscTruth      roworiented = aij->roworiented;
5240 
5241   /* Some Variables required in the macro */
5242   Mat             A = aij->A;
5243   Mat_SeqAIJ      *a = (Mat_SeqAIJ*)A->data;
5244   PetscInt        *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
5245   MatScalar       *aa = a->a;
5246   PetscTruth      ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES))?PETSC_TRUE:PETSC_FALSE);
5247   Mat             B = aij->B;
5248   Mat_SeqAIJ      *b = (Mat_SeqAIJ*)B->data;
5249   PetscInt        *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
5250   MatScalar       *ba = b->a;
5251 
5252   PetscInt        *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
5253   PetscInt        nonew = a->nonew;
5254   MatScalar       *ap1,*ap2;
5255 
5256   PetscFunctionBegin;
5257   for (i=0; i<m; i++) {
5258     if (im[i] < 0) continue;
5259 #if defined(PETSC_USE_DEBUG)
5260     if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
5261 #endif
5262     if (im[i] >= rstart && im[i] < rend) {
5263       row      = im[i] - rstart;
5264       lastcol1 = -1;
5265       rp1      = aj + ai[row];
5266       ap1      = aa + ai[row];
5267       rmax1    = aimax[row];
5268       nrow1    = ailen[row];
5269       low1     = 0;
5270       high1    = nrow1;
5271       lastcol2 = -1;
5272       rp2      = bj + bi[row];
5273       ap2      = ba + bi[row];
5274       rmax2    = bimax[row];
5275       nrow2    = bilen[row];
5276       low2     = 0;
5277       high2    = nrow2;
5278 
5279       for (j=0; j<n; j++) {
5280         if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
5281         if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
5282         if (in[j] >= cstart && in[j] < cend){
5283           col = in[j] - cstart;
5284           MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
5285         } else if (in[j] < 0) continue;
5286 #if defined(PETSC_USE_DEBUG)
5287         else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
5288 #endif
5289         else {
5290           if (mat->was_assembled) {
5291             if (!aij->colmap) {
5292               ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
5293             }
5294 #if defined (PETSC_USE_CTABLE)
5295             ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
5296 	    col--;
5297 #else
5298             col = aij->colmap[in[j]] - 1;
5299 #endif
5300             if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
5301               ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
5302               col =  in[j];
5303               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
5304               B = aij->B;
5305               b = (Mat_SeqAIJ*)B->data;
5306               bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
5307               rp2      = bj + bi[row];
5308               ap2      = ba + bi[row];
5309               rmax2    = bimax[row];
5310               nrow2    = bilen[row];
5311               low2     = 0;
5312               high2    = nrow2;
5313               bm       = aij->B->rmap->n;
5314               ba = b->a;
5315             }
5316           } else col = in[j];
5317           MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
5318         }
5319       }
5320     } else {
5321       if (!aij->donotstash) {
5322         if (roworiented) {
5323           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscTruth)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
5324         } else {
5325           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscTruth)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
5326         }
5327       }
5328     }
5329   }}
5330   PetscFunctionReturnVoid();
5331 }
5332 EXTERN_C_END
5333 
5334