xref: /petsc/src/mat/impls/aij/mpi/mpiaij.c (revision 6abefa4f446b2ea91915da4e25ea0defa83db48e)
1 #define PETSCMAT_DLL
2 
3 #include "../src/mat/impls/aij/mpi/mpiaij.h"   /*I "petscmat.h" I*/
4 #include "petscblaslapack.h"
5 
#undef __FUNCT__
#define __FUNCT__ "MatDistribute_MPIAIJ"
/*
    Distributes a SeqAIJ matrix across a set of processes. Code stolen from
    MatLoad_MPIAIJ(). Horrible lack of reuse. Should be a routine for each matrix type.

    Only for square matrices

    comm   - communicator the distributed matrix lives on
    gmat   - the complete sequential matrix (significant only on process 0)
    m      - number of rows this process is to own
    reuse  - MAT_INITIAL_MATRIX creates *inmat from scratch;
             MAT_REUSE_MATRIX only moves fresh numerical values into an *inmat
             built by a previous call (nonzero structure must be unchanged)
    inmat  - the resulting distributed MPIAIJ matrix
*/
PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat)
{
  PetscMPIInt    rank,size;
  PetscInt       *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz,*gmataj,cnt,row,*ld;
  PetscErrorCode ierr;
  Mat            mat;
  Mat_SeqAIJ     *gmata;
  PetscMPIInt    tag;
  MPI_Status     status;
  PetscTruth     aij;
  MatScalar      *gmataa,*ao,*ad,*gmataarestore=0;

  PetscFunctionBegin;
  CHKMEMQ;
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  /* only process 0 holds the global matrix, so only it can validate the type */
  if (!rank) {
    ierr = PetscTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);CHKERRQ(ierr);
    if (!aij) SETERRQ1(PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name);
  }
  if (reuse == MAT_INITIAL_MATRIX) {
    ierr = MatCreate(comm,&mat);CHKERRQ(ierr);
    ierr = MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
    ierr = MatSetType(mat,MATAIJ);CHKERRQ(ierr);
    /* rowners[] becomes the prefix sum of the local sizes, i.e. the row ownership ranges */
    ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr);
    ierr = PetscMalloc2(m,PetscInt,&dlens,m,PetscInt,&olens);CHKERRQ(ierr);
    ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
    rowners[0] = 0;
    for (i=2; i<=size; i++) {
      rowners[i] += rowners[i-1];
    }
    rstart = rowners[rank];
    rend   = rowners[rank+1];
    ierr   = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
    if (!rank) {
      gmata = (Mat_SeqAIJ*) gmat->data;
      /* send row lengths to all processors */
      for (i=0; i<m; i++) dlens[i] = gmata->ilen[i];
      for (i=1; i<size; i++) {
        ierr = MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
      }
      /* determine number diagonal and off-diagonal counts */
      ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
      /* ld[i] counts entries of local row i with global column < rstart (left of the
         diagonal block); it is stashed in the matrix so MAT_REUSE_MATRIX calls can
         split incoming rows between the A and B blocks */
      ierr = PetscMalloc(m*sizeof(PetscInt),&ld);CHKERRQ(ierr);
      ierr = PetscMemzero(ld,m*sizeof(PetscInt));CHKERRQ(ierr);
      jj = 0;
      for (i=0; i<m; i++) {
        for (j=0; j<dlens[i]; j++) {
          if (gmata->j[jj] < rstart) ld[i]++;
          if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++;
          jj++;
        }
      }
      /* send column indices to other processes */
      for (i=1; i<size; i++) {
        nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
        ierr = MPI_Send(&nz,1,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
        ierr = MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
      }

      /* send numerical values to other processes */
      for (i=1; i<size; i++) {
        nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
        ierr = MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
      }
      /* process 0 reads its own rows directly out of the sequential matrix */
      gmataa = gmata->a;
      gmataj = gmata->j;

    } else {
      /* receive row lengths */
      ierr = MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
      /* receive column indices */
      ierr = MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
      ierr = PetscMalloc2(nz,PetscScalar,&gmataa,nz,PetscInt,&gmataj);CHKERRQ(ierr);
      ierr = MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
      /* determine number diagonal and off-diagonal counts */
      ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
      ierr = PetscMalloc(m*sizeof(PetscInt),&ld);CHKERRQ(ierr);
      ierr = PetscMemzero(ld,m*sizeof(PetscInt));CHKERRQ(ierr);
      jj = 0;
      for (i=0; i<m; i++) {
        for (j=0; j<dlens[i]; j++) {
          if (gmataj[jj] < rstart) ld[i]++;
          if (gmataj[jj] < rstart || gmataj[jj] >= rend) olens[i]++;
          jj++;
        }
      }
      /* receive numerical values */
      ierr = PetscMemzero(gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr);
      ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
    }
    /* set preallocation: dlens currently holds total row lengths, convert to
       diagonal-block lengths */
    for (i=0; i<m; i++) {
      dlens[i] -= olens[i];
    }
    /* NOTE(review): both calls are made; presumably MATAIJ resolves to SeqAIJ or
       MPIAIJ depending on the communicator size and the non-matching call is a
       no-op -- confirm */
    ierr = MatSeqAIJSetPreallocation(mat,0,dlens);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);CHKERRQ(ierr);

    /* restore dlens to full row lengths for the insertion loop below */
    for (i=0; i<m; i++) {
      dlens[i] += olens[i];
    }
    cnt  = 0;
    for (i=0; i<m; i++) {
      row  = rstart + i;
      ierr = MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);CHKERRQ(ierr);
      cnt += dlens[i];
    }
    /* on process 0 gmataa/gmataj alias the input matrix and must not be freed */
    if (rank) {
      ierr = PetscFree2(gmataa,gmataj);CHKERRQ(ierr);
    }
    ierr = PetscFree2(dlens,olens);CHKERRQ(ierr);
    ierr = PetscFree(rowners);CHKERRQ(ierr);
    /* keep ld for use by later MAT_REUSE_MATRIX calls */
    ((Mat_MPIAIJ*)(mat->data))->ld = ld;
    *inmat = mat;
  } else {   /* column indices are already set; only need to move over numerical values from process 0 */
    Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data;
    Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data;
    mat   = *inmat;
    ierr  = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
    if (!rank) {
      /* send numerical values to other processes */
      gmata = (Mat_SeqAIJ*) gmat->data;
      ierr   = MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);CHKERRQ(ierr);
      gmataa = gmata->a;
      for (i=1; i<size; i++) {
        nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
        ierr = MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
      }
      /* process 0's own values sit at the front of gmataa */
      nz   = gmata->i[rowners[1]]-gmata->i[rowners[0]];
    } else {
      /* receive numerical values from process 0*/
      nz   = Ad->nz + Ao->nz;
      /* gmataa is advanced during the copy below; keep the base pointer for PetscFree() */
      ierr = PetscMalloc(nz*sizeof(PetscScalar),&gmataa);CHKERRQ(ierr); gmataarestore = gmataa;
      ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
    }
    /* transfer numerical values into the diagonal A and off diagonal B parts of mat;
       each incoming row is ordered by global column, i.e. laid out as
       [ ld[i] values left of the diagonal block | diagonal block | rest of B ] */
    ld = ((Mat_MPIAIJ*)(mat->data))->ld;
    ad = Ad->a;
    ao = Ao->a;
    if (mat->rmap->n) {
      i  = 0;
      nz = ld[i];                                   ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
      nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
    }
    for (i=1; i<mat->rmap->n; i++) {
      /* right part of row i-1 followed by left part of row i */
      nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
      nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
    }
    i--;
    if (mat->rmap->n) {
      /* right part of the last row */
      nz = Ao->i[i+1] - Ao->i[i] - ld[i];           ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
    }
    if (rank) {
      ierr = PetscFree(gmataarestore);CHKERRQ(ierr);
    }
  }
  ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  CHKMEMQ;
  PetscFunctionReturn(0);
}
175 
176 /*
177   Local utility routine that creates a mapping from the global column
178 number to the local number in the off-diagonal part of the local
179 storage of the matrix.  When PETSC_USE_CTABLE is used this is scalable at
180 a slightly higher hash table cost; without it it is not scalable (each processor
181 has an order-N integer array) but is fast to access.
182 */
183 #undef __FUNCT__
184 #define __FUNCT__ "CreateColmap_MPIAIJ_Private"
185 PetscErrorCode CreateColmap_MPIAIJ_Private(Mat mat)
186 {
187   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
188   PetscErrorCode ierr;
189   PetscInt       n = aij->B->cmap->n,i;
190 
191   PetscFunctionBegin;
192 #if defined (PETSC_USE_CTABLE)
193   ierr = PetscTableCreate(n,&aij->colmap);CHKERRQ(ierr);
194   for (i=0; i<n; i++){
195     ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1);CHKERRQ(ierr);
196   }
197 #else
198   ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscInt),&aij->colmap);CHKERRQ(ierr);
199   ierr = PetscLogObjectMemory(mat,mat->cmap->N*sizeof(PetscInt));CHKERRQ(ierr);
200   ierr = PetscMemzero(aij->colmap,mat->cmap->N*sizeof(PetscInt));CHKERRQ(ierr);
201   for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
202 #endif
203   PetscFunctionReturn(0);
204 }
205 
206 
#define CHUNKSIZE   15
/*
   MatSetValues_SeqAIJ_A_Private - inserts (or adds, when addv == ADD_VALUES)
   one value at local position (row,col) of the diagonal block A.

   Relies on the caller (MatSetValues_MPIAIJ) having in scope: rp1/ap1 (column
   index and value arrays of the current row), nrow1 (stored row length),
   rmax1 (allocated row length), low1/high1 (search window, reused across
   calls since columns usually arrive in increasing order), lastcol1 (last
   column handled), nonew/ignorezeroentries (insertion options of A),
   a,aa,ai,aj,aimax,ailen,am (the Mat_SeqAIJ A and its arrays), and scratch
   variables t,_i,ii,N.

   Bisects the window until it is small, then scans linearly; if the column is
   not already stored, a new entry is created (reallocating the row via
   MatSeqXAIJReallocateAIJ if needed) and later entries are shifted up.
*/
#define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \
{ \
    if (col <= lastcol1) low1 = 0; else high1 = nrow1; \
    lastcol1 = col;\
    while (high1-low1 > 5) { \
      t = (low1+high1)/2; \
      if (rp1[t] > col) high1 = t; \
      else             low1  = t; \
    } \
      for (_i=low1; _i<high1; _i++) { \
        if (rp1[_i] > col) break; \
        if (rp1[_i] == col) { \
          if (addv == ADD_VALUES) ap1[_i] += value;   \
          else                    ap1[_i] = value; \
          goto a_noinsert; \
        } \
      }  \
      if (value == 0.0 && ignorezeroentries) {low1 = 0; high1 = nrow1;goto a_noinsert;} \
      if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;} \
      if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
      MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \
      N = nrow1++ - 1; a->nz++; high1++; \
      /* shift up all the later entries in this row */ \
      for (ii=N; ii>=_i; ii--) { \
        rp1[ii+1] = rp1[ii]; \
        ap1[ii+1] = ap1[ii]; \
      } \
      rp1[_i] = col;  \
      ap1[_i] = value;  \
      a_noinsert: ; \
      ailen[row] = nrow1; \
}
240 
241 
/*
   MatSetValues_SeqAIJ_B_Private - identical insertion logic to
   MatSetValues_SeqAIJ_A_Private, but for the off-diagonal block B: uses
   rp2/ap2/nrow2/rmax2/low2/high2/lastcol2 and b,ba,bi,bj,bimax,bilen,bm from
   the caller's scope, plus scratch variables t,_i,ii,N.
*/
#define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \
{ \
    if (col <= lastcol2) low2 = 0; else high2 = nrow2; \
    lastcol2 = col;\
    while (high2-low2 > 5) { \
      t = (low2+high2)/2; \
      if (rp2[t] > col) high2 = t; \
      else             low2  = t; \
    } \
    for (_i=low2; _i<high2; _i++) { \
      if (rp2[_i] > col) break; \
      if (rp2[_i] == col) { \
	if (addv == ADD_VALUES) ap2[_i] += value;     \
	else                    ap2[_i] = value;      \
	goto b_noinsert; \
      } \
    } \
    if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
    if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
    if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
    MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \
    N = nrow2++ - 1; b->nz++; high2++; \
    /* shift up all the later entries in this row */ \
    for (ii=N; ii>=_i; ii--) { \
      rp2[ii+1] = rp2[ii]; \
      ap2[ii+1] = ap2[ii]; \
    } \
    rp2[_i] = col; \
    ap2[_i] = value; \
    b_noinsert: ; \
    bilen[row] = nrow2; \
}
274 
#undef __FUNCT__
#define __FUNCT__ "MatSetValuesRow_MPIAIJ"
/*
   Overwrites the values of one locally owned row, assuming v[] lists the
   values of ALL stored entries of that row in increasing global column order.
   The row is split as [left of diagonal block | diagonal block | right of
   diagonal block] and copied directly into the B, A, B value arrays.
*/
PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[])
{
  Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)A->data;
  Mat_SeqAIJ     *a = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data;
  PetscErrorCode ierr;
  PetscInt       l,*garray = mat->garray,diag;

  PetscFunctionBegin;
  /* code only works for square matrices A */

  /* find size of row to the left of the diagonal part */
  /* diag = first locally owned row; for a square matrix this is also the first
     column of the diagonal block */
  ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr);
  row  = row - diag;  /* convert to local row index */
  /* l = number of stored B entries whose global column precedes the diagonal block */
  for (l=0; l<b->i[row+1]-b->i[row]; l++) {
    if (garray[b->j[b->i[row]+l]] > diag) break;
  }
  ierr = PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));CHKERRQ(ierr);

  /* diagonal part */
  ierr = PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));CHKERRQ(ierr);

  /* right of diagonal part */
  ierr = PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
302 
#undef __FUNCT__
#define __FUNCT__ "MatSetValues_MPIAIJ"
/*
   Inserts/adds an m-by-n logically dense block of values.  Locally owned rows
   are placed immediately into the diagonal (A) or off-diagonal (B) sequential
   blocks via the MatSetValues_SeqAIJ_{A,B}_Private() macros; rows owned by
   other processes are stashed and communicated during assembly.
*/
PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscScalar    value;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
  PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
  PetscTruth     roworiented = aij->roworiented;

  /* Some Variables required in the macro */
  Mat            A = aij->A;
  Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data;
  PetscInt       *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
  MatScalar      *aa = a->a;
  PetscTruth     ignorezeroentries = a->ignorezeroentries;
  Mat            B = aij->B;
  Mat_SeqAIJ     *b = (Mat_SeqAIJ*)B->data;
  PetscInt       *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
  MatScalar      *ba = b->a;

  PetscInt       *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
  PetscInt       nonew = a->nonew;
  MatScalar      *ap1,*ap2;

  PetscFunctionBegin;
  if (v) PetscValidScalarPointer(v,6);
  for (i=0; i<m; i++) {
    if (im[i] < 0) continue;  /* negative row indices are ignored */
#if defined(PETSC_USE_DEBUG)
    if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
#endif
    if (im[i] >= rstart && im[i] < rend) {
      /* locally owned row: set up the per-row search state the macros expect */
      row      = im[i] - rstart;
      lastcol1 = -1;
      rp1      = aj + ai[row];
      ap1      = aa + ai[row];
      rmax1    = aimax[row];
      nrow1    = ailen[row];
      low1     = 0;
      high1    = nrow1;
      lastcol2 = -1;
      rp2      = bj + bi[row];
      ap2      = ba + bi[row];
      rmax2    = bimax[row];
      nrow2    = bilen[row];
      low2     = 0;
      high2    = nrow2;

      for (j=0; j<n; j++) {
        /* v is row-oriented (C) or column-oriented (Fortran); NULL v means zeros */
        if (v) {if (roworiented) value = v[i*n+j]; else value = v[i+j*m];} else value = 0.0;
        if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
        if (in[j] >= cstart && in[j] < cend){
          /* column lies in the diagonal block */
          col = in[j] - cstart;
          MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
        } else if (in[j] < 0) continue;
#if defined(PETSC_USE_DEBUG)
        else if (in[j] >= mat->cmap->N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);}
#endif
        else {
          /* column belongs to the off-diagonal block B */
          if (mat->was_assembled) {
            /* once assembled, B uses compacted column indices; translate the
               global column through colmap (shifted by one so 0 means absent) */
            if (!aij->colmap) {
              ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
            }
#if defined (PETSC_USE_CTABLE)
            ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
	    col--;
#else
            col = aij->colmap[in[j]] - 1;
#endif
            if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
              /* a brand-new off-diagonal column: disassemble so B goes back
                 to global column indexing, then insert with the global index */
              ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
              col =  in[j];
              /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
              B = aij->B;
              b = (Mat_SeqAIJ*)B->data;
              bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a;
              rp2      = bj + bi[row];
              ap2      = ba + bi[row];
              rmax2    = bimax[row];
              nrow2    = bilen[row];
              low2     = 0;
              high2    = nrow2;
              bm       = aij->B->rmap->n;
              ba = b->a;
            }
          } else col = in[j];
          MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
        }
      }
    } else {
      /* row owned by another process: stash it for assembly-time communication */
      if (!aij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscTruth)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscTruth)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}
406 
#undef __FUNCT__
#define __FUNCT__ "MatGetValues_MPIAIJ"
/*
   Retrieves an m-by-n block of values into v (row-major).  Only locally owned
   rows may be requested.  Columns in the diagonal block are read from aij->A;
   other columns are translated through colmap and read from aij->B, with
   columns not stored on this process returning 0.0.
*/
PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
  PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) continue; /* negative rows are skipped, not an error */
    if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      for (j=0; j<n; j++) {
        if (idxn[j] < 0) continue; /* negative columns are skipped, not an error */
        if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
        if (idxn[j] >= cstart && idxn[j] < cend){
          /* diagonal block */
          col = idxn[j] - cstart;
          ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
        } else {
          /* off-diagonal block: translate global column to B's local column */
          if (!aij->colmap) {
            ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
          }
#if defined (PETSC_USE_CTABLE)
          ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
          col --;
#else
          col = aij->colmap[idxn[j]] - 1;
#endif
          /* column not stored in B on this process: structurally zero */
          if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
          else {
            ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
          }
        }
      }
    } else {
      SETERRQ(PETSC_ERR_SUP,"Only local values currently supported");
    }
  }
  PetscFunctionReturn(0);
}
450 
451 extern PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat,Vec,Vec);
452 
453 #undef __FUNCT__
454 #define __FUNCT__ "MatAssemblyBegin_MPIAIJ"
455 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
456 {
457   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
458   PetscErrorCode ierr;
459   PetscInt       nstash,reallocs;
460   InsertMode     addv;
461 
462   PetscFunctionBegin;
463   if (aij->donotstash) {
464     PetscFunctionReturn(0);
465   }
466 
467   /* make sure all processors are either in INSERTMODE or ADDMODE */
468   ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,((PetscObject)mat)->comm);CHKERRQ(ierr);
469   if (addv == (ADD_VALUES|INSERT_VALUES)) {
470     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
471   }
472   mat->insertmode = addv; /* in case this processor had no cache */
473 
474   ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
475   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
476   ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
477   PetscFunctionReturn(0);
478 }
479 
#undef __FUNCT__
#define __FUNCT__ "MatAssemblyEnd_MPIAIJ"
/*
   Completes assembly: drains the stash of values destined for this process,
   assembles the diagonal (A) and off-diagonal (B) blocks, disassembles if any
   other process did, and on the first final assembly builds the scatter used
   by the matrix-vector products.
*/
PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  Mat_SeqAIJ     *a=(Mat_SeqAIJ *)aij->A->data;
  PetscErrorCode ierr;
  PetscMPIInt    n;
  PetscInt       i,j,rstart,ncols,flg;
  PetscInt       *row,*col;
  PetscTruth     other_disassembled;
  PetscScalar    *val;
  InsertMode     addv = mat->insertmode;

  /* do not use 'b = (Mat_SeqAIJ *)aij->B->data' as B can be reset in disassembly */
  PetscFunctionBegin;
  if (!aij->donotstash) {
    /* receive and insert the values other processes stashed for rows we own */
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;  /* no more incoming messages */

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        /* Now assemble all these values with a single function call */
        ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
        i = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
  }
  /* disable compressed-row storage for the diagonal block */
  a->compressedrow.use     = PETSC_FALSE;
  ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);

  /* determine if any processor has disassembled, if so we must
     also disassemble ourselfs, in order that we may reassemble. */
  /*
     if nonzero structure of submatrix B cannot change then we know that
     no processor disassembled thus we can skip this stuff
  */
  if (!((Mat_SeqAIJ*)aij->B->data)->nonew)  {
    /* MPI_PROD of the was_assembled flags: result is nonzero only if EVERY
       process still has its matrix assembled */
    ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,((PetscObject)mat)->comm);CHKERRQ(ierr);
    if (mat->was_assembled && !other_disassembled) {
      ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
    }
  }
  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    /* first final assembly: build the ghost-value scatter for MatMult() etc. */
    ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
  }
  ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr);
  ((Mat_SeqAIJ *)aij->B->data)->compressedrow.use = PETSC_TRUE; /* b->compressedrow.use */
  ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);

  /* free cached row workspace; presumably reallocated on demand by MatGetRow --
     NOTE(review): confirm against MatGetRow_MPIAIJ */
  ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
  aij->rowvalues = 0;

  /* used by MatAXPY() */
  a->xtoy = 0; ((Mat_SeqAIJ *)aij->B->data)->xtoy = 0;  /* b->xtoy = 0 */
  a->XtoY = 0; ((Mat_SeqAIJ *)aij->B->data)->XtoY = 0;  /* b->XtoY = 0 */

  /* the cached diagonal vector is stale once new values have been assembled */
  if (aij->diag) {ierr = VecDestroy(aij->diag);CHKERRQ(ierr);aij->diag = 0;}
  if (a->inode.size) mat->ops->multdiagonalblock = MatMultDiagonalBlock_MPIAIJ;
  PetscFunctionReturn(0);
}
548 
549 #undef __FUNCT__
550 #define __FUNCT__ "MatZeroEntries_MPIAIJ"
551 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
552 {
553   Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
554   PetscErrorCode ierr;
555 
556   PetscFunctionBegin;
557   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
558   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
559   PetscFunctionReturn(0);
560 }
561 
#undef __FUNCT__
#define __FUNCT__ "MatZeroRows_MPIAIJ"
/*
   Zeros the listed (possibly off-process) global rows and optionally sets
   each zeroed row's diagonal entry to diag.  Since each process may list rows
   it does not own, the row indices are first routed to their owners with a
   send/receive rendezvous, then zeroed locally.
*/
PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag)
{
  Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscMPIInt    size = l->size,imdex,n,rank = l->rank,tag = ((PetscObject)A)->tag,lastidx = -1;
  PetscInt       i,*owners = A->rmap->range;
  PetscInt       *nprocs,j,idx,nsends,row;
  PetscInt       nmax,*svalues,*starts,*owner,nrecvs;
  PetscInt       *rvalues,count,base,slen,*source;
  PetscInt       *lens,*lrows,*values,rstart=A->rmap->rstart;
  MPI_Comm       comm = ((PetscObject)A)->comm;
  MPI_Request    *send_waits,*recv_waits;
  MPI_Status     recv_status,*send_status;
#if defined(PETSC_DEBUG)
  PetscTruth     found = PETSC_FALSE;
#endif

  PetscFunctionBegin;
  /*  first count number of contributors to each processor */
  /* nprocs[2*j] = number of rows going to process j; nprocs[2*j+1] = 1 if any do */
  ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
  ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/
  j = 0;
  for (i=0; i<N; i++) {
    /* the owner search resumes from the previous hit and restarts only when the
       input goes backwards -- fast for sorted rows[] */
    if (lastidx > (idx = rows[i])) j = 0;
    lastidx = idx;
    for (; j<size; j++) {
      if (idx >= owners[j] && idx < owners[j+1]) {
        nprocs[2*j]++;
        nprocs[2*j+1] = 1;
        owner[i] = j;
#if defined(PETSC_DEBUG)
        found = PETSC_TRUE;
#endif
        break;
      }
    }
#if defined(PETSC_DEBUG)
    if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
    found = PETSC_FALSE;
#endif
  }
  nsends = 0;  for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}

  /* inform other processors of number of messages and max length*/
  ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);

  /* post receives:   */
  ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr);
  ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
  for (i=0; i<nrecvs; i++) {
    ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
  }

  /* do sends:
      1) starts[i] gives the starting index in svalues for stuff going to
         the ith processor
  */
  ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr);
  ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
  ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr);
  starts[0] = 0;
  for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
  for (i=0; i<N; i++) {
    svalues[starts[owner[i]]++] = rows[i];
  }

  /* starts[] was advanced by the packing loop above; rebuild it for the sends */
  starts[0] = 0;
  for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
  count = 0;
  for (i=0; i<size; i++) {
    if (nprocs[2*i+1]) {
      ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
    }
  }
  ierr = PetscFree(starts);CHKERRQ(ierr);

  /* base = our first globally owned row, used to localize received indices */
  base = owners[rank];

  /*  wait on receives */
  ierr   = PetscMalloc2(nrecvs,PetscInt,&lens,nrecvs,PetscInt,&source);CHKERRQ(ierr);
  count  = nrecvs; slen = 0;
  while (count) {
    ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
    /* unpack receives into our local space */
    ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);
    source[imdex]  = recv_status.MPI_SOURCE;
    lens[imdex]    = n;
    slen          += n;
    count--;
  }
  ierr = PetscFree(recv_waits);CHKERRQ(ierr);

  /* move the data into the send scatter */
  ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr);
  count = 0;
  for (i=0; i<nrecvs; i++) {
    values = rvalues + i*nmax;
    for (j=0; j<lens[i]; j++) {
      lrows[count++] = values[j] - base;  /* convert to local row index */
    }
  }
  ierr = PetscFree(rvalues);CHKERRQ(ierr);
  ierr = PetscFree2(lens,source);CHKERRQ(ierr);
  ierr = PetscFree(owner);CHKERRQ(ierr);
  ierr = PetscFree(nprocs);CHKERRQ(ierr);

  /* actually zap the local rows */
  /*
        Zero the required rows. If the "diagonal block" of the matrix
     is square and the user wishes to set the diagonal we use separate
     code so that MatSetValues() is not called for each diagonal allocating
     new memory, thus calling lots of mallocs and slowing things down.

  */
  /* must zero l->B before l->A because the (diag) case below may put values into l->B*/
  ierr = MatZeroRows(l->B,slen,lrows,0.0);CHKERRQ(ierr);
  if ((diag != 0.0) && (l->A->rmap->N == l->A->cmap->N)) {
    /* square diagonal block: the diagonal entry lives in l->A, set it there directly */
    ierr      = MatZeroRows(l->A,slen,lrows,diag);CHKERRQ(ierr);
  } else if (diag != 0.0) {
    /* rectangular diagonal block: the diagonal entry may lie outside l->A, so it
       must be set through the full MatSetValues()/assembly path */
    ierr = MatZeroRows(l->A,slen,lrows,0.0);CHKERRQ(ierr);
    if (((Mat_SeqAIJ*)l->A->data)->nonew) {
      SETERRQ(PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\n\
MAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
    }
    for (i = 0; i < slen; i++) {
      row  = lrows[i] + rstart;
      ierr = MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr);
    }
    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  } else {
    ierr = MatZeroRows(l->A,slen,lrows,0.0);CHKERRQ(ierr);
  }
  ierr = PetscFree(lrows);CHKERRQ(ierr);

  /* wait on sends */
  if (nsends) {
    ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
    ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
    ierr = PetscFree(send_status);CHKERRQ(ierr);
  }
  ierr = PetscFree(send_waits);CHKERRQ(ierr);
  ierr = PetscFree(svalues);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}
711 
712 #undef __FUNCT__
713 #define __FUNCT__ "MatMult_MPIAIJ"
714 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
715 {
716   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
717   PetscErrorCode ierr;
718   PetscInt       nt;
719 
720   PetscFunctionBegin;
721   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
722   if (nt != A->cmap->n) {
723     SETERRQ2(PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt);
724   }
725   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
726   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
727   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
728   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
729   PetscFunctionReturn(0);
730 }
731 
732 #undef __FUNCT__
733 #define __FUNCT__ "MatMultDiagonalBlock_MPIAIJ"
734 PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx)
735 {
736   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
737   PetscErrorCode ierr;
738 
739   PetscFunctionBegin;
740   ierr = MatMultDiagonalBlock(a->A,bb,xx);CHKERRQ(ierr);
741   PetscFunctionReturn(0);
742 }
743 
744 #undef __FUNCT__
745 #define __FUNCT__ "MatMultAdd_MPIAIJ"
746 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
747 {
748   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
749   PetscErrorCode ierr;
750 
751   PetscFunctionBegin;
752   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
753   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
754   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
755   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
756   PetscFunctionReturn(0);
757 }
758 
#undef __FUNCT__
#define __FUNCT__ "MatMultTranspose_MPIAIJ"
/*
   yy = A^T xx.  The off-diagonal block's transpose product is computed into
   lvec and scattered back (SCATTER_REVERSE, ADD_VALUES) onto the owning
   processes, where it is combined with the local transpose product.  The
   ordering of the local multiply relative to the scatter depends on whether
   the scatter's begin/end are merged into a single communication.
*/
PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscTruth     merged;

  PetscFunctionBegin;
  ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr);
  /* do nondiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  if (!merged) {
    /* send it on its way */
    ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
    /* do local part, overlapped with the communication */
    ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
    /* receive remote parts: note this assumes the values are not actually */
    /* added in yy until the next line, */
    ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  } else {
    /* merged scatter delivers in Begin(), so the local part must be finished first */
    ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
    /* send it on its way */
    ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
    /* values actually were received in the Begin() but we need to call this nop */
    ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
789 
790 EXTERN_C_BEGIN
791 #undef __FUNCT__
792 #define __FUNCT__ "MatIsTranspose_MPIAIJ"
793 PetscErrorCode PETSCMAT_DLLEXPORT MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscTruth *f)
794 {
795   MPI_Comm       comm;
796   Mat_MPIAIJ     *Aij = (Mat_MPIAIJ *) Amat->data, *Bij;
797   Mat            Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs;
798   IS             Me,Notme;
799   PetscErrorCode ierr;
800   PetscInt       M,N,first,last,*notme,i;
801   PetscMPIInt    size;
802 
803   PetscFunctionBegin;
804 
805   /* Easy test: symmetric diagonal block */
806   Bij = (Mat_MPIAIJ *) Bmat->data; Bdia = Bij->A;
807   ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr);
808   if (!*f) PetscFunctionReturn(0);
809   ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr);
810   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
811   if (size == 1) PetscFunctionReturn(0);
812 
813   /* Hard test: off-diagonal block. This takes a MatGetSubMatrix. */
814   ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr);
815   ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr);
816   ierr = PetscMalloc((N-last+first)*sizeof(PetscInt),&notme);CHKERRQ(ierr);
817   for (i=0; i<first; i++) notme[i] = i;
818   for (i=last; i<M; i++) notme[i-last+first] = i;
819   ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,&Notme);CHKERRQ(ierr);
820   ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr);
821   ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr);
822   Aoff = Aoffs[0];
823   ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr);
824   Boff = Boffs[0];
825   ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr);
826   ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr);
827   ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr);
828   ierr = ISDestroy(Me);CHKERRQ(ierr);
829   ierr = ISDestroy(Notme);CHKERRQ(ierr);
830 
831   PetscFunctionReturn(0);
832 }
833 EXTERN_C_END
834 
835 #undef __FUNCT__
836 #define __FUNCT__ "MatMultTransposeAdd_MPIAIJ"
837 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
838 {
839   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
840   PetscErrorCode ierr;
841 
842   PetscFunctionBegin;
843   /* do nondiagonal part */
844   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
845   /* send it on its way */
846   ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
847   /* do local part */
848   ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
849   /* receive remote parts */
850   ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
851   PetscFunctionReturn(0);
852 }
853 
854 /*
855   This only works correctly for square matrices where the subblock A->A is the
856    diagonal block
857 */
858 #undef __FUNCT__
859 #define __FUNCT__ "MatGetDiagonal_MPIAIJ"
860 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v)
861 {
862   PetscErrorCode ierr;
863   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
864 
865   PetscFunctionBegin;
866   if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
867   if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) {
868     SETERRQ(PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
869   }
870   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
871   PetscFunctionReturn(0);
872 }
873 
874 #undef __FUNCT__
875 #define __FUNCT__ "MatScale_MPIAIJ"
876 PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa)
877 {
878   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
879   PetscErrorCode ierr;
880 
881   PetscFunctionBegin;
882   ierr = MatScale(a->A,aa);CHKERRQ(ierr);
883   ierr = MatScale(a->B,aa);CHKERRQ(ierr);
884   PetscFunctionReturn(0);
885 }
886 
#undef __FUNCT__
#define __FUNCT__ "MatDestroy_MPIAIJ"
/*
   Destroys a parallel AIJ matrix: releases the two sequential blocks, the
   communication machinery, all auxiliary arrays, and then the private data
   structure itself, finally unregistering the composed type-specific
   functions so the Mat shell can be reused.
*/
PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
#if defined(PETSC_USE_LOG)
  PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N);
#endif
  ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
  /* cached diagonal vector (created lazily, e.g. by MatSOR's Eisenstat path) */
  if (aij->diag) {ierr = VecDestroy(aij->diag);CHKERRQ(ierr);}
  /* the sequential diagonal (A) and off-diagonal (B) blocks */
  ierr = MatDestroy(aij->A);CHKERRQ(ierr);
  ierr = MatDestroy(aij->B);CHKERRQ(ierr);
  /* colmap is a hash table or a plain array depending on the build option */
#if defined (PETSC_USE_CTABLE)
  if (aij->colmap) {ierr = PetscTableDestroy(aij->colmap);CHKERRQ(ierr);}
#else
  ierr = PetscFree(aij->colmap);CHKERRQ(ierr);
#endif
  /* garray maps B's compressed column indices back to global columns */
  ierr = PetscFree(aij->garray);CHKERRQ(ierr);
  /* work vector and scatter used by the mat-vec products */
  if (aij->lvec)   {ierr = VecDestroy(aij->lvec);CHKERRQ(ierr);}
  if (aij->Mvctx)  {ierr = VecScatterDestroy(aij->Mvctx);CHKERRQ(ierr);}
  /* scratch space allocated lazily by MatGetRow */
  ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
  ierr = PetscFree(aij->ld);CHKERRQ(ierr);
  ierr = PetscFree(aij);CHKERRQ(ierr);

  ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
  /* remove the composed functions registered when the type was set */
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C","",PETSC_NULL);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
925 
#undef __FUNCT__
#define __FUNCT__ "MatView_MPIAIJ_Binary"
/*
   Writes the parallel matrix to a binary viewer in the standard PETSc AIJ
   file format: header (cookie, M, N, total nz), then all row lengths, then
   all column indices, then all values. Process 0 does all file I/O; every
   other process sends its data to process 0 with point-to-point messages.
   Within each row the entries are emitted in ascending global column order
   by interleaving: lower-triangular part of B, then A (diagonal block),
   then the remaining (upper) part of B.
*/
PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
{
  Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
  Mat_SeqAIJ*       A = (Mat_SeqAIJ*)aij->A->data;
  Mat_SeqAIJ*       B = (Mat_SeqAIJ*)aij->B->data;
  PetscErrorCode    ierr;
  PetscMPIInt       rank,size,tag = ((PetscObject)viewer)->tag;
  int               fd;
  PetscInt          nz,header[4],*row_lengths,*range=0,rlen,i;
  PetscInt          nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap->rstart,rnz;
  PetscScalar       *column_values;

  PetscFunctionBegin;
  ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(((PetscObject)mat)->comm,&size);CHKERRQ(ierr);
  /* local nonzero count: diagonal plus off-diagonal block */
  nz   = A->nz + B->nz;
  if (!rank) {
    header[0] = MAT_FILE_COOKIE;
    header[1] = mat->rmap->N;
    header[2] = mat->cmap->N;
    /* global nz total reduced onto process 0 for the header */
    ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
    ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
    ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    /* get largest number of rows any processor has */
    rlen = mat->rmap->n;
    range = mat->rmap->range;
    for (i=1; i<size; i++) {
      rlen = PetscMax(rlen,range[i+1] - range[i]);
    }
  } else {
    /* non-root side of the collective reduction above */
    ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
    rlen = mat->rmap->n;
  }

  /* load up the local row counts; on rank 0 the buffer is sized for the
     largest per-process row count since it is reused as a receive buffer */
  ierr = PetscMalloc((rlen+1)*sizeof(PetscInt),&row_lengths);CHKERRQ(ierr);
  for (i=0; i<mat->rmap->n; i++) {
    row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
  }

  /* store the row lengths to the file */
  if (!rank) {
    MPI_Status status;
    ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap->n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      rlen = range[i+1] - range[i];
      ierr = MPI_Recv(row_lengths,rlen,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    }
  } else {
    ierr = MPI_Send(row_lengths,mat->rmap->n,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
  }
  ierr = PetscFree(row_lengths);CHKERRQ(ierr);

  /* load up the local column indices */
  nzmax = nz; /* 0th processor needs space for the largest nz any processor has */
  ierr = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
  ierr = PetscMalloc((nzmax+1)*sizeof(PetscInt),&column_indices);CHKERRQ(ierr);
  cnt  = 0;
  for (i=0; i<mat->rmap->n; i++) {
    /* B entries with global column < cstart come before the diagonal block */
    for (j=B->i[i]; j<B->i[i+1]; j++) {
      if ( (col = garray[B->j[j]]) > cstart) break;
      column_indices[cnt++] = col;
    }
    /* diagonal-block entries, shifted to global numbering */
    for (k=A->i[i]; k<A->i[i+1]; k++) {
      column_indices[cnt++] = A->j[k] + cstart;
    }
    /* remaining B entries (global column beyond the diagonal block) */
    for (; j<B->i[i+1]; j++) {
      column_indices[cnt++] = garray[B->j[j]];
    }
  }
  if (cnt != A->nz + B->nz) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);

  /* store the column indices to the file */
  if (!rank) {
    MPI_Status status;
    ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      /* each sender first reports its count, then ships the indices */
      ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
      if (rnz > nzmax) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax);
      ierr = MPI_Recv(column_indices,rnz,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    }
  } else {
    ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
    ierr = MPI_Send(column_indices,nz,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
  }
  ierr = PetscFree(column_indices);CHKERRQ(ierr);

  /* load up the local column values, in the same interleaved order as the
     indices above so the two files sections line up entry-for-entry */
  ierr = PetscMalloc((nzmax+1)*sizeof(PetscScalar),&column_values);CHKERRQ(ierr);
  cnt  = 0;
  for (i=0; i<mat->rmap->n; i++) {
    for (j=B->i[i]; j<B->i[i+1]; j++) {
      if ( garray[B->j[j]] > cstart) break;
      column_values[cnt++] = B->a[j];
    }
    for (k=A->i[i]; k<A->i[i+1]; k++) {
      column_values[cnt++] = A->a[k];
    }
    for (; j<B->i[i+1]; j++) {
      column_values[cnt++] = B->a[j];
    }
  }
  if (cnt != A->nz + B->nz) SETERRQ2(PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);

  /* store the column values to the file */
  if (!rank) {
    MPI_Status status;
    ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
      if (rnz > nzmax) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax);
      ierr = MPI_Recv(column_values,rnz,MPIU_SCALAR,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
    }
  } else {
    ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
    ierr = MPI_Send(column_values,nz,MPIU_SCALAR,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
  }
  ierr = PetscFree(column_values);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1051 
#undef __FUNCT__
#define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket"
/*
   Viewer dispatch for ASCII, draw, binary, and socket viewers. Info-format
   ASCII output is handled per-process; otherwise (for small matrices or when
   explicitly allowed) the whole matrix is assembled onto process 0 and
   viewed there via a singleton viewer.
*/
PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
{
  Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode    ierr;
  PetscMPIInt       rank = aij->rank,size = aij->size;
  PetscTruth        isdraw,iascii,isbinary;
  PetscViewer       sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
  if (iascii) {
    ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
    if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      /* per-process storage statistics, one synchronized line per rank */
      MatInfo    info;
      PetscTruth inodes;

      ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr);
      ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
      /* NOTE(review): 'inodes' is a PetscTruth but is passed as PetscInt**;
         only its null/non-null status is used below — confirm the cast is
         safe on platforms where pointers are wider than PetscTruth */
      ierr = MatInodeGetInodeSizes(aij->A,PETSC_NULL,(PetscInt **)&inodes,PETSC_NULL);CHKERRQ(ierr);
      if (!inodes) {
        ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, not using I-node routines\n",
					      rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
      } else {
        ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, using I-node routines\n",
		    rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
      }
      ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
      ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
      ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
      ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_INFO) {
      /* summary line about I-node usage on process 0's diagonal block */
      PetscInt   inodecount,inodelimit,*inodes;
      ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr);
      if (inodes) {
        ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr);
      } else {
        ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr);
      }
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
      /* nothing to print for an unfactored AIJ matrix */
      PetscFunctionReturn(0);
    }
  } else if (isbinary) {
    if (size == 1) {
      ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
      ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
    } else {
      ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr);
    }
    PetscFunctionReturn(0);
  } else if (isdraw) {
    PetscDraw  draw;
    PetscTruth isnull;
    ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
    ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
  }

  if (size == 1) {
    ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
    ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
  } else {
    /* assemble the entire matrix onto first processor. */
    Mat         A;
    Mat_SeqAIJ  *Aloc;
    PetscInt    M = mat->rmap->N,N = mat->cmap->N,m,*ai,*aj,row,*cols,i,*ct;
    MatScalar   *a;

    /* refuse to dump huge matrices as text unless the user insists */
    if (mat->rmap->N > 1024) {
      PetscTruth flg = PETSC_FALSE;

      ierr = PetscOptionsGetTruth(((PetscObject) mat)->prefix, "-mat_ascii_output_large", &flg,PETSC_NULL);CHKERRQ(ierr);
      if (!flg) {
        SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"ASCII matrix output not allowed for matrices with more than 1024 rows, use binary format instead.\nYou can override this restriction using -mat_ascii_output_large.");
      }
    }

    /* temporary matrix with all rows owned by process 0 */
    ierr = MatCreate(((PetscObject)mat)->comm,&A);CHKERRQ(ierr);
    if (!rank) {
      ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
    } else {
      ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
    }
    /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */
    ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(A,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
    ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr);

    /* copy over the A part: temporarily shift local column indices to
       global numbering, insert, then shift them back */
    Aloc = (Mat_SeqAIJ*)aij->A->data;
    m = aij->A->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
    row = mat->rmap->rstart;
    for (i=0; i<ai[m]; i++) {aj[i] += mat->cmap->rstart ;}
    for (i=0; i<m; i++) {
      ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
      row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
    }
    aj = Aloc->j;
    for (i=0; i<ai[m]; i++) {aj[i] -= mat->cmap->rstart;}

    /* copy over the B part: translate compressed indices through garray */
    Aloc = (Mat_SeqAIJ*)aij->B->data;
    m    = aij->B->rmap->n;  ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
    row  = mat->rmap->rstart;
    ierr = PetscMalloc((ai[m]+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr);
    ct   = cols;  /* save the base pointer; 'cols' is advanced in the loop */
    for (i=0; i<ai[m]; i++) {cols[i] = aij->garray[aj[i]];}
    for (i=0; i<m; i++) {
      ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
      row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
    }
    ierr = PetscFree(ct);CHKERRQ(ierr);
    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    /*
       Everyone has to call to draw the matrix since the graphics waits are
       synchronized across all processors that share the PetscDraw object
    */
    ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
    if (!rank) {
      ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr);
      ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
    }
    ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
    ierr = MatDestroy(A);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
1188 
1189 #undef __FUNCT__
1190 #define __FUNCT__ "MatView_MPIAIJ"
1191 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer)
1192 {
1193   PetscErrorCode ierr;
1194   PetscTruth     iascii,isdraw,issocket,isbinary;
1195 
1196   PetscFunctionBegin;
1197   ierr  = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
1198   ierr  = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
1199   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
1200   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr);
1201   if (iascii || isdraw || isbinary || issocket) {
1202     ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
1203   } else {
1204     SETERRQ1(PETSC_ERR_SUP,"Viewer type %s not supported by MPIAIJ matrices",((PetscObject)viewer)->type_name);
1205   }
1206   PetscFunctionReturn(0);
1207 }
1208 
#undef __FUNCT__
#define __FUNCT__ "MatSOR_MPIAIJ"
/*
   SOR/Eisenstat relaxation for a parallel AIJ matrix. Only "local" sweep
   variants are supported: each outer iteration updates the right-hand side
   with the off-process coupling (bb1 = bb - B*x) and then runs the
   requested sweep on the local diagonal block. True parallel (global) SOR
   is not implemented.
*/
PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
{
  Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
  PetscErrorCode ierr;
  Vec            bb1 = 0;
  PetscTruth     hasop;

  PetscFunctionBegin;
  /* (~flag & SOR_ZERO_INITIAL_GUESS) is true when the zero-initial-guess
     bit is NOT set; the work vector bb1 is needed whenever an off-process
     rhs update will occur */
  if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) {
    ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
  }

  if (flag == SOR_APPLY_UPPER) {
    /* apply-upper is purely local; delegate directly */
    ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
    PetscFunctionReturn(0);
  }

  if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      /* first iteration can skip the rhs update since x == 0 */
      ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
      its--;
    }

    while (its--) {
      /* gather off-process entries of the current iterate */
      ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
      ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);

      /* update rhs: bb1 = bb - B*x */
      ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
      ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);

      /* local sweep */
      ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
    }
  } else if (flag & SOR_LOCAL_FORWARD_SWEEP){
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
      its--;
    }
    while (its--) {
      ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
      ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);

      /* update rhs: bb1 = bb - B*x */
      ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
      ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);

      /* local sweep */
      ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
    }
  } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
      its--;
    }
    while (its--) {
      ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
      ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);

      /* update rhs: bb1 = bb - B*x */
      ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
      ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);

      /* local sweep */
      ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
    }
  }  else if (flag & SOR_EISENSTAT) {
    Vec         xx1;

    ierr = VecDuplicate(bb,&xx1);CHKERRQ(ierr);
    /* backward half-sweep into xx */
    ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr);

    ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    /* lazily build and cache the matrix diagonal */
    if (!mat->diag) {
      ierr = MatGetVecs(matin,&mat->diag,PETSC_NULL);CHKERRQ(ierr);
      ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr);
    }
    ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr);
    if (hasop) {
      ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr);
    } else {
      ierr = VecPointwiseMult(bb1,mat->diag,xx);CHKERRQ(ierr);
    }
    ierr = VecAYPX(bb1,(omega-2.0)/omega,bb);CHKERRQ(ierr);

    /* add the off-process coupling into the intermediate rhs */
    ierr = MatMultAdd(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr);

    /* local sweep */
    ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr);
    /* combine the forward half-sweep result with the backward one */
    ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr);
    ierr = VecDestroy(xx1);CHKERRQ(ierr);
  } else {
    SETERRQ(PETSC_ERR_SUP,"Parallel SOR not supported");
  }

  if (bb1) {ierr = VecDestroy(bb1);CHKERRQ(ierr);}
  PetscFunctionReturn(0);
}
1310 
1311 #undef __FUNCT__
1312 #define __FUNCT__ "MatPermute_MPIAIJ"
1313 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B)
1314 {
1315   MPI_Comm       comm,pcomm;
1316   PetscInt       first,local_size,nrows;
1317   const PetscInt *rows;
1318   PetscMPIInt    size;
1319   IS             crowp,growp,irowp,lrowp,lcolp,icolp;
1320   PetscErrorCode ierr;
1321 
1322   PetscFunctionBegin;
1323   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
1324   /* make a collective version of 'rowp' */
1325   ierr = PetscObjectGetComm((PetscObject)rowp,&pcomm);CHKERRQ(ierr);
1326   if (pcomm==comm) {
1327     crowp = rowp;
1328   } else {
1329     ierr = ISGetSize(rowp,&nrows);CHKERRQ(ierr);
1330     ierr = ISGetIndices(rowp,&rows);CHKERRQ(ierr);
1331     ierr = ISCreateGeneral(comm,nrows,rows,&crowp);CHKERRQ(ierr);
1332     ierr = ISRestoreIndices(rowp,&rows);CHKERRQ(ierr);
1333   }
1334   /* collect the global row permutation and invert it */
1335   ierr = ISAllGather(crowp,&growp);CHKERRQ(ierr);
1336   ierr = ISSetPermutation(growp);CHKERRQ(ierr);
1337   if (pcomm!=comm) {
1338     ierr = ISDestroy(crowp);CHKERRQ(ierr);
1339   }
1340   ierr = ISInvertPermutation(growp,PETSC_DECIDE,&irowp);CHKERRQ(ierr);
1341   /* get the local target indices */
1342   ierr = MatGetOwnershipRange(A,&first,PETSC_NULL);CHKERRQ(ierr);
1343   ierr = MatGetLocalSize(A,&local_size,PETSC_NULL);CHKERRQ(ierr);
1344   ierr = ISGetIndices(irowp,&rows);CHKERRQ(ierr);
1345   ierr = ISCreateGeneral(MPI_COMM_SELF,local_size,rows+first,&lrowp);CHKERRQ(ierr);
1346   ierr = ISRestoreIndices(irowp,&rows);CHKERRQ(ierr);
1347   ierr = ISDestroy(irowp);CHKERRQ(ierr);
1348   /* the column permutation is so much easier;
1349      make a local version of 'colp' and invert it */
1350   ierr = PetscObjectGetComm((PetscObject)colp,&pcomm);CHKERRQ(ierr);
1351   ierr = MPI_Comm_size(pcomm,&size);CHKERRQ(ierr);
1352   if (size==1) {
1353     lcolp = colp;
1354   } else {
1355     ierr = ISGetSize(colp,&nrows);CHKERRQ(ierr);
1356     ierr = ISGetIndices(colp,&rows);CHKERRQ(ierr);
1357     ierr = ISCreateGeneral(MPI_COMM_SELF,nrows,rows,&lcolp);CHKERRQ(ierr);
1358   }
1359   ierr = ISSetPermutation(lcolp);CHKERRQ(ierr);
1360   ierr = ISInvertPermutation(lcolp,PETSC_DECIDE,&icolp);CHKERRQ(ierr);
1361   ierr = ISSetPermutation(icolp);CHKERRQ(ierr);
1362   if (size>1) {
1363     ierr = ISRestoreIndices(colp,&rows);CHKERRQ(ierr);
1364     ierr = ISDestroy(lcolp);CHKERRQ(ierr);
1365   }
1366   /* now we just get the submatrix */
1367   ierr = MatGetSubMatrix_MPIAIJ_Private(A,lrowp,icolp,local_size,MAT_INITIAL_MATRIX,B);CHKERRQ(ierr);
1368   /* clean up */
1369   ierr = ISDestroy(lrowp);CHKERRQ(ierr);
1370   ierr = ISDestroy(icolp);CHKERRQ(ierr);
1371   PetscFunctionReturn(0);
1372 }
1373 
1374 #undef __FUNCT__
1375 #define __FUNCT__ "MatGetInfo_MPIAIJ"
1376 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1377 {
1378   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1379   Mat            A = mat->A,B = mat->B;
1380   PetscErrorCode ierr;
1381   PetscReal      isend[5],irecv[5];
1382 
1383   PetscFunctionBegin;
1384   info->block_size     = 1.0;
1385   ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1386   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1387   isend[3] = info->memory;  isend[4] = info->mallocs;
1388   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1389   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1390   isend[3] += info->memory;  isend[4] += info->mallocs;
1391   if (flag == MAT_LOCAL) {
1392     info->nz_used      = isend[0];
1393     info->nz_allocated = isend[1];
1394     info->nz_unneeded  = isend[2];
1395     info->memory       = isend[3];
1396     info->mallocs      = isend[4];
1397   } else if (flag == MAT_GLOBAL_MAX) {
1398     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,((PetscObject)matin)->comm);CHKERRQ(ierr);
1399     info->nz_used      = irecv[0];
1400     info->nz_allocated = irecv[1];
1401     info->nz_unneeded  = irecv[2];
1402     info->memory       = irecv[3];
1403     info->mallocs      = irecv[4];
1404   } else if (flag == MAT_GLOBAL_SUM) {
1405     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,((PetscObject)matin)->comm);CHKERRQ(ierr);
1406     info->nz_used      = irecv[0];
1407     info->nz_allocated = irecv[1];
1408     info->nz_unneeded  = irecv[2];
1409     info->memory       = irecv[3];
1410     info->mallocs      = irecv[4];
1411   }
1412   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1413   info->fill_ratio_needed = 0;
1414   info->factor_mallocs    = 0;
1415 
1416   PetscFunctionReturn(0);
1417 }
1418 
1419 #undef __FUNCT__
1420 #define __FUNCT__ "MatSetOption_MPIAIJ"
1421 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscTruth flg)
1422 {
1423   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1424   PetscErrorCode ierr;
1425 
1426   PetscFunctionBegin;
1427   switch (op) {
1428   case MAT_NEW_NONZERO_LOCATIONS:
1429   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1430   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1431   case MAT_KEEP_NONZERO_PATTERN:
1432   case MAT_NEW_NONZERO_LOCATION_ERR:
1433   case MAT_USE_INODES:
1434   case MAT_IGNORE_ZERO_ENTRIES:
1435     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1436     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1437     break;
1438   case MAT_ROW_ORIENTED:
1439     a->roworiented = flg;
1440     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1441     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1442     break;
1443   case MAT_NEW_DIAGONALS:
1444     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1445     break;
1446   case MAT_IGNORE_OFF_PROC_ENTRIES:
1447     a->donotstash = PETSC_TRUE;
1448     break;
1449   case MAT_SYMMETRIC:
1450     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1451     break;
1452   case MAT_STRUCTURALLY_SYMMETRIC:
1453     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1454     break;
1455   case MAT_HERMITIAN:
1456     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1457     break;
1458   case MAT_SYMMETRY_ETERNAL:
1459     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1460     break;
1461   default:
1462     SETERRQ1(PETSC_ERR_SUP,"unknown option %d",op);
1463   }
1464   PetscFunctionReturn(0);
1465 }
1466 
#undef __FUNCT__
#define __FUNCT__ "MatGetRow_MPIAIJ"
/*
   Returns one locally-owned row of the parallel matrix, merging the entries of
   the diagonal (A) and off-diagonal (B) blocks into a single list ordered by
   increasing global column number. The returned arrays are the matrix's internal
   work buffers (mat->rowvalues / mat->rowindices); MatRestoreRow_MPIAIJ() must
   be called before the next MatGetRow().
*/
PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
{
  Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
  PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
  PetscErrorCode ierr;
  PetscInt       i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart;
  PetscInt       nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend;
  PetscInt       *cmap,*idx_p;

  PetscFunctionBegin;
  if (mat->getrowactive) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Already active");
  mat->getrowactive = PETSC_TRUE;

  if (!mat->rowvalues && (idx || v)) {
    /*
        allocate enough space to hold information from the longest row.
    */
    Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data;
    PetscInt   max = 1,tmp;
    for (i=0; i<matin->rmap->n; i++) {
      tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
      if (max < tmp) { max = tmp; }
    }
    ierr = PetscMalloc2(max,PetscScalar,&mat->rowvalues,max,PetscInt,&mat->rowindices);CHKERRQ(ierr);
  }

  if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Only local rows")
  lrow = row - rstart;

  /* request from the blocks only the pieces (values/indices) the caller asked for */
  pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
  if (!v)   {pvA = 0; pvB = 0;}
  if (!idx) {pcA = 0; if (!v) pcB = 0;}
  ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
  ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
  nztot = nzA + nzB;

  cmap  = mat->garray; /* maps local B-block column numbers to global column numbers */
  if (v  || idx) {
    if (nztot) {
      /* Sort by increasing column numbers, assuming A and B already sorted */
      PetscInt imark = -1;
      if (v) {
        *v = v_p = mat->rowvalues;
        /* copy B entries whose global column is left of the diagonal block */
        for (i=0; i<nzB; i++) {
          if (cmap[cworkB[i]] < cstart)   v_p[i] = vworkB[i];
          else break;
        }
        imark = i; /* split point: number of B entries left of the diagonal block */
        for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
        for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
      }
      if (idx) {
        *idx = idx_p = mat->rowindices;
        if (imark > -1) {
          /* split point already found while copying the values */
          for (i=0; i<imark; i++) {
            idx_p[i] = cmap[cworkB[i]];
          }
        } else {
          for (i=0; i<nzB; i++) {
            if (cmap[cworkB[i]] < cstart)   idx_p[i] = cmap[cworkB[i]];
            else break;
          }
          imark = i;
        }
        for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart + cworkA[i];
        for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]];
      }
    } else {
      if (idx) *idx = 0;
      if (v)   *v   = 0;
    }
  }
  *nz = nztot;
  ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
  ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1546 
1547 #undef __FUNCT__
1548 #define __FUNCT__ "MatRestoreRow_MPIAIJ"
1549 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1550 {
1551   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1552 
1553   PetscFunctionBegin;
1554   if (!aij->getrowactive) {
1555     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
1556   }
1557   aij->getrowactive = PETSC_FALSE;
1558   PetscFunctionReturn(0);
1559 }
1560 
#undef __FUNCT__
#define __FUNCT__ "MatNorm_MPIAIJ"
/*
   Computes the Frobenius, one (max column sum), or infinity (max row sum) norm
   of the parallel matrix by combining the diagonal (A) and off-diagonal (B)
   blocks and reducing across all processes. The two norm is not supported.
*/
PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  Mat_SeqAIJ     *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data;
  PetscErrorCode ierr;
  PetscInt       i,j,cstart = mat->cmap->rstart;
  PetscReal      sum = 0.0;
  MatScalar      *v;

  PetscFunctionBegin;
  if (aij->size == 1) {
    /* single process: defer to the sequential implementation */
    ierr =  MatNorm(aij->A,type,norm);CHKERRQ(ierr);
  } else {
    if (type == NORM_FROBENIUS) {
      /* local sum of |a_ij|^2 over both blocks, then a global sum and sqrt */
      v = amat->a;
      for (i=0; i<amat->nz; i++) {
#if defined(PETSC_USE_COMPLEX)
        sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
#else
        sum += (*v)*(*v); v++;
#endif
      }
      v = bmat->a;
      for (i=0; i<bmat->nz; i++) {
#if defined(PETSC_USE_COMPLEX)
        sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
#else
        sum += (*v)*(*v); v++;
#endif
      }
      ierr = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPI_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr);
      *norm = sqrt(*norm);
    } else if (type == NORM_1) { /* max column norm */
      /* accumulate |a_ij| per global column, sum across processes, take the max */
      PetscReal *tmp,*tmp2;
      PetscInt  *jj,*garray = aij->garray;
      ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr);
      ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscReal),&tmp2);CHKERRQ(ierr);
      ierr = PetscMemzero(tmp,mat->cmap->N*sizeof(PetscReal));CHKERRQ(ierr);
      *norm = 0.0;
      v = amat->a; jj = amat->j;
      for (j=0; j<amat->nz; j++) {
        /* diagonal-block column indices are local; offset by cstart for global */
        tmp[cstart + *jj++ ] += PetscAbsScalar(*v);  v++;
      }
      v = bmat->a; jj = bmat->j;
      for (j=0; j<bmat->nz; j++) {
        /* garray maps local B columns to global column numbers */
        tmp[garray[*jj++]] += PetscAbsScalar(*v); v++;
      }
      ierr = MPI_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPI_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr);
      for (j=0; j<mat->cmap->N; j++) {
        if (tmp2[j] > *norm) *norm = tmp2[j];
      }
      ierr = PetscFree(tmp);CHKERRQ(ierr);
      ierr = PetscFree(tmp2);CHKERRQ(ierr);
    } else if (type == NORM_INFINITY) { /* max row norm */
      /* local maximum of row sums of |a_ij|, then a global max */
      PetscReal ntemp = 0.0;
      for (j=0; j<aij->A->rmap->n; j++) {
        v = amat->a + amat->i[j];
        sum = 0.0;
        for (i=0; i<amat->i[j+1]-amat->i[j]; i++) {
          sum += PetscAbsScalar(*v); v++;
        }
        v = bmat->a + bmat->i[j];
        for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) {
          sum += PetscAbsScalar(*v); v++;
        }
        if (sum > ntemp) ntemp = sum;
      }
      ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPI_MAX,((PetscObject)mat)->comm);CHKERRQ(ierr);
    } else {
      SETERRQ(PETSC_ERR_SUP,"No support for two norm");
    }
  }
  PetscFunctionReturn(0);
}
1637 
#undef __FUNCT__
#define __FUNCT__ "MatTranspose_MPIAIJ"
/*
   Forms the transpose of a parallel AIJ matrix. Each local row of the diagonal
   block (A) and off-diagonal block (B) is inserted as a column of the result
   with MatSetValues(). For MAT_REUSE_MATRIX with *matout == A the result
   replaces A in place via MatHeaderCopy().

   NOTE: the diagonal block's column indices (Aloc->j) are temporarily shifted
   to global numbering and shifted back afterwards; the two loops doing this
   must stay paired.
*/
PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  Mat_SeqAIJ     *Aloc=(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data;
  PetscErrorCode ierr;
  PetscInt       M = A->rmap->N,N = A->cmap->N,ma,na,mb,*ai,*aj,*bi,*bj,row,*cols,*cols_tmp,i,*d_nnz;
  PetscInt       cstart=A->cmap->rstart,ncol;
  Mat            B;
  MatScalar      *array;

  PetscFunctionBegin;
  if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");

  ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n;
  ai = Aloc->i; aj = Aloc->j;
  bi = Bloc->i; bj = Bloc->j;
  if (reuse == MAT_INITIAL_MATRIX || *matout == A) {
    /* compute d_nnz for preallocation; o_nnz is approximated by d_nnz to avoid communication */
    ierr = PetscMalloc((1+na)*sizeof(PetscInt),&d_nnz);CHKERRQ(ierr);
    ierr = PetscMemzero(d_nnz,(1+na)*sizeof(PetscInt));CHKERRQ(ierr);
    for (i=0; i<ai[ma]; i++){
      d_nnz[aj[i]] ++;
      aj[i] += cstart; /* global col index to be used by MatSetValues() */
    }

    ierr = MatCreate(((PetscObject)A)->comm,&B);CHKERRQ(ierr);
    ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
    ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,d_nnz);CHKERRQ(ierr);
    ierr = PetscFree(d_nnz);CHKERRQ(ierr);
  } else {
    B = *matout;
  }

  /* copy over the A part */
  array = Aloc->a;
  row = A->rmap->rstart;
  for (i=0; i<ma; i++) {
    ncol = ai[i+1]-ai[i];
    /* row i of the diagonal block becomes column "row" of the transpose */
    ierr = MatSetValues(B,ncol,aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
    row++; array += ncol; aj += ncol;
  }
  aj = Aloc->j;
  for (i=0; i<ai[ma]; i++) aj[i] -= cstart; /* resume local col index */

  /* copy over the B part */
  ierr = PetscMalloc(bi[mb]*sizeof(PetscInt),&cols);CHKERRQ(ierr);
  ierr = PetscMemzero(cols,bi[mb]*sizeof(PetscInt));CHKERRQ(ierr);
  array = Bloc->a;
  row = A->rmap->rstart;
  /* translate the off-diagonal block's local column indices to global via garray */
  for (i=0; i<bi[mb]; i++) {cols[i] = a->garray[bj[i]];}
  cols_tmp = cols;
  for (i=0; i<mb; i++) {
    ncol = bi[i+1]-bi[i];
    ierr = MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
    row++; array += ncol; cols_tmp += ncol;
  }
  ierr = PetscFree(cols);CHKERRQ(ierr);

  ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  if (reuse == MAT_INITIAL_MATRIX || *matout != A) {
    *matout = B;
  } else {
    /* in-place case: replace A's contents with B */
    ierr = MatHeaderCopy(A,B);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
1708 
1709 #undef __FUNCT__
1710 #define __FUNCT__ "MatDiagonalScale_MPIAIJ"
1711 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
1712 {
1713   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1714   Mat            a = aij->A,b = aij->B;
1715   PetscErrorCode ierr;
1716   PetscInt       s1,s2,s3;
1717 
1718   PetscFunctionBegin;
1719   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
1720   if (rr) {
1721     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
1722     if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
1723     /* Overlap communication with computation. */
1724     ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1725   }
1726   if (ll) {
1727     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
1728     if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
1729     ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr);
1730   }
1731   /* scale  the diagonal block */
1732   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
1733 
1734   if (rr) {
1735     /* Do a scatter end and then right scale the off-diagonal block */
1736     ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1737     ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr);
1738   }
1739 
1740   PetscFunctionReturn(0);
1741 }
1742 
1743 #undef __FUNCT__
1744 #define __FUNCT__ "MatSetBlockSize_MPIAIJ"
1745 PetscErrorCode MatSetBlockSize_MPIAIJ(Mat A,PetscInt bs)
1746 {
1747   Mat_MPIAIJ     *a   = (Mat_MPIAIJ*)A->data;
1748   PetscErrorCode ierr;
1749 
1750   PetscFunctionBegin;
1751   ierr = MatSetBlockSize(a->A,bs);CHKERRQ(ierr);
1752   ierr = MatSetBlockSize(a->B,bs);CHKERRQ(ierr);
1753   ierr = PetscLayoutSetBlockSize(A->rmap,bs);CHKERRQ(ierr);
1754   ierr = PetscLayoutSetBlockSize(A->cmap,bs);CHKERRQ(ierr);
1755   PetscFunctionReturn(0);
1756 }
1757 #undef __FUNCT__
1758 #define __FUNCT__ "MatSetUnfactored_MPIAIJ"
1759 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A)
1760 {
1761   Mat_MPIAIJ     *a   = (Mat_MPIAIJ*)A->data;
1762   PetscErrorCode ierr;
1763 
1764   PetscFunctionBegin;
1765   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
1766   PetscFunctionReturn(0);
1767 }
1768 
1769 #undef __FUNCT__
1770 #define __FUNCT__ "MatEqual_MPIAIJ"
1771 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscTruth *flag)
1772 {
1773   Mat_MPIAIJ     *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data;
1774   Mat            a,b,c,d;
1775   PetscTruth     flg;
1776   PetscErrorCode ierr;
1777 
1778   PetscFunctionBegin;
1779   a = matA->A; b = matA->B;
1780   c = matB->A; d = matB->B;
1781 
1782   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
1783   if (flg) {
1784     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
1785   }
1786   ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,((PetscObject)A)->comm);CHKERRQ(ierr);
1787   PetscFunctionReturn(0);
1788 }
1789 
1790 #undef __FUNCT__
1791 #define __FUNCT__ "MatCopy_MPIAIJ"
1792 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
1793 {
1794   PetscErrorCode ierr;
1795   Mat_MPIAIJ     *a = (Mat_MPIAIJ *)A->data;
1796   Mat_MPIAIJ     *b = (Mat_MPIAIJ *)B->data;
1797 
1798   PetscFunctionBegin;
1799   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
1800   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
1801     /* because of the column compression in the off-processor part of the matrix a->B,
1802        the number of columns in a->B and b->B may be different, hence we cannot call
1803        the MatCopy() directly on the two parts. If need be, we can provide a more
1804        efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices
1805        then copying the submatrices */
1806     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
1807   } else {
1808     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
1809     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
1810   }
1811   PetscFunctionReturn(0);
1812 }
1813 
1814 #undef __FUNCT__
1815 #define __FUNCT__ "MatSetUpPreallocation_MPIAIJ"
1816 PetscErrorCode MatSetUpPreallocation_MPIAIJ(Mat A)
1817 {
1818   PetscErrorCode ierr;
1819 
1820   PetscFunctionBegin;
1821   ierr =  MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
1822   PetscFunctionReturn(0);
1823 }
1824 
#undef __FUNCT__
#define __FUNCT__ "MatAXPY_MPIAIJ"
/*
   Computes Y = a*X + Y.
   - SAME_NONZERO_PATTERN: the nonzero arrays of matching blocks are combined
     directly with a BLAS axpy.
   - SUBSET_NONZERO_PATTERN: the diagonal blocks go through MatAXPY_SeqAIJ();
     for the off-diagonal blocks a mapping (y->xtoy) from X's nonzeros into Y's
     slots is built once and cached on Y, keyed to X's B block (y->XtoY).
   - otherwise: fall back to the generic MatAXPY_Basic().
*/
PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
{
  PetscErrorCode ierr;
  PetscInt       i;
  Mat_MPIAIJ     *xx = (Mat_MPIAIJ *)X->data,*yy = (Mat_MPIAIJ *)Y->data;
  PetscBLASInt   bnz,one=1;
  Mat_SeqAIJ     *x,*y;

  PetscFunctionBegin;
  if (str == SAME_NONZERO_PATTERN) {
    PetscScalar alpha = a;
    x = (Mat_SeqAIJ *)xx->A->data;
    y = (Mat_SeqAIJ *)yy->A->data;
    bnz = PetscBLASIntCast(x->nz);
    BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
    x = (Mat_SeqAIJ *)xx->B->data;
    y = (Mat_SeqAIJ *)yy->B->data;
    bnz = PetscBLASIntCast(x->nz);
    BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
  } else if (str == SUBSET_NONZERO_PATTERN) {
    ierr = MatAXPY_SeqAIJ(yy->A,a,xx->A,str);CHKERRQ(ierr);

    x = (Mat_SeqAIJ *)xx->B->data;
    y = (Mat_SeqAIJ *)yy->B->data;
    /* discard a cached mapping that was built against a different X */
    if (y->xtoy && y->XtoY != xx->B) {
      ierr = PetscFree(y->xtoy);CHKERRQ(ierr);
      ierr = MatDestroy(y->XtoY);CHKERRQ(ierr);
    }
    if (!y->xtoy) { /* get xtoy */
      ierr = MatAXPYGetxtoy_Private(xx->B->rmap->n,x->i,x->j,xx->garray,y->i,y->j,yy->garray,&y->xtoy);CHKERRQ(ierr);
      y->XtoY = xx->B;
      ierr = PetscObjectReference((PetscObject)xx->B);CHKERRQ(ierr);
    }
    /* scatter-add X's nonzeros into the matching slots of Y */
    for (i=0; i<x->nz; i++) y->a[y->xtoy[i]] += a*(x->a[i]);
  } else {
    ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
1866 
1867 EXTERN PetscErrorCode PETSCMAT_DLLEXPORT MatConjugate_SeqAIJ(Mat);
1868 
1869 #undef __FUNCT__
1870 #define __FUNCT__ "MatConjugate_MPIAIJ"
1871 PetscErrorCode PETSCMAT_DLLEXPORT MatConjugate_MPIAIJ(Mat mat)
1872 {
1873 #if defined(PETSC_USE_COMPLEX)
1874   PetscErrorCode ierr;
1875   Mat_MPIAIJ     *aij = (Mat_MPIAIJ *)mat->data;
1876 
1877   PetscFunctionBegin;
1878   ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr);
1879   ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr);
1880 #else
1881   PetscFunctionBegin;
1882 #endif
1883   PetscFunctionReturn(0);
1884 }
1885 
1886 #undef __FUNCT__
1887 #define __FUNCT__ "MatRealPart_MPIAIJ"
1888 PetscErrorCode MatRealPart_MPIAIJ(Mat A)
1889 {
1890   Mat_MPIAIJ   *a = (Mat_MPIAIJ*)A->data;
1891   PetscErrorCode ierr;
1892 
1893   PetscFunctionBegin;
1894   ierr = MatRealPart(a->A);CHKERRQ(ierr);
1895   ierr = MatRealPart(a->B);CHKERRQ(ierr);
1896   PetscFunctionReturn(0);
1897 }
1898 
1899 #undef __FUNCT__
1900 #define __FUNCT__ "MatImaginaryPart_MPIAIJ"
1901 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A)
1902 {
1903   Mat_MPIAIJ   *a = (Mat_MPIAIJ*)A->data;
1904   PetscErrorCode ierr;
1905 
1906   PetscFunctionBegin;
1907   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
1908   ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
1909   PetscFunctionReturn(0);
1910 }
1911 
1912 #ifdef PETSC_HAVE_PBGL
1913 
#include <boost/parallel/mpi/bsp_process_group.hpp>
#include <boost/graph/distributed/ilu_default_graph.hpp>
#include <boost/graph/distributed/ilu_0_block.hpp>
#include <boost/graph/distributed/ilu_preconditioner.hpp>
#include <boost/graph/distributed/petsc/interface.hpp>
#include <boost/multi_array.hpp>
#include <boost/parallel/distributed_property_map.hpp>
1921 
1922 #undef __FUNCT__
1923 #define __FUNCT__ "MatILUFactorSymbolic_MPIAIJ"
1924 /*
1925   This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu>
1926 */
1927 PetscErrorCode MatILUFactorSymbolic_MPIAIJ(Mat fact,Mat A, IS isrow, IS iscol, const MatFactorInfo *info)
1928 {
1929   namespace petsc = boost::distributed::petsc;
1930 
1931   namespace graph_dist = boost::graph::distributed;
1932   using boost::graph::distributed::ilu_default::process_group_type;
1933   using boost::graph::ilu_permuted;
1934 
1935   PetscTruth      row_identity, col_identity;
1936   PetscContainer  c;
1937   PetscInt        m, n, M, N;
1938   PetscErrorCode  ierr;
1939 
1940   PetscFunctionBegin;
1941   if (info->levels != 0) SETERRQ(PETSC_ERR_SUP,"Only levels = 0 supported for parallel ilu");
1942   ierr = ISIdentity(isrow, &row_identity);CHKERRQ(ierr);
1943   ierr = ISIdentity(iscol, &col_identity);CHKERRQ(ierr);
1944   if (!row_identity || !col_identity) {
1945     SETERRQ(PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for parallel ILU");
1946   }
1947 
1948   process_group_type pg;
1949   typedef graph_dist::ilu_default::ilu_level_graph_type  lgraph_type;
1950   lgraph_type*   lgraph_p = new lgraph_type(petsc::num_global_vertices(A), pg, petsc::matrix_distribution(A, pg));
1951   lgraph_type&   level_graph = *lgraph_p;
1952   graph_dist::ilu_default::graph_type&            graph(level_graph.graph);
1953 
1954   petsc::read_matrix(A, graph, get(boost::edge_weight, graph));
1955   ilu_permuted(level_graph);
1956 
1957   /* put together the new matrix */
1958   ierr = MatCreate(((PetscObject)A)->comm, fact);CHKERRQ(ierr);
1959   ierr = MatGetLocalSize(A, &m, &n);CHKERRQ(ierr);
1960   ierr = MatGetSize(A, &M, &N);CHKERRQ(ierr);
1961   ierr = MatSetSizes(fact, m, n, M, N);CHKERRQ(ierr);
1962   ierr = MatSetType(fact, ((PetscObject)A)->type_name);CHKERRQ(ierr);
1963   ierr = MatAssemblyBegin(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1964   ierr = MatAssemblyEnd(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1965 
1966   ierr = PetscContainerCreate(((PetscObject)A)->comm, &c);
1967   ierr = PetscContainerSetPointer(c, lgraph_p);
1968   ierr = PetscObjectCompose((PetscObject) (fact), "graph", (PetscObject) c);
1969   PetscFunctionReturn(0);
1970 }
1971 
#undef __FUNCT__
#define __FUNCT__ "MatLUFactorNumeric_MPIAIJ"
/*
  Intentionally a no-op: with the PBGL-based parallel ILU the factorization work
  appears to be done entirely in MatILUFactorSymbolic_MPIAIJ() (via ilu_permuted),
  leaving nothing for a separate numeric phase -- TODO confirm.
*/
PetscErrorCode MatLUFactorNumeric_MPIAIJ(Mat B,Mat A, const MatFactorInfo *info)
{
  PetscFunctionBegin;
  PetscFunctionReturn(0);
}
1979 
1980 #undef __FUNCT__
1981 #define __FUNCT__ "MatSolve_MPIAIJ"
1982 /*
1983   This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu>
1984 */
1985 PetscErrorCode MatSolve_MPIAIJ(Mat A, Vec b, Vec x)
1986 {
1987   namespace graph_dist = boost::graph::distributed;
1988 
1989   typedef graph_dist::ilu_default::ilu_level_graph_type  lgraph_type;
1990   lgraph_type*   lgraph_p;
1991   PetscContainer c;
1992   PetscErrorCode ierr;
1993 
1994   PetscFunctionBegin;
1995   ierr = PetscObjectQuery((PetscObject) A, "graph", (PetscObject *) &c);CHKERRQ(ierr);
1996   ierr = PetscContainerGetPointer(c, (void **) &lgraph_p);CHKERRQ(ierr);
1997   ierr = VecCopy(b, x);CHKERRQ(ierr);
1998 
1999   PetscScalar* array_x;
2000   ierr = VecGetArray(x, &array_x);CHKERRQ(ierr);
2001   PetscInt sx;
2002   ierr = VecGetSize(x, &sx);CHKERRQ(ierr);
2003 
2004   PetscScalar* array_b;
2005   ierr = VecGetArray(b, &array_b);CHKERRQ(ierr);
2006   PetscInt sb;
2007   ierr = VecGetSize(b, &sb);CHKERRQ(ierr);
2008 
2009   lgraph_type&   level_graph = *lgraph_p;
2010   graph_dist::ilu_default::graph_type&            graph(level_graph.graph);
2011 
2012   typedef boost::multi_array_ref<PetscScalar, 1> array_ref_type;
2013   array_ref_type                                 ref_b(array_b, boost::extents[num_vertices(graph)]),
2014                                                  ref_x(array_x, boost::extents[num_vertices(graph)]);
2015 
2016   typedef boost::iterator_property_map<array_ref_type::iterator,
2017                                 boost::property_map<graph_dist::ilu_default::graph_type, boost::vertex_index_t>::type>  gvector_type;
2018   gvector_type                                   vector_b(ref_b.begin(), get(boost::vertex_index, graph)),
2019                                                  vector_x(ref_x.begin(), get(boost::vertex_index, graph));
2020 
2021   ilu_set_solve(*lgraph_p, vector_b, vector_x);
2022 
2023   PetscFunctionReturn(0);
2024 }
2025 #endif
2026 
typedef struct { /* used by MatGetRedundantMatrix() for reusing matredundant */
  PetscInt       nzlocal,nsends,nrecvs;              /* local nonzero count; number of sends/receives this process takes part in */
  PetscMPIInt    *send_rank,*recv_rank;              /* ranks this process sends to / receives from */
  PetscInt       *sbuf_nz,*rbuf_nz,*sbuf_j,**rbuf_j; /* send/recv buffers for nonzero counts and column-index data */
  PetscScalar    *sbuf_a,**rbuf_a;                   /* send/recv buffers for the matrix values */
  PetscErrorCode (*MatDestroy)(Mat);                 /* original destroy routine, reinstalled by MatDestroy_MatRedundant() */
} Mat_Redundant;
2034 
2035 #undef __FUNCT__
2036 #define __FUNCT__ "PetscContainerDestroy_MatRedundant"
2037 PetscErrorCode PetscContainerDestroy_MatRedundant(void *ptr)
2038 {
2039   PetscErrorCode       ierr;
2040   Mat_Redundant        *redund=(Mat_Redundant*)ptr;
2041   PetscInt             i;
2042 
2043   PetscFunctionBegin;
2044   ierr = PetscFree2(redund->send_rank,redund->recv_rank);CHKERRQ(ierr);
2045   ierr = PetscFree(redund->sbuf_j);CHKERRQ(ierr);
2046   ierr = PetscFree(redund->sbuf_a);CHKERRQ(ierr);
2047   for (i=0; i<redund->nrecvs; i++){
2048     ierr = PetscFree(redund->rbuf_j[i]);CHKERRQ(ierr);
2049     ierr = PetscFree(redund->rbuf_a[i]);CHKERRQ(ierr);
2050   }
2051   ierr = PetscFree4(redund->sbuf_nz,redund->rbuf_nz,redund->rbuf_j,redund->rbuf_a);CHKERRQ(ierr);
2052   ierr = PetscFree(redund);CHKERRQ(ierr);
2053   PetscFunctionReturn(0);
2054 }
2055 
2056 #undef __FUNCT__
2057 #define __FUNCT__ "MatDestroy_MatRedundant"
2058 PetscErrorCode MatDestroy_MatRedundant(Mat A)
2059 {
2060   PetscErrorCode  ierr;
2061   PetscContainer  container;
2062   Mat_Redundant   *redund=PETSC_NULL;
2063 
2064   PetscFunctionBegin;
2065   ierr = PetscObjectQuery((PetscObject)A,"Mat_Redundant",(PetscObject *)&container);CHKERRQ(ierr);
2066   if (container) {
2067     ierr = PetscContainerGetPointer(container,(void **)&redund);CHKERRQ(ierr);
2068   } else {
2069     SETERRQ(PETSC_ERR_PLIB,"Container does not exit");
2070   }
2071   A->ops->destroy = redund->MatDestroy;
2072   ierr = PetscObjectCompose((PetscObject)A,"Mat_Redundant",0);CHKERRQ(ierr);
2073   ierr = (*A->ops->destroy)(A);CHKERRQ(ierr);
2074   ierr = PetscContainerDestroy(container);CHKERRQ(ierr);
2075   PetscFunctionReturn(0);
2076 }
2077 
2078 #undef __FUNCT__
2079 #define __FUNCT__ "MatGetRedundantMatrix_MPIAIJ"
2080 PetscErrorCode MatGetRedundantMatrix_MPIAIJ(Mat mat,PetscInt nsubcomm,MPI_Comm subcomm,PetscInt mlocal_sub,MatReuse reuse,Mat *matredundant)
2081 {
2082   PetscMPIInt    rank,size;
2083   MPI_Comm       comm=((PetscObject)mat)->comm;
2084   PetscErrorCode ierr;
2085   PetscInt       nsends=0,nrecvs=0,i,rownz_max=0;
2086   PetscMPIInt    *send_rank=PETSC_NULL,*recv_rank=PETSC_NULL;
2087   PetscInt       *rowrange=mat->rmap->range;
2088   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2089   Mat            A=aij->A,B=aij->B,C=*matredundant;
2090   Mat_SeqAIJ     *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data;
2091   PetscScalar    *sbuf_a;
2092   PetscInt       nzlocal=a->nz+b->nz;
2093   PetscInt       j,cstart=mat->cmap->rstart,cend=mat->cmap->rend,row,nzA,nzB,ncols,*cworkA,*cworkB;
2094   PetscInt       rstart=mat->rmap->rstart,rend=mat->rmap->rend,*bmap=aij->garray,M,N;
2095   PetscInt       *cols,ctmp,lwrite,*rptr,l,*sbuf_j;
2096   MatScalar      *aworkA,*aworkB;
2097   PetscScalar    *vals;
2098   PetscMPIInt    tag1,tag2,tag3,imdex;
2099   MPI_Request    *s_waits1=PETSC_NULL,*s_waits2=PETSC_NULL,*s_waits3=PETSC_NULL,
2100                  *r_waits1=PETSC_NULL,*r_waits2=PETSC_NULL,*r_waits3=PETSC_NULL;
2101   MPI_Status     recv_status,*send_status;
2102   PetscInt       *sbuf_nz=PETSC_NULL,*rbuf_nz=PETSC_NULL,count;
2103   PetscInt       **rbuf_j=PETSC_NULL;
2104   PetscScalar    **rbuf_a=PETSC_NULL;
2105   Mat_Redundant  *redund=PETSC_NULL;
2106   PetscContainer container;
2107 
2108   PetscFunctionBegin;
2109   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2110   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2111 
2112   if (reuse == MAT_REUSE_MATRIX) {
2113     ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr);
2114     if (M != N || M != mat->rmap->N) SETERRQ(PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong global size");
2115     ierr = MatGetLocalSize(C,&M,&N);CHKERRQ(ierr);
2116     if (M != N || M != mlocal_sub) SETERRQ(PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong local size");
2117     ierr = PetscObjectQuery((PetscObject)C,"Mat_Redundant",(PetscObject *)&container);CHKERRQ(ierr);
2118     if (container) {
2119       ierr = PetscContainerGetPointer(container,(void **)&redund);CHKERRQ(ierr);
2120     } else {
2121       SETERRQ(PETSC_ERR_PLIB,"Container does not exit");
2122     }
2123     if (nzlocal != redund->nzlocal) SETERRQ(PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong nzlocal");
2124 
2125     nsends    = redund->nsends;
2126     nrecvs    = redund->nrecvs;
2127     send_rank = redund->send_rank;
2128     recv_rank = redund->recv_rank;
2129     sbuf_nz   = redund->sbuf_nz;
2130     rbuf_nz   = redund->rbuf_nz;
2131     sbuf_j    = redund->sbuf_j;
2132     sbuf_a    = redund->sbuf_a;
2133     rbuf_j    = redund->rbuf_j;
2134     rbuf_a    = redund->rbuf_a;
2135   }
2136 
2137   if (reuse == MAT_INITIAL_MATRIX){
2138     PetscMPIInt  subrank,subsize;
2139     PetscInt     nleftover,np_subcomm;
2140     /* get the destination processors' id send_rank, nsends and nrecvs */
2141     ierr = MPI_Comm_rank(subcomm,&subrank);CHKERRQ(ierr);
2142     ierr = MPI_Comm_size(subcomm,&subsize);CHKERRQ(ierr);
2143     ierr = PetscMalloc2(size,PetscMPIInt,&send_rank,size,PetscMPIInt,&recv_rank);
2144     np_subcomm = size/nsubcomm;
2145     nleftover  = size - nsubcomm*np_subcomm;
2146     nsends = 0; nrecvs = 0;
2147     for (i=0; i<size; i++){ /* i=rank*/
2148       if (subrank == i/nsubcomm && rank != i){ /* my_subrank == other's subrank */
2149         send_rank[nsends] = i; nsends++;
2150         recv_rank[nrecvs++] = i;
2151       }
2152     }
2153     if (rank >= size - nleftover){/* this proc is a leftover processor */
2154       i = size-nleftover-1;
2155       j = 0;
2156       while (j < nsubcomm - nleftover){
2157         send_rank[nsends++] = i;
2158         i--; j++;
2159       }
2160     }
2161 
2162     if (nleftover && subsize == size/nsubcomm && subrank==subsize-1){ /* this proc recvs from leftover processors */
2163       for (i=0; i<nleftover; i++){
2164         recv_rank[nrecvs++] = size-nleftover+i;
2165       }
2166     }
2167 
2168     /* allocate sbuf_j, sbuf_a */
2169     i = nzlocal + rowrange[rank+1] - rowrange[rank] + 2;
2170     ierr = PetscMalloc(i*sizeof(PetscInt),&sbuf_j);CHKERRQ(ierr);
2171     ierr = PetscMalloc((nzlocal+1)*sizeof(PetscScalar),&sbuf_a);CHKERRQ(ierr);
2172   } /* endof if (reuse == MAT_INITIAL_MATRIX) */
2173 
2174   /* copy mat's local entries into the buffers */
2175   if (reuse == MAT_INITIAL_MATRIX){
2176     rownz_max = 0;
2177     rptr = sbuf_j;
2178     cols = sbuf_j + rend-rstart + 1;
2179     vals = sbuf_a;
2180     rptr[0] = 0;
2181     for (i=0; i<rend-rstart; i++){
2182       row = i + rstart;
2183       nzA    = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i];
2184       ncols  = nzA + nzB;
2185       cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i];
2186       aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i];
2187       /* load the column indices for this row into cols */
2188       lwrite = 0;
2189       for (l=0; l<nzB; l++) {
2190         if ((ctmp = bmap[cworkB[l]]) < cstart){
2191           vals[lwrite]   = aworkB[l];
2192           cols[lwrite++] = ctmp;
2193         }
2194       }
2195       for (l=0; l<nzA; l++){
2196         vals[lwrite]   = aworkA[l];
2197         cols[lwrite++] = cstart + cworkA[l];
2198       }
2199       for (l=0; l<nzB; l++) {
2200         if ((ctmp = bmap[cworkB[l]]) >= cend){
2201           vals[lwrite]   = aworkB[l];
2202           cols[lwrite++] = ctmp;
2203         }
2204       }
2205       vals += ncols;
2206       cols += ncols;
2207       rptr[i+1] = rptr[i] + ncols;
2208       if (rownz_max < ncols) rownz_max = ncols;
2209     }
2210     if (rptr[rend-rstart] != a->nz + b->nz) SETERRQ4(1, "rptr[%d] %d != %d + %d",rend-rstart,rptr[rend-rstart+1],a->nz,b->nz);
2211   } else { /* only copy matrix values into sbuf_a */
2212     rptr = sbuf_j;
2213     vals = sbuf_a;
2214     rptr[0] = 0;
2215     for (i=0; i<rend-rstart; i++){
2216       row = i + rstart;
2217       nzA    = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i];
2218       ncols  = nzA + nzB;
2219       cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i];
2220       aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i];
2221       lwrite = 0;
2222       for (l=0; l<nzB; l++) {
2223         if ((ctmp = bmap[cworkB[l]]) < cstart) vals[lwrite++] = aworkB[l];
2224       }
2225       for (l=0; l<nzA; l++) vals[lwrite++] = aworkA[l];
2226       for (l=0; l<nzB; l++) {
2227         if ((ctmp = bmap[cworkB[l]]) >= cend) vals[lwrite++] = aworkB[l];
2228       }
2229       vals += ncols;
2230       rptr[i+1] = rptr[i] + ncols;
2231     }
2232   } /* endof if (reuse == MAT_INITIAL_MATRIX) */
2233 
2234   /* send nzlocal to others, and recv other's nzlocal */
2235   /*--------------------------------------------------*/
2236   if (reuse == MAT_INITIAL_MATRIX){
2237     ierr = PetscMalloc2(3*(nsends + nrecvs)+1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr);
2238     s_waits2 = s_waits3 + nsends;
2239     s_waits1 = s_waits2 + nsends;
2240     r_waits1 = s_waits1 + nsends;
2241     r_waits2 = r_waits1 + nrecvs;
2242     r_waits3 = r_waits2 + nrecvs;
2243   } else {
2244     ierr = PetscMalloc2(nsends + nrecvs +1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr);
2245     r_waits3 = s_waits3 + nsends;
2246   }
2247 
2248   ierr = PetscObjectGetNewTag((PetscObject)mat,&tag3);CHKERRQ(ierr);
2249   if (reuse == MAT_INITIAL_MATRIX){
2250     /* get new tags to keep the communication clean */
2251     ierr = PetscObjectGetNewTag((PetscObject)mat,&tag1);CHKERRQ(ierr);
2252     ierr = PetscObjectGetNewTag((PetscObject)mat,&tag2);CHKERRQ(ierr);
2253     ierr = PetscMalloc4(nsends,PetscInt,&sbuf_nz,nrecvs,PetscInt,&rbuf_nz,nrecvs,PetscInt*,&rbuf_j,nrecvs,PetscScalar*,&rbuf_a);CHKERRQ(ierr);
2254 
2255     /* post receives of other's nzlocal */
2256     for (i=0; i<nrecvs; i++){
2257       ierr = MPI_Irecv(rbuf_nz+i,1,MPIU_INT,MPI_ANY_SOURCE,tag1,comm,r_waits1+i);CHKERRQ(ierr);
2258     }
2259     /* send nzlocal to others */
2260     for (i=0; i<nsends; i++){
2261       sbuf_nz[i] = nzlocal;
2262       ierr = MPI_Isend(sbuf_nz+i,1,MPIU_INT,send_rank[i],tag1,comm,s_waits1+i);CHKERRQ(ierr);
2263     }
2264     /* wait on receives of nzlocal; allocate space for rbuf_j, rbuf_a */
2265     count = nrecvs;
2266     while (count) {
2267       ierr = MPI_Waitany(nrecvs,r_waits1,&imdex,&recv_status);CHKERRQ(ierr);
2268       recv_rank[imdex] = recv_status.MPI_SOURCE;
2269       /* allocate rbuf_a and rbuf_j; then post receives of rbuf_j */
2270       ierr = PetscMalloc((rbuf_nz[imdex]+1)*sizeof(PetscScalar),&rbuf_a[imdex]);CHKERRQ(ierr);
2271 
2272       i = rowrange[recv_status.MPI_SOURCE+1] - rowrange[recv_status.MPI_SOURCE]; /* number of expected mat->i */
2273       rbuf_nz[imdex] += i + 2;
2274       ierr = PetscMalloc(rbuf_nz[imdex]*sizeof(PetscInt),&rbuf_j[imdex]);CHKERRQ(ierr);
2275       ierr = MPI_Irecv(rbuf_j[imdex],rbuf_nz[imdex],MPIU_INT,recv_status.MPI_SOURCE,tag2,comm,r_waits2+imdex);CHKERRQ(ierr);
2276       count--;
2277     }
2278     /* wait on sends of nzlocal */
2279     if (nsends) {ierr = MPI_Waitall(nsends,s_waits1,send_status);CHKERRQ(ierr);}
2280     /* send mat->i,j to others, and recv from other's */
2281     /*------------------------------------------------*/
2282     for (i=0; i<nsends; i++){
2283       j = nzlocal + rowrange[rank+1] - rowrange[rank] + 1;
2284       ierr = MPI_Isend(sbuf_j,j,MPIU_INT,send_rank[i],tag2,comm,s_waits2+i);CHKERRQ(ierr);
2285     }
2286     /* wait on receives of mat->i,j */
2287     /*------------------------------*/
2288     count = nrecvs;
2289     while (count) {
2290       ierr = MPI_Waitany(nrecvs,r_waits2,&imdex,&recv_status);CHKERRQ(ierr);
2291       if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE);
2292       count--;
2293     }
2294     /* wait on sends of mat->i,j */
2295     /*---------------------------*/
2296     if (nsends) {
2297       ierr = MPI_Waitall(nsends,s_waits2,send_status);CHKERRQ(ierr);
2298     }
2299   } /* endof if (reuse == MAT_INITIAL_MATRIX) */
2300 
2301   /* post receives, send and receive mat->a */
2302   /*----------------------------------------*/
2303   for (imdex=0; imdex<nrecvs; imdex++) {
2304     ierr = MPI_Irecv(rbuf_a[imdex],rbuf_nz[imdex],MPIU_SCALAR,recv_rank[imdex],tag3,comm,r_waits3+imdex);CHKERRQ(ierr);
2305   }
2306   for (i=0; i<nsends; i++){
2307     ierr = MPI_Isend(sbuf_a,nzlocal,MPIU_SCALAR,send_rank[i],tag3,comm,s_waits3+i);CHKERRQ(ierr);
2308   }
2309   count = nrecvs;
2310   while (count) {
2311     ierr = MPI_Waitany(nrecvs,r_waits3,&imdex,&recv_status);CHKERRQ(ierr);
2312     if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE);
2313     count--;
2314   }
2315   if (nsends) {
2316     ierr = MPI_Waitall(nsends,s_waits3,send_status);CHKERRQ(ierr);
2317   }
2318 
2319   ierr = PetscFree2(s_waits3,send_status);CHKERRQ(ierr);
2320 
2321   /* create redundant matrix */
2322   /*-------------------------*/
2323   if (reuse == MAT_INITIAL_MATRIX){
2324     /* compute rownz_max for preallocation */
2325     for (imdex=0; imdex<nrecvs; imdex++){
2326       j = rowrange[recv_rank[imdex]+1] - rowrange[recv_rank[imdex]];
2327       rptr = rbuf_j[imdex];
2328       for (i=0; i<j; i++){
2329         ncols = rptr[i+1] - rptr[i];
2330         if (rownz_max < ncols) rownz_max = ncols;
2331       }
2332     }
2333 
2334     ierr = MatCreate(subcomm,&C);CHKERRQ(ierr);
2335     ierr = MatSetSizes(C,mlocal_sub,mlocal_sub,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr);
2336     ierr = MatSetFromOptions(C);CHKERRQ(ierr);
2337     ierr = MatSeqAIJSetPreallocation(C,rownz_max,PETSC_NULL);CHKERRQ(ierr);
2338     ierr = MatMPIAIJSetPreallocation(C,rownz_max,PETSC_NULL,rownz_max,PETSC_NULL);CHKERRQ(ierr);
2339   } else {
2340     C = *matredundant;
2341   }
2342 
2343   /* insert local matrix entries */
2344   rptr = sbuf_j;
2345   cols = sbuf_j + rend-rstart + 1;
2346   vals = sbuf_a;
2347   for (i=0; i<rend-rstart; i++){
2348     row   = i + rstart;
2349     ncols = rptr[i+1] - rptr[i];
2350     ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
2351     vals += ncols;
2352     cols += ncols;
2353   }
2354   /* insert received matrix entries */
2355   for (imdex=0; imdex<nrecvs; imdex++){
2356     rstart = rowrange[recv_rank[imdex]];
2357     rend   = rowrange[recv_rank[imdex]+1];
2358     rptr = rbuf_j[imdex];
2359     cols = rbuf_j[imdex] + rend-rstart + 1;
2360     vals = rbuf_a[imdex];
2361     for (i=0; i<rend-rstart; i++){
2362       row   = i + rstart;
2363       ncols = rptr[i+1] - rptr[i];
2364       ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
2365       vals += ncols;
2366       cols += ncols;
2367     }
2368   }
2369   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2370   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2371   ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr);
2372   if (M != mat->rmap->N || N != mat->cmap->N) SETERRQ2(PETSC_ERR_ARG_INCOMP,"redundant mat size %d != input mat size %d",M,mat->rmap->N);
2373   if (reuse == MAT_INITIAL_MATRIX){
2374     PetscContainer container;
2375     *matredundant = C;
2376     /* create a supporting struct and attach it to C for reuse */
2377     ierr = PetscNewLog(C,Mat_Redundant,&redund);CHKERRQ(ierr);
2378     ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
2379     ierr = PetscContainerSetPointer(container,redund);CHKERRQ(ierr);
2380     ierr = PetscObjectCompose((PetscObject)C,"Mat_Redundant",(PetscObject)container);CHKERRQ(ierr);
2381     ierr = PetscContainerSetUserDestroy(container,PetscContainerDestroy_MatRedundant);CHKERRQ(ierr);
2382 
2383     redund->nzlocal = nzlocal;
2384     redund->nsends  = nsends;
2385     redund->nrecvs  = nrecvs;
2386     redund->send_rank = send_rank;
2387     redund->recv_rank = recv_rank;
2388     redund->sbuf_nz = sbuf_nz;
2389     redund->rbuf_nz = rbuf_nz;
2390     redund->sbuf_j  = sbuf_j;
2391     redund->sbuf_a  = sbuf_a;
2392     redund->rbuf_j  = rbuf_j;
2393     redund->rbuf_a  = rbuf_a;
2394 
2395     redund->MatDestroy = C->ops->destroy;
2396     C->ops->destroy    = MatDestroy_MatRedundant;
2397   }
2398   PetscFunctionReturn(0);
2399 }
2400 
2401 #undef __FUNCT__
2402 #define __FUNCT__ "MatGetRowMaxAbs_MPIAIJ"
2403 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2404 {
2405   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2406   PetscErrorCode ierr;
2407   PetscInt       i,*idxb = 0;
2408   PetscScalar    *va,*vb;
2409   Vec            vtmp;
2410 
2411   PetscFunctionBegin;
2412   ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr);
2413   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2414   if (idx) {
2415     for (i=0; i<A->rmap->n; i++) {
2416       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2417     }
2418   }
2419 
2420   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
2421   if (idx) {
2422     ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr);
2423   }
2424   ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
2425   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
2426 
2427   for (i=0; i<A->rmap->n; i++){
2428     if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
2429       va[i] = vb[i];
2430       if (idx) idx[i] = a->garray[idxb[i]];
2431     }
2432   }
2433 
2434   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2435   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
2436   if (idxb) {
2437     ierr = PetscFree(idxb);CHKERRQ(ierr);
2438   }
2439   ierr = VecDestroy(vtmp);CHKERRQ(ierr);
2440   PetscFunctionReturn(0);
2441 }
2442 
2443 #undef __FUNCT__
2444 #define __FUNCT__ "MatGetRowMinAbs_MPIAIJ"
2445 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2446 {
2447   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2448   PetscErrorCode ierr;
2449   PetscInt       i,*idxb = 0;
2450   PetscScalar    *va,*vb;
2451   Vec            vtmp;
2452 
2453   PetscFunctionBegin;
2454   ierr = MatGetRowMinAbs(a->A,v,idx);CHKERRQ(ierr);
2455   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2456   if (idx) {
2457     for (i=0; i<A->cmap->n; i++) {
2458       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2459     }
2460   }
2461 
2462   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
2463   if (idx) {
2464     ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr);
2465   }
2466   ierr = MatGetRowMinAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
2467   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
2468 
2469   for (i=0; i<A->rmap->n; i++){
2470     if (PetscAbsScalar(va[i]) > PetscAbsScalar(vb[i])) {
2471       va[i] = vb[i];
2472       if (idx) idx[i] = a->garray[idxb[i]];
2473     }
2474   }
2475 
2476   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2477   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
2478   if (idxb) {
2479     ierr = PetscFree(idxb);CHKERRQ(ierr);
2480   }
2481   ierr = VecDestroy(vtmp);CHKERRQ(ierr);
2482   PetscFunctionReturn(0);
2483 }
2484 
2485 #undef __FUNCT__
2486 #define __FUNCT__ "MatGetRowMin_MPIAIJ"
2487 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2488 {
2489   Mat_MPIAIJ    *mat    = (Mat_MPIAIJ *) A->data;
2490   PetscInt       n      = A->rmap->n;
2491   PetscInt       cstart = A->cmap->rstart;
2492   PetscInt      *cmap   = mat->garray;
2493   PetscInt      *diagIdx, *offdiagIdx;
2494   Vec            diagV, offdiagV;
2495   PetscScalar   *a, *diagA, *offdiagA;
2496   PetscInt       r;
2497   PetscErrorCode ierr;
2498 
2499   PetscFunctionBegin;
2500   ierr = PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);CHKERRQ(ierr);
2501   ierr = VecCreateSeq(((PetscObject)A)->comm, n, &diagV);CHKERRQ(ierr);
2502   ierr = VecCreateSeq(((PetscObject)A)->comm, n, &offdiagV);CHKERRQ(ierr);
2503   ierr = MatGetRowMin(mat->A, diagV,    diagIdx);CHKERRQ(ierr);
2504   ierr = MatGetRowMin(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
2505   ierr = VecGetArray(v,        &a);CHKERRQ(ierr);
2506   ierr = VecGetArray(diagV,    &diagA);CHKERRQ(ierr);
2507   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2508   for(r = 0; r < n; ++r) {
2509     if (PetscAbsScalar(diagA[r]) <= PetscAbsScalar(offdiagA[r])) {
2510       a[r]   = diagA[r];
2511       idx[r] = cstart + diagIdx[r];
2512     } else {
2513       a[r]   = offdiagA[r];
2514       idx[r] = cmap[offdiagIdx[r]];
2515     }
2516   }
2517   ierr = VecRestoreArray(v,        &a);CHKERRQ(ierr);
2518   ierr = VecRestoreArray(diagV,    &diagA);CHKERRQ(ierr);
2519   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2520   ierr = VecDestroy(diagV);CHKERRQ(ierr);
2521   ierr = VecDestroy(offdiagV);CHKERRQ(ierr);
2522   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
2523   PetscFunctionReturn(0);
2524 }
2525 
2526 #undef __FUNCT__
2527 #define __FUNCT__ "MatGetRowMax_MPIAIJ"
2528 PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2529 {
2530   Mat_MPIAIJ    *mat    = (Mat_MPIAIJ *) A->data;
2531   PetscInt       n      = A->rmap->n;
2532   PetscInt       cstart = A->cmap->rstart;
2533   PetscInt      *cmap   = mat->garray;
2534   PetscInt      *diagIdx, *offdiagIdx;
2535   Vec            diagV, offdiagV;
2536   PetscScalar   *a, *diagA, *offdiagA;
2537   PetscInt       r;
2538   PetscErrorCode ierr;
2539 
2540   PetscFunctionBegin;
2541   ierr = PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);CHKERRQ(ierr);
2542   ierr = VecCreateSeq(((PetscObject)A)->comm, n, &diagV);CHKERRQ(ierr);
2543   ierr = VecCreateSeq(((PetscObject)A)->comm, n, &offdiagV);CHKERRQ(ierr);
2544   ierr = MatGetRowMax(mat->A, diagV,    diagIdx);CHKERRQ(ierr);
2545   ierr = MatGetRowMax(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
2546   ierr = VecGetArray(v,        &a);CHKERRQ(ierr);
2547   ierr = VecGetArray(diagV,    &diagA);CHKERRQ(ierr);
2548   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2549   for(r = 0; r < n; ++r) {
2550     if (PetscAbsScalar(diagA[r]) >= PetscAbsScalar(offdiagA[r])) {
2551       a[r]   = diagA[r];
2552       idx[r] = cstart + diagIdx[r];
2553     } else {
2554       a[r]   = offdiagA[r];
2555       idx[r] = cmap[offdiagIdx[r]];
2556     }
2557   }
2558   ierr = VecRestoreArray(v,        &a);CHKERRQ(ierr);
2559   ierr = VecRestoreArray(diagV,    &diagA);CHKERRQ(ierr);
2560   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2561   ierr = VecDestroy(diagV);CHKERRQ(ierr);
2562   ierr = VecDestroy(offdiagV);CHKERRQ(ierr);
2563   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
2564   PetscFunctionReturn(0);
2565 }
2566 
2567 #undef __FUNCT__
2568 #define __FUNCT__ "MatGetSeqNonzerostructure_MPIAIJ"
2569 PetscErrorCode MatGetSeqNonzerostructure_MPIAIJ(Mat mat,Mat *newmat)
2570 {
2571   PetscErrorCode ierr;
2572   Mat            *dummy;
2573 
2574   PetscFunctionBegin;
2575   ierr = MatGetSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy);CHKERRQ(ierr);
2576   *newmat = *dummy;
2577   ierr = PetscFree(dummy);CHKERRQ(ierr);
2578   PetscFunctionReturn(0);
2579 }
2580 
2581 extern PetscErrorCode PETSCMAT_DLLEXPORT MatFDColoringApply_AIJ(Mat,MatFDColoring,Vec,MatStructure*,void*);
2582 /* -------------------------------------------------------------------*/
/*
   MatOps_Values - dispatch table for the MPIAIJ matrix type.

   Each slot corresponds to a fixed position in struct _MatOps (the numeric
   comments mark the slot index of the entry they precede); a 0 entry means
   that operation is not implemented for MPIAIJ. The positions are
   significant, so entries must not be reordered.
*/
static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
       MatGetRow_MPIAIJ,
       MatRestoreRow_MPIAIJ,
       MatMult_MPIAIJ,
/* 4*/ MatMultAdd_MPIAIJ,
       MatMultTranspose_MPIAIJ,
       MatMultTransposeAdd_MPIAIJ,
#ifdef PETSC_HAVE_PBGL
       MatSolve_MPIAIJ,
#else
       0,
#endif
       0,
       0,
/*10*/ 0,
       0,
       0,
       MatSOR_MPIAIJ,
       MatTranspose_MPIAIJ,
/*15*/ MatGetInfo_MPIAIJ,
       MatEqual_MPIAIJ,
       MatGetDiagonal_MPIAIJ,
       MatDiagonalScale_MPIAIJ,
       MatNorm_MPIAIJ,
/*20*/ MatAssemblyBegin_MPIAIJ,
       MatAssemblyEnd_MPIAIJ,
       MatSetOption_MPIAIJ,
       MatZeroEntries_MPIAIJ,
/*24*/ MatZeroRows_MPIAIJ,
       0,
#ifdef PETSC_HAVE_PBGL
       0,
#else
       0,
#endif
       0,
       0,
/*29*/ MatSetUpPreallocation_MPIAIJ,
#ifdef PETSC_HAVE_PBGL
       0,
#else
       0,
#endif
       0,
       0,
       0,
/*34*/ MatDuplicate_MPIAIJ,
       0,
       0,
       0,
       0,
/*39*/ MatAXPY_MPIAIJ,
       MatGetSubMatrices_MPIAIJ,
       MatIncreaseOverlap_MPIAIJ,
       MatGetValues_MPIAIJ,
       MatCopy_MPIAIJ,
/*44*/ MatGetRowMax_MPIAIJ,
       MatScale_MPIAIJ,
       0,
       0,
       0,
/*49*/ MatSetBlockSize_MPIAIJ,
       0,
       0,
       0,
       0,
/*54*/ MatFDColoringCreate_MPIAIJ,
       0,
       MatSetUnfactored_MPIAIJ,
       MatPermute_MPIAIJ,
       0,
/*59*/ MatGetSubMatrix_MPIAIJ,
       MatDestroy_MPIAIJ,
       MatView_MPIAIJ,
       0,
       0,
/*64*/ 0,
       0,
       0,
       0,
       0,
/*69*/ MatGetRowMaxAbs_MPIAIJ,
       MatGetRowMinAbs_MPIAIJ,
       0,
       MatSetColoring_MPIAIJ,
#if defined(PETSC_HAVE_ADIC)
       MatSetValuesAdic_MPIAIJ,
#else
       0,
#endif
       MatSetValuesAdifor_MPIAIJ,
/*75*/ MatFDColoringApply_AIJ,
       0,
       0,
       0,
       0,
/*80*/ 0,
       0,
       0,
/*83*/ MatLoad_MPIAIJ,
       0,
       0,
       0,
       0,
       0,
/*89*/ MatMatMult_MPIAIJ_MPIAIJ,
       MatMatMultSymbolic_MPIAIJ_MPIAIJ,
       MatMatMultNumeric_MPIAIJ_MPIAIJ,
       MatPtAP_Basic,
       MatPtAPSymbolic_MPIAIJ,
/*94*/ MatPtAPNumeric_MPIAIJ,
       0,
       0,
       0,
       0,
/*99*/ 0,
       MatPtAPSymbolic_MPIAIJ_MPIAIJ,
       MatPtAPNumeric_MPIAIJ_MPIAIJ,
       MatConjugate_MPIAIJ,
       0,
/*104*/MatSetValuesRow_MPIAIJ,
       MatRealPart_MPIAIJ,
       MatImaginaryPart_MPIAIJ,
       0,
       0,
/*109*/0,
       MatGetRedundantMatrix_MPIAIJ,
       MatGetRowMin_MPIAIJ,
       0,
       0,
/*114*/MatGetSeqNonzerostructure_MPIAIJ,
       0,
       0,
       0,
       0,
       0
};
2720 
2721 /* ----------------------------------------------------------------------------------------*/
2722 
2723 EXTERN_C_BEGIN
2724 #undef __FUNCT__
2725 #define __FUNCT__ "MatStoreValues_MPIAIJ"
2726 PetscErrorCode PETSCMAT_DLLEXPORT MatStoreValues_MPIAIJ(Mat mat)
2727 {
2728   Mat_MPIAIJ     *aij = (Mat_MPIAIJ *)mat->data;
2729   PetscErrorCode ierr;
2730 
2731   PetscFunctionBegin;
2732   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
2733   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
2734   PetscFunctionReturn(0);
2735 }
2736 EXTERN_C_END
2737 
2738 EXTERN_C_BEGIN
2739 #undef __FUNCT__
2740 #define __FUNCT__ "MatRetrieveValues_MPIAIJ"
2741 PetscErrorCode PETSCMAT_DLLEXPORT MatRetrieveValues_MPIAIJ(Mat mat)
2742 {
2743   Mat_MPIAIJ     *aij = (Mat_MPIAIJ *)mat->data;
2744   PetscErrorCode ierr;
2745 
2746   PetscFunctionBegin;
2747   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
2748   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
2749   PetscFunctionReturn(0);
2750 }
2751 EXTERN_C_END
2752 
2753 #include "petscpc.h"
2754 EXTERN_C_BEGIN
2755 #undef __FUNCT__
2756 #define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ"
2757 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
2758 {
2759   Mat_MPIAIJ     *b;
2760   PetscErrorCode ierr;
2761   PetscInt       i;
2762 
2763   PetscFunctionBegin;
2764   if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5;
2765   if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2;
2766   if (d_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz);
2767   if (o_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz);
2768 
2769   ierr = PetscLayoutSetBlockSize(B->rmap,1);CHKERRQ(ierr);
2770   ierr = PetscLayoutSetBlockSize(B->cmap,1);CHKERRQ(ierr);
2771   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
2772   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
2773   if (d_nnz) {
2774     for (i=0; i<B->rmap->n; i++) {
2775       if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %D value %D",i,d_nnz[i]);
2776     }
2777   }
2778   if (o_nnz) {
2779     for (i=0; i<B->rmap->n; i++) {
2780       if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %D value %D",i,o_nnz[i]);
2781     }
2782   }
2783   b = (Mat_MPIAIJ*)B->data;
2784 
2785   if (!B->preallocated) {
2786     /* Explicitly create 2 MATSEQAIJ matrices. */
2787     ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
2788     ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr);
2789     ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr);
2790     ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr);
2791     ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
2792     ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr);
2793     ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr);
2794     ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr);
2795   }
2796 
2797   ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr);
2798   ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr);
2799   B->preallocated = PETSC_TRUE;
2800   PetscFunctionReturn(0);
2801 }
2802 EXTERN_C_END
2803 
#undef __FUNCT__
#define __FUNCT__ "MatDuplicate_MPIAIJ"
/*
   MatDuplicate_MPIAIJ - Creates a new MPIAIJ matrix with the same layout and
   structure as matin; values are copied or not according to cpvalues.

   The copy is assembled: the two sequential blocks (A, B), the local work
   vector lvec, the scatter context Mvctx, the colmap and garray arrays are
   all duplicated from the old matrix.
*/
PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
{
  Mat            mat;
  Mat_MPIAIJ     *a,*oldmat = (Mat_MPIAIJ*)matin->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  *newmat       = 0;
  ierr = MatCreate(((PetscObject)matin)->comm,&mat);CHKERRQ(ierr);
  ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr);
  /* MatSetType allocates mat->data; the ops table is then overwritten with
     matin's (which may have been customized) */
  ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr);
  ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
  a    = (Mat_MPIAIJ*)mat->data;

  mat->factor       = matin->factor;
  mat->rmap->bs      = matin->rmap->bs;
  mat->assembled    = PETSC_TRUE;
  mat->insertmode   = NOT_SET_VALUES;
  mat->preallocated = PETSC_TRUE;

  a->size           = oldmat->size;
  a->rank           = oldmat->rank;
  a->donotstash     = oldmat->donotstash;
  a->roworiented    = oldmat->roworiented;
  /* per-call MatGetRow() scratch state starts empty in the copy */
  a->rowindices     = 0;
  a->rowvalues      = 0;
  a->getrowactive   = PETSC_FALSE;

  /* NOTE(review): PetscLayoutCopy replaces the maps installed by
     MatSetSizes/MatSetType above — confirm the previous layouts are not
     leaked here */
  ierr = PetscLayoutCopy(matin->rmap,&mat->rmap);CHKERRQ(ierr);
  ierr = PetscLayoutCopy(matin->cmap,&mat->cmap);CHKERRQ(ierr);

  /* copy the global-to-local column map, whose representation depends on
     whether PETSc was built with ctable support */
  if (oldmat->colmap) {
#if defined (PETSC_USE_CTABLE)
    ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
#else
    ierr = PetscMalloc((mat->cmap->N)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr);
    ierr = PetscLogObjectMemory(mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
    ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
#endif
  } else a->colmap = 0;
  /* garray maps off-diagonal block columns to global column numbers */
  if (oldmat->garray) {
    PetscInt len;
    len  = oldmat->B->cmap->n;
    ierr = PetscMalloc((len+1)*sizeof(PetscInt),&a->garray);CHKERRQ(ierr);
    ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr);
    if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); }
  } else a->garray = 0;

  /* duplicate the communication machinery and the two sequential blocks */
  ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr);
  ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr);
  ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr);
  ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr);
  ierr = PetscFListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr);
  *newmat = mat;
  PetscFunctionReturn(0);
}
2866 
#undef __FUNCT__
#define __FUNCT__ "MatLoad_MPIAIJ"
/*
   MatLoad_MPIAIJ - Loads a matrix stored in PETSc binary format from a
   viewer and distributes it as an MPIAIJ (or given type) matrix.

   Protocol: process 0 reads the header, row lengths, column indices and
   values from the file and ships each other process its slice with
   point-to-point sends; every process preallocates from the received row
   lengths (split into diagonal/off-diagonal counts) and inserts its rows.
*/
PetscErrorCode MatLoad_MPIAIJ(PetscViewer viewer, const MatType type,Mat *newmat)
{
  Mat            A;
  PetscScalar    *vals,*svals;
  MPI_Comm       comm = ((PetscObject)viewer)->comm;
  MPI_Status     status;
  PetscErrorCode ierr;
  PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag,mpicnt,mpimaxnz;
  PetscInt       i,nz,j,rstart,rend,mmax,maxnz = 0;
  PetscInt       header[4],*rowlengths = 0,M,N,m,*cols;
  PetscInt       *ourlens = PETSC_NULL,*procsnz = PETSC_NULL,*offlens = PETSC_NULL,jj,*mycols,*smycols;
  PetscInt       cend,cstart,n,*rowners;
  int            fd;

  PetscFunctionBegin;
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  /* only process 0 touches the file */
  if (!rank) {
    ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
    ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
    if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
  }

  /* broadcast global sizes (and nz count in header[3], unused here) */
  ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
  M = header[1]; N = header[2];
  /* determine ownership of all rows */
  m    = M/size + ((M % size) > rank);
  ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr);
  ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);

  /* First process needs enough room for process with most rows */
  if (!rank) {
    mmax       = rowners[1];
    /* NOTE(review): this loop stops at i<size and never examines
       rowners[size]; with the M/size + remainder distribution above rank 0
       always owns the most rows, so this appears benign — confirm */
    for (i=2; i<size; i++) {
      mmax = PetscMax(mmax,rowners[i]);
    }
  } else mmax = m;

  /* prefix-sum the per-process row counts into ownership ranges */
  rowners[0] = 0;
  for (i=2; i<=size; i++) {
    rowners[i] += rowners[i-1];
  }
  rstart = rowners[rank];
  rend   = rowners[rank+1];

  /* distribute row lengths to all processors */
  ierr    = PetscMalloc2(mmax,PetscInt,&ourlens,mmax,PetscInt,&offlens);CHKERRQ(ierr);
  if (!rank) {
    ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr);
    ierr = PetscMalloc(m*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
    ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr);
    ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr);
    for (j=0; j<m; j++) {
      procsnz[0] += ourlens[j];
    }
    for (i=1; i<size; i++) {
      ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr);
      /* calculate the number of nonzeros on each processor */
      for (j=0; j<rowners[i+1]-rowners[i]; j++) {
        procsnz[i] += rowlengths[j];
      }
      mpicnt = PetscMPIIntCast(rowners[i+1]-rowners[i]);
      ierr   = MPI_Send(rowlengths,mpicnt,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
    }
    ierr = PetscFree(rowlengths);CHKERRQ(ierr);
  } else {
    /* NOTE(review): CHKERRQ(ierr) here checks a stale ierr — PetscMPIIntCast
       does not set ierr; harmless but should be removed */
    mpicnt = PetscMPIIntCast(m);CHKERRQ(ierr);
    ierr   = MPI_Recv(ourlens,mpicnt,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
  }

  if (!rank) {
    /* determine max buffer needed and allocate it */
    maxnz = 0;
    for (i=0; i<size; i++) {
      maxnz = PetscMax(maxnz,procsnz[i]);
    }
    ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr);

    /* read in my part of the matrix column indices  */
    nz   = procsnz[0];
    ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr);
    ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);

    /* read in every one elses and ship off */
    for (i=1; i<size; i++) {
      nz     = procsnz[i];
      ierr   = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
      mpicnt = PetscMPIIntCast(nz);
      ierr   = MPI_Send(cols,mpicnt,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
    }
    ierr = PetscFree(cols);CHKERRQ(ierr);
  } else {
    /* determine buffer space needed for message */
    nz = 0;
    for (i=0; i<m; i++) {
      nz += ourlens[i];
    }
    ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr);

    /* receive message of column indices*/
    /* NOTE(review): stale CHKERRQ(ierr) after PetscMPIIntCast, as above */
    mpicnt = PetscMPIIntCast(nz);CHKERRQ(ierr);
    ierr = MPI_Recv(mycols,mpicnt,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
    /* cross-check the received length against what the row lengths promised */
    ierr = MPI_Get_count(&status,MPIU_INT,&mpimaxnz);CHKERRQ(ierr);
    if (mpimaxnz == MPI_UNDEFINED) {SETERRQ1(PETSC_ERR_LIB,"MPI_Get_count() returned MPI_UNDEFINED, expected %d",mpicnt);}
    else if (mpimaxnz < 0) {SETERRQ2(PETSC_ERR_LIB,"MPI_Get_count() returned impossible negative value %d, expected %d",mpimaxnz,mpicnt);}
    else if (mpimaxnz != mpicnt) {SETERRQ2(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file: expected %d received %d",mpicnt,mpimaxnz);}
  }

  /* determine column ownership if matrix is not square */
  if (N != M) {
    n      = N/size + ((N % size) > rank);
    ierr   = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
    cstart = cend - n;
  } else {
    cstart = rstart;
    cend   = rend;
    n      = cend - cstart;
  }

  /* loop over local rows, determining number of off diagonal entries */
  ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr);
  jj = 0;
  for (i=0; i<m; i++) {
    for (j=0; j<ourlens[i]; j++) {
      if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++;
      jj++;
    }
  }

  /* create our matrix */
  /* temporarily reduce ourlens to diagonal-only counts for preallocation */
  for (i=0; i<m; i++) {
    ourlens[i] -= offlens[i];
  }
  ierr = MatCreate(comm,&A);CHKERRQ(ierr);
  ierr = MatSetSizes(A,m,n,M,N);CHKERRQ(ierr);
  ierr = MatSetType(A,type);CHKERRQ(ierr);
  ierr = MatMPIAIJSetPreallocation(A,0,ourlens,0,offlens);CHKERRQ(ierr);

  /* restore ourlens to full per-row counts for the insertion loops below */
  for (i=0; i<m; i++) {
    ourlens[i] += offlens[i];
  }

  if (!rank) {
    ierr = PetscMalloc((maxnz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);

    /* read in my part of the matrix numerical values  */
    nz   = procsnz[0];
    ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);

    /* insert into matrix */
    jj      = rstart;
    smycols = mycols;
    svals   = vals;
    for (i=0; i<m; i++) {
      ierr = MatSetValues_MPIAIJ(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
      smycols += ourlens[i];
      svals   += ourlens[i];
      jj++;
    }

    /* read in other processors and ship out */
    for (i=1; i<size; i++) {
      nz     = procsnz[i];
      ierr   = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
      mpicnt = PetscMPIIntCast(nz);
      ierr   = MPI_Send(vals,mpicnt,MPIU_SCALAR,i,((PetscObject)A)->tag,comm);CHKERRQ(ierr);
    }
    ierr = PetscFree(procsnz);CHKERRQ(ierr);
  } else {
    /* receive numeric values */
    ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);

    /* receive message of values*/
    mpicnt = PetscMPIIntCast(nz);
    ierr   = MPI_Recv(vals,mpicnt,MPIU_SCALAR,0,((PetscObject)A)->tag,comm,&status);CHKERRQ(ierr);
    /* same length cross-check as for the column indices */
    ierr   = MPI_Get_count(&status,MPIU_SCALAR,&mpimaxnz);CHKERRQ(ierr);
    if (mpimaxnz == MPI_UNDEFINED) {SETERRQ1(PETSC_ERR_LIB,"MPI_Get_count() returned MPI_UNDEFINED, expected %d",mpicnt);}
    else if (mpimaxnz < 0) {SETERRQ2(PETSC_ERR_LIB,"MPI_Get_count() returned impossible negative value %d, expected %d",mpimaxnz,mpicnt);}
    else if (mpimaxnz != mpicnt) {SETERRQ2(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file: expected %d received %d",mpicnt,mpimaxnz);}

    /* insert into matrix */
    jj      = rstart;
    smycols = mycols;
    svals   = vals;
    for (i=0; i<m; i++) {
      ierr     = MatSetValues_MPIAIJ(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
      smycols += ourlens[i];
      svals   += ourlens[i];
      jj++;
    }
  }
  ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr);
  ierr = PetscFree(vals);CHKERRQ(ierr);
  ierr = PetscFree(mycols);CHKERRQ(ierr);
  ierr = PetscFree(rowners);CHKERRQ(ierr);

  ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  *newmat = A;
  PetscFunctionReturn(0);
}
3070 
3071 #undef __FUNCT__
3072 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ"
3073 PetscErrorCode MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat)
3074 {
3075   PetscErrorCode ierr;
3076   IS             iscol_local;
3077   PetscInt       csize;
3078 
3079   PetscFunctionBegin;
3080   ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
3081   if (call == MAT_REUSE_MATRIX) {
3082     ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr);
3083     if (!iscol_local) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3084   } else {
3085     ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
3086   }
3087   ierr = MatGetSubMatrix_MPIAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr);
3088   if (call == MAT_INITIAL_MATRIX) {
3089     ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
3090     ierr = ISDestroy(iscol_local);CHKERRQ(ierr);
3091   }
3092   PetscFunctionReturn(0);
3093 }
3094 
3095 #undef __FUNCT__
3096 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ_Private"
3097 /*
3098     Not great since it makes two copies of the submatrix, first an SeqAIJ
3099   in local and then by concatenating the local matrices the end result.
3100   Writing it directly would be much like MatGetSubMatrices_MPIAIJ()
3101 
3102   Note: This requires a sequential iscol with all indices.
3103 */
PetscErrorCode MatGetSubMatrix_MPIAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat)
{
  PetscErrorCode ierr;
  PetscMPIInt    rank,size;
  PetscInt       i,m,n,rstart,row,rend,nz,*cwork,j;
  PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal;
  Mat            *local,M,Mreuse;
  MatScalar      *vwork,*aa;
  MPI_Comm       comm = ((PetscObject)mat)->comm;
  Mat_SeqAIJ     *aij;


  PetscFunctionBegin;
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);

  /* Step 1: extract the requested rows/columns as a sequential matrix (Mreuse)
     on each process; on reuse, recover the one stashed on the output matrix */
  if (call ==  MAT_REUSE_MATRIX) {
    ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr);
    if (!Mreuse) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
    local = &Mreuse;
    ierr  = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);CHKERRQ(ierr);
  } else {
    /* MatGetSubMatrices() allocates the output array; keep the single matrix and free the array */
    ierr   = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr);
    Mreuse = *local;
    ierr   = PetscFree(local);CHKERRQ(ierr);
  }

  /*
      m - number of local rows
      n - number of columns (same on all processors)
      rstart - first row in new global matrix generated
  */
  ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
  if (call == MAT_INITIAL_MATRIX) {
    /* peek inside the SeqAIJ storage of the local submatrix (CSR row offsets/columns) */
    aij = (Mat_SeqAIJ*)(Mreuse)->data;
    ii  = aij->i;
    jj  = aij->j;

    /*
        Determine the number of non-zeros in the diagonal and off-diagonal
        portions of the matrix in order to do correct preallocation
    */

    /* first get start and end of "diagonal" columns */
    if (csize == PETSC_DECIDE) {
      ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
      if (mglobal == n) { /* square matrix */
	nlocal = m;
      } else {
        /* spread n columns as evenly as possible; the first n%size ranks get one extra */
        nlocal = n/size + ((n % size) > rank);
      }
    } else {
      nlocal = csize;
    }
    /* prefix-sum of local column counts: [rstart,rend) is this rank's diagonal column range */
    ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
    rstart = rend - nlocal;
    /* only the last rank sees the full sum, so it validates the total */
    if (rank == size - 1 && rend != n) {
      SETERRQ2(PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n);
    }

    /* next, compute all the lengths */
    /* single allocation: dlens occupies the first m entries, olens the next m */
    ierr  = PetscMalloc((2*m+1)*sizeof(PetscInt),&dlens);CHKERRQ(ierr);
    olens = dlens + m;
    for (i=0; i<m; i++) {
      jend = ii[i+1] - ii[i];
      olen = 0;
      dlen = 0;
      /* jj walks the column indices of row i; classify each as diagonal/off-diagonal */
      for (j=0; j<jend; j++) {
        if (*jj < rstart || *jj >= rend) olen++;
        else dlen++;
        jj++;
      }
      olens[i] = olen;
      dlens[i] = dlen;
    }
    ierr = MatCreate(comm,&M);CHKERRQ(ierr);
    ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr);
    ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr);
    ierr = PetscFree(dlens);CHKERRQ(ierr);
  } else {
    PetscInt ml,nl;

    M = *newmat;
    ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
    if (ml != m) SETERRQ(PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
    ierr = MatZeroEntries(M);CHKERRQ(ierr);
    /*
         The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
       rather than the slower MatSetValues().
    */
    M->was_assembled = PETSC_TRUE;
    M->assembled     = PETSC_FALSE;
  }
  /* Step 2: copy the local SeqAIJ submatrix into the parallel matrix row by row */
  ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
  aij = (Mat_SeqAIJ*)(Mreuse)->data;
  ii  = aij->i;
  jj  = aij->j;
  aa  = aij->a;
  for (i=0; i<m; i++) {
    row   = rstart + i;
    nz    = ii[i+1] - ii[i];
    /* cwork/vwork point at row i's column indices and values; advance jj/aa past them */
    cwork = jj;     jj += nz;
    vwork = aa;     aa += nz;
    ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
  }

  ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  *newmat = M;

  /* save submatrix used in processor for next request */
  if (call ==  MAT_INITIAL_MATRIX) {
    /* compose takes a reference, so drop ours; M now owns Mreuse */
    ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
    ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr);
  }

  PetscFunctionReturn(0);
}
3223 
3224 EXTERN_C_BEGIN
3225 #undef __FUNCT__
3226 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR_MPIAIJ"
3227 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[])
3228 {
3229   PetscInt       m,cstart, cend,j,nnz,i,d;
3230   PetscInt       *d_nnz,*o_nnz,nnz_max = 0,rstart,ii;
3231   const PetscInt *JJ;
3232   PetscScalar    *values;
3233   PetscErrorCode ierr;
3234 
3235   PetscFunctionBegin;
3236   if (Ii[0]) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0 it is %D",Ii[0]);
3237 
3238   ierr = PetscLayoutSetBlockSize(B->rmap,1);CHKERRQ(ierr);
3239   ierr = PetscLayoutSetBlockSize(B->cmap,1);CHKERRQ(ierr);
3240   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3241   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3242   m      = B->rmap->n;
3243   cstart = B->cmap->rstart;
3244   cend   = B->cmap->rend;
3245   rstart = B->rmap->rstart;
3246 
3247   ierr  = PetscMalloc2(m,PetscInt,&d_nnz,m,PetscInt,&o_nnz);CHKERRQ(ierr);
3248 
3249 #if defined(PETSC_USE_DEBUGGING)
3250   for (i=0; i<m; i++) {
3251     nnz     = Ii[i+1]- Ii[i];
3252     JJ      = J + Ii[i];
3253     if (nnz < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative %D number of columns",i,nnz);
3254     if (nnz && (JJ[0] < 0)) SETERRRQ1(PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index",i,j);
3255     if (nnz && (JJ[nnz-1] >= B->cmap->N) SETERRRQ3(PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N);
3256   }
3257 #endif
3258 
3259   for (i=0; i<m; i++) {
3260     nnz     = Ii[i+1]- Ii[i];
3261     JJ      = J + Ii[i];
3262     nnz_max = PetscMax(nnz_max,nnz);
3263     d       = 0;
3264     for (j=0; j<nnz; j++) {
3265       if (cstart <= JJ[j] && JJ[j] < cend) d++;
3266     }
3267     d_nnz[i] = d;
3268     o_nnz[i] = nnz - d;
3269   }
3270   ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
3271   ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);
3272 
3273   if (v) values = (PetscScalar*)v;
3274   else {
3275     ierr = PetscMalloc((nnz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr);
3276     ierr = PetscMemzero(values,nnz_max*sizeof(PetscScalar));CHKERRQ(ierr);
3277   }
3278 
3279   for (i=0; i<m; i++) {
3280     ii   = i + rstart;
3281     nnz  = Ii[i+1]- Ii[i];
3282     ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ? Ii[i] : 0),INSERT_VALUES);CHKERRQ(ierr);
3283   }
3284   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3285   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3286 
3287   if (!v) {
3288     ierr = PetscFree(values);CHKERRQ(ierr);
3289   }
3290   PetscFunctionReturn(0);
3291 }
3292 EXTERN_C_END
3293 
3294 #undef __FUNCT__
3295 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR"
3296 /*@
3297    MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format
3298    (the default parallel PETSc format).
3299 
3300    Collective on MPI_Comm
3301 
3302    Input Parameters:
3303 +  B - the matrix
3304 .  i - the indices into j for the start of each local row (starts with zero)
3305 .  j - the column indices for each local row (starts with zero)
3306 -  v - optional values in the matrix
3307 
3308    Level: developer
3309 
3310    Notes:
3311        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
3312      thus you CANNOT change the matrix entries by changing the values of a[] after you have
3313      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3314 
3315        The i and j indices are 0 based, and i indices are indices corresponding to the local j array.
3316 
3317        The format which is used for the sparse matrix input, is equivalent to a
3318     row-major ordering.. i.e for the following matrix, the input data expected is
3319     as shown:
3320 
3321         1 0 0
3322         2 0 3     P0
3323        -------
3324         4 5 6     P1
3325 
3326      Process0 [P0]: rows_owned=[0,1]
3327         i =  {0,1,3}  [size = nrow+1  = 2+1]
        j =  {0,0,2}  [size = nz = 3]
        v =  {1,2,3}  [size = nz = 3]
3330 
3331      Process1 [P1]: rows_owned=[2]
3332         i =  {0,3}    [size = nrow+1  = 1+1]
        j =  {0,1,2}  [size = nz = 3]
        v =  {4,5,6}  [size = nz = 3]
3335 
3336 .keywords: matrix, aij, compressed row, sparse, parallel
3337 
3338 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateMPIAIJ(), MPIAIJ,
3339           MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays()
3340 @*/
3341 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
3342 {
3343   PetscErrorCode ierr,(*f)(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]);
3344 
3345   PetscFunctionBegin;
3346   ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",(void (**)(void))&f);CHKERRQ(ierr);
3347   if (f) {
3348     ierr = (*f)(B,i,j,v);CHKERRQ(ierr);
3349   }
3350   PetscFunctionReturn(0);
3351 }
3352 
3353 #undef __FUNCT__
3354 #define __FUNCT__ "MatMPIAIJSetPreallocation"
3355 /*@C
3356    MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format
3357    (the default parallel PETSc format).  For good matrix assembly performance
3358    the user should preallocate the matrix storage by setting the parameters
3359    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3360    performance can be increased by more than a factor of 50.
3361 
3362    Collective on MPI_Comm
3363 
3364    Input Parameters:
3365 +  A - the matrix
3366 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
3367            (same value is used for all local rows)
3368 .  d_nnz - array containing the number of nonzeros in the various rows of the
3369            DIAGONAL portion of the local submatrix (possibly different for each row)
3370            or PETSC_NULL, if d_nz is used to specify the nonzero structure.
3371            The size of this array is equal to the number of local rows, i.e 'm'.
3372            You must leave room for the diagonal entry even if it is zero.
3373 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
3374            submatrix (same value is used for all local rows).
3375 -  o_nnz - array containing the number of nonzeros in the various rows of the
3376            OFF-DIAGONAL portion of the local submatrix (possibly different for
3377            each row) or PETSC_NULL, if o_nz is used to specify the nonzero
3378            structure. The size of this array is equal to the number
3379            of local rows, i.e 'm'.
3380 
3381    If the *_nnz parameter is given then the *_nz parameter is ignored
3382 
3383    The AIJ format (also called the Yale sparse matrix format or
3384    compressed row storage (CSR)), is fully compatible with standard Fortran 77
3385    storage.  The stored row and column indices begin with zero.  See the users manual for details.
3386 
3387    The parallel matrix is partitioned such that the first m0 rows belong to
3388    process 0, the next m1 rows belong to process 1, the next m2 rows belong
3389    to process 2 etc.. where m0,m1,m2... are the input parameter 'm'.
3390 
3391    The DIAGONAL portion of the local submatrix of a processor can be defined
3392    as the submatrix which is obtained by extraction the part corresponding
3393    to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the
3394    first row that belongs to the processor, and r2 is the last row belonging
3395    to the this processor. This is a square mxm matrix. The remaining portion
3396    of the local submatrix (mxN) constitute the OFF-DIAGONAL portion.
3397 
3398    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
3399 
3400    You can call MatGetInfo() to get information on how effective the preallocation was;
3401    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
3402    You can also run with the option -info and look for messages with the string
3403    malloc in them to see if additional memory allocation was needed.
3404 
3405    Example usage:
3406 
3407    Consider the following 8x8 matrix with 34 non-zero values, that is
3408    assembled across 3 processors. Lets assume that proc0 owns 3 rows,
3409    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
3410    as follows:
3411 
3412 .vb
3413             1  2  0  |  0  3  0  |  0  4
3414     Proc0   0  5  6  |  7  0  0  |  8  0
3415             9  0 10  | 11  0  0  | 12  0
3416     -------------------------------------
3417            13  0 14  | 15 16 17  |  0  0
3418     Proc1   0 18  0  | 19 20 21  |  0  0
3419             0  0  0  | 22 23  0  | 24  0
3420     -------------------------------------
3421     Proc2  25 26 27  |  0  0 28  | 29  0
3422            30  0  0  | 31 32 33  |  0 34
3423 .ve
3424 
3425    This can be represented as a collection of submatrices as:
3426 
3427 .vb
3428       A B C
3429       D E F
3430       G H I
3431 .ve
3432 
3433    Where the submatrices A,B,C are owned by proc0, D,E,F are
3434    owned by proc1, G,H,I are owned by proc2.
3435 
3436    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3437    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3438    The 'M','N' parameters are 8,8, and have the same values on all procs.
3439 
3440    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
3441    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
3442    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
3443    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
3444    part as SeqAIJ matrices. for eg: proc1 will store [E] as a SeqAIJ
   matrix, and [DF] as another SeqAIJ matrix.
3446 
3447    When d_nz, o_nz parameters are specified, d_nz storage elements are
3448    allocated for every row of the local diagonal submatrix, and o_nz
3449    storage locations are allocated for every row of the OFF-DIAGONAL submat.
   One way to choose d_nz and o_nz is to use the max nonzeros per local
3451    rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
3452    In this case, the values of d_nz,o_nz are:
3453 .vb
3454      proc0 : dnz = 2, o_nz = 2
3455      proc1 : dnz = 3, o_nz = 2
3456      proc2 : dnz = 1, o_nz = 4
3457 .ve
3458    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
3459    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
3460    for proc3. i.e we are using 12+15+10=37 storage locations to store
3461    34 values.
3462 
3463    When d_nnz, o_nnz parameters are specified, the storage is specified
   for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
3465    In the above case the values for d_nnz,o_nnz are:
3466 .vb
3467      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
3468      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
3469      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
3470 .ve
3471    Here the space allocated is sum of all the above values i.e 34, and
3472    hence pre-allocation is perfect.
3473 
3474    Level: intermediate
3475 
3476 .keywords: matrix, aij, compressed row, sparse, parallel
3477 
3478 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIAIJ(), MatMPIAIJSetPreallocationCSR(),
3479           MPIAIJ, MatGetInfo()
3480 @*/
3481 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
3482 {
3483   PetscErrorCode ierr,(*f)(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]);
3484 
3485   PetscFunctionBegin;
3486   ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr);
3487   if (f) {
3488     ierr = (*f)(B,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
3489   }
3490   PetscFunctionReturn(0);
3491 }
3492 
3493 #undef __FUNCT__
3494 #define __FUNCT__ "MatCreateMPIAIJWithArrays"
3495 /*@
3496      MatCreateMPIAIJWithArrays - creates a MPI AIJ matrix using arrays that contain in standard
3497          CSR format the local rows.
3498 
3499    Collective on MPI_Comm
3500 
3501    Input Parameters:
3502 +  comm - MPI communicator
3503 .  m - number of local rows (Cannot be PETSC_DECIDE)
3504 .  n - This value should be the same as the local size used in creating the
3505        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
3506        calculated if N is given) For square matrices n is almost always m.
3507 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
3508 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
3509 .   i - row indices
3510 .   j - column indices
3511 -   a - matrix values
3512 
3513    Output Parameter:
3514 .   mat - the matrix
3515 
3516    Level: intermediate
3517 
3518    Notes:
3519        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
3520      thus you CANNOT change the matrix entries by changing the values of a[] after you have
3521      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3522 
3523        The i and j indices are 0 based, and i indices are indices corresponding to the local j array.
3524 
3525        The format which is used for the sparse matrix input, is equivalent to a
3526     row-major ordering.. i.e for the following matrix, the input data expected is
3527     as shown:
3528 
3529         1 0 0
3530         2 0 3     P0
3531        -------
3532         4 5 6     P1
3533 
3534      Process0 [P0]: rows_owned=[0,1]
3535         i =  {0,1,3}  [size = nrow+1  = 2+1]
        j =  {0,0,2}  [size = nz = 3]
        v =  {1,2,3}  [size = nz = 3]
3538 
3539      Process1 [P1]: rows_owned=[2]
3540         i =  {0,3}    [size = nrow+1  = 1+1]
        j =  {0,1,2}  [size = nz = 3]
        v =  {4,5,6}  [size = nz = 3]
3543 
3544 .keywords: matrix, aij, compressed row, sparse, parallel
3545 
3546 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
3547           MPIAIJ, MatCreateMPIAIJ(), MatCreateMPIAIJWithSplitArrays()
3548 @*/
3549 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
3550 {
3551   PetscErrorCode ierr;
3552 
3553  PetscFunctionBegin;
3554   if (i[0]) {
3555     SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
3556   }
3557   if (m < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
3558   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
3559   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
3560   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
3561   ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr);
3562   PetscFunctionReturn(0);
3563 }
3564 
3565 #undef __FUNCT__
3566 #define __FUNCT__ "MatCreateMPIAIJ"
3567 /*@C
3568    MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format
3569    (the default parallel PETSc format).  For good matrix assembly performance
3570    the user should preallocate the matrix storage by setting the parameters
3571    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3572    performance can be increased by more than a factor of 50.
3573 
3574    Collective on MPI_Comm
3575 
3576    Input Parameters:
3577 +  comm - MPI communicator
3578 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
3579            This value should be the same as the local size used in creating the
3580            y vector for the matrix-vector product y = Ax.
3581 .  n - This value should be the same as the local size used in creating the
3582        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
3583        calculated if N is given) For square matrices n is almost always m.
3584 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
3585 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
3586 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
3587            (same value is used for all local rows)
3588 .  d_nnz - array containing the number of nonzeros in the various rows of the
3589            DIAGONAL portion of the local submatrix (possibly different for each row)
3590            or PETSC_NULL, if d_nz is used to specify the nonzero structure.
3591            The size of this array is equal to the number of local rows, i.e 'm'.
3592            You must leave room for the diagonal entry even if it is zero.
3593 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
3594            submatrix (same value is used for all local rows).
3595 -  o_nnz - array containing the number of nonzeros in the various rows of the
3596            OFF-DIAGONAL portion of the local submatrix (possibly different for
3597            each row) or PETSC_NULL, if o_nz is used to specify the nonzero
3598            structure. The size of this array is equal to the number
3599            of local rows, i.e 'm'.
3600 
3601    Output Parameter:
3602 .  A - the matrix
3603 
3604    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
   MatXXXXSetPreallocation() paradigm instead of this routine directly.
3606    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
3607 
3608    Notes:
3609    If the *_nnz parameter is given then the *_nz parameter is ignored
3610 
3611    m,n,M,N parameters specify the size of the matrix, and its partitioning across
3612    processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
3613    storage requirements for this matrix.
3614 
3615    If PETSC_DECIDE or  PETSC_DETERMINE is used for a particular argument on one
3616    processor than it must be used on all processors that share the object for
3617    that argument.
3618 
3619    The user MUST specify either the local or global matrix dimensions
3620    (possibly both).
3621 
3622    The parallel matrix is partitioned across processors such that the
3623    first m0 rows belong to process 0, the next m1 rows belong to
3624    process 1, the next m2 rows belong to process 2 etc.. where
3625    m0,m1,m2,.. are the input parameter 'm'. i.e each processor stores
3626    values corresponding to [m x N] submatrix.
3627 
3628    The columns are logically partitioned with the n0 columns belonging
3629    to 0th partition, the next n1 columns belonging to the next
   partition etc.. where n0,n1,n2... are the input parameter 'n'.
3631 
3632    The DIAGONAL portion of the local submatrix on any given processor
3633    is the submatrix corresponding to the rows and columns m,n
3634    corresponding to the given processor. i.e diagonal matrix on
3635    process 0 is [m0 x n0], diagonal matrix on process 1 is [m1 x n1]
3636    etc. The remaining portion of the local submatrix [m x (N-n)]
3637    constitute the OFF-DIAGONAL portion. The example below better
3638    illustrates this concept.
3639 
3640    For a square global matrix we define each processor's diagonal portion
3641    to be its local rows and the corresponding columns (a square submatrix);
3642    each processor's off-diagonal portion encompasses the remainder of the
3643    local matrix (a rectangular submatrix).
3644 
3645    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
3646 
3647    When calling this routine with a single process communicator, a matrix of
3648    type SEQAIJ is returned.  If a matrix of type MPIAIJ is desired for this
3649    type of communicator, use the construction mechanism:
3650      MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatSetSizes(A, m,n,M,N); MatMPIAIJSetPreallocation(A,...);
3651 
3652    By default, this format uses inodes (identical nodes) when possible.
3653    We search for consecutive rows with the same nonzero structure, thereby
3654    reusing matrix information to achieve increased efficiency.
3655 
3656    Options Database Keys:
3657 +  -mat_no_inode  - Do not use inodes
3658 .  -mat_inode_limit <limit> - Sets inode limit (max limit=5)
3659 -  -mat_aij_oneindex - Internally use indexing starting at 1
3660         rather than 0.  Note that when calling MatSetValues(),
3661         the user still MUST index entries starting at 0!
3662 
3663 
3664    Example usage:
3665 
3666    Consider the following 8x8 matrix with 34 non-zero values, that is
3667    assembled across 3 processors. Lets assume that proc0 owns 3 rows,
3668    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
3669    as follows:
3670 
3671 .vb
3672             1  2  0  |  0  3  0  |  0  4
3673     Proc0   0  5  6  |  7  0  0  |  8  0
3674             9  0 10  | 11  0  0  | 12  0
3675     -------------------------------------
3676            13  0 14  | 15 16 17  |  0  0
3677     Proc1   0 18  0  | 19 20 21  |  0  0
3678             0  0  0  | 22 23  0  | 24  0
3679     -------------------------------------
3680     Proc2  25 26 27  |  0  0 28  | 29  0
3681            30  0  0  | 31 32 33  |  0 34
3682 .ve
3683 
3684    This can be represented as a collection of submatrices as:
3685 
3686 .vb
3687       A B C
3688       D E F
3689       G H I
3690 .ve
3691 
3692    Where the submatrices A,B,C are owned by proc0, D,E,F are
3693    owned by proc1, G,H,I are owned by proc2.
3694 
3695    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3696    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3697    The 'M','N' parameters are 8,8, and have the same values on all procs.
3698 
3699    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
3700    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
3701    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
3702    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
3703    part as SeqAIJ matrices. for eg: proc1 will store [E] as a SeqAIJ
   matrix, and [DF] as another SeqAIJ matrix.
3705 
3706    When d_nz, o_nz parameters are specified, d_nz storage elements are
3707    allocated for every row of the local diagonal submatrix, and o_nz
3708    storage locations are allocated for every row of the OFF-DIAGONAL submat.
   One way to choose d_nz and o_nz is to use the max nonzeros per local
3710    rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
3711    In this case, the values of d_nz,o_nz are:
3712 .vb
3713      proc0 : dnz = 2, o_nz = 2
3714      proc1 : dnz = 3, o_nz = 2
3715      proc2 : dnz = 1, o_nz = 4
3716 .ve
3717    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
3718    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
3719    for proc3. i.e we are using 12+15+10=37 storage locations to store
3720    34 values.
3721 
3722    When d_nnz, o_nnz parameters are specified, the storage is specified
   for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
3724    In the above case the values for d_nnz,o_nnz are:
3725 .vb
3726      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
3727      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
3728      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
3729 .ve
3730    Here the space allocated is sum of all the above values i.e 34, and
3731    hence pre-allocation is perfect.
3732 
3733    Level: intermediate
3734 
3735 .keywords: matrix, aij, compressed row, sparse, parallel
3736 
3737 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
3738           MPIAIJ, MatCreateMPIAIJWithArrays()
3739 @*/
3740 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
3741 {
3742   PetscErrorCode ierr;
3743   PetscMPIInt    size;
3744 
3745   PetscFunctionBegin;
3746   ierr = MatCreate(comm,A);CHKERRQ(ierr);
3747   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
3748   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3749   if (size > 1) {
3750     ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr);
3751     ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
3752   } else {
3753     ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr);
3754     ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr);
3755   }
3756   PetscFunctionReturn(0);
3757 }
3758 
3759 #undef __FUNCT__
3760 #define __FUNCT__ "MatMPIAIJGetSeqAIJ"
3761 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,PetscInt *colmap[])
3762 {
3763   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
3764 
3765   PetscFunctionBegin;
3766   *Ad     = a->A;
3767   *Ao     = a->B;
3768   *colmap = a->garray;
3769   PetscFunctionReturn(0);
3770 }
3771 
#undef __FUNCT__
#define __FUNCT__ "MatSetColoring_MPIAIJ"
/*
   MatSetColoring_MPIAIJ - Pushes an ISColoring down onto the two sequential
   blocks (diagonal a->A and off-diagonal a->B) of an MPIAIJ matrix.

   IS_COLORING_GLOBAL: colors are indexed by global column; the off-diagonal
   block needs colors of remote columns, so all colors are gathered and then
   mapped through garray.
   IS_COLORING_GHOSTED: colors are indexed by local (ghosted) numbering; both
   blocks map their column indices to that numbering via A->mapping first.
*/
PetscErrorCode MatSetColoring_MPIAIJ(Mat A,ISColoring coloring)
{
  PetscErrorCode ierr;
  PetscInt       i;
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;

  PetscFunctionBegin;
  if (coloring->ctype == IS_COLORING_GLOBAL) {
    ISColoringValue *allcolors,*colors;
    ISColoring      ocoloring;

    /* set coloring for diagonal portion */
    ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr);

    /* set coloring for off-diagonal portion: gather every process's colors,
       then select the ones for the columns this process actually couples to
       (a->garray holds their global indices) */
    ierr = ISAllGatherColors(((PetscObject)A)->comm,coloring->n,coloring->colors,PETSC_NULL,&allcolors);CHKERRQ(ierr);
    ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
    for (i=0; i<a->B->cmap->n; i++) {
      colors[i] = allcolors[a->garray[i]];
    }
    ierr = PetscFree(allcolors);CHKERRQ(ierr);
    /* ocoloring takes ownership of colors */
    ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
    ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
    ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
  } else if (coloring->ctype == IS_COLORING_GHOSTED) {
    ISColoringValue *colors;
    PetscInt        *larray;
    ISColoring      ocoloring;

    /* set coloring for diagonal portion: translate the owned global column
       indices into the ghosted local numbering the coloring uses */
    ierr = PetscMalloc((a->A->cmap->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr);
    for (i=0; i<a->A->cmap->n; i++) {
      larray[i] = i + A->cmap->rstart;
    }
    ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->A->cmap->n,larray,PETSC_NULL,larray);CHKERRQ(ierr);
    ierr = PetscMalloc((a->A->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
    for (i=0; i<a->A->cmap->n; i++) {
      colors[i] = coloring->colors[larray[i]];
    }
    ierr = PetscFree(larray);CHKERRQ(ierr);
    ierr = ISColoringCreate(PETSC_COMM_SELF,coloring->n,a->A->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
    ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr);
    ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);

    /* set coloring for off-diagonal portion: garray already holds the global
       indices of the ghost columns, so map those directly */
    ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr);
    ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->B->cmap->n,a->garray,PETSC_NULL,larray);CHKERRQ(ierr);
    ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
    for (i=0; i<a->B->cmap->n; i++) {
      colors[i] = coloring->colors[larray[i]];
    }
    ierr = PetscFree(larray);CHKERRQ(ierr);
    ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
    ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
    ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
  } else {
    SETERRQ1(PETSC_ERR_SUP,"No support ISColoringType %d",(int)coloring->ctype);
  }

  PetscFunctionReturn(0);
}
3835 
#if defined(PETSC_HAVE_ADIC)
#undef __FUNCT__
#define __FUNCT__ "MatSetValuesAdic_MPIAIJ"
/* Forward ADIC-computed values into both sequential blocks (diagonal and
   off-diagonal) of the parallel matrix. */
PetscErrorCode MatSetValuesAdic_MPIAIJ(Mat A,void *advalues)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatSetValuesAdic_SeqAIJ(aij->A,advalues);CHKERRQ(ierr);
  ierr = MatSetValuesAdic_SeqAIJ(aij->B,advalues);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#endif
3850 
3851 #undef __FUNCT__
3852 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ"
3853 PetscErrorCode MatSetValuesAdifor_MPIAIJ(Mat A,PetscInt nl,void *advalues)
3854 {
3855   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
3856   PetscErrorCode ierr;
3857 
3858   PetscFunctionBegin;
3859   ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr);
3860   ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr);
3861   PetscFunctionReturn(0);
3862 }
3863 
#undef __FUNCT__
#define __FUNCT__ "MatMerge"
/*@
      MatMerge - Creates a single large PETSc matrix by concatenating sequential
                 matrices from each processor

    Collective on MPI_Comm

   Input Parameters:
+    comm - the communicator the parallel matrix will live on
.    inmat - the input sequential matrices
.    n - number of local columns (or PETSC_DECIDE)
-    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX

   Output Parameter:
.    outmat - the parallel matrix generated

    Level: advanced

   Notes: The number of columns of the matrix in EACH processor MUST be the same.
          The input matrix inmat is destroyed by this routine once its entries
          have been copied into outmat.

@*/
PetscErrorCode PETSCMAT_DLLEXPORT MatMerge(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
{
  PetscErrorCode ierr;
  PetscInt       m,N,i,rstart,nnz,Ii,*dnz,*onz;
  PetscInt       *indx;
  PetscScalar    *values;

  PetscFunctionBegin;
  ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
  if (scall == MAT_INITIAL_MATRIX){
    /* count nonzeros in each row, for diagonal and off diagonal portion of matrix */
    if (n == PETSC_DECIDE){
      ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr);
    }
    /* prefix sum of local row counts gives this process's first global row */
    ierr = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
    rstart -= m;

    ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
    for (i=0;i<m;i++) {
      ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr);
      ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr);
      ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr);
    }
    /* This routine will ONLY return MPIAIJ type matrix */
    ierr = MatCreate(comm,outmat);CHKERRQ(ierr);
    ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
    ierr = MatSetType(*outmat,MATMPIAIJ);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr);
    ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);

  } else if (scall == MAT_REUSE_MATRIX){
    /* structure already exists; only the row offset is needed */
    ierr = MatGetOwnershipRange(*outmat,&rstart,PETSC_NULL);CHKERRQ(ierr);
  } else {
    SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
  }

  /* copy the local rows of inmat into the owned rows of outmat */
  for (i=0;i<m;i++) {
    ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
    Ii    = i + rstart;
    ierr = MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
    ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
  }
  /* the input matrix is consumed by this routine */
  ierr = MatDestroy(inmat);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}
3934 
3935 #undef __FUNCT__
3936 #define __FUNCT__ "MatFileSplit"
3937 PetscErrorCode MatFileSplit(Mat A,char *outfile)
3938 {
3939   PetscErrorCode    ierr;
3940   PetscMPIInt       rank;
3941   PetscInt          m,N,i,rstart,nnz;
3942   size_t            len;
3943   const PetscInt    *indx;
3944   PetscViewer       out;
3945   char              *name;
3946   Mat               B;
3947   const PetscScalar *values;
3948 
3949   PetscFunctionBegin;
3950   ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr);
3951   ierr = MatGetSize(A,0,&N);CHKERRQ(ierr);
3952   /* Should this be the type of the diagonal block of A? */
3953   ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr);
3954   ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr);
3955   ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr);
3956   ierr = MatSeqAIJSetPreallocation(B,0,PETSC_NULL);CHKERRQ(ierr);
3957   ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr);
3958   for (i=0;i<m;i++) {
3959     ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
3960     ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
3961     ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
3962   }
3963   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3964   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3965 
3966   ierr = MPI_Comm_rank(((PetscObject)A)->comm,&rank);CHKERRQ(ierr);
3967   ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr);
3968   ierr = PetscMalloc((len+5)*sizeof(char),&name);CHKERRQ(ierr);
3969   sprintf(name,"%s.%d",outfile,rank);
3970   ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr);
3971   ierr = PetscFree(name);
3972   ierr = MatView(B,out);CHKERRQ(ierr);
3973   ierr = PetscViewerDestroy(out);CHKERRQ(ierr);
3974   ierr = MatDestroy(B);CHKERRQ(ierr);
3975   PetscFunctionReturn(0);
3976 }
3977 
3978 EXTERN PetscErrorCode MatDestroy_MPIAIJ(Mat);
3979 #undef __FUNCT__
3980 #define __FUNCT__ "MatDestroy_MPIAIJ_SeqsToMPI"
3981 PetscErrorCode PETSCMAT_DLLEXPORT MatDestroy_MPIAIJ_SeqsToMPI(Mat A)
3982 {
3983   PetscErrorCode       ierr;
3984   Mat_Merge_SeqsToMPI  *merge;
3985   PetscContainer       container;
3986 
3987   PetscFunctionBegin;
3988   ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr);
3989   if (container) {
3990     ierr = PetscContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr);
3991     ierr = PetscFree(merge->id_r);CHKERRQ(ierr);
3992     ierr = PetscFree(merge->len_s);CHKERRQ(ierr);
3993     ierr = PetscFree(merge->len_r);CHKERRQ(ierr);
3994     ierr = PetscFree(merge->bi);CHKERRQ(ierr);
3995     ierr = PetscFree(merge->bj);CHKERRQ(ierr);
3996     ierr = PetscFree(merge->buf_ri[0]);CHKERRQ(ierr);
3997     ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr);
3998     ierr = PetscFree(merge->buf_rj[0]);CHKERRQ(ierr);
3999     ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr);
4000     ierr = PetscFree(merge->coi);CHKERRQ(ierr);
4001     ierr = PetscFree(merge->coj);CHKERRQ(ierr);
4002     ierr = PetscFree(merge->owners_co);CHKERRQ(ierr);
4003     ierr = PetscLayoutDestroy(merge->rowmap);CHKERRQ(ierr);
4004 
4005     ierr = PetscContainerDestroy(container);CHKERRQ(ierr);
4006     ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr);
4007   }
4008   ierr = PetscFree(merge);CHKERRQ(ierr);
4009 
4010   ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr);
4011   PetscFunctionReturn(0);
4012 }
4013 
4014 #include "../src/mat/utils/freespace.h"
4015 #include "petscbt.h"
4016 
4017 #undef __FUNCT__
4018 #define __FUNCT__ "MatMerge_SeqsToMPINumeric"
4019 /*@C
4020       MatMerge_SeqsToMPI - Creates a MPIAIJ matrix by adding sequential
4021                  matrices from each processor
4022 
4023     Collective on MPI_Comm
4024 
4025    Input Parameters:
4026 +    comm - the communicators the parallel matrix will live on
4027 .    seqmat - the input sequential matrices
4028 .    m - number of local rows (or PETSC_DECIDE)
4029 .    n - number of local columns (or PETSC_DECIDE)
4030 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4031 
4032    Output Parameter:
4033 .    mpimat - the parallel matrix generated
4034 
4035     Level: advanced
4036 
4037    Notes:
4038      The dimensions of the sequential matrix in each processor MUST be the same.
4039      The input seqmat is included into the container "Mat_Merge_SeqsToMPI", and will be
4040      destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat.
4041 @*/
4042 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPINumeric(Mat seqmat,Mat mpimat)
4043 {
4044   PetscErrorCode       ierr;
4045   MPI_Comm             comm=((PetscObject)mpimat)->comm;
4046   Mat_SeqAIJ           *a=(Mat_SeqAIJ*)seqmat->data;
4047   PetscMPIInt          size,rank,taga,*len_s;
4048   PetscInt             N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj=a->j;
4049   PetscInt             proc,m;
4050   PetscInt             **buf_ri,**buf_rj;
4051   PetscInt             k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj;
4052   PetscInt             nrows,**buf_ri_k,**nextrow,**nextai;
4053   MPI_Request          *s_waits,*r_waits;
4054   MPI_Status           *status;
4055   MatScalar            *aa=a->a;
4056   MatScalar            **abuf_r,*ba_i;
4057   Mat_Merge_SeqsToMPI  *merge;
4058   PetscContainer       container;
4059 
4060   PetscFunctionBegin;
4061   ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
4062 
4063   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4064   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4065 
4066   ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr);
4067   if (container) {
4068     ierr  = PetscContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr);
4069   }
4070   bi     = merge->bi;
4071   bj     = merge->bj;
4072   buf_ri = merge->buf_ri;
4073   buf_rj = merge->buf_rj;
4074 
4075   ierr   = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr);
4076   owners = merge->rowmap->range;
4077   len_s  = merge->len_s;
4078 
4079   /* send and recv matrix values */
4080   /*-----------------------------*/
4081   ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr);
4082   ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr);
4083 
4084   ierr = PetscMalloc((merge->nsend+1)*sizeof(MPI_Request),&s_waits);CHKERRQ(ierr);
4085   for (proc=0,k=0; proc<size; proc++){
4086     if (!len_s[proc]) continue;
4087     i = owners[proc];
4088     ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr);
4089     k++;
4090   }
4091 
4092   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);}
4093   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);}
4094   ierr = PetscFree(status);CHKERRQ(ierr);
4095 
4096   ierr = PetscFree(s_waits);CHKERRQ(ierr);
4097   ierr = PetscFree(r_waits);CHKERRQ(ierr);
4098 
4099   /* insert mat values of mpimat */
4100   /*----------------------------*/
4101   ierr = PetscMalloc(N*sizeof(PetscScalar),&ba_i);CHKERRQ(ierr);
4102   ierr = PetscMalloc3(merge->nrecv,PetscInt*,&buf_ri_k,merge->nrecv,PetscInt*,&nextrow,merge->nrecv,PetscInt*,&nextai);CHKERRQ(ierr);
4103 
4104   for (k=0; k<merge->nrecv; k++){
4105     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
4106     nrows = *(buf_ri_k[k]);
4107     nextrow[k]  = buf_ri_k[k]+1;  /* next row number of k-th recved i-structure */
4108     nextai[k]   = buf_ri_k[k] + (nrows + 1);/* poins to the next i-structure of k-th recved i-structure  */
4109   }
4110 
4111   /* set values of ba */
4112   m = merge->rowmap->n;
4113   for (i=0; i<m; i++) {
4114     arow = owners[rank] + i;
4115     bj_i = bj+bi[i];  /* col indices of the i-th row of mpimat */
4116     bnzi = bi[i+1] - bi[i];
4117     ierr = PetscMemzero(ba_i,bnzi*sizeof(PetscScalar));CHKERRQ(ierr);
4118 
4119     /* add local non-zero vals of this proc's seqmat into ba */
4120     anzi = ai[arow+1] - ai[arow];
4121     aj   = a->j + ai[arow];
4122     aa   = a->a + ai[arow];
4123     nextaj = 0;
4124     for (j=0; nextaj<anzi; j++){
4125       if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */
4126         ba_i[j] += aa[nextaj++];
4127       }
4128     }
4129 
4130     /* add received vals into ba */
4131     for (k=0; k<merge->nrecv; k++){ /* k-th received message */
4132       /* i-th row */
4133       if (i == *nextrow[k]) {
4134         anzi = *(nextai[k]+1) - *nextai[k];
4135         aj   = buf_rj[k] + *(nextai[k]);
4136         aa   = abuf_r[k] + *(nextai[k]);
4137         nextaj = 0;
4138         for (j=0; nextaj<anzi; j++){
4139           if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */
4140             ba_i[j] += aa[nextaj++];
4141           }
4142         }
4143         nextrow[k]++; nextai[k]++;
4144       }
4145     }
4146     ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr);
4147   }
4148   ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4149   ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4150 
4151   ierr = PetscFree(abuf_r[0]);CHKERRQ(ierr);
4152   ierr = PetscFree(abuf_r);CHKERRQ(ierr);
4153   ierr = PetscFree(ba_i);CHKERRQ(ierr);
4154   ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
4155   ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
4156   PetscFunctionReturn(0);
4157 }
4158 
#undef __FUNCT__
#define __FUNCT__ "MatMerge_SeqsToMPISymbolic"
/*
   MatMerge_SeqsToMPISymbolic - Symbolic phase of merging one sequential
   matrix per process into a parallel MPIAIJ matrix.

   Determines row ownership, exchanges the ij-structure of rows owned by
   other processes, computes the union of column indices per local row
   (via a linked list), preallocates the result, and attaches the
   Mat_Merge_SeqsToMPI support structure to it so that
   MatMerge_SeqsToMPINumeric() can fill in the values.

   The returned matrix is NOT assembled; it must be completed by
   MatMerge_SeqsToMPINumeric().
*/
PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPISymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat)
{
  PetscErrorCode       ierr;
  Mat                  B_mpi;
  Mat_SeqAIJ           *a=(Mat_SeqAIJ*)seqmat->data;
  PetscMPIInt          size,rank,tagi,tagj,*len_s,*len_si,*len_ri;
  PetscInt             **buf_rj,**buf_ri,**buf_ri_k;
  PetscInt             M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j;
  PetscInt             len,proc,*dnz,*onz;
  PetscInt             k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0;
  PetscInt             nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai;
  MPI_Request          *si_waits,*sj_waits,*ri_waits,*rj_waits;
  MPI_Status           *status;
  PetscFreeSpaceList   free_space=PETSC_NULL,current_space=PETSC_NULL;
  PetscBT              lnkbt;
  Mat_Merge_SeqsToMPI  *merge;
  PetscContainer       container;

  PetscFunctionBegin;
  ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);

  /* make sure it is a PETSc comm */
  ierr = PetscCommDuplicate(comm,&comm,PETSC_NULL);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);

  ierr = PetscNew(Mat_Merge_SeqsToMPI,&merge);CHKERRQ(ierr);
  ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr);

  /* determine row ownership */
  /*---------------------------------------------------------*/
  ierr = PetscLayoutCreate(comm,&merge->rowmap);CHKERRQ(ierr);
  ierr = PetscLayoutSetLocalSize(merge->rowmap,m);CHKERRQ(ierr);
  ierr = PetscLayoutSetSize(merge->rowmap,M);CHKERRQ(ierr);
  ierr = PetscLayoutSetBlockSize(merge->rowmap,1);CHKERRQ(ierr);
  ierr = PetscLayoutSetUp(merge->rowmap);CHKERRQ(ierr);
  ierr = PetscMalloc(size*sizeof(PetscMPIInt),&len_si);CHKERRQ(ierr);
  ierr = PetscMalloc(size*sizeof(PetscMPIInt),&merge->len_s);CHKERRQ(ierr);

  m      = merge->rowmap->n;
  M      = merge->rowmap->N;
  owners = merge->rowmap->range;

  /* determine the number of messages to send, their lengths */
  /*---------------------------------------------------------*/
  len_s  = merge->len_s;

  len = 0;  /* length of buf_si[] */
  merge->nsend = 0;
  for (proc=0; proc<size; proc++){
    len_si[proc] = 0;
    if (proc == rank){
      /* rows this process owns are kept local; nothing to send */
      len_s[proc] = 0;
    } else {
      len_si[proc] = owners[proc+1] - owners[proc] + 1;
      len_s[proc] = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */
    }
    if (len_s[proc]) {
      merge->nsend++;
      nrows = 0;
      /* count only the nonempty rows; empty rows are not transmitted */
      for (i=owners[proc]; i<owners[proc+1]; i++){
        if (ai[i+1] > ai[i]) nrows++;
      }
      len_si[proc] = 2*(nrows+1);
      len += len_si[proc];
    }
  }

  /* determine the number and length of messages to receive for ij-structure */
  /*-------------------------------------------------------------------------*/
  ierr = PetscGatherNumberOfMessages(comm,PETSC_NULL,len_s,&merge->nrecv);CHKERRQ(ierr);
  ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr);

  /* post the Irecv of j-structure */
  /*-------------------------------*/
  ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr);
  ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr);

  /* post the Isend of j-structure */
  /*--------------------------------*/
  ierr = PetscMalloc2(merge->nsend,MPI_Request,&si_waits,merge->nsend,MPI_Request,&sj_waits);CHKERRQ(ierr);

  for (proc=0, k=0; proc<size; proc++){
    if (!len_s[proc]) continue;
    i = owners[proc];
    ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr);
    k++;
  }

  /* receives and sends of j-structure are complete */
  /*------------------------------------------------*/
  if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);}
  if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);}

  /* send and recv i-structure */
  /*---------------------------*/
  ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr);
  ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr);

  ierr = PetscMalloc((len+1)*sizeof(PetscInt),&buf_s);CHKERRQ(ierr);
  buf_si = buf_s;  /* points to the beginning of k-th msg to be sent */
  for (proc=0,k=0; proc<size; proc++){
    if (!len_s[proc]) continue;
    /* form outgoing message for i-structure:
         buf_si[0]:                 nrows to be sent
               [1:nrows]:           row index (global)
               [nrows+1:2*nrows+1]: i-structure index
    */
    /*-------------------------------------------*/
    nrows = len_si[proc]/2 - 1;
    buf_si_i    = buf_si + nrows+1;
    buf_si[0]   = nrows;
    buf_si_i[0] = 0;
    nrows = 0;
    for (i=owners[proc]; i<owners[proc+1]; i++){
      anzi = ai[i+1] - ai[i];
      if (anzi) {
        buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */
        buf_si[nrows+1] = i-owners[proc]; /* local row index */
        nrows++;
      }
    }
    ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr);
    k++;
    buf_si += len_si[proc];
  }

  if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);}
  if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);}

  ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr);
  for (i=0; i<merge->nrecv; i++){
    ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr);
  }

  ierr = PetscFree(len_si);CHKERRQ(ierr);
  ierr = PetscFree(len_ri);CHKERRQ(ierr);
  ierr = PetscFree(rj_waits);CHKERRQ(ierr);
  ierr = PetscFree2(si_waits,sj_waits);CHKERRQ(ierr);
  ierr = PetscFree(ri_waits);CHKERRQ(ierr);
  ierr = PetscFree(buf_s);CHKERRQ(ierr);
  ierr = PetscFree(status);CHKERRQ(ierr);

  /* compute a local seq matrix in each processor */
  /*----------------------------------------------*/
  /* allocate bi array and free space for accumulating nonzero column info */
  ierr = PetscMalloc((m+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr);
  bi[0] = 0;

  /* create and initialize a linked list */
  nlnk = N+1;
  ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);

  /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */
  len = 0;
  len  = ai[owners[rank+1]] - ai[owners[rank]];
  ierr = PetscFreeSpaceGet((PetscInt)(2*len+1),&free_space);CHKERRQ(ierr);
  current_space = free_space;

  /* determine symbolic info for each local row */
  ierr = PetscMalloc3(merge->nrecv,PetscInt*,&buf_ri_k,merge->nrecv,PetscInt*,&nextrow,merge->nrecv,PetscInt*,&nextai);CHKERRQ(ierr);

  for (k=0; k<merge->nrecv; k++){
    buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
    nrows = *buf_ri_k[k];
    nextrow[k]  = buf_ri_k[k] + 1;  /* next row number of k-th recved i-structure */
    nextai[k]   = buf_ri_k[k] + (nrows + 1);/* points to the next i-structure of k-th recved i-structure  */
  }

  ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
  len = 0;
  for (i=0;i<m;i++) {
    bnzi   = 0;
    /* add local non-zero cols of this proc's seqmat into lnk */
    arow   = owners[rank] + i;
    anzi   = ai[arow+1] - ai[arow];
    aj     = a->j + ai[arow];
    ierr = PetscLLAdd(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
    bnzi += nlnk;
    /* add received col data into lnk */
    for (k=0; k<merge->nrecv; k++){ /* k-th received message */
      if (i == *nextrow[k]) { /* i-th row */
        anzi = *(nextai[k]+1) - *nextai[k];
        aj   = buf_rj[k] + *nextai[k];
        ierr = PetscLLAdd(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
        bnzi += nlnk;
        nextrow[k]++; nextai[k]++;
      }
    }
    if (len < bnzi) len = bnzi;  /* =max(bnzi) */

    /* if free space is not available, make more free space */
    if (current_space->local_remaining<bnzi) {
      ierr = PetscFreeSpaceGet(bnzi+current_space->total_array_size,&current_space);CHKERRQ(ierr);
      nspacedouble++;
    }
    /* copy data into free space, then initialize lnk */
    ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
    ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr);

    current_space->array           += bnzi;
    current_space->local_used      += bnzi;
    current_space->local_remaining -= bnzi;

    bi[i+1] = bi[i] + bnzi;
  }

  ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);

  /* flatten the free-space chunks into the final bj array */
  ierr = PetscMalloc((bi[m]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr);
  ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr);
  ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);

  /* create symbolic parallel matrix B_mpi */
  /*---------------------------------------*/
  ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr);
  if (n==PETSC_DECIDE) {
    ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr);
  } else {
    ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
  }
  ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr);
  ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr);
  ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);

  /* B_mpi is not ready for use - assembly will be done by MatMerge_SeqsToMPINumeric() */
  B_mpi->assembled     = PETSC_FALSE;
  B_mpi->ops->destroy  = MatDestroy_MPIAIJ_SeqsToMPI;
  merge->bi            = bi;
  merge->bj            = bj;
  merge->buf_ri        = buf_ri;
  merge->buf_rj        = buf_rj;
  merge->coi           = PETSC_NULL;
  merge->coj           = PETSC_NULL;
  merge->owners_co     = PETSC_NULL;

  /* attach the supporting struct to B_mpi for reuse */
  ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
  ierr = PetscContainerSetPointer(container,merge);CHKERRQ(ierr);
  ierr = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr);
  *mpimat = B_mpi;

  ierr = PetscCommDestroy(&comm);CHKERRQ(ierr);
  ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
4407 
#undef __FUNCT__
#define __FUNCT__ "MatMerge_SeqsToMPI"
/*
   MatMerge_SeqsToMPI - Driver combining the symbolic and numeric phases of
   merging per-process sequential matrices into one parallel MPIAIJ matrix.
   See the man page above MatMerge_SeqsToMPINumeric() for the argument
   descriptions.
*/
PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPI(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat)
{
  PetscErrorCode   ierr;

  PetscFunctionBegin;
  ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
  /* the symbolic phase is needed only the first time; on reuse the
     structural information is already attached to *mpimat */
  if (scall == MAT_INITIAL_MATRIX){
    ierr = MatMerge_SeqsToMPISymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr);
  }
  ierr = MatMerge_SeqsToMPINumeric(seqmat,*mpimat);CHKERRQ(ierr);
  ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
4423 
#undef __FUNCT__
#define __FUNCT__ "MatGetLocalMat"
/*@
     MatGetLocalMat - Creates a SeqAIJ matrix by taking all its local rows

    Not Collective

   Input Parameters:
+    A - the matrix
.    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX

   Output Parameter:
.    A_loc - the local sequential matrix generated

    Level: developer

   Notes: Merges the diagonal (mpimat->A) and off-diagonal (mpimat->B) CSR
   blocks of the MPIAIJ matrix row by row, producing one sequential matrix
   whose columns use global numbering and appear in ascending order. With
   MAT_REUSE_MATRIX only the numerical values are refreshed; the sparsity
   pattern of *A_loc is assumed unchanged.

@*/
PetscErrorCode PETSCMAT_DLLEXPORT MatGetLocalMat(Mat A,MatReuse scall,Mat *A_loc)
{
  PetscErrorCode  ierr;
  Mat_MPIAIJ      *mpimat=(Mat_MPIAIJ*)A->data;
  Mat_SeqAIJ      *mat,*a=(Mat_SeqAIJ*)(mpimat->A)->data,*b=(Mat_SeqAIJ*)(mpimat->B)->data;
  PetscInt        *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*cmap=mpimat->garray;
  MatScalar       *aa=a->a,*ba=b->a,*cam;
  PetscScalar     *ca;
  PetscInt        am=A->rmap->n,i,j,k,cstart=A->cmap->rstart;
  PetscInt        *ci,*cj,col,ncols_d,ncols_o,jo;

  PetscFunctionBegin;
  ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
  if (scall == MAT_INITIAL_MATRIX){
    /* row pointers of the merged matrix: each row holds its diagonal-block
       plus its off-diagonal-block nonzeros */
    ierr = PetscMalloc((1+am)*sizeof(PetscInt),&ci);CHKERRQ(ierr);
    ci[0] = 0;
    for (i=0; i<am; i++){
      ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]);
    }
    ierr = PetscMalloc((1+ci[am])*sizeof(PetscInt),&cj);CHKERRQ(ierr);
    ierr = PetscMalloc((1+ci[am])*sizeof(PetscScalar),&ca);CHKERRQ(ierr);
    k = 0;
    for (i=0; i<am; i++) {
      ncols_o = bi[i+1] - bi[i];
      ncols_d = ai[i+1] - ai[i];
      /* off-diagonal portion of A: entries with global column < cstart come first */
      for (jo=0; jo<ncols_o; jo++) {
        col = cmap[*bj];
        if (col >= cstart) break;
        cj[k]   = col; bj++;
        ca[k++] = *ba++;
      }
      /* diagonal portion of A: shift local columns to global numbering */
      for (j=0; j<ncols_d; j++) {
        cj[k]   = cstart + *aj++;
        ca[k++] = *aa++;
      }
      /* off-diagonal portion of A: remaining entries with global column >= cstart */
      for (j=jo; j<ncols_o; j++) {
        cj[k]   = cmap[*bj++];
        ca[k++] = *ba++;
      }
    }
    /* put together the new matrix */
    ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr);
    /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
    /* Since these are PETSc arrays, change flags to free them as necessary. */
    mat          = (Mat_SeqAIJ*)(*A_loc)->data;
    mat->free_a  = PETSC_TRUE;
    mat->free_ij = PETSC_TRUE;
    mat->nonew   = 0;
  } else if (scall == MAT_REUSE_MATRIX){
    /* pattern is fixed; walk the same merge order and overwrite values only */
    mat=(Mat_SeqAIJ*)(*A_loc)->data;
    ci = mat->i; cj = mat->j; cam = mat->a;
    for (i=0; i<am; i++) {
      /* off-diagonal portion of A */
      ncols_o = bi[i+1] - bi[i];
      for (jo=0; jo<ncols_o; jo++) {
        col = cmap[*bj];
        if (col >= cstart) break;
        *cam++ = *ba++; bj++;
      }
      /* diagonal portion of A */
      ncols_d = ai[i+1] - ai[i];
      for (j=0; j<ncols_d; j++) *cam++ = *aa++;
      /* off-diagonal portion of A */
      for (j=jo; j<ncols_o; j++) {
        *cam++ = *ba++; bj++;
      }
    }
  } else {
    SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
  }

  ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
4518 
4519 #undef __FUNCT__
4520 #define __FUNCT__ "MatGetLocalMatCondensed"
4521 /*@C
4522      MatGetLocalMatCondensed - Creates a SeqAIJ matrix by taking all its local rows and NON-ZERO columns
4523 
4524     Not Collective
4525 
4526    Input Parameters:
4527 +    A - the matrix
4528 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4529 -    row, col - index sets of rows and columns to extract (or PETSC_NULL)
4530 
4531    Output Parameter:
4532 .    A_loc - the local sequential matrix generated
4533 
4534     Level: developer
4535 
4536 @*/
4537 PetscErrorCode PETSCMAT_DLLEXPORT MatGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc)
4538 {
4539   Mat_MPIAIJ        *a=(Mat_MPIAIJ*)A->data;
4540   PetscErrorCode    ierr;
4541   PetscInt          i,start,end,ncols,nzA,nzB,*cmap,imark,*idx;
4542   IS                isrowa,iscola;
4543   Mat               *aloc;
4544 
4545   PetscFunctionBegin;
4546   ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
4547   if (!row){
4548     start = A->rmap->rstart; end = A->rmap->rend;
4549     ierr = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr);
4550   } else {
4551     isrowa = *row;
4552   }
4553   if (!col){
4554     start = A->cmap->rstart;
4555     cmap  = a->garray;
4556     nzA   = a->A->cmap->n;
4557     nzB   = a->B->cmap->n;
4558     ierr  = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr);
4559     ncols = 0;
4560     for (i=0; i<nzB; i++) {
4561       if (cmap[i] < start) idx[ncols++] = cmap[i];
4562       else break;
4563     }
4564     imark = i;
4565     for (i=0; i<nzA; i++) idx[ncols++] = start + i;
4566     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i];
4567     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,&iscola);CHKERRQ(ierr);
4568     ierr = PetscFree(idx);CHKERRQ(ierr);
4569   } else {
4570     iscola = *col;
4571   }
4572   if (scall != MAT_INITIAL_MATRIX){
4573     ierr = PetscMalloc(sizeof(Mat),&aloc);CHKERRQ(ierr);
4574     aloc[0] = *A_loc;
4575   }
4576   ierr = MatGetSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr);
4577   *A_loc = aloc[0];
4578   ierr = PetscFree(aloc);CHKERRQ(ierr);
4579   if (!row){
4580     ierr = ISDestroy(isrowa);CHKERRQ(ierr);
4581   }
4582   if (!col){
4583     ierr = ISDestroy(iscola);CHKERRQ(ierr);
4584   }
4585   ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
4586   PetscFunctionReturn(0);
4587 }
4588 
#undef __FUNCT__
#define __FUNCT__ "MatGetBrowsOfAcols"
/*@C
    MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns of local A

    Collective on Mat

   Input Parameters:
+    A,B - the matrices in mpiaij format
.    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
-    rowb, colb - index sets of rows and columns of B to extract (or PETSC_NULL)

   Output Parameter:
+    rowb, colb - index sets of rows and columns of B to extract
.    brstart - row index of B_seq from which next B->rmap->n rows are taken from B's local rows
               (only assigned when scall is MAT_INITIAL_MATRIX; unchanged on reuse)
-    B_seq - the sequential matrix generated

    Level: developer

@*/
PetscErrorCode PETSCMAT_DLLEXPORT MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,PetscInt *brstart,Mat *B_seq)
{
  Mat_MPIAIJ        *a=(Mat_MPIAIJ*)A->data;
  PetscErrorCode    ierr;
  PetscInt          *idx,i,start,ncols,nzA,nzB,*cmap,imark;
  IS                isrowb,iscolb;
  Mat               *bseq;

  PetscFunctionBegin;
  /* the rows of B must align with the columns of A for A*B to make sense */
  if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend){
    SETERRQ4(PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
  }
  ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);

  if (scall == MAT_INITIAL_MATRIX){
    /* Build the list of B's rows needed: the nonzero columns of local A in
       increasing global order.  a->garray holds the sorted global indices of
       A's off-diagonal columns; splice A's diagonal-block columns in between. */
    start = A->cmap->rstart;
    cmap  = a->garray;
    nzA   = a->A->cmap->n;
    nzB   = a->B->cmap->n;
    ierr  = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr);
    ncols = 0;
    for (i=0; i<nzB; i++) {  /* row < local row index */
      if (cmap[i] < start) idx[ncols++] = cmap[i];
      else break;
    }
    imark = i;
    for (i=0; i<nzA; i++) idx[ncols++] = start + i;  /* local rows */
    for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */
    ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,&isrowb);CHKERRQ(ierr);
    ierr = PetscFree(idx);CHKERRQ(ierr);
    *brstart = imark;  /* B's local rows begin at this row of B_seq */
    /* take all columns of B */
    ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr);
  } else {
    /* reuse: the caller must supply the index sets saved from the initial call */
    if (!rowb || !colb) SETERRQ(PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX");
    isrowb = *rowb; iscolb = *colb;
    /* MatGetSubMatrices() requires the array to hold the matrix being reused */
    ierr = PetscMalloc(sizeof(Mat),&bseq);CHKERRQ(ierr);
    bseq[0] = *B_seq;
  }
  ierr = MatGetSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr);
  *B_seq = bseq[0];
  ierr = PetscFree(bseq);CHKERRQ(ierr);
  /* hand the index sets back if requested, otherwise clean up the ones built here */
  if (!rowb){
    ierr = ISDestroy(isrowb);CHKERRQ(ierr);
  } else {
    *rowb = isrowb;
  }
  if (!colb){
    ierr = ISDestroy(iscolb);CHKERRQ(ierr);
  } else {
    *colb = iscolb;
  }
  ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
4663 
4664 #undef __FUNCT__
4665 #define __FUNCT__ "MatGetBrowsOfAoCols"
4666 /*@C
4667     MatGetBrowsOfAoCols - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns
4668     of the OFF-DIAGONAL portion of local A
4669 
4670     Collective on Mat
4671 
4672    Input Parameters:
4673 +    A,B - the matrices in mpiaij format
4674 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4675 .    startsj - starting point in B's sending and receiving j-arrays, saved for MAT_REUSE (or PETSC_NULL)
4676 .    startsj_r - similar to startsj for receives
4677 -    bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or PETSC_NULL)
4678 
4679    Output Parameter:
4680 +    B_oth - the sequential matrix generated
4681 
4682     Level: developer
4683 
4684 @*/
4685 PetscErrorCode PETSCMAT_DLLEXPORT MatGetBrowsOfAoCols(Mat A,Mat B,MatReuse scall,PetscInt **startsj,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth)
4686 {
4687   VecScatter_MPI_General *gen_to,*gen_from;
4688   PetscErrorCode         ierr;
4689   Mat_MPIAIJ             *a=(Mat_MPIAIJ*)A->data;
4690   Mat_SeqAIJ             *b_oth;
4691   VecScatter             ctx=a->Mvctx;
4692   MPI_Comm               comm=((PetscObject)ctx)->comm;
4693   PetscMPIInt            *rprocs,*sprocs,tag=((PetscObject)ctx)->tag,rank;
4694   PetscInt               *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap->n,row,*b_othi,*b_othj;
4695   PetscScalar            *rvalues,*svalues;
4696   MatScalar              *b_otha,*bufa,*bufA;
4697   PetscInt               i,j,k,l,ll,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len;
4698   MPI_Request            *rwaits = PETSC_NULL,*swaits = PETSC_NULL;
4699   MPI_Status             *sstatus,rstatus;
4700   PetscMPIInt            jj;
4701   PetscInt               *cols,sbs,rbs;
4702   PetscScalar            *vals;
4703 
4704   PetscFunctionBegin;
4705   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend){
4706     SETERRQ4(PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
4707   }
4708   ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
4709   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4710 
4711   gen_to   = (VecScatter_MPI_General*)ctx->todata;
4712   gen_from = (VecScatter_MPI_General*)ctx->fromdata;
4713   rvalues  = gen_from->values; /* holds the length of receiving row */
4714   svalues  = gen_to->values;   /* holds the length of sending row */
4715   nrecvs   = gen_from->n;
4716   nsends   = gen_to->n;
4717 
4718   ierr = PetscMalloc2(nrecvs,MPI_Request,&rwaits,nsends,MPI_Request,&swaits);CHKERRQ(ierr);
4719   srow     = gen_to->indices;   /* local row index to be sent */
4720   sstarts  = gen_to->starts;
4721   sprocs   = gen_to->procs;
4722   sstatus  = gen_to->sstatus;
4723   sbs      = gen_to->bs;
4724   rstarts  = gen_from->starts;
4725   rprocs   = gen_from->procs;
4726   rbs      = gen_from->bs;
4727 
4728   if (!startsj || !bufa_ptr) scall = MAT_INITIAL_MATRIX;
4729   if (scall == MAT_INITIAL_MATRIX){
4730     /* i-array */
4731     /*---------*/
4732     /*  post receives */
4733     for (i=0; i<nrecvs; i++){
4734       rowlen = (PetscInt*)rvalues + rstarts[i]*rbs;
4735       nrows = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */
4736       ierr = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
4737     }
4738 
4739     /* pack the outgoing message */
4740     ierr = PetscMalloc2(nsends+1,PetscInt,&sstartsj,nrecvs+1,PetscInt,&rstartsj);CHKERRQ(ierr);
4741     sstartsj[0] = 0;  rstartsj[0] = 0;
4742     len = 0; /* total length of j or a array to be sent */
4743     k = 0;
4744     for (i=0; i<nsends; i++){
4745       rowlen = (PetscInt*)svalues + sstarts[i]*sbs;
4746       nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
4747       for (j=0; j<nrows; j++) {
4748         row = srow[k] + B->rmap->range[rank]; /* global row idx */
4749         for (l=0; l<sbs; l++){
4750           ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); /* rowlength */
4751           rowlen[j*sbs+l] = ncols;
4752           len += ncols;
4753           ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr);
4754         }
4755         k++;
4756       }
4757       ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
4758       sstartsj[i+1] = len;  /* starting point of (i+1)-th outgoing msg in bufj and bufa */
4759     }
4760     /* recvs and sends of i-array are completed */
4761     i = nrecvs;
4762     while (i--) {
4763       ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
4764     }
4765     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
4766 
4767     /* allocate buffers for sending j and a arrays */
4768     ierr = PetscMalloc((len+1)*sizeof(PetscInt),&bufj);CHKERRQ(ierr);
4769     ierr = PetscMalloc((len+1)*sizeof(PetscScalar),&bufa);CHKERRQ(ierr);
4770 
4771     /* create i-array of B_oth */
4772     ierr = PetscMalloc((aBn+2)*sizeof(PetscInt),&b_othi);CHKERRQ(ierr);
4773     b_othi[0] = 0;
4774     len = 0; /* total length of j or a array to be received */
4775     k = 0;
4776     for (i=0; i<nrecvs; i++){
4777       rowlen = (PetscInt*)rvalues + rstarts[i]*rbs;
4778       nrows = rbs*(rstarts[i+1]-rstarts[i]); /* num of rows to be recieved */
4779       for (j=0; j<nrows; j++) {
4780         b_othi[k+1] = b_othi[k] + rowlen[j];
4781         len += rowlen[j]; k++;
4782       }
4783       rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */
4784     }
4785 
4786     /* allocate space for j and a arrrays of B_oth */
4787     ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(PetscInt),&b_othj);CHKERRQ(ierr);
4788     ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(MatScalar),&b_otha);CHKERRQ(ierr);
4789 
4790     /* j-array */
4791     /*---------*/
4792     /*  post receives of j-array */
4793     for (i=0; i<nrecvs; i++){
4794       nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
4795       ierr = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
4796     }
4797 
4798     /* pack the outgoing message j-array */
4799     k = 0;
4800     for (i=0; i<nsends; i++){
4801       nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
4802       bufJ = bufj+sstartsj[i];
4803       for (j=0; j<nrows; j++) {
4804         row  = srow[k++] + B->rmap->range[rank]; /* global row idx */
4805         for (ll=0; ll<sbs; ll++){
4806           ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr);
4807           for (l=0; l<ncols; l++){
4808             *bufJ++ = cols[l];
4809           }
4810           ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr);
4811         }
4812       }
4813       ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
4814     }
4815 
4816     /* recvs and sends of j-array are completed */
4817     i = nrecvs;
4818     while (i--) {
4819       ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
4820     }
4821     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
4822   } else if (scall == MAT_REUSE_MATRIX){
4823     sstartsj = *startsj;
4824     rstartsj = *startsj_r;
4825     bufa     = *bufa_ptr;
4826     b_oth    = (Mat_SeqAIJ*)(*B_oth)->data;
4827     b_otha   = b_oth->a;
4828   } else {
4829     SETERRQ(PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not posses an object container");
4830   }
4831 
4832   /* a-array */
4833   /*---------*/
4834   /*  post receives of a-array */
4835   for (i=0; i<nrecvs; i++){
4836     nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
4837     ierr = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
4838   }
4839 
4840   /* pack the outgoing message a-array */
4841   k = 0;
4842   for (i=0; i<nsends; i++){
4843     nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
4844     bufA = bufa+sstartsj[i];
4845     for (j=0; j<nrows; j++) {
4846       row  = srow[k++] + B->rmap->range[rank]; /* global row idx */
4847       for (ll=0; ll<sbs; ll++){
4848         ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr);
4849         for (l=0; l<ncols; l++){
4850           *bufA++ = vals[l];
4851         }
4852         ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr);
4853       }
4854     }
4855     ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
4856   }
4857   /* recvs and sends of a-array are completed */
4858   i = nrecvs;
4859   while (i--) {
4860     ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
4861   }
4862   if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
4863   ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr);
4864 
4865   if (scall == MAT_INITIAL_MATRIX){
4866     /* put together the new matrix */
4867     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr);
4868 
4869     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
4870     /* Since these are PETSc arrays, change flags to free them as necessary. */
4871     b_oth          = (Mat_SeqAIJ *)(*B_oth)->data;
4872     b_oth->free_a  = PETSC_TRUE;
4873     b_oth->free_ij = PETSC_TRUE;
4874     b_oth->nonew   = 0;
4875 
4876     ierr = PetscFree(bufj);CHKERRQ(ierr);
4877     if (!startsj || !bufa_ptr){
4878       ierr = PetscFree2(sstartsj,rstartsj);CHKERRQ(ierr);
4879       ierr = PetscFree(bufa_ptr);CHKERRQ(ierr);
4880     } else {
4881       *startsj   = sstartsj;
4882       *startsj_r = rstartsj;
4883       *bufa_ptr  = bufa;
4884     }
4885   }
4886   ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
4887   PetscFunctionReturn(0);
4888 }
4889 
4890 #undef __FUNCT__
4891 #define __FUNCT__ "MatGetCommunicationStructs"
4892 /*@C
4893   MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication.
4894 
4895   Not Collective
4896 
4897   Input Parameters:
4898 . A - The matrix in mpiaij format
4899 
4900   Output Parameter:
4901 + lvec - The local vector holding off-process values from the argument to a matrix-vector product
4902 . colmap - A map from global column index to local index into lvec
4903 - multScatter - A scatter from the argument of a matrix-vector product to lvec
4904 
4905   Level: developer
4906 
4907 @*/
4908 #if defined (PETSC_USE_CTABLE)
4909 PetscErrorCode PETSCMAT_DLLEXPORT MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter)
4910 #else
4911 PetscErrorCode PETSCMAT_DLLEXPORT MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter)
4912 #endif
4913 {
4914   Mat_MPIAIJ *a;
4915 
4916   PetscFunctionBegin;
4917   PetscValidHeaderSpecific(A, MAT_COOKIE, 1);
4918   PetscValidPointer(lvec, 2)
4919   PetscValidPointer(colmap, 3)
4920   PetscValidPointer(multScatter, 4)
4921   a = (Mat_MPIAIJ *) A->data;
4922   if (lvec) *lvec = a->lvec;
4923   if (colmap) *colmap = a->colmap;
4924   if (multScatter) *multScatter = a->Mvctx;
4925   PetscFunctionReturn(0);
4926 }
4927 
4928 EXTERN_C_BEGIN
4929 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_MPIAIJ_MPICRL(Mat,const MatType,MatReuse,Mat*);
4930 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_MPIAIJ_MPICSRPERM(Mat,const MatType,MatReuse,Mat*);
4931 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_MPIAIJ_MPISBAIJ(Mat,const MatType,MatReuse,Mat*);
4932 EXTERN_C_END
4933 
4934 #undef __FUNCT__
4935 #define __FUNCT__ "MatMatMultNumeric_MPIDense_MPIAIJ"
4936 /*
4937     Computes (B'*A')' since computing B*A directly is untenable
4938 
4939                n                       p                          p
4940         (              )       (              )         (                  )
4941       m (      A       )  *  n (       B      )   =   m (         C        )
4942         (              )       (              )         (                  )
4943 
4944 */
4945 PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C)
4946 {
4947   PetscErrorCode     ierr;
4948   Mat                At,Bt,Ct;
4949 
4950   PetscFunctionBegin;
4951   ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr);
4952   ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr);
4953   ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,1.0,&Ct);CHKERRQ(ierr);
4954   ierr = MatDestroy(At);CHKERRQ(ierr);
4955   ierr = MatDestroy(Bt);CHKERRQ(ierr);
4956   ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr);
4957   ierr = MatDestroy(Ct);CHKERRQ(ierr);
4958   PetscFunctionReturn(0);
4959 }
4960 
4961 #undef __FUNCT__
4962 #define __FUNCT__ "MatMatMultSymbolic_MPIDense_MPIAIJ"
4963 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat *C)
4964 {
4965   PetscErrorCode ierr;
4966   PetscInt       m=A->rmap->n,n=B->cmap->n;
4967   Mat            Cmat;
4968 
4969   PetscFunctionBegin;
4970   if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_ERR_ARG_SIZ,"A->cmap->n %d != B->rmap->n %d\n",A->cmap->n,B->rmap->n);
4971   ierr = MatCreate(((PetscObject)A)->comm,&Cmat);CHKERRQ(ierr);
4972   ierr = MatSetSizes(Cmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
4973   ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr);
4974   ierr = MatMPIDenseSetPreallocation(Cmat,PETSC_NULL);CHKERRQ(ierr);
4975   ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4976   ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4977   *C   = Cmat;
4978   PetscFunctionReturn(0);
4979 }
4980 
4981 /* ----------------------------------------------------------------*/
4982 #undef __FUNCT__
4983 #define __FUNCT__ "MatMatMult_MPIDense_MPIAIJ"
4984 PetscErrorCode MatMatMult_MPIDense_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C)
4985 {
4986   PetscErrorCode ierr;
4987 
4988   PetscFunctionBegin;
4989   if (scall == MAT_INITIAL_MATRIX){
4990     ierr = MatMatMultSymbolic_MPIDense_MPIAIJ(A,B,fill,C);CHKERRQ(ierr);
4991   }
4992   ierr = MatMatMultNumeric_MPIDense_MPIAIJ(A,B,*C);CHKERRQ(ierr);
4993   PetscFunctionReturn(0);
4994 }
4995 
4996 EXTERN_C_BEGIN
4997 #if defined(PETSC_HAVE_MUMPS)
4998 extern PetscErrorCode MatGetFactor_mpiaij_mumps(Mat,MatFactorType,Mat*);
4999 #endif
5000 #if defined(PETSC_HAVE_PASTIX)
5001 extern PetscErrorCode MatGetFactor_mpiaij_pastix(Mat,MatFactorType,Mat*);
5002 #endif
5003 #if defined(PETSC_HAVE_SUPERLU_DIST)
5004 extern PetscErrorCode MatGetFactor_mpiaij_superlu_dist(Mat,MatFactorType,Mat*);
5005 #endif
5006 #if defined(PETSC_HAVE_SPOOLES)
5007 extern PetscErrorCode MatGetFactor_mpiaij_spooles(Mat,MatFactorType,Mat*);
5008 #endif
5009 EXTERN_C_END
5010 
5011 /*MC
5012    MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.
5013 
5014    Options Database Keys:
5015 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions()
5016 
5017   Level: beginner
5018 
5019 .seealso: MatCreateMPIAIJ()
5020 M*/
5021 
5022 EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatCreate_MPIAIJ"
/* Constructor for the MATMPIAIJ matrix type: allocates the Mat_MPIAIJ data
   structure, installs the operations table, and registers all composed
   functions (external factorization hooks, conversions, preallocation). */
PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_MPIAIJ(Mat B)
{
  Mat_MPIAIJ     *b;
  PetscErrorCode ierr;
  PetscMPIInt    size;

  PetscFunctionBegin;
  ierr = MPI_Comm_size(((PetscObject)B)->comm,&size);CHKERRQ(ierr);

  ierr            = PetscNewLog(B,Mat_MPIAIJ,&b);CHKERRQ(ierr);
  B->data         = (void*)b;
  ierr            = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
  B->rmap->bs     = 1;
  B->assembled    = PETSC_FALSE;
  B->mapping      = 0;

  B->insertmode   = NOT_SET_VALUES;
  b->size         = size;
  ierr = MPI_Comm_rank(((PetscObject)B)->comm,&b->rank);CHKERRQ(ierr);

  /* build cache for off array entries formed */
  ierr = MatStashCreate_Private(((PetscObject)B)->comm,1,&B->stash);CHKERRQ(ierr);
  b->donotstash  = PETSC_FALSE;
  b->colmap      = 0;
  b->garray      = 0;
  b->roworiented = PETSC_TRUE;

  /* stuff used for matrix vector multiply */
  b->lvec      = PETSC_NULL;
  b->Mvctx     = PETSC_NULL;

  /* stuff for MatGetRow() */
  b->rowindices   = 0;
  b->rowvalues    = 0;
  b->getrowactive = PETSC_FALSE;

  /* external direct-solver hooks, compiled in only when the package is available */
#if defined(PETSC_HAVE_SPOOLES)
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_spooles_C",
                                     "MatGetFactor_mpiaij_spooles",
                                     MatGetFactor_mpiaij_spooles);CHKERRQ(ierr);
#endif
#if defined(PETSC_HAVE_MUMPS)
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_mumps_C",
                                     "MatGetFactor_mpiaij_mumps",
                                     MatGetFactor_mpiaij_mumps);CHKERRQ(ierr);
#endif
#if defined(PETSC_HAVE_PASTIX)
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_pastix_C",
					   "MatGetFactor_mpiaij_pastix",
					   MatGetFactor_mpiaij_pastix);CHKERRQ(ierr);
#endif
#if defined(PETSC_HAVE_SUPERLU_DIST)
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_superlu_dist_C",
                                     "MatGetFactor_mpiaij_superlu_dist",
                                     MatGetFactor_mpiaij_superlu_dist);CHKERRQ(ierr);
#endif
  /* type-specific operations dispatched by name rather than through the ops table */
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
                                     "MatStoreValues_MPIAIJ",
                                     MatStoreValues_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
                                     "MatRetrieveValues_MPIAIJ",
                                     MatRetrieveValues_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
				     "MatGetDiagonalBlock_MPIAIJ",
                                     MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatIsTranspose_C",
				     "MatIsTranspose_MPIAIJ",
				     MatIsTranspose_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocation_C",
				     "MatMPIAIJSetPreallocation_MPIAIJ",
				     MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",
				     "MatMPIAIJSetPreallocationCSR_MPIAIJ",
				     MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C",
				     "MatDiagonalScaleLocal_MPIAIJ",
				     MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr);
  /* conversions to sibling parallel formats */
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpicsrperm_C",
                                     "MatConvert_MPIAIJ_MPICSRPERM",
                                      MatConvert_MPIAIJ_MPICSRPERM);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpicrl_C",
                                     "MatConvert_MPIAIJ_MPICRL",
                                      MatConvert_MPIAIJ_MPICRL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C",
                                     "MatConvert_MPIAIJ_MPISBAIJ",
                                      MatConvert_MPIAIJ_MPISBAIJ);CHKERRQ(ierr);
  /* dense * aij matrix-matrix product kernels defined earlier in this file */
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMult_mpidense_mpiaij_C",
                                     "MatMatMult_MPIDense_MPIAIJ",
                                      MatMatMult_MPIDense_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMultSymbolic_mpidense_mpiaij_C",
                                     "MatMatMultSymbolic_MPIDense_MPIAIJ",
                                      MatMatMultSymbolic_MPIDense_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMultNumeric_mpidense_mpiaij_C",
                                     "MatMatMultNumeric_MPIDense_MPIAIJ",
                                      MatMatMultNumeric_MPIDense_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
5123 EXTERN_C_END
5124 
5125 #undef __FUNCT__
5126 #define __FUNCT__ "MatCreateMPIAIJWithSplitArrays"
5127 /*@
5128      MatCreateMPIAIJWithSplitArrays - creates a MPI AIJ matrix using arrays that contain the "diagonal"
5129          and "off-diagonal" part of the matrix in CSR format.
5130 
5131    Collective on MPI_Comm
5132 
5133    Input Parameters:
5134 +  comm - MPI communicator
5135 .  m - number of local rows (Cannot be PETSC_DECIDE)
5136 .  n - This value should be the same as the local size used in creating the
5137        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
5138        calculated if N is given) For square matrices n is almost always m.
5139 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
5140 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
5141 .   i - row indices for "diagonal" portion of matrix
5142 .   j - column indices
5143 .   a - matrix values
5144 .   oi - row indices for "off-diagonal" portion of matrix
5145 .   oj - column indices
5146 -   oa - matrix values
5147 
5148    Output Parameter:
5149 .   mat - the matrix
5150 
5151    Level: advanced
5152 
5153    Notes:
5154        The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc.
5155 
5156        The i and j indices are 0 based
5157 
5158        See MatCreateMPIAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix
5159 
5160        This sets local rows and cannot be used to set off-processor values.
5161 
5162        You cannot later use MatSetValues() to change values in this matrix.
5163 
5164 .keywords: matrix, aij, compressed row, sparse, parallel
5165 
5166 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
5167           MPIAIJ, MatCreateMPIAIJ(), MatCreateMPIAIJWithArrays()
5168 @*/
5169 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],
5170 								PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat)
5171 {
5172   PetscErrorCode ierr;
5173   Mat_MPIAIJ     *maij;
5174 
5175  PetscFunctionBegin;
5176   if (m < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
5177   if (i[0]) {
5178     SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
5179   }
5180   if (oi[0]) {
5181     SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0");
5182   }
5183   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
5184   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
5185   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
5186   maij = (Mat_MPIAIJ*) (*mat)->data;
5187   maij->donotstash     = PETSC_TRUE;
5188   (*mat)->preallocated = PETSC_TRUE;
5189 
5190   ierr = PetscLayoutSetBlockSize((*mat)->rmap,1);CHKERRQ(ierr);
5191   ierr = PetscLayoutSetBlockSize((*mat)->cmap,1);CHKERRQ(ierr);
5192   ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr);
5193   ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr);
5194 
5195   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr);
5196   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr);
5197 
5198   ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5199   ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5200   ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5201   ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5202 
5203   ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5204   ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5205   PetscFunctionReturn(0);
5206 }
5207 
5208 /*
5209     Special version for direct calls from Fortran
5210 */
5211 #if defined(PETSC_HAVE_FORTRAN_CAPS)
5212 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ
5213 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
5214 #define matsetvaluesmpiaij_ matsetvaluesmpiaij
5215 #endif
5216 
5217 /* Change these macros so can be used in void function */
5218 #undef CHKERRQ
5219 #define CHKERRQ(ierr) CHKERRABORT(((PetscObject)mat)->comm,ierr)
5220 #undef SETERRQ2
5221 #define SETERRQ2(ierr,b,c,d) CHKERRABORT(((PetscObject)mat)->comm,ierr)
5222 #undef SETERRQ
5223 #define SETERRQ(ierr,b) CHKERRABORT(((PetscObject)mat)->comm,ierr)
5224 
5225 EXTERN_C_BEGIN
5226 #undef __FUNCT__
5227 #define __FUNCT__ "matsetvaluesmpiaij_"
5228 void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr)
5229 {
5230   Mat             mat = *mmat;
5231   PetscInt        m = *mm, n = *mn;
5232   InsertMode      addv = *maddv;
5233   Mat_MPIAIJ      *aij = (Mat_MPIAIJ*)mat->data;
5234   PetscScalar     value;
5235   PetscErrorCode  ierr;
5236 
5237   ierr = MatPreallocated(mat);CHKERRQ(ierr);
5238   if (mat->insertmode == NOT_SET_VALUES) {
5239     mat->insertmode = addv;
5240   }
5241 #if defined(PETSC_USE_DEBUG)
5242   else if (mat->insertmode != addv) {
5243     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
5244   }
5245 #endif
5246   {
5247   PetscInt        i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
5248   PetscInt        cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
5249   PetscTruth      roworiented = aij->roworiented;
5250 
5251   /* Some Variables required in the macro */
5252   Mat             A = aij->A;
5253   Mat_SeqAIJ      *a = (Mat_SeqAIJ*)A->data;
5254   PetscInt        *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
5255   MatScalar       *aa = a->a;
5256   PetscTruth      ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES))?PETSC_TRUE:PETSC_FALSE);
5257   Mat             B = aij->B;
5258   Mat_SeqAIJ      *b = (Mat_SeqAIJ*)B->data;
5259   PetscInt        *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
5260   MatScalar       *ba = b->a;
5261 
5262   PetscInt        *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
5263   PetscInt        nonew = a->nonew;
5264   MatScalar       *ap1,*ap2;
5265 
5266   PetscFunctionBegin;
5267   for (i=0; i<m; i++) {
5268     if (im[i] < 0) continue;
5269 #if defined(PETSC_USE_DEBUG)
5270     if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
5271 #endif
5272     if (im[i] >= rstart && im[i] < rend) {
5273       row      = im[i] - rstart;
5274       lastcol1 = -1;
5275       rp1      = aj + ai[row];
5276       ap1      = aa + ai[row];
5277       rmax1    = aimax[row];
5278       nrow1    = ailen[row];
5279       low1     = 0;
5280       high1    = nrow1;
5281       lastcol2 = -1;
5282       rp2      = bj + bi[row];
5283       ap2      = ba + bi[row];
5284       rmax2    = bimax[row];
5285       nrow2    = bilen[row];
5286       low2     = 0;
5287       high2    = nrow2;
5288 
5289       for (j=0; j<n; j++) {
5290         if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
5291         if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
5292         if (in[j] >= cstart && in[j] < cend){
5293           col = in[j] - cstart;
5294           MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
5295         } else if (in[j] < 0) continue;
5296 #if defined(PETSC_USE_DEBUG)
5297         else if (in[j] >= mat->cmap->N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);}
5298 #endif
5299         else {
5300           if (mat->was_assembled) {
5301             if (!aij->colmap) {
5302               ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
5303             }
5304 #if defined (PETSC_USE_CTABLE)
5305             ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
5306 	    col--;
5307 #else
5308             col = aij->colmap[in[j]] - 1;
5309 #endif
5310             if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
5311               ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
5312               col =  in[j];
5313               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
5314               B = aij->B;
5315               b = (Mat_SeqAIJ*)B->data;
5316               bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
5317               rp2      = bj + bi[row];
5318               ap2      = ba + bi[row];
5319               rmax2    = bimax[row];
5320               nrow2    = bilen[row];
5321               low2     = 0;
5322               high2    = nrow2;
5323               bm       = aij->B->rmap->n;
5324               ba = b->a;
5325             }
5326           } else col = in[j];
5327           MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
5328         }
5329       }
5330     } else {
5331       if (!aij->donotstash) {
5332         if (roworiented) {
5333           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscTruth)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
5334         } else {
5335           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscTruth)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
5336         }
5337       }
5338     }
5339   }}
5340   PetscFunctionReturnVoid();
5341 }
5342 EXTERN_C_END
5343 
5344