xref: /petsc/src/mat/impls/aij/mpi/mpiaij.c (revision 753ebd1d069d7a4e86ed47ccb83df941bd1fb5db)
1 #define PETSCMAT_DLL
2 
3 #include "../src/mat/impls/aij/mpi/mpiaij.h"   /*I "petscmat.h" I*/
4 
#undef __FUNCT__
#define __FUNCT__ "MatDistribute_MPIAIJ"
/*
    Distributes a SeqAIJ matrix across a set of processes. Code stolen from
    MatLoad_MPIAIJ(). Horrible lack of reuse. Should be a routine for each matrix type.

    Only for square matrices

    Input Parameters:
      comm  - communicator over which the matrix is to be distributed
      gmat  - the global SeqAIJ matrix; only examined on process 0
      m     - number of rows this process is to own
      reuse - MAT_INITIAL_MATRIX builds *inmat from scratch; otherwise only the
              numerical values are shipped from process 0 into the existing
              *inmat (whose nonzero structure must be unchanged)

    Output Parameter:
      inmat - the distributed matrix
*/
PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat)
{
  PetscMPIInt    rank,size;
  PetscInt       *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz,*gmataj,cnt,row,*ld;
  PetscErrorCode ierr;
  Mat            mat;
  Mat_SeqAIJ     *gmata;
  PetscMPIInt    tag;
  MPI_Status     status;
  PetscTruth     aij;
  MatScalar      *gmataa,*ao,*ad,*gmataarestore=0;

  PetscFunctionBegin;
  CHKMEMQ;
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  if (!rank) {
    ierr = PetscTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);CHKERRQ(ierr);
    if (!aij) SETERRQ1(PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name);
  }
  if (reuse == MAT_INITIAL_MATRIX) {
    ierr = MatCreate(comm,&mat);CHKERRQ(ierr);
    ierr = MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
    ierr = MatSetType(mat,MATAIJ);CHKERRQ(ierr);
    ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr);
    ierr = PetscMalloc2(m,PetscInt,&dlens,m,PetscInt,&olens);CHKERRQ(ierr);
    /* gather everyone's local row count, then prefix-sum into ownership
       ranges: process p owns global rows rowners[p] .. rowners[p+1]-1
       (rowners[1] already holds process 0's count, so the sum starts at i=2) */
    ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
    rowners[0] = 0;
    for (i=2; i<=size; i++) {
      rowners[i] += rowners[i-1];
    }
    rstart = rowners[rank];
    rend   = rowners[rank+1];
    ierr   = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
    if (!rank) {
      gmata = (Mat_SeqAIJ*) gmat->data;
      /* send row lengths to all processors */
      for (i=0; i<m; i++) dlens[i] = gmata->ilen[i];
      for (i=1; i<size; i++) {
	ierr = MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
      }
      /* determine number diagonal and off-diagonal counts;
         ld[i] counts the entries of local row i that lie to the LEFT of the
         diagonal block -- it is stashed in the matrix below so the
         MAT_REUSE_MATRIX path can split values between A and B without
         re-sending column indices */
      ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
      ierr = PetscMalloc(m*sizeof(PetscInt),&ld);CHKERRQ(ierr);
      ierr = PetscMemzero(ld,m*sizeof(PetscInt));CHKERRQ(ierr);
      jj = 0;
      for (i=0; i<m; i++) {
	for (j=0; j<dlens[i]; j++) {
          if (gmata->j[jj] < rstart) ld[i]++;
	  if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++;
	  jj++;
	}
      }
      /* send column indices to other processes */
      for (i=1; i<size; i++) {
	nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
	ierr = MPI_Send(&nz,1,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
	ierr = MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
      }

      /* send numerical values to other processes */
      for (i=1; i<size; i++) {
        nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
        ierr = MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
      }
      /* process 0 uses its own slice of gmat directly (freed by neither path) */
      gmataa = gmata->a;
      gmataj = gmata->j;

    } else {
      /* receive row lengths */
      ierr = MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
      /* receive column indices */
      ierr = MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
      ierr = PetscMalloc2(nz,PetscScalar,&gmataa,nz,PetscInt,&gmataj);CHKERRQ(ierr);
      ierr = MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
      /* determine number diagonal and off-diagonal counts (same as rank 0,
         but from the received index array) */
      ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
      ierr = PetscMalloc(m*sizeof(PetscInt),&ld);CHKERRQ(ierr);
      ierr = PetscMemzero(ld,m*sizeof(PetscInt));CHKERRQ(ierr);
      jj = 0;
      for (i=0; i<m; i++) {
	for (j=0; j<dlens[i]; j++) {
          if (gmataj[jj] < rstart) ld[i]++;
	  if (gmataj[jj] < rstart || gmataj[jj] >= rend) olens[i]++;
	  jj++;
	}
      }
      /* receive numerical values */
      ierr = PetscMemzero(gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr);
      ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
    }
    /* set preallocation: dlens currently holds total row lengths, so subtract
       the off-diagonal counts; both Seq and MPI preallocation are set because
       MATAIJ resolves to either depending on the communicator size */
    for (i=0; i<m; i++) {
      dlens[i] -= olens[i];
    }
    ierr = MatSeqAIJSetPreallocation(mat,0,dlens);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);CHKERRQ(ierr);

    /* restore dlens to total row lengths for the insertion loop below */
    for (i=0; i<m; i++) {
      dlens[i] += olens[i];
    }
    cnt  = 0;
    for (i=0; i<m; i++) {
      row  = rstart + i;
      ierr = MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);CHKERRQ(ierr);
      cnt += dlens[i];
    }
    if (rank) {
      ierr = PetscFree2(gmataa,gmataj);CHKERRQ(ierr);
    }
    ierr = PetscFree2(dlens,olens);CHKERRQ(ierr);
    ierr = PetscFree(rowners);CHKERRQ(ierr);
    /* save ld for later MAT_REUSE_MATRIX calls; ownership passes to mat */
    ((Mat_MPIAIJ*)(mat->data))->ld = ld;
    *inmat = mat;
  } else {   /* column indices are already set; only need to move over numerical values from process 0 */
    Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data;
    Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data;
    mat   = *inmat;
    ierr  = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
    if (!rank) {
      /* send numerical values to other processes */
      gmata = (Mat_SeqAIJ*) gmat->data;
      ierr   = MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);CHKERRQ(ierr);
      gmataa = gmata->a;
      for (i=1; i<size; i++) {
        nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
        ierr = MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
      }
      nz   = gmata->i[rowners[1]]-gmata->i[rowners[0]];
    } else {
      /* receive numerical values from process 0*/
      nz   = Ad->nz + Ao->nz;
      ierr = PetscMalloc(nz*sizeof(PetscScalar),&gmataa);CHKERRQ(ierr); gmataarestore = gmataa;
      ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
    }
    /* transfer numerical values into the diagonal A and off diagonal B parts of mat:
       each incoming row is laid out [left-of-diagonal | diagonal | right-of-diagonal],
       and ld[] tells how many entries the left part holds */
    ld = ((Mat_MPIAIJ*)(mat->data))->ld;
    ad = Ad->a;
    ao = Ao->a;
    if (mat->rmap->n) {
      i  = 0;
      nz = ld[i];                                   ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
      nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
    }
    for (i=1; i<mat->rmap->n; i++) {
      nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
      nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
    }
    i--;
    if (mat->rmap->n) {
      nz = Ao->i[i+1] - Ao->i[i] - ld[i];           ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
    }
    if (rank) {
      ierr = PetscFree(gmataarestore);CHKERRQ(ierr);
    }
  }
  ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  CHKMEMQ;
  PetscFunctionReturn(0);
}
174 
/*
  Local utility routine that creates a mapping from the global column
number to the local number in the off-diagonal part of the local
storage of the matrix.  When PETSC_USE_CTABLE is defined this is scalable at
a slightly higher hash-table cost; without it, it is not scalable (each processor
has an order-N integer array) but is fast to access.
*/
182 #undef __FUNCT__
183 #define __FUNCT__ "CreateColmap_MPIAIJ_Private"
184 PetscErrorCode CreateColmap_MPIAIJ_Private(Mat mat)
185 {
186   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
187   PetscErrorCode ierr;
188   PetscInt       n = aij->B->cmap->n,i;
189 
190   PetscFunctionBegin;
191 #if defined (PETSC_USE_CTABLE)
192   ierr = PetscTableCreate(n,&aij->colmap);CHKERRQ(ierr);
193   for (i=0; i<n; i++){
194     ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1);CHKERRQ(ierr);
195   }
196 #else
197   ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscInt),&aij->colmap);CHKERRQ(ierr);
198   ierr = PetscLogObjectMemory(mat,mat->cmap->N*sizeof(PetscInt));CHKERRQ(ierr);
199   ierr = PetscMemzero(aij->colmap,mat->cmap->N*sizeof(PetscInt));CHKERRQ(ierr);
200   for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
201 #endif
202   PetscFunctionReturn(0);
203 }
204 
205 
/* number of extra nonzero slots grabbed at a time when a row overflows its allocation */
#define CHUNKSIZE   15
/*
   Inserts (ADD_VALUES adds, otherwise overwrites) value at (row,col) of the
   local "diagonal" SeqAIJ block A.  The sorted column list of the row is
   searched with a binary search narrowed to a final linear scan; if the
   column is absent a new nonzero is created and later entries shifted up,
   unless nonew==1 (silently dropped), nonew==-1 (error), or the value is an
   ignorable zero.  Relies on the caller (see MatSetValues_MPIAIJ()) having
   the locals rp1,ap1,nrow1,rmax1,low1,high1,lastcol1,_i,t,ii,N,aa,ai,aj,
   aimax,ailen,nonew,ignorezeroentries,a,am in scope.
*/
#define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \
{ \
    if (col <= lastcol1) low1 = 0; else high1 = nrow1; \
    lastcol1 = col;\
    while (high1-low1 > 5) { \
      t = (low1+high1)/2; \
      if (rp1[t] > col) high1 = t; \
      else             low1  = t; \
    } \
      for (_i=low1; _i<high1; _i++) { \
        if (rp1[_i] > col) break; \
        if (rp1[_i] == col) { \
          if (addv == ADD_VALUES) ap1[_i] += value;   \
          else                    ap1[_i] = value; \
          goto a_noinsert; \
        } \
      }  \
      if (value == 0.0 && ignorezeroentries) {low1 = 0; high1 = nrow1;goto a_noinsert;} \
      if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;}		\
      if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
      MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \
      N = nrow1++ - 1; a->nz++; high1++; \
      /* shift up all the later entries in this row */ \
      for (ii=N; ii>=_i; ii--) { \
        rp1[ii+1] = rp1[ii]; \
        ap1[ii+1] = ap1[ii]; \
      } \
      rp1[_i] = col;  \
      ap1[_i] = value;  \
      a_noinsert: ; \
      ailen[row] = nrow1; \
}
239 
240 
/*
   Same insertion logic as MatSetValues_SeqAIJ_A_Private(), but for the local
   "off-diagonal" SeqAIJ block B; uses the caller's rp2/ap2/nrow2/rmax2/
   low2/high2/lastcol2/bi/bj/ba/bimax/bilen/bm/b locals instead.
*/
#define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \
{ \
    if (col <= lastcol2) low2 = 0; else high2 = nrow2; \
    lastcol2 = col;\
    while (high2-low2 > 5) { \
      t = (low2+high2)/2; \
      if (rp2[t] > col) high2 = t; \
      else             low2  = t; \
    } \
    for (_i=low2; _i<high2; _i++) {		\
      if (rp2[_i] > col) break;			\
      if (rp2[_i] == col) {			      \
	if (addv == ADD_VALUES) ap2[_i] += value;     \
	else                    ap2[_i] = value;      \
	goto b_noinsert;			      \
      }						      \
    }							      \
    if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
    if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;}		\
    if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
    MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \
    N = nrow2++ - 1; b->nz++; high2++;					\
    /* shift up all the later entries in this row */			\
    for (ii=N; ii>=_i; ii--) {						\
      rp2[ii+1] = rp2[ii];						\
      ap2[ii+1] = ap2[ii];						\
    }									\
    rp2[_i] = col;							\
    ap2[_i] = value;							\
    b_noinsert: ;								\
    bilen[row] = nrow2;							\
}
273 
274 #undef __FUNCT__
275 #define __FUNCT__ "MatSetValuesRow_MPIAIJ"
276 PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[])
277 {
278   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)A->data;
279   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data;
280   PetscErrorCode ierr;
281   PetscInt       l,*garray = mat->garray,diag;
282 
283   PetscFunctionBegin;
284   /* code only works for square matrices A */
285 
286   /* find size of row to the left of the diagonal part */
287   ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr);
288   row  = row - diag;
289   for (l=0; l<b->i[row+1]-b->i[row]; l++) {
290     if (garray[b->j[b->i[row]+l]] > diag) break;
291   }
292   ierr = PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));CHKERRQ(ierr);
293 
294   /* diagonal part */
295   ierr = PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));CHKERRQ(ierr);
296 
297   /* right of diagonal part */
298   ierr = PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));CHKERRQ(ierr);
299   PetscFunctionReturn(0);
300 }
301 
#undef __FUNCT__
#define __FUNCT__ "MatSetValues_MPIAIJ"
/*
   MatSetValues() for MPIAIJ.  Entries in locally owned rows are inserted
   directly into the diagonal (A) or off-diagonal (B) sequential blocks via
   the MatSetValues_SeqAIJ_{A,B}_Private() macros; entries in rows owned by
   other processes are stashed and communicated at assembly time, unless
   donotstash is set.  Negative row/column indices are silently skipped.
*/
PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscScalar    value;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
  PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
  PetscTruth     roworiented = aij->roworiented;

  /* Some Variables required in the macro */
  Mat            A = aij->A;
  Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data;
  PetscInt       *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
  MatScalar      *aa = a->a;
  PetscTruth     ignorezeroentries = a->ignorezeroentries;
  Mat            B = aij->B;
  Mat_SeqAIJ     *b = (Mat_SeqAIJ*)B->data;
  PetscInt       *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
  MatScalar      *ba = b->a;

  PetscInt       *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
  PetscInt       nonew = a->nonew;
  MatScalar      *ap1,*ap2;

  PetscFunctionBegin;
  if (v) PetscValidScalarPointer(v,6);
  for (i=0; i<m; i++) {
    if (im[i] < 0) continue;
#if defined(PETSC_USE_DEBUG)
    if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
#endif
    if (im[i] >= rstart && im[i] < rend) {
      /* locally owned row: prime the per-row cursors used by the macros */
      row      = im[i] - rstart;
      lastcol1 = -1;
      rp1      = aj + ai[row];
      ap1      = aa + ai[row];
      rmax1    = aimax[row];
      nrow1    = ailen[row];
      low1     = 0;
      high1    = nrow1;
      lastcol2 = -1;
      rp2      = bj + bi[row];
      ap2      = ba + bi[row];
      rmax2    = bimax[row];
      nrow2    = bilen[row];
      low2     = 0;
      high2    = nrow2;

      for (j=0; j<n; j++) {
        if (v) {if (roworiented) value = v[i*n+j]; else value = v[i+j*m];} else value = 0.0;
        if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
        if (in[j] >= cstart && in[j] < cend){
          /* column falls inside the diagonal block */
          col = in[j] - cstart;
          MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
        } else if (in[j] < 0) continue;
#if defined(PETSC_USE_DEBUG)
        else if (in[j] >= mat->cmap->N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);}
#endif
        else {
          /* off-diagonal column: map global -> local B column via colmap */
          if (mat->was_assembled) {
            if (!aij->colmap) {
              ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
            }
#if defined (PETSC_USE_CTABLE)
            ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
	    col--;
#else
            col = aij->colmap[in[j]] - 1;
#endif
            /* col < 0 means the column is not yet in B's nonzero structure;
               disassemble so B can grow, then fall back to global indexing */
            if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
              ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
              col =  in[j];
              /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
              B = aij->B;
              b = (Mat_SeqAIJ*)B->data;
              bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a;
              rp2      = bj + bi[row];
              ap2      = ba + bi[row];
              rmax2    = bimax[row];
              nrow2    = bilen[row];
              low2     = 0;
              high2    = nrow2;
              bm       = aij->B->rmap->n;
              ba = b->a;
            }
          } else col = in[j];
          MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
        }
      }
    } else {
      /* row owned by another process: stash for communication at assembly */
      if (!aij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscTruth)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscTruth)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}
405 
#undef __FUNCT__
#define __FUNCT__ "MatGetValues_MPIAIJ"
/*
   MatGetValues() for MPIAIJ.  Only values in locally owned rows may be
   requested (an error is raised otherwise).  Columns inside the diagonal
   block are read from A; other columns are translated through colmap/garray
   and read from B, with entries absent from B's structure returned as 0.0.
   Negative row or column indices are skipped.
*/
PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
  PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/
    if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      for (j=0; j<n; j++) {
        if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */
        if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
        if (idxn[j] >= cstart && idxn[j] < cend){
          /* column lies in the diagonal block */
          col = idxn[j] - cstart;
          ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
        } else {
          if (!aij->colmap) {
            ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
          }
#if defined (PETSC_USE_CTABLE)
          ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
          col --;
#else
          col = aij->colmap[idxn[j]] - 1;
#endif
          /* garray check guards against a stale/ambiguous colmap entry */
          if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
          else {
            ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
          }
        }
      }
    } else {
      SETERRQ(PETSC_ERR_SUP,"Only local values currently supported");
    }
  }
  PetscFunctionReturn(0);
}
449 
450 extern PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat,Vec,Vec);
451 
452 #undef __FUNCT__
453 #define __FUNCT__ "MatAssemblyBegin_MPIAIJ"
454 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
455 {
456   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
457   PetscErrorCode ierr;
458   PetscInt       nstash,reallocs;
459   InsertMode     addv;
460 
461   PetscFunctionBegin;
462   if (aij->donotstash) {
463     PetscFunctionReturn(0);
464   }
465 
466   /* make sure all processors are either in INSERTMODE or ADDMODE */
467   ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,((PetscObject)mat)->comm);CHKERRQ(ierr);
468   if (addv == (ADD_VALUES|INSERT_VALUES)) {
469     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
470   }
471   mat->insertmode = addv; /* in case this processor had no cache */
472 
473   ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
474   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
475   ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
476   PetscFunctionReturn(0);
477 }
478 
#undef __FUNCT__
#define __FUNCT__ "MatAssemblyEnd_MPIAIJ"
/*
   Completes assembly: drains the stash (inserting the received off-process
   entries locally), assembles the diagonal block A, synchronizes possible
   disassembly across processes, sets up the multiply scatter on first final
   assembly, and then assembles the off-diagonal block B.
*/
PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  Mat_SeqAIJ     *a=(Mat_SeqAIJ *)aij->A->data;
  PetscErrorCode ierr;
  PetscMPIInt    n;
  PetscInt       i,j,rstart,ncols,flg;
  PetscInt       *row,*col;
  PetscTruth     other_disassembled;
  PetscScalar    *val;
  InsertMode     addv = mat->insertmode;

  /* do not use 'b = (Mat_SeqAIJ *)aij->B->data' as B can be reset in disassembly */
  PetscFunctionBegin;
  if (!aij->donotstash) {
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        /* Now assemble all these values with a single function call */
        ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
        i = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
  }
  a->compressedrow.use     = PETSC_FALSE;
  ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);

  /* determine if any processor has disassembled, if so we must
     also disassemble ourselfs, in order that we may reassemble. */
  /*
     if nonzero structure of submatrix B cannot change then we know that
     no processor disassembled thus we can skip this stuff
  */
  if (!((Mat_SeqAIJ*)aij->B->data)->nonew)  {
    /* MPI_PROD of was_assembled: result is "true" only if EVERY process is
       still assembled; any single disassembled process forces all to match */
    ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,((PetscObject)mat)->comm);CHKERRQ(ierr);
    if (mat->was_assembled && !other_disassembled) {
      ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
    }
  }
  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
  }
  ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr);
  ((Mat_SeqAIJ *)aij->B->data)->compressedrow.use = PETSC_TRUE; /* b->compressedrow.use */
  ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);

  /* cached row workspace from MatGetRow() is invalid after assembly */
  ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
  aij->rowvalues = 0;

  /* used by MatAXPY() */
  a->xtoy = 0; ((Mat_SeqAIJ *)aij->B->data)->xtoy = 0;  /* b->xtoy = 0 */
  a->XtoY = 0; ((Mat_SeqAIJ *)aij->B->data)->XtoY = 0;  /* b->XtoY = 0 */

  if (aij->diag) {ierr = VecDestroy(aij->diag);CHKERRQ(ierr);aij->diag = 0;}
  if (a->inode.size) mat->ops->multdiagonalblock = MatMultDiagonalBlock_MPIAIJ;
  PetscFunctionReturn(0);
}
547 
548 #undef __FUNCT__
549 #define __FUNCT__ "MatZeroEntries_MPIAIJ"
550 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
551 {
552   Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
553   PetscErrorCode ierr;
554 
555   PetscFunctionBegin;
556   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
557   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
558   PetscFunctionReturn(0);
559 }
560 
561 #undef __FUNCT__
562 #define __FUNCT__ "MatZeroRows_MPIAIJ"
563 PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag)
564 {
565   Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
566   PetscErrorCode ierr;
567   PetscMPIInt    size = l->size,imdex,n,rank = l->rank,tag = ((PetscObject)A)->tag,lastidx = -1;
568   PetscInt       i,*owners = A->rmap->range;
569   PetscInt       *nprocs,j,idx,nsends,row;
570   PetscInt       nmax,*svalues,*starts,*owner,nrecvs;
571   PetscInt       *rvalues,count,base,slen,*source;
572   PetscInt       *lens,*lrows,*values,rstart=A->rmap->rstart;
573   MPI_Comm       comm = ((PetscObject)A)->comm;
574   MPI_Request    *send_waits,*recv_waits;
575   MPI_Status     recv_status,*send_status;
576 #if defined(PETSC_DEBUG)
577   PetscTruth     found = PETSC_FALSE;
578 #endif
579 
580   PetscFunctionBegin;
581   /*  first count number of contributors to each processor */
582   ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
583   ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
584   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/
585   j = 0;
586   for (i=0; i<N; i++) {
587     if (lastidx > (idx = rows[i])) j = 0;
588     lastidx = idx;
589     for (; j<size; j++) {
590       if (idx >= owners[j] && idx < owners[j+1]) {
591         nprocs[2*j]++;
592         nprocs[2*j+1] = 1;
593         owner[i] = j;
594 #if defined(PETSC_DEBUG)
595         found = PETSC_TRUE;
596 #endif
597         break;
598       }
599     }
600 #if defined(PETSC_DEBUG)
601     if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
602     found = PETSC_FALSE;
603 #endif
604   }
605   nsends = 0;  for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}
606 
607   /* inform other processors of number of messages and max length*/
608   ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);
609 
610   /* post receives:   */
611   ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr);
612   ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
613   for (i=0; i<nrecvs; i++) {
614     ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
615   }
616 
617   /* do sends:
618       1) starts[i] gives the starting index in svalues for stuff going to
619          the ith processor
620   */
621   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr);
622   ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
623   ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr);
624   starts[0] = 0;
625   for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
626   for (i=0; i<N; i++) {
627     svalues[starts[owner[i]]++] = rows[i];
628   }
629 
630   starts[0] = 0;
631   for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
632   count = 0;
633   for (i=0; i<size; i++) {
634     if (nprocs[2*i+1]) {
635       ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
636     }
637   }
638   ierr = PetscFree(starts);CHKERRQ(ierr);
639 
640   base = owners[rank];
641 
642   /*  wait on receives */
643   ierr   = PetscMalloc2(nrecvs,PetscInt,&lens,nrecvs,PetscInt,&source);CHKERRQ(ierr);
644   count  = nrecvs; slen = 0;
645   while (count) {
646     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
647     /* unpack receives into our local space */
648     ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);
649     source[imdex]  = recv_status.MPI_SOURCE;
650     lens[imdex]    = n;
651     slen          += n;
652     count--;
653   }
654   ierr = PetscFree(recv_waits);CHKERRQ(ierr);
655 
656   /* move the data into the send scatter */
657   ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr);
658   count = 0;
659   for (i=0; i<nrecvs; i++) {
660     values = rvalues + i*nmax;
661     for (j=0; j<lens[i]; j++) {
662       lrows[count++] = values[j] - base;
663     }
664   }
665   ierr = PetscFree(rvalues);CHKERRQ(ierr);
666   ierr = PetscFree2(lens,source);CHKERRQ(ierr);
667   ierr = PetscFree(owner);CHKERRQ(ierr);
668   ierr = PetscFree(nprocs);CHKERRQ(ierr);
669 
670   /* actually zap the local rows */
671   /*
672         Zero the required rows. If the "diagonal block" of the matrix
673      is square and the user wishes to set the diagonal we use separate
674      code so that MatSetValues() is not called for each diagonal allocating
675      new memory, thus calling lots of mallocs and slowing things down.
676 
677   */
678   /* must zero l->B before l->A because the (diag) case below may put values into l->B*/
679   ierr = MatZeroRows(l->B,slen,lrows,0.0);CHKERRQ(ierr);
680   if ((diag != 0.0) && (l->A->rmap->N == l->A->cmap->N)) {
681     ierr      = MatZeroRows(l->A,slen,lrows,diag);CHKERRQ(ierr);
682   } else if (diag != 0.0) {
683     ierr = MatZeroRows(l->A,slen,lrows,0.0);CHKERRQ(ierr);
684     if (((Mat_SeqAIJ*)l->A->data)->nonew) {
685       SETERRQ(PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\n\
686 MAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
687     }
688     for (i = 0; i < slen; i++) {
689       row  = lrows[i] + rstart;
690       ierr = MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr);
691     }
692     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
693     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
694   } else {
695     ierr = MatZeroRows(l->A,slen,lrows,0.0);CHKERRQ(ierr);
696   }
697   ierr = PetscFree(lrows);CHKERRQ(ierr);
698 
699   /* wait on sends */
700   if (nsends) {
701     ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
702     ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
703     ierr = PetscFree(send_status);CHKERRQ(ierr);
704   }
705   ierr = PetscFree(send_waits);CHKERRQ(ierr);
706   ierr = PetscFree(svalues);CHKERRQ(ierr);
707 
708   PetscFunctionReturn(0);
709 }
710 
711 #undef __FUNCT__
712 #define __FUNCT__ "MatMult_MPIAIJ"
713 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
714 {
715   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
716   PetscErrorCode ierr;
717   PetscInt       nt;
718 
719   PetscFunctionBegin;
720   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
721   if (nt != A->cmap->n) {
722     SETERRQ2(PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt);
723   }
724   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
725   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
726   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
727   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
728   PetscFunctionReturn(0);
729 }
730 
731 #undef __FUNCT__
732 #define __FUNCT__ "MatMultDiagonalBlock_MPIAIJ"
733 PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx)
734 {
735   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
736   PetscErrorCode ierr;
737 
738   PetscFunctionBegin;
739   ierr = MatMultDiagonalBlock(a->A,bb,xx);CHKERRQ(ierr);
740   PetscFunctionReturn(0);
741 }
742 
743 #undef __FUNCT__
744 #define __FUNCT__ "MatMultAdd_MPIAIJ"
745 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
746 {
747   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
748   PetscErrorCode ierr;
749 
750   PetscFunctionBegin;
751   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
752   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
753   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
754   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
755   PetscFunctionReturn(0);
756 }
757 
#undef __FUNCT__
#define __FUNCT__ "MatMultTranspose_MPIAIJ"
/*
    Computes yy = A^T * xx.

    The transpose of the off-diagonal block is applied first, producing in
    a->lvec the contributions destined for other processes; these are then
    scattered in reverse (with ADD_VALUES) into yy.  The ordering of the
    Begin/End calls relative to the local transpose multiply depends on
    whether the scatter context merges its begin and end phases.
*/
PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscTruth     merged;

  PetscFunctionBegin;
  ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr);
  /* do nondiagonal part: lvec = B^T * xx */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  if (!merged) {
    /* send it on its way */
    ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
    /* do local part: yy = A_diag^T * xx, overlapped with the communication */
    ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
    /* receive remote parts: note this assumes the values are not actually */
    /* added in yy until the next line, */
    ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  } else {
    /* merged scatter adds into yy during Begin(), so the local product must
       be written into yy first */
    /* do local part */
    ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
    /* send it on its way */
    ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
    /* values actually were received in the Begin() but we need to call this nop */
    ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
788 
EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatIsTranspose_MPIAIJ"
/*
    Tests whether Bmat is the transpose of Amat to within tolerance tol,
    setting *f accordingly.

    First the cheap test: the two diagonal blocks must be transposes of each
    other.  Only if that passes (and more than one process is involved) are
    the off-diagonal parts compared, which requires extracting submatrices.
*/
PetscErrorCode PETSCMAT_DLLEXPORT MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscTruth *f)
{
  MPI_Comm       comm;
  Mat_MPIAIJ     *Aij = (Mat_MPIAIJ *) Amat->data, *Bij;
  Mat            Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs;
  IS             Me,Notme;
  PetscErrorCode ierr;
  PetscInt       M,N,first,last,*notme,i;
  PetscMPIInt    size;

  PetscFunctionBegin;

  /* Easy test: symmetric diagonal block */
  Bij = (Mat_MPIAIJ *) Bmat->data; Bdia = Bij->A;
  ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr);
  if (!*f) PetscFunctionReturn(0);
  ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  /* uniprocess: the diagonal block is the whole matrix, so we are done */
  if (size == 1) PetscFunctionReturn(0);

  /* Hard test: off-diagonal block. This takes a MatGetSubMatrix. */
  ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr);
  /* notme = all global indices outside this process's range [first,last).
     NOTE(review): the array holds N-last+first entries but the second loop
     runs rows up to M -- this assumes a square matrix (M == N); confirm */
  ierr = PetscMalloc((N-last+first)*sizeof(PetscInt),&notme);CHKERRQ(ierr);
  for (i=0; i<first; i++) notme[i] = i;
  for (i=last; i<M; i++) notme[i-last+first] = i;
  ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,&Notme);CHKERRQ(ierr);
  ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr);
  /* Aoff = A(Me,Notme) must be the transpose of Boff = B(Notme,Me) */
  ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr);
  Aoff = Aoffs[0];
  ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr);
  Boff = Boffs[0];
  ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr);
  ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr);
  ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr);
  ierr = ISDestroy(Me);CHKERRQ(ierr);
  ierr = ISDestroy(Notme);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}
EXTERN_C_END
833 
834 #undef __FUNCT__
835 #define __FUNCT__ "MatMultTransposeAdd_MPIAIJ"
836 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
837 {
838   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
839   PetscErrorCode ierr;
840 
841   PetscFunctionBegin;
842   /* do nondiagonal part */
843   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
844   /* send it on its way */
845   ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
846   /* do local part */
847   ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
848   /* receive remote parts */
849   ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
850   PetscFunctionReturn(0);
851 }
852 
853 /*
854   This only works correctly for square matrices where the subblock A->A is the
855    diagonal block
856 */
857 #undef __FUNCT__
858 #define __FUNCT__ "MatGetDiagonal_MPIAIJ"
859 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v)
860 {
861   PetscErrorCode ierr;
862   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
863 
864   PetscFunctionBegin;
865   if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
866   if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) {
867     SETERRQ(PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
868   }
869   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
870   PetscFunctionReturn(0);
871 }
872 
873 #undef __FUNCT__
874 #define __FUNCT__ "MatScale_MPIAIJ"
875 PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa)
876 {
877   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
878   PetscErrorCode ierr;
879 
880   PetscFunctionBegin;
881   ierr = MatScale(a->A,aa);CHKERRQ(ierr);
882   ierr = MatScale(a->B,aa);CHKERRQ(ierr);
883   PetscFunctionReturn(0);
884 }
885 
#undef __FUNCT__
#define __FUNCT__ "MatDestroy_MPIAIJ"
/*
    Destroys a parallel AIJ matrix: frees the two sequential blocks, the
    ghost-value communication machinery, and all cached workspace, then
    detaches the type name and the composed type-specific functions.
*/
PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
#if defined(PETSC_USE_LOG)
  PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N);
#endif
  ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
  if (aij->diag) {ierr = VecDestroy(aij->diag);CHKERRQ(ierr);}  /* cached diagonal, created lazily (e.g. by SOR/Eisenstat) */
  ierr = MatDestroy(aij->A);CHKERRQ(ierr);  /* diagonal block */
  ierr = MatDestroy(aij->B);CHKERRQ(ierr);  /* off-diagonal block */
#if defined (PETSC_USE_CTABLE)
  if (aij->colmap) {ierr = PetscTableDestroy(aij->colmap);CHKERRQ(ierr);}
#else
  ierr = PetscFree(aij->colmap);CHKERRQ(ierr);
#endif
  ierr = PetscFree(aij->garray);CHKERRQ(ierr);  /* global column numbers of B's compressed columns */
  if (aij->lvec)   {ierr = VecDestroy(aij->lvec);CHKERRQ(ierr);}  /* ghost-value work vector */
  if (aij->Mvctx)  {ierr = VecScatterDestroy(aij->Mvctx);CHKERRQ(ierr);}  /* ghost-value scatter context */
  ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr);  /* MatGetRow() workspace (rowindices freed with it) */
  ierr = PetscFree(aij->ld);CHKERRQ(ierr);
  ierr = PetscFree(aij);CHKERRQ(ierr);

  ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
  /* detach every function composed onto the object at creation time */
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C","",PETSC_NULL);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
924 
925 #undef __FUNCT__
926 #define __FUNCT__ "MatView_MPIAIJ_Binary"
927 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
928 {
929   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
930   Mat_SeqAIJ*       A = (Mat_SeqAIJ*)aij->A->data;
931   Mat_SeqAIJ*       B = (Mat_SeqAIJ*)aij->B->data;
932   PetscErrorCode    ierr;
933   PetscMPIInt       rank,size,tag = ((PetscObject)viewer)->tag;
934   int               fd;
935   PetscInt          nz,header[4],*row_lengths,*range=0,rlen,i;
936   PetscInt          nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap->rstart,rnz;
937   PetscScalar       *column_values;
938 
939   PetscFunctionBegin;
940   ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr);
941   ierr = MPI_Comm_size(((PetscObject)mat)->comm,&size);CHKERRQ(ierr);
942   nz   = A->nz + B->nz;
943   if (!rank) {
944     header[0] = MAT_FILE_COOKIE;
945     header[1] = mat->rmap->N;
946     header[2] = mat->cmap->N;
947     ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
948     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
949     ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
950     /* get largest number of rows any processor has */
951     rlen = mat->rmap->n;
952     range = mat->rmap->range;
953     for (i=1; i<size; i++) {
954       rlen = PetscMax(rlen,range[i+1] - range[i]);
955     }
956   } else {
957     ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
958     rlen = mat->rmap->n;
959   }
960 
961   /* load up the local row counts */
962   ierr = PetscMalloc((rlen+1)*sizeof(PetscInt),&row_lengths);CHKERRQ(ierr);
963   for (i=0; i<mat->rmap->n; i++) {
964     row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
965   }
966 
967   /* store the row lengths to the file */
968   if (!rank) {
969     MPI_Status status;
970     ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap->n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
971     for (i=1; i<size; i++) {
972       rlen = range[i+1] - range[i];
973       ierr = MPI_Recv(row_lengths,rlen,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
974       ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
975     }
976   } else {
977     ierr = MPI_Send(row_lengths,mat->rmap->n,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
978   }
979   ierr = PetscFree(row_lengths);CHKERRQ(ierr);
980 
981   /* load up the local column indices */
982   nzmax = nz; /* )th processor needs space a largest processor needs */
983   ierr = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
984   ierr = PetscMalloc((nzmax+1)*sizeof(PetscInt),&column_indices);CHKERRQ(ierr);
985   cnt  = 0;
986   for (i=0; i<mat->rmap->n; i++) {
987     for (j=B->i[i]; j<B->i[i+1]; j++) {
988       if ( (col = garray[B->j[j]]) > cstart) break;
989       column_indices[cnt++] = col;
990     }
991     for (k=A->i[i]; k<A->i[i+1]; k++) {
992       column_indices[cnt++] = A->j[k] + cstart;
993     }
994     for (; j<B->i[i+1]; j++) {
995       column_indices[cnt++] = garray[B->j[j]];
996     }
997   }
998   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
999 
1000   /* store the column indices to the file */
1001   if (!rank) {
1002     MPI_Status status;
1003     ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1004     for (i=1; i<size; i++) {
1005       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
1006       if (rnz > nzmax) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax);
1007       ierr = MPI_Recv(column_indices,rnz,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
1008       ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1009     }
1010   } else {
1011     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1012     ierr = MPI_Send(column_indices,nz,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1013   }
1014   ierr = PetscFree(column_indices);CHKERRQ(ierr);
1015 
1016   /* load up the local column values */
1017   ierr = PetscMalloc((nzmax+1)*sizeof(PetscScalar),&column_values);CHKERRQ(ierr);
1018   cnt  = 0;
1019   for (i=0; i<mat->rmap->n; i++) {
1020     for (j=B->i[i]; j<B->i[i+1]; j++) {
1021       if ( garray[B->j[j]] > cstart) break;
1022       column_values[cnt++] = B->a[j];
1023     }
1024     for (k=A->i[i]; k<A->i[i+1]; k++) {
1025       column_values[cnt++] = A->a[k];
1026     }
1027     for (; j<B->i[i+1]; j++) {
1028       column_values[cnt++] = B->a[j];
1029     }
1030   }
1031   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
1032 
1033   /* store the column values to the file */
1034   if (!rank) {
1035     MPI_Status status;
1036     ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1037     for (i=1; i<size; i++) {
1038       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
1039       if (rnz > nzmax) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax);
1040       ierr = MPI_Recv(column_values,rnz,MPIU_SCALAR,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
1041       ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1042     }
1043   } else {
1044     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1045     ierr = MPI_Send(column_values,nz,MPIU_SCALAR,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1046   }
1047   ierr = PetscFree(column_values);CHKERRQ(ierr);
1048   PetscFunctionReturn(0);
1049 }
1050 
#undef __FUNCT__
#define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket"
/*
    Viewer back-end for ASCII, draw, socket, and binary viewers.

    For the ASCII info formats only statistics are printed.  Binary viewing
    is delegated to MatView_MPIAIJ_Binary() (or viewed directly when running
    on one process).  Otherwise the entire matrix is assembled into a
    temporary parallel matrix whose rows all live on process 0, and that
    process views its sequential diagonal block through a singleton viewer.
*/
PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
{
  Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode    ierr;
  PetscMPIInt       rank = aij->rank,size = aij->size;
  PetscTruth        isdraw,iascii,isbinary;
  PetscViewer       sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
  if (iascii) {
    ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
    if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      /* per-process statistics; no matrix entries are printed */
      MatInfo    info;
      PetscTruth inodes;

      ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr);
      ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
      /* NOTE(review): this stores a PetscInt* through a PetscTruth via the
         cast; only the NULL/non-NULL distinction is consumed, but the two
         types may differ in size -- confirm this is safe on all platforms */
      ierr = MatInodeGetInodeSizes(aij->A,PETSC_NULL,(PetscInt **)&inodes,PETSC_NULL);CHKERRQ(ierr);
      if (!inodes) {
        ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, not using I-node routines\n",
					      rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
      } else {
        ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, using I-node routines\n",
		    rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
      }
      ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
      ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
      ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
      ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_INFO) {
      PetscInt   inodecount,inodelimit,*inodes;
      ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr);
      if (inodes) {
        ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr);
      } else {
        ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr);
      }
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
      /* nothing to print for an unfactored matrix */
      PetscFunctionReturn(0);
    }
  } else if (isbinary) {
    if (size == 1) {
      /* uniprocess: the diagonal block is the whole matrix */
      ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
      ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
    } else {
      ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr);
    }
    PetscFunctionReturn(0);
  } else if (isdraw) {
    PetscDraw  draw;
    PetscTruth isnull;
    ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
    ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
  }

  if (size == 1) {
    ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
    ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
  } else {
    /* assemble the entire matrix onto first processor. */
    Mat         A;
    Mat_SeqAIJ  *Aloc;
    PetscInt    M = mat->rmap->N,N = mat->cmap->N,m,*ai,*aj,row,*cols,i,*ct;
    MatScalar   *a;

    /* guard against accidentally dumping a huge matrix as text */
    if (mat->rmap->N > 1024) {
      PetscTruth flg = PETSC_FALSE;

      ierr = PetscOptionsGetTruth(((PetscObject) mat)->prefix, "-mat_ascii_output_large", &flg,PETSC_NULL);CHKERRQ(ierr);
      if (!flg) {
        SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"ASCII matrix output not allowed for matrices with more than 1024 rows, use binary format instead.\nYou can override this restriction using -mat_ascii_output_large.");
      }
    }

    /* temporary matrix: all rows on process 0, none elsewhere */
    ierr = MatCreate(((PetscObject)mat)->comm,&A);CHKERRQ(ierr);
    if (!rank) {
      ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
    } else {
      ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
    }
    /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */
    ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(A,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
    ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr);

    /* copy over the A part */
    Aloc = (Mat_SeqAIJ*)aij->A->data;
    m = aij->A->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
    row = mat->rmap->rstart;
    /* temporarily shift the column indices to global numbering... */
    for (i=0; i<ai[m]; i++) {aj[i] += mat->cmap->rstart ;}
    for (i=0; i<m; i++) {
      ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
      row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
    }
    aj = Aloc->j;
    /* ...and restore local numbering afterwards */
    for (i=0; i<ai[m]; i++) {aj[i] -= mat->cmap->rstart;}

    /* copy over the B part; garray maps its compressed columns to global */
    Aloc = (Mat_SeqAIJ*)aij->B->data;
    m    = aij->B->rmap->n;  ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
    row  = mat->rmap->rstart;
    ierr = PetscMalloc((ai[m]+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr);
    ct   = cols;  /* keep the base pointer; cols is advanced in the loop */
    for (i=0; i<ai[m]; i++) {cols[i] = aij->garray[aj[i]];}
    for (i=0; i<m; i++) {
      ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
      row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
    }
    ierr = PetscFree(ct);CHKERRQ(ierr);
    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    /*
       Everyone has to call to draw the matrix since the graphics waits are
       synchronized across all processors that share the PetscDraw object
    */
    ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
    if (!rank) {
      ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr);
      ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
    }
    ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
    ierr = MatDestroy(A);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
1187 
1188 #undef __FUNCT__
1189 #define __FUNCT__ "MatView_MPIAIJ"
1190 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer)
1191 {
1192   PetscErrorCode ierr;
1193   PetscTruth     iascii,isdraw,issocket,isbinary;
1194 
1195   PetscFunctionBegin;
1196   ierr  = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
1197   ierr  = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
1198   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
1199   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr);
1200   if (iascii || isdraw || isbinary || issocket) {
1201     ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
1202   } else {
1203     SETERRQ1(PETSC_ERR_SUP,"Viewer type %s not supported by MPIAIJ matrices",((PetscObject)viewer)->type_name);
1204   }
1205   PetscFunctionReturn(0);
1206 }
1207 
#undef __FUNCT__
#define __FUNCT__ "MatSOR_MPIAIJ"
/*
    SOR/relaxation for the parallel AIJ format.

    Only the "local" sweep variants (and SOR_APPLY_UPPER/SOR_EISENSTAT) are
    supported.  Each outer iteration refreshes the ghost values of xx, forms
    the locally corrected right-hand side bb1 = bb - B*x_ghost, and runs the
    sequential SOR kernel of the diagonal block on it.  A true parallel SOR
    is not implemented.
*/
PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
{
  Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
  PetscErrorCode ierr;
  Vec            bb1 = 0;
  PetscTruth     hasop;

  PetscFunctionBegin;
  /* bb1 is needed whenever a corrected right-hand side will be formed: more
     than one iteration, a nonzero initial guess (~flag masks out the
     SOR_ZERO_INITIAL_GUESS bit), or the Eisenstat variant */
  if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) {
    ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
  }

  if (flag == SOR_APPLY_UPPER) {
    /* apply the upper-triangular part of the diagonal block only */
    ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
    PetscFunctionReturn(0);
  }

  if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      /* first sweep can use bb directly since x starts at zero */
      ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
      its--;
    }

    while (its--) {
      /* refresh the ghost values of xx */
      ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
      ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);

      /* update rhs: bb1 = bb - B*x */
      ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
      ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);

      /* local sweep */
      ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
    }
  } else if (flag & SOR_LOCAL_FORWARD_SWEEP){
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
      its--;
    }
    while (its--) {
      ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
      ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);

      /* update rhs: bb1 = bb - B*x */
      ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
      ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);

      /* local sweep */
      ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
    }
  } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
      its--;
    }
    while (its--) {
      ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
      ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);

      /* update rhs: bb1 = bb - B*x */
      ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
      ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);

      /* local sweep */
      ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
    }
  }  else if (flag & SOR_EISENSTAT) {
    Vec         xx1;

    ierr = VecDuplicate(bb,&xx1);CHKERRQ(ierr);
    /* backward local sweep from a zero initial guess */
    ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr);

    ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    if (!mat->diag) {
      /* lazily build and cache the diagonal for the pointwise product below */
      ierr = MatGetVecs(matin,&mat->diag,PETSC_NULL);CHKERRQ(ierr);
      ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr);
    }
    ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr);
    if (hasop) {
      ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr);
    } else {
      ierr = VecPointwiseMult(bb1,mat->diag,xx);CHKERRQ(ierr);
    }
    /* bb1 = bb + (omega-2)/omega * D*xx */
    ierr = VecAYPX(bb1,(omega-2.0)/omega,bb);CHKERRQ(ierr);

    /* add the off-diagonal contribution of the ghost values */
    ierr = MatMultAdd(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr);

    /* local sweep */
    ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr);
    ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr);
    ierr = VecDestroy(xx1);CHKERRQ(ierr);
  } else {
    SETERRQ(PETSC_ERR_SUP,"Parallel SOR not supported");
  }

  if (bb1) {ierr = VecDestroy(bb1);CHKERRQ(ierr);}
  PetscFunctionReturn(0);
}
1309 
1310 #undef __FUNCT__
1311 #define __FUNCT__ "MatPermute_MPIAIJ"
1312 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B)
1313 {
1314   MPI_Comm       comm,pcomm;
1315   PetscInt       first,local_size,nrows;
1316   const PetscInt *rows;
1317   PetscMPIInt    size;
1318   IS             crowp,growp,irowp,lrowp,lcolp,icolp;
1319   PetscErrorCode ierr;
1320 
1321   PetscFunctionBegin;
1322   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
1323   /* make a collective version of 'rowp' */
1324   ierr = PetscObjectGetComm((PetscObject)rowp,&pcomm);CHKERRQ(ierr);
1325   if (pcomm==comm) {
1326     crowp = rowp;
1327   } else {
1328     ierr = ISGetSize(rowp,&nrows);CHKERRQ(ierr);
1329     ierr = ISGetIndices(rowp,&rows);CHKERRQ(ierr);
1330     ierr = ISCreateGeneral(comm,nrows,rows,&crowp);CHKERRQ(ierr);
1331     ierr = ISRestoreIndices(rowp,&rows);CHKERRQ(ierr);
1332   }
1333   /* collect the global row permutation and invert it */
1334   ierr = ISAllGather(crowp,&growp);CHKERRQ(ierr);
1335   ierr = ISSetPermutation(growp);CHKERRQ(ierr);
1336   if (pcomm!=comm) {
1337     ierr = ISDestroy(crowp);CHKERRQ(ierr);
1338   }
1339   ierr = ISInvertPermutation(growp,PETSC_DECIDE,&irowp);CHKERRQ(ierr);
1340   /* get the local target indices */
1341   ierr = MatGetOwnershipRange(A,&first,PETSC_NULL);CHKERRQ(ierr);
1342   ierr = MatGetLocalSize(A,&local_size,PETSC_NULL);CHKERRQ(ierr);
1343   ierr = ISGetIndices(irowp,&rows);CHKERRQ(ierr);
1344   ierr = ISCreateGeneral(MPI_COMM_SELF,local_size,rows+first,&lrowp);CHKERRQ(ierr);
1345   ierr = ISRestoreIndices(irowp,&rows);CHKERRQ(ierr);
1346   ierr = ISDestroy(irowp);CHKERRQ(ierr);
1347   /* the column permutation is so much easier;
1348      make a local version of 'colp' and invert it */
1349   ierr = PetscObjectGetComm((PetscObject)colp,&pcomm);CHKERRQ(ierr);
1350   ierr = MPI_Comm_size(pcomm,&size);CHKERRQ(ierr);
1351   if (size==1) {
1352     lcolp = colp;
1353   } else {
1354     ierr = ISGetSize(colp,&nrows);CHKERRQ(ierr);
1355     ierr = ISGetIndices(colp,&rows);CHKERRQ(ierr);
1356     ierr = ISCreateGeneral(MPI_COMM_SELF,nrows,rows,&lcolp);CHKERRQ(ierr);
1357   }
1358   ierr = ISSetPermutation(lcolp);CHKERRQ(ierr);
1359   ierr = ISInvertPermutation(lcolp,PETSC_DECIDE,&icolp);CHKERRQ(ierr);
1360   ierr = ISSetPermutation(icolp);CHKERRQ(ierr);
1361   if (size>1) {
1362     ierr = ISRestoreIndices(colp,&rows);CHKERRQ(ierr);
1363     ierr = ISDestroy(lcolp);CHKERRQ(ierr);
1364   }
1365   /* now we just get the submatrix */
1366   ierr = MatGetSubMatrix_MPIAIJ_Private(A,lrowp,icolp,local_size,MAT_INITIAL_MATRIX,B);CHKERRQ(ierr);
1367   /* clean up */
1368   ierr = ISDestroy(lrowp);CHKERRQ(ierr);
1369   ierr = ISDestroy(icolp);CHKERRQ(ierr);
1370   PetscFunctionReturn(0);
1371 }
1372 
1373 #undef __FUNCT__
1374 #define __FUNCT__ "MatGetInfo_MPIAIJ"
1375 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1376 {
1377   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1378   Mat            A = mat->A,B = mat->B;
1379   PetscErrorCode ierr;
1380   PetscReal      isend[5],irecv[5];
1381 
1382   PetscFunctionBegin;
1383   info->block_size     = 1.0;
1384   ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1385   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1386   isend[3] = info->memory;  isend[4] = info->mallocs;
1387   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1388   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1389   isend[3] += info->memory;  isend[4] += info->mallocs;
1390   if (flag == MAT_LOCAL) {
1391     info->nz_used      = isend[0];
1392     info->nz_allocated = isend[1];
1393     info->nz_unneeded  = isend[2];
1394     info->memory       = isend[3];
1395     info->mallocs      = isend[4];
1396   } else if (flag == MAT_GLOBAL_MAX) {
1397     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,((PetscObject)matin)->comm);CHKERRQ(ierr);
1398     info->nz_used      = irecv[0];
1399     info->nz_allocated = irecv[1];
1400     info->nz_unneeded  = irecv[2];
1401     info->memory       = irecv[3];
1402     info->mallocs      = irecv[4];
1403   } else if (flag == MAT_GLOBAL_SUM) {
1404     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,((PetscObject)matin)->comm);CHKERRQ(ierr);
1405     info->nz_used      = irecv[0];
1406     info->nz_allocated = irecv[1];
1407     info->nz_unneeded  = irecv[2];
1408     info->memory       = irecv[3];
1409     info->mallocs      = irecv[4];
1410   }
1411   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1412   info->fill_ratio_needed = 0;
1413   info->factor_mallocs    = 0;
1414 
1415   PetscFunctionReturn(0);
1416 }
1417 
1418 #undef __FUNCT__
1419 #define __FUNCT__ "MatSetOption_MPIAIJ"
1420 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscTruth flg)
1421 {
1422   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1423   PetscErrorCode ierr;
1424 
1425   PetscFunctionBegin;
1426   switch (op) {
1427   case MAT_NEW_NONZERO_LOCATIONS:
1428   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1429   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1430   case MAT_KEEP_NONZERO_PATTERN:
1431   case MAT_NEW_NONZERO_LOCATION_ERR:
1432   case MAT_USE_INODES:
1433   case MAT_IGNORE_ZERO_ENTRIES:
1434     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1435     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1436     break;
1437   case MAT_ROW_ORIENTED:
1438     a->roworiented = flg;
1439     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1440     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1441     break;
1442   case MAT_NEW_DIAGONALS:
1443     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1444     break;
1445   case MAT_IGNORE_OFF_PROC_ENTRIES:
1446     a->donotstash = PETSC_TRUE;
1447     break;
1448   case MAT_SYMMETRIC:
1449     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1450     break;
1451   case MAT_STRUCTURALLY_SYMMETRIC:
1452     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1453     break;
1454   case MAT_HERMITIAN:
1455     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1456     break;
1457   case MAT_SYMMETRY_ETERNAL:
1458     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1459     break;
1460   default:
1461     SETERRQ1(PETSC_ERR_SUP,"unknown option %d",op);
1462   }
1463   PetscFunctionReturn(0);
1464 }
1465 
#undef __FUNCT__
#define __FUNCT__ "MatGetRow_MPIAIJ"
/*
    Returns one (globally numbered) row of the parallel matrix; only rows
    owned by this process (rstart <= row < rend) may be requested.

    The row is assembled by merging the corresponding rows of the diagonal
    block (mat->A) and the off-diagonal block (mat->B) so that the returned
    column indices are in increasing global order (assuming each block
    stores its rows sorted).  Values/indices are returned in work arrays
    (mat->rowvalues/mat->rowindices) sized for the longest local row; they
    remain valid until MatRestoreRow_MPIAIJ() is called.
*/
PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
{
  Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
  PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
  PetscErrorCode ierr;
  PetscInt       i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart;
  PetscInt       nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend;
  PetscInt       *cmap,*idx_p;

  PetscFunctionBegin;
  if (mat->getrowactive) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Already active");
  mat->getrowactive = PETSC_TRUE;

  if (!mat->rowvalues && (idx || v)) {
    /*
        allocate enough space to hold information from the longest row.
    */
    Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data;
    PetscInt     max = 1,tmp;
    for (i=0; i<matin->rmap->n; i++) {
      /* combined length of row i across both blocks */
      tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
      if (max < tmp) { max = tmp; }
    }
    ierr = PetscMalloc2(max,PetscScalar,&mat->rowvalues,max,PetscInt,&mat->rowindices);CHKERRQ(ierr);
  }

  if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Only local rows")
  lrow = row - rstart;

  /* pass null for whichever of the value/index outputs the caller did not request */
  pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
  if (!v)   {pvA = 0; pvB = 0;}
  if (!idx) {pcA = 0; if (!v) pcB = 0;}
  ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
  ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
  nztot = nzA + nzB;

  cmap  = mat->garray; /* maps B's compressed local column numbers to global columns */
  if (v  || idx) {
    if (nztot) {
      /* Sort by increasing column numbers, assuming A and B already sorted */
      PetscInt imark = -1;
      if (v) {
        *v = v_p = mat->rowvalues;
        /* imark = number of leading B entries with global column < cstart
           (i.e. entries that precede all A entries in global ordering) */
        for (i=0; i<nzB; i++) {
          if (cmap[cworkB[i]] < cstart)   v_p[i] = vworkB[i];
          else break;
        }
        imark = i;
        for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
        for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
      }
      if (idx) {
        *idx = idx_p = mat->rowindices;
        if (imark > -1) {
          /* imark was already determined in the values pass above */
          for (i=0; i<imark; i++) {
            idx_p[i] = cmap[cworkB[i]];
          }
        } else {
          /* no values pass: compute imark here */
          for (i=0; i<nzB; i++) {
            if (cmap[cworkB[i]] < cstart)   idx_p[i] = cmap[cworkB[i]];
            else break;
          }
          imark = i;
        }
        for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart + cworkA[i];
        for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]];
      }
    } else {
      if (idx) *idx = 0;
      if (v)   *v   = 0;
    }
  }
  *nz = nztot;
  /* the merged copies have been made, so the block rows can be released */
  ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
  ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1545 
1546 #undef __FUNCT__
1547 #define __FUNCT__ "MatRestoreRow_MPIAIJ"
1548 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1549 {
1550   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1551 
1552   PetscFunctionBegin;
1553   if (!aij->getrowactive) {
1554     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
1555   }
1556   aij->getrowactive = PETSC_FALSE;
1557   PetscFunctionReturn(0);
1558 }
1559 
#undef __FUNCT__
#define __FUNCT__ "MatNorm_MPIAIJ"
/*
    Computes a norm of the parallel matrix.  Supports NORM_FROBENIUS,
    NORM_1 (maximum column sum) and NORM_INFINITY (maximum row sum);
    NORM_2 is not supported.  With a single process the computation is
    delegated to the sequential diagonal block.
*/
PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  Mat_SeqAIJ     *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data;
  PetscErrorCode ierr;
  PetscInt       i,j,cstart = mat->cmap->rstart;
  PetscReal      sum = 0.0;
  MatScalar      *v;

  PetscFunctionBegin;
  if (aij->size == 1) {
    ierr =  MatNorm(aij->A,type,norm);CHKERRQ(ierr);
  } else {
    if (type == NORM_FROBENIUS) {
      /* accumulate sum |a_ij|^2 over the local diagonal and off-diagonal blocks,
         then reduce across processes and take the square root */
      v = amat->a;
      for (i=0; i<amat->nz; i++) {
#if defined(PETSC_USE_COMPLEX)
        sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
#else
        sum += (*v)*(*v); v++;
#endif
      }
      v = bmat->a;
      for (i=0; i<bmat->nz; i++) {
#if defined(PETSC_USE_COMPLEX)
        sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
#else
        sum += (*v)*(*v); v++;
#endif
      }
      ierr = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPI_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr);
      *norm = sqrt(*norm);
    } else if (type == NORM_1) { /* max column norm */
      /* per-column |a_ij| sums of length cmap->N are formed locally, summed
         across all processes, then the maximum entry is taken */
      PetscReal *tmp,*tmp2;
      PetscInt  *jj,*garray = aij->garray;
      ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr);
      ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscReal),&tmp2);CHKERRQ(ierr);
      ierr = PetscMemzero(tmp,mat->cmap->N*sizeof(PetscReal));CHKERRQ(ierr);
      *norm = 0.0;
      v = amat->a; jj = amat->j;
      for (j=0; j<amat->nz; j++) {
        /* diagonal-block columns are stored relative to cstart */
        tmp[cstart + *jj++ ] += PetscAbsScalar(*v);  v++;
      }
      v = bmat->a; jj = bmat->j;
      for (j=0; j<bmat->nz; j++) {
        /* off-diagonal-block columns are compressed; expand via garray */
        tmp[garray[*jj++]] += PetscAbsScalar(*v); v++;
      }
      ierr = MPI_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPI_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr);
      for (j=0; j<mat->cmap->N; j++) {
        if (tmp2[j] > *norm) *norm = tmp2[j];
      }
      ierr = PetscFree(tmp);CHKERRQ(ierr);
      ierr = PetscFree(tmp2);CHKERRQ(ierr);
    } else if (type == NORM_INFINITY) { /* max row norm */
      /* rows are not split across processes, so the local row-sum maximum
         followed by a global MPI_MAX reduction suffices */
      PetscReal ntemp = 0.0;
      for (j=0; j<aij->A->rmap->n; j++) {
        v = amat->a + amat->i[j];
        sum = 0.0;
        for (i=0; i<amat->i[j+1]-amat->i[j]; i++) {
          sum += PetscAbsScalar(*v); v++;
        }
        v = bmat->a + bmat->i[j];
        for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) {
          sum += PetscAbsScalar(*v); v++;
        }
        if (sum > ntemp) ntemp = sum;
      }
      ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPI_MAX,((PetscObject)mat)->comm);CHKERRQ(ierr);
    } else {
      SETERRQ(PETSC_ERR_SUP,"No support for two norm");
    }
  }
  PetscFunctionReturn(0);
}
1636 
#undef __FUNCT__
#define __FUNCT__ "MatTranspose_MPIAIJ"
/*
    Forms the transpose of a parallel AIJ matrix: each local row of the
    diagonal (A) and off-diagonal (B) blocks is inserted as a column of the
    result with MatSetValues().  For a new matrix, preallocation uses the
    transposed column counts of the diagonal block (d_nnz), with o_nnz
    approximated by d_nnz to avoid communication.  In-place transposition
    (MAT_REUSE_MATRIX with *matout == A) is allowed only for square
    matrices and is completed with MatHeaderCopy().

    NOTE: Aloc->j is temporarily shifted to global column numbers and
    restored afterwards, so this routine is not safe for concurrent reads
    of A.
*/
PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  Mat_SeqAIJ     *Aloc=(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data;
  PetscErrorCode ierr;
  PetscInt       M = A->rmap->N,N = A->cmap->N,ma,na,mb,*ai,*aj,*bi,*bj,row,*cols,*cols_tmp,i,*d_nnz;
  PetscInt       cstart=A->cmap->rstart,ncol;
  Mat            B;
  MatScalar      *array;

  PetscFunctionBegin;
  if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");

  ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n;
  ai = Aloc->i; aj = Aloc->j;
  bi = Bloc->i; bj = Bloc->j;
  if (reuse == MAT_INITIAL_MATRIX || *matout == A) {
    /* compute d_nnz for preallocation; o_nnz is approximated by d_nnz to avoid communication */
    ierr = PetscMalloc((1+na)*sizeof(PetscInt),&d_nnz);CHKERRQ(ierr);
    ierr = PetscMemzero(d_nnz,(1+na)*sizeof(PetscInt));CHKERRQ(ierr);
    for (i=0; i<ai[ma]; i++){
      d_nnz[aj[i]] ++;
      aj[i] += cstart; /* global col index to be used by MatSetValues() */
    }

    ierr = MatCreate(((PetscObject)A)->comm,&B);CHKERRQ(ierr);
    ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
    ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,d_nnz);CHKERRQ(ierr);
    ierr = PetscFree(d_nnz);CHKERRQ(ierr);
  } else {
    B = *matout;
  }

  /* copy over the A part: local row i of A becomes column rstart+i of B */
  array = Aloc->a;
  row = A->rmap->rstart;
  for (i=0; i<ma; i++) {
    ncol = ai[i+1]-ai[i];
    ierr = MatSetValues(B,ncol,aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
    row++; array += ncol; aj += ncol;
  }
  aj = Aloc->j;
  for (i=0; i<ai[ma]; i++) aj[i] -= cstart; /* resume local col index */

  /* copy over the B part; its compressed column indices are first expanded
     to global column numbers using the garray map */
  ierr = PetscMalloc(bi[mb]*sizeof(PetscInt),&cols);CHKERRQ(ierr);
  ierr = PetscMemzero(cols,bi[mb]*sizeof(PetscInt));CHKERRQ(ierr);
  array = Bloc->a;
  row = A->rmap->rstart;
  for (i=0; i<bi[mb]; i++) {cols[i] = a->garray[bj[i]];}
  cols_tmp = cols;
  for (i=0; i<mb; i++) {
    ncol = bi[i+1]-bi[i];
    ierr = MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
    row++; array += ncol; cols_tmp += ncol;
  }
  ierr = PetscFree(cols);CHKERRQ(ierr);

  ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  if (reuse == MAT_INITIAL_MATRIX || *matout != A) {
    *matout = B;
  } else {
    /* in-place: replace A's contents by B's (B is consumed) */
    ierr = MatHeaderCopy(A,B);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
1707 
1708 #undef __FUNCT__
1709 #define __FUNCT__ "MatDiagonalScale_MPIAIJ"
1710 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
1711 {
1712   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1713   Mat            a = aij->A,b = aij->B;
1714   PetscErrorCode ierr;
1715   PetscInt       s1,s2,s3;
1716 
1717   PetscFunctionBegin;
1718   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
1719   if (rr) {
1720     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
1721     if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
1722     /* Overlap communication with computation. */
1723     ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1724   }
1725   if (ll) {
1726     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
1727     if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
1728     ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr);
1729   }
1730   /* scale  the diagonal block */
1731   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
1732 
1733   if (rr) {
1734     /* Do a scatter end and then right scale the off-diagonal block */
1735     ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1736     ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr);
1737   }
1738 
1739   PetscFunctionReturn(0);
1740 }
1741 
1742 #undef __FUNCT__
1743 #define __FUNCT__ "MatSetBlockSize_MPIAIJ"
1744 PetscErrorCode MatSetBlockSize_MPIAIJ(Mat A,PetscInt bs)
1745 {
1746   Mat_MPIAIJ     *a   = (Mat_MPIAIJ*)A->data;
1747   PetscErrorCode ierr;
1748 
1749   PetscFunctionBegin;
1750   ierr = MatSetBlockSize(a->A,bs);CHKERRQ(ierr);
1751   ierr = MatSetBlockSize(a->B,bs);CHKERRQ(ierr);
1752   ierr = PetscLayoutSetBlockSize(A->rmap,bs);CHKERRQ(ierr);
1753   ierr = PetscLayoutSetBlockSize(A->cmap,bs);CHKERRQ(ierr);
1754   PetscFunctionReturn(0);
1755 }
1756 #undef __FUNCT__
1757 #define __FUNCT__ "MatSetUnfactored_MPIAIJ"
1758 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A)
1759 {
1760   Mat_MPIAIJ     *a   = (Mat_MPIAIJ*)A->data;
1761   PetscErrorCode ierr;
1762 
1763   PetscFunctionBegin;
1764   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
1765   PetscFunctionReturn(0);
1766 }
1767 
1768 #undef __FUNCT__
1769 #define __FUNCT__ "MatEqual_MPIAIJ"
1770 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscTruth *flag)
1771 {
1772   Mat_MPIAIJ     *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data;
1773   Mat            a,b,c,d;
1774   PetscTruth     flg;
1775   PetscErrorCode ierr;
1776 
1777   PetscFunctionBegin;
1778   a = matA->A; b = matA->B;
1779   c = matB->A; d = matB->B;
1780 
1781   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
1782   if (flg) {
1783     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
1784   }
1785   ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,((PetscObject)A)->comm);CHKERRQ(ierr);
1786   PetscFunctionReturn(0);
1787 }
1788 
1789 #undef __FUNCT__
1790 #define __FUNCT__ "MatCopy_MPIAIJ"
1791 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
1792 {
1793   PetscErrorCode ierr;
1794   Mat_MPIAIJ     *a = (Mat_MPIAIJ *)A->data;
1795   Mat_MPIAIJ     *b = (Mat_MPIAIJ *)B->data;
1796 
1797   PetscFunctionBegin;
1798   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
1799   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
1800     /* because of the column compression in the off-processor part of the matrix a->B,
1801        the number of columns in a->B and b->B may be different, hence we cannot call
1802        the MatCopy() directly on the two parts. If need be, we can provide a more
1803        efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices
1804        then copying the submatrices */
1805     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
1806   } else {
1807     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
1808     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
1809   }
1810   PetscFunctionReturn(0);
1811 }
1812 
1813 #undef __FUNCT__
1814 #define __FUNCT__ "MatSetUpPreallocation_MPIAIJ"
1815 PetscErrorCode MatSetUpPreallocation_MPIAIJ(Mat A)
1816 {
1817   PetscErrorCode ierr;
1818 
1819   PetscFunctionBegin;
1820   ierr =  MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
1821   PetscFunctionReturn(0);
1822 }
1823 
1824 #include "petscblaslapack.h"
#undef __FUNCT__
#define __FUNCT__ "MatAXPY_MPIAIJ"
/*
    Computes Y = a*X + Y.

    SAME_NONZERO_PATTERN:   the value arrays of the corresponding blocks
                            line up entry-for-entry, so a raw BLAS axpy on
                            each block's a[] array suffices.
    SUBSET_NONZERO_PATTERN: the diagonal blocks use MatAXPY_SeqAIJ(); for
                            the off-diagonal blocks a map (xtoy) from X's
                            nonzeros into Y's value array is built once,
                            cached on Y (together with a reference to X's
                            off-diagonal block), and used to scatter-add.
    otherwise:              falls back to the general MatAXPY_Basic().
*/
PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
{
  PetscErrorCode ierr;
  PetscInt       i;
  Mat_MPIAIJ     *xx = (Mat_MPIAIJ *)X->data,*yy = (Mat_MPIAIJ *)Y->data;
  PetscBLASInt   bnz,one=1;
  Mat_SeqAIJ     *x,*y;

  PetscFunctionBegin;
  if (str == SAME_NONZERO_PATTERN) {
    PetscScalar alpha = a;
    x = (Mat_SeqAIJ *)xx->A->data;
    y = (Mat_SeqAIJ *)yy->A->data;
    bnz = PetscBLASIntCast(x->nz);
    BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
    x = (Mat_SeqAIJ *)xx->B->data;
    y = (Mat_SeqAIJ *)yy->B->data;
    bnz = PetscBLASIntCast(x->nz);
    BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
  } else if (str == SUBSET_NONZERO_PATTERN) {
    ierr = MatAXPY_SeqAIJ(yy->A,a,xx->A,str);CHKERRQ(ierr);

    x = (Mat_SeqAIJ *)xx->B->data;
    y = (Mat_SeqAIJ *)yy->B->data;
    if (y->xtoy && y->XtoY != xx->B) {
      /* cached map was built against a different X; discard it */
      ierr = PetscFree(y->xtoy);CHKERRQ(ierr);
      ierr = MatDestroy(y->XtoY);CHKERRQ(ierr);
    }
    if (!y->xtoy) { /* get xtoy */
      ierr = MatAXPYGetxtoy_Private(xx->B->rmap->n,x->i,x->j,xx->garray,y->i,y->j,yy->garray,&y->xtoy);CHKERRQ(ierr);
      y->XtoY = xx->B;
      /* keep X's off-diagonal block alive while the cached map refers to it */
      ierr = PetscObjectReference((PetscObject)xx->B);CHKERRQ(ierr);
    }
    for (i=0; i<x->nz; i++) y->a[y->xtoy[i]] += a*(x->a[i]);
  } else {
    ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
1866 
1867 EXTERN PetscErrorCode PETSCMAT_DLLEXPORT MatConjugate_SeqAIJ(Mat);
1868 
1869 #undef __FUNCT__
1870 #define __FUNCT__ "MatConjugate_MPIAIJ"
1871 PetscErrorCode PETSCMAT_DLLEXPORT MatConjugate_MPIAIJ(Mat mat)
1872 {
1873 #if defined(PETSC_USE_COMPLEX)
1874   PetscErrorCode ierr;
1875   Mat_MPIAIJ     *aij = (Mat_MPIAIJ *)mat->data;
1876 
1877   PetscFunctionBegin;
1878   ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr);
1879   ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr);
1880 #else
1881   PetscFunctionBegin;
1882 #endif
1883   PetscFunctionReturn(0);
1884 }
1885 
1886 #undef __FUNCT__
1887 #define __FUNCT__ "MatRealPart_MPIAIJ"
1888 PetscErrorCode MatRealPart_MPIAIJ(Mat A)
1889 {
1890   Mat_MPIAIJ   *a = (Mat_MPIAIJ*)A->data;
1891   PetscErrorCode ierr;
1892 
1893   PetscFunctionBegin;
1894   ierr = MatRealPart(a->A);CHKERRQ(ierr);
1895   ierr = MatRealPart(a->B);CHKERRQ(ierr);
1896   PetscFunctionReturn(0);
1897 }
1898 
1899 #undef __FUNCT__
1900 #define __FUNCT__ "MatImaginaryPart_MPIAIJ"
1901 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A)
1902 {
1903   Mat_MPIAIJ   *a = (Mat_MPIAIJ*)A->data;
1904   PetscErrorCode ierr;
1905 
1906   PetscFunctionBegin;
1907   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
1908   ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
1909   PetscFunctionReturn(0);
1910 }
1911 
1912 #ifdef PETSC_HAVE_PBGL
1913 
1914 #include <boost/parallel/mpi/bsp_process_group.hpp>
1915 #include <boost/graph/distributed/ilu_default_graph.hpp>
1916 #include <boost/graph/distributed/ilu_0_block.hpp>
1917 #include <boost/graph/distributed/ilu_preconditioner.hpp>
1918 #include <boost/graph/distributed/petsc/interface.hpp>
1919 #include <boost/multi_array.hpp>
#include <boost/parallel/distributed_property_map.hpp>
1921 
1922 #undef __FUNCT__
1923 #define __FUNCT__ "MatILUFactorSymbolic_MPIAIJ"
1924 /*
1925   This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu>
1926 */
1927 PetscErrorCode MatILUFactorSymbolic_MPIAIJ(Mat fact,Mat A, IS isrow, IS iscol, const MatFactorInfo *info)
1928 {
1929   namespace petsc = boost::distributed::petsc;
1930 
1931   namespace graph_dist = boost::graph::distributed;
1932   using boost::graph::distributed::ilu_default::process_group_type;
1933   using boost::graph::ilu_permuted;
1934 
1935   PetscTruth      row_identity, col_identity;
1936   PetscContainer  c;
1937   PetscInt        m, n, M, N;
1938   PetscErrorCode  ierr;
1939 
1940   PetscFunctionBegin;
1941   if (info->levels != 0) SETERRQ(PETSC_ERR_SUP,"Only levels = 0 supported for parallel ilu");
1942   ierr = ISIdentity(isrow, &row_identity);CHKERRQ(ierr);
1943   ierr = ISIdentity(iscol, &col_identity);CHKERRQ(ierr);
1944   if (!row_identity || !col_identity) {
1945     SETERRQ(PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for parallel ILU");
1946   }
1947 
1948   process_group_type pg;
1949   typedef graph_dist::ilu_default::ilu_level_graph_type  lgraph_type;
1950   lgraph_type*   lgraph_p = new lgraph_type(petsc::num_global_vertices(A), pg, petsc::matrix_distribution(A, pg));
1951   lgraph_type&   level_graph = *lgraph_p;
1952   graph_dist::ilu_default::graph_type&            graph(level_graph.graph);
1953 
1954   petsc::read_matrix(A, graph, get(boost::edge_weight, graph));
1955   ilu_permuted(level_graph);
1956 
1957   /* put together the new matrix */
1958   ierr = MatCreate(((PetscObject)A)->comm, fact);CHKERRQ(ierr);
1959   ierr = MatGetLocalSize(A, &m, &n);CHKERRQ(ierr);
1960   ierr = MatGetSize(A, &M, &N);CHKERRQ(ierr);
1961   ierr = MatSetSizes(fact, m, n, M, N);CHKERRQ(ierr);
1962   ierr = MatSetType(fact, ((PetscObject)A)->type_name);CHKERRQ(ierr);
1963   ierr = MatAssemblyBegin(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1964   ierr = MatAssemblyEnd(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1965 
1966   ierr = PetscContainerCreate(((PetscObject)A)->comm, &c);
1967   ierr = PetscContainerSetPointer(c, lgraph_p);
1968   ierr = PetscObjectCompose((PetscObject) (fact), "graph", (PetscObject) c);
1969   PetscFunctionReturn(0);
1970 }
1971 
#undef __FUNCT__
#define __FUNCT__ "MatLUFactorNumeric_MPIAIJ"
/*
    Intentionally a no-op: the PBGL-based factorization is performed
    entirely in MatILUFactorSymbolic_MPIAIJ() (which calls ilu_permuted()),
    so there is no separate numeric phase.
*/
PetscErrorCode MatLUFactorNumeric_MPIAIJ(Mat B,Mat A, const MatFactorInfo *info)
{
  PetscFunctionBegin;
  PetscFunctionReturn(0);
}
1979 
1980 #undef __FUNCT__
1981 #define __FUNCT__ "MatSolve_MPIAIJ"
1982 /*
1983   This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu>
1984 */
1985 PetscErrorCode MatSolve_MPIAIJ(Mat A, Vec b, Vec x)
1986 {
1987   namespace graph_dist = boost::graph::distributed;
1988 
1989   typedef graph_dist::ilu_default::ilu_level_graph_type  lgraph_type;
1990   lgraph_type*   lgraph_p;
1991   PetscContainer c;
1992   PetscErrorCode ierr;
1993 
1994   PetscFunctionBegin;
1995   ierr = PetscObjectQuery((PetscObject) A, "graph", (PetscObject *) &c);CHKERRQ(ierr);
1996   ierr = PetscContainerGetPointer(c, (void **) &lgraph_p);CHKERRQ(ierr);
1997   ierr = VecCopy(b, x);CHKERRQ(ierr);
1998 
1999   PetscScalar* array_x;
2000   ierr = VecGetArray(x, &array_x);CHKERRQ(ierr);
2001   PetscInt sx;
2002   ierr = VecGetSize(x, &sx);CHKERRQ(ierr);
2003 
2004   PetscScalar* array_b;
2005   ierr = VecGetArray(b, &array_b);CHKERRQ(ierr);
2006   PetscInt sb;
2007   ierr = VecGetSize(b, &sb);CHKERRQ(ierr);
2008 
2009   lgraph_type&   level_graph = *lgraph_p;
2010   graph_dist::ilu_default::graph_type&            graph(level_graph.graph);
2011 
2012   typedef boost::multi_array_ref<PetscScalar, 1> array_ref_type;
2013   array_ref_type                                 ref_b(array_b, boost::extents[num_vertices(graph)]),
2014                                                  ref_x(array_x, boost::extents[num_vertices(graph)]);
2015 
2016   typedef boost::iterator_property_map<array_ref_type::iterator,
2017                                 boost::property_map<graph_dist::ilu_default::graph_type, boost::vertex_index_t>::type>  gvector_type;
2018   gvector_type                                   vector_b(ref_b.begin(), get(boost::vertex_index, graph)),
2019                                                  vector_x(ref_x.begin(), get(boost::vertex_index, graph));
2020 
2021   ilu_set_solve(*lgraph_p, vector_b, vector_x);
2022 
2023   PetscFunctionReturn(0);
2024 }
2025 #endif
2026 
typedef struct { /* used by MatGetRedundantMatrix() for reusing matredundant */
  PetscInt       nzlocal,nsends,nrecvs;        /* number of local nonzeros; number of sends/receives */
  PetscMPIInt    *send_rank,*recv_rank;        /* ranks this process sends to / receives from */
  PetscInt       *sbuf_nz,*rbuf_nz,*sbuf_j,**rbuf_j; /* nonzero-count and column-index send/receive buffers */
  PetscScalar    *sbuf_a,**rbuf_a;             /* numerical-value send/receive buffers */
  PetscErrorCode (*MatDestroy)(Mat);           /* original destroy routine, restored by MatDestroy_MatRedundant() */
} Mat_Redundant;
2034 
2035 #undef __FUNCT__
2036 #define __FUNCT__ "PetscContainerDestroy_MatRedundant"
2037 PetscErrorCode PetscContainerDestroy_MatRedundant(void *ptr)
2038 {
2039   PetscErrorCode       ierr;
2040   Mat_Redundant        *redund=(Mat_Redundant*)ptr;
2041   PetscInt             i;
2042 
2043   PetscFunctionBegin;
2044   ierr = PetscFree2(redund->send_rank,redund->recv_rank);CHKERRQ(ierr);
2045   ierr = PetscFree(redund->sbuf_j);CHKERRQ(ierr);
2046   ierr = PetscFree(redund->sbuf_a);CHKERRQ(ierr);
2047   for (i=0; i<redund->nrecvs; i++){
2048     ierr = PetscFree(redund->rbuf_j[i]);CHKERRQ(ierr);
2049     ierr = PetscFree(redund->rbuf_a[i]);CHKERRQ(ierr);
2050   }
2051   ierr = PetscFree4(redund->sbuf_nz,redund->rbuf_nz,redund->rbuf_j,redund->rbuf_a);CHKERRQ(ierr);
2052   ierr = PetscFree(redund);CHKERRQ(ierr);
2053   PetscFunctionReturn(0);
2054 }
2055 
2056 #undef __FUNCT__
2057 #define __FUNCT__ "MatDestroy_MatRedundant"
2058 PetscErrorCode MatDestroy_MatRedundant(Mat A)
2059 {
2060   PetscErrorCode  ierr;
2061   PetscContainer  container;
2062   Mat_Redundant   *redund=PETSC_NULL;
2063 
2064   PetscFunctionBegin;
2065   ierr = PetscObjectQuery((PetscObject)A,"Mat_Redundant",(PetscObject *)&container);CHKERRQ(ierr);
2066   if (container) {
2067     ierr = PetscContainerGetPointer(container,(void **)&redund);CHKERRQ(ierr);
2068   } else {
2069     SETERRQ(PETSC_ERR_PLIB,"Container does not exit");
2070   }
2071   A->ops->destroy = redund->MatDestroy;
2072   ierr = PetscObjectCompose((PetscObject)A,"Mat_Redundant",0);CHKERRQ(ierr);
2073   ierr = (*A->ops->destroy)(A);CHKERRQ(ierr);
2074   ierr = PetscContainerDestroy(container);CHKERRQ(ierr);
2075   PetscFunctionReturn(0);
2076 }
2077 
2078 #undef __FUNCT__
2079 #define __FUNCT__ "MatGetRedundantMatrix_MPIAIJ"
2080 PetscErrorCode MatGetRedundantMatrix_MPIAIJ(Mat mat,PetscInt nsubcomm,MPI_Comm subcomm,PetscInt mlocal_sub,MatReuse reuse,Mat *matredundant)
2081 {
2082   PetscMPIInt    rank,size;
2083   MPI_Comm       comm=((PetscObject)mat)->comm;
2084   PetscErrorCode ierr;
2085   PetscInt       nsends=0,nrecvs=0,i,rownz_max=0;
2086   PetscMPIInt    *send_rank=PETSC_NULL,*recv_rank=PETSC_NULL;
2087   PetscInt       *rowrange=mat->rmap->range;
2088   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2089   Mat            A=aij->A,B=aij->B,C=*matredundant;
2090   Mat_SeqAIJ     *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data;
2091   PetscScalar    *sbuf_a;
2092   PetscInt       nzlocal=a->nz+b->nz;
2093   PetscInt       j,cstart=mat->cmap->rstart,cend=mat->cmap->rend,row,nzA,nzB,ncols,*cworkA,*cworkB;
2094   PetscInt       rstart=mat->rmap->rstart,rend=mat->rmap->rend,*bmap=aij->garray,M,N;
2095   PetscInt       *cols,ctmp,lwrite,*rptr,l,*sbuf_j;
2096   MatScalar      *aworkA,*aworkB;
2097   PetscScalar    *vals;
2098   PetscMPIInt    tag1,tag2,tag3,imdex;
2099   MPI_Request    *s_waits1=PETSC_NULL,*s_waits2=PETSC_NULL,*s_waits3=PETSC_NULL,
2100                  *r_waits1=PETSC_NULL,*r_waits2=PETSC_NULL,*r_waits3=PETSC_NULL;
2101   MPI_Status     recv_status,*send_status;
2102   PetscInt       *sbuf_nz=PETSC_NULL,*rbuf_nz=PETSC_NULL,count;
2103   PetscInt       **rbuf_j=PETSC_NULL;
2104   PetscScalar    **rbuf_a=PETSC_NULL;
2105   Mat_Redundant  *redund=PETSC_NULL;
2106   PetscContainer container;
2107 
2108   PetscFunctionBegin;
2109   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2110   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2111 
2112   if (reuse == MAT_REUSE_MATRIX) {
2113     ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr);
2114     if (M != N || M != mat->rmap->N) SETERRQ(PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong global size");
2115     ierr = MatGetLocalSize(C,&M,&N);CHKERRQ(ierr);
2116     if (M != N || M != mlocal_sub) SETERRQ(PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong local size");
2117     ierr = PetscObjectQuery((PetscObject)C,"Mat_Redundant",(PetscObject *)&container);CHKERRQ(ierr);
2118     if (container) {
2119       ierr = PetscContainerGetPointer(container,(void **)&redund);CHKERRQ(ierr);
2120     } else {
2121       SETERRQ(PETSC_ERR_PLIB,"Container does not exit");
2122     }
2123     if (nzlocal != redund->nzlocal) SETERRQ(PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong nzlocal");
2124 
2125     nsends    = redund->nsends;
2126     nrecvs    = redund->nrecvs;
2127     send_rank = redund->send_rank;
2128     recv_rank = redund->recv_rank;
2129     sbuf_nz   = redund->sbuf_nz;
2130     rbuf_nz   = redund->rbuf_nz;
2131     sbuf_j    = redund->sbuf_j;
2132     sbuf_a    = redund->sbuf_a;
2133     rbuf_j    = redund->rbuf_j;
2134     rbuf_a    = redund->rbuf_a;
2135   }
2136 
2137   if (reuse == MAT_INITIAL_MATRIX){
2138     PetscMPIInt  subrank,subsize;
2139     PetscInt     nleftover,np_subcomm;
2140     /* get the destination processors' id send_rank, nsends and nrecvs */
2141     ierr = MPI_Comm_rank(subcomm,&subrank);CHKERRQ(ierr);
2142     ierr = MPI_Comm_size(subcomm,&subsize);CHKERRQ(ierr);
2143     ierr = PetscMalloc2(size,PetscMPIInt,&send_rank,size,PetscMPIInt,&recv_rank);
2144     np_subcomm = size/nsubcomm;
2145     nleftover  = size - nsubcomm*np_subcomm;
2146     nsends = 0; nrecvs = 0;
2147     for (i=0; i<size; i++){ /* i=rank*/
2148       if (subrank == i/nsubcomm && rank != i){ /* my_subrank == other's subrank */
2149         send_rank[nsends] = i; nsends++;
2150         recv_rank[nrecvs++] = i;
2151       }
2152     }
2153     if (rank >= size - nleftover){/* this proc is a leftover processor */
2154       i = size-nleftover-1;
2155       j = 0;
2156       while (j < nsubcomm - nleftover){
2157         send_rank[nsends++] = i;
2158         i--; j++;
2159       }
2160     }
2161 
2162     if (nleftover && subsize == size/nsubcomm && subrank==subsize-1){ /* this proc recvs from leftover processors */
2163       for (i=0; i<nleftover; i++){
2164         recv_rank[nrecvs++] = size-nleftover+i;
2165       }
2166     }
2167 
2168     /* allocate sbuf_j, sbuf_a */
2169     i = nzlocal + rowrange[rank+1] - rowrange[rank] + 2;
2170     ierr = PetscMalloc(i*sizeof(PetscInt),&sbuf_j);CHKERRQ(ierr);
2171     ierr = PetscMalloc((nzlocal+1)*sizeof(PetscScalar),&sbuf_a);CHKERRQ(ierr);
2172   } /* endof if (reuse == MAT_INITIAL_MATRIX) */
2173 
2174   /* copy mat's local entries into the buffers */
2175   if (reuse == MAT_INITIAL_MATRIX){
2176     rownz_max = 0;
2177     rptr = sbuf_j;
2178     cols = sbuf_j + rend-rstart + 1;
2179     vals = sbuf_a;
2180     rptr[0] = 0;
2181     for (i=0; i<rend-rstart; i++){
2182       row = i + rstart;
2183       nzA    = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i];
2184       ncols  = nzA + nzB;
2185       cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i];
2186       aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i];
2187       /* load the column indices for this row into cols */
2188       lwrite = 0;
2189       for (l=0; l<nzB; l++) {
2190         if ((ctmp = bmap[cworkB[l]]) < cstart){
2191           vals[lwrite]   = aworkB[l];
2192           cols[lwrite++] = ctmp;
2193         }
2194       }
2195       for (l=0; l<nzA; l++){
2196         vals[lwrite]   = aworkA[l];
2197         cols[lwrite++] = cstart + cworkA[l];
2198       }
2199       for (l=0; l<nzB; l++) {
2200         if ((ctmp = bmap[cworkB[l]]) >= cend){
2201           vals[lwrite]   = aworkB[l];
2202           cols[lwrite++] = ctmp;
2203         }
2204       }
2205       vals += ncols;
2206       cols += ncols;
2207       rptr[i+1] = rptr[i] + ncols;
2208       if (rownz_max < ncols) rownz_max = ncols;
2209     }
2210     if (rptr[rend-rstart] != a->nz + b->nz) SETERRQ4(1, "rptr[%d] %d != %d + %d",rend-rstart,rptr[rend-rstart+1],a->nz,b->nz);
2211   } else { /* only copy matrix values into sbuf_a */
2212     rptr = sbuf_j;
2213     vals = sbuf_a;
2214     rptr[0] = 0;
2215     for (i=0; i<rend-rstart; i++){
2216       row = i + rstart;
2217       nzA    = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i];
2218       ncols  = nzA + nzB;
2219       cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i];
2220       aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i];
2221       lwrite = 0;
2222       for (l=0; l<nzB; l++) {
2223         if ((ctmp = bmap[cworkB[l]]) < cstart) vals[lwrite++] = aworkB[l];
2224       }
2225       for (l=0; l<nzA; l++) vals[lwrite++] = aworkA[l];
2226       for (l=0; l<nzB; l++) {
2227         if ((ctmp = bmap[cworkB[l]]) >= cend) vals[lwrite++] = aworkB[l];
2228       }
2229       vals += ncols;
2230       rptr[i+1] = rptr[i] + ncols;
2231     }
2232   } /* endof if (reuse == MAT_INITIAL_MATRIX) */
2233 
2234   /* send nzlocal to others, and recv other's nzlocal */
2235   /*--------------------------------------------------*/
2236   if (reuse == MAT_INITIAL_MATRIX){
2237     ierr = PetscMalloc2(3*(nsends + nrecvs)+1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr);
2238     s_waits2 = s_waits3 + nsends;
2239     s_waits1 = s_waits2 + nsends;
2240     r_waits1 = s_waits1 + nsends;
2241     r_waits2 = r_waits1 + nrecvs;
2242     r_waits3 = r_waits2 + nrecvs;
2243   } else {
2244     ierr = PetscMalloc2(nsends + nrecvs +1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr);
2245     r_waits3 = s_waits3 + nsends;
2246   }
2247 
2248   ierr = PetscObjectGetNewTag((PetscObject)mat,&tag3);CHKERRQ(ierr);
2249   if (reuse == MAT_INITIAL_MATRIX){
2250     /* get new tags to keep the communication clean */
2251     ierr = PetscObjectGetNewTag((PetscObject)mat,&tag1);CHKERRQ(ierr);
2252     ierr = PetscObjectGetNewTag((PetscObject)mat,&tag2);CHKERRQ(ierr);
2253     ierr = PetscMalloc4(nsends,PetscInt,&sbuf_nz,nrecvs,PetscInt,&rbuf_nz,nrecvs,PetscInt*,&rbuf_j,nrecvs,PetscScalar*,&rbuf_a);CHKERRQ(ierr);
2254 
2255     /* post receives of other's nzlocal */
2256     for (i=0; i<nrecvs; i++){
2257       ierr = MPI_Irecv(rbuf_nz+i,1,MPIU_INT,MPI_ANY_SOURCE,tag1,comm,r_waits1+i);CHKERRQ(ierr);
2258     }
2259     /* send nzlocal to others */
2260     for (i=0; i<nsends; i++){
2261       sbuf_nz[i] = nzlocal;
2262       ierr = MPI_Isend(sbuf_nz+i,1,MPIU_INT,send_rank[i],tag1,comm,s_waits1+i);CHKERRQ(ierr);
2263     }
2264     /* wait on receives of nzlocal; allocate space for rbuf_j, rbuf_a */
2265     count = nrecvs;
2266     while (count) {
2267       ierr = MPI_Waitany(nrecvs,r_waits1,&imdex,&recv_status);CHKERRQ(ierr);
2268       recv_rank[imdex] = recv_status.MPI_SOURCE;
2269       /* allocate rbuf_a and rbuf_j; then post receives of rbuf_j */
2270       ierr = PetscMalloc((rbuf_nz[imdex]+1)*sizeof(PetscScalar),&rbuf_a[imdex]);CHKERRQ(ierr);
2271 
2272       i = rowrange[recv_status.MPI_SOURCE+1] - rowrange[recv_status.MPI_SOURCE]; /* number of expected mat->i */
2273       rbuf_nz[imdex] += i + 2;
2274       ierr = PetscMalloc(rbuf_nz[imdex]*sizeof(PetscInt),&rbuf_j[imdex]);CHKERRQ(ierr);
2275       ierr = MPI_Irecv(rbuf_j[imdex],rbuf_nz[imdex],MPIU_INT,recv_status.MPI_SOURCE,tag2,comm,r_waits2+imdex);CHKERRQ(ierr);
2276       count--;
2277     }
2278     /* wait on sends of nzlocal */
2279     if (nsends) {ierr = MPI_Waitall(nsends,s_waits1,send_status);CHKERRQ(ierr);}
2280     /* send mat->i,j to others, and recv from other's */
2281     /*------------------------------------------------*/
2282     for (i=0; i<nsends; i++){
2283       j = nzlocal + rowrange[rank+1] - rowrange[rank] + 1;
2284       ierr = MPI_Isend(sbuf_j,j,MPIU_INT,send_rank[i],tag2,comm,s_waits2+i);CHKERRQ(ierr);
2285     }
2286     /* wait on receives of mat->i,j */
2287     /*------------------------------*/
2288     count = nrecvs;
2289     while (count) {
2290       ierr = MPI_Waitany(nrecvs,r_waits2,&imdex,&recv_status);CHKERRQ(ierr);
2291       if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE);
2292       count--;
2293     }
2294     /* wait on sends of mat->i,j */
2295     /*---------------------------*/
2296     if (nsends) {
2297       ierr = MPI_Waitall(nsends,s_waits2,send_status);CHKERRQ(ierr);
2298     }
2299   } /* endof if (reuse == MAT_INITIAL_MATRIX) */
2300 
2301   /* post receives, send and receive mat->a */
2302   /*----------------------------------------*/
2303   for (imdex=0; imdex<nrecvs; imdex++) {
2304     ierr = MPI_Irecv(rbuf_a[imdex],rbuf_nz[imdex],MPIU_SCALAR,recv_rank[imdex],tag3,comm,r_waits3+imdex);CHKERRQ(ierr);
2305   }
2306   for (i=0; i<nsends; i++){
2307     ierr = MPI_Isend(sbuf_a,nzlocal,MPIU_SCALAR,send_rank[i],tag3,comm,s_waits3+i);CHKERRQ(ierr);
2308   }
2309   count = nrecvs;
2310   while (count) {
2311     ierr = MPI_Waitany(nrecvs,r_waits3,&imdex,&recv_status);CHKERRQ(ierr);
2312     if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE);
2313     count--;
2314   }
2315   if (nsends) {
2316     ierr = MPI_Waitall(nsends,s_waits3,send_status);CHKERRQ(ierr);
2317   }
2318 
2319   ierr = PetscFree2(s_waits3,send_status);CHKERRQ(ierr);
2320 
2321   /* create redundant matrix */
2322   /*-------------------------*/
2323   if (reuse == MAT_INITIAL_MATRIX){
2324     /* compute rownz_max for preallocation */
2325     for (imdex=0; imdex<nrecvs; imdex++){
2326       j = rowrange[recv_rank[imdex]+1] - rowrange[recv_rank[imdex]];
2327       rptr = rbuf_j[imdex];
2328       for (i=0; i<j; i++){
2329         ncols = rptr[i+1] - rptr[i];
2330         if (rownz_max < ncols) rownz_max = ncols;
2331       }
2332     }
2333 
2334     ierr = MatCreate(subcomm,&C);CHKERRQ(ierr);
2335     ierr = MatSetSizes(C,mlocal_sub,mlocal_sub,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr);
2336     ierr = MatSetFromOptions(C);CHKERRQ(ierr);
2337     ierr = MatSeqAIJSetPreallocation(C,rownz_max,PETSC_NULL);CHKERRQ(ierr);
2338     ierr = MatMPIAIJSetPreallocation(C,rownz_max,PETSC_NULL,rownz_max,PETSC_NULL);CHKERRQ(ierr);
2339   } else {
2340     C = *matredundant;
2341   }
2342 
2343   /* insert local matrix entries */
2344   rptr = sbuf_j;
2345   cols = sbuf_j + rend-rstart + 1;
2346   vals = sbuf_a;
2347   for (i=0; i<rend-rstart; i++){
2348     row   = i + rstart;
2349     ncols = rptr[i+1] - rptr[i];
2350     ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
2351     vals += ncols;
2352     cols += ncols;
2353   }
2354   /* insert received matrix entries */
2355   for (imdex=0; imdex<nrecvs; imdex++){
2356     rstart = rowrange[recv_rank[imdex]];
2357     rend   = rowrange[recv_rank[imdex]+1];
2358     rptr = rbuf_j[imdex];
2359     cols = rbuf_j[imdex] + rend-rstart + 1;
2360     vals = rbuf_a[imdex];
2361     for (i=0; i<rend-rstart; i++){
2362       row   = i + rstart;
2363       ncols = rptr[i+1] - rptr[i];
2364       ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
2365       vals += ncols;
2366       cols += ncols;
2367     }
2368   }
2369   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2370   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2371   ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr);
2372   if (M != mat->rmap->N || N != mat->cmap->N) SETERRQ2(PETSC_ERR_ARG_INCOMP,"redundant mat size %d != input mat size %d",M,mat->rmap->N);
2373   if (reuse == MAT_INITIAL_MATRIX){
2374     PetscContainer container;
2375     *matredundant = C;
2376     /* create a supporting struct and attach it to C for reuse */
2377     ierr = PetscNewLog(C,Mat_Redundant,&redund);CHKERRQ(ierr);
2378     ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
2379     ierr = PetscContainerSetPointer(container,redund);CHKERRQ(ierr);
2380     ierr = PetscObjectCompose((PetscObject)C,"Mat_Redundant",(PetscObject)container);CHKERRQ(ierr);
2381     ierr = PetscContainerSetUserDestroy(container,PetscContainerDestroy_MatRedundant);CHKERRQ(ierr);
2382 
2383     redund->nzlocal = nzlocal;
2384     redund->nsends  = nsends;
2385     redund->nrecvs  = nrecvs;
2386     redund->send_rank = send_rank;
2387     redund->recv_rank = recv_rank;
2388     redund->sbuf_nz = sbuf_nz;
2389     redund->rbuf_nz = rbuf_nz;
2390     redund->sbuf_j  = sbuf_j;
2391     redund->sbuf_a  = sbuf_a;
2392     redund->rbuf_j  = rbuf_j;
2393     redund->rbuf_a  = rbuf_a;
2394 
2395     redund->MatDestroy = C->ops->destroy;
2396     C->ops->destroy    = MatDestroy_MatRedundant;
2397   }
2398   PetscFunctionReturn(0);
2399 }
2400 
2401 #undef __FUNCT__
2402 #define __FUNCT__ "MatGetRowMaxAbs_MPIAIJ"
2403 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2404 {
2405   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2406   PetscErrorCode ierr;
2407   PetscInt       i,*idxb = 0;
2408   PetscScalar    *va,*vb;
2409   Vec            vtmp;
2410 
2411   PetscFunctionBegin;
2412   ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr);
2413   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2414   if (idx) {
2415     for (i=0; i<A->rmap->n; i++) {
2416       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2417     }
2418   }
2419 
2420   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
2421   if (idx) {
2422     ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr);
2423   }
2424   ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
2425   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
2426 
2427   for (i=0; i<A->rmap->n; i++){
2428     if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
2429       va[i] = vb[i];
2430       if (idx) idx[i] = a->garray[idxb[i]];
2431     }
2432   }
2433 
2434   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2435   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
2436   if (idxb) {
2437     ierr = PetscFree(idxb);CHKERRQ(ierr);
2438   }
2439   ierr = VecDestroy(vtmp);CHKERRQ(ierr);
2440   PetscFunctionReturn(0);
2441 }
2442 
2443 #undef __FUNCT__
2444 #define __FUNCT__ "MatGetRowMinAbs_MPIAIJ"
2445 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2446 {
2447   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2448   PetscErrorCode ierr;
2449   PetscInt       i,*idxb = 0;
2450   PetscScalar    *va,*vb;
2451   Vec            vtmp;
2452 
2453   PetscFunctionBegin;
2454   ierr = MatGetRowMinAbs(a->A,v,idx);CHKERRQ(ierr);
2455   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2456   if (idx) {
2457     for (i=0; i<A->cmap->n; i++) {
2458       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2459     }
2460   }
2461 
2462   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
2463   if (idx) {
2464     ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr);
2465   }
2466   ierr = MatGetRowMinAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
2467   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
2468 
2469   for (i=0; i<A->rmap->n; i++){
2470     if (PetscAbsScalar(va[i]) > PetscAbsScalar(vb[i])) {
2471       va[i] = vb[i];
2472       if (idx) idx[i] = a->garray[idxb[i]];
2473     }
2474   }
2475 
2476   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2477   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
2478   if (idxb) {
2479     ierr = PetscFree(idxb);CHKERRQ(ierr);
2480   }
2481   ierr = VecDestroy(vtmp);CHKERRQ(ierr);
2482   PetscFunctionReturn(0);
2483 }
2484 
2485 #undef __FUNCT__
2486 #define __FUNCT__ "MatGetRowMin_MPIAIJ"
2487 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2488 {
2489   Mat_MPIAIJ    *mat    = (Mat_MPIAIJ *) A->data;
2490   PetscInt       n      = A->rmap->n;
2491   PetscInt       cstart = A->cmap->rstart;
2492   PetscInt      *cmap   = mat->garray;
2493   PetscInt      *diagIdx, *offdiagIdx;
2494   Vec            diagV, offdiagV;
2495   PetscScalar   *a, *diagA, *offdiagA;
2496   PetscInt       r;
2497   PetscErrorCode ierr;
2498 
2499   PetscFunctionBegin;
2500   ierr = PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);CHKERRQ(ierr);
2501   ierr = VecCreateSeq(((PetscObject)A)->comm, n, &diagV);CHKERRQ(ierr);
2502   ierr = VecCreateSeq(((PetscObject)A)->comm, n, &offdiagV);CHKERRQ(ierr);
2503   ierr = MatGetRowMin(mat->A, diagV,    diagIdx);CHKERRQ(ierr);
2504   ierr = MatGetRowMin(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
2505   ierr = VecGetArray(v,        &a);CHKERRQ(ierr);
2506   ierr = VecGetArray(diagV,    &diagA);CHKERRQ(ierr);
2507   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2508   for(r = 0; r < n; ++r) {
2509     if (PetscAbsScalar(diagA[r]) <= PetscAbsScalar(offdiagA[r])) {
2510       a[r]   = diagA[r];
2511       idx[r] = cstart + diagIdx[r];
2512     } else {
2513       a[r]   = offdiagA[r];
2514       idx[r] = cmap[offdiagIdx[r]];
2515     }
2516   }
2517   ierr = VecRestoreArray(v,        &a);CHKERRQ(ierr);
2518   ierr = VecRestoreArray(diagV,    &diagA);CHKERRQ(ierr);
2519   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2520   ierr = VecDestroy(diagV);CHKERRQ(ierr);
2521   ierr = VecDestroy(offdiagV);CHKERRQ(ierr);
2522   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
2523   PetscFunctionReturn(0);
2524 }
2525 
2526 #undef __FUNCT__
2527 #define __FUNCT__ "MatGetRowMax_MPIAIJ"
2528 PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2529 {
2530   Mat_MPIAIJ    *mat    = (Mat_MPIAIJ *) A->data;
2531   PetscInt       n      = A->rmap->n;
2532   PetscInt       cstart = A->cmap->rstart;
2533   PetscInt      *cmap   = mat->garray;
2534   PetscInt      *diagIdx, *offdiagIdx;
2535   Vec            diagV, offdiagV;
2536   PetscScalar   *a, *diagA, *offdiagA;
2537   PetscInt       r;
2538   PetscErrorCode ierr;
2539 
2540   PetscFunctionBegin;
2541   ierr = PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);CHKERRQ(ierr);
2542   ierr = VecCreateSeq(((PetscObject)A)->comm, n, &diagV);CHKERRQ(ierr);
2543   ierr = VecCreateSeq(((PetscObject)A)->comm, n, &offdiagV);CHKERRQ(ierr);
2544   ierr = MatGetRowMax(mat->A, diagV,    diagIdx);CHKERRQ(ierr);
2545   ierr = MatGetRowMax(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
2546   ierr = VecGetArray(v,        &a);CHKERRQ(ierr);
2547   ierr = VecGetArray(diagV,    &diagA);CHKERRQ(ierr);
2548   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2549   for(r = 0; r < n; ++r) {
2550     if (PetscAbsScalar(diagA[r]) >= PetscAbsScalar(offdiagA[r])) {
2551       a[r]   = diagA[r];
2552       idx[r] = cstart + diagIdx[r];
2553     } else {
2554       a[r]   = offdiagA[r];
2555       idx[r] = cmap[offdiagIdx[r]];
2556     }
2557   }
2558   ierr = VecRestoreArray(v,        &a);CHKERRQ(ierr);
2559   ierr = VecRestoreArray(diagV,    &diagA);CHKERRQ(ierr);
2560   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2561   ierr = VecDestroy(diagV);CHKERRQ(ierr);
2562   ierr = VecDestroy(offdiagV);CHKERRQ(ierr);
2563   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
2564   PetscFunctionReturn(0);
2565 }
2566 
2567 #undef __FUNCT__
2568 #define __FUNCT__ "MatGetSeqNonzerostructure_MPIAIJ"
2569 PetscErrorCode MatGetSeqNonzerostructure_MPIAIJ(Mat mat,Mat *newmat)
2570 {
2571   PetscErrorCode ierr;
2572   Mat            *dummy;
2573 
2574   PetscFunctionBegin;
2575   ierr = MatGetSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy);CHKERRQ(ierr);
2576   *newmat = *dummy;
2577   ierr = PetscFree(dummy);CHKERRQ(ierr);
2578   PetscFunctionReturn(0);
2579 }
2580 
2581 extern PetscErrorCode PETSCMAT_DLLEXPORT MatFDColoringApply_AIJ(Mat,MatFDColoring,Vec,MatStructure*,void*);
2582 /* -------------------------------------------------------------------*/
/*
   Function table for the MPIAIJ matrix type.  The initializer is POSITIONAL:
   each entry corresponds to a fixed slot in struct _MatOps (see matimpl.h),
   so entries must never be reordered; the slot-number comments mark every
   fifth position.  A 0 means the operation is not supported by this type.
*/
static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
       MatGetRow_MPIAIJ,
       MatRestoreRow_MPIAIJ,
       MatMult_MPIAIJ,
/* 4*/ MatMultAdd_MPIAIJ,
       MatMultTranspose_MPIAIJ,
       MatMultTransposeAdd_MPIAIJ,
#ifdef PETSC_HAVE_PBGL
       MatSolve_MPIAIJ,
#else
       0,
#endif
       0,
       0,
/*10*/ 0,
       0,
       0,
       MatSOR_MPIAIJ,
       MatTranspose_MPIAIJ,
/*15*/ MatGetInfo_MPIAIJ,
       MatEqual_MPIAIJ,
       MatGetDiagonal_MPIAIJ,
       MatDiagonalScale_MPIAIJ,
       MatNorm_MPIAIJ,
/*20*/ MatAssemblyBegin_MPIAIJ,
       MatAssemblyEnd_MPIAIJ,
       MatSetOption_MPIAIJ,
       MatZeroEntries_MPIAIJ,
/*24*/ MatZeroRows_MPIAIJ,
       0,
#ifdef PETSC_HAVE_PBGL
       0,
#else
       0,
#endif
       0,
       0,
/*29*/ MatSetUpPreallocation_MPIAIJ,
#ifdef PETSC_HAVE_PBGL
       0,
#else
       0,
#endif
       0,
       0,
       0,
/*34*/ MatDuplicate_MPIAIJ,
       0,
       0,
       0,
       0,
/*39*/ MatAXPY_MPIAIJ,
       MatGetSubMatrices_MPIAIJ,
       MatIncreaseOverlap_MPIAIJ,
       MatGetValues_MPIAIJ,
       MatCopy_MPIAIJ,
/*44*/ MatGetRowMax_MPIAIJ,
       MatScale_MPIAIJ,
       0,
       0,
       0,
/*49*/ MatSetBlockSize_MPIAIJ,
       0,
       0,
       0,
       0,
/*54*/ MatFDColoringCreate_MPIAIJ,
       0,
       MatSetUnfactored_MPIAIJ,
       MatPermute_MPIAIJ,
       0,
/*59*/ MatGetSubMatrix_MPIAIJ,
       MatDestroy_MPIAIJ,
       MatView_MPIAIJ,
       0,
       0,
/*64*/ 0,
       0,
       0,
       0,
       0,
/*69*/ MatGetRowMaxAbs_MPIAIJ,
       MatGetRowMinAbs_MPIAIJ,
       0,
       MatSetColoring_MPIAIJ,
#if defined(PETSC_HAVE_ADIC)
       MatSetValuesAdic_MPIAIJ,
#else
       0,
#endif
       MatSetValuesAdifor_MPIAIJ,
/*75*/ MatFDColoringApply_AIJ,
       0,
       0,
       0,
       0,
/*80*/ 0,
       0,
       0,
/*83*/ MatLoad_MPIAIJ,
       0,
       0,
       0,
       0,
       0,
/*89*/ MatMatMult_MPIAIJ_MPIAIJ,
       MatMatMultSymbolic_MPIAIJ_MPIAIJ,
       MatMatMultNumeric_MPIAIJ_MPIAIJ,
       MatPtAP_Basic,
       MatPtAPSymbolic_MPIAIJ,
/*94*/ MatPtAPNumeric_MPIAIJ,
       0,
       0,
       0,
       0,
/*99*/ 0,
       MatPtAPSymbolic_MPIAIJ_MPIAIJ,
       MatPtAPNumeric_MPIAIJ_MPIAIJ,
       MatConjugate_MPIAIJ,
       0,
/*104*/MatSetValuesRow_MPIAIJ,
       MatRealPart_MPIAIJ,
       MatImaginaryPart_MPIAIJ,
       0,
       0,
/*109*/0,
       MatGetRedundantMatrix_MPIAIJ,
       MatGetRowMin_MPIAIJ,
       0,
       0,
/*114*/MatGetSeqNonzerostructure_MPIAIJ,
       0,
       0,
       0,
       0,
       0
};
2720 
2721 /* ----------------------------------------------------------------------------------------*/
2722 
2723 EXTERN_C_BEGIN
2724 #undef __FUNCT__
2725 #define __FUNCT__ "MatStoreValues_MPIAIJ"
2726 PetscErrorCode PETSCMAT_DLLEXPORT MatStoreValues_MPIAIJ(Mat mat)
2727 {
2728   Mat_MPIAIJ     *aij = (Mat_MPIAIJ *)mat->data;
2729   PetscErrorCode ierr;
2730 
2731   PetscFunctionBegin;
2732   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
2733   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
2734   PetscFunctionReturn(0);
2735 }
2736 EXTERN_C_END
2737 
2738 EXTERN_C_BEGIN
2739 #undef __FUNCT__
2740 #define __FUNCT__ "MatRetrieveValues_MPIAIJ"
2741 PetscErrorCode PETSCMAT_DLLEXPORT MatRetrieveValues_MPIAIJ(Mat mat)
2742 {
2743   Mat_MPIAIJ     *aij = (Mat_MPIAIJ *)mat->data;
2744   PetscErrorCode ierr;
2745 
2746   PetscFunctionBegin;
2747   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
2748   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
2749   PetscFunctionReturn(0);
2750 }
2751 EXTERN_C_END
2752 
2753 #include "petscpc.h"
2754 EXTERN_C_BEGIN
2755 #undef __FUNCT__
2756 #define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ"
2757 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
2758 {
2759   Mat_MPIAIJ     *b;
2760   PetscErrorCode ierr;
2761   PetscInt       i;
2762 
2763   PetscFunctionBegin;
2764   if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5;
2765   if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2;
2766   if (d_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz);
2767   if (o_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz);
2768 
2769   ierr = PetscLayoutSetBlockSize(B->rmap,1);CHKERRQ(ierr);
2770   ierr = PetscLayoutSetBlockSize(B->cmap,1);CHKERRQ(ierr);
2771   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
2772   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
2773   if (d_nnz) {
2774     for (i=0; i<B->rmap->n; i++) {
2775       if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %D value %D",i,d_nnz[i]);
2776     }
2777   }
2778   if (o_nnz) {
2779     for (i=0; i<B->rmap->n; i++) {
2780       if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %D value %D",i,o_nnz[i]);
2781     }
2782   }
2783   b = (Mat_MPIAIJ*)B->data;
2784 
2785   if (!B->preallocated) {
2786     /* Explicitly create 2 MATSEQAIJ matrices. */
2787     ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
2788     ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr);
2789     ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr);
2790     ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr);
2791     ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
2792     ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr);
2793     ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr);
2794     ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr);
2795   }
2796 
2797   ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr);
2798   ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr);
2799   B->preallocated = PETSC_TRUE;
2800   PetscFunctionReturn(0);
2801 }
2802 EXTERN_C_END
2803 
2804 #undef __FUNCT__
2805 #define __FUNCT__ "MatDuplicate_MPIAIJ"
/*
    MatDuplicate_MPIAIJ - Creates a new MPIAIJ matrix with the same layout,
  communication structure, and (depending on cpvalues) values as matin.
  Duplicates both sequential blocks, the column maps, the local work vector,
  and the scatter context.
*/
PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
{
  Mat            mat;
  Mat_MPIAIJ     *a,*oldmat = (Mat_MPIAIJ*)matin->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  *newmat       = 0;
  ierr = MatCreate(((PetscObject)matin)->comm,&mat);CHKERRQ(ierr);
  ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr);
  ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr);
  /* copy the entire function table so any per-object overrides carry over */
  ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
  a    = (Mat_MPIAIJ*)mat->data;

  /* mirror the assembled state; the duplicate is immediately usable */
  mat->factor       = matin->factor;
  mat->rmap->bs      = matin->rmap->bs;
  mat->assembled    = PETSC_TRUE;
  mat->insertmode   = NOT_SET_VALUES;
  mat->preallocated = PETSC_TRUE;

  a->size           = oldmat->size;
  a->rank           = oldmat->rank;
  a->donotstash     = oldmat->donotstash;
  a->roworiented    = oldmat->roworiented;
  /* per-call MatGetRow() scratch state starts empty in the duplicate */
  a->rowindices     = 0;
  a->rowvalues      = 0;
  a->getrowactive   = PETSC_FALSE;

  ierr = PetscLayoutCopy(matin->rmap,&mat->rmap);CHKERRQ(ierr);
  ierr = PetscLayoutCopy(matin->cmap,&mat->cmap);CHKERRQ(ierr);

  ierr = MatStashCreate_Private(((PetscObject)matin)->comm,1,&mat->stash);CHKERRQ(ierr);
  /* colmap translates global columns to off-diagonal block-local columns;
     stored either as a hash table or as a dense global-length array */
  if (oldmat->colmap) {
#if defined (PETSC_USE_CTABLE)
    ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
#else
    ierr = PetscMalloc((mat->cmap->N)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr);
    ierr = PetscLogObjectMemory(mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
    ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
#endif
  } else a->colmap = 0;
  /* garray is the inverse map: off-diagonal block-local column -> global */
  if (oldmat->garray) {
    PetscInt len;
    len  = oldmat->B->cmap->n;
    ierr = PetscMalloc((len+1)*sizeof(PetscInt),&a->garray);CHKERRQ(ierr);
    ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr);
    if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); }
  } else a->garray = 0;

  /* duplicate the ghost-value vector, the scatter, and both blocks */
  ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr);
  ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr);
  ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr);
  ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr);
  ierr = PetscFListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr);
  *newmat = mat;
  PetscFunctionReturn(0);
}
2867 
2868 #include "petscsys.h"
2869 
2870 #undef __FUNCT__
2871 #define __FUNCT__ "MatLoad_MPIAIJ"
/*
    MatLoad_MPIAIJ - Loads a matrix stored in PETSc binary format and
  distributes it as an MPIAIJ matrix.

  Protocol: rank 0 reads the file (header, row lengths, column indices,
  values) and ships each other rank its slice with point-to-point sends;
  every rank then inserts its own rows and assembles.  Rows are distributed
  as evenly as possible, with the remainder going to the lowest ranks.
*/
PetscErrorCode MatLoad_MPIAIJ(PetscViewer viewer, const MatType type,Mat *newmat)
{
  Mat            A;
  PetscScalar    *vals,*svals;
  MPI_Comm       comm = ((PetscObject)viewer)->comm;
  MPI_Status     status;
  PetscErrorCode ierr;
  PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag,mpicnt,mpimaxnz;
  PetscInt       i,nz,j,rstart,rend,mmax,maxnz = 0;
  PetscInt       header[4],*rowlengths = 0,M,N,m,*cols;
  PetscInt       *ourlens = PETSC_NULL,*procsnz = PETSC_NULL,*offlens = PETSC_NULL,jj,*mycols,*smycols;
  PetscInt       cend,cstart,n,*rowners;
  int            fd;

  PetscFunctionBegin;
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  /* only rank 0 touches the file; it validates the cookie then broadcasts
     the global dimensions */
  if (!rank) {
    ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
    ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
    if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
  }

  ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
  M = header[1]; N = header[2];
  /* determine ownership of all rows: remainder rows go to the lowest ranks */
  m    = M/size + ((M % size) > rank);
  ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr);
  /* rowners[1..size] holds each rank's row count; prefix-summed below */
  ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);

  /* First process needs enough room for process with most rows */
  if (!rank) {
    mmax       = rowners[1];
    /* NOTE(review): this loop stops at i=size-1 and never examines
       rowners[size] (the last rank's count).  Harmless with the row
       distribution above, where remainder rows go to the lowest ranks so
       the last rank never has the most rows -- but worth confirming. */
    for (i=2; i<size; i++) {
      mmax = PetscMax(mmax,rowners[i]);
    }
  } else mmax = m;

  /* turn per-rank counts into row-range offsets: rowners[r]..rowners[r+1] */
  rowners[0] = 0;
  for (i=2; i<=size; i++) {
    rowners[i] += rowners[i-1];
  }
  rstart = rowners[rank];
  rend   = rowners[rank+1];

  /* distribute row lengths to all processors */
  ierr    = PetscMalloc2(mmax,PetscInt,&ourlens,mmax,PetscInt,&offlens);CHKERRQ(ierr);
  if (!rank) {
    ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr);
    ierr = PetscMalloc(m*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
    ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr);
    ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr);
    for (j=0; j<m; j++) {
      procsnz[0] += ourlens[j];
    }
    for (i=1; i<size; i++) {
      ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr);
      /* calculate the number of nonzeros on each processor */
      for (j=0; j<rowners[i+1]-rowners[i]; j++) {
        procsnz[i] += rowlengths[j];
      }
      mpicnt = PetscMPIIntCast(rowners[i+1]-rowners[i]);
      ierr   = MPI_Send(rowlengths,mpicnt,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
    }
    ierr = PetscFree(rowlengths);CHKERRQ(ierr);
  } else {
    /* NOTE(review): this CHKERRQ checks the stale ierr from the previous
       call -- PetscMPIIntCast() here yields a value, not an error code */
    mpicnt = PetscMPIIntCast(m);CHKERRQ(ierr);
    ierr   = MPI_Recv(ourlens,mpicnt,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
  }

  if (!rank) {
    /* determine max buffer needed and allocate it */
    maxnz = 0;
    for (i=0; i<size; i++) {
      maxnz = PetscMax(maxnz,procsnz[i]);
    }
    ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr);

    /* read in my part of the matrix column indices  */
    nz   = procsnz[0];
    ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr);
    ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);

    /* read in every one elses and ship off */
    for (i=1; i<size; i++) {
      nz     = procsnz[i];
      ierr   = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
      mpicnt = PetscMPIIntCast(nz);
      ierr   = MPI_Send(cols,mpicnt,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
    }
    ierr = PetscFree(cols);CHKERRQ(ierr);
  } else {
    /* determine buffer space needed for message */
    nz = 0;
    for (i=0; i<m; i++) {
      nz += ourlens[i];
    }
    ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr);

    /* receive message of column indices*/
    /* NOTE(review): stray CHKERRQ on stale ierr here as well */
    mpicnt = PetscMPIIntCast(nz);CHKERRQ(ierr);
    ierr = MPI_Recv(mycols,mpicnt,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
    /* sanity-check that the message length matches what the file implied */
    ierr = MPI_Get_count(&status,MPIU_INT,&mpimaxnz);CHKERRQ(ierr);
    if (mpimaxnz == MPI_UNDEFINED) {SETERRQ1(PETSC_ERR_LIB,"MPI_Get_count() returned MPI_UNDEFINED, expected %d",mpicnt);}
    else if (mpimaxnz < 0) {SETERRQ2(PETSC_ERR_LIB,"MPI_Get_count() returned impossible negative value %d, expected %d",mpimaxnz,mpicnt);}
    else if (mpimaxnz != mpicnt) {SETERRQ2(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file: expected %d received %d",mpicnt,mpimaxnz);}
  }

  /* determine column ownership if matrix is not square */
  if (N != M) {
    n      = N/size + ((N % size) > rank);
    ierr   = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
    cstart = cend - n;
  } else {
    cstart = rstart;
    cend   = rend;
    n      = cend - cstart;
  }

  /* loop over local rows, determining number of off diagonal entries */
  ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr);
  jj = 0;
  for (i=0; i<m; i++) {
    for (j=0; j<ourlens[i]; j++) {
      if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++;
      jj++;
    }
  }

  /* create our matrix */
  /* temporarily make ourlens hold only the diagonal-block counts so the
     preallocation call below sees (diag, offdiag) separately */
  for (i=0; i<m; i++) {
    ourlens[i] -= offlens[i];
  }
  ierr = MatCreate(comm,&A);CHKERRQ(ierr);
  ierr = MatSetSizes(A,m,n,M,N);CHKERRQ(ierr);
  ierr = MatSetType(A,type);CHKERRQ(ierr);
  ierr = MatMPIAIJSetPreallocation(A,0,ourlens,0,offlens);CHKERRQ(ierr);

  /* restore ourlens to full row lengths for the insertion loops */
  for (i=0; i<m; i++) {
    ourlens[i] += offlens[i];
  }

  if (!rank) {
    ierr = PetscMalloc((maxnz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);

    /* read in my part of the matrix numerical values  */
    nz   = procsnz[0];
    ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);

    /* insert into matrix */
    jj      = rstart;
    smycols = mycols;
    svals   = vals;
    for (i=0; i<m; i++) {
      ierr = MatSetValues_MPIAIJ(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
      smycols += ourlens[i];
      svals   += ourlens[i];
      jj++;
    }

    /* read in other processors and ship out */
    for (i=1; i<size; i++) {
      nz     = procsnz[i];
      ierr   = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
      mpicnt = PetscMPIIntCast(nz);
      ierr   = MPI_Send(vals,mpicnt,MPIU_SCALAR,i,((PetscObject)A)->tag,comm);CHKERRQ(ierr);
    }
    ierr = PetscFree(procsnz);CHKERRQ(ierr);
  } else {
    /* receive numeric values */
    ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);

    /* receive message of values*/
    mpicnt = PetscMPIIntCast(nz);
    ierr   = MPI_Recv(vals,mpicnt,MPIU_SCALAR,0,((PetscObject)A)->tag,comm,&status);CHKERRQ(ierr);
    /* same length sanity check as for the column indices */
    ierr   = MPI_Get_count(&status,MPIU_SCALAR,&mpimaxnz);CHKERRQ(ierr);
    if (mpimaxnz == MPI_UNDEFINED) {SETERRQ1(PETSC_ERR_LIB,"MPI_Get_count() returned MPI_UNDEFINED, expected %d",mpicnt);}
    else if (mpimaxnz < 0) {SETERRQ2(PETSC_ERR_LIB,"MPI_Get_count() returned impossible negative value %d, expected %d",mpimaxnz,mpicnt);}
    else if (mpimaxnz != mpicnt) {SETERRQ2(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file: expected %d received %d",mpicnt,mpimaxnz);}

    /* insert into matrix */
    jj      = rstart;
    smycols = mycols;
    svals   = vals;
    for (i=0; i<m; i++) {
      ierr     = MatSetValues_MPIAIJ(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
      smycols += ourlens[i];
      svals   += ourlens[i];
      jj++;
    }
  }
  ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr);
  ierr = PetscFree(vals);CHKERRQ(ierr);
  ierr = PetscFree(mycols);CHKERRQ(ierr);
  ierr = PetscFree(rowners);CHKERRQ(ierr);

  ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  *newmat = A;
  PetscFunctionReturn(0);
}
3073 
3074 #undef __FUNCT__
3075 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ"
3076 PetscErrorCode MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat)
3077 {
3078   PetscErrorCode ierr;
3079   IS             iscol_local;
3080   PetscInt       csize;
3081 
3082   PetscFunctionBegin;
3083   ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
3084   if (call == MAT_REUSE_MATRIX) {
3085     ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr);
3086     if (!iscol_local) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3087   } else {
3088     ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
3089   }
3090   ierr = MatGetSubMatrix_MPIAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr);
3091   if (call == MAT_INITIAL_MATRIX) {
3092     ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
3093     ierr = ISDestroy(iscol_local);CHKERRQ(ierr);
3094   }
3095   PetscFunctionReturn(0);
3096 }
3097 
3098 #undef __FUNCT__
3099 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ_Private"
3100 /*
3101     Not great since it makes two copies of the submatrix, first an SeqAIJ
3102   in local and then by concatenating the local matrices the end result.
3103   Writing it directly would be much like MatGetSubMatrices_MPIAIJ()
3104 
3105   Note: This requires a sequential iscol with all indices.
3106 */
PetscErrorCode MatGetSubMatrix_MPIAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat)
{
  PetscErrorCode ierr;
  PetscMPIInt    rank,size;
  PetscInt       i,m,n,rstart,row,rend,nz,*cwork,j;
  PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal;
  Mat            *local,M,Mreuse;
  MatScalar      *vwork,*aa;
  MPI_Comm       comm = ((PetscObject)mat)->comm;
  Mat_SeqAIJ     *aij;


  PetscFunctionBegin;
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);

  /* Obtain the local sequential submatrix Mreuse: either recover the one cached
     on *newmat by a previous MAT_INITIAL_MATRIX call, or compute it afresh. */
  if (call ==  MAT_REUSE_MATRIX) {
    ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr);
    if (!Mreuse) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
    local = &Mreuse;
    ierr  = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);CHKERRQ(ierr);
  } else {
    ierr   = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr);
    Mreuse = *local;
    /* frees only the one-element array of matrix pointers, not the matrix itself */
    ierr   = PetscFree(local);CHKERRQ(ierr);
  }

  /*
      m - number of local rows
      n - number of columns (same on all processors)
      rstart - first row in new global matrix generated
  */
  ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
  if (call == MAT_INITIAL_MATRIX) {
    aij = (Mat_SeqAIJ*)(Mreuse)->data;
    ii  = aij->i;
    jj  = aij->j;

    /*
        Determine the number of non-zeros in the diagonal and off-diagonal
        portions of the matrix in order to do correct preallocation
    */

    /* first get start and end of "diagonal" columns */
    if (csize == PETSC_DECIDE) {
      ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
      if (mglobal == n) { /* square matrix */
	nlocal = m;
      } else {
        /* spread columns as evenly as possible; the first n%size ranks get one extra */
        nlocal = n/size + ((n % size) > rank);
      }
    } else {
      nlocal = csize;
    }
    /* prefix sum of local column counts yields this rank's column ownership range */
    ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
    rstart = rend - nlocal;
    if (rank == size - 1 && rend != n) {
      SETERRQ2(PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n);
    }

    /* next, compute all the lengths */
    ierr  = PetscMalloc((2*m+1)*sizeof(PetscInt),&dlens);CHKERRQ(ierr);
    olens = dlens + m;  /* olens shares the single allocation with dlens */
    for (i=0; i<m; i++) {
      jend = ii[i+1] - ii[i];
      olen = 0;
      dlen = 0;
      /* jj walks the column indices of row i; entries inside [rstart,rend)
         belong to the diagonal block, the rest to the off-diagonal block */
      for (j=0; j<jend; j++) {
        if (*jj < rstart || *jj >= rend) olen++;
        else dlen++;
        jj++;
      }
      olens[i] = olen;
      dlens[i] = dlen;
    }
    ierr = MatCreate(comm,&M);CHKERRQ(ierr);
    ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr);
    ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr);
    ierr = PetscFree(dlens);CHKERRQ(ierr);
  } else {
    PetscInt ml,nl;

    M = *newmat;
    ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
    if (ml != m) SETERRQ(PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
    ierr = MatZeroEntries(M);CHKERRQ(ierr);
    /*
         The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
       rather than the slower MatSetValues().
    */
    M->was_assembled = PETSC_TRUE;
    M->assembled     = PETSC_FALSE;
  }
  /* copy the sequential submatrix row by row into the parallel matrix, renumbering
     rows into the new global row numbering; cwork/vwork alias each row's slice of
     the CSR arrays, no copies are made here */
  ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
  aij = (Mat_SeqAIJ*)(Mreuse)->data;
  ii  = aij->i;
  jj  = aij->j;
  aa  = aij->a;
  for (i=0; i<m; i++) {
    row   = rstart + i;
    nz    = ii[i+1] - ii[i];
    cwork = jj;     jj += nz;
    vwork = aa;     aa += nz;
    ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
  }

  ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  *newmat = M;

  /* save submatrix used in processor for next request */
  if (call ==  MAT_INITIAL_MATRIX) {
    /* the compose takes a reference to Mreuse, so drop ours */
    ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
    ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr);
  }

  PetscFunctionReturn(0);
}
3226 
3227 EXTERN_C_BEGIN
3228 #undef __FUNCT__
3229 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR_MPIAIJ"
3230 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[])
3231 {
3232   PetscInt       m,cstart, cend,j,nnz,i,d;
3233   PetscInt       *d_nnz,*o_nnz,nnz_max = 0,rstart,ii;
3234   const PetscInt *JJ;
3235   PetscScalar    *values;
3236   PetscErrorCode ierr;
3237 
3238   PetscFunctionBegin;
3239   if (Ii[0]) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0 it is %D",Ii[0]);
3240 
3241   ierr = PetscLayoutSetBlockSize(B->rmap,1);CHKERRQ(ierr);
3242   ierr = PetscLayoutSetBlockSize(B->cmap,1);CHKERRQ(ierr);
3243   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3244   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3245   m      = B->rmap->n;
3246   cstart = B->cmap->rstart;
3247   cend   = B->cmap->rend;
3248   rstart = B->rmap->rstart;
3249 
3250   ierr  = PetscMalloc2(m,PetscInt,&d_nnz,m,PetscInt,&o_nnz);CHKERRQ(ierr);
3251 
3252 #if defined(PETSC_USE_DEBUGGING)
3253   for (i=0; i<m; i++) {
3254     nnz     = Ii[i+1]- Ii[i];
3255     JJ      = J + Ii[i];
3256     if (nnz < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative %D number of columns",i,nnz);
3257     if (nnz && (JJ[0] < 0)) SETERRRQ1(PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index",i,j);
3258     if (nnz && (JJ[nnz-1] >= B->cmap->N) SETERRRQ3(PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N);
3259   }
3260 #endif
3261 
3262   for (i=0; i<m; i++) {
3263     nnz     = Ii[i+1]- Ii[i];
3264     JJ      = J + Ii[i];
3265     nnz_max = PetscMax(nnz_max,nnz);
3266     d       = 0;
3267     for (j=0; j<nnz; j++) {
3268       if (cstart <= JJ[j] && JJ[j] < cend) d++;
3269     }
3270     d_nnz[i] = d;
3271     o_nnz[i] = nnz - d;
3272   }
3273   ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
3274   ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);
3275 
3276   if (v) values = (PetscScalar*)v;
3277   else {
3278     ierr = PetscMalloc((nnz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr);
3279     ierr = PetscMemzero(values,nnz_max*sizeof(PetscScalar));CHKERRQ(ierr);
3280   }
3281 
3282   for (i=0; i<m; i++) {
3283     ii   = i + rstart;
3284     nnz  = Ii[i+1]- Ii[i];
3285     ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ? Ii[i] : 0),INSERT_VALUES);CHKERRQ(ierr);
3286   }
3287   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3288   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3289 
3290   if (!v) {
3291     ierr = PetscFree(values);CHKERRQ(ierr);
3292   }
3293   PetscFunctionReturn(0);
3294 }
3295 EXTERN_C_END
3296 
3297 #undef __FUNCT__
3298 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR"
3299 /*@
3300    MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format
3301    (the default parallel PETSc format).
3302 
3303    Collective on MPI_Comm
3304 
3305    Input Parameters:
3306 +  B - the matrix
3307 .  i - the indices into j for the start of each local row (starts with zero)
3308 .  j - the column indices for each local row (starts with zero)
3309 -  v - optional values in the matrix
3310 
3311    Level: developer
3312 
3313    Notes:
3314        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
3315      thus you CANNOT change the matrix entries by changing the values of a[] after you have
3316      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3317 
3318        The i and j indices are 0 based, and i indices are indices corresponding to the local j array.
3319 
3320        The format which is used for the sparse matrix input, is equivalent to a
3321     row-major ordering.. i.e for the following matrix, the input data expected is
3322     as shown:
3323 
3324         1 0 0
3325         2 0 3     P0
3326        -------
3327         4 5 6     P1
3328 
3329      Process0 [P0]: rows_owned=[0,1]
3330         i =  {0,1,3}  [size = nrow+1  = 2+1]
        j =  {0,0,2}  [size = nz = 3]
        v =  {1,2,3}  [size = nz = 3]
3333 
3334      Process1 [P1]: rows_owned=[2]
3335         i =  {0,3}    [size = nrow+1  = 1+1]
        j =  {0,1,2}  [size = nz = 3]
        v =  {4,5,6}  [size = nz = 3]
3338 
3339 .keywords: matrix, aij, compressed row, sparse, parallel
3340 
3341 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateMPIAIJ(), MPIAIJ,
3342           MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays()
3343 @*/
3344 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
3345 {
3346   PetscErrorCode ierr,(*f)(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]);
3347 
3348   PetscFunctionBegin;
3349   ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",(void (**)(void))&f);CHKERRQ(ierr);
3350   if (f) {
3351     ierr = (*f)(B,i,j,v);CHKERRQ(ierr);
3352   }
3353   PetscFunctionReturn(0);
3354 }
3355 
3356 #undef __FUNCT__
3357 #define __FUNCT__ "MatMPIAIJSetPreallocation"
3358 /*@C
3359    MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format
3360    (the default parallel PETSc format).  For good matrix assembly performance
3361    the user should preallocate the matrix storage by setting the parameters
3362    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3363    performance can be increased by more than a factor of 50.
3364 
3365    Collective on MPI_Comm
3366 
3367    Input Parameters:
3368 +  A - the matrix
3369 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
3370            (same value is used for all local rows)
3371 .  d_nnz - array containing the number of nonzeros in the various rows of the
3372            DIAGONAL portion of the local submatrix (possibly different for each row)
3373            or PETSC_NULL, if d_nz is used to specify the nonzero structure.
3374            The size of this array is equal to the number of local rows, i.e 'm'.
3375            You must leave room for the diagonal entry even if it is zero.
3376 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
3377            submatrix (same value is used for all local rows).
3378 -  o_nnz - array containing the number of nonzeros in the various rows of the
3379            OFF-DIAGONAL portion of the local submatrix (possibly different for
3380            each row) or PETSC_NULL, if o_nz is used to specify the nonzero
3381            structure. The size of this array is equal to the number
3382            of local rows, i.e 'm'.
3383 
3384    If the *_nnz parameter is given then the *_nz parameter is ignored
3385 
3386    The AIJ format (also called the Yale sparse matrix format or
3387    compressed row storage (CSR)), is fully compatible with standard Fortran 77
3388    storage.  The stored row and column indices begin with zero.  See the users manual for details.
3389 
3390    The parallel matrix is partitioned such that the first m0 rows belong to
3391    process 0, the next m1 rows belong to process 1, the next m2 rows belong
3392    to process 2 etc.. where m0,m1,m2... are the input parameter 'm'.
3393 
3394    The DIAGONAL portion of the local submatrix of a processor can be defined
3395    as the submatrix which is obtained by extraction the part corresponding
3396    to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the
3397    first row that belongs to the processor, and r2 is the last row belonging
3398    to the this processor. This is a square mxm matrix. The remaining portion
3399    of the local submatrix (mxN) constitute the OFF-DIAGONAL portion.
3400 
3401    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
3402 
3403    You can call MatGetInfo() to get information on how effective the preallocation was;
3404    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
3405    You can also run with the option -info and look for messages with the string
3406    malloc in them to see if additional memory allocation was needed.
3407 
3408    Example usage:
3409 
3410    Consider the following 8x8 matrix with 34 non-zero values, that is
3411    assembled across 3 processors. Lets assume that proc0 owns 3 rows,
3412    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
3413    as follows:
3414 
3415 .vb
3416             1  2  0  |  0  3  0  |  0  4
3417     Proc0   0  5  6  |  7  0  0  |  8  0
3418             9  0 10  | 11  0  0  | 12  0
3419     -------------------------------------
3420            13  0 14  | 15 16 17  |  0  0
3421     Proc1   0 18  0  | 19 20 21  |  0  0
3422             0  0  0  | 22 23  0  | 24  0
3423     -------------------------------------
3424     Proc2  25 26 27  |  0  0 28  | 29  0
3425            30  0  0  | 31 32 33  |  0 34
3426 .ve
3427 
3428    This can be represented as a collection of submatrices as:
3429 
3430 .vb
3431       A B C
3432       D E F
3433       G H I
3434 .ve
3435 
3436    Where the submatrices A,B,C are owned by proc0, D,E,F are
3437    owned by proc1, G,H,I are owned by proc2.
3438 
3439    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3440    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3441    The 'M','N' parameters are 8,8, and have the same values on all procs.
3442 
3443    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
3444    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
3445    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
3446    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
3447    part as SeqAIJ matrices. for eg: proc1 will store [E] as a SeqAIJ
   matrix, and [DF] as another SeqAIJ matrix.
3449 
3450    When d_nz, o_nz parameters are specified, d_nz storage elements are
3451    allocated for every row of the local diagonal submatrix, and o_nz
3452    storage locations are allocated for every row of the OFF-DIAGONAL submat.
   One way to choose d_nz and o_nz is to use the max nonzeros per local
3454    rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
3455    In this case, the values of d_nz,o_nz are:
3456 .vb
3457      proc0 : dnz = 2, o_nz = 2
3458      proc1 : dnz = 3, o_nz = 2
3459      proc2 : dnz = 1, o_nz = 4
3460 .ve
3461    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
3462    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
3463    for proc3. i.e we are using 12+15+10=37 storage locations to store
3464    34 values.
3465 
3466    When d_nnz, o_nnz parameters are specified, the storage is specified
   for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
3468    In the above case the values for d_nnz,o_nnz are:
3469 .vb
3470      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
3471      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
3472      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
3473 .ve
3474    Here the space allocated is sum of all the above values i.e 34, and
3475    hence pre-allocation is perfect.
3476 
3477    Level: intermediate
3478 
3479 .keywords: matrix, aij, compressed row, sparse, parallel
3480 
3481 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIAIJ(), MatMPIAIJSetPreallocationCSR(),
3482           MPIAIJ, MatGetInfo()
3483 @*/
3484 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
3485 {
3486   PetscErrorCode ierr,(*f)(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]);
3487 
3488   PetscFunctionBegin;
3489   ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr);
3490   if (f) {
3491     ierr = (*f)(B,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
3492   }
3493   PetscFunctionReturn(0);
3494 }
3495 
3496 #undef __FUNCT__
3497 #define __FUNCT__ "MatCreateMPIAIJWithArrays"
3498 /*@
3499      MatCreateMPIAIJWithArrays - creates a MPI AIJ matrix using arrays that contain in standard
3500          CSR format the local rows.
3501 
3502    Collective on MPI_Comm
3503 
3504    Input Parameters:
3505 +  comm - MPI communicator
3506 .  m - number of local rows (Cannot be PETSC_DECIDE)
3507 .  n - This value should be the same as the local size used in creating the
3508        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
3509        calculated if N is given) For square matrices n is almost always m.
3510 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
3511 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
3512 .   i - row indices
3513 .   j - column indices
3514 -   a - matrix values
3515 
3516    Output Parameter:
3517 .   mat - the matrix
3518 
3519    Level: intermediate
3520 
3521    Notes:
3522        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
3523      thus you CANNOT change the matrix entries by changing the values of a[] after you have
3524      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3525 
3526        The i and j indices are 0 based, and i indices are indices corresponding to the local j array.
3527 
3528        The format which is used for the sparse matrix input, is equivalent to a
3529     row-major ordering.. i.e for the following matrix, the input data expected is
3530     as shown:
3531 
3532         1 0 0
3533         2 0 3     P0
3534        -------
3535         4 5 6     P1
3536 
3537      Process0 [P0]: rows_owned=[0,1]
3538         i =  {0,1,3}  [size = nrow+1  = 2+1]
        j =  {0,0,2}  [size = nz = 3]
        v =  {1,2,3}  [size = nz = 3]
3541 
3542      Process1 [P1]: rows_owned=[2]
3543         i =  {0,3}    [size = nrow+1  = 1+1]
        j =  {0,1,2}  [size = nz = 3]
        v =  {4,5,6}  [size = nz = 3]
3546 
3547 .keywords: matrix, aij, compressed row, sparse, parallel
3548 
3549 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
3550           MPIAIJ, MatCreateMPIAIJ(), MatCreateMPIAIJWithSplitArrays()
3551 @*/
3552 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
3553 {
3554   PetscErrorCode ierr;
3555 
3556  PetscFunctionBegin;
3557   if (i[0]) {
3558     SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
3559   }
3560   if (m < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
3561   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
3562   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
3563   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
3564   ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr);
3565   PetscFunctionReturn(0);
3566 }
3567 
3568 #undef __FUNCT__
3569 #define __FUNCT__ "MatCreateMPIAIJ"
3570 /*@C
3571    MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format
3572    (the default parallel PETSc format).  For good matrix assembly performance
3573    the user should preallocate the matrix storage by setting the parameters
3574    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3575    performance can be increased by more than a factor of 50.
3576 
3577    Collective on MPI_Comm
3578 
3579    Input Parameters:
3580 +  comm - MPI communicator
3581 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
3582            This value should be the same as the local size used in creating the
3583            y vector for the matrix-vector product y = Ax.
3584 .  n - This value should be the same as the local size used in creating the
3585        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
3586        calculated if N is given) For square matrices n is almost always m.
3587 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
3588 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
3589 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
3590            (same value is used for all local rows)
3591 .  d_nnz - array containing the number of nonzeros in the various rows of the
3592            DIAGONAL portion of the local submatrix (possibly different for each row)
3593            or PETSC_NULL, if d_nz is used to specify the nonzero structure.
3594            The size of this array is equal to the number of local rows, i.e 'm'.
3595            You must leave room for the diagonal entry even if it is zero.
3596 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
3597            submatrix (same value is used for all local rows).
3598 -  o_nnz - array containing the number of nonzeros in the various rows of the
3599            OFF-DIAGONAL portion of the local submatrix (possibly different for
3600            each row) or PETSC_NULL, if o_nz is used to specify the nonzero
3601            structure. The size of this array is equal to the number
3602            of local rows, i.e 'm'.
3603 
3604    Output Parameter:
3605 .  A - the matrix
3606 
3607    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
   MatXXXXSetPreallocation() paradigm instead of this routine directly.
3609    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
3610 
3611    Notes:
3612    If the *_nnz parameter is given then the *_nz parameter is ignored
3613 
3614    m,n,M,N parameters specify the size of the matrix, and its partitioning across
3615    processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
3616    storage requirements for this matrix.
3617 
3618    If PETSC_DECIDE or  PETSC_DETERMINE is used for a particular argument on one
3619    processor than it must be used on all processors that share the object for
3620    that argument.
3621 
3622    The user MUST specify either the local or global matrix dimensions
3623    (possibly both).
3624 
3625    The parallel matrix is partitioned across processors such that the
3626    first m0 rows belong to process 0, the next m1 rows belong to
3627    process 1, the next m2 rows belong to process 2 etc.. where
3628    m0,m1,m2,.. are the input parameter 'm'. i.e each processor stores
3629    values corresponding to [m x N] submatrix.
3630 
3631    The columns are logically partitioned with the n0 columns belonging
3632    to 0th partition, the next n1 columns belonging to the next
   partition etc.. where n0,n1,n2... are the input parameter 'n'.
3634 
3635    The DIAGONAL portion of the local submatrix on any given processor
3636    is the submatrix corresponding to the rows and columns m,n
3637    corresponding to the given processor. i.e diagonal matrix on
3638    process 0 is [m0 x n0], diagonal matrix on process 1 is [m1 x n1]
3639    etc. The remaining portion of the local submatrix [m x (N-n)]
3640    constitute the OFF-DIAGONAL portion. The example below better
3641    illustrates this concept.
3642 
3643    For a square global matrix we define each processor's diagonal portion
3644    to be its local rows and the corresponding columns (a square submatrix);
3645    each processor's off-diagonal portion encompasses the remainder of the
3646    local matrix (a rectangular submatrix).
3647 
3648    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
3649 
3650    When calling this routine with a single process communicator, a matrix of
3651    type SEQAIJ is returned.  If a matrix of type MPIAIJ is desired for this
3652    type of communicator, use the construction mechanism:
3653      MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatSetSizes(A, m,n,M,N); MatMPIAIJSetPreallocation(A,...);
3654 
3655    By default, this format uses inodes (identical nodes) when possible.
3656    We search for consecutive rows with the same nonzero structure, thereby
3657    reusing matrix information to achieve increased efficiency.
3658 
3659    Options Database Keys:
3660 +  -mat_no_inode  - Do not use inodes
3661 .  -mat_inode_limit <limit> - Sets inode limit (max limit=5)
3662 -  -mat_aij_oneindex - Internally use indexing starting at 1
3663         rather than 0.  Note that when calling MatSetValues(),
3664         the user still MUST index entries starting at 0!
3665 
3666 
3667    Example usage:
3668 
3669    Consider the following 8x8 matrix with 34 non-zero values, that is
3670    assembled across 3 processors. Lets assume that proc0 owns 3 rows,
3671    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
3672    as follows:
3673 
3674 .vb
3675             1  2  0  |  0  3  0  |  0  4
3676     Proc0   0  5  6  |  7  0  0  |  8  0
3677             9  0 10  | 11  0  0  | 12  0
3678     -------------------------------------
3679            13  0 14  | 15 16 17  |  0  0
3680     Proc1   0 18  0  | 19 20 21  |  0  0
3681             0  0  0  | 22 23  0  | 24  0
3682     -------------------------------------
3683     Proc2  25 26 27  |  0  0 28  | 29  0
3684            30  0  0  | 31 32 33  |  0 34
3685 .ve
3686 
3687    This can be represented as a collection of submatrices as:
3688 
3689 .vb
3690       A B C
3691       D E F
3692       G H I
3693 .ve
3694 
3695    Where the submatrices A,B,C are owned by proc0, D,E,F are
3696    owned by proc1, G,H,I are owned by proc2.
3697 
3698    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3699    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3700    The 'M','N' parameters are 8,8, and have the same values on all procs.
3701 
3702    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
3703    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
3704    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
3705    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
3706    part as SeqAIJ matrices. for eg: proc1 will store [E] as a SeqAIJ
   matrix, and [DF] as another SeqAIJ matrix.
3708 
3709    When d_nz, o_nz parameters are specified, d_nz storage elements are
3710    allocated for every row of the local diagonal submatrix, and o_nz
3711    storage locations are allocated for every row of the OFF-DIAGONAL submat.
   One way to choose d_nz and o_nz is to use the max nonzeros per local
3713    rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
3714    In this case, the values of d_nz,o_nz are:
3715 .vb
3716      proc0 : dnz = 2, o_nz = 2
3717      proc1 : dnz = 3, o_nz = 2
3718      proc2 : dnz = 1, o_nz = 4
3719 .ve
3720    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
3721    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
3722    for proc3. i.e we are using 12+15+10=37 storage locations to store
3723    34 values.
3724 
3725    When d_nnz, o_nnz parameters are specified, the storage is specified
3726    for every row, coresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
3727    In the above case the values for d_nnz,o_nnz are:
3728 .vb
3729      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
3730      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
3731      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
3732 .ve
3733    Here the space allocated is sum of all the above values i.e 34, and
3734    hence pre-allocation is perfect.
3735 
3736    Level: intermediate
3737 
3738 .keywords: matrix, aij, compressed row, sparse, parallel
3739 
3740 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
3741           MPIAIJ, MatCreateMPIAIJWithArrays()
3742 @*/
3743 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
3744 {
3745   PetscErrorCode ierr;
3746   PetscMPIInt    size;
3747 
3748   PetscFunctionBegin;
3749   ierr = MatCreate(comm,A);CHKERRQ(ierr);
3750   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
3751   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3752   if (size > 1) {
3753     ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr);
3754     ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
3755   } else {
3756     ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr);
3757     ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr);
3758   }
3759   PetscFunctionReturn(0);
3760 }
3761 
3762 #undef __FUNCT__
3763 #define __FUNCT__ "MatMPIAIJGetSeqAIJ"
3764 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,PetscInt *colmap[])
3765 {
3766   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
3767 
3768   PetscFunctionBegin;
3769   *Ad     = a->A;
3770   *Ao     = a->B;
3771   *colmap = a->garray;
3772   PetscFunctionReturn(0);
3773 }
3774 
#undef __FUNCT__
#define __FUNCT__ "MatSetColoring_MPIAIJ"
/*
   MatSetColoring_MPIAIJ - Applies a coloring to an MPIAIJ matrix by deriving
   a coloring for each of its two sequential blocks (a->A diagonal, a->B
   off-diagonal) and forwarding them to MatSetColoring_SeqAIJ().

   Supports IS_COLORING_GLOBAL (colors indexed by global column) and
   IS_COLORING_GHOSTED (colors indexed by local/ghosted numbering via
   A->mapping); any other ctype raises PETSC_ERR_SUP.
*/
PetscErrorCode MatSetColoring_MPIAIJ(Mat A,ISColoring coloring)
{
  PetscErrorCode ierr;
  PetscInt       i;
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;

  PetscFunctionBegin;
  if (coloring->ctype == IS_COLORING_GLOBAL) {
    ISColoringValue *allcolors,*colors;
    ISColoring      ocoloring;

    /* set coloring for diagonal portion */
    ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr);

    /* set coloring for off-diagonal portion: gather the full global color
       array, then pick out the entries for the off-diagonal (ghost) columns
       through the local-to-global map a->garray */
    ierr = ISAllGatherColors(((PetscObject)A)->comm,coloring->n,coloring->colors,PETSC_NULL,&allcolors);CHKERRQ(ierr);
    ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
    for (i=0; i<a->B->cmap->n; i++) {
      colors[i] = allcolors[a->garray[i]];
    }
    ierr = PetscFree(allcolors);CHKERRQ(ierr);
    /* ocoloring takes ownership of colors; destroyed after being applied to a->B */
    ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
    ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
    ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
  } else if (coloring->ctype == IS_COLORING_GHOSTED) {
    ISColoringValue *colors;
    PetscInt        *larray;
    ISColoring      ocoloring;

    /* set coloring for diagonal portion: convert each owned global column
       index to its local (ghosted) index, then look up its color */
    ierr = PetscMalloc((a->A->cmap->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr);
    for (i=0; i<a->A->cmap->n; i++) {
      larray[i] = i + A->cmap->rstart;
    }
    ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->A->cmap->n,larray,PETSC_NULL,larray);CHKERRQ(ierr);
    ierr = PetscMalloc((a->A->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
    for (i=0; i<a->A->cmap->n; i++) {
      colors[i] = coloring->colors[larray[i]];
    }
    ierr = PetscFree(larray);CHKERRQ(ierr);
    ierr = ISColoringCreate(PETSC_COMM_SELF,coloring->n,a->A->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
    ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr);
    ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);

    /* set coloring for off-diagonal portion: garray already holds the global
       column indices of the ghost columns, map them to local and look up colors */
    ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr);
    ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->B->cmap->n,a->garray,PETSC_NULL,larray);CHKERRQ(ierr);
    ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
    for (i=0; i<a->B->cmap->n; i++) {
      colors[i] = coloring->colors[larray[i]];
    }
    ierr = PetscFree(larray);CHKERRQ(ierr);
    ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
    ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
    ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
  } else {
    SETERRQ1(PETSC_ERR_SUP,"No support ISColoringType %d",(int)coloring->ctype);
  }

  PetscFunctionReturn(0);
}
3838 
3839 #if defined(PETSC_HAVE_ADIC)
3840 #undef __FUNCT__
3841 #define __FUNCT__ "MatSetValuesAdic_MPIAIJ"
3842 PetscErrorCode MatSetValuesAdic_MPIAIJ(Mat A,void *advalues)
3843 {
3844   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
3845   PetscErrorCode ierr;
3846 
3847   PetscFunctionBegin;
3848   ierr = MatSetValuesAdic_SeqAIJ(a->A,advalues);CHKERRQ(ierr);
3849   ierr = MatSetValuesAdic_SeqAIJ(a->B,advalues);CHKERRQ(ierr);
3850   PetscFunctionReturn(0);
3851 }
3852 #endif
3853 
3854 #undef __FUNCT__
3855 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ"
3856 PetscErrorCode MatSetValuesAdifor_MPIAIJ(Mat A,PetscInt nl,void *advalues)
3857 {
3858   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
3859   PetscErrorCode ierr;
3860 
3861   PetscFunctionBegin;
3862   ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr);
3863   ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr);
3864   PetscFunctionReturn(0);
3865 }
3866 
#undef __FUNCT__
#define __FUNCT__ "MatMerge"
/*@
      MatMerge - Creates a single large PETSc matrix by concatenating sequential
                 matrices from each processor

    Collective on MPI_Comm

   Input Parameters:
+    comm - the communicator the parallel matrix will live on
.    inmat - the input sequential matrices
.    n - number of local columns (or PETSC_DECIDE)
-    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX

   Output Parameter:
.    outmat - the parallel matrix generated

    Level: advanced

   Notes: The number of columns of the matrix in EACH processor MUST be the same.
          The input matrix inmat is destroyed by this routine; do not use it afterwards.

@*/
PetscErrorCode PETSCMAT_DLLEXPORT MatMerge(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
{
  PetscErrorCode ierr;
  PetscInt       m,N,i,rstart,nnz,Ii,*dnz,*onz;
  PetscInt       *indx;
  PetscScalar    *values;

  PetscFunctionBegin;
  ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
  if (scall == MAT_INITIAL_MATRIX){
    /* count nonzeros in each row, for diagonal and off diagonal portion of matrix */
    if (n == PETSC_DECIDE){
      ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr);
    }
    /* this process's first global row = prefix sum of the local row counts */
    ierr = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
    rstart -= m;

    /* NOTE: MatPreallocateInitialize/Finalize are macros that declare and free dnz/onz */
    ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
    for (i=0;i<m;i++) {
      ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr);
      ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr);
      ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr);
    }
    /* This routine will ONLY return MPIAIJ type matrix */
    ierr = MatCreate(comm,outmat);CHKERRQ(ierr);
    ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
    ierr = MatSetType(*outmat,MATMPIAIJ);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr);
    ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);

  } else if (scall == MAT_REUSE_MATRIX){
    /* reuse the existing parallel matrix; only the row offset is needed */
    ierr = MatGetOwnershipRange(*outmat,&rstart,PETSC_NULL);CHKERRQ(ierr);
  } else {
    SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
  }

  /* copy the local rows of inmat into the parallel matrix, then destroy inmat */
  for (i=0;i<m;i++) {
    ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
    Ii    = i + rstart;
    ierr = MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
    ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
  }
  ierr = MatDestroy(inmat);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}
3937 
3938 #undef __FUNCT__
3939 #define __FUNCT__ "MatFileSplit"
3940 PetscErrorCode MatFileSplit(Mat A,char *outfile)
3941 {
3942   PetscErrorCode    ierr;
3943   PetscMPIInt       rank;
3944   PetscInt          m,N,i,rstart,nnz;
3945   size_t            len;
3946   const PetscInt    *indx;
3947   PetscViewer       out;
3948   char              *name;
3949   Mat               B;
3950   const PetscScalar *values;
3951 
3952   PetscFunctionBegin;
3953   ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr);
3954   ierr = MatGetSize(A,0,&N);CHKERRQ(ierr);
3955   /* Should this be the type of the diagonal block of A? */
3956   ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr);
3957   ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr);
3958   ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr);
3959   ierr = MatSeqAIJSetPreallocation(B,0,PETSC_NULL);CHKERRQ(ierr);
3960   ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr);
3961   for (i=0;i<m;i++) {
3962     ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
3963     ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
3964     ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
3965   }
3966   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3967   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3968 
3969   ierr = MPI_Comm_rank(((PetscObject)A)->comm,&rank);CHKERRQ(ierr);
3970   ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr);
3971   ierr = PetscMalloc((len+5)*sizeof(char),&name);CHKERRQ(ierr);
3972   sprintf(name,"%s.%d",outfile,rank);
3973   ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr);
3974   ierr = PetscFree(name);
3975   ierr = MatView(B,out);CHKERRQ(ierr);
3976   ierr = PetscViewerDestroy(out);CHKERRQ(ierr);
3977   ierr = MatDestroy(B);CHKERRQ(ierr);
3978   PetscFunctionReturn(0);
3979 }
3980 
3981 EXTERN PetscErrorCode MatDestroy_MPIAIJ(Mat);
3982 #undef __FUNCT__
3983 #define __FUNCT__ "MatDestroy_MPIAIJ_SeqsToMPI"
3984 PetscErrorCode PETSCMAT_DLLEXPORT MatDestroy_MPIAIJ_SeqsToMPI(Mat A)
3985 {
3986   PetscErrorCode       ierr;
3987   Mat_Merge_SeqsToMPI  *merge;
3988   PetscContainer       container;
3989 
3990   PetscFunctionBegin;
3991   ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr);
3992   if (container) {
3993     ierr = PetscContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr);
3994     ierr = PetscFree(merge->id_r);CHKERRQ(ierr);
3995     ierr = PetscFree(merge->len_s);CHKERRQ(ierr);
3996     ierr = PetscFree(merge->len_r);CHKERRQ(ierr);
3997     ierr = PetscFree(merge->bi);CHKERRQ(ierr);
3998     ierr = PetscFree(merge->bj);CHKERRQ(ierr);
3999     ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr);
4000     ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr);
4001     ierr = PetscFree(merge->coi);CHKERRQ(ierr);
4002     ierr = PetscFree(merge->coj);CHKERRQ(ierr);
4003     ierr = PetscFree(merge->owners_co);CHKERRQ(ierr);
4004     ierr = PetscLayoutDestroy(merge->rowmap);CHKERRQ(ierr);
4005 
4006     ierr = PetscContainerDestroy(container);CHKERRQ(ierr);
4007     ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr);
4008   }
4009   ierr = PetscFree(merge);CHKERRQ(ierr);
4010 
4011   ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr);
4012   PetscFunctionReturn(0);
4013 }
4014 
4015 #include "../src/mat/utils/freespace.h"
4016 #include "petscbt.h"
4017 
4018 #undef __FUNCT__
4019 #define __FUNCT__ "MatMerge_SeqsToMPINumeric"
4020 /*@C
4021       MatMerge_SeqsToMPI - Creates a MPIAIJ matrix by adding sequential
4022                  matrices from each processor
4023 
4024     Collective on MPI_Comm
4025 
4026    Input Parameters:
+    comm - the communicator the parallel matrix will live on
4028 .    seqmat - the input sequential matrices
4029 .    m - number of local rows (or PETSC_DECIDE)
4030 .    n - number of local columns (or PETSC_DECIDE)
4031 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4032 
4033    Output Parameter:
4034 .    mpimat - the parallel matrix generated
4035 
4036     Level: advanced
4037 
4038    Notes:
4039      The dimensions of the sequential matrix in each processor MUST be the same.
4040      The input seqmat is included into the container "Mat_Merge_SeqsToMPI", and will be
4041      destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat.
4042 @*/
4043 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPINumeric(Mat seqmat,Mat mpimat)
4044 {
4045   PetscErrorCode       ierr;
4046   MPI_Comm             comm=((PetscObject)mpimat)->comm;
4047   Mat_SeqAIJ           *a=(Mat_SeqAIJ*)seqmat->data;
4048   PetscMPIInt          size,rank,taga,*len_s;
4049   PetscInt             N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj=a->j;
4050   PetscInt             proc,m;
4051   PetscInt             **buf_ri,**buf_rj;
4052   PetscInt             k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj;
4053   PetscInt             nrows,**buf_ri_k,**nextrow,**nextai;
4054   MPI_Request          *s_waits,*r_waits;
4055   MPI_Status           *status;
4056   MatScalar            *aa=a->a;
4057   MatScalar            **abuf_r,*ba_i;
4058   Mat_Merge_SeqsToMPI  *merge;
4059   PetscContainer       container;
4060 
4061   PetscFunctionBegin;
4062   ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
4063 
4064   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4065   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4066 
4067   ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr);
4068   if (container) {
4069     ierr  = PetscContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr);
4070   }
4071   bi     = merge->bi;
4072   bj     = merge->bj;
4073   buf_ri = merge->buf_ri;
4074   buf_rj = merge->buf_rj;
4075 
4076   ierr   = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr);
4077   owners = merge->rowmap->range;
4078   len_s  = merge->len_s;
4079 
4080   /* send and recv matrix values */
4081   /*-----------------------------*/
4082   ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr);
4083   ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr);
4084 
4085   ierr = PetscMalloc((merge->nsend+1)*sizeof(MPI_Request),&s_waits);CHKERRQ(ierr);
4086   for (proc=0,k=0; proc<size; proc++){
4087     if (!len_s[proc]) continue;
4088     i = owners[proc];
4089     ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr);
4090     k++;
4091   }
4092 
4093   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);}
4094   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);}
4095   ierr = PetscFree(status);CHKERRQ(ierr);
4096 
4097   ierr = PetscFree(s_waits);CHKERRQ(ierr);
4098   ierr = PetscFree(r_waits);CHKERRQ(ierr);
4099 
4100   /* insert mat values of mpimat */
4101   /*----------------------------*/
4102   ierr = PetscMalloc(N*sizeof(PetscScalar),&ba_i);CHKERRQ(ierr);
4103   ierr = PetscMalloc3(merge->nrecv,PetscInt*,&buf_ri_k,merge->nrecv,PetscInt*,&nextrow,merge->nrecv,PetscInt*,&nextai);CHKERRQ(ierr);
4104 
4105   for (k=0; k<merge->nrecv; k++){
4106     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
4107     nrows = *(buf_ri_k[k]);
4108     nextrow[k]  = buf_ri_k[k]+1;  /* next row number of k-th recved i-structure */
4109     nextai[k]   = buf_ri_k[k] + (nrows + 1);/* poins to the next i-structure of k-th recved i-structure  */
4110   }
4111 
4112   /* set values of ba */
4113   m = merge->rowmap->n;
4114   for (i=0; i<m; i++) {
4115     arow = owners[rank] + i;
4116     bj_i = bj+bi[i];  /* col indices of the i-th row of mpimat */
4117     bnzi = bi[i+1] - bi[i];
4118     ierr = PetscMemzero(ba_i,bnzi*sizeof(PetscScalar));CHKERRQ(ierr);
4119 
4120     /* add local non-zero vals of this proc's seqmat into ba */
4121     anzi = ai[arow+1] - ai[arow];
4122     aj   = a->j + ai[arow];
4123     aa   = a->a + ai[arow];
4124     nextaj = 0;
4125     for (j=0; nextaj<anzi; j++){
4126       if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */
4127         ba_i[j] += aa[nextaj++];
4128       }
4129     }
4130 
4131     /* add received vals into ba */
4132     for (k=0; k<merge->nrecv; k++){ /* k-th received message */
4133       /* i-th row */
4134       if (i == *nextrow[k]) {
4135         anzi = *(nextai[k]+1) - *nextai[k];
4136         aj   = buf_rj[k] + *(nextai[k]);
4137         aa   = abuf_r[k] + *(nextai[k]);
4138         nextaj = 0;
4139         for (j=0; nextaj<anzi; j++){
4140           if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */
4141             ba_i[j] += aa[nextaj++];
4142           }
4143         }
4144         nextrow[k]++; nextai[k]++;
4145       }
4146     }
4147     ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr);
4148   }
4149   ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4150   ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4151 
4152   ierr = PetscFree(abuf_r);CHKERRQ(ierr);
4153   ierr = PetscFree(ba_i);CHKERRQ(ierr);
4154   ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
4155   ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
4156   PetscFunctionReturn(0);
4157 }
4158 
#undef __FUNCT__
#define __FUNCT__ "MatMerge_SeqsToMPISymbolic"
/*
   MatMerge_SeqsToMPISymbolic - Symbolic phase of MatMerge_SeqsToMPI(): builds
   the nonzero structure of the parallel matrix that results from summing the
   sequential matrices seqmat across the communicator, preallocates it, and
   attaches a Mat_Merge_SeqsToMPI support structure (container key
   "MatMergeSeqsToMPI") holding the communication buffers that
   MatMerge_SeqsToMPINumeric() reuses. The returned matrix is not assembled;
   its destroy routine is replaced by MatDestroy_MPIAIJ_SeqsToMPI().
*/
PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPISymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat)
{
  PetscErrorCode       ierr;
  Mat                  B_mpi;
  Mat_SeqAIJ           *a=(Mat_SeqAIJ*)seqmat->data;
  PetscMPIInt          size,rank,tagi,tagj,*len_s,*len_si,*len_ri;
  PetscInt             **buf_rj,**buf_ri,**buf_ri_k;
  PetscInt             M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j;
  PetscInt             len,proc,*dnz,*onz;
  PetscInt             k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0;
  PetscInt             nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai;
  MPI_Request          *si_waits,*sj_waits,*ri_waits,*rj_waits;
  MPI_Status           *status;
  PetscFreeSpaceList   free_space=PETSC_NULL,current_space=PETSC_NULL;
  PetscBT              lnkbt;
  Mat_Merge_SeqsToMPI  *merge;
  PetscContainer       container;

  PetscFunctionBegin;
  ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);

  /* make sure it is a PETSc comm */
  ierr = PetscCommDuplicate(comm,&comm,PETSC_NULL);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);

  ierr = PetscNew(Mat_Merge_SeqsToMPI,&merge);CHKERRQ(ierr);
  ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr);

  /* determine row ownership */
  /*---------------------------------------------------------*/
  ierr = PetscLayoutCreate(comm,&merge->rowmap);CHKERRQ(ierr);
  ierr = PetscLayoutSetLocalSize(merge->rowmap,m);CHKERRQ(ierr);
  ierr = PetscLayoutSetSize(merge->rowmap,M);CHKERRQ(ierr);
  ierr = PetscLayoutSetBlockSize(merge->rowmap,1);CHKERRQ(ierr);
  ierr = PetscLayoutSetUp(merge->rowmap);CHKERRQ(ierr);
  ierr = PetscMalloc(size*sizeof(PetscMPIInt),&len_si);CHKERRQ(ierr);
  ierr = PetscMalloc(size*sizeof(PetscMPIInt),&merge->len_s);CHKERRQ(ierr);

  m      = merge->rowmap->n;
  M      = merge->rowmap->N;
  owners = merge->rowmap->range;

  /* determine the number of messages to send, their lengths */
  /*---------------------------------------------------------*/
  len_s  = merge->len_s;

  len = 0;  /* length of buf_si[] */
  merge->nsend = 0;
  for (proc=0; proc<size; proc++){
    len_si[proc] = 0;
    if (proc == rank){
      len_s[proc] = 0;
    } else {
      len_si[proc] = owners[proc+1] - owners[proc] + 1;
      len_s[proc] = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */
    }
    if (len_s[proc]) {
      merge->nsend++;
      nrows = 0;
      /* only rows with at least one nonzero are described in the i-structure message */
      for (i=owners[proc]; i<owners[proc+1]; i++){
        if (ai[i+1] > ai[i]) nrows++;
      }
      len_si[proc] = 2*(nrows+1);
      len += len_si[proc];
    }
  }

  /* determine the number and length of messages to receive for ij-structure */
  /*-------------------------------------------------------------------------*/
  ierr = PetscGatherNumberOfMessages(comm,PETSC_NULL,len_s,&merge->nrecv);CHKERRQ(ierr);
  ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr);

  /* post the Irecv of j-structure */
  /*-------------------------------*/
  ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr);
  ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr);

  /* post the Isend of j-structure */
  /*--------------------------------*/
  ierr = PetscMalloc2(merge->nsend,MPI_Request,&si_waits,merge->nsend,MPI_Request,&sj_waits);CHKERRQ(ierr);

  for (proc=0, k=0; proc<size; proc++){
    if (!len_s[proc]) continue;
    i = owners[proc];
    ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr);
    k++;
  }

  /* receives and sends of j-structure are complete */
  /*------------------------------------------------*/
  if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);}
  if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);}

  /* send and recv i-structure */
  /*---------------------------*/
  ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr);
  ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr);

  ierr = PetscMalloc((len+1)*sizeof(PetscInt),&buf_s);CHKERRQ(ierr);
  buf_si = buf_s;  /* points to the beginning of k-th msg to be sent */
  for (proc=0,k=0; proc<size; proc++){
    if (!len_s[proc]) continue;
    /* form outgoing message for i-structure:
         buf_si[0]:                 nrows to be sent
               [1:nrows]:           row index (global)
               [nrows+1:2*nrows+1]: i-structure index
    */
    /*-------------------------------------------*/
    nrows = len_si[proc]/2 - 1;
    buf_si_i    = buf_si + nrows+1;
    buf_si[0]   = nrows;
    buf_si_i[0] = 0;
    nrows = 0;
    for (i=owners[proc]; i<owners[proc+1]; i++){
      anzi = ai[i+1] - ai[i];
      if (anzi) {
        buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */
        buf_si[nrows+1] = i-owners[proc]; /* local row index */
        nrows++;
      }
    }
    ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr);
    k++;
    buf_si += len_si[proc];
  }

  if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);}
  if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);}

  ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr);
  for (i=0; i<merge->nrecv; i++){
    ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr);
  }

  ierr = PetscFree(len_si);CHKERRQ(ierr);
  ierr = PetscFree(len_ri);CHKERRQ(ierr);
  ierr = PetscFree(rj_waits);CHKERRQ(ierr);
  ierr = PetscFree2(si_waits,sj_waits);CHKERRQ(ierr);
  ierr = PetscFree(ri_waits);CHKERRQ(ierr);
  ierr = PetscFree(buf_s);CHKERRQ(ierr);
  ierr = PetscFree(status);CHKERRQ(ierr);

  /* compute a local seq matrix in each processor */
  /*----------------------------------------------*/
  /* allocate bi array and free space for accumulating nonzero column info */
  ierr = PetscMalloc((m+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr);
  bi[0] = 0;

  /* create and initialize a linked list */
  nlnk = N+1;
  ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);

  /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */
  len = 0;
  len  = ai[owners[rank+1]] - ai[owners[rank]];
  ierr = PetscFreeSpaceGet((PetscInt)(2*len+1),&free_space);CHKERRQ(ierr);
  current_space = free_space;

  /* determine symbolic info for each local row */
  ierr = PetscMalloc3(merge->nrecv,PetscInt*,&buf_ri_k,merge->nrecv,PetscInt*,&nextrow,merge->nrecv,PetscInt*,&nextai);CHKERRQ(ierr);

  for (k=0; k<merge->nrecv; k++){
    buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
    nrows = *buf_ri_k[k];
    nextrow[k]  = buf_ri_k[k] + 1;  /* next row number of k-th recved i-structure */
    nextai[k]   = buf_ri_k[k] + (nrows + 1);/* points to the next i-structure of k-th recved i-structure  */
  }

  ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
  len = 0;
  for (i=0;i<m;i++) {
    bnzi   = 0;
    /* add local non-zero cols of this proc's seqmat into lnk */
    arow   = owners[rank] + i;
    anzi   = ai[arow+1] - ai[arow];
    aj     = a->j + ai[arow];
    ierr = PetscLLAdd(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
    bnzi += nlnk;
    /* add received col data into lnk */
    for (k=0; k<merge->nrecv; k++){ /* k-th received message */
      if (i == *nextrow[k]) { /* i-th row */
        anzi = *(nextai[k]+1) - *nextai[k];
        aj   = buf_rj[k] + *nextai[k];
        ierr = PetscLLAdd(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
        bnzi += nlnk;
        nextrow[k]++; nextai[k]++;
      }
    }
    if (len < bnzi) len = bnzi;  /* =max(bnzi) */

    /* if free space is not available, make more free space */
    if (current_space->local_remaining<bnzi) {
      ierr = PetscFreeSpaceGet(bnzi+current_space->total_array_size,&current_space);CHKERRQ(ierr);
      nspacedouble++;
    }
    /* copy data into free space, then initialize lnk */
    ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
    ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr);

    current_space->array           += bnzi;
    current_space->local_used      += bnzi;
    current_space->local_remaining -= bnzi;

    bi[i+1] = bi[i] + bnzi;
  }

  ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);

  /* condense the accumulated column indices into the contiguous bj array */
  ierr = PetscMalloc((bi[m]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr);
  ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr);
  ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);

  /* create symbolic parallel matrix B_mpi */
  /*---------------------------------------*/
  ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr);
  if (n==PETSC_DECIDE) {
    ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr);
  } else {
    ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
  }
  ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr);
  ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr);
  ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);

  /* B_mpi is not ready for use - assembly will be done by MatMerge_SeqsToMPINumeric() */
  B_mpi->assembled     = PETSC_FALSE;
  B_mpi->ops->destroy  = MatDestroy_MPIAIJ_SeqsToMPI;
  merge->bi            = bi;
  merge->bj            = bj;
  merge->buf_ri        = buf_ri;
  merge->buf_rj        = buf_rj;
  merge->coi           = PETSC_NULL;
  merge->coj           = PETSC_NULL;
  merge->owners_co     = PETSC_NULL;

  /* attach the supporting struct to B_mpi for reuse */
  ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
  ierr = PetscContainerSetPointer(container,merge);CHKERRQ(ierr);
  ierr = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr);
  *mpimat = B_mpi;

  ierr = PetscCommDestroy(&comm);CHKERRQ(ierr);
  ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
4407 
4408 #undef __FUNCT__
4409 #define __FUNCT__ "MatMerge_SeqsToMPI"
4410 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPI(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat)
4411 {
4412   PetscErrorCode   ierr;
4413 
4414   PetscFunctionBegin;
4415   ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4416   if (scall == MAT_INITIAL_MATRIX){
4417     ierr = MatMerge_SeqsToMPISymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr);
4418   }
4419   ierr = MatMerge_SeqsToMPINumeric(seqmat,*mpimat);CHKERRQ(ierr);
4420   ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4421   PetscFunctionReturn(0);
4422 }
4423 
#undef __FUNCT__
#define __FUNCT__ "MatGetLocalMat"
/*@
     MatGetLocalMat - Creates a SeqAIJ matrix by taking all its local rows

    Not Collective

   Input Parameters:
+    A - the matrix
-    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX

   Output Parameter:
.    A_loc - the local sequential matrix generated

    Level: developer

   Notes: Merges the diagonal (mpimat->A) and off-diagonal (mpimat->B)
   blocks row by row into a single CSR structure with global column
   indices, keeping each row's columns sorted: ghost columns below
   cstart first, then the diagonal block, then the remaining ghost
   columns. With MAT_REUSE_MATRIX only the values are rewritten, so the
   sparsity pattern of A must not have changed since the initial call.

@*/
PetscErrorCode PETSCMAT_DLLEXPORT MatGetLocalMat(Mat A,MatReuse scall,Mat *A_loc)
{
  PetscErrorCode  ierr;
  Mat_MPIAIJ      *mpimat=(Mat_MPIAIJ*)A->data;
  Mat_SeqAIJ      *mat,*a=(Mat_SeqAIJ*)(mpimat->A)->data,*b=(Mat_SeqAIJ*)(mpimat->B)->data;
  PetscInt        *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*cmap=mpimat->garray;
  MatScalar       *aa=a->a,*ba=b->a,*cam;
  PetscScalar     *ca;
  PetscInt        am=A->rmap->n,i,j,k,cstart=A->cmap->rstart;
  PetscInt        *ci,*cj,col,ncols_d,ncols_o,jo;

  PetscFunctionBegin;
  ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
  if (scall == MAT_INITIAL_MATRIX){
    /* row i of the merged matrix has all entries from row i of both blocks */
    ierr = PetscMalloc((1+am)*sizeof(PetscInt),&ci);CHKERRQ(ierr);
    ci[0] = 0;
    for (i=0; i<am; i++){
      ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]);
    }
    ierr = PetscMalloc((1+ci[am])*sizeof(PetscInt),&cj);CHKERRQ(ierr);
    ierr = PetscMalloc((1+ci[am])*sizeof(PetscScalar),&ca);CHKERRQ(ierr);
    k = 0;
    for (i=0; i<am; i++) {
      ncols_o = bi[i+1] - bi[i];
      ncols_d = ai[i+1] - ai[i];
      /* off-diagonal portion of A: ghost columns with global index below cstart */
      for (jo=0; jo<ncols_o; jo++) {
        col = cmap[*bj];
        if (col >= cstart) break;
        cj[k]   = col; bj++;
        ca[k++] = *ba++;
      }
      /* diagonal portion of A: local columns shifted to global numbering */
      for (j=0; j<ncols_d; j++) {
        cj[k]   = cstart + *aj++;
        ca[k++] = *aa++;
      }
      /* off-diagonal portion of A: remaining ghost columns (>= cstart) */
      for (j=jo; j<ncols_o; j++) {
        cj[k]   = cmap[*bj++];
        ca[k++] = *ba++;
      }
    }
    /* put together the new matrix */
    ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr);
    /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
    /* Since these are PETSc arrays, change flags to free them as necessary. */
    mat          = (Mat_SeqAIJ*)(*A_loc)->data;
    mat->free_a  = PETSC_TRUE;
    mat->free_ij = PETSC_TRUE;
    mat->nonew   = 0;
  } else if (scall == MAT_REUSE_MATRIX){
    /* structure unchanged: overwrite only the value array, walking the three
       per-row segments in the same order as the initial-matrix branch */
    mat=(Mat_SeqAIJ*)(*A_loc)->data;
    ci = mat->i; cj = mat->j; cam = mat->a;
    for (i=0; i<am; i++) {
      /* off-diagonal portion of A */
      ncols_o = bi[i+1] - bi[i];
      for (jo=0; jo<ncols_o; jo++) {
        col = cmap[*bj];
        if (col >= cstart) break;
        *cam++ = *ba++; bj++;
      }
      /* diagonal portion of A */
      ncols_d = ai[i+1] - ai[i];
      for (j=0; j<ncols_d; j++) *cam++ = *aa++;
      /* off-diagonal portion of A */
      for (j=jo; j<ncols_o; j++) {
        *cam++ = *ba++; bj++;
      }
    }
  } else {
    SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
  }

  ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
4518 
4519 #undef __FUNCT__
4520 #define __FUNCT__ "MatGetLocalMatCondensed"
4521 /*@C
4522      MatGetLocalMatCondensed - Creates a SeqAIJ matrix by taking all its local rows and NON-ZERO columns
4523 
4524     Not Collective
4525 
4526    Input Parameters:
4527 +    A - the matrix
4528 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4529 -    row, col - index sets of rows and columns to extract (or PETSC_NULL)
4530 
4531    Output Parameter:
4532 .    A_loc - the local sequential matrix generated
4533 
4534     Level: developer
4535 
4536 @*/
4537 PetscErrorCode PETSCMAT_DLLEXPORT MatGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc)
4538 {
4539   Mat_MPIAIJ        *a=(Mat_MPIAIJ*)A->data;
4540   PetscErrorCode    ierr;
4541   PetscInt          i,start,end,ncols,nzA,nzB,*cmap,imark,*idx;
4542   IS                isrowa,iscola;
4543   Mat               *aloc;
4544 
4545   PetscFunctionBegin;
4546   ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
4547   if (!row){
4548     start = A->rmap->rstart; end = A->rmap->rend;
4549     ierr = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr);
4550   } else {
4551     isrowa = *row;
4552   }
4553   if (!col){
4554     start = A->cmap->rstart;
4555     cmap  = a->garray;
4556     nzA   = a->A->cmap->n;
4557     nzB   = a->B->cmap->n;
4558     ierr  = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr);
4559     ncols = 0;
4560     for (i=0; i<nzB; i++) {
4561       if (cmap[i] < start) idx[ncols++] = cmap[i];
4562       else break;
4563     }
4564     imark = i;
4565     for (i=0; i<nzA; i++) idx[ncols++] = start + i;
4566     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i];
4567     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,&iscola);CHKERRQ(ierr);
4568     ierr = PetscFree(idx);CHKERRQ(ierr);
4569   } else {
4570     iscola = *col;
4571   }
4572   if (scall != MAT_INITIAL_MATRIX){
4573     ierr = PetscMalloc(sizeof(Mat),&aloc);CHKERRQ(ierr);
4574     aloc[0] = *A_loc;
4575   }
4576   ierr = MatGetSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr);
4577   *A_loc = aloc[0];
4578   ierr = PetscFree(aloc);CHKERRQ(ierr);
4579   if (!row){
4580     ierr = ISDestroy(isrowa);CHKERRQ(ierr);
4581   }
4582   if (!col){
4583     ierr = ISDestroy(iscola);CHKERRQ(ierr);
4584   }
4585   ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
4586   PetscFunctionReturn(0);
4587 }
4588 
4589 #undef __FUNCT__
4590 #define __FUNCT__ "MatGetBrowsOfAcols"
4591 /*@C
    MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns of local A
4593 
4594     Collective on Mat
4595 
4596    Input Parameters:
4597 +    A,B - the matrices in mpiaij format
4598 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4599 -    rowb, colb - index sets of rows and columns of B to extract (or PETSC_NULL)
4600 
4601    Output Parameter:
4602 +    rowb, colb - index sets of rows and columns of B to extract
4603 .    brstart - row index of B_seq from which next B->rmap->n rows are taken from B's local rows
4604 -    B_seq - the sequential matrix generated
4605 
4606     Level: developer
4607 
4608 @*/
4609 PetscErrorCode PETSCMAT_DLLEXPORT MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,PetscInt *brstart,Mat *B_seq)
4610 {
4611   Mat_MPIAIJ        *a=(Mat_MPIAIJ*)A->data;
4612   PetscErrorCode    ierr;
4613   PetscInt          *idx,i,start,ncols,nzA,nzB,*cmap,imark;
4614   IS                isrowb,iscolb;
4615   Mat               *bseq;
4616 
4617   PetscFunctionBegin;
4618   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend){
4619     SETERRQ4(PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
4620   }
4621   ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
4622 
4623   if (scall == MAT_INITIAL_MATRIX){
4624     start = A->cmap->rstart;
4625     cmap  = a->garray;
4626     nzA   = a->A->cmap->n;
4627     nzB   = a->B->cmap->n;
4628     ierr  = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr);
4629     ncols = 0;
4630     for (i=0; i<nzB; i++) {  /* row < local row index */
4631       if (cmap[i] < start) idx[ncols++] = cmap[i];
4632       else break;
4633     }
4634     imark = i;
4635     for (i=0; i<nzA; i++) idx[ncols++] = start + i;  /* local rows */
4636     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */
4637     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,&isrowb);CHKERRQ(ierr);
4638     ierr = PetscFree(idx);CHKERRQ(ierr);
4639     *brstart = imark;
4640     ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr);
4641   } else {
4642     if (!rowb || !colb) SETERRQ(PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX");
4643     isrowb = *rowb; iscolb = *colb;
4644     ierr = PetscMalloc(sizeof(Mat),&bseq);CHKERRQ(ierr);
4645     bseq[0] = *B_seq;
4646   }
4647   ierr = MatGetSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr);
4648   *B_seq = bseq[0];
4649   ierr = PetscFree(bseq);CHKERRQ(ierr);
4650   if (!rowb){
4651     ierr = ISDestroy(isrowb);CHKERRQ(ierr);
4652   } else {
4653     *rowb = isrowb;
4654   }
4655   if (!colb){
4656     ierr = ISDestroy(iscolb);CHKERRQ(ierr);
4657   } else {
4658     *colb = iscolb;
4659   }
4660   ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
4661   PetscFunctionReturn(0);
4662 }
4663 
4664 #undef __FUNCT__
4665 #define __FUNCT__ "MatGetBrowsOfAoCols"
4666 /*@C
    MatGetBrowsOfAoCols - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero
    columns of the OFF-DIAGONAL portion of local A
4669 
4670     Collective on Mat
4671 
4672    Input Parameters:
4673 +    A,B - the matrices in mpiaij format
4674 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4675 .    startsj - starting point in B's sending and receiving j-arrays, saved for MAT_REUSE (or PETSC_NULL)
4676 .    startsj_r - similar to startsj for receives
4677 -    bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or PETSC_NULL)
4678 
   Output Parameter:
.    B_oth - the sequential matrix generated
4681 
4682     Level: developer
4683 
4684 @*/
4685 PetscErrorCode PETSCMAT_DLLEXPORT MatGetBrowsOfAoCols(Mat A,Mat B,MatReuse scall,PetscInt **startsj,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth)
4686 {
4687   VecScatter_MPI_General *gen_to,*gen_from;
4688   PetscErrorCode         ierr;
4689   Mat_MPIAIJ             *a=(Mat_MPIAIJ*)A->data;
4690   Mat_SeqAIJ             *b_oth;
4691   VecScatter             ctx=a->Mvctx;
4692   MPI_Comm               comm=((PetscObject)ctx)->comm;
4693   PetscMPIInt            *rprocs,*sprocs,tag=((PetscObject)ctx)->tag,rank;
4694   PetscInt               *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap->n,row,*b_othi,*b_othj;
4695   PetscScalar            *rvalues,*svalues;
4696   MatScalar              *b_otha,*bufa,*bufA;
4697   PetscInt               i,j,k,l,ll,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len;
4698   MPI_Request            *rwaits = PETSC_NULL,*swaits = PETSC_NULL;
4699   MPI_Status             *sstatus,rstatus;
4700   PetscMPIInt            jj;
4701   PetscInt               *cols,sbs,rbs;
4702   PetscScalar            *vals;
4703 
4704   PetscFunctionBegin;
4705   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend){
4706     SETERRQ4(PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
4707   }
4708   ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
4709   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4710 
4711   gen_to   = (VecScatter_MPI_General*)ctx->todata;
4712   gen_from = (VecScatter_MPI_General*)ctx->fromdata;
4713   rvalues  = gen_from->values; /* holds the length of receiving row */
4714   svalues  = gen_to->values;   /* holds the length of sending row */
4715   nrecvs   = gen_from->n;
4716   nsends   = gen_to->n;
4717 
4718   ierr = PetscMalloc2(nrecvs,MPI_Request,&rwaits,nsends,MPI_Request,&swaits);CHKERRQ(ierr);
4719   srow     = gen_to->indices;   /* local row index to be sent */
4720   sstarts  = gen_to->starts;
4721   sprocs   = gen_to->procs;
4722   sstatus  = gen_to->sstatus;
4723   sbs      = gen_to->bs;
4724   rstarts  = gen_from->starts;
4725   rprocs   = gen_from->procs;
4726   rbs      = gen_from->bs;
4727 
4728   if (!startsj || !bufa_ptr) scall = MAT_INITIAL_MATRIX;
4729   if (scall == MAT_INITIAL_MATRIX){
4730     /* i-array */
4731     /*---------*/
4732     /*  post receives */
4733     for (i=0; i<nrecvs; i++){
4734       rowlen = (PetscInt*)rvalues + rstarts[i]*rbs;
4735       nrows = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */
4736       ierr = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
4737     }
4738 
4739     /* pack the outgoing message */
4740     ierr = PetscMalloc2(nsends+1,PetscInt,&sstartsj,nrecvs+1,PetscInt,&rstartsj);CHKERRQ(ierr);
4741     sstartsj[0] = 0;  rstartsj[0] = 0;
4742     len = 0; /* total length of j or a array to be sent */
4743     k = 0;
4744     for (i=0; i<nsends; i++){
4745       rowlen = (PetscInt*)svalues + sstarts[i]*sbs;
4746       nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
4747       for (j=0; j<nrows; j++) {
4748         row = srow[k] + B->rmap->range[rank]; /* global row idx */
4749         for (l=0; l<sbs; l++){
4750           ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); /* rowlength */
4751           rowlen[j*sbs+l] = ncols;
4752           len += ncols;
4753           ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr);
4754         }
4755         k++;
4756       }
4757       ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
4758       sstartsj[i+1] = len;  /* starting point of (i+1)-th outgoing msg in bufj and bufa */
4759     }
4760     /* recvs and sends of i-array are completed */
4761     i = nrecvs;
4762     while (i--) {
4763       ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
4764     }
4765     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
4766 
4767     /* allocate buffers for sending j and a arrays */
4768     ierr = PetscMalloc((len+1)*sizeof(PetscInt),&bufj);CHKERRQ(ierr);
4769     ierr = PetscMalloc((len+1)*sizeof(PetscScalar),&bufa);CHKERRQ(ierr);
4770 
4771     /* create i-array of B_oth */
4772     ierr = PetscMalloc((aBn+2)*sizeof(PetscInt),&b_othi);CHKERRQ(ierr);
4773     b_othi[0] = 0;
4774     len = 0; /* total length of j or a array to be received */
4775     k = 0;
4776     for (i=0; i<nrecvs; i++){
4777       rowlen = (PetscInt*)rvalues + rstarts[i]*rbs;
4778       nrows = rbs*(rstarts[i+1]-rstarts[i]); /* num of rows to be recieved */
4779       for (j=0; j<nrows; j++) {
4780         b_othi[k+1] = b_othi[k] + rowlen[j];
4781         len += rowlen[j]; k++;
4782       }
4783       rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */
4784     }
4785 
4786     /* allocate space for j and a arrrays of B_oth */
4787     ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(PetscInt),&b_othj);CHKERRQ(ierr);
4788     ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(MatScalar),&b_otha);CHKERRQ(ierr);
4789 
4790     /* j-array */
4791     /*---------*/
4792     /*  post receives of j-array */
4793     for (i=0; i<nrecvs; i++){
4794       nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
4795       ierr = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
4796     }
4797 
4798     /* pack the outgoing message j-array */
4799     k = 0;
4800     for (i=0; i<nsends; i++){
4801       nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
4802       bufJ = bufj+sstartsj[i];
4803       for (j=0; j<nrows; j++) {
4804         row  = srow[k++] + B->rmap->range[rank]; /* global row idx */
4805         for (ll=0; ll<sbs; ll++){
4806           ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr);
4807           for (l=0; l<ncols; l++){
4808             *bufJ++ = cols[l];
4809           }
4810           ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr);
4811         }
4812       }
4813       ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
4814     }
4815 
4816     /* recvs and sends of j-array are completed */
4817     i = nrecvs;
4818     while (i--) {
4819       ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
4820     }
4821     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
4822   } else if (scall == MAT_REUSE_MATRIX){
4823     sstartsj = *startsj;
4824     rstartsj = *startsj_r;
4825     bufa     = *bufa_ptr;
4826     b_oth    = (Mat_SeqAIJ*)(*B_oth)->data;
4827     b_otha   = b_oth->a;
4828   } else {
4829     SETERRQ(PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not posses an object container");
4830   }
4831 
4832   /* a-array */
4833   /*---------*/
4834   /*  post receives of a-array */
4835   for (i=0; i<nrecvs; i++){
4836     nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
4837     ierr = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
4838   }
4839 
4840   /* pack the outgoing message a-array */
4841   k = 0;
4842   for (i=0; i<nsends; i++){
4843     nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
4844     bufA = bufa+sstartsj[i];
4845     for (j=0; j<nrows; j++) {
4846       row  = srow[k++] + B->rmap->range[rank]; /* global row idx */
4847       for (ll=0; ll<sbs; ll++){
4848         ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr);
4849         for (l=0; l<ncols; l++){
4850           *bufA++ = vals[l];
4851         }
4852         ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr);
4853       }
4854     }
4855     ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
4856   }
4857   /* recvs and sends of a-array are completed */
4858   i = nrecvs;
4859   while (i--) {
4860     ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
4861   }
4862   if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
4863   ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr);
4864 
4865   if (scall == MAT_INITIAL_MATRIX){
4866     /* put together the new matrix */
4867     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr);
4868 
4869     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
4870     /* Since these are PETSc arrays, change flags to free them as necessary. */
4871     b_oth          = (Mat_SeqAIJ *)(*B_oth)->data;
4872     b_oth->free_a  = PETSC_TRUE;
4873     b_oth->free_ij = PETSC_TRUE;
4874     b_oth->nonew   = 0;
4875 
4876     ierr = PetscFree(bufj);CHKERRQ(ierr);
4877     if (!startsj || !bufa_ptr){
4878       ierr = PetscFree2(sstartsj,rstartsj);CHKERRQ(ierr);
4879       ierr = PetscFree(bufa_ptr);CHKERRQ(ierr);
4880     } else {
4881       *startsj   = sstartsj;
4882       *startsj_r = rstartsj;
4883       *bufa_ptr  = bufa;
4884     }
4885   }
4886   ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
4887   PetscFunctionReturn(0);
4888 }
4889 
4890 #undef __FUNCT__
4891 #define __FUNCT__ "MatGetCommunicationStructs"
4892 /*@C
4893   MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication.
4894 
4895   Not Collective
4896 
4897   Input Parameters:
4898 . A - The matrix in mpiaij format
4899 
4900   Output Parameter:
4901 + lvec - The local vector holding off-process values from the argument to a matrix-vector product
4902 . colmap - A map from global column index to local index into lvec
4903 - multScatter - A scatter from the argument of a matrix-vector product to lvec
4904 
4905   Level: developer
4906 
4907 @*/
4908 #if defined (PETSC_USE_CTABLE)
4909 PetscErrorCode PETSCMAT_DLLEXPORT MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter)
4910 #else
4911 PetscErrorCode PETSCMAT_DLLEXPORT MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter)
4912 #endif
4913 {
4914   Mat_MPIAIJ *a;
4915 
4916   PetscFunctionBegin;
4917   PetscValidHeaderSpecific(A, MAT_COOKIE, 1);
4918   PetscValidPointer(lvec, 2)
4919   PetscValidPointer(colmap, 3)
4920   PetscValidPointer(multScatter, 4)
4921   a = (Mat_MPIAIJ *) A->data;
4922   if (lvec) *lvec = a->lvec;
4923   if (colmap) *colmap = a->colmap;
4924   if (multScatter) *multScatter = a->Mvctx;
4925   PetscFunctionReturn(0);
4926 }
4927 
4928 EXTERN_C_BEGIN
4929 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_MPIAIJ_MPICRL(Mat,const MatType,MatReuse,Mat*);
4930 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_MPIAIJ_MPICSRPERM(Mat,const MatType,MatReuse,Mat*);
4931 EXTERN_C_END
4932 
4933 #undef __FUNCT__
4934 #define __FUNCT__ "MatMatMultNumeric_MPIDense_MPIAIJ"
4935 /*
4936     Computes (B'*A')' since computing B*A directly is untenable
4937 
4938                n                       p                          p
4939         (              )       (              )         (                  )
4940       m (      A       )  *  n (       B      )   =   m (         C        )
4941         (              )       (              )         (                  )
4942 
4943 */
4944 PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C)
4945 {
4946   PetscErrorCode     ierr;
4947   Mat                At,Bt,Ct;
4948 
4949   PetscFunctionBegin;
4950   ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr);
4951   ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr);
4952   ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,1.0,&Ct);CHKERRQ(ierr);
4953   ierr = MatDestroy(At);CHKERRQ(ierr);
4954   ierr = MatDestroy(Bt);CHKERRQ(ierr);
4955   ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr);
4956   ierr = MatDestroy(Ct);CHKERRQ(ierr);
4957   PetscFunctionReturn(0);
4958 }
4959 
4960 #undef __FUNCT__
4961 #define __FUNCT__ "MatMatMultSymbolic_MPIDense_MPIAIJ"
4962 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat *C)
4963 {
4964   PetscErrorCode ierr;
4965   PetscInt       m=A->rmap->n,n=B->cmap->n;
4966   Mat            Cmat;
4967 
4968   PetscFunctionBegin;
4969   if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_ERR_ARG_SIZ,"A->cmap->n %d != B->rmap->n %d\n",A->cmap->n,B->rmap->n);
4970   ierr = MatCreate(((PetscObject)A)->comm,&Cmat);CHKERRQ(ierr);
4971   ierr = MatSetSizes(Cmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
4972   ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr);
4973   ierr = MatMPIDenseSetPreallocation(Cmat,PETSC_NULL);CHKERRQ(ierr);
4974   ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4975   ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4976   *C   = Cmat;
4977   PetscFunctionReturn(0);
4978 }
4979 
4980 /* ----------------------------------------------------------------*/
4981 #undef __FUNCT__
4982 #define __FUNCT__ "MatMatMult_MPIDense_MPIAIJ"
4983 PetscErrorCode MatMatMult_MPIDense_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C)
4984 {
4985   PetscErrorCode ierr;
4986 
4987   PetscFunctionBegin;
4988   if (scall == MAT_INITIAL_MATRIX){
4989     ierr = MatMatMultSymbolic_MPIDense_MPIAIJ(A,B,fill,C);CHKERRQ(ierr);
4990   }
4991   ierr = MatMatMultNumeric_MPIDense_MPIAIJ(A,B,*C);CHKERRQ(ierr);
4992   PetscFunctionReturn(0);
4993 }
4994 
4995 EXTERN_C_BEGIN
4996 #if defined(PETSC_HAVE_MUMPS)
4997 extern PetscErrorCode MatGetFactor_mpiaij_mumps(Mat,MatFactorType,Mat*);
4998 #endif
4999 #if defined(PETSC_HAVE_PASTIX)
5000 extern PetscErrorCode MatGetFactor_mpiaij_pastix(Mat,MatFactorType,Mat*);
5001 #endif
5002 #if defined(PETSC_HAVE_SUPERLU_DIST)
5003 extern PetscErrorCode MatGetFactor_mpiaij_superlu_dist(Mat,MatFactorType,Mat*);
5004 #endif
5005 #if defined(PETSC_HAVE_SPOOLES)
5006 extern PetscErrorCode MatGetFactor_mpiaij_spooles(Mat,MatFactorType,Mat*);
5007 #endif
5008 EXTERN_C_END
5009 
5010 /*MC
5011    MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.
5012 
5013    Options Database Keys:
5014 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions()
5015 
5016   Level: beginner
5017 
5018 .seealso: MatCreateMPIAIJ()
5019 M*/
5020 
5021 EXTERN_C_BEGIN
5022 #undef __FUNCT__
5023 #define __FUNCT__ "MatCreate_MPIAIJ"
/*
   MatCreate_MPIAIJ - Registered constructor for the MATMPIAIJ type: allocates
   the Mat_MPIAIJ data, installs the MPIAIJ operations table, and composes the
   type-specific functions (external direct-solver factorizations, preallocation,
   conversion, and MatMatMult routines) on the object so they can be dispatched
   by name.  The matrix is not usable until a preallocation routine is called.
*/
PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_MPIAIJ(Mat B)
{
  Mat_MPIAIJ     *b;
  PetscErrorCode ierr;
  PetscMPIInt    size;

  PetscFunctionBegin;
  ierr = MPI_Comm_size(((PetscObject)B)->comm,&size);CHKERRQ(ierr);

  /* allocate the type-specific data and install the MPIAIJ function table */
  ierr            = PetscNewLog(B,Mat_MPIAIJ,&b);CHKERRQ(ierr);
  B->data         = (void*)b;
  ierr            = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
  B->rmap->bs     = 1;
  B->assembled    = PETSC_FALSE;
  B->mapping      = 0;

  B->insertmode   = NOT_SET_VALUES;
  b->size         = size;
  ierr = MPI_Comm_rank(((PetscObject)B)->comm,&b->rank);CHKERRQ(ierr);

  /* build cache for off array entries formed */
  ierr = MatStashCreate_Private(((PetscObject)B)->comm,1,&B->stash);CHKERRQ(ierr);
  b->donotstash  = PETSC_FALSE;
  b->colmap      = 0;
  b->garray      = 0;
  b->roworiented = PETSC_TRUE;

  /* stuff used for matrix vector multiply */
  b->lvec      = PETSC_NULL;
  b->Mvctx     = PETSC_NULL;

  /* stuff for MatGetRow() */
  b->rowindices   = 0;
  b->rowvalues    = 0;
  b->getrowactive = PETSC_FALSE;

  /* register the external direct-solver factorizations available in this build */
#if defined(PETSC_HAVE_SPOOLES)
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_spooles_C",
                                     "MatGetFactor_mpiaij_spooles",
                                     MatGetFactor_mpiaij_spooles);CHKERRQ(ierr);
#endif
#if defined(PETSC_HAVE_MUMPS)
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_mumps_C",
                                     "MatGetFactor_mpiaij_mumps",
                                     MatGetFactor_mpiaij_mumps);CHKERRQ(ierr);
#endif
#if defined(PETSC_HAVE_PASTIX)
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_pastix_C",
					   "MatGetFactor_mpiaij_pastix",
					   MatGetFactor_mpiaij_pastix);CHKERRQ(ierr);
#endif
#if defined(PETSC_HAVE_SUPERLU_DIST)
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_superlu_dist_C",
                                     "MatGetFactor_mpiaij_superlu_dist",
                                     MatGetFactor_mpiaij_superlu_dist);CHKERRQ(ierr);
#endif
  /* compose the MPIAIJ-specific support, preallocation, conversion, and
     MatMatMult routines for name-based dispatch */
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
                                     "MatStoreValues_MPIAIJ",
                                     MatStoreValues_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
                                     "MatRetrieveValues_MPIAIJ",
                                     MatRetrieveValues_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
				     "MatGetDiagonalBlock_MPIAIJ",
                                     MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatIsTranspose_C",
				     "MatIsTranspose_MPIAIJ",
				     MatIsTranspose_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocation_C",
				     "MatMPIAIJSetPreallocation_MPIAIJ",
				     MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",
				     "MatMPIAIJSetPreallocationCSR_MPIAIJ",
				     MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C",
				     "MatDiagonalScaleLocal_MPIAIJ",
				     MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpicsrperm_C",
                                     "MatConvert_MPIAIJ_MPICSRPERM",
                                      MatConvert_MPIAIJ_MPICSRPERM);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpicrl_C",
                                     "MatConvert_MPIAIJ_MPICRL",
                                      MatConvert_MPIAIJ_MPICRL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C",
                                     "MatConvert_MPIAIJ_MPISBAIJ",
                                      MatConvert_MPIAIJ_MPISBAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMult_mpidense_mpiaij_C",
                                     "MatMatMult_MPIDense_MPIAIJ",
                                      MatMatMult_MPIDense_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMultSymbolic_mpidense_mpiaij_C",
                                     "MatMatMultSymbolic_MPIDense_MPIAIJ",
                                      MatMatMultSymbolic_MPIDense_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMultNumeric_mpidense_mpiaij_C",
                                     "MatMatMultNumeric_MPIDense_MPIAIJ",
                                      MatMatMultNumeric_MPIDense_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
5122 EXTERN_C_END
5123 
5124 #undef __FUNCT__
5125 #define __FUNCT__ "MatCreateMPIAIJWithSplitArrays"
5126 /*@
5127      MatCreateMPIAIJWithSplitArrays - creates a MPI AIJ matrix using arrays that contain the "diagonal"
5128          and "off-diagonal" part of the matrix in CSR format.
5129 
5130    Collective on MPI_Comm
5131 
5132    Input Parameters:
5133 +  comm - MPI communicator
5134 .  m - number of local rows (Cannot be PETSC_DECIDE)
5135 .  n - This value should be the same as the local size used in creating the
5136        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
5137        calculated if N is given) For square matrices n is almost always m.
5138 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
5139 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
5140 .   i - row indices for "diagonal" portion of matrix
5141 .   j - column indices
5142 .   a - matrix values
5143 .   oi - row indices for "off-diagonal" portion of matrix
5144 .   oj - column indices
5145 -   oa - matrix values
5146 
5147    Output Parameter:
5148 .   mat - the matrix
5149 
5150    Level: advanced
5151 
5152    Notes:
5153        The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc.
5154 
5155        The i and j indices are 0 based
5156 
5157        See MatCreateMPIAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix
5158 
5159        This sets local rows and cannot be used to set off-processor values.
5160 
5161        You cannot later use MatSetValues() to change values in this matrix.
5162 
5163 .keywords: matrix, aij, compressed row, sparse, parallel
5164 
5165 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
5166           MPIAIJ, MatCreateMPIAIJ(), MatCreateMPIAIJWithArrays()
5167 @*/
5168 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],
5169 								PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat)
5170 {
5171   PetscErrorCode ierr;
5172   Mat_MPIAIJ     *maij;
5173 
5174  PetscFunctionBegin;
5175   if (m < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
5176   if (i[0]) {
5177     SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
5178   }
5179   if (oi[0]) {
5180     SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0");
5181   }
5182   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
5183   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
5184   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
5185   maij = (Mat_MPIAIJ*) (*mat)->data;
5186   maij->donotstash     = PETSC_TRUE;
5187   (*mat)->preallocated = PETSC_TRUE;
5188 
5189   ierr = PetscLayoutSetBlockSize((*mat)->rmap,1);CHKERRQ(ierr);
5190   ierr = PetscLayoutSetBlockSize((*mat)->cmap,1);CHKERRQ(ierr);
5191   ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr);
5192   ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr);
5193 
5194   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr);
5195   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr);
5196 
5197   ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5198   ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5199   ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5200   ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5201 
5202   ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5203   ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5204   PetscFunctionReturn(0);
5205 }
5206 
5207 /*
5208     Special version for direct calls from Fortran
5209 */
5210 #if defined(PETSC_HAVE_FORTRAN_CAPS)
5211 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ
5212 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
5213 #define matsetvaluesmpiaij_ matsetvaluesmpiaij
5214 #endif
5215 
5216 /* Change these macros so can be used in void function */
5217 #undef CHKERRQ
5218 #define CHKERRQ(ierr) CHKERRABORT(((PetscObject)mat)->comm,ierr)
5219 #undef SETERRQ2
5220 #define SETERRQ2(ierr,b,c,d) CHKERRABORT(((PetscObject)mat)->comm,ierr)
5221 #undef SETERRQ
5222 #define SETERRQ(ierr,b) CHKERRABORT(((PetscObject)mat)->comm,ierr)
5223 
5224 EXTERN_C_BEGIN
5225 #undef __FUNCT__
5226 #define __FUNCT__ "matsetvaluesmpiaij_"
5227 void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr)
5228 {
5229   Mat             mat = *mmat;
5230   PetscInt        m = *mm, n = *mn;
5231   InsertMode      addv = *maddv;
5232   Mat_MPIAIJ      *aij = (Mat_MPIAIJ*)mat->data;
5233   PetscScalar     value;
5234   PetscErrorCode  ierr;
5235 
5236   ierr = MatPreallocated(mat);CHKERRQ(ierr);
5237   if (mat->insertmode == NOT_SET_VALUES) {
5238     mat->insertmode = addv;
5239   }
5240 #if defined(PETSC_USE_DEBUG)
5241   else if (mat->insertmode != addv) {
5242     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
5243   }
5244 #endif
5245   {
5246   PetscInt        i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
5247   PetscInt        cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
5248   PetscTruth      roworiented = aij->roworiented;
5249 
5250   /* Some Variables required in the macro */
5251   Mat             A = aij->A;
5252   Mat_SeqAIJ      *a = (Mat_SeqAIJ*)A->data;
5253   PetscInt        *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
5254   MatScalar       *aa = a->a;
5255   PetscTruth      ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES))?PETSC_TRUE:PETSC_FALSE);
5256   Mat             B = aij->B;
5257   Mat_SeqAIJ      *b = (Mat_SeqAIJ*)B->data;
5258   PetscInt        *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
5259   MatScalar       *ba = b->a;
5260 
5261   PetscInt        *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
5262   PetscInt        nonew = a->nonew;
5263   MatScalar       *ap1,*ap2;
5264 
5265   PetscFunctionBegin;
5266   for (i=0; i<m; i++) {
5267     if (im[i] < 0) continue;
5268 #if defined(PETSC_USE_DEBUG)
5269     if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
5270 #endif
5271     if (im[i] >= rstart && im[i] < rend) {
5272       row      = im[i] - rstart;
5273       lastcol1 = -1;
5274       rp1      = aj + ai[row];
5275       ap1      = aa + ai[row];
5276       rmax1    = aimax[row];
5277       nrow1    = ailen[row];
5278       low1     = 0;
5279       high1    = nrow1;
5280       lastcol2 = -1;
5281       rp2      = bj + bi[row];
5282       ap2      = ba + bi[row];
5283       rmax2    = bimax[row];
5284       nrow2    = bilen[row];
5285       low2     = 0;
5286       high2    = nrow2;
5287 
5288       for (j=0; j<n; j++) {
5289         if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
5290         if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
5291         if (in[j] >= cstart && in[j] < cend){
5292           col = in[j] - cstart;
5293           MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
5294         } else if (in[j] < 0) continue;
5295 #if defined(PETSC_USE_DEBUG)
5296         else if (in[j] >= mat->cmap->N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);}
5297 #endif
5298         else {
5299           if (mat->was_assembled) {
5300             if (!aij->colmap) {
5301               ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
5302             }
5303 #if defined (PETSC_USE_CTABLE)
5304             ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
5305 	    col--;
5306 #else
5307             col = aij->colmap[in[j]] - 1;
5308 #endif
5309             if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
5310               ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
5311               col =  in[j];
5312               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
5313               B = aij->B;
5314               b = (Mat_SeqAIJ*)B->data;
5315               bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
5316               rp2      = bj + bi[row];
5317               ap2      = ba + bi[row];
5318               rmax2    = bimax[row];
5319               nrow2    = bilen[row];
5320               low2     = 0;
5321               high2    = nrow2;
5322               bm       = aij->B->rmap->n;
5323               ba = b->a;
5324             }
5325           } else col = in[j];
5326           MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
5327         }
5328       }
5329     } else {
5330       if (!aij->donotstash) {
5331         if (roworiented) {
5332           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscTruth)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
5333         } else {
5334           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscTruth)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
5335         }
5336       }
5337     }
5338   }}
5339   PetscFunctionReturnVoid();
5340 }
5341 EXTERN_C_END
5342 
5343