xref: /petsc/src/mat/impls/aij/mpi/mpiaij.c (revision 35e7444da1e6edf29ca585d75dc3fe3d2c63a6e4)
1 #define PETSCMAT_DLL
2 
3 #include "../src/mat/impls/aij/mpi/mpiaij.h"   /*I "petscmat.h" I*/
4 
5 #undef __FUNCT__
6 #define __FUNCT__ "MatDistribute_MPIAIJ"
7 /*
8     Distributes a SeqAIJ matrix across a set of processes. Code stolen from
9     MatLoad_MPIAIJ(). Horrible lack of reuse. Should be a routine for each matrix type.
10 
11     Only for square matrices
12 */
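/*
    A minimal usage sketch (not part of the PETSc sources; gseq, mlocal and dist are
    hypothetical names).  Only the copy of the global matrix on process 0 is read;
    every process supplies its desired number of local rows mlocal:

       ierr = MatDistribute_MPIAIJ(PETSC_COMM_WORLD,gseq,mlocal,MAT_INITIAL_MATRIX,&dist);CHKERRQ(ierr);

    and a later call with MAT_REUSE_MATRIX moves only the numerical values again:

       ierr = MatDistribute_MPIAIJ(PETSC_COMM_WORLD,gseq,mlocal,MAT_REUSE_MATRIX,&dist);CHKERRQ(ierr);
*/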
13 PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat)
14 {
15   PetscMPIInt    rank,size;
16   PetscInt       *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz,*gmataj,cnt,row,*ld;
17   PetscErrorCode ierr;
18   Mat            mat;
19   Mat_SeqAIJ     *gmata;
20   PetscMPIInt    tag;
21   MPI_Status     status;
22   PetscTruth     aij;
23   MatScalar      *gmataa,*ao,*ad,*gmataarestore=0;
24 
25   PetscFunctionBegin;
26   CHKMEMQ;
27   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
28   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
29   if (!rank) {
30     ierr = PetscTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);CHKERRQ(ierr);
31     if (!aij) SETERRQ1(PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name);
32   }
33   if (reuse == MAT_INITIAL_MATRIX) {
34     ierr = MatCreate(comm,&mat);CHKERRQ(ierr);
35     ierr = MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
36     ierr = MatSetType(mat,MATAIJ);CHKERRQ(ierr);
37     ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr);
38     ierr = PetscMalloc2(m,PetscInt,&dlens,m,PetscInt,&olens);CHKERRQ(ierr);
39     ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
40     rowners[0] = 0;
41     for (i=2; i<=size; i++) {
42       rowners[i] += rowners[i-1];
43     }
44     rstart = rowners[rank];
45     rend   = rowners[rank+1];
46     ierr   = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
47     if (!rank) {
48       gmata = (Mat_SeqAIJ*) gmat->data;
49       /* send row lengths to all processors */
50       for (i=0; i<m; i++) dlens[i] = gmata->ilen[i];
51       for (i=1; i<size; i++) {
52 	ierr = MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
53       }
54       /* determine the diagonal and off-diagonal counts */
55       ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
56       ierr = PetscMalloc(m*sizeof(PetscInt),&ld);CHKERRQ(ierr);
57       ierr = PetscMemzero(ld,m*sizeof(PetscInt));CHKERRQ(ierr);
58       jj = 0;
59       for (i=0; i<m; i++) {
60 	for (j=0; j<dlens[i]; j++) {
61           if (gmata->j[jj] < rstart) ld[i]++;
62 	  if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++;
63 	  jj++;
64 	}
65       }
66       /* send column indices to other processes */
67       for (i=1; i<size; i++) {
68 	nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
69 	ierr = MPI_Send(&nz,1,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
70 	ierr = MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
71       }
72 
73       /* send numerical values to other processes */
74       for (i=1; i<size; i++) {
75         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
76         ierr = MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
77       }
78       gmataa = gmata->a;
79       gmataj = gmata->j;
80 
81     } else {
82       /* receive row lengths */
83       ierr = MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
84       /* receive column indices */
85       ierr = MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
86       ierr = PetscMalloc2(nz,PetscScalar,&gmataa,nz,PetscInt,&gmataj);CHKERRQ(ierr);
87       ierr = MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
88       /* determine the diagonal and off-diagonal counts */
89       ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
90       ierr = PetscMalloc(m*sizeof(PetscInt),&ld);CHKERRQ(ierr);
91       ierr = PetscMemzero(ld,m*sizeof(PetscInt));CHKERRQ(ierr);
92       jj = 0;
93       for (i=0; i<m; i++) {
94 	for (j=0; j<dlens[i]; j++) {
95           if (gmataj[jj] < rstart) ld[i]++;
96 	  if (gmataj[jj] < rstart || gmataj[jj] >= rend) olens[i]++;
97 	  jj++;
98 	}
99       }
100       /* receive numerical values */
101       ierr = PetscMemzero(gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr);
102       ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
103     }
104     /* set preallocation */
105     for (i=0; i<m; i++) {
106       dlens[i] -= olens[i];
107     }
108     ierr = MatSeqAIJSetPreallocation(mat,0,dlens);CHKERRQ(ierr);
109     ierr = MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);CHKERRQ(ierr);
110 
111     for (i=0; i<m; i++) {
112       dlens[i] += olens[i];
113     }
114     cnt  = 0;
115     for (i=0; i<m; i++) {
116       row  = rstart + i;
117       ierr = MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);CHKERRQ(ierr);
118       cnt += dlens[i];
119     }
120     if (rank) {
121       ierr = PetscFree2(gmataa,gmataj);CHKERRQ(ierr);
122     }
123     ierr = PetscFree2(dlens,olens);CHKERRQ(ierr);
124     ierr = PetscFree(rowners);CHKERRQ(ierr);
125     ((Mat_MPIAIJ*)(mat->data))->ld = ld;
126     *inmat = mat;
127   } else {   /* column indices are already set; only need to move over numerical values from process 0 */
128     Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data;
129     Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data;
130     mat   = *inmat;
131     ierr  = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
132     if (!rank) {
133       /* send numerical values to other processes */
134       gmata = (Mat_SeqAIJ*) gmat->data;
135       ierr   = MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);CHKERRQ(ierr);
136       gmataa = gmata->a;
137       for (i=1; i<size; i++) {
138         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
139         ierr = MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
140       }
141       nz   = gmata->i[rowners[1]]-gmata->i[rowners[0]];
142     } else {
143       /* receive numerical values from process 0 */
144       nz   = Ad->nz + Ao->nz;
145       ierr = PetscMalloc(nz*sizeof(PetscScalar),&gmataa);CHKERRQ(ierr); gmataarestore = gmataa;
146       ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
147     }
148     /* transfer numerical values into the diagonal A and off diagonal B parts of mat */
149     ld = ((Mat_MPIAIJ*)(mat->data))->ld;
150     ad = Ad->a;
151     ao = Ao->a;
152     if (mat->rmap->n) {
153       i  = 0;
154       nz = ld[i];                                   ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
155       nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
156     }
157     for (i=1; i<mat->rmap->n; i++) {
158       nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
159       nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
160     }
161     i--;
162     if (mat->rmap->n) {
163       nz = Ao->i[i+1] - Ao->i[i] - ld[i];           ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
164     }
165     if (rank) {
166       ierr = PetscFree(gmataarestore);CHKERRQ(ierr);
167     }
168   }
169   ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
170   ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
171   CHKMEMQ;
172   PetscFunctionReturn(0);
173 }
174 
175 /*
176   Local utility routine that creates a mapping from the global column
177 number to the local number in the off-diagonal part of the local
178 storage of the matrix.  When PETSC_USE_CTABLE is used this is scalable, at
179 a slightly higher hash-table lookup cost; without it, it is not scalable (each process
180 stores an order-N integer array) but access is fast.
181 */
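/*
   Sketch of how the map is consulted later in this file (e.g. in MatSetValues_MPIAIJ());
   gcol is a hypothetical global column index and lcol the resulting local column in the
   off-diagonal block B, with a result below zero meaning the column is not present in B:

      #if defined (PETSC_USE_CTABLE)
        ierr = PetscTableFind(aij->colmap,gcol+1,&lcol);CHKERRQ(ierr);
        lcol--;
      #else
        lcol = aij->colmap[gcol] - 1;
      #endif
*/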
182 #undef __FUNCT__
183 #define __FUNCT__ "CreateColmap_MPIAIJ_Private"
184 PetscErrorCode CreateColmap_MPIAIJ_Private(Mat mat)
185 {
186   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
187   PetscErrorCode ierr;
188   PetscInt       n = aij->B->cmap->n,i;
189 
190   PetscFunctionBegin;
191 #if defined (PETSC_USE_CTABLE)
192   ierr = PetscTableCreate(n,&aij->colmap);CHKERRQ(ierr);
193   for (i=0; i<n; i++){
194     ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1);CHKERRQ(ierr);
195   }
196 #else
197   ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscInt),&aij->colmap);CHKERRQ(ierr);
198   ierr = PetscLogObjectMemory(mat,mat->cmap->N*sizeof(PetscInt));CHKERRQ(ierr);
199   ierr = PetscMemzero(aij->colmap,mat->cmap->N*sizeof(PetscInt));CHKERRQ(ierr);
200   for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
201 #endif
202   PetscFunctionReturn(0);
203 }
204 
205 
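/*
    The two macros below insert a single value at (row,col) into the diagonal (A) or
    off-diagonal (B) SeqAIJ block: a short binary search plus linear scan locates col
    within the row, an existing entry is added to or overwritten according to addv, and
    a genuinely new entry (unless new nonzeros are disallowed via nonew, or the value is
    an ignorable zero) goes through MatSeqXAIJReallocateAIJ(), which may grow the row
    storage, followed by shifting the later entries of the row up by one slot.
*/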
206 #define CHUNKSIZE   15
207 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \
208 { \
209     if (col <= lastcol1) low1 = 0; else high1 = nrow1; \
210     lastcol1 = col;\
211     while (high1-low1 > 5) { \
212       t = (low1+high1)/2; \
213       if (rp1[t] > col) high1 = t; \
214       else             low1  = t; \
215     } \
216       for (_i=low1; _i<high1; _i++) { \
217         if (rp1[_i] > col) break; \
218         if (rp1[_i] == col) { \
219           if (addv == ADD_VALUES) ap1[_i] += value;   \
220           else                    ap1[_i] = value; \
221           goto a_noinsert; \
222         } \
223       }  \
224       if (value == 0.0 && ignorezeroentries) {low1 = 0; high1 = nrow1;goto a_noinsert;} \
225       if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;}		\
226       if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
227       MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \
228       N = nrow1++ - 1; a->nz++; high1++; \
229       /* shift up all the later entries in this row */ \
230       for (ii=N; ii>=_i; ii--) { \
231         rp1[ii+1] = rp1[ii]; \
232         ap1[ii+1] = ap1[ii]; \
233       } \
234       rp1[_i] = col;  \
235       ap1[_i] = value;  \
236       a_noinsert: ; \
237       ailen[row] = nrow1; \
238 }
239 
240 
241 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \
242 { \
243     if (col <= lastcol2) low2 = 0; else high2 = nrow2; \
244     lastcol2 = col;\
245     while (high2-low2 > 5) { \
246       t = (low2+high2)/2; \
247       if (rp2[t] > col) high2 = t; \
248       else             low2  = t; \
249     } \
250     for (_i=low2; _i<high2; _i++) {		\
251       if (rp2[_i] > col) break;			\
252       if (rp2[_i] == col) {			      \
253 	if (addv == ADD_VALUES) ap2[_i] += value;     \
254 	else                    ap2[_i] = value;      \
255 	goto b_noinsert;			      \
256       }						      \
257     }							      \
258     if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
259     if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;}		\
260     if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
261     MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \
262     N = nrow2++ - 1; b->nz++; high2++;					\
263     /* shift up all the later entries in this row */			\
264     for (ii=N; ii>=_i; ii--) {						\
265       rp2[ii+1] = rp2[ii];						\
266       ap2[ii+1] = ap2[ii];						\
267     }									\
268     rp2[_i] = col;							\
269     ap2[_i] = value;							\
270     b_noinsert: ;								\
271     bilen[row] = nrow2;							\
272 }
273 
274 #undef __FUNCT__
275 #define __FUNCT__ "MatSetValuesRow_MPIAIJ"
276 PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[])
277 {
278   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)A->data;
279   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data;
280   PetscErrorCode ierr;
281   PetscInt       l,*garray = mat->garray,diag;
282 
283   PetscFunctionBegin;
284   /* code only works for square matrices A */
285 
286   /* find size of row to the left of the diagonal part */
287   ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr);
288   row  = row - diag;
289   for (l=0; l<b->i[row+1]-b->i[row]; l++) {
290     if (garray[b->j[b->i[row]+l]] > diag) break;
291   }
292   ierr = PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));CHKERRQ(ierr);
293 
294   /* diagonal part */
295   ierr = PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));CHKERRQ(ierr);
296 
297   /* right of diagonal part */
298   ierr = PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));CHKERRQ(ierr);
299   PetscFunctionReturn(0);
300 }
301 
302 #undef __FUNCT__
303 #define __FUNCT__ "MatSetValues_MPIAIJ"
304 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
305 {
306   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
307   PetscScalar    value;
308   PetscErrorCode ierr;
309   PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
310   PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
311   PetscTruth     roworiented = aij->roworiented;
312 
313   /* Some Variables required in the macro */
314   Mat            A = aij->A;
315   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data;
316   PetscInt       *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
317   MatScalar      *aa = a->a;
318   PetscTruth     ignorezeroentries = a->ignorezeroentries;
319   Mat            B = aij->B;
320   Mat_SeqAIJ     *b = (Mat_SeqAIJ*)B->data;
321   PetscInt       *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
322   MatScalar      *ba = b->a;
323 
324   PetscInt       *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
325   PetscInt       nonew = a->nonew;
326   MatScalar      *ap1,*ap2;
327 
328   PetscFunctionBegin;
329   if (v) PetscValidScalarPointer(v,6);
330   for (i=0; i<m; i++) {
331     if (im[i] < 0) continue;
332 #if defined(PETSC_USE_DEBUG)
333     if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
334 #endif
335     if (im[i] >= rstart && im[i] < rend) {
336       row      = im[i] - rstart;
337       lastcol1 = -1;
338       rp1      = aj + ai[row];
339       ap1      = aa + ai[row];
340       rmax1    = aimax[row];
341       nrow1    = ailen[row];
342       low1     = 0;
343       high1    = nrow1;
344       lastcol2 = -1;
345       rp2      = bj + bi[row];
346       ap2      = ba + bi[row];
347       rmax2    = bimax[row];
348       nrow2    = bilen[row];
349       low2     = 0;
350       high2    = nrow2;
351 
352       for (j=0; j<n; j++) {
353         if (v) {if (roworiented) value = v[i*n+j]; else value = v[i+j*m];} else value = 0.0;
354         if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
355         if (in[j] >= cstart && in[j] < cend){
356           col = in[j] - cstart;
357           MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
358         } else if (in[j] < 0) continue;
359 #if defined(PETSC_USE_DEBUG)
360         else if (in[j] >= mat->cmap->N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);}
361 #endif
362         else {
363           if (mat->was_assembled) {
364             if (!aij->colmap) {
365               ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
366             }
367 #if defined (PETSC_USE_CTABLE)
368             ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
369 	    col--;
370 #else
371             col = aij->colmap[in[j]] - 1;
372 #endif
373             if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
374               ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
375               col =  in[j];
376               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
377               B = aij->B;
378               b = (Mat_SeqAIJ*)B->data;
379               bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a;
380               rp2      = bj + bi[row];
381               ap2      = ba + bi[row];
382               rmax2    = bimax[row];
383               nrow2    = bilen[row];
384               low2     = 0;
385               high2    = nrow2;
386               bm       = aij->B->rmap->n;
387               ba = b->a;
388             }
389           } else col = in[j];
390           MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
391         }
392       }
393     } else {
394       if (!aij->donotstash) {
395         if (roworiented) {
396           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscTruth)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
397         } else {
398           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscTruth)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
399         }
400       }
401     }
402   }
403   PetscFunctionReturn(0);
404 }
405 
406 #undef __FUNCT__
407 #define __FUNCT__ "MatGetValues_MPIAIJ"
408 PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
409 {
410   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
411   PetscErrorCode ierr;
412   PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
413   PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
414 
415   PetscFunctionBegin;
416   for (i=0; i<m; i++) {
417     if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/
418     if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
419     if (idxm[i] >= rstart && idxm[i] < rend) {
420       row = idxm[i] - rstart;
421       for (j=0; j<n; j++) {
422         if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */
423         if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
424         if (idxn[j] >= cstart && idxn[j] < cend){
425           col = idxn[j] - cstart;
426           ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
427         } else {
428           if (!aij->colmap) {
429             ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
430           }
431 #if defined (PETSC_USE_CTABLE)
432           ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
433           col --;
434 #else
435           col = aij->colmap[idxn[j]] - 1;
436 #endif
437           if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
438           else {
439             ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
440           }
441         }
442       }
443     } else {
444       SETERRQ(PETSC_ERR_SUP,"Only local values currently supported");
445     }
446   }
447   PetscFunctionReturn(0);
448 }
449 
450 extern PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat,Vec,Vec);
451 
452 #undef __FUNCT__
453 #define __FUNCT__ "MatAssemblyBegin_MPIAIJ"
454 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
455 {
456   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
457   PetscErrorCode ierr;
458   PetscInt       nstash,reallocs;
459   InsertMode     addv;
460 
461   PetscFunctionBegin;
462   if (aij->donotstash) {
463     PetscFunctionReturn(0);
464   }
465 
466   /* make sure all processes are using either INSERT_VALUES or ADD_VALUES */
467   ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,((PetscObject)mat)->comm);CHKERRQ(ierr);
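  /* INSERT_VALUES and ADD_VALUES occupy distinct bits of InsertMode, so the bitwise OR
     over all processes equals (ADD_VALUES|INSERT_VALUES) exactly when the modes are mixed */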
468   if (addv == (ADD_VALUES|INSERT_VALUES)) {
469     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
470   }
471   mat->insertmode = addv; /* in case this processor had no cache */
472 
473   ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
474   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
475   ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
476   PetscFunctionReturn(0);
477 }
478 
479 #undef __FUNCT__
480 #define __FUNCT__ "MatAssemblyEnd_MPIAIJ"
481 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
482 {
483   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
484   Mat_SeqAIJ     *a=(Mat_SeqAIJ *)aij->A->data;
485   PetscErrorCode ierr;
486   PetscMPIInt    n;
487   PetscInt       i,j,rstart,ncols,flg;
488   PetscInt       *row,*col;
489   PetscTruth     other_disassembled;
490   PetscScalar    *val;
491   InsertMode     addv = mat->insertmode;
492 
493   /* do not use 'b = (Mat_SeqAIJ *)aij->B->data' as B can be reset in disassembly */
494   PetscFunctionBegin;
495   if (!aij->donotstash) {
496     while (1) {
497       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
498       if (!flg) break;
499 
500       for (i=0; i<n;) {
501         /* Now identify the consecutive vals belonging to the same row */
502         for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
503         if (j < n) ncols = j-i;
504         else       ncols = n-i;
505         /* Now assemble all these values with a single function call */
506         ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
507         i = j;
508       }
509     }
510     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
511   }
512   a->compressedrow.use     = PETSC_FALSE;
513   ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
514   ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);
515 
516   /* determine if any process has disassembled; if so we must
517      also disassemble ourselves, in order that we may reassemble. */
518   /*
519      if the nonzero structure of the submatrix B cannot change then we know that
520      no process disassembled, thus we can skip this step
521   */
522   if (!((Mat_SeqAIJ*)aij->B->data)->nonew)  {
523     ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,((PetscObject)mat)->comm);CHKERRQ(ierr);
524     if (mat->was_assembled && !other_disassembled) {
525       ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
526     }
527   }
528   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
529     ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
530   }
531   ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr);
532   ((Mat_SeqAIJ *)aij->B->data)->compressedrow.use = PETSC_TRUE; /* b->compressedrow.use */
533   ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
534   ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);
535 
536   ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
537   aij->rowvalues = 0;
538 
539   /* used by MatAXPY() */
540   a->xtoy = 0; ((Mat_SeqAIJ *)aij->B->data)->xtoy = 0;  /* b->xtoy = 0 */
541   a->XtoY = 0; ((Mat_SeqAIJ *)aij->B->data)->XtoY = 0;  /* b->XtoY = 0 */
542 
543   if (aij->diag) {ierr = VecDestroy(aij->diag);CHKERRQ(ierr);aij->diag = 0;}
544   if (a->inode.size) mat->ops->multdiagonalblock = MatMultDiagonalBlock_MPIAIJ;
545   PetscFunctionReturn(0);
546 }
547 
548 #undef __FUNCT__
549 #define __FUNCT__ "MatZeroEntries_MPIAIJ"
550 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
551 {
552   Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
553   PetscErrorCode ierr;
554 
555   PetscFunctionBegin;
556   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
557   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
558   PetscFunctionReturn(0);
559 }
560 
561 #undef __FUNCT__
562 #define __FUNCT__ "MatZeroRows_MPIAIJ"
563 PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag)
564 {
565   Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
566   PetscErrorCode ierr;
567   PetscMPIInt    size = l->size,imdex,n,rank = l->rank,tag = ((PetscObject)A)->tag,lastidx = -1;
568   PetscInt       i,*owners = A->rmap->range;
569   PetscInt       *nprocs,j,idx,nsends,row;
570   PetscInt       nmax,*svalues,*starts,*owner,nrecvs;
571   PetscInt       *rvalues,count,base,slen,*source;
572   PetscInt       *lens,*lrows,*values,rstart=A->rmap->rstart;
573   MPI_Comm       comm = ((PetscObject)A)->comm;
574   MPI_Request    *send_waits,*recv_waits;
575   MPI_Status     recv_status,*send_status;
576 #if defined(PETSC_USE_DEBUG)
577   PetscTruth     found = PETSC_FALSE;
578 #endif
579 
580   PetscFunctionBegin;
581   /*  first count number of contributors to each processor */
582   ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
583   ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
584   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/
585   j = 0;
586   for (i=0; i<N; i++) {
587     if (lastidx > (idx = rows[i])) j = 0;
588     lastidx = idx;
589     for (; j<size; j++) {
590       if (idx >= owners[j] && idx < owners[j+1]) {
591         nprocs[2*j]++;
592         nprocs[2*j+1] = 1;
593         owner[i] = j;
594 #if defined(PETSC_USE_DEBUG)
595         found = PETSC_TRUE;
596 #endif
597         break;
598       }
599     }
600 #if defined(PETSC_USE_DEBUG)
601     if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
602     found = PETSC_FALSE;
603 #endif
604   }
605   nsends = 0;  for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}
606 
607   /* inform other processors of number of messages and max length*/
608   ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);
609 
610   /* post receives:   */
611   ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr);
612   ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
613   for (i=0; i<nrecvs; i++) {
614     ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
615   }
616 
617   /* do sends:
618       1) starts[i] gives the starting index in svalues for stuff going to
619          the ith processor
620   */
621   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr);
622   ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
623   ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr);
624   starts[0] = 0;
625   for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
626   for (i=0; i<N; i++) {
627     svalues[starts[owner[i]]++] = rows[i];
628   }
629 
630   starts[0] = 0;
631   for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
632   count = 0;
633   for (i=0; i<size; i++) {
634     if (nprocs[2*i+1]) {
635       ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
636     }
637   }
638   ierr = PetscFree(starts);CHKERRQ(ierr);
639 
640   base = owners[rank];
641 
642   /*  wait on receives */
643   ierr   = PetscMalloc2(nrecvs,PetscInt,&lens,nrecvs,PetscInt,&source);CHKERRQ(ierr);
644   count  = nrecvs; slen = 0;
645   while (count) {
646     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
647     /* unpack receives into our local space */
648     ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);
649     source[imdex]  = recv_status.MPI_SOURCE;
650     lens[imdex]    = n;
651     slen          += n;
652     count--;
653   }
654   ierr = PetscFree(recv_waits);CHKERRQ(ierr);
655 
656   /* move the data into the send scatter */
657   ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr);
658   count = 0;
659   for (i=0; i<nrecvs; i++) {
660     values = rvalues + i*nmax;
661     for (j=0; j<lens[i]; j++) {
662       lrows[count++] = values[j] - base;
663     }
664   }
665   ierr = PetscFree(rvalues);CHKERRQ(ierr);
666   ierr = PetscFree2(lens,source);CHKERRQ(ierr);
667   ierr = PetscFree(owner);CHKERRQ(ierr);
668   ierr = PetscFree(nprocs);CHKERRQ(ierr);
669 
670   /* actually zap the local rows */
671   /*
672         Zero the required rows. If the "diagonal block" of the matrix
673      is square and the user wishes to set the diagonal, we use separate
674      code so that MatSetValues() is not called for each diagonal entry;
675      that would allocate new memory, cause many mallocs, and slow things down.
676   */
678   /* must zero l->B before l->A because the (diag) case below may put values into l->B */
679   ierr = MatZeroRows(l->B,slen,lrows,0.0);CHKERRQ(ierr);
680   if ((diag != 0.0) && (l->A->rmap->N == l->A->cmap->N)) {
681     ierr      = MatZeroRows(l->A,slen,lrows,diag);CHKERRQ(ierr);
682   } else if (diag != 0.0) {
683     ierr = MatZeroRows(l->A,slen,lrows,0.0);CHKERRQ(ierr);
684     if (((Mat_SeqAIJ*)l->A->data)->nonew) {
685       SETERRQ(PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\n\
686 MAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
687     }
688     for (i = 0; i < slen; i++) {
689       row  = lrows[i] + rstart;
690       ierr = MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr);
691     }
692     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
693     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
694   } else {
695     ierr = MatZeroRows(l->A,slen,lrows,0.0);CHKERRQ(ierr);
696   }
697   ierr = PetscFree(lrows);CHKERRQ(ierr);
698 
699   /* wait on sends */
700   if (nsends) {
701     ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
702     ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
703     ierr = PetscFree(send_status);CHKERRQ(ierr);
704   }
705   ierr = PetscFree(send_waits);CHKERRQ(ierr);
706   ierr = PetscFree(svalues);CHKERRQ(ierr);
707 
708   PetscFunctionReturn(0);
709 }
710 
711 #undef __FUNCT__
712 #define __FUNCT__ "MatMult_MPIAIJ"
713 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
714 {
715   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
716   PetscErrorCode ierr;
717   PetscInt       nt;
718 
719   PetscFunctionBegin;
720   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
721   if (nt != A->cmap->n) {
722     SETERRQ2(PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt);
723   }
724   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
725   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
726   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
727   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
728   PetscFunctionReturn(0);
729 }
730 
731 #undef __FUNCT__
732 #define __FUNCT__ "MatMultDiagonalBlock_MPIAIJ"
733 PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx)
734 {
735   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
736   PetscErrorCode ierr;
737 
738   PetscFunctionBegin;
739   ierr = MatMultDiagonalBlock(a->A,bb,xx);CHKERRQ(ierr);
740   PetscFunctionReturn(0);
741 }
742 
743 #undef __FUNCT__
744 #define __FUNCT__ "MatMultAdd_MPIAIJ"
745 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
746 {
747   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
748   PetscErrorCode ierr;
749 
750   PetscFunctionBegin;
751   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
752   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
753   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
754   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
755   PetscFunctionReturn(0);
756 }
757 
758 #undef __FUNCT__
759 #define __FUNCT__ "MatMultTranspose_MPIAIJ"
760 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
761 {
762   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
763   PetscErrorCode ierr;
764   PetscTruth     merged;
765 
766   PetscFunctionBegin;
767   ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr);
768   /* do nondiagonal part */
769   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
770   if (!merged) {
771     /* send it on its way */
772     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
773     /* do local part */
774     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
775     /* receive remote parts: note this assumes the values are not actually
776        added into yy until VecScatterEnd() on the next line */
777     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
778   } else {
779     /* do local part */
780     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
781     /* send it on its way */
782     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
783     /* values actually were received in the Begin() but we need to call this nop */
784     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
785   }
786   PetscFunctionReturn(0);
787 }
788 
789 EXTERN_C_BEGIN
790 #undef __FUNCT__
791 #define __FUNCT__ "MatIsTranspose_MPIAIJ"
792 PetscErrorCode PETSCMAT_DLLEXPORT MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscTruth *f)
793 {
794   MPI_Comm       comm;
795   Mat_MPIAIJ     *Aij = (Mat_MPIAIJ *) Amat->data, *Bij;
796   Mat            Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs;
797   IS             Me,Notme;
798   PetscErrorCode ierr;
799   PetscInt       M,N,first,last,*notme,i;
800   PetscMPIInt    size;
801 
802   PetscFunctionBegin;
803 
804   /* Easy test: symmetric diagonal block */
805   Bij = (Mat_MPIAIJ *) Bmat->data; Bdia = Bij->A;
806   ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr);
807   if (!*f) PetscFunctionReturn(0);
808   ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr);
809   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
810   if (size == 1) PetscFunctionReturn(0);
811 
812   /* Hard test: off-diagonal block. This takes a MatGetSubMatrix. */
813   ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr);
814   ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr);
815   ierr = PetscMalloc((N-last+first)*sizeof(PetscInt),&notme);CHKERRQ(ierr);
816   for (i=0; i<first; i++) notme[i] = i;
817   for (i=last; i<M; i++) notme[i-last+first] = i;
818   ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,&Notme);CHKERRQ(ierr);
819   ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr);
820   ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr);
821   Aoff = Aoffs[0];
822   ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr);
823   Boff = Boffs[0];
824   ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr);
825   ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr);
826   ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr);
827   ierr = ISDestroy(Me);CHKERRQ(ierr);
828   ierr = ISDestroy(Notme);CHKERRQ(ierr);
829 
830   PetscFunctionReturn(0);
831 }
832 EXTERN_C_END
833 
834 #undef __FUNCT__
835 #define __FUNCT__ "MatMultTransposeAdd_MPIAIJ"
836 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
837 {
838   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
839   PetscErrorCode ierr;
840 
841   PetscFunctionBegin;
842   /* do nondiagonal part */
843   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
844   /* send it on its way */
845   ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
846   /* do local part */
847   ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
848   /* receive remote parts */
849   ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
850   PetscFunctionReturn(0);
851 }
852 
853 /*
854   This only works correctly for square matrices where the subblock A->A is the
855    diagonal block
856 */
857 #undef __FUNCT__
858 #define __FUNCT__ "MatGetDiagonal_MPIAIJ"
859 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v)
860 {
861   PetscErrorCode ierr;
862   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
863 
864   PetscFunctionBegin;
865   if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
866   if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) {
867     SETERRQ(PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
868   }
869   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
870   PetscFunctionReturn(0);
871 }
872 
873 #undef __FUNCT__
874 #define __FUNCT__ "MatScale_MPIAIJ"
875 PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa)
876 {
877   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
878   PetscErrorCode ierr;
879 
880   PetscFunctionBegin;
881   ierr = MatScale(a->A,aa);CHKERRQ(ierr);
882   ierr = MatScale(a->B,aa);CHKERRQ(ierr);
883   PetscFunctionReturn(0);
884 }
885 
886 #undef __FUNCT__
887 #define __FUNCT__ "MatDestroy_MPIAIJ"
888 PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
889 {
890   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
891   PetscErrorCode ierr;
892 
893   PetscFunctionBegin;
894 #if defined(PETSC_USE_LOG)
895   PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N);
896 #endif
897   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
898   if (aij->diag) {ierr = VecDestroy(aij->diag);CHKERRQ(ierr);}
899   ierr = MatDestroy(aij->A);CHKERRQ(ierr);
900   ierr = MatDestroy(aij->B);CHKERRQ(ierr);
901 #if defined (PETSC_USE_CTABLE)
902   if (aij->colmap) {ierr = PetscTableDestroy(aij->colmap);CHKERRQ(ierr);}
903 #else
904   ierr = PetscFree(aij->colmap);CHKERRQ(ierr);
905 #endif
906   ierr = PetscFree(aij->garray);CHKERRQ(ierr);
907   if (aij->lvec)   {ierr = VecDestroy(aij->lvec);CHKERRQ(ierr);}
908   if (aij->Mvctx)  {ierr = VecScatterDestroy(aij->Mvctx);CHKERRQ(ierr);}
909   ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
910   ierr = PetscFree(aij->ld);CHKERRQ(ierr);
911   ierr = PetscFree(aij);CHKERRQ(ierr);
912 
913   ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
914   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr);
915   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr);
916   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr);
917   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C","",PETSC_NULL);CHKERRQ(ierr);
918   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
919   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr);
920   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C","",PETSC_NULL);CHKERRQ(ierr);
921   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C","",PETSC_NULL);CHKERRQ(ierr);
922   PetscFunctionReturn(0);
923 }
924 
925 #undef __FUNCT__
926 #define __FUNCT__ "MatView_MPIAIJ_Binary"
927 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
928 {
929   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
930   Mat_SeqAIJ*       A = (Mat_SeqAIJ*)aij->A->data;
931   Mat_SeqAIJ*       B = (Mat_SeqAIJ*)aij->B->data;
932   PetscErrorCode    ierr;
933   PetscMPIInt       rank,size,tag = ((PetscObject)viewer)->tag;
934   int               fd;
935   PetscInt          nz,header[4],*row_lengths,*range=0,rlen,i;
936   PetscInt          nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap->rstart,rnz;
937   PetscScalar       *column_values;
938 
939   PetscFunctionBegin;
940   ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr);
941   ierr = MPI_Comm_size(((PetscObject)mat)->comm,&size);CHKERRQ(ierr);
942   nz   = A->nz + B->nz;
943   if (!rank) {
944     header[0] = MAT_FILE_COOKIE;
945     header[1] = mat->rmap->N;
946     header[2] = mat->cmap->N;
947     ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
948     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
949     ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
950     /* get largest number of rows any processor has */
951     rlen = mat->rmap->n;
952     range = mat->rmap->range;
953     for (i=1; i<size; i++) {
954       rlen = PetscMax(rlen,range[i+1] - range[i]);
955     }
956   } else {
957     ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
958     rlen = mat->rmap->n;
959   }
960 
961   /* load up the local row counts */
962   ierr = PetscMalloc((rlen+1)*sizeof(PetscInt),&row_lengths);CHKERRQ(ierr);
963   for (i=0; i<mat->rmap->n; i++) {
964     row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
965   }
966 
967   /* store the row lengths to the file */
968   if (!rank) {
969     MPI_Status status;
970     ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap->n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
971     for (i=1; i<size; i++) {
972       rlen = range[i+1] - range[i];
973       ierr = MPI_Recv(row_lengths,rlen,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
974       ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
975     }
976   } else {
977     ierr = MPI_Send(row_lengths,mat->rmap->n,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
978   }
979   ierr = PetscFree(row_lengths);CHKERRQ(ierr);
980 
981   /* load up the local column indices */
982   nzmax = nz; /* process 0 needs as much space as the largest amount any process needs */
983   ierr = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
984   ierr = PetscMalloc((nzmax+1)*sizeof(PetscInt),&column_indices);CHKERRQ(ierr);
985   cnt  = 0;
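  /* for each row emit the off-diagonal (B) columns left of the diagonal block, then the
     diagonal (A) columns shifted by cstart, then the remaining B columns, so the global
     column indices are written in increasing order */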
986   for (i=0; i<mat->rmap->n; i++) {
987     for (j=B->i[i]; j<B->i[i+1]; j++) {
988       if ( (col = garray[B->j[j]]) > cstart) break;
989       column_indices[cnt++] = col;
990     }
991     for (k=A->i[i]; k<A->i[i+1]; k++) {
992       column_indices[cnt++] = A->j[k] + cstart;
993     }
994     for (; j<B->i[i+1]; j++) {
995       column_indices[cnt++] = garray[B->j[j]];
996     }
997   }
998   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
999 
1000   /* store the column indices to the file */
1001   if (!rank) {
1002     MPI_Status status;
1003     ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1004     for (i=1; i<size; i++) {
1005       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
1006       if (rnz > nzmax) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: rnz = %D nzmax = %D",rnz,nzmax);
1007       ierr = MPI_Recv(column_indices,rnz,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
1008       ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1009     }
1010   } else {
1011     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1012     ierr = MPI_Send(column_indices,nz,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1013   }
1014   ierr = PetscFree(column_indices);CHKERRQ(ierr);
1015 
1016   /* load up the local column values */
1017   ierr = PetscMalloc((nzmax+1)*sizeof(PetscScalar),&column_values);CHKERRQ(ierr);
1018   cnt  = 0;
1019   for (i=0; i<mat->rmap->n; i++) {
1020     for (j=B->i[i]; j<B->i[i+1]; j++) {
1021       if ( garray[B->j[j]] > cstart) break;
1022       column_values[cnt++] = B->a[j];
1023     }
1024     for (k=A->i[i]; k<A->i[i+1]; k++) {
1025       column_values[cnt++] = A->a[k];
1026     }
1027     for (; j<B->i[i+1]; j++) {
1028       column_values[cnt++] = B->a[j];
1029     }
1030   }
1031   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
1032 
1033   /* store the column values to the file */
1034   if (!rank) {
1035     MPI_Status status;
1036     ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1037     for (i=1; i<size; i++) {
1038       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
1039       if (rnz > nzmax) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: rnz = %D nzmax = %D",rnz,nzmax);
1040       ierr = MPI_Recv(column_values,rnz,MPIU_SCALAR,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
1041       ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1042     }
1043   } else {
1044     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1045     ierr = MPI_Send(column_values,nz,MPIU_SCALAR,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1046   }
1047   ierr = PetscFree(column_values);CHKERRQ(ierr);
1048   PetscFunctionReturn(0);
1049 }
1050 
1051 #undef __FUNCT__
1052 #define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket"
1053 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
1054 {
1055   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
1056   PetscErrorCode    ierr;
1057   PetscMPIInt       rank = aij->rank,size = aij->size;
1058   PetscTruth        isdraw,iascii,isbinary;
1059   PetscViewer       sviewer;
1060   PetscViewerFormat format;
1061 
1062   PetscFunctionBegin;
1063   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
1064   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
1065   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
1066   if (iascii) {
1067     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1068     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1069       MatInfo    info;
1070       PetscTruth inodes;
1071 
1072       ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr);
1073       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
1074       ierr = MatInodeGetInodeSizes(aij->A,PETSC_NULL,(PetscInt **)&inodes,PETSC_NULL);CHKERRQ(ierr);
1075       if (!inodes) {
1076         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, not using I-node routines\n",
1077 					      rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
1078       } else {
1079         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, using I-node routines\n",
1080 		    rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
1081       }
1082       ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
1083       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1084       ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
1085       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1086       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1087       ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
1088       ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr);
1089       PetscFunctionReturn(0);
1090     } else if (format == PETSC_VIEWER_ASCII_INFO) {
1091       PetscInt   inodecount,inodelimit,*inodes;
1092       ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr);
1093       if (inodes) {
1094         ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr);
1095       } else {
1096         ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr);
1097       }
1098       PetscFunctionReturn(0);
1099     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1100       PetscFunctionReturn(0);
1101     }
1102   } else if (isbinary) {
1103     if (size == 1) {
1104       ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1105       ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
1106     } else {
1107       ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr);
1108     }
1109     PetscFunctionReturn(0);
1110   } else if (isdraw) {
1111     PetscDraw  draw;
1112     PetscTruth isnull;
1113     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
1114     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
1115   }
1116 
1117   if (size == 1) {
1118     ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1119     ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
1120   } else {
1121     /* assemble the entire matrix onto first processor. */
1122     Mat         A;
1123     Mat_SeqAIJ  *Aloc;
1124     PetscInt    M = mat->rmap->N,N = mat->cmap->N,m,*ai,*aj,row,*cols,i,*ct;
1125     MatScalar   *a;
1126 
1127     if (mat->rmap->N > 1024) {
1128       PetscTruth flg = PETSC_FALSE;
1129 
1130       ierr = PetscOptionsGetTruth(((PetscObject) mat)->prefix, "-mat_ascii_output_large", &flg,PETSC_NULL);CHKERRQ(ierr);
1131       if (!flg) {
1132         SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"ASCII matrix output not allowed for matrices with more than 1024 rows, use binary format instead.\nYou can override this restriction using -mat_ascii_output_large.");
1133       }
1134     }
1135 
1136     ierr = MatCreate(((PetscObject)mat)->comm,&A);CHKERRQ(ierr);
1137     if (!rank) {
1138       ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
1139     } else {
1140       ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
1141     }
1142     /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */
1143     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
1144     ierr = MatMPIAIJSetPreallocation(A,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
1145     ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr);
1146 
1147     /* copy over the A part */
1148     Aloc = (Mat_SeqAIJ*)aij->A->data;
1149     m = aij->A->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1150     row = mat->rmap->rstart;
1151     for (i=0; i<ai[m]; i++) {aj[i] += mat->cmap->rstart ;}
1152     for (i=0; i<m; i++) {
1153       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
1154       row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
1155     }
1156     aj = Aloc->j;
1157     for (i=0; i<ai[m]; i++) {aj[i] -= mat->cmap->rstart;}
1158 
1159     /* copy over the B part */
1160     Aloc = (Mat_SeqAIJ*)aij->B->data;
1161     m    = aij->B->rmap->n;  ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1162     row  = mat->rmap->rstart;
1163     ierr = PetscMalloc((ai[m]+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr);
1164     ct   = cols;
1165     for (i=0; i<ai[m]; i++) {cols[i] = aij->garray[aj[i]];}
1166     for (i=0; i<m; i++) {
1167       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
1168       row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
1169     }
1170     ierr = PetscFree(ct);CHKERRQ(ierr);
1171     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1172     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1173     /*
1174        Everyone has to call to draw the matrix since the graphics waits are
1175        synchronized across all processors that share the PetscDraw object
1176     */
1177     ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
1178     if (!rank) {
1179       ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1180       ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
1181     }
1182     ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
1183     ierr = MatDestroy(A);CHKERRQ(ierr);
1184   }
1185   PetscFunctionReturn(0);
1186 }
1187 
1188 #undef __FUNCT__
1189 #define __FUNCT__ "MatView_MPIAIJ"
1190 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer)
1191 {
1192   PetscErrorCode ierr;
1193   PetscTruth     iascii,isdraw,issocket,isbinary;
1194 
1195   PetscFunctionBegin;
1196   ierr  = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
1197   ierr  = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
1198   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
1199   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr);
1200   if (iascii || isdraw || isbinary || issocket) {
1201     ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
1202   } else {
1203     SETERRQ1(PETSC_ERR_SUP,"Viewer type %s not supported by MPIAIJ matrices",((PetscObject)viewer)->type_name);
1204   }
1205   PetscFunctionReturn(0);
1206 }
1207 
1208 #undef __FUNCT__
1209 #define __FUNCT__ "MatSOR_MPIAIJ"
1210 PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
1211 {
1212   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1213   PetscErrorCode ierr;
1214   Vec            bb1 = 0;
1215   PetscTruth     hasop;
1216 
1217   PetscFunctionBegin;
1218   if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) {
1219     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
1220   }
1221 
1222   if (flag == SOR_APPLY_UPPER) {
1223     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1224     PetscFunctionReturn(0);
1225   }
1226 
1227   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
1228     if (flag & SOR_ZERO_INITIAL_GUESS) {
1229       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1230       its--;
1231     }
1232 
1233     while (its--) {
1234       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1235       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1236 
1237       /* update rhs: bb1 = bb - B*x */
1238       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1239       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1240 
1241       /* local sweep */
1242       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1243     }
1244   } else if (flag & SOR_LOCAL_FORWARD_SWEEP){
1245     if (flag & SOR_ZERO_INITIAL_GUESS) {
1246       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1247       its--;
1248     }
1249     while (its--) {
1250       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1251       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1252 
1253       /* update rhs: bb1 = bb - B*x */
1254       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1255       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1256 
1257       /* local sweep */
1258       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1259     }
1260   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){
1261     if (flag & SOR_ZERO_INITIAL_GUESS) {
1262       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1263       its--;
1264     }
1265     while (its--) {
1266       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1267       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1268 
1269       /* update rhs: bb1 = bb - B*x */
1270       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1271       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1272 
1273       /* local sweep */
1274       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1275     }
1276   }  else if (flag & SOR_EISENSTAT) {
1277     Vec         xx1;
1278 
1279     ierr = VecDuplicate(bb,&xx1);CHKERRQ(ierr);
1280     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr);
1281 
1282     ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1283     ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1284     if (!mat->diag) {
1285       ierr = MatGetVecs(matin,&mat->diag,PETSC_NULL);CHKERRQ(ierr);
1286       ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr);
1287     }
1288     ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr);
1289     if (hasop) {
1290       ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr);
1291     } else {
1292       ierr = VecPointwiseMult(bb1,mat->diag,xx);CHKERRQ(ierr);
1293     }
1294     ierr = VecAYPX(bb1,(omega-2.0)/omega,bb);CHKERRQ(ierr);
1295 
1296     ierr = MatMultAdd(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr);
1297 
1298     /* local sweep */
1299     ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr);
1300     ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr);
1301     ierr = VecDestroy(xx1);CHKERRQ(ierr);
1302   } else {
1303     SETERRQ(PETSC_ERR_SUP,"Parallel SOR not supported");
1304   }
1305 
1306   if (bb1) {ierr = VecDestroy(bb1);CHKERRQ(ierr);}
1307   PetscFunctionReturn(0);
1308 }
1309 
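/*
   Illustration (not part of the library): the routine above is normally reached through
   PCSOR, but it can also be called directly.  A minimal sketch, assuming A is an assembled
   MATMPIAIJ matrix and b, x are conforming vectors; the flag values are standard MatSORType
   entries:

       PetscErrorCode ierr;
       PetscReal      omega = 1.0, fshift = 0.0;
       PetscInt       its = 2, lits = 1;
       ierr = MatSOR(A,b,omega,
                     (MatSORType)(SOR_LOCAL_SYMMETRIC_SWEEP | SOR_ZERO_INITIAL_GUESS),
                     fshift,its,lits,x);CHKERRQ(ierr);

   Each outer iteration scatters the current x into lvec, forms bb1 = bb - B*lvec (the
   off-diagonal contribution), and then runs lits local sweeps on the diagonal block A.
*/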
1310 #undef __FUNCT__
1311 #define __FUNCT__ "MatPermute_MPIAIJ"
1312 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B)
1313 {
1314   MPI_Comm       comm,pcomm;
1315   PetscInt       first,local_size,nrows;
1316   const PetscInt *rows;
1317   PetscMPIInt    size;
1318   IS             crowp,growp,irowp,lrowp,lcolp,icolp;
1319   PetscErrorCode ierr;
1320 
1321   PetscFunctionBegin;
1322   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
1323   /* make a collective version of 'rowp' */
1324   ierr = PetscObjectGetComm((PetscObject)rowp,&pcomm);CHKERRQ(ierr);
1325   if (pcomm==comm) {
1326     crowp = rowp;
1327   } else {
1328     ierr = ISGetSize(rowp,&nrows);CHKERRQ(ierr);
1329     ierr = ISGetIndices(rowp,&rows);CHKERRQ(ierr);
1330     ierr = ISCreateGeneral(comm,nrows,rows,&crowp);CHKERRQ(ierr);
1331     ierr = ISRestoreIndices(rowp,&rows);CHKERRQ(ierr);
1332   }
1333   /* collect the global row permutation and invert it */
1334   ierr = ISAllGather(crowp,&growp);CHKERRQ(ierr);
1335   ierr = ISSetPermutation(growp);CHKERRQ(ierr);
1336   if (pcomm!=comm) {
1337     ierr = ISDestroy(crowp);CHKERRQ(ierr);
1338   }
1339   ierr = ISInvertPermutation(growp,PETSC_DECIDE,&irowp);CHKERRQ(ierr);
1340   /* get the local target indices */
1341   ierr = MatGetOwnershipRange(A,&first,PETSC_NULL);CHKERRQ(ierr);
1342   ierr = MatGetLocalSize(A,&local_size,PETSC_NULL);CHKERRQ(ierr);
1343   ierr = ISGetIndices(irowp,&rows);CHKERRQ(ierr);
1344   ierr = ISCreateGeneral(MPI_COMM_SELF,local_size,rows+first,&lrowp);CHKERRQ(ierr);
1345   ierr = ISRestoreIndices(irowp,&rows);CHKERRQ(ierr);
1346   ierr = ISDestroy(irowp);CHKERRQ(ierr);
1347   /* the column permutation is so much easier;
1348      make a local version of 'colp' and invert it */
1349   ierr = PetscObjectGetComm((PetscObject)colp,&pcomm);CHKERRQ(ierr);
1350   ierr = MPI_Comm_size(pcomm,&size);CHKERRQ(ierr);
1351   if (size==1) {
1352     lcolp = colp;
1353   } else {
1354     ierr = ISGetSize(colp,&nrows);CHKERRQ(ierr);
1355     ierr = ISGetIndices(colp,&rows);CHKERRQ(ierr);
1356     ierr = ISCreateGeneral(MPI_COMM_SELF,nrows,rows,&lcolp);CHKERRQ(ierr);
1357   }
1358   ierr = ISSetPermutation(lcolp);CHKERRQ(ierr);
1359   ierr = ISInvertPermutation(lcolp,PETSC_DECIDE,&icolp);CHKERRQ(ierr);
1360   ierr = ISSetPermutation(icolp);CHKERRQ(ierr);
1361   if (size>1) {
1362     ierr = ISRestoreIndices(colp,&rows);CHKERRQ(ierr);
1363     ierr = ISDestroy(lcolp);CHKERRQ(ierr);
1364   }
1365   /* now we just get the submatrix */
1366   ierr = MatGetSubMatrix_MPIAIJ_Private(A,lrowp,icolp,local_size,MAT_INITIAL_MATRIX,B);CHKERRQ(ierr);
1367   /* clean up */
1368   ierr = ISDestroy(lrowp);CHKERRQ(ierr);
1369   ierr = ISDestroy(icolp);CHKERRQ(ierr);
1370   PetscFunctionReturn(0);
1371 }
1372 
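/*
   Illustration (not part of the library): a minimal sketch of reaching the routine above
   through the public interface.  The identity permutations built with ISCreateStride are
   purely for illustration; any permutation index sets with the matching layouts would do:

       IS       rowp,colp;
       Mat      B;
       PetscInt rstart,rend,N;
       ierr = MatGetSize(A,PETSC_NULL,&N);CHKERRQ(ierr);
       ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
       ierr = ISCreateStride(PETSC_COMM_WORLD,rend-rstart,rstart,1,&rowp);CHKERRQ(ierr);
       ierr = ISCreateStride(PETSC_COMM_SELF,N,0,1,&colp);CHKERRQ(ierr);
       ierr = MatPermute(A,rowp,colp,&B);CHKERRQ(ierr);

   The routine gathers the row permutation onto every process, inverts it, and then extracts
   the permuted matrix as a submatrix of A.
*/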
1373 #undef __FUNCT__
1374 #define __FUNCT__ "MatGetInfo_MPIAIJ"
1375 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1376 {
1377   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1378   Mat            A = mat->A,B = mat->B;
1379   PetscErrorCode ierr;
1380   PetscReal      isend[5],irecv[5];
1381 
1382   PetscFunctionBegin;
1383   info->block_size     = 1.0;
1384   ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1385   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1386   isend[3] = info->memory;  isend[4] = info->mallocs;
1387   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1388   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1389   isend[3] += info->memory;  isend[4] += info->mallocs;
1390   if (flag == MAT_LOCAL) {
1391     info->nz_used      = isend[0];
1392     info->nz_allocated = isend[1];
1393     info->nz_unneeded  = isend[2];
1394     info->memory       = isend[3];
1395     info->mallocs      = isend[4];
1396   } else if (flag == MAT_GLOBAL_MAX) {
1397     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,((PetscObject)matin)->comm);CHKERRQ(ierr);
1398     info->nz_used      = irecv[0];
1399     info->nz_allocated = irecv[1];
1400     info->nz_unneeded  = irecv[2];
1401     info->memory       = irecv[3];
1402     info->mallocs      = irecv[4];
1403   } else if (flag == MAT_GLOBAL_SUM) {
1404     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,((PetscObject)matin)->comm);CHKERRQ(ierr);
1405     info->nz_used      = irecv[0];
1406     info->nz_allocated = irecv[1];
1407     info->nz_unneeded  = irecv[2];
1408     info->memory       = irecv[3];
1409     info->mallocs      = irecv[4];
1410   }
1411   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1412   info->fill_ratio_needed = 0;
1413   info->factor_mallocs    = 0;
1414 
1415   PetscFunctionReturn(0);
1416 }
1417 
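/*
   Illustration (not part of the library): the five quantities packed into isend[]/irecv[]
   above surface through the MatInfo struct.  A minimal usage sketch, assuming A is an
   assembled parallel matrix:

       MatInfo info;
       ierr = MatGetInfo(A,MAT_GLOBAL_SUM,&info);CHKERRQ(ierr);
       ierr = PetscPrintf(PETSC_COMM_WORLD,"nonzeros used %g, allocated %g, mallocs %g\n",
                          info.nz_used,info.nz_allocated,info.mallocs);CHKERRQ(ierr);

   MAT_LOCAL reports this process only, MAT_GLOBAL_MAX the per-process maximum, and
   MAT_GLOBAL_SUM the sum over the communicator, exactly as reduced above.
*/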
1418 #undef __FUNCT__
1419 #define __FUNCT__ "MatSetOption_MPIAIJ"
1420 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscTruth flg)
1421 {
1422   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1423   PetscErrorCode ierr;
1424 
1425   PetscFunctionBegin;
1426   switch (op) {
1427   case MAT_NEW_NONZERO_LOCATIONS:
1428   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1429   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1430   case MAT_KEEP_NONZERO_PATTERN:
1431   case MAT_NEW_NONZERO_LOCATION_ERR:
1432   case MAT_USE_INODES:
1433   case MAT_IGNORE_ZERO_ENTRIES:
1434     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1435     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1436     break;
1437   case MAT_ROW_ORIENTED:
1438     a->roworiented = flg;
1439     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1440     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1441     break;
1442   case MAT_NEW_DIAGONALS:
1443     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1444     break;
1445   case MAT_IGNORE_OFF_PROC_ENTRIES:
1446     a->donotstash = PETSC_TRUE;
1447     break;
1448   case MAT_SYMMETRIC:
1449     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1450     break;
1451   case MAT_STRUCTURALLY_SYMMETRIC:
1452     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1453     break;
1454   case MAT_HERMITIAN:
1455     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1456     break;
1457   case MAT_SYMMETRY_ETERNAL:
1458     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1459     break;
1460   default:
1461     SETERRQ1(PETSC_ERR_SUP,"unknown option %d",op);
1462   }
1463   PetscFunctionReturn(0);
1464 }
1465 
1466 #undef __FUNCT__
1467 #define __FUNCT__ "MatGetRow_MPIAIJ"
1468 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1469 {
1470   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1471   PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
1472   PetscErrorCode ierr;
1473   PetscInt       i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart;
1474   PetscInt       nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend;
1475   PetscInt       *cmap,*idx_p;
1476 
1477   PetscFunctionBegin;
1478   if (mat->getrowactive) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Already active");
1479   mat->getrowactive = PETSC_TRUE;
1480 
1481   if (!mat->rowvalues && (idx || v)) {
1482     /*
1483         allocate enough space to hold information from the longest row.
1484     */
1485     Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data;
1486     PetscInt   max = 1,tmp;
1487     for (i=0; i<matin->rmap->n; i++) {
1488       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1489       if (max < tmp) { max = tmp; }
1490     }
1491     ierr = PetscMalloc2(max,PetscScalar,&mat->rowvalues,max,PetscInt,&mat->rowindices);CHKERRQ(ierr);
1492   }
1493 
1494   if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Only local rows");
1495   lrow = row - rstart;
1496 
1497   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1498   if (!v)   {pvA = 0; pvB = 0;}
1499   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1500   ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1501   ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1502   nztot = nzA + nzB;
1503 
1504   cmap  = mat->garray;
1505   if (v  || idx) {
1506     if (nztot) {
1507       /* Sort by increasing column numbers, assuming A and B already sorted */
1508       PetscInt imark = -1;
1509       if (v) {
1510         *v = v_p = mat->rowvalues;
1511         for (i=0; i<nzB; i++) {
1512           if (cmap[cworkB[i]] < cstart)   v_p[i] = vworkB[i];
1513           else break;
1514         }
1515         imark = i;
1516         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1517         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1518       }
1519       if (idx) {
1520         *idx = idx_p = mat->rowindices;
1521         if (imark > -1) {
1522           for (i=0; i<imark; i++) {
1523             idx_p[i] = cmap[cworkB[i]];
1524           }
1525         } else {
1526           for (i=0; i<nzB; i++) {
1527             if (cmap[cworkB[i]] < cstart)   idx_p[i] = cmap[cworkB[i]];
1528             else break;
1529           }
1530           imark = i;
1531         }
1532         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart + cworkA[i];
1533         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]];
1534       }
1535     } else {
1536       if (idx) *idx = 0;
1537       if (v)   *v   = 0;
1538     }
1539   }
1540   *nz = nztot;
1541   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1542   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1543   PetscFunctionReturn(0);
1544 }
1545 
1546 #undef __FUNCT__
1547 #define __FUNCT__ "MatRestoreRow_MPIAIJ"
1548 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1549 {
1550   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1551 
1552   PetscFunctionBegin;
1553   if (!aij->getrowactive) {
1554     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
1555   }
1556   aij->getrowactive = PETSC_FALSE;
1557   PetscFunctionReturn(0);
1558 }
1559 
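/*
   Illustration (not part of the library): MatGetRow_MPIAIJ() merges one row of the diagonal
   (A) and off-diagonal (B) blocks into a single sorted row with global column indices.  A
   minimal sketch of the usual access pattern; only locally owned rows may be requested, and
   each MatGetRow() must be matched by MatRestoreRow():

       PetscInt          row,rstart,rend,ncols;
       const PetscInt    *cols;
       const PetscScalar *vals;
       ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
       for (row=rstart; row<rend; row++) {
         ierr = MatGetRow(A,row,&ncols,&cols,&vals);CHKERRQ(ierr);
         /* ... use ncols, cols[], vals[] ... */
         ierr = MatRestoreRow(A,row,&ncols,&cols,&vals);CHKERRQ(ierr);
       }
*/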
1560 #undef __FUNCT__
1561 #define __FUNCT__ "MatNorm_MPIAIJ"
1562 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm)
1563 {
1564   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1565   Mat_SeqAIJ     *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data;
1566   PetscErrorCode ierr;
1567   PetscInt       i,j,cstart = mat->cmap->rstart;
1568   PetscReal      sum = 0.0;
1569   MatScalar      *v;
1570 
1571   PetscFunctionBegin;
1572   if (aij->size == 1) {
1573     ierr =  MatNorm(aij->A,type,norm);CHKERRQ(ierr);
1574   } else {
1575     if (type == NORM_FROBENIUS) {
1576       v = amat->a;
1577       for (i=0; i<amat->nz; i++) {
1578 #if defined(PETSC_USE_COMPLEX)
1579         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1580 #else
1581         sum += (*v)*(*v); v++;
1582 #endif
1583       }
1584       v = bmat->a;
1585       for (i=0; i<bmat->nz; i++) {
1586 #if defined(PETSC_USE_COMPLEX)
1587         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1588 #else
1589         sum += (*v)*(*v); v++;
1590 #endif
1591       }
1592       ierr = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPI_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr);
1593       *norm = sqrt(*norm);
1594     } else if (type == NORM_1) { /* max column norm */
1595       PetscReal *tmp,*tmp2;
1596       PetscInt  *jj,*garray = aij->garray;
1597       ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr);
1598       ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscReal),&tmp2);CHKERRQ(ierr);
1599       ierr = PetscMemzero(tmp,mat->cmap->N*sizeof(PetscReal));CHKERRQ(ierr);
1600       *norm = 0.0;
1601       v = amat->a; jj = amat->j;
1602       for (j=0; j<amat->nz; j++) {
1603         tmp[cstart + *jj++ ] += PetscAbsScalar(*v);  v++;
1604       }
1605       v = bmat->a; jj = bmat->j;
1606       for (j=0; j<bmat->nz; j++) {
1607         tmp[garray[*jj++]] += PetscAbsScalar(*v); v++;
1608       }
1609       ierr = MPI_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPI_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr);
1610       for (j=0; j<mat->cmap->N; j++) {
1611         if (tmp2[j] > *norm) *norm = tmp2[j];
1612       }
1613       ierr = PetscFree(tmp);CHKERRQ(ierr);
1614       ierr = PetscFree(tmp2);CHKERRQ(ierr);
1615     } else if (type == NORM_INFINITY) { /* max row norm */
1616       PetscReal ntemp = 0.0;
1617       for (j=0; j<aij->A->rmap->n; j++) {
1618         v = amat->a + amat->i[j];
1619         sum = 0.0;
1620         for (i=0; i<amat->i[j+1]-amat->i[j]; i++) {
1621           sum += PetscAbsScalar(*v); v++;
1622         }
1623         v = bmat->a + bmat->i[j];
1624         for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) {
1625           sum += PetscAbsScalar(*v); v++;
1626         }
1627         if (sum > ntemp) ntemp = sum;
1628       }
1629       ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPI_MAX,((PetscObject)mat)->comm);CHKERRQ(ierr);
1630     } else {
1631       SETERRQ(PETSC_ERR_SUP,"No support for two norm");
1632     }
1633   }
1634   PetscFunctionReturn(0);
1635 }
1636 
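/*
   Illustration (not part of the library): the three supported norms correspond to the three
   branches above.  A minimal sketch:

       PetscReal nrm;
       ierr = MatNorm(A,NORM_FROBENIUS,&nrm);CHKERRQ(ierr); /* sqrt of the global sum of |a_ij|^2 */
       ierr = MatNorm(A,NORM_1,&nrm);CHKERRQ(ierr);         /* max column sum; reduces a length-N array */
       ierr = MatNorm(A,NORM_INFINITY,&nrm);CHKERRQ(ierr);  /* max row sum over both blocks */

   NORM_2 is not implemented for this format, as the final SETERRQ() indicates.
*/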
1637 #undef __FUNCT__
1638 #define __FUNCT__ "MatTranspose_MPIAIJ"
1639 PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout)
1640 {
1641   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1642   Mat_SeqAIJ     *Aloc=(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data;
1643   PetscErrorCode ierr;
1644   PetscInt       M = A->rmap->N,N = A->cmap->N,ma,na,mb,*ai,*aj,*bi,*bj,row,*cols,*cols_tmp,i,*d_nnz;
1645   PetscInt       cstart=A->cmap->rstart,ncol;
1646   Mat            B;
1647   MatScalar      *array;
1648 
1649   PetscFunctionBegin;
1650   if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
1651 
1652   ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n;
1653   ai = Aloc->i; aj = Aloc->j;
1654   bi = Bloc->i; bj = Bloc->j;
1655   if (reuse == MAT_INITIAL_MATRIX || *matout == A) {
1656     /* compute d_nnz for preallocation; o_nnz is approximated by d_nnz to avoid communication */
1657     ierr = PetscMalloc((1+na)*sizeof(PetscInt),&d_nnz);CHKERRQ(ierr);
1658     ierr = PetscMemzero(d_nnz,(1+na)*sizeof(PetscInt));CHKERRQ(ierr);
1659     for (i=0; i<ai[ma]; i++){
1660       d_nnz[aj[i]] ++;
1661       aj[i] += cstart; /* global col index to be used by MatSetValues() */
1662     }
1663 
1664     ierr = MatCreate(((PetscObject)A)->comm,&B);CHKERRQ(ierr);
1665     ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
1666     ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
1667     ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,d_nnz);CHKERRQ(ierr);
1668     ierr = PetscFree(d_nnz);CHKERRQ(ierr);
1669   } else {
1670     B = *matout;
1671   }
1672 
1673   /* copy over the A part */
1674   array = Aloc->a;
1675   row = A->rmap->rstart;
1676   for (i=0; i<ma; i++) {
1677     ncol = ai[i+1]-ai[i];
1678     ierr = MatSetValues(B,ncol,aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1679     row++; array += ncol; aj += ncol;
1680   }
1681   aj = Aloc->j;
1682   for (i=0; i<ai[ma]; i++) aj[i] -= cstart; /* resume local col index */
1683 
1684   /* copy over the B part */
1685   ierr = PetscMalloc(bi[mb]*sizeof(PetscInt),&cols);CHKERRQ(ierr);
1686   ierr = PetscMemzero(cols,bi[mb]*sizeof(PetscInt));CHKERRQ(ierr);
1687   array = Bloc->a;
1688   row = A->rmap->rstart;
1689   for (i=0; i<bi[mb]; i++) {cols[i] = a->garray[bj[i]];}
1690   cols_tmp = cols;
1691   for (i=0; i<mb; i++) {
1692     ncol = bi[i+1]-bi[i];
1693     ierr = MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1694     row++; array += ncol; cols_tmp += ncol;
1695   }
1696   ierr = PetscFree(cols);CHKERRQ(ierr);
1697 
1698   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1699   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1700   if (reuse == MAT_INITIAL_MATRIX || *matout != A) {
1701     *matout = B;
1702   } else {
1703     ierr = MatHeaderCopy(A,B);CHKERRQ(ierr);
1704   }
1705   PetscFunctionReturn(0);
1706 }
1707 
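/*
   Illustration (not part of the library): a minimal sketch of the two ways the routine above
   is reached; as the comment in the code notes, o_nnz is approximated by d_nnz in the
   preallocation to avoid communication:

       Mat At;
       ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr); /* create a new matrix   */
       ierr = MatTranspose(A,MAT_REUSE_MATRIX,&A);CHKERRQ(ierr);    /* in place; square only */
*/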
1708 #undef __FUNCT__
1709 #define __FUNCT__ "MatDiagonalScale_MPIAIJ"
1710 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
1711 {
1712   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1713   Mat            a = aij->A,b = aij->B;
1714   PetscErrorCode ierr;
1715   PetscInt       s1,s2,s3;
1716 
1717   PetscFunctionBegin;
1718   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
1719   if (rr) {
1720     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
1721     if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
1722     /* Overlap communication with computation. */
1723     ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1724   }
1725   if (ll) {
1726     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
1727     if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
1728     ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr);
1729   }
1730   /* scale  the diagonal block */
1731   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
1732 
1733   if (rr) {
1734     /* Do a scatter end and then right scale the off-diagonal block */
1735     ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1736     ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr);
1737   }
1738 
1739   PetscFunctionReturn(0);
1740 }
1741 
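/*
   Illustration (not part of the library): MatDiagonalScale() replaces A by diag(ll)*A*diag(rr);
   either vector may be PETSC_NULL to skip that side.  A minimal sketch, assuming l matches the
   row layout and r the column layout of A:

       ierr = MatDiagonalScale(A,l,r);CHKERRQ(ierr);          /* both sides   */
       ierr = MatDiagonalScale(A,PETSC_NULL,r);CHKERRQ(ierr); /* columns only */

   As coded above, the right scaling of the off-diagonal block waits for the scatter of rr
   into lvec to complete, so that communication overlaps the scaling of the diagonal block.
*/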
1742 #undef __FUNCT__
1743 #define __FUNCT__ "MatSetBlockSize_MPIAIJ"
1744 PetscErrorCode MatSetBlockSize_MPIAIJ(Mat A,PetscInt bs)
1745 {
1746   Mat_MPIAIJ     *a   = (Mat_MPIAIJ*)A->data;
1747   PetscErrorCode ierr;
1748 
1749   PetscFunctionBegin;
1750   ierr = MatSetBlockSize(a->A,bs);CHKERRQ(ierr);
1751   ierr = MatSetBlockSize(a->B,bs);CHKERRQ(ierr);
1752   ierr = PetscLayoutSetBlockSize(A->rmap,bs);CHKERRQ(ierr);
1753   ierr = PetscLayoutSetBlockSize(A->cmap,bs);CHKERRQ(ierr);
1754   PetscFunctionReturn(0);
1755 }
1756 #undef __FUNCT__
1757 #define __FUNCT__ "MatSetUnfactored_MPIAIJ"
1758 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A)
1759 {
1760   Mat_MPIAIJ     *a   = (Mat_MPIAIJ*)A->data;
1761   PetscErrorCode ierr;
1762 
1763   PetscFunctionBegin;
1764   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
1765   PetscFunctionReturn(0);
1766 }
1767 
1768 #undef __FUNCT__
1769 #define __FUNCT__ "MatEqual_MPIAIJ"
1770 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscTruth *flag)
1771 {
1772   Mat_MPIAIJ     *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data;
1773   Mat            a,b,c,d;
1774   PetscTruth     flg;
1775   PetscErrorCode ierr;
1776 
1777   PetscFunctionBegin;
1778   a = matA->A; b = matA->B;
1779   c = matB->A; d = matB->B;
1780 
1781   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
1782   if (flg) {
1783     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
1784   }
1785   ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,((PetscObject)A)->comm);CHKERRQ(ierr);
1786   PetscFunctionReturn(0);
1787 }
1788 
1789 #undef __FUNCT__
1790 #define __FUNCT__ "MatCopy_MPIAIJ"
1791 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
1792 {
1793   PetscErrorCode ierr;
1794   Mat_MPIAIJ     *a = (Mat_MPIAIJ *)A->data;
1795   Mat_MPIAIJ     *b = (Mat_MPIAIJ *)B->data;
1796 
1797   PetscFunctionBegin;
1798   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
1799   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
1800     /* because of the column compression in the off-processor part of the matrix a->B,
1801        the number of columns in a->B and b->B may be different, hence we cannot call
1802        the MatCopy() directly on the two parts. If need be, we can provide a more
1803        efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices
1804        then copying the submatrices */
1805     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
1806   } else {
1807     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
1808     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
1809   }
1810   PetscFunctionReturn(0);
1811 }
1812 
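/*
   Illustration (not part of the library): the fast path above copies the two sequential
   blocks directly and is only taken for SAME_NONZERO_PATTERN between two compatible MPIAIJ
   matrices; anything else falls back to MatCopy_Basic().  A minimal sketch, assuming B is
   created with the same layout and nonzero pattern as A:

       Mat B;
       ierr = MatDuplicate(A,MAT_DO_NOT_COPY_VALUES,&B);CHKERRQ(ierr);
       ierr = MatCopy(A,B,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
*/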
1813 #undef __FUNCT__
1814 #define __FUNCT__ "MatSetUpPreallocation_MPIAIJ"
1815 PetscErrorCode MatSetUpPreallocation_MPIAIJ(Mat A)
1816 {
1817   PetscErrorCode ierr;
1818 
1819   PetscFunctionBegin;
1820   ierr =  MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
1821   PetscFunctionReturn(0);
1822 }
1823 
1824 #include "petscblaslapack.h"
1825 #undef __FUNCT__
1826 #define __FUNCT__ "MatAXPY_MPIAIJ"
1827 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
1828 {
1829   PetscErrorCode ierr;
1830   PetscInt       i;
1831   Mat_MPIAIJ     *xx = (Mat_MPIAIJ *)X->data,*yy = (Mat_MPIAIJ *)Y->data;
1832   PetscBLASInt   bnz,one=1;
1833   Mat_SeqAIJ     *x,*y;
1834 
1835   PetscFunctionBegin;
1836   if (str == SAME_NONZERO_PATTERN) {
1837     PetscScalar alpha = a;
1838     x = (Mat_SeqAIJ *)xx->A->data;
1839     y = (Mat_SeqAIJ *)yy->A->data;
1840     bnz = PetscBLASIntCast(x->nz);
1841     BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
1842     x = (Mat_SeqAIJ *)xx->B->data;
1843     y = (Mat_SeqAIJ *)yy->B->data;
1844     bnz = PetscBLASIntCast(x->nz);
1845     BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
1846   } else if (str == SUBSET_NONZERO_PATTERN) {
1847     ierr = MatAXPY_SeqAIJ(yy->A,a,xx->A,str);CHKERRQ(ierr);
1848 
1849     x = (Mat_SeqAIJ *)xx->B->data;
1850     y = (Mat_SeqAIJ *)yy->B->data;
1851     if (y->xtoy && y->XtoY != xx->B) {
1852       ierr = PetscFree(y->xtoy);CHKERRQ(ierr);
1853       ierr = MatDestroy(y->XtoY);CHKERRQ(ierr);
1854     }
1855     if (!y->xtoy) { /* get xtoy */
1856       ierr = MatAXPYGetxtoy_Private(xx->B->rmap->n,x->i,x->j,xx->garray,y->i,y->j,yy->garray,&y->xtoy);CHKERRQ(ierr);
1857       y->XtoY = xx->B;
1858       ierr = PetscObjectReference((PetscObject)xx->B);CHKERRQ(ierr);
1859     }
1860     for (i=0; i<x->nz; i++) y->a[y->xtoy[i]] += a*(x->a[i]);
1861   } else {
1862     ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
1863   }
1864   PetscFunctionReturn(0);
1865 }
1866 
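/*
   Illustration (not part of the library): MatAXPY() computes Y = a*X + Y.  The structure
   flag selects the path above: SAME_NONZERO_PATTERN uses a single BLAS axpy on each block,
   SUBSET_NONZERO_PATTERN builds (and caches) an x-to-y index map for the off-diagonal block,
   and anything else falls back to MatAXPY_Basic().  A minimal sketch:

       PetscScalar a = 2.0;
       ierr = MatAXPY(Y,a,X,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
*/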
1867 EXTERN PetscErrorCode PETSCMAT_DLLEXPORT MatConjugate_SeqAIJ(Mat);
1868 
1869 #undef __FUNCT__
1870 #define __FUNCT__ "MatConjugate_MPIAIJ"
1871 PetscErrorCode PETSCMAT_DLLEXPORT MatConjugate_MPIAIJ(Mat mat)
1872 {
1873 #if defined(PETSC_USE_COMPLEX)
1874   PetscErrorCode ierr;
1875   Mat_MPIAIJ     *aij = (Mat_MPIAIJ *)mat->data;
1876 
1877   PetscFunctionBegin;
1878   ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr);
1879   ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr);
1880 #else
1881   PetscFunctionBegin;
1882 #endif
1883   PetscFunctionReturn(0);
1884 }
1885 
1886 #undef __FUNCT__
1887 #define __FUNCT__ "MatRealPart_MPIAIJ"
1888 PetscErrorCode MatRealPart_MPIAIJ(Mat A)
1889 {
1890   Mat_MPIAIJ   *a = (Mat_MPIAIJ*)A->data;
1891   PetscErrorCode ierr;
1892 
1893   PetscFunctionBegin;
1894   ierr = MatRealPart(a->A);CHKERRQ(ierr);
1895   ierr = MatRealPart(a->B);CHKERRQ(ierr);
1896   PetscFunctionReturn(0);
1897 }
1898 
1899 #undef __FUNCT__
1900 #define __FUNCT__ "MatImaginaryPart_MPIAIJ"
1901 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A)
1902 {
1903   Mat_MPIAIJ   *a = (Mat_MPIAIJ*)A->data;
1904   PetscErrorCode ierr;
1905 
1906   PetscFunctionBegin;
1907   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
1908   ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
1909   PetscFunctionReturn(0);
1910 }
1911 
1912 #ifdef PETSC_HAVE_PBGL
1913 
1914 #include <boost/parallel/mpi/bsp_process_group.hpp>
1915 #include <boost/graph/distributed/ilu_default_graph.hpp>
1916 #include <boost/graph/distributed/ilu_0_block.hpp>
1917 #include <boost/graph/distributed/ilu_preconditioner.hpp>
1918 #include <boost/graph/distributed/petsc/interface.hpp>
1919 #include <boost/multi_array.hpp>
1920 #include <boost/parallel/distributed_property_map.hpp>
1921 
1922 #undef __FUNCT__
1923 #define __FUNCT__ "MatILUFactorSymbolic_MPIAIJ"
1924 /*
1925   This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu>
1926 */
1927 PetscErrorCode MatILUFactorSymbolic_MPIAIJ(Mat fact,Mat A, IS isrow, IS iscol, const MatFactorInfo *info)
1928 {
1929   namespace petsc = boost::distributed::petsc;
1930 
1931   namespace graph_dist = boost::graph::distributed;
1932   using boost::graph::distributed::ilu_default::process_group_type;
1933   using boost::graph::ilu_permuted;
1934 
1935   PetscTruth      row_identity, col_identity;
1936   PetscContainer  c;
1937   PetscInt        m, n, M, N;
1938   PetscErrorCode  ierr;
1939 
1940   PetscFunctionBegin;
1941   if (info->levels != 0) SETERRQ(PETSC_ERR_SUP,"Only levels = 0 supported for parallel ilu");
1942   ierr = ISIdentity(isrow, &row_identity);CHKERRQ(ierr);
1943   ierr = ISIdentity(iscol, &col_identity);CHKERRQ(ierr);
1944   if (!row_identity || !col_identity) {
1945     SETERRQ(PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for parallel ILU");
1946   }
1947 
1948   process_group_type pg;
1949   typedef graph_dist::ilu_default::ilu_level_graph_type  lgraph_type;
1950   lgraph_type*   lgraph_p = new lgraph_type(petsc::num_global_vertices(A), pg, petsc::matrix_distribution(A, pg));
1951   lgraph_type&   level_graph = *lgraph_p;
1952   graph_dist::ilu_default::graph_type&            graph(level_graph.graph);
1953 
1954   petsc::read_matrix(A, graph, get(boost::edge_weight, graph));
1955   ilu_permuted(level_graph);
1956 
1957   /* put together the new matrix */
1958   ierr = MatCreate(((PetscObject)A)->comm, fact);CHKERRQ(ierr);
1959   ierr = MatGetLocalSize(A, &m, &n);CHKERRQ(ierr);
1960   ierr = MatGetSize(A, &M, &N);CHKERRQ(ierr);
1961   ierr = MatSetSizes(fact, m, n, M, N);CHKERRQ(ierr);
1962   ierr = MatSetType(fact, ((PetscObject)A)->type_name);CHKERRQ(ierr);
1963   ierr = MatAssemblyBegin(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1964   ierr = MatAssemblyEnd(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1965 
1966   ierr = PetscContainerCreate(((PetscObject)A)->comm, &c);CHKERRQ(ierr);
1967   ierr = PetscContainerSetPointer(c, lgraph_p);CHKERRQ(ierr);
1968   ierr = PetscObjectCompose((PetscObject) (fact), "graph", (PetscObject) c);CHKERRQ(ierr);
1969   PetscFunctionReturn(0);
1970 }
1971 
1972 #undef __FUNCT__
1973 #define __FUNCT__ "MatLUFactorNumeric_MPIAIJ"
1974 PetscErrorCode MatLUFactorNumeric_MPIAIJ(Mat B,Mat A, const MatFactorInfo *info)
1975 {
1976   PetscFunctionBegin;
1977   PetscFunctionReturn(0);
1978 }
1979 
1980 #undef __FUNCT__
1981 #define __FUNCT__ "MatSolve_MPIAIJ"
1982 /*
1983   This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu>
1984 */
1985 PetscErrorCode MatSolve_MPIAIJ(Mat A, Vec b, Vec x)
1986 {
1987   namespace graph_dist = boost::graph::distributed;
1988 
1989   typedef graph_dist::ilu_default::ilu_level_graph_type  lgraph_type;
1990   lgraph_type*   lgraph_p;
1991   PetscContainer c;
1992   PetscErrorCode ierr;
1993 
1994   PetscFunctionBegin;
1995   ierr = PetscObjectQuery((PetscObject) A, "graph", (PetscObject *) &c);CHKERRQ(ierr);
1996   ierr = PetscContainerGetPointer(c, (void **) &lgraph_p);CHKERRQ(ierr);
1997   ierr = VecCopy(b, x);CHKERRQ(ierr);
1998 
1999   PetscScalar* array_x;
2000   ierr = VecGetArray(x, &array_x);CHKERRQ(ierr);
2001   PetscInt sx;
2002   ierr = VecGetSize(x, &sx);CHKERRQ(ierr);
2003 
2004   PetscScalar* array_b;
2005   ierr = VecGetArray(b, &array_b);CHKERRQ(ierr);
2006   PetscInt sb;
2007   ierr = VecGetSize(b, &sb);CHKERRQ(ierr);
2008 
2009   lgraph_type&   level_graph = *lgraph_p;
2010   graph_dist::ilu_default::graph_type&            graph(level_graph.graph);
2011 
2012   typedef boost::multi_array_ref<PetscScalar, 1> array_ref_type;
2013   array_ref_type                                 ref_b(array_b, boost::extents[num_vertices(graph)]),
2014                                                  ref_x(array_x, boost::extents[num_vertices(graph)]);
2015 
2016   typedef boost::iterator_property_map<array_ref_type::iterator,
2017                                 boost::property_map<graph_dist::ilu_default::graph_type, boost::vertex_index_t>::type>  gvector_type;
2018   gvector_type                                   vector_b(ref_b.begin(), get(boost::vertex_index, graph)),
2019                                                  vector_x(ref_x.begin(), get(boost::vertex_index, graph));
2020 
2021   ilu_set_solve(*lgraph_p, vector_b, vector_x);
2022 
2023   PetscFunctionReturn(0);
2024 }
2025 #endif
2026 
2027 typedef struct { /* used by MatGetRedundantMatrix() for reusing matredundant */
2028   PetscInt       nzlocal,nsends,nrecvs;
2029   PetscMPIInt    *send_rank,*recv_rank;
2030   PetscInt       *sbuf_nz,*rbuf_nz,*sbuf_j,**rbuf_j;
2031   PetscScalar    *sbuf_a,**rbuf_a;
2032   PetscErrorCode (*MatDestroy)(Mat);
2033 } Mat_Redundant;
2034 
2035 #undef __FUNCT__
2036 #define __FUNCT__ "PetscContainerDestroy_MatRedundant"
2037 PetscErrorCode PetscContainerDestroy_MatRedundant(void *ptr)
2038 {
2039   PetscErrorCode       ierr;
2040   Mat_Redundant        *redund=(Mat_Redundant*)ptr;
2041   PetscInt             i;
2042 
2043   PetscFunctionBegin;
2044   ierr = PetscFree2(redund->send_rank,redund->recv_rank);CHKERRQ(ierr);
2045   ierr = PetscFree(redund->sbuf_j);CHKERRQ(ierr);
2046   ierr = PetscFree(redund->sbuf_a);CHKERRQ(ierr);
2047   for (i=0; i<redund->nrecvs; i++){
2048     ierr = PetscFree(redund->rbuf_j[i]);CHKERRQ(ierr);
2049     ierr = PetscFree(redund->rbuf_a[i]);CHKERRQ(ierr);
2050   }
2051   ierr = PetscFree4(redund->sbuf_nz,redund->rbuf_nz,redund->rbuf_j,redund->rbuf_a);CHKERRQ(ierr);
2052   ierr = PetscFree(redund);CHKERRQ(ierr);
2053   PetscFunctionReturn(0);
2054 }
2055 
2056 #undef __FUNCT__
2057 #define __FUNCT__ "MatDestroy_MatRedundant"
2058 PetscErrorCode MatDestroy_MatRedundant(Mat A)
2059 {
2060   PetscErrorCode  ierr;
2061   PetscContainer  container;
2062   Mat_Redundant   *redund=PETSC_NULL;
2063 
2064   PetscFunctionBegin;
2065   ierr = PetscObjectQuery((PetscObject)A,"Mat_Redundant",(PetscObject *)&container);CHKERRQ(ierr);
2066   if (container) {
2067     ierr = PetscContainerGetPointer(container,(void **)&redund);CHKERRQ(ierr);
2068   } else {
2069     SETERRQ(PETSC_ERR_PLIB,"Container does not exist");
2070   }
2071   A->ops->destroy = redund->MatDestroy;
2072   ierr = PetscObjectCompose((PetscObject)A,"Mat_Redundant",0);CHKERRQ(ierr);
2073   ierr = (*A->ops->destroy)(A);CHKERRQ(ierr);
2074   ierr = PetscContainerDestroy(container);CHKERRQ(ierr);
2075   PetscFunctionReturn(0);
2076 }
2077 
2078 #undef __FUNCT__
2079 #define __FUNCT__ "MatGetRedundantMatrix_MPIAIJ"
2080 PetscErrorCode MatGetRedundantMatrix_MPIAIJ(Mat mat,PetscInt nsubcomm,MPI_Comm subcomm,PetscInt mlocal_sub,MatReuse reuse,Mat *matredundant)
2081 {
2082   PetscMPIInt    rank,size;
2083   MPI_Comm       comm=((PetscObject)mat)->comm;
2084   PetscErrorCode ierr;
2085   PetscInt       nsends=0,nrecvs=0,i,rownz_max=0;
2086   PetscMPIInt    *send_rank=PETSC_NULL,*recv_rank=PETSC_NULL;
2087   PetscInt       *rowrange=mat->rmap->range;
2088   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2089   Mat            A=aij->A,B=aij->B,C=*matredundant;
2090   Mat_SeqAIJ     *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data;
2091   PetscScalar    *sbuf_a;
2092   PetscInt       nzlocal=a->nz+b->nz;
2093   PetscInt       j,cstart=mat->cmap->rstart,cend=mat->cmap->rend,row,nzA,nzB,ncols,*cworkA,*cworkB;
2094   PetscInt       rstart=mat->rmap->rstart,rend=mat->rmap->rend,*bmap=aij->garray,M,N;
2095   PetscInt       *cols,ctmp,lwrite,*rptr,l,*sbuf_j;
2096   MatScalar      *aworkA,*aworkB;
2097   PetscScalar    *vals;
2098   PetscMPIInt    tag1,tag2,tag3,imdex;
2099   MPI_Request    *s_waits1=PETSC_NULL,*s_waits2=PETSC_NULL,*s_waits3=PETSC_NULL,
2100                  *r_waits1=PETSC_NULL,*r_waits2=PETSC_NULL,*r_waits3=PETSC_NULL;
2101   MPI_Status     recv_status,*send_status;
2102   PetscInt       *sbuf_nz=PETSC_NULL,*rbuf_nz=PETSC_NULL,count;
2103   PetscInt       **rbuf_j=PETSC_NULL;
2104   PetscScalar    **rbuf_a=PETSC_NULL;
2105   Mat_Redundant  *redund=PETSC_NULL;
2106   PetscContainer container;
2107 
2108   PetscFunctionBegin;
2109   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2110   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2111 
2112   if (reuse == MAT_REUSE_MATRIX) {
2113     ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr);
2114     if (M != N || M != mat->rmap->N) SETERRQ(PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong global size");
2115     ierr = MatGetLocalSize(C,&M,&N);CHKERRQ(ierr);
2116     if (M != N || M != mlocal_sub) SETERRQ(PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong local size");
2117     ierr = PetscObjectQuery((PetscObject)C,"Mat_Redundant",(PetscObject *)&container);CHKERRQ(ierr);
2118     if (container) {
2119       ierr = PetscContainerGetPointer(container,(void **)&redund);CHKERRQ(ierr);
2120     } else {
2121       SETERRQ(PETSC_ERR_PLIB,"Container does not exist");
2122     }
2123     if (nzlocal != redund->nzlocal) SETERRQ(PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong nzlocal");
2124 
2125     nsends    = redund->nsends;
2126     nrecvs    = redund->nrecvs;
2127     send_rank = redund->send_rank;
2128     recv_rank = redund->recv_rank;
2129     sbuf_nz   = redund->sbuf_nz;
2130     rbuf_nz   = redund->rbuf_nz;
2131     sbuf_j    = redund->sbuf_j;
2132     sbuf_a    = redund->sbuf_a;
2133     rbuf_j    = redund->rbuf_j;
2134     rbuf_a    = redund->rbuf_a;
2135   }
2136 
2137   if (reuse == MAT_INITIAL_MATRIX){
2138     PetscMPIInt  subrank,subsize;
2139     PetscInt     nleftover,np_subcomm;
2140     /* get the destination processors' id send_rank, nsends and nrecvs */
2141     ierr = MPI_Comm_rank(subcomm,&subrank);CHKERRQ(ierr);
2142     ierr = MPI_Comm_size(subcomm,&subsize);CHKERRQ(ierr);
2143     ierr = PetscMalloc2(size,PetscMPIInt,&send_rank,size,PetscMPIInt,&recv_rank);CHKERRQ(ierr);
2144     np_subcomm = size/nsubcomm;
2145     nleftover  = size - nsubcomm*np_subcomm;
2146     nsends = 0; nrecvs = 0;
2147     for (i=0; i<size; i++){ /* i=rank*/
2148       if (subrank == i/nsubcomm && rank != i){ /* my_subrank == other's subrank */
2149         send_rank[nsends] = i; nsends++;
2150         recv_rank[nrecvs++] = i;
2151       }
2152     }
2153     if (rank >= size - nleftover){/* this proc is a leftover processor */
2154       i = size-nleftover-1;
2155       j = 0;
2156       while (j < nsubcomm - nleftover){
2157         send_rank[nsends++] = i;
2158         i--; j++;
2159       }
2160     }
2161 
2162     if (nleftover && subsize == size/nsubcomm && subrank==subsize-1){ /* this proc recvs from leftover processors */
2163       for (i=0; i<nleftover; i++){
2164         recv_rank[nrecvs++] = size-nleftover+i;
2165       }
2166     }
2167 
2168     /* allocate sbuf_j, sbuf_a */
2169     i = nzlocal + rowrange[rank+1] - rowrange[rank] + 2;
2170     ierr = PetscMalloc(i*sizeof(PetscInt),&sbuf_j);CHKERRQ(ierr);
2171     ierr = PetscMalloc((nzlocal+1)*sizeof(PetscScalar),&sbuf_a);CHKERRQ(ierr);
2172   } /* end of if (reuse == MAT_INITIAL_MATRIX) */
2173 
2174   /* copy mat's local entries into the buffers */
2175   if (reuse == MAT_INITIAL_MATRIX){
2176     rownz_max = 0;
2177     rptr = sbuf_j;
2178     cols = sbuf_j + rend-rstart + 1;
2179     vals = sbuf_a;
2180     rptr[0] = 0;
2181     for (i=0; i<rend-rstart; i++){
2182       row = i + rstart;
2183       nzA    = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i];
2184       ncols  = nzA + nzB;
2185       cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i];
2186       aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i];
2187       /* load the column indices for this row into cols */
2188       lwrite = 0;
2189       for (l=0; l<nzB; l++) {
2190         if ((ctmp = bmap[cworkB[l]]) < cstart){
2191           vals[lwrite]   = aworkB[l];
2192           cols[lwrite++] = ctmp;
2193         }
2194       }
2195       for (l=0; l<nzA; l++){
2196         vals[lwrite]   = aworkA[l];
2197         cols[lwrite++] = cstart + cworkA[l];
2198       }
2199       for (l=0; l<nzB; l++) {
2200         if ((ctmp = bmap[cworkB[l]]) >= cend){
2201           vals[lwrite]   = aworkB[l];
2202           cols[lwrite++] = ctmp;
2203         }
2204       }
2205       vals += ncols;
2206       cols += ncols;
2207       rptr[i+1] = rptr[i] + ncols;
2208       if (rownz_max < ncols) rownz_max = ncols;
2209     }
2210     if (rptr[rend-rstart] != a->nz + b->nz) SETERRQ4(PETSC_ERR_PLIB, "rptr[%d] %d != %d + %d",rend-rstart,rptr[rend-rstart],a->nz,b->nz);
2211   } else { /* only copy matrix values into sbuf_a */
2212     rptr = sbuf_j;
2213     vals = sbuf_a;
2214     rptr[0] = 0;
2215     for (i=0; i<rend-rstart; i++){
2216       row = i + rstart;
2217       nzA    = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i];
2218       ncols  = nzA + nzB;
2219       cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i];
2220       aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i];
2221       lwrite = 0;
2222       for (l=0; l<nzB; l++) {
2223         if ((ctmp = bmap[cworkB[l]]) < cstart) vals[lwrite++] = aworkB[l];
2224       }
2225       for (l=0; l<nzA; l++) vals[lwrite++] = aworkA[l];
2226       for (l=0; l<nzB; l++) {
2227         if ((ctmp = bmap[cworkB[l]]) >= cend) vals[lwrite++] = aworkB[l];
2228       }
2229       vals += ncols;
2230       rptr[i+1] = rptr[i] + ncols;
2231     }
2232   } /* end of if (reuse == MAT_INITIAL_MATRIX) */
2233 
2234   /* send nzlocal to others, and recv other's nzlocal */
2235   /*--------------------------------------------------*/
2236   if (reuse == MAT_INITIAL_MATRIX){
2237     ierr = PetscMalloc2(3*(nsends + nrecvs)+1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr);
2238     s_waits2 = s_waits3 + nsends;
2239     s_waits1 = s_waits2 + nsends;
2240     r_waits1 = s_waits1 + nsends;
2241     r_waits2 = r_waits1 + nrecvs;
2242     r_waits3 = r_waits2 + nrecvs;
2243   } else {
2244     ierr = PetscMalloc2(nsends + nrecvs +1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr);
2245     r_waits3 = s_waits3 + nsends;
2246   }
2247 
2248   ierr = PetscObjectGetNewTag((PetscObject)mat,&tag3);CHKERRQ(ierr);
2249   if (reuse == MAT_INITIAL_MATRIX){
2250     /* get new tags to keep the communication clean */
2251     ierr = PetscObjectGetNewTag((PetscObject)mat,&tag1);CHKERRQ(ierr);
2252     ierr = PetscObjectGetNewTag((PetscObject)mat,&tag2);CHKERRQ(ierr);
2253     ierr = PetscMalloc4(nsends,PetscInt,&sbuf_nz,nrecvs,PetscInt,&rbuf_nz,nrecvs,PetscInt*,&rbuf_j,nrecvs,PetscScalar*,&rbuf_a);CHKERRQ(ierr);
2254 
2255     /* post receives of other's nzlocal */
2256     for (i=0; i<nrecvs; i++){
2257       ierr = MPI_Irecv(rbuf_nz+i,1,MPIU_INT,MPI_ANY_SOURCE,tag1,comm,r_waits1+i);CHKERRQ(ierr);
2258     }
2259     /* send nzlocal to others */
2260     for (i=0; i<nsends; i++){
2261       sbuf_nz[i] = nzlocal;
2262       ierr = MPI_Isend(sbuf_nz+i,1,MPIU_INT,send_rank[i],tag1,comm,s_waits1+i);CHKERRQ(ierr);
2263     }
2264     /* wait on receives of nzlocal; allocate space for rbuf_j, rbuf_a */
2265     count = nrecvs;
2266     while (count) {
2267       ierr = MPI_Waitany(nrecvs,r_waits1,&imdex,&recv_status);CHKERRQ(ierr);
2268       recv_rank[imdex] = recv_status.MPI_SOURCE;
2269       /* allocate rbuf_a and rbuf_j; then post receives of rbuf_j */
2270       ierr = PetscMalloc((rbuf_nz[imdex]+1)*sizeof(PetscScalar),&rbuf_a[imdex]);CHKERRQ(ierr);
2271 
2272       i = rowrange[recv_status.MPI_SOURCE+1] - rowrange[recv_status.MPI_SOURCE]; /* number of expected mat->i */
2273       rbuf_nz[imdex] += i + 2;
2274       ierr = PetscMalloc(rbuf_nz[imdex]*sizeof(PetscInt),&rbuf_j[imdex]);CHKERRQ(ierr);
2275       ierr = MPI_Irecv(rbuf_j[imdex],rbuf_nz[imdex],MPIU_INT,recv_status.MPI_SOURCE,tag2,comm,r_waits2+imdex);CHKERRQ(ierr);
2276       count--;
2277     }
2278     /* wait on sends of nzlocal */
2279     if (nsends) {ierr = MPI_Waitall(nsends,s_waits1,send_status);CHKERRQ(ierr);}
2280     /* send mat->i,j to others, and recv from other's */
2281     /*------------------------------------------------*/
2282     for (i=0; i<nsends; i++){
2283       j = nzlocal + rowrange[rank+1] - rowrange[rank] + 1;
2284       ierr = MPI_Isend(sbuf_j,j,MPIU_INT,send_rank[i],tag2,comm,s_waits2+i);CHKERRQ(ierr);
2285     }
2286     /* wait on receives of mat->i,j */
2287     /*------------------------------*/
2288     count = nrecvs;
2289     while (count) {
2290       ierr = MPI_Waitany(nrecvs,r_waits2,&imdex,&recv_status);CHKERRQ(ierr);
2291       if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE);
2292       count--;
2293     }
2294     /* wait on sends of mat->i,j */
2295     /*---------------------------*/
2296     if (nsends) {
2297       ierr = MPI_Waitall(nsends,s_waits2,send_status);CHKERRQ(ierr);
2298     }
2299   } /* end of if (reuse == MAT_INITIAL_MATRIX) */
2300 
2301   /* post receives, send and receive mat->a */
2302   /*----------------------------------------*/
2303   for (imdex=0; imdex<nrecvs; imdex++) {
2304     ierr = MPI_Irecv(rbuf_a[imdex],rbuf_nz[imdex],MPIU_SCALAR,recv_rank[imdex],tag3,comm,r_waits3+imdex);CHKERRQ(ierr);
2305   }
2306   for (i=0; i<nsends; i++){
2307     ierr = MPI_Isend(sbuf_a,nzlocal,MPIU_SCALAR,send_rank[i],tag3,comm,s_waits3+i);CHKERRQ(ierr);
2308   }
2309   count = nrecvs;
2310   while (count) {
2311     ierr = MPI_Waitany(nrecvs,r_waits3,&imdex,&recv_status);CHKERRQ(ierr);
2312     if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE);
2313     count--;
2314   }
2315   if (nsends) {
2316     ierr = MPI_Waitall(nsends,s_waits3,send_status);CHKERRQ(ierr);
2317   }
2318 
2319   ierr = PetscFree2(s_waits3,send_status);CHKERRQ(ierr);
2320 
2321   /* create redundant matrix */
2322   /*-------------------------*/
2323   if (reuse == MAT_INITIAL_MATRIX){
2324     /* compute rownz_max for preallocation */
2325     for (imdex=0; imdex<nrecvs; imdex++){
2326       j = rowrange[recv_rank[imdex]+1] - rowrange[recv_rank[imdex]];
2327       rptr = rbuf_j[imdex];
2328       for (i=0; i<j; i++){
2329         ncols = rptr[i+1] - rptr[i];
2330         if (rownz_max < ncols) rownz_max = ncols;
2331       }
2332     }
2333 
2334     ierr = MatCreate(subcomm,&C);CHKERRQ(ierr);
2335     ierr = MatSetSizes(C,mlocal_sub,mlocal_sub,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr);
2336     ierr = MatSetFromOptions(C);CHKERRQ(ierr);
2337     ierr = MatSeqAIJSetPreallocation(C,rownz_max,PETSC_NULL);CHKERRQ(ierr);
2338     ierr = MatMPIAIJSetPreallocation(C,rownz_max,PETSC_NULL,rownz_max,PETSC_NULL);CHKERRQ(ierr);
2339   } else {
2340     C = *matredundant;
2341   }
2342 
2343   /* insert local matrix entries */
2344   rptr = sbuf_j;
2345   cols = sbuf_j + rend-rstart + 1;
2346   vals = sbuf_a;
2347   for (i=0; i<rend-rstart; i++){
2348     row   = i + rstart;
2349     ncols = rptr[i+1] - rptr[i];
2350     ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
2351     vals += ncols;
2352     cols += ncols;
2353   }
2354   /* insert received matrix entries */
2355   for (imdex=0; imdex<nrecvs; imdex++){
2356     rstart = rowrange[recv_rank[imdex]];
2357     rend   = rowrange[recv_rank[imdex]+1];
2358     rptr = rbuf_j[imdex];
2359     cols = rbuf_j[imdex] + rend-rstart + 1;
2360     vals = rbuf_a[imdex];
2361     for (i=0; i<rend-rstart; i++){
2362       row   = i + rstart;
2363       ncols = rptr[i+1] - rptr[i];
2364       ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
2365       vals += ncols;
2366       cols += ncols;
2367     }
2368   }
2369   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2370   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2371   ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr);
2372   if (M != mat->rmap->N || N != mat->cmap->N) SETERRQ2(PETSC_ERR_ARG_INCOMP,"redundant mat size %d != input mat size %d",M,mat->rmap->N);
2373   if (reuse == MAT_INITIAL_MATRIX){
2374     PetscContainer container;
2375     *matredundant = C;
2376     /* create a supporting struct and attach it to C for reuse */
2377     ierr = PetscNewLog(C,Mat_Redundant,&redund);CHKERRQ(ierr);
2378     ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
2379     ierr = PetscContainerSetPointer(container,redund);CHKERRQ(ierr);
2380     ierr = PetscObjectCompose((PetscObject)C,"Mat_Redundant",(PetscObject)container);CHKERRQ(ierr);
2381     ierr = PetscContainerSetUserDestroy(container,PetscContainerDestroy_MatRedundant);CHKERRQ(ierr);
2382 
2383     redund->nzlocal = nzlocal;
2384     redund->nsends  = nsends;
2385     redund->nrecvs  = nrecvs;
2386     redund->send_rank = send_rank;
2387     redund->recv_rank = recv_rank;
2388     redund->sbuf_nz = sbuf_nz;
2389     redund->rbuf_nz = rbuf_nz;
2390     redund->sbuf_j  = sbuf_j;
2391     redund->sbuf_a  = sbuf_a;
2392     redund->rbuf_j  = rbuf_j;
2393     redund->rbuf_a  = rbuf_a;
2394 
2395     redund->MatDestroy = C->ops->destroy;
2396     C->ops->destroy    = MatDestroy_MatRedundant;
2397   }
2398   PetscFunctionReturn(0);
2399 }
2400 
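/*
   Illustration (not part of the library): MatGetRedundantMatrix() assembles a complete copy
   of the parallel matrix on each of nsubcomm sub-communicators, typically so a redundant
   direct solve can run per sub-communicator.  A minimal sketch, assuming 'subcomm' was
   obtained beforehand (e.g. via PetscSubcomm) and mlocal_sub is this process's row count in
   subcomm:

       Mat Cred;
       ierr = MatGetRedundantMatrix(A,nsubcomm,subcomm,mlocal_sub,MAT_INITIAL_MATRIX,&Cred);CHKERRQ(ierr);
       /* ... later, after A's values change but not its nonzero pattern ... */
       ierr = MatGetRedundantMatrix(A,nsubcomm,subcomm,mlocal_sub,MAT_REUSE_MATRIX,&Cred);CHKERRQ(ierr);

   The MAT_REUSE_MATRIX path reuses the send/receive buffers stashed in the Mat_Redundant
   container attached to Cred above.
*/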
2401 #undef __FUNCT__
2402 #define __FUNCT__ "MatGetRowMaxAbs_MPIAIJ"
2403 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2404 {
2405   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2406   PetscErrorCode ierr;
2407   PetscInt       i,*idxb = 0;
2408   PetscScalar    *va,*vb;
2409   Vec            vtmp;
2410 
2411   PetscFunctionBegin;
2412   ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr);
2413   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2414   if (idx) {
2415     for (i=0; i<A->rmap->n; i++) {
2416       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2417     }
2418   }
2419 
2420   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
2421   if (idx) {
2422     ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr);
2423   }
2424   ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
2425   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
2426 
2427   for (i=0; i<A->rmap->n; i++){
2428     if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
2429       va[i] = vb[i];
2430       if (idx) idx[i] = a->garray[idxb[i]];
2431     }
2432   }
2433 
2434   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2435   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
2436   if (idxb) {
2437     ierr = PetscFree(idxb);CHKERRQ(ierr);
2438   }
2439   ierr = VecDestroy(vtmp);CHKERRQ(ierr);
2440   PetscFunctionReturn(0);
2441 }
2442 
2443 #undef __FUNCT__
2444 #define __FUNCT__ "MatGetRowMinAbs_MPIAIJ"
2445 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2446 {
2447   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2448   PetscErrorCode ierr;
2449   PetscInt       i,*idxb = 0;
2450   PetscScalar    *va,*vb;
2451   Vec            vtmp;
2452 
2453   PetscFunctionBegin;
2454   ierr = MatGetRowMinAbs(a->A,v,idx);CHKERRQ(ierr);
2455   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2456   if (idx) {
2457     for (i=0; i<A->rmap->n; i++) {
2458       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2459     }
2460   }
2461 
2462   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
2463   if (idx) {
2464     ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr);
2465   }
2466   ierr = MatGetRowMinAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
2467   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
2468 
2469   for (i=0; i<A->rmap->n; i++){
2470     if (PetscAbsScalar(va[i]) > PetscAbsScalar(vb[i])) {
2471       va[i] = vb[i];
2472       if (idx) idx[i] = a->garray[idxb[i]];
2473     }
2474   }
2475 
2476   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2477   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
2478   if (idxb) {
2479     ierr = PetscFree(idxb);CHKERRQ(ierr);
2480   }
2481   ierr = VecDestroy(vtmp);CHKERRQ(ierr);
2482   PetscFunctionReturn(0);
2483 }
2484 
2485 #undef __FUNCT__
2486 #define __FUNCT__ "MatGetRowMin_MPIAIJ"
2487 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2488 {
2489   Mat_MPIAIJ    *mat    = (Mat_MPIAIJ *) A->data;
2490   PetscInt       n      = A->rmap->n;
2491   PetscInt       cstart = A->cmap->rstart;
2492   PetscInt      *cmap   = mat->garray;
2493   PetscInt      *diagIdx, *offdiagIdx;
2494   Vec            diagV, offdiagV;
2495   PetscScalar   *a, *diagA, *offdiagA;
2496   PetscInt       r;
2497   PetscErrorCode ierr;
2498 
2499   PetscFunctionBegin;
2500   ierr = PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);CHKERRQ(ierr);
2501   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr);
2502   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr);
2503   ierr = MatGetRowMin(mat->A, diagV,    diagIdx);CHKERRQ(ierr);
2504   ierr = MatGetRowMin(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
2505   ierr = VecGetArray(v,        &a);CHKERRQ(ierr);
2506   ierr = VecGetArray(diagV,    &diagA);CHKERRQ(ierr);
2507   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2508   for(r = 0; r < n; ++r) {
2509     if (PetscAbsScalar(diagA[r]) <= PetscAbsScalar(offdiagA[r])) {
2510       a[r]   = diagA[r];
2511       idx[r] = cstart + diagIdx[r];
2512     } else {
2513       a[r]   = offdiagA[r];
2514       idx[r] = cmap[offdiagIdx[r]];
2515     }
2516   }
2517   ierr = VecRestoreArray(v,        &a);CHKERRQ(ierr);
2518   ierr = VecRestoreArray(diagV,    &diagA);CHKERRQ(ierr);
2519   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2520   ierr = VecDestroy(diagV);CHKERRQ(ierr);
2521   ierr = VecDestroy(offdiagV);CHKERRQ(ierr);
2522   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
2523   PetscFunctionReturn(0);
2524 }
2525 
2526 #undef __FUNCT__
2527 #define __FUNCT__ "MatGetRowMax_MPIAIJ"
2528 PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2529 {
2530   Mat_MPIAIJ    *mat    = (Mat_MPIAIJ *) A->data;
2531   PetscInt       n      = A->rmap->n;
2532   PetscInt       cstart = A->cmap->rstart;
2533   PetscInt      *cmap   = mat->garray;
2534   PetscInt      *diagIdx, *offdiagIdx;
2535   Vec            diagV, offdiagV;
2536   PetscScalar   *a, *diagA, *offdiagA;
2537   PetscInt       r;
2538   PetscErrorCode ierr;
2539 
2540   PetscFunctionBegin;
2541   ierr = PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);CHKERRQ(ierr);
2542   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr);
2543   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr);
2544   ierr = MatGetRowMax(mat->A, diagV,    diagIdx);CHKERRQ(ierr);
2545   ierr = MatGetRowMax(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
2546   ierr = VecGetArray(v,        &a);CHKERRQ(ierr);
2547   ierr = VecGetArray(diagV,    &diagA);CHKERRQ(ierr);
2548   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2549   for(r = 0; r < n; ++r) {
2550     if (PetscAbsScalar(diagA[r]) >= PetscAbsScalar(offdiagA[r])) {
2551       a[r]   = diagA[r];
2552       idx[r] = cstart + diagIdx[r];
2553     } else {
2554       a[r]   = offdiagA[r];
2555       idx[r] = cmap[offdiagIdx[r]];
2556     }
2557   }
2558   ierr = VecRestoreArray(v,        &a);CHKERRQ(ierr);
2559   ierr = VecRestoreArray(diagV,    &diagA);CHKERRQ(ierr);
2560   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2561   ierr = VecDestroy(diagV);CHKERRQ(ierr);
2562   ierr = VecDestroy(offdiagV);CHKERRQ(ierr);
2563   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
2564   PetscFunctionReturn(0);
2565 }
2566 
2567 #undef __FUNCT__
2568 #define __FUNCT__ "MatGetSeqNonzerostructure_MPIAIJ"
2569 PetscErrorCode MatGetSeqNonzerostructure_MPIAIJ(Mat mat,Mat *newmat)
2570 {
2571   PetscErrorCode ierr;
2572   Mat            *dummy;
2573 
2574   PetscFunctionBegin;
2575   ierr = MatGetSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy);CHKERRQ(ierr);
2576   *newmat = *dummy;
2577   ierr = PetscFree(dummy);CHKERRQ(ierr);
2578   PetscFunctionReturn(0);
2579 }
2580 
2581 extern PetscErrorCode PETSCMAT_DLLEXPORT MatFDColoringApply_AIJ(Mat,MatFDColoring,Vec,MatStructure*,void*);
2582 /* -------------------------------------------------------------------*/
2583 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
2584        MatGetRow_MPIAIJ,
2585        MatRestoreRow_MPIAIJ,
2586        MatMult_MPIAIJ,
2587 /* 4*/ MatMultAdd_MPIAIJ,
2588        MatMultTranspose_MPIAIJ,
2589        MatMultTransposeAdd_MPIAIJ,
2590 #ifdef PETSC_HAVE_PBGL
2591        MatSolve_MPIAIJ,
2592 #else
2593        0,
2594 #endif
2595        0,
2596        0,
2597 /*10*/ 0,
2598        0,
2599        0,
2600        MatSOR_MPIAIJ,
2601        MatTranspose_MPIAIJ,
2602 /*15*/ MatGetInfo_MPIAIJ,
2603        MatEqual_MPIAIJ,
2604        MatGetDiagonal_MPIAIJ,
2605        MatDiagonalScale_MPIAIJ,
2606        MatNorm_MPIAIJ,
2607 /*20*/ MatAssemblyBegin_MPIAIJ,
2608        MatAssemblyEnd_MPIAIJ,
2609        MatSetOption_MPIAIJ,
2610        MatZeroEntries_MPIAIJ,
2611 /*24*/ MatZeroRows_MPIAIJ,
2612        0,
2613 #ifdef PETSC_HAVE_PBGL
2614        0,
2615 #else
2616        0,
2617 #endif
2618        0,
2619        0,
2620 /*29*/ MatSetUpPreallocation_MPIAIJ,
2621 #ifdef PETSC_HAVE_PBGL
2622        0,
2623 #else
2624        0,
2625 #endif
2626        0,
2627        0,
2628        0,
2629 /*34*/ MatDuplicate_MPIAIJ,
2630        0,
2631        0,
2632        0,
2633        0,
2634 /*39*/ MatAXPY_MPIAIJ,
2635        MatGetSubMatrices_MPIAIJ,
2636        MatIncreaseOverlap_MPIAIJ,
2637        MatGetValues_MPIAIJ,
2638        MatCopy_MPIAIJ,
2639 /*44*/ MatGetRowMax_MPIAIJ,
2640        MatScale_MPIAIJ,
2641        0,
2642        0,
2643        0,
2644 /*49*/ MatSetBlockSize_MPIAIJ,
2645        0,
2646        0,
2647        0,
2648        0,
2649 /*54*/ MatFDColoringCreate_MPIAIJ,
2650        0,
2651        MatSetUnfactored_MPIAIJ,
2652        MatPermute_MPIAIJ,
2653        0,
2654 /*59*/ MatGetSubMatrix_MPIAIJ,
2655        MatDestroy_MPIAIJ,
2656        MatView_MPIAIJ,
2657        0,
2658        0,
2659 /*64*/ 0,
2660        0,
2661        0,
2662        0,
2663        0,
2664 /*69*/ MatGetRowMaxAbs_MPIAIJ,
2665        MatGetRowMinAbs_MPIAIJ,
2666        0,
2667        MatSetColoring_MPIAIJ,
2668 #if defined(PETSC_HAVE_ADIC)
2669        MatSetValuesAdic_MPIAIJ,
2670 #else
2671        0,
2672 #endif
2673        MatSetValuesAdifor_MPIAIJ,
2674 /*75*/ MatFDColoringApply_AIJ,
2675        0,
2676        0,
2677        0,
2678        0,
2679 /*80*/ 0,
2680        0,
2681        0,
2682 /*83*/ MatLoad_MPIAIJ,
2683        0,
2684        0,
2685        0,
2686        0,
2687        0,
2688 /*89*/ MatMatMult_MPIAIJ_MPIAIJ,
2689        MatMatMultSymbolic_MPIAIJ_MPIAIJ,
2690        MatMatMultNumeric_MPIAIJ_MPIAIJ,
2691        MatPtAP_Basic,
2692        MatPtAPSymbolic_MPIAIJ,
2693 /*94*/ MatPtAPNumeric_MPIAIJ,
2694        0,
2695        0,
2696        0,
2697        0,
2698 /*99*/ 0,
2699        MatPtAPSymbolic_MPIAIJ_MPIAIJ,
2700        MatPtAPNumeric_MPIAIJ_MPIAIJ,
2701        MatConjugate_MPIAIJ,
2702        0,
2703 /*104*/MatSetValuesRow_MPIAIJ,
2704        MatRealPart_MPIAIJ,
2705        MatImaginaryPart_MPIAIJ,
2706        0,
2707        0,
2708 /*109*/0,
2709        MatGetRedundantMatrix_MPIAIJ,
2710        MatGetRowMin_MPIAIJ,
2711        0,
2712        0,
2713 /*114*/MatGetSeqNonzerostructure_MPIAIJ,
2714        0,
2715        0,
2716        0,
2717        0,
2718        0
2719 };
2720 
2721 /* ----------------------------------------------------------------------------------------*/
2722 
2723 EXTERN_C_BEGIN
2724 #undef __FUNCT__
2725 #define __FUNCT__ "MatStoreValues_MPIAIJ"
2726 PetscErrorCode PETSCMAT_DLLEXPORT MatStoreValues_MPIAIJ(Mat mat)
2727 {
2728   Mat_MPIAIJ     *aij = (Mat_MPIAIJ *)mat->data;
2729   PetscErrorCode ierr;
2730 
2731   PetscFunctionBegin;
2732   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
2733   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
2734   PetscFunctionReturn(0);
2735 }
2736 EXTERN_C_END
2737 
2738 EXTERN_C_BEGIN
2739 #undef __FUNCT__
2740 #define __FUNCT__ "MatRetrieveValues_MPIAIJ"
2741 PetscErrorCode PETSCMAT_DLLEXPORT MatRetrieveValues_MPIAIJ(Mat mat)
2742 {
2743   Mat_MPIAIJ     *aij = (Mat_MPIAIJ *)mat->data;
2744   PetscErrorCode ierr;
2745 
2746   PetscFunctionBegin;
2747   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
2748   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
2749   PetscFunctionReturn(0);
2750 }
2751 EXTERN_C_END
2752 
2753 #include "petscpc.h"
2754 EXTERN_C_BEGIN
2755 #undef __FUNCT__
2756 #define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ"
2757 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
2758 {
2759   Mat_MPIAIJ     *b;
2760   PetscErrorCode ierr;
2761   PetscInt       i;
2762 
2763   PetscFunctionBegin;
2764   if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5;
2765   if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2;
2766   if (d_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz);
2767   if (o_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz);
2768 
2769   ierr = PetscLayoutSetBlockSize(B->rmap,1);CHKERRQ(ierr);
2770   ierr = PetscLayoutSetBlockSize(B->cmap,1);CHKERRQ(ierr);
2771   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
2772   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
2773   if (d_nnz) {
2774     for (i=0; i<B->rmap->n; i++) {
2775       if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %D value %D",i,d_nnz[i]);
2776     }
2777   }
2778   if (o_nnz) {
2779     for (i=0; i<B->rmap->n; i++) {
2780       if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %D value %D",i,o_nnz[i]);
2781     }
2782   }
2783   b = (Mat_MPIAIJ*)B->data;
2784 
2785   if (!B->preallocated) {
2786     /* Explicitly create 2 MATSEQAIJ matrices. */
2787     ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
2788     ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr);
2789     ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr);
2790     ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr);
2791     ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
2792     ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr);
2793     ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr);
2794     ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr);
2795   }
2796 
2797   ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr);
2798   ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr);
2799   B->preallocated = PETSC_TRUE;
2800   PetscFunctionReturn(0);
2801 }
2802 EXTERN_C_END
2803 
2804 #undef __FUNCT__
2805 #define __FUNCT__ "MatDuplicate_MPIAIJ"
2806 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
2807 {
2808   Mat            mat;
2809   Mat_MPIAIJ     *a,*oldmat = (Mat_MPIAIJ*)matin->data;
2810   PetscErrorCode ierr;
2811 
2812   PetscFunctionBegin;
2813   *newmat       = 0;
2814   ierr = MatCreate(((PetscObject)matin)->comm,&mat);CHKERRQ(ierr);
2815   ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr);
2816   ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr);
2817   ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
2818   a    = (Mat_MPIAIJ*)mat->data;
2819 
2820   mat->factor       = matin->factor;
2821   mat->rmap->bs      = matin->rmap->bs;
2822   mat->assembled    = PETSC_TRUE;
2823   mat->insertmode   = NOT_SET_VALUES;
2824   mat->preallocated = PETSC_TRUE;
2825 
2826   a->size           = oldmat->size;
2827   a->rank           = oldmat->rank;
2828   a->donotstash     = oldmat->donotstash;
2829   a->roworiented    = oldmat->roworiented;
2830   a->rowindices     = 0;
2831   a->rowvalues      = 0;
2832   a->getrowactive   = PETSC_FALSE;
2833 
2834   ierr = PetscLayoutCopy(matin->rmap,&mat->rmap);CHKERRQ(ierr);
2835   ierr = PetscLayoutCopy(matin->cmap,&mat->cmap);CHKERRQ(ierr);
2836 
2837   if (oldmat->colmap) {
2838 #if defined (PETSC_USE_CTABLE)
2839     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
2840 #else
2841     ierr = PetscMalloc((mat->cmap->N)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr);
2842     ierr = PetscLogObjectMemory(mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
2843     ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
2844 #endif
2845   } else a->colmap = 0;
2846   if (oldmat->garray) {
2847     PetscInt len;
2848     len  = oldmat->B->cmap->n;
2849     ierr = PetscMalloc((len+1)*sizeof(PetscInt),&a->garray);CHKERRQ(ierr);
2850     ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr);
2851     if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); }
2852   } else a->garray = 0;
2853 
2854   ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
2855   ierr = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr);
2856   ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
2857   ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr);
2858   ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
2859   ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr);
2860   ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
2861   ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr);
2862   ierr = PetscFListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr);
2863   *newmat = mat;
2864   PetscFunctionReturn(0);
2865 }
2866 
2867 #undef __FUNCT__
2868 #define __FUNCT__ "MatLoad_MPIAIJ"
2869 PetscErrorCode MatLoad_MPIAIJ(PetscViewer viewer, const MatType type,Mat *newmat)
2870 {
2871   Mat            A;
2872   PetscScalar    *vals,*svals;
2873   MPI_Comm       comm = ((PetscObject)viewer)->comm;
2874   MPI_Status     status;
2875   PetscErrorCode ierr;
2876   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag,mpicnt,mpimaxnz;
2877   PetscInt       i,nz,j,rstart,rend,mmax,maxnz = 0;
2878   PetscInt       header[4],*rowlengths = 0,M,N,m,*cols;
2879   PetscInt       *ourlens = PETSC_NULL,*procsnz = PETSC_NULL,*offlens = PETSC_NULL,jj,*mycols,*smycols;
2880   PetscInt       cend,cstart,n,*rowners;
2881   int            fd;
2882 
2883   PetscFunctionBegin;
2884   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2885   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2886   if (!rank) {
2887     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
2888     ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
2889     if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
2890   }
2891 
2892   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
2893   M = header[1]; N = header[2];
2894   /* determine ownership of all rows */
2895   m    = M/size + ((M % size) > rank);
2896   ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr);
2897   ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
2898 
2899   /* First process needs enough room for process with most rows */
2900   if (!rank) {
2901     mmax       = rowners[1];
2902     for (i=2; i<size; i++) {
2903       mmax = PetscMax(mmax,rowners[i]);
2904     }
2905   } else mmax = m;
2906 
2907   rowners[0] = 0;
2908   for (i=2; i<=size; i++) {
2909     rowners[i] += rowners[i-1];
2910   }
2911   rstart = rowners[rank];
2912   rend   = rowners[rank+1];
2913 
2914   /* distribute row lengths to all processors */
2915   ierr    = PetscMalloc2(mmax,PetscInt,&ourlens,mmax,PetscInt,&offlens);CHKERRQ(ierr);
2916   if (!rank) {
2917     ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr);
2918     ierr = PetscMalloc(m*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
2919     ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr);
2920     ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr);
2921     for (j=0; j<m; j++) {
2922       procsnz[0] += ourlens[j];
2923     }
2924     for (i=1; i<size; i++) {
2925       ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr);
2926       /* calculate the number of nonzeros on each processor */
2927       for (j=0; j<rowners[i+1]-rowners[i]; j++) {
2928         procsnz[i] += rowlengths[j];
2929       }
2930       mpicnt = PetscMPIIntCast(rowners[i+1]-rowners[i]);
2931       ierr   = MPI_Send(rowlengths,mpicnt,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
2932     }
2933     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
2934   } else {
2935     mpicnt = PetscMPIIntCast(m);
2936     ierr   = MPI_Recv(ourlens,mpicnt,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
2937   }
2938 
2939   if (!rank) {
2940     /* determine max buffer needed and allocate it */
2941     maxnz = 0;
2942     for (i=0; i<size; i++) {
2943       maxnz = PetscMax(maxnz,procsnz[i]);
2944     }
2945     ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr);
2946 
2947     /* read in my part of the matrix column indices  */
2948     nz   = procsnz[0];
2949     ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr);
2950     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
2951 
2952     /* read in everyone else's and ship them off */
2953     for (i=1; i<size; i++) {
2954       nz     = procsnz[i];
2955       ierr   = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
2956       mpicnt = PetscMPIIntCast(nz);
2957       ierr   = MPI_Send(cols,mpicnt,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
2958     }
2959     ierr = PetscFree(cols);CHKERRQ(ierr);
2960   } else {
2961     /* determine buffer space needed for message */
2962     nz = 0;
2963     for (i=0; i<m; i++) {
2964       nz += ourlens[i];
2965     }
2966     ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr);
2967 
2968     /* receive message of column indices */
2969     mpicnt = PetscMPIIntCast(nz);
2970     ierr = MPI_Recv(mycols,mpicnt,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
2971     ierr = MPI_Get_count(&status,MPIU_INT,&mpimaxnz);CHKERRQ(ierr);
2972     if (mpimaxnz == MPI_UNDEFINED) {SETERRQ1(PETSC_ERR_LIB,"MPI_Get_count() returned MPI_UNDEFINED, expected %d",mpicnt);}
2973     else if (mpimaxnz < 0) {SETERRQ2(PETSC_ERR_LIB,"MPI_Get_count() returned impossible negative value %d, expected %d",mpimaxnz,mpicnt);}
2974     else if (mpimaxnz != mpicnt) {SETERRQ2(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file: expected %d received %d",mpicnt,mpimaxnz);}
2975   }
2976 
2977   /* determine column ownership if matrix is not square */
2978   if (N != M) {
2979     n      = N/size + ((N % size) > rank);
2980     ierr   = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
2981     cstart = cend - n;
2982   } else {
2983     cstart = rstart;
2984     cend   = rend;
2985     n      = cend - cstart;
2986   }
2987 
2988   /* loop over local rows, determining number of off diagonal entries */
2989   ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr);
2990   jj = 0;
2991   for (i=0; i<m; i++) {
2992     for (j=0; j<ourlens[i]; j++) {
2993       if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++;
2994       jj++;
2995     }
2996   }
2997 
2998   /* create our matrix */
2999   for (i=0; i<m; i++) {
3000     ourlens[i] -= offlens[i];
3001   }
3002   ierr = MatCreate(comm,&A);CHKERRQ(ierr);
3003   ierr = MatSetSizes(A,m,n,M,N);CHKERRQ(ierr);
3004   ierr = MatSetType(A,type);CHKERRQ(ierr);
3005   ierr = MatMPIAIJSetPreallocation(A,0,ourlens,0,offlens);CHKERRQ(ierr);
3006 
3007   for (i=0; i<m; i++) {
3008     ourlens[i] += offlens[i];
3009   }
3010 
3011   if (!rank) {
3012     ierr = PetscMalloc((maxnz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
3013 
3014     /* read in my part of the matrix numerical values  */
3015     nz   = procsnz[0];
3016     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3017 
3018     /* insert into matrix */
3019     jj      = rstart;
3020     smycols = mycols;
3021     svals   = vals;
3022     for (i=0; i<m; i++) {
3023       ierr = MatSetValues_MPIAIJ(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
3024       smycols += ourlens[i];
3025       svals   += ourlens[i];
3026       jj++;
3027     }
3028 
3029     /* read in other processors and ship out */
3030     for (i=1; i<size; i++) {
3031       nz     = procsnz[i];
3032       ierr   = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3033       mpicnt = PetscMPIIntCast(nz);
3034       ierr   = MPI_Send(vals,mpicnt,MPIU_SCALAR,i,((PetscObject)A)->tag,comm);CHKERRQ(ierr);
3035     }
3036     ierr = PetscFree(procsnz);CHKERRQ(ierr);
3037   } else {
3038     /* receive numeric values */
3039     ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
3040 
3041     /* receive message of values*/
3042     mpicnt = PetscMPIIntCast(nz);
3043     ierr   = MPI_Recv(vals,mpicnt,MPIU_SCALAR,0,((PetscObject)A)->tag,comm,&status);CHKERRQ(ierr);
3044     ierr   = MPI_Get_count(&status,MPIU_SCALAR,&mpimaxnz);CHKERRQ(ierr);
3045     if (mpimaxnz == MPI_UNDEFINED) {SETERRQ1(PETSC_ERR_LIB,"MPI_Get_count() returned MPI_UNDEFINED, expected %d",mpicnt);}
3046     else if (mpimaxnz < 0) {SETERRQ2(PETSC_ERR_LIB,"MPI_Get_count() returned impossible negative value %d, expected %d",mpimaxnz,mpicnt);}
3047     else if (mpimaxnz != mpicnt) {SETERRQ2(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file: expected %d received %d",mpicnt,mpimaxnz);}
3048 
3049     /* insert into matrix */
3050     jj      = rstart;
3051     smycols = mycols;
3052     svals   = vals;
3053     for (i=0; i<m; i++) {
3054       ierr     = MatSetValues_MPIAIJ(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
3055       smycols += ourlens[i];
3056       svals   += ourlens[i];
3057       jj++;
3058     }
3059   }
3060   ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr);
3061   ierr = PetscFree(vals);CHKERRQ(ierr);
3062   ierr = PetscFree(mycols);CHKERRQ(ierr);
3063   ierr = PetscFree(rowners);CHKERRQ(ierr);
3064 
3065   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3066   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3067   *newmat = A;
3068   PetscFunctionReturn(0);
3069 }
3070 
3071 #undef __FUNCT__
3072 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ"
3073 PetscErrorCode MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat)
3074 {
3075   PetscErrorCode ierr;
3076   IS             iscol_local;
3077   PetscInt       csize;
3078 
3079   PetscFunctionBegin;
3080   ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
3081   if (call == MAT_REUSE_MATRIX) {
3082     ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr);
3083     if (!iscol_local) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3084   } else {
3085     ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
3086   }
3087   ierr = MatGetSubMatrix_MPIAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr);
3088   if (call == MAT_INITIAL_MATRIX) {
3089     ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
3090     ierr = ISDestroy(iscol_local);CHKERRQ(ierr);
3091   }
3092   PetscFunctionReturn(0);
3093 }
3094 
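/*
   Caller-level sketch (an assumption for illustration, not code from this file): the
   public MatGetSubMatrix() wrapper is expected to dispatch to MatGetSubMatrix_MPIAIJ()
   above; a first call with MAT_INITIAL_MATRIX composes the gathered "ISAllGather" index
   set (and the "SubMatrix" object used below) on the result so a later call can reuse them:

     Mat sub;
     ierr = MatGetSubMatrix(mat,isrow,iscol,MAT_INITIAL_MATRIX,&sub);CHKERRQ(ierr);
     ... change numerical values of mat, keeping the same nonzero pattern ...
     ierr = MatGetSubMatrix(mat,isrow,iscol,MAT_REUSE_MATRIX,&sub);CHKERRQ(ierr);
*/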
3095 #undef __FUNCT__
3096 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ_Private"
3097 /*
3098     Not great since it makes two copies of the submatrix: first a SeqAIJ on each
3099   process, and then the final result obtained by concatenating the local matrices.
3100   Writing it directly would be much like MatGetSubMatrices_MPIAIJ().
3101 
3102   Note: This requires a sequential iscol with all indices.
3103 */
3104 PetscErrorCode MatGetSubMatrix_MPIAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat)
3105 {
3106   PetscErrorCode ierr;
3107   PetscMPIInt    rank,size;
3108   PetscInt       i,m,n,rstart,row,rend,nz,*cwork,j;
3109   PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal;
3110   Mat            *local,M,Mreuse;
3111   MatScalar      *vwork,*aa;
3112   MPI_Comm       comm = ((PetscObject)mat)->comm;
3113   Mat_SeqAIJ     *aij;
3114 
3115 
3116   PetscFunctionBegin;
3117   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3118   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3119 
3120   if (call ==  MAT_REUSE_MATRIX) {
3121     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr);
3122     if (!Mreuse) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3123     local = &Mreuse;
3124     ierr  = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);CHKERRQ(ierr);
3125   } else {
3126     ierr   = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr);
3127     Mreuse = *local;
3128     ierr   = PetscFree(local);CHKERRQ(ierr);
3129   }
3130 
3131   /*
3132       m - number of local rows
3133       n - number of columns (same on all processors)
3134       rstart - first row in new global matrix generated
3135   */
3136   ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
3137   if (call == MAT_INITIAL_MATRIX) {
3138     aij = (Mat_SeqAIJ*)(Mreuse)->data;
3139     ii  = aij->i;
3140     jj  = aij->j;
3141 
3142     /*
3143         Determine the number of non-zeros in the diagonal and off-diagonal
3144         portions of the matrix in order to do correct preallocation
3145     */
3146 
3147     /* first get start and end of "diagonal" columns */
3148     if (csize == PETSC_DECIDE) {
3149       ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
3150       if (mglobal == n) { /* square matrix */
3151 	nlocal = m;
3152       } else {
3153         nlocal = n/size + ((n % size) > rank);
3154       }
3155     } else {
3156       nlocal = csize;
3157     }
3158     ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3159     rstart = rend - nlocal;
3160     if (rank == size - 1 && rend != n) {
3161       SETERRQ2(PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n);
3162     }
3163 
3164     /* next, compute all the lengths */
3165     ierr  = PetscMalloc((2*m+1)*sizeof(PetscInt),&dlens);CHKERRQ(ierr);
3166     olens = dlens + m;
3167     for (i=0; i<m; i++) {
3168       jend = ii[i+1] - ii[i];
3169       olen = 0;
3170       dlen = 0;
3171       for (j=0; j<jend; j++) {
3172         if (*jj < rstart || *jj >= rend) olen++;
3173         else dlen++;
3174         jj++;
3175       }
3176       olens[i] = olen;
3177       dlens[i] = dlen;
3178     }
3179     ierr = MatCreate(comm,&M);CHKERRQ(ierr);
3180     ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr);
3181     ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr);
3182     ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr);
3183     ierr = PetscFree(dlens);CHKERRQ(ierr);
3184   } else {
3185     PetscInt ml,nl;
3186 
3187     M = *newmat;
3188     ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
3189     if (ml != m) SETERRQ(PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
3190     ierr = MatZeroEntries(M);CHKERRQ(ierr);
3191     /*
3192          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
3193        rather than the slower MatSetValues().
3194     */
3195     M->was_assembled = PETSC_TRUE;
3196     M->assembled     = PETSC_FALSE;
3197   }
3198   ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
3199   aij = (Mat_SeqAIJ*)(Mreuse)->data;
3200   ii  = aij->i;
3201   jj  = aij->j;
3202   aa  = aij->a;
3203   for (i=0; i<m; i++) {
3204     row   = rstart + i;
3205     nz    = ii[i+1] - ii[i];
3206     cwork = jj;     jj += nz;
3207     vwork = aa;     aa += nz;
3208     ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
3209   }
3210 
3211   ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3212   ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3213   *newmat = M;
3214 
3215   /* save submatrix used in processor for next request */
3216   if (call ==  MAT_INITIAL_MATRIX) {
3217     ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
3218     ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr);
3219   }
3220 
3221   PetscFunctionReturn(0);
3222 }
3223 
3224 EXTERN_C_BEGIN
3225 #undef __FUNCT__
3226 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR_MPIAIJ"
3227 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[])
3228 {
3229   PetscInt       m,cstart, cend,j,nnz,i,d;
3230   PetscInt       *d_nnz,*o_nnz,nnz_max = 0,rstart,ii;
3231   const PetscInt *JJ;
3232   PetscScalar    *values;
3233   PetscErrorCode ierr;
3234 
3235   PetscFunctionBegin;
3236   if (Ii[0]) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0 it is %D",Ii[0]);
3237 
3238   ierr = PetscLayoutSetBlockSize(B->rmap,1);CHKERRQ(ierr);
3239   ierr = PetscLayoutSetBlockSize(B->cmap,1);CHKERRQ(ierr);
3240   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3241   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3242   m      = B->rmap->n;
3243   cstart = B->cmap->rstart;
3244   cend   = B->cmap->rend;
3245   rstart = B->rmap->rstart;
3246 
3247   ierr  = PetscMalloc2(m,PetscInt,&d_nnz,m,PetscInt,&o_nnz);CHKERRQ(ierr);
3248 
3249 #if defined(PETSC_USE_DEBUG)
3250   for (i=0; i<m; i++) {
3251     nnz     = Ii[i+1]- Ii[i];
3252     JJ      = J + Ii[i];
3253     if (nnz < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative %D number of columns",i,nnz);
3254     if (nnz && (JJ[0] < 0)) SETERRQ1(PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index",i);
3255     if (nnz && (JJ[nnz-1] >= B->cmap->N)) SETERRQ3(PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N);
3256   }
3257 #endif
3258 
3259   for (i=0; i<m; i++) {
3260     nnz     = Ii[i+1]- Ii[i];
3261     JJ      = J + Ii[i];
3262     nnz_max = PetscMax(nnz_max,nnz);
3263     d       = 0;
3264     for (j=0; j<nnz; j++) {
3265       if (cstart <= JJ[j] && JJ[j] < cend) d++;
3266     }
3267     d_nnz[i] = d;
3268     o_nnz[i] = nnz - d;
3269   }
3270   ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
3271   ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);
3272 
3273   if (v) values = (PetscScalar*)v;
3274   else {
3275     ierr = PetscMalloc((nnz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr);
3276     ierr = PetscMemzero(values,nnz_max*sizeof(PetscScalar));CHKERRQ(ierr);
3277   }
3278 
3279   for (i=0; i<m; i++) {
3280     ii   = i + rstart;
3281     nnz  = Ii[i+1]- Ii[i];
3282     ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ? Ii[i] : 0),INSERT_VALUES);CHKERRQ(ierr);
3283   }
3284   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3285   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3286 
3287   if (!v) {
3288     ierr = PetscFree(values);CHKERRQ(ierr);
3289   }
3290   PetscFunctionReturn(0);
3291 }
3292 EXTERN_C_END
3293 
3294 #undef __FUNCT__
3295 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR"
3296 /*@
3297    MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format
3298    (the default parallel PETSc format).
3299 
3300    Collective on MPI_Comm
3301 
3302    Input Parameters:
3303 +  B - the matrix
3304 .  i - the indices into j for the start of each local row (starts with zero)
3305 .  j - the column indices for each local row (starts with zero)
3306 -  v - optional values in the matrix
3307 
3308    Level: developer
3309 
3310    Notes:
3311        The i, j, and v arrays ARE copied by this routine into the internal format used by PETSc;
3312      thus you CANNOT change the matrix entries by changing the values of v[] after you have
3313      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3314 
3315        The i and j indices are 0 based, and the entries of i are offsets into the local j array.
3316 
3317        The format used for the sparse matrix input is equivalent to a
3318     row-major ordering, i.e. for the following matrix, the input data expected is
3319     as shown:
3320 
3321         1 0 0
3322         2 0 3     P0
3323        -------
3324         4 5 6     P1
3325 
3326      Process0 [P0]: rows_owned=[0,1]
3327         i =  {0,1,3}  [size = nrow+1  = 2+1]
3328         j =  {0,0,2}  [size = nz = 3]
3329         v =  {1,2,3}  [size = nz = 3]
3330 
3331      Process1 [P1]: rows_owned=[2]
3332         i =  {0,3}    [size = nrow+1  = 1+1]
3333         j =  {0,1,2}  [size = nz = 3]
3334         v =  {4,5,6}  [size = nz = 3]
3335 
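     A minimal calling sketch (illustrative only; mlocal/nlocal are assumed local sizes
     and i,j,v are each process's CSR arrays as laid out above):
.vb
     Mat B;
     ierr = MatCreate(comm,&B);CHKERRQ(ierr);
     ierr = MatSetSizes(B,mlocal,nlocal,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
     ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr);
     ierr = MatMPIAIJSetPreallocationCSR(B,i,j,v);CHKERRQ(ierr); /* preallocates and also inserts/assembles the values */
.ve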
3336 .keywords: matrix, aij, compressed row, sparse, parallel
3337 
3338 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateMPIAIJ(), MPIAIJ,
3339           MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays()
3340 @*/
3341 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
3342 {
3343   PetscErrorCode ierr,(*f)(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]);
3344 
3345   PetscFunctionBegin;
3346   ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",(void (**)(void))&f);CHKERRQ(ierr);
3347   if (f) {
3348     ierr = (*f)(B,i,j,v);CHKERRQ(ierr);
3349   }
3350   PetscFunctionReturn(0);
3351 }
3352 
3353 #undef __FUNCT__
3354 #define __FUNCT__ "MatMPIAIJSetPreallocation"
3355 /*@C
3356    MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format
3357    (the default parallel PETSc format).  For good matrix assembly performance
3358    the user should preallocate the matrix storage by setting the parameters
3359    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3360    performance can be increased by more than a factor of 50.
3361 
3362    Collective on MPI_Comm
3363 
3364    Input Parameters:
3365 +  A - the matrix
3366 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
3367            (same value is used for all local rows)
3368 .  d_nnz - array containing the number of nonzeros in the various rows of the
3369            DIAGONAL portion of the local submatrix (possibly different for each row)
3370            or PETSC_NULL, if d_nz is used to specify the nonzero structure.
3371            The size of this array is equal to the number of local rows, i.e 'm'.
3372            You must leave room for the diagonal entry even if it is zero.
3373 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
3374            submatrix (same value is used for all local rows).
3375 -  o_nnz - array containing the number of nonzeros in the various rows of the
3376            OFF-DIAGONAL portion of the local submatrix (possibly different for
3377            each row) or PETSC_NULL, if o_nz is used to specify the nonzero
3378            structure. The size of this array is equal to the number
3379            of local rows, i.e 'm'.
3380 
3381    If the *_nnz parameter is given then the *_nz parameter is ignored
3382 
3383    The AIJ format (also called the Yale sparse matrix format or
3384    compressed row storage (CSR)), is fully compatible with standard Fortran 77
3385    storage.  The stored row and column indices begin with zero.  See the users manual for details.
3386 
3387    The parallel matrix is partitioned such that the first m0 rows belong to
3388    process 0, the next m1 rows belong to process 1, the next m2 rows belong
3389    to process 2 etc.. where m0,m1,m2... are the input parameter 'm'.
3390 
3391    The DIAGONAL portion of the local submatrix of a processor can be defined
3392    as the submatrix which is obtained by extracting the part corresponding
3393    to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the
3394    first row that belongs to the processor, and r2 is the last row belonging
3395    to this processor. This is a square mxm matrix. The remaining portion
3396    of the local submatrix (mxN) constitute the OFF-DIAGONAL portion.
3397 
3398    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
3399 
3400    You can call MatGetInfo() to get information on how effective the preallocation was;
3401    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
3402    You can also run with the option -info and look for messages with the string
3403    malloc in them to see if additional memory allocation was needed.
3404 
3405    Example usage:
3406 
3407    Consider the following 8x8 matrix with 34 non-zero values, that is
3408    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
3409    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
3410    as follows:
3411 
3412 .vb
3413             1  2  0  |  0  3  0  |  0  4
3414     Proc0   0  5  6  |  7  0  0  |  8  0
3415             9  0 10  | 11  0  0  | 12  0
3416     -------------------------------------
3417            13  0 14  | 15 16 17  |  0  0
3418     Proc1   0 18  0  | 19 20 21  |  0  0
3419             0  0  0  | 22 23  0  | 24  0
3420     -------------------------------------
3421     Proc2  25 26 27  |  0  0 28  | 29  0
3422            30  0  0  | 31 32 33  |  0 34
3423 .ve
3424 
3425    This can be represented as a collection of submatrices as:
3426 
3427 .vb
3428       A B C
3429       D E F
3430       G H I
3431 .ve
3432 
3433    Where the submatrices A,B,C are owned by proc0, D,E,F are
3434    owned by proc1, G,H,I are owned by proc2.
3435 
3436    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3437    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3438    The 'M','N' parameters are 8,8, and have the same values on all procs.
3439 
3440    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
3441    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
3442    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
3443    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
3444    part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
3445    matrix, and [DF] as another SeqAIJ matrix.
3446 
3447    When d_nz, o_nz parameters are specified, d_nz storage elements are
3448    allocated for every row of the local diagonal submatrix, and o_nz
3449    storage locations are allocated for every row of the OFF-DIAGONAL submatrix.
3450    One way to choose d_nz and o_nz is to use the maximum number of nonzeros per
3451    local row in the local DIAGONAL and OFF-DIAGONAL submatrices, respectively.
3452    In this case, the values of d_nz,o_nz are:
3453 .vb
3454      proc0 : d_nz = 2, o_nz = 2
3455      proc1 : d_nz = 3, o_nz = 2
3456      proc2 : d_nz = 1, o_nz = 4
3457 .ve
3458    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
3459    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
3460    for proc2, i.e. we are using 12+15+10=37 storage locations to store
3461    34 values.
3462 
3463    When d_nnz, o_nnz parameters are specified, the storage is specified
3464    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
3465    In the above case the values for d_nnz,o_nnz are:
3466 .vb
3467      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
3468      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
3469      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
3470 .ve
3471    Here the space allocated is the sum of all the above values, i.e. 34, and
3472    hence pre-allocation is perfect.
3473 
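   A minimal calling sketch for proc0 of the example above (illustrative only; comm and A
   are assumed to be the caller's communicator and matrix):
.vb
     PetscInt d_nnz[] = {2,2,2}, o_nnz[] = {2,2,2};
     ierr = MatCreate(comm,&A);CHKERRQ(ierr);
     ierr = MatSetSizes(A,3,3,8,8);CHKERRQ(ierr);
     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
     ierr = MatMPIAIJSetPreallocation(A,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
.ve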
3474    Level: intermediate
3475 
3476 .keywords: matrix, aij, compressed row, sparse, parallel
3477 
3478 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIAIJ(), MatMPIAIJSetPreallocationCSR(),
3479           MPIAIJ, MatGetInfo()
3480 @*/
3481 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
3482 {
3483   PetscErrorCode ierr,(*f)(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]);
3484 
3485   PetscFunctionBegin;
3486   ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr);
3487   if (f) {
3488     ierr = (*f)(B,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
3489   }
3490   PetscFunctionReturn(0);
3491 }
3492 
3493 #undef __FUNCT__
3494 #define __FUNCT__ "MatCreateMPIAIJWithArrays"
3495 /*@
3496      MatCreateMPIAIJWithArrays - creates an MPI AIJ matrix using arrays that contain the
3497          local rows in standard CSR format.
3498 
3499    Collective on MPI_Comm
3500 
3501    Input Parameters:
3502 +  comm - MPI communicator
3503 .  m - number of local rows (Cannot be PETSC_DECIDE)
3504 .  n - This value should be the same as the local size used in creating the
3505        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
3506        calculated if N is given) For square matrices n is almost always m.
3507 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
3508 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
3509 .   i - row indices
3510 .   j - column indices
3511 -   a - matrix values
3512 
3513    Output Parameter:
3514 .   mat - the matrix
3515 
3516    Level: intermediate
3517 
3518    Notes:
3519        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
3520      thus you CANNOT change the matrix entries by changing the values of a[] after you have
3521      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3522 
3523        The i and j indices are 0 based, and the entries of i are offsets into the local j array.
3524 
3525        The format used for the sparse matrix input is equivalent to a
3526     row-major ordering, i.e. for the following matrix, the input data expected is
3527     as shown:
3528 
3529         1 0 0
3530         2 0 3     P0
3531        -------
3532         4 5 6     P1
3533 
3534      Process0 [P0]: rows_owned=[0,1]
3535         i =  {0,1,3}  [size = nrow+1  = 2+1]
3536         j =  {0,0,2}  [size = nz = 3]
3537         v =  {1,2,3}  [size = nz = 3]
3538 
3539      Process1 [P1]: rows_owned=[2]
3540         i =  {0,3}    [size = nrow+1  = 1+1]
3541         j =  {0,1,2}  [size = nz = 3]
3542         v =  {4,5,6}  [size = nz = 3]
3543 
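     A minimal calling sketch for process P0 of the example above (illustrative only; the
     3x3 global sizes are given explicitly and the local column size is left to PETSc):
.vb
     PetscInt    i[] = {0,1,3}, j[] = {0,0,2};
     PetscScalar v[] = {1,2,3};
     Mat         A;
     ierr = MatCreateMPIAIJWithArrays(comm,2,PETSC_DECIDE,3,3,i,j,v,&A);CHKERRQ(ierr);
.ve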
3544 .keywords: matrix, aij, compressed row, sparse, parallel
3545 
3546 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
3547           MPIAIJ, MatCreateMPIAIJ(), MatCreateMPIAIJWithSplitArrays()
3548 @*/
3549 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
3550 {
3551   PetscErrorCode ierr;
3552 
3553   PetscFunctionBegin;
3554   if (i[0]) {
3555     SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
3556   }
3557   if (m < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
3558   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
3559   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
3560   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
3561   ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr);
3562   PetscFunctionReturn(0);
3563 }
3564 
3565 #undef __FUNCT__
3566 #define __FUNCT__ "MatCreateMPIAIJ"
3567 /*@C
3568    MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format
3569    (the default parallel PETSc format).  For good matrix assembly performance
3570    the user should preallocate the matrix storage by setting the parameters
3571    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3572    performance can be increased by more than a factor of 50.
3573 
3574    Collective on MPI_Comm
3575 
3576    Input Parameters:
3577 +  comm - MPI communicator
3578 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
3579            This value should be the same as the local size used in creating the
3580            y vector for the matrix-vector product y = Ax.
3581 .  n - This value should be the same as the local size used in creating the
3582        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
3583        calculated if N is given) For square matrices n is almost always m.
3584 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
3585 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
3586 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
3587            (same value is used for all local rows)
3588 .  d_nnz - array containing the number of nonzeros in the various rows of the
3589            DIAGONAL portion of the local submatrix (possibly different for each row)
3590            or PETSC_NULL, if d_nz is used to specify the nonzero structure.
3591            The size of this array is equal to the number of local rows, i.e 'm'.
3592            You must leave room for the diagonal entry even if it is zero.
3593 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
3594            submatrix (same value is used for all local rows).
3595 -  o_nnz - array containing the number of nonzeros in the various rows of the
3596            OFF-DIAGONAL portion of the local submatrix (possibly different for
3597            each row) or PETSC_NULL, if o_nz is used to specify the nonzero
3598            structure. The size of this array is equal to the number
3599            of local rows, i.e 'm'.
3600 
3601    Output Parameter:
3602 .  A - the matrix
3603 
3604    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
3605    MatXXXXSetPreallocation() paradigm instead of this routine directly.
3606    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
3607 
3608    Notes:
3609    If the *_nnz parameter is given then the *_nz parameter is ignored
3610 
3611    m,n,M,N parameters specify the size of the matrix, and its partitioning across
3612    processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
3613    storage requirements for this matrix.
3614 
3615    If PETSC_DECIDE or  PETSC_DETERMINE is used for a particular argument on one
3616    processor then it must be used on all processors that share the object for
3617    that argument.
3618 
3619    The user MUST specify either the local or global matrix dimensions
3620    (possibly both).
3621 
3622    The parallel matrix is partitioned across processors such that the
3623    first m0 rows belong to process 0, the next m1 rows belong to
3624    process 1, the next m2 rows belong to process 2 etc.. where
3625    m0,m1,m2,.. are the input parameter 'm', i.e. each processor stores
3626    values corresponding to an [m x N] submatrix.
3627 
3628    The columns are logically partitioned with the n0 columns belonging
3629    to 0th partition, the next n1 columns belonging to the next
3630    partition etc., where n0,n1,n2... are the input parameter 'n'.
3631 
3632    The DIAGONAL portion of the local submatrix on any given processor
3633    is the submatrix corresponding to the rows and columns m,n
3634    owned by the given processor, i.e. the diagonal matrix on
3635    process 0 is [m0 x n0], diagonal matrix on process 1 is [m1 x n1]
3636    etc. The remaining portion of the local submatrix [m x (N-n)]
3637    constitute the OFF-DIAGONAL portion. The example below better
3638    illustrates this concept.
3639 
3640    For a square global matrix we define each processor's diagonal portion
3641    to be its local rows and the corresponding columns (a square submatrix);
3642    each processor's off-diagonal portion encompasses the remainder of the
3643    local matrix (a rectangular submatrix).
3644 
3645    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
3646 
3647    When calling this routine with a single process communicator, a matrix of
3648    type SEQAIJ is returned.  If a matrix of type MPIAIJ is desired for this
3649    type of communicator, use the construction mechanism:
3650      MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatSetSizes(A, m,n,M,N); MatMPIAIJSetPreallocation(A,...);
3651 
3652    By default, this format uses inodes (identical nodes) when possible.
3653    We search for consecutive rows with the same nonzero structure, thereby
3654    reusing matrix information to achieve increased efficiency.
3655 
3656    Options Database Keys:
3657 +  -mat_no_inode  - Do not use inodes
3658 .  -mat_inode_limit <limit> - Sets inode limit (max limit=5)
3659 -  -mat_aij_oneindex - Internally use indexing starting at 1
3660         rather than 0.  Note that when calling MatSetValues(),
3661         the user still MUST index entries starting at 0!
3662 
3663 
3664    Example usage:
3665 
3666    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
3667    assembled across 3 processors. Lets assume that proc0 owns 3 rows,
3668    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
3669    as follows:
3670 
3671 .vb
3672             1  2  0  |  0  3  0  |  0  4
3673     Proc0   0  5  6  |  7  0  0  |  8  0
3674             9  0 10  | 11  0  0  | 12  0
3675     -------------------------------------
3676            13  0 14  | 15 16 17  |  0  0
3677     Proc1   0 18  0  | 19 20 21  |  0  0
3678             0  0  0  | 22 23  0  | 24  0
3679     -------------------------------------
3680     Proc2  25 26 27  |  0  0 28  | 29  0
3681            30  0  0  | 31 32 33  |  0 34
3682 .ve
3683 
3684    This can be represented as a collection of submatrices as:
3685 
3686 .vb
3687       A B C
3688       D E F
3689       G H I
3690 .ve
3691 
3692    Where the submatrices A,B,C are owned by proc0, D,E,F are
3693    owned by proc1, G,H,I are owned by proc2.
3694 
3695    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3696    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3697    The 'M','N' parameters are 8,8, and have the same values on all procs.
3698 
3699    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
3700    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
3701    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
3702    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
3703    part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
3704    matrix, and [DF] as another SeqAIJ matrix.
3705 
3706    When d_nz, o_nz parameters are specified, d_nz storage elements are
3707    allocated for every row of the local diagonal submatrix, and o_nz
3708    storage locations are allocated for every row of the OFF-DIAGONAL submatrix.
3709    One way to choose d_nz and o_nz is to use the maximum number of nonzeros per
3710    local row in the local DIAGONAL and OFF-DIAGONAL submatrices, respectively.
3711    In this case, the values of d_nz,o_nz are:
3712 .vb
3713      proc0 : d_nz = 2, o_nz = 2
3714      proc1 : d_nz = 3, o_nz = 2
3715      proc2 : d_nz = 1, o_nz = 4
3716 .ve
3717    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
3718    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
3719    for proc2, i.e. we are using 12+15+10=37 storage locations to store
3720    34 values.
3721 
3722    When d_nnz, o_nnz parameters are specified, the storage is specified
3723    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
3724    In the above case the values for d_nnz,o_nnz are:
3725 .vb
3726      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
3727      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
3728      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
3729 .ve
3730    Here the space allocated is the sum of all the above values, i.e. 34, and
3731    hence pre-allocation is perfect.
3732 
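   A minimal calling sketch of the direct call for proc0 of the example above
   (illustrative only; comm is assumed to be the caller's communicator):
.vb
     PetscInt d_nnz[] = {2,2,2}, o_nnz[] = {2,2,2};
     ierr = MatCreateMPIAIJ(comm,3,3,8,8,0,d_nnz,0,o_nnz,&A);CHKERRQ(ierr);
.ve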
3733    Level: intermediate
3734 
3735 .keywords: matrix, aij, compressed row, sparse, parallel
3736 
3737 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
3738           MPIAIJ, MatCreateMPIAIJWithArrays()
3739 @*/
3740 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
3741 {
3742   PetscErrorCode ierr;
3743   PetscMPIInt    size;
3744 
3745   PetscFunctionBegin;
3746   ierr = MatCreate(comm,A);CHKERRQ(ierr);
3747   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
3748   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3749   if (size > 1) {
3750     ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr);
3751     ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
3752   } else {
3753     ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr);
3754     ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr);
3755   }
3756   PetscFunctionReturn(0);
3757 }
3758 
3759 #undef __FUNCT__
3760 #define __FUNCT__ "MatMPIAIJGetSeqAIJ"
3761 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,PetscInt *colmap[])
3762 {
3763   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
3764 
3765   PetscFunctionBegin;
3766   *Ad     = a->A;
3767   *Ao     = a->B;
3768   *colmap = a->garray;
3769   PetscFunctionReturn(0);
3770 }
3771 
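/*
   Usage sketch for MatMPIAIJGetSeqAIJ() above (illustrative only): access the diagonal
   (Ad) and off-diagonal (Ao) SeqAIJ blocks of an MPIAIJ matrix, together with the map
   from Ao's local columns to global column numbers:

     Mat      Ad,Ao;
     PetscInt *colmap;
     ierr = MatMPIAIJGetSeqAIJ(A,&Ad,&Ao,&colmap);CHKERRQ(ierr);
*/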
3772 #undef __FUNCT__
3773 #define __FUNCT__ "MatSetColoring_MPIAIJ"
3774 PetscErrorCode MatSetColoring_MPIAIJ(Mat A,ISColoring coloring)
3775 {
3776   PetscErrorCode ierr;
3777   PetscInt       i;
3778   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
3779 
3780   PetscFunctionBegin;
3781   if (coloring->ctype == IS_COLORING_GLOBAL) {
3782     ISColoringValue *allcolors,*colors;
3783     ISColoring      ocoloring;
3784 
3785     /* set coloring for diagonal portion */
3786     ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr);
3787 
3788     /* set coloring for off-diagonal portion */
3789     ierr = ISAllGatherColors(((PetscObject)A)->comm,coloring->n,coloring->colors,PETSC_NULL,&allcolors);CHKERRQ(ierr);
3790     ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
3791     for (i=0; i<a->B->cmap->n; i++) {
3792       colors[i] = allcolors[a->garray[i]];
3793     }
3794     ierr = PetscFree(allcolors);CHKERRQ(ierr);
3795     ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
3796     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
3797     ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
3798   } else if (coloring->ctype == IS_COLORING_GHOSTED) {
3799     ISColoringValue *colors;
3800     PetscInt        *larray;
3801     ISColoring      ocoloring;
3802 
3803     /* set coloring for diagonal portion */
3804     ierr = PetscMalloc((a->A->cmap->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr);
3805     for (i=0; i<a->A->cmap->n; i++) {
3806       larray[i] = i + A->cmap->rstart;
3807     }
3808     ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->A->cmap->n,larray,PETSC_NULL,larray);CHKERRQ(ierr);
3809     ierr = PetscMalloc((a->A->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
3810     for (i=0; i<a->A->cmap->n; i++) {
3811       colors[i] = coloring->colors[larray[i]];
3812     }
3813     ierr = PetscFree(larray);CHKERRQ(ierr);
3814     ierr = ISColoringCreate(PETSC_COMM_SELF,coloring->n,a->A->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
3815     ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr);
3816     ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
3817 
3818     /* set coloring for off-diagonal portion */
3819     ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr);
3820     ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->B->cmap->n,a->garray,PETSC_NULL,larray);CHKERRQ(ierr);
3821     ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
3822     for (i=0; i<a->B->cmap->n; i++) {
3823       colors[i] = coloring->colors[larray[i]];
3824     }
3825     ierr = PetscFree(larray);CHKERRQ(ierr);
3826     ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
3827     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
3828     ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
3829   } else {
3830     SETERRQ1(PETSC_ERR_SUP,"No support ISColoringType %d",(int)coloring->ctype);
3831   }
3832 
3833   PetscFunctionReturn(0);
3834 }
3835 
3836 #if defined(PETSC_HAVE_ADIC)
3837 #undef __FUNCT__
3838 #define __FUNCT__ "MatSetValuesAdic_MPIAIJ"
3839 PetscErrorCode MatSetValuesAdic_MPIAIJ(Mat A,void *advalues)
3840 {
3841   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
3842   PetscErrorCode ierr;
3843 
3844   PetscFunctionBegin;
3845   ierr = MatSetValuesAdic_SeqAIJ(a->A,advalues);CHKERRQ(ierr);
3846   ierr = MatSetValuesAdic_SeqAIJ(a->B,advalues);CHKERRQ(ierr);
3847   PetscFunctionReturn(0);
3848 }
3849 #endif
3850 
3851 #undef __FUNCT__
3852 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ"
3853 PetscErrorCode MatSetValuesAdifor_MPIAIJ(Mat A,PetscInt nl,void *advalues)
3854 {
3855   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
3856   PetscErrorCode ierr;
3857 
3858   PetscFunctionBegin;
3859   ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr);
3860   ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr);
3861   PetscFunctionReturn(0);
3862 }
3863 
3864 #undef __FUNCT__
3865 #define __FUNCT__ "MatMerge"
3866 /*@
3867       MatMerge - Creates a single large PETSc matrix by concatenating the sequential
3868                  matrices from each processor
3869 
3870     Collective on MPI_Comm
3871 
3872    Input Parameters:
3873 +    comm - the communicator the parallel matrix will live on
3874 .    inmat - the input sequential matrix (one per process)
3875 .    n - number of local columns (or PETSC_DECIDE)
3876 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
3877 
3878    Output Parameter:
3879 .    outmat - the parallel matrix generated
3880 
3881     Level: advanced
3882 
3883    Notes: The number of columns of the matrix in EACH processor MUST be the same.
3884 
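   A minimal calling sketch (illustrative only; inmat is a fully assembled sequential
   matrix on each process, and note that this routine destroys inmat):
.vb
     Mat C;
     ierr = MatMerge(PETSC_COMM_WORLD,inmat,PETSC_DECIDE,MAT_INITIAL_MATRIX,&C);CHKERRQ(ierr);
.ve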
3885 @*/
3886 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
3887 {
3888   PetscErrorCode ierr;
3889   PetscInt       m,N,i,rstart,nnz,Ii,*dnz,*onz;
3890   PetscInt       *indx;
3891   PetscScalar    *values;
3892 
3893   PetscFunctionBegin;
3894   ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
3895   if (scall == MAT_INITIAL_MATRIX){
3896     /* count nonzeros in each row, for diagonal and off diagonal portion of matrix */
3897     if (n == PETSC_DECIDE){
3898       ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr);
3899     }
3900     ierr = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3901     rstart -= m;
3902 
3903     ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
3904     for (i=0;i<m;i++) {
3905       ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr);
3906       ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr);
3907       ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr);
3908     }
3909     /* This routine will ONLY return MPIAIJ type matrix */
3910     ierr = MatCreate(comm,outmat);CHKERRQ(ierr);
3911     ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
3912     ierr = MatSetType(*outmat,MATMPIAIJ);CHKERRQ(ierr);
3913     ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr);
3914     ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
3915 
3916   } else if (scall == MAT_REUSE_MATRIX){
3917     ierr = MatGetOwnershipRange(*outmat,&rstart,PETSC_NULL);CHKERRQ(ierr);
3918   } else {
3919     SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
3920   }
3921 
3922   for (i=0;i<m;i++) {
3923     ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
3924     Ii    = i + rstart;
3925     ierr = MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
3926     ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
3927   }
3928   ierr = MatDestroy(inmat);CHKERRQ(ierr);
3929   ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3930   ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3931 
3932   PetscFunctionReturn(0);
3933 }
3934 
3935 #undef __FUNCT__
3936 #define __FUNCT__ "MatFileSplit"
3937 PetscErrorCode MatFileSplit(Mat A,char *outfile)
3938 {
3939   PetscErrorCode    ierr;
3940   PetscMPIInt       rank;
3941   PetscInt          m,N,i,rstart,nnz;
3942   size_t            len;
3943   const PetscInt    *indx;
3944   PetscViewer       out;
3945   char              *name;
3946   Mat               B;
3947   const PetscScalar *values;
3948 
3949   PetscFunctionBegin;
3950   ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr);
3951   ierr = MatGetSize(A,0,&N);CHKERRQ(ierr);
3952   /* Should this be the type of the diagonal block of A? */
3953   ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr);
3954   ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr);
3955   ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr);
3956   ierr = MatSeqAIJSetPreallocation(B,0,PETSC_NULL);CHKERRQ(ierr);
3957   ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr);
3958   for (i=0;i<m;i++) {
3959     ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
3960     ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
3961     ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
3962   }
3963   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3964   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3965 
3966   ierr = MPI_Comm_rank(((PetscObject)A)->comm,&rank);CHKERRQ(ierr);
3967   ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr);
3968   ierr = PetscMalloc((len+5)*sizeof(char),&name);CHKERRQ(ierr);
3969   sprintf(name,"%s.%d",outfile,rank);
3970   ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr);
3971   ierr = PetscFree(name);CHKERRQ(ierr);
3972   ierr = MatView(B,out);CHKERRQ(ierr);
3973   ierr = PetscViewerDestroy(out);CHKERRQ(ierr);
3974   ierr = MatDestroy(B);CHKERRQ(ierr);
3975   PetscFunctionReturn(0);
3976 }
3977 
3978 EXTERN PetscErrorCode MatDestroy_MPIAIJ(Mat);
3979 #undef __FUNCT__
3980 #define __FUNCT__ "MatDestroy_MPIAIJ_SeqsToMPI"
3981 PetscErrorCode PETSCMAT_DLLEXPORT MatDestroy_MPIAIJ_SeqsToMPI(Mat A)
3982 {
3983   PetscErrorCode       ierr;
3984   Mat_Merge_SeqsToMPI  *merge = PETSC_NULL;
3985   PetscContainer       container;
3986 
3987   PetscFunctionBegin;
3988   ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr);
3989   if (container) {
3990     ierr = PetscContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr);
3991     ierr = PetscFree(merge->id_r);CHKERRQ(ierr);
3992     ierr = PetscFree(merge->len_s);CHKERRQ(ierr);
3993     ierr = PetscFree(merge->len_r);CHKERRQ(ierr);
3994     ierr = PetscFree(merge->bi);CHKERRQ(ierr);
3995     ierr = PetscFree(merge->bj);CHKERRQ(ierr);
3996     ierr = PetscFree(merge->buf_ri[0]);CHKERRQ(ierr);
3997     ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr);
3998     ierr = PetscFree(merge->buf_rj[0]);CHKERRQ(ierr);
3999     ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr);
4000     ierr = PetscFree(merge->coi);CHKERRQ(ierr);
4001     ierr = PetscFree(merge->coj);CHKERRQ(ierr);
4002     ierr = PetscFree(merge->owners_co);CHKERRQ(ierr);
4003     ierr = PetscLayoutDestroy(merge->rowmap);CHKERRQ(ierr);
4004 
4005     ierr = PetscContainerDestroy(container);CHKERRQ(ierr);
4006     ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr);
4007   }
4008   ierr = PetscFree(merge);CHKERRQ(ierr);
4009 
4010   ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr);
4011   PetscFunctionReturn(0);
4012 }
4013 
4014 #include "../src/mat/utils/freespace.h"
4015 #include "petscbt.h"
4016 
4017 #undef __FUNCT__
4018 #define __FUNCT__ "MatMerge_SeqsToMPINumeric"
4019 /*@C
4020       MatMerge_SeqsToMPI - Creates an MPIAIJ matrix by adding sequential
4021                  matrices from each processor
4022 
4023     Collective on MPI_Comm
4024 
4025    Input Parameters:
4026 +    comm - the communicator the parallel matrix will live on
4027 .    seqmat - the input sequential matrix (one per process)
4028 .    m - number of local rows (or PETSC_DECIDE)
4029 .    n - number of local columns (or PETSC_DECIDE)
4030 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4031 
4032    Output Parameter:
4033 .    mpimat - the parallel matrix generated
4034 
4035     Level: advanced
4036 
4037    Notes:
4038      The dimensions of the sequential matrix in each processor MUST be the same.
4039      The input seqmat is included in the container "Mat_Merge_SeqsToMPI", and will be
4040      destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat.
4041 @*/
4042 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPINumeric(Mat seqmat,Mat mpimat)
4043 {
4044   PetscErrorCode       ierr;
4045   MPI_Comm             comm=((PetscObject)mpimat)->comm;
4046   Mat_SeqAIJ           *a=(Mat_SeqAIJ*)seqmat->data;
4047   PetscMPIInt          size,rank,taga,*len_s;
4048   PetscInt             N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj=a->j;
4049   PetscInt             proc,m;
4050   PetscInt             **buf_ri,**buf_rj;
4051   PetscInt             k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj;
4052   PetscInt             nrows,**buf_ri_k,**nextrow,**nextai;
4053   MPI_Request          *s_waits,*r_waits;
4054   MPI_Status           *status;
4055   MatScalar            *aa=a->a;
4056   MatScalar            **abuf_r,*ba_i;
4057   Mat_Merge_SeqsToMPI  *merge;
4058   PetscContainer       container;
4059 
4060   PetscFunctionBegin;
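  /* Using the message pattern set up by MatMerge_SeqsToMPISymbolic(), each process
     sends the numerical values of the rows it holds on behalf of other processes;
     the owner of each row then sums its own seqmat values with all received
     contributions and inserts the row into mpimat with MatSetValues(). */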
4061   ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
4062 
4063   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4064   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4065 
4066   ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr);
4067   if (!container) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"mpimat is missing the MatMergeSeqsToMPI container; it must be created with MatMerge_SeqsToMPISymbolic()");
4068   ierr = PetscContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr);
4070   bi     = merge->bi;
4071   bj     = merge->bj;
4072   buf_ri = merge->buf_ri;
4073   buf_rj = merge->buf_rj;
4074 
4075   ierr   = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr);
4076   owners = merge->rowmap->range;
4077   len_s  = merge->len_s;
4078 
4079   /* send and recv matrix values */
4080   /*-----------------------------*/
4081   ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr);
4082   ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr);
4083 
4084   ierr = PetscMalloc((merge->nsend+1)*sizeof(MPI_Request),&s_waits);CHKERRQ(ierr);
4085   for (proc=0,k=0; proc<size; proc++){
4086     if (!len_s[proc]) continue;
4087     i = owners[proc];
4088     ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr);
4089     k++;
4090   }
4091 
4092   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);}
4093   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);}
4094   ierr = PetscFree(status);CHKERRQ(ierr);
4095 
4096   ierr = PetscFree(s_waits);CHKERRQ(ierr);
4097   ierr = PetscFree(r_waits);CHKERRQ(ierr);
4098 
4099   /* insert mat values of mpimat */
4100   /*----------------------------*/
4101   ierr = PetscMalloc(N*sizeof(PetscScalar),&ba_i);CHKERRQ(ierr);
4102   ierr = PetscMalloc3(merge->nrecv,PetscInt*,&buf_ri_k,merge->nrecv,PetscInt*,&nextrow,merge->nrecv,PetscInt*,&nextai);CHKERRQ(ierr);
4103 
4104   for (k=0; k<merge->nrecv; k++){
4105     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
4106     nrows = *(buf_ri_k[k]);
4107     nextrow[k]  = buf_ri_k[k]+1;  /* next row number of k-th recved i-structure */
4108     nextai[k]   = buf_ri_k[k] + (nrows + 1);/* points to the next i-structure of k-th recved i-structure  */
4109   }
4110 
4111   /* set values of ba */
4112   m = merge->rowmap->n;
4113   for (i=0; i<m; i++) {
4114     arow = owners[rank] + i;
4115     bj_i = bj+bi[i];  /* col indices of the i-th row of mpimat */
4116     bnzi = bi[i+1] - bi[i];
4117     ierr = PetscMemzero(ba_i,bnzi*sizeof(PetscScalar));CHKERRQ(ierr);
4118 
4119     /* add local non-zero vals of this proc's seqmat into ba */
4120     anzi = ai[arow+1] - ai[arow];
4121     aj   = a->j + ai[arow];
4122     aa   = a->a + ai[arow];
4123     nextaj = 0;
4124     for (j=0; nextaj<anzi; j++){
4125       if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */
4126         ba_i[j] += aa[nextaj++];
4127       }
4128     }
4129 
4130     /* add received vals into ba */
4131     for (k=0; k<merge->nrecv; k++){ /* k-th received message */
4132       /* i-th row */
4133       if (i == *nextrow[k]) {
4134         anzi = *(nextai[k]+1) - *nextai[k];
4135         aj   = buf_rj[k] + *(nextai[k]);
4136         aa   = abuf_r[k] + *(nextai[k]);
4137         nextaj = 0;
4138         for (j=0; nextaj<anzi; j++){
4139           if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */
4140             ba_i[j] += aa[nextaj++];
4141           }
4142         }
4143         nextrow[k]++; nextai[k]++;
4144       }
4145     }
4146     ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr);
4147   }
4148   ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4149   ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4150 
4151   ierr = PetscFree(abuf_r[0]);CHKERRQ(ierr);
4152   ierr = PetscFree(abuf_r);CHKERRQ(ierr);
4153   ierr = PetscFree(ba_i);CHKERRQ(ierr);
4154   ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
4155   ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
4156   PetscFunctionReturn(0);
4157 }
4158 
4159 #undef __FUNCT__
4160 #define __FUNCT__ "MatMerge_SeqsToMPISymbolic"
4161 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPISymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat)
4162 {
4163   PetscErrorCode       ierr;
4164   Mat                  B_mpi;
4165   Mat_SeqAIJ           *a=(Mat_SeqAIJ*)seqmat->data;
4166   PetscMPIInt          size,rank,tagi,tagj,*len_s,*len_si,*len_ri;
4167   PetscInt             **buf_rj,**buf_ri,**buf_ri_k;
4168   PetscInt             M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j;
4169   PetscInt             len,proc,*dnz,*onz;
4170   PetscInt             k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0;
4171   PetscInt             nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai;
4172   MPI_Request          *si_waits,*sj_waits,*ri_waits,*rj_waits;
4173   MPI_Status           *status;
4174   PetscFreeSpaceList   free_space=PETSC_NULL,current_space=PETSC_NULL;
4175   PetscBT              lnkbt;
4176   Mat_Merge_SeqsToMPI  *merge;
4177   PetscContainer       container;
4178 
4179   PetscFunctionBegin;
4180   ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4181 
4182   /* make sure it is a PETSc comm */
4183   ierr = PetscCommDuplicate(comm,&comm,PETSC_NULL);CHKERRQ(ierr);
4184   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4185   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4186 
4187   ierr = PetscNew(Mat_Merge_SeqsToMPI,&merge);CHKERRQ(ierr);
4188   ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr);
4189 
4190   /* determine row ownership */
4191   /*---------------------------------------------------------*/
4192   ierr = PetscLayoutCreate(comm,&merge->rowmap);CHKERRQ(ierr);
4193   ierr = PetscLayoutSetLocalSize(merge->rowmap,m);CHKERRQ(ierr);
4194   ierr = PetscLayoutSetSize(merge->rowmap,M);CHKERRQ(ierr);
4195   ierr = PetscLayoutSetBlockSize(merge->rowmap,1);CHKERRQ(ierr);
4196   ierr = PetscLayoutSetUp(merge->rowmap);CHKERRQ(ierr);
4197   ierr = PetscMalloc(size*sizeof(PetscMPIInt),&len_si);CHKERRQ(ierr);
4198   ierr = PetscMalloc(size*sizeof(PetscMPIInt),&merge->len_s);CHKERRQ(ierr);
4199 
4200   m      = merge->rowmap->n;
4201   M      = merge->rowmap->N;
4202   owners = merge->rowmap->range;
4203 
4204   /* determine the number of messages to send, their lengths */
4205   /*---------------------------------------------------------*/
4206   len_s  = merge->len_s;
4207 
4208   len = 0;  /* length of buf_si[] */
4209   merge->nsend = 0;
4210   for (proc=0; proc<size; proc++){
4211     len_si[proc] = 0;
4212     if (proc == rank){
4213       len_s[proc] = 0;
4214     } else {
4215       len_si[proc] = owners[proc+1] - owners[proc] + 1;
4216       len_s[proc] = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */
4217     }
4218     if (len_s[proc]) {
4219       merge->nsend++;
4220       nrows = 0;
4221       for (i=owners[proc]; i<owners[proc+1]; i++){
4222         if (ai[i+1] > ai[i]) nrows++;
4223       }
4224       len_si[proc] = 2*(nrows+1);
4225       len += len_si[proc];
4226     }
4227   }
4228 
4229   /* determine the number and length of messages to receive for ij-structure */
4230   /*-------------------------------------------------------------------------*/
4231   ierr = PetscGatherNumberOfMessages(comm,PETSC_NULL,len_s,&merge->nrecv);CHKERRQ(ierr);
4232   ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr);
4233 
4234   /* post the Irecv of j-structure */
4235   /*-------------------------------*/
4236   ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr);
4237   ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr);
4238 
4239   /* post the Isend of j-structure */
4240   /*--------------------------------*/
4241   ierr = PetscMalloc2(merge->nsend,MPI_Request,&si_waits,merge->nsend,MPI_Request,&sj_waits);CHKERRQ(ierr);
4242 
4243   for (proc=0, k=0; proc<size; proc++){
4244     if (!len_s[proc]) continue;
4245     i = owners[proc];
4246     ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr);
4247     k++;
4248   }
4249 
4250   /* receives and sends of j-structure are complete */
4251   /*------------------------------------------------*/
4252   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);}
4253   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);}
4254 
4255   /* send and recv i-structure */
4256   /*---------------------------*/
4257   ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr);
4258   ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr);
4259 
4260   ierr = PetscMalloc((len+1)*sizeof(PetscInt),&buf_s);CHKERRQ(ierr);
4261   buf_si = buf_s;  /* points to the beginning of k-th msg to be sent */
4262   for (proc=0,k=0; proc<size; proc++){
4263     if (!len_s[proc]) continue;
4264     /* form outgoing message for i-structure:
4265          buf_si[0]:                 nrows to be sent
4266                [1:nrows]:           row index (global)
4267                [nrows+1:2*nrows+1]: i-structure index
4268     */
4269     /*-------------------------------------------*/
4270     nrows = len_si[proc]/2 - 1;
4271     buf_si_i    = buf_si + nrows+1;
4272     buf_si[0]   = nrows;
4273     buf_si_i[0] = 0;
4274     nrows = 0;
4275     for (i=owners[proc]; i<owners[proc+1]; i++){
4276       anzi = ai[i+1] - ai[i];
4277       if (anzi) {
4278         buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */
4279         buf_si[nrows+1] = i-owners[proc]; /* local row index */
4280         nrows++;
4281       }
4282     }
4283     ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr);
4284     k++;
4285     buf_si += len_si[proc];
4286   }
4287 
4288   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);}
4289   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);}
4290 
4291   ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr);
4292   for (i=0; i<merge->nrecv; i++){
4293     ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr);
4294   }
4295 
4296   ierr = PetscFree(len_si);CHKERRQ(ierr);
4297   ierr = PetscFree(len_ri);CHKERRQ(ierr);
4298   ierr = PetscFree(rj_waits);CHKERRQ(ierr);
4299   ierr = PetscFree2(si_waits,sj_waits);CHKERRQ(ierr);
4300   ierr = PetscFree(ri_waits);CHKERRQ(ierr);
4301   ierr = PetscFree(buf_s);CHKERRQ(ierr);
4302   ierr = PetscFree(status);CHKERRQ(ierr);
4303 
4304   /* compute a local seq matrix in each processor */
4305   /*----------------------------------------------*/
4306   /* allocate bi array and free space for accumulating nonzero column info */
4307   ierr = PetscMalloc((m+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr);
4308   bi[0] = 0;
4309 
4310   /* create and initialize a linked list */
4311   nlnk = N+1;
4312   ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4313 
4314   /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */
4316   len  = ai[owners[rank+1]] - ai[owners[rank]];
4317   ierr = PetscFreeSpaceGet((PetscInt)(2*len+1),&free_space);CHKERRQ(ierr);
4318   current_space = free_space;
4319 
4320   /* determine symbolic info for each local row */
4321   ierr = PetscMalloc3(merge->nrecv,PetscInt*,&buf_ri_k,merge->nrecv,PetscInt*,&nextrow,merge->nrecv,PetscInt*,&nextai);CHKERRQ(ierr);
4322 
4323   for (k=0; k<merge->nrecv; k++){
4324     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
4325     nrows = *buf_ri_k[k];
4326     nextrow[k]  = buf_ri_k[k] + 1;  /* next row number of k-th recved i-structure */
4327     nextai[k]   = buf_ri_k[k] + (nrows + 1);/* points to the next i-structure of k-th recved i-structure  */
4328   }
4329 
4330   ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
4331   len = 0;
4332   for (i=0;i<m;i++) {
4333     bnzi   = 0;
4334     /* add local non-zero cols of this proc's seqmat into lnk */
4335     arow   = owners[rank] + i;
4336     anzi   = ai[arow+1] - ai[arow];
4337     aj     = a->j + ai[arow];
4338     ierr = PetscLLAdd(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4339     bnzi += nlnk;
4340     /* add received col data into lnk */
4341     for (k=0; k<merge->nrecv; k++){ /* k-th received message */
4342       if (i == *nextrow[k]) { /* i-th row */
4343         anzi = *(nextai[k]+1) - *nextai[k];
4344         aj   = buf_rj[k] + *nextai[k];
4345         ierr = PetscLLAdd(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4346         bnzi += nlnk;
4347         nextrow[k]++; nextai[k]++;
4348       }
4349     }
4350     if (len < bnzi) len = bnzi;  /* =max(bnzi) */
4351 
4352     /* if free space is not available, make more free space */
4353     if (current_space->local_remaining<bnzi) {
4354       ierr = PetscFreeSpaceGet(bnzi+current_space->total_array_size,&current_space);CHKERRQ(ierr);
4355       nspacedouble++;
4356     }
4357     /* copy data into free space, then initialize lnk */
4358     ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
4359     ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr);
4360 
4361     current_space->array           += bnzi;
4362     current_space->local_used      += bnzi;
4363     current_space->local_remaining -= bnzi;
4364 
4365     bi[i+1] = bi[i] + bnzi;
4366   }
4367 
4368   ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
4369 
4370   ierr = PetscMalloc((bi[m]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr);
4371   ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr);
4372   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
4373 
4374   /* create symbolic parallel matrix B_mpi */
4375   /*---------------------------------------*/
4376   ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr);
4377   if (n==PETSC_DECIDE) {
4378     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr);
4379   } else {
4380     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
4381   }
4382   ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr);
4383   ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr);
4384   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
4385 
4386   /* B_mpi is not ready for use - assembly will be done by MatMerge_SeqsToMPINumeric() */
4387   B_mpi->assembled     = PETSC_FALSE;
4388   B_mpi->ops->destroy  = MatDestroy_MPIAIJ_SeqsToMPI;
4389   merge->bi            = bi;
4390   merge->bj            = bj;
4391   merge->buf_ri        = buf_ri;
4392   merge->buf_rj        = buf_rj;
4393   merge->coi           = PETSC_NULL;
4394   merge->coj           = PETSC_NULL;
4395   merge->owners_co     = PETSC_NULL;
4396 
4397   /* attach the supporting struct to B_mpi for reuse */
4398   ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
4399   ierr = PetscContainerSetPointer(container,merge);CHKERRQ(ierr);
4400   ierr = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr);
4401   *mpimat = B_mpi;
4402 
4403   ierr = PetscCommDestroy(&comm);CHKERRQ(ierr);
4404   ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4405   PetscFunctionReturn(0);
4406 }
4407 
4408 #undef __FUNCT__
4409 #define __FUNCT__ "MatMerge_SeqsToMPI"
4410 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPI(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat)
4411 {
4412   PetscErrorCode   ierr;
4413 
4414   PetscFunctionBegin;
4415   ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4416   if (scall == MAT_INITIAL_MATRIX){
4417     ierr = MatMerge_SeqsToMPISymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr);
4418   }
4419   ierr = MatMerge_SeqsToMPINumeric(seqmat,*mpimat);CHKERRQ(ierr);
4420   ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4421   PetscFunctionReturn(0);
4422 }
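
/*
   A minimal usage sketch of MatMerge_SeqsToMPI() (illustrative only, not compiled here;
   error checking and the construction of the per-process SeqAIJ matrix Aloc are assumed
   to be done by the caller):

      Mat Aloc,C;
      ...  build Aloc, a SeqAIJ matrix of the same global size on every process  ...
      ierr = MatMerge_SeqsToMPI(comm,Aloc,PETSC_DECIDE,PETSC_DECIDE,MAT_INITIAL_MATRIX,&C);CHKERRQ(ierr);
      ...  change the numerical values of Aloc, keeping its nonzero pattern  ...
      ierr = MatMerge_SeqsToMPI(comm,Aloc,PETSC_DECIDE,PETSC_DECIDE,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr);
      ierr = MatDestroy(C);CHKERRQ(ierr);
*/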
4423 
4424 #undef __FUNCT__
4425 #define __FUNCT__ "MatGetLocalMat"
4426 /*@
4427      MatGetLocalMat - Creates a SeqAIJ matrix by taking all its local rows
4428 
4429     Not Collective
4430 
4431    Input Parameters:
4432 +    A - the matrix
4433 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4434 
4435    Output Parameter:
4436 .    A_loc - the local sequential matrix generated
4437 
4438     Level: developer
4439 
4440 @*/
4441 PetscErrorCode PETSCMAT_DLLEXPORT MatGetLocalMat(Mat A,MatReuse scall,Mat *A_loc)
4442 {
4443   PetscErrorCode  ierr;
4444   Mat_MPIAIJ      *mpimat=(Mat_MPIAIJ*)A->data;
4445   Mat_SeqAIJ      *mat,*a=(Mat_SeqAIJ*)(mpimat->A)->data,*b=(Mat_SeqAIJ*)(mpimat->B)->data;
4446   PetscInt        *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*cmap=mpimat->garray;
4447   MatScalar       *aa=a->a,*ba=b->a,*cam;
4448   PetscScalar     *ca;
4449   PetscInt        am=A->rmap->n,i,j,k,cstart=A->cmap->rstart;
4450   PetscInt        *ci,*cj,col,ncols_d,ncols_o,jo;
4451 
4452   PetscFunctionBegin;
4453   ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
4454   if (scall == MAT_INITIAL_MATRIX){
4455     ierr = PetscMalloc((1+am)*sizeof(PetscInt),&ci);CHKERRQ(ierr);
4456     ci[0] = 0;
4457     for (i=0; i<am; i++){
4458       ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]);
4459     }
4460     ierr = PetscMalloc((1+ci[am])*sizeof(PetscInt),&cj);CHKERRQ(ierr);
4461     ierr = PetscMalloc((1+ci[am])*sizeof(PetscScalar),&ca);CHKERRQ(ierr);
4462     k = 0;
4463     for (i=0; i<am; i++) {
4464       ncols_o = bi[i+1] - bi[i];
4465       ncols_d = ai[i+1] - ai[i];
4466       /* off-diagonal portion of A */
4467       for (jo=0; jo<ncols_o; jo++) {
4468         col = cmap[*bj];
4469         if (col >= cstart) break;
4470         cj[k]   = col; bj++;
4471         ca[k++] = *ba++;
4472       }
4473       /* diagonal portion of A */
4474       for (j=0; j<ncols_d; j++) {
4475         cj[k]   = cstart + *aj++;
4476         ca[k++] = *aa++;
4477       }
4478       /* off-diagonal portion of A */
4479       for (j=jo; j<ncols_o; j++) {
4480         cj[k]   = cmap[*bj++];
4481         ca[k++] = *ba++;
4482       }
4483     }
4484     /* put together the new matrix */
4485     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr);
4486     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
4487     /* Since these are PETSc arrays, change flags to free them as necessary. */
4488     mat          = (Mat_SeqAIJ*)(*A_loc)->data;
4489     mat->free_a  = PETSC_TRUE;
4490     mat->free_ij = PETSC_TRUE;
4491     mat->nonew   = 0;
4492   } else if (scall == MAT_REUSE_MATRIX){
4493     mat=(Mat_SeqAIJ*)(*A_loc)->data;
4494     ci = mat->i; cj = mat->j; cam = mat->a;
4495     for (i=0; i<am; i++) {
4496       /* off-diagonal portion of A */
4497       ncols_o = bi[i+1] - bi[i];
4498       for (jo=0; jo<ncols_o; jo++) {
4499         col = cmap[*bj];
4500         if (col >= cstart) break;
4501         *cam++ = *ba++; bj++;
4502       }
4503       /* diagonal portion of A */
4504       ncols_d = ai[i+1] - ai[i];
4505       for (j=0; j<ncols_d; j++) *cam++ = *aa++;
4506       /* off-diagonal portion of A */
4507       for (j=jo; j<ncols_o; j++) {
4508         *cam++ = *ba++; bj++;
4509       }
4510     }
4511   } else {
4512     SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
4513   }
4514 
4515   ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
4516   PetscFunctionReturn(0);
4517 }
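
/*
   A minimal usage sketch of MatGetLocalMat() (illustrative only; A is an assembled
   MPIAIJ matrix, error checking left out):

      Mat A_loc;
      ierr = MatGetLocalMat(A,MAT_INITIAL_MATRIX,&A_loc);CHKERRQ(ierr);
      ...  A_loc is a SeqAIJ matrix holding A's local rows over the full column space  ...
      ...  after A's numerical values change (same nonzero pattern), refresh A_loc  ...
      ierr = MatGetLocalMat(A,MAT_REUSE_MATRIX,&A_loc);CHKERRQ(ierr);
      ierr = MatDestroy(A_loc);CHKERRQ(ierr);
*/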
4518 
4519 #undef __FUNCT__
4520 #define __FUNCT__ "MatGetLocalMatCondensed"
4521 /*@C
4522      MatGetLocalMatCondensed - Creates a SeqAIJ matrix by taking all the local rows and only the NON-ZERO columns of the matrix
4523 
4524     Not Collective
4525 
4526    Input Parameters:
4527 +    A - the matrix
4528 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4529 -    row, col - index sets of rows and columns to extract (or PETSC_NULL)
4530 
4531    Output Parameter:
4532 .    A_loc - the local sequential matrix generated
4533 
4534     Level: developer
4535 
4536 @*/
4537 PetscErrorCode PETSCMAT_DLLEXPORT MatGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc)
4538 {
4539   Mat_MPIAIJ        *a=(Mat_MPIAIJ*)A->data;
4540   PetscErrorCode    ierr;
4541   PetscInt          i,start,end,ncols,nzA,nzB,*cmap,imark,*idx;
4542   IS                isrowa,iscola;
4543   Mat               *aloc;
4544 
4545   PetscFunctionBegin;
4546   ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
4547   if (!row){
4548     start = A->rmap->rstart; end = A->rmap->rend;
4549     ierr = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr);
4550   } else {
4551     isrowa = *row;
4552   }
4553   if (!col){
4554     start = A->cmap->rstart;
4555     cmap  = a->garray;
4556     nzA   = a->A->cmap->n;
4557     nzB   = a->B->cmap->n;
4558     ierr  = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr);
4559     ncols = 0;
4560     for (i=0; i<nzB; i++) {
4561       if (cmap[i] < start) idx[ncols++] = cmap[i];
4562       else break;
4563     }
4564     imark = i;
4565     for (i=0; i<nzA; i++) idx[ncols++] = start + i;
4566     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i];
4567     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,&iscola);CHKERRQ(ierr);
4568     ierr = PetscFree(idx);CHKERRQ(ierr);
4569   } else {
4570     iscola = *col;
4571   }
4572   if (scall != MAT_INITIAL_MATRIX){
4573     ierr = PetscMalloc(sizeof(Mat),&aloc);CHKERRQ(ierr);
4574     aloc[0] = *A_loc;
4575   }
4576   ierr = MatGetSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr);
4577   *A_loc = aloc[0];
4578   ierr = PetscFree(aloc);CHKERRQ(ierr);
4579   if (!row){
4580     ierr = ISDestroy(isrowa);CHKERRQ(ierr);
4581   }
4582   if (!col){
4583     ierr = ISDestroy(iscola);CHKERRQ(ierr);
4584   }
4585   ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
4586   PetscFunctionReturn(0);
4587 }
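
/*
   A minimal usage sketch of MatGetLocalMatCondensed() (illustrative only; passing
   PETSC_NULL for row and col lets the routine build them from A's layout and its
   nonzero off-diagonal columns, error checking left out):

      Mat A_loc;
      ierr = MatGetLocalMatCondensed(A,MAT_INITIAL_MATRIX,PETSC_NULL,PETSC_NULL,&A_loc);CHKERRQ(ierr);
      ierr = MatGetLocalMatCondensed(A,MAT_REUSE_MATRIX,PETSC_NULL,PETSC_NULL,&A_loc);CHKERRQ(ierr);
      ierr = MatDestroy(A_loc);CHKERRQ(ierr);
*/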
4588 
4589 #undef __FUNCT__
4590 #define __FUNCT__ "MatGetBrowsOfAcols"
4591 /*@C
4592     MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns of local A
4593 
4594     Collective on Mat
4595 
4596    Input Parameters:
4597 +    A,B - the matrices in mpiaij format
4598 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4599 -    rowb, colb - index sets of rows and columns of B to extract (or PETSC_NULL)
4600 
4601    Output Parameter:
4602 +    rowb, colb - index sets of rows and columns of B to extract
4603 .    brstart - row index in B_seq at which B's own local rows begin (the next B->rmap->n rows of B_seq are taken from B's local rows)
4604 -    B_seq - the sequential matrix generated
4605 
4606     Level: developer
4607 
4608 @*/
4609 PetscErrorCode PETSCMAT_DLLEXPORT MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,PetscInt *brstart,Mat *B_seq)
4610 {
4611   Mat_MPIAIJ        *a=(Mat_MPIAIJ*)A->data;
4612   PetscErrorCode    ierr;
4613   PetscInt          *idx,i,start,ncols,nzA,nzB,*cmap,imark;
4614   IS                isrowb,iscolb;
4615   Mat               *bseq;
4616 
4617   PetscFunctionBegin;
4618   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend){
4619     SETERRQ4(PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
4620   }
4621   ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
4622 
4623   if (scall == MAT_INITIAL_MATRIX){
4624     start = A->cmap->rstart;
4625     cmap  = a->garray;
4626     nzA   = a->A->cmap->n;
4627     nzB   = a->B->cmap->n;
4628     ierr  = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr);
4629     ncols = 0;
4630     for (i=0; i<nzB; i++) {  /* row < local row index */
4631       if (cmap[i] < start) idx[ncols++] = cmap[i];
4632       else break;
4633     }
4634     imark = i;
4635     for (i=0; i<nzA; i++) idx[ncols++] = start + i;  /* local rows */
4636     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */
4637     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,&isrowb);CHKERRQ(ierr);
4638     ierr = PetscFree(idx);CHKERRQ(ierr);
4639     *brstart = imark;
4640     ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr);
4641   } else {
4642     if (!rowb || !colb) SETERRQ(PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX");
4643     isrowb = *rowb; iscolb = *colb;
4644     ierr = PetscMalloc(sizeof(Mat),&bseq);CHKERRQ(ierr);
4645     bseq[0] = *B_seq;
4646   }
4647   ierr = MatGetSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr);
4648   *B_seq = bseq[0];
4649   ierr = PetscFree(bseq);CHKERRQ(ierr);
4650   if (!rowb){
4651     ierr = ISDestroy(isrowb);CHKERRQ(ierr);
4652   } else {
4653     *rowb = isrowb;
4654   }
4655   if (!colb){
4656     ierr = ISDestroy(iscolb);CHKERRQ(ierr);
4657   } else {
4658     *colb = iscolb;
4659   }
4660   ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
4661   PetscFunctionReturn(0);
4662 }
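
/*
   A minimal usage sketch of MatGetBrowsOfAcols() (illustrative only; the index sets
   and brstart returned on the first call are reused on later calls, error checking
   left out):

      Mat      B_seq;
      IS       rowb,colb;
      PetscInt brstart;
      ierr = MatGetBrowsOfAcols(A,B,MAT_INITIAL_MATRIX,&rowb,&colb,&brstart,&B_seq);CHKERRQ(ierr);
      ...  after B's numerical values change (same nonzero pattern)  ...
      ierr = MatGetBrowsOfAcols(A,B,MAT_REUSE_MATRIX,&rowb,&colb,&brstart,&B_seq);CHKERRQ(ierr);
      ierr = ISDestroy(rowb);CHKERRQ(ierr);
      ierr = ISDestroy(colb);CHKERRQ(ierr);
      ierr = MatDestroy(B_seq);CHKERRQ(ierr);
*/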
4663 
4664 #undef __FUNCT__
4665 #define __FUNCT__ "MatGetBrowsOfAoCols"
4666 /*@C
4667     MatGetBrowsOfAoCols - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns
4668     of the OFF-DIAGONAL portion of local A
4669 
4670     Collective on Mat
4671 
4672    Input Parameters:
4673 +    A,B - the matrices in mpiaij format
4674 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4675 .    startsj - starting point in B's sending and receiving j-arrays, saved for MAT_REUSE (or PETSC_NULL)
4676 .    startsj_r - similar to startsj for receives
4677 -    bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or PETSC_NULL)
4678 
4679    Output Parameter:
4680 .    B_oth - the sequential matrix generated
4681 
4682     Level: developer
4683 
4684 @*/
4685 PetscErrorCode PETSCMAT_DLLEXPORT MatGetBrowsOfAoCols(Mat A,Mat B,MatReuse scall,PetscInt **startsj,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth)
4686 {
4687   VecScatter_MPI_General *gen_to,*gen_from;
4688   PetscErrorCode         ierr;
4689   Mat_MPIAIJ             *a=(Mat_MPIAIJ*)A->data;
4690   Mat_SeqAIJ             *b_oth;
4691   VecScatter             ctx=a->Mvctx;
4692   MPI_Comm               comm=((PetscObject)ctx)->comm;
4693   PetscMPIInt            *rprocs,*sprocs,tag=((PetscObject)ctx)->tag,rank;
4694   PetscInt               *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap->n,row,*b_othi,*b_othj;
4695   PetscScalar            *rvalues,*svalues;
4696   MatScalar              *b_otha,*bufa,*bufA;
4697   PetscInt               i,j,k,l,ll,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len;
4698   MPI_Request            *rwaits = PETSC_NULL,*swaits = PETSC_NULL;
4699   MPI_Status             *sstatus,rstatus;
4700   PetscMPIInt            jj;
4701   PetscInt               *cols,sbs,rbs;
4702   PetscScalar            *vals;
4703 
4704   PetscFunctionBegin;
4705   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend){
4706     SETERRQ4(PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
4707   }
4708   ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
4709   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4710 
4711   gen_to   = (VecScatter_MPI_General*)ctx->todata;
4712   gen_from = (VecScatter_MPI_General*)ctx->fromdata;
4713   rvalues  = gen_from->values; /* holds the length of receiving row */
4714   svalues  = gen_to->values;   /* holds the length of sending row */
4715   nrecvs   = gen_from->n;
4716   nsends   = gen_to->n;
4717 
4718   ierr = PetscMalloc2(nrecvs,MPI_Request,&rwaits,nsends,MPI_Request,&swaits);CHKERRQ(ierr);
4719   srow     = gen_to->indices;   /* local row index to be sent */
4720   sstarts  = gen_to->starts;
4721   sprocs   = gen_to->procs;
4722   sstatus  = gen_to->sstatus;
4723   sbs      = gen_to->bs;
4724   rstarts  = gen_from->starts;
4725   rprocs   = gen_from->procs;
4726   rbs      = gen_from->bs;
4727 
4728   if (!startsj || !bufa_ptr) scall = MAT_INITIAL_MATRIX;
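  /* The exchange proceeds in three phases over A's Mvctx communication pattern:
     (1) row lengths (i-structure) and (2) column indices (j-structure) are only
     exchanged for MAT_INITIAL_MATRIX, while (3) the numerical values (a-structure)
     are exchanged on every call. */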
4729   if (scall == MAT_INITIAL_MATRIX){
4730     /* i-array */
4731     /*---------*/
4732     /*  post receives */
4733     for (i=0; i<nrecvs; i++){
4734       rowlen = (PetscInt*)rvalues + rstarts[i]*rbs;
4735       nrows = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */
4736       ierr = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
4737     }
4738 
4739     /* pack the outgoing message */
4740     ierr = PetscMalloc2(nsends+1,PetscInt,&sstartsj,nrecvs+1,PetscInt,&rstartsj);CHKERRQ(ierr);
4741     sstartsj[0] = 0;  rstartsj[0] = 0;
4742     len = 0; /* total length of j or a array to be sent */
4743     k = 0;
4744     for (i=0; i<nsends; i++){
4745       rowlen = (PetscInt*)svalues + sstarts[i]*sbs;
4746       nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
4747       for (j=0; j<nrows; j++) {
4748         row = srow[k] + B->rmap->range[rank]; /* global row idx */
4749         for (l=0; l<sbs; l++){
4750           ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); /* rowlength */
4751           rowlen[j*sbs+l] = ncols;
4752           len += ncols;
4753           ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr);
4754         }
4755         k++;
4756       }
4757       ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
4758       sstartsj[i+1] = len;  /* starting point of (i+1)-th outgoing msg in bufj and bufa */
4759     }
4760     /* recvs and sends of i-array are completed */
4761     i = nrecvs;
4762     while (i--) {
4763       ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
4764     }
4765     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
4766 
4767     /* allocate buffers for sending j and a arrays */
4768     ierr = PetscMalloc((len+1)*sizeof(PetscInt),&bufj);CHKERRQ(ierr);
4769     ierr = PetscMalloc((len+1)*sizeof(PetscScalar),&bufa);CHKERRQ(ierr);
4770 
4771     /* create i-array of B_oth */
4772     ierr = PetscMalloc((aBn+2)*sizeof(PetscInt),&b_othi);CHKERRQ(ierr);
4773     b_othi[0] = 0;
4774     len = 0; /* total length of j or a array to be received */
4775     k = 0;
4776     for (i=0; i<nrecvs; i++){
4777       rowlen = (PetscInt*)rvalues + rstarts[i]*rbs;
4778       nrows = rbs*(rstarts[i+1]-rstarts[i]); /* num of rows to be received */
4779       for (j=0; j<nrows; j++) {
4780         b_othi[k+1] = b_othi[k] + rowlen[j];
4781         len += rowlen[j]; k++;
4782       }
4783       rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */
4784     }
4785 
4786     /* allocate space for j and a arrays of B_oth */
4787     ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(PetscInt),&b_othj);CHKERRQ(ierr);
4788     ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(MatScalar),&b_otha);CHKERRQ(ierr);
4789 
4790     /* j-array */
4791     /*---------*/
4792     /*  post receives of j-array */
4793     for (i=0; i<nrecvs; i++){
4794       nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
4795       ierr = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
4796     }
4797 
4798     /* pack the outgoing message j-array */
4799     k = 0;
4800     for (i=0; i<nsends; i++){
4801       nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
4802       bufJ = bufj+sstartsj[i];
4803       for (j=0; j<nrows; j++) {
4804         row  = srow[k++] + B->rmap->range[rank]; /* global row idx */
4805         for (ll=0; ll<sbs; ll++){
4806           ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr);
4807           for (l=0; l<ncols; l++){
4808             *bufJ++ = cols[l];
4809           }
4810           ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr);
4811         }
4812       }
4813       ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
4814     }
4815 
4816     /* recvs and sends of j-array are completed */
4817     i = nrecvs;
4818     while (i--) {
4819       ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
4820     }
4821     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
4822   } else if (scall == MAT_REUSE_MATRIX){
4823     sstartsj = *startsj;
4824     rstartsj = *startsj_r;
4825     bufa     = *bufa_ptr;
4826     b_oth    = (Mat_SeqAIJ*)(*B_oth)->data;
4827     b_otha   = b_oth->a;
4828   } else {
4829     SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
4830   }
4831 
4832   /* a-array */
4833   /*---------*/
4834   /*  post receives of a-array */
4835   for (i=0; i<nrecvs; i++){
4836     nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
4837     ierr = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
4838   }
4839 
4840   /* pack the outgoing message a-array */
4841   k = 0;
4842   for (i=0; i<nsends; i++){
4843     nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
4844     bufA = bufa+sstartsj[i];
4845     for (j=0; j<nrows; j++) {
4846       row  = srow[k++] + B->rmap->range[rank]; /* global row idx */
4847       for (ll=0; ll<sbs; ll++){
4848         ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr);
4849         for (l=0; l<ncols; l++){
4850           *bufA++ = vals[l];
4851         }
4852         ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr);
4853       }
4854     }
4855     ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
4856   }
4857   /* recvs and sends of a-array are completed */
4858   i = nrecvs;
4859   while (i--) {
4860     ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
4861   }
4862   if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
4863   ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr);
4864 
4865   if (scall == MAT_INITIAL_MATRIX){
4866     /* put together the new matrix */
4867     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr);
4868 
4869     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
4870     /* Since these are PETSc arrays, change flags to free them as necessary. */
4871     b_oth          = (Mat_SeqAIJ *)(*B_oth)->data;
4872     b_oth->free_a  = PETSC_TRUE;
4873     b_oth->free_ij = PETSC_TRUE;
4874     b_oth->nonew   = 0;
4875 
4876     ierr = PetscFree(bufj);CHKERRQ(ierr);
4877     if (!startsj || !bufa_ptr){
4878       ierr = PetscFree2(sstartsj,rstartsj);CHKERRQ(ierr);
4879       ierr = PetscFree(bufa);CHKERRQ(ierr);
4880     } else {
4881       *startsj   = sstartsj;
4882       *startsj_r = rstartsj;
4883       *bufa_ptr  = bufa;
4884     }
4885   }
4886   ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
4887   PetscFunctionReturn(0);
4888 }
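
/*
   A minimal usage sketch of MatGetBrowsOfAoCols() (illustrative only; the startsj,
   startsj_r and bufa buffers returned on the first call are reused on later calls,
   error checking left out):

      Mat       B_oth;
      PetscInt  *startsj = PETSC_NULL,*startsj_r = PETSC_NULL;
      MatScalar *bufa    = PETSC_NULL;
      ierr = MatGetBrowsOfAoCols(A,B,MAT_INITIAL_MATRIX,&startsj,&startsj_r,&bufa,&B_oth);CHKERRQ(ierr);
      ...  after B's numerical values change (same nonzero pattern)  ...
      ierr = MatGetBrowsOfAoCols(A,B,MAT_REUSE_MATRIX,&startsj,&startsj_r,&bufa,&B_oth);CHKERRQ(ierr);
      ierr = PetscFree2(startsj,startsj_r);CHKERRQ(ierr);
      ierr = PetscFree(bufa);CHKERRQ(ierr);
      ierr = MatDestroy(B_oth);CHKERRQ(ierr);
*/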
4889 
4890 #undef __FUNCT__
4891 #define __FUNCT__ "MatGetCommunicationStructs"
4892 /*@C
4893   MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication.
4894 
4895   Not Collective
4896 
4897   Input Parameters:
4898 . A - The matrix in mpiaij format
4899 
4900   Output Parameter:
4901 + lvec - The local vector holding off-process values from the argument to a matrix-vector product
4902 . colmap - A map from global column index to local index into lvec
4903 - multScatter - A scatter from the argument of a matrix-vector product to lvec
4904 
4905   Level: developer
4906 
4907 @*/
4908 #if defined (PETSC_USE_CTABLE)
4909 PetscErrorCode PETSCMAT_DLLEXPORT MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter)
4910 #else
4911 PetscErrorCode PETSCMAT_DLLEXPORT MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter)
4912 #endif
4913 {
4914   Mat_MPIAIJ *a;
4915 
4916   PetscFunctionBegin;
4917   PetscValidHeaderSpecific(A, MAT_COOKIE, 1);
4918   PetscValidPointer(lvec, 2);
4919   PetscValidPointer(colmap, 3);
4920   PetscValidPointer(multScatter, 4);
4921   a = (Mat_MPIAIJ *) A->data;
4922   if (lvec) *lvec = a->lvec;
4923   if (colmap) *colmap = a->colmap;
4924   if (multScatter) *multScatter = a->Mvctx;
4925   PetscFunctionReturn(0);
4926 }
4927 
4928 EXTERN_C_BEGIN
4929 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_MPIAIJ_MPICRL(Mat,const MatType,MatReuse,Mat*);
4930 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_MPIAIJ_MPICSRPERM(Mat,const MatType,MatReuse,Mat*);
4931 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_MPIAIJ_MPISBAIJ(Mat,const MatType,MatReuse,Mat*);
4932 EXTERN_C_END
4933 
4934 #undef __FUNCT__
4935 #define __FUNCT__ "MatMatMultNumeric_MPIDense_MPIAIJ"
4936 /*
4937     Computes (B'*A')' since computing B*A directly is untenable
4938 
4939                n                       p                          p
4940         (              )       (              )         (                  )
4941       m (      A       )  *  n (       B      )   =   m (         C        )
4942         (              )       (              )         (                  )
4943 
4944 */
4945 PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C)
4946 {
4947   PetscErrorCode     ierr;
4948   Mat                At,Bt,Ct;
4949 
4950   PetscFunctionBegin;
4951   ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr);
4952   ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr);
4953   ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,1.0,&Ct);CHKERRQ(ierr);
4954   ierr = MatDestroy(At);CHKERRQ(ierr);
4955   ierr = MatDestroy(Bt);CHKERRQ(ierr);
4956   ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr);
4957   ierr = MatDestroy(Ct);CHKERRQ(ierr);
4958   PetscFunctionReturn(0);
4959 }
4960 
4961 #undef __FUNCT__
4962 #define __FUNCT__ "MatMatMultSymbolic_MPIDense_MPIAIJ"
4963 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat *C)
4964 {
4965   PetscErrorCode ierr;
4966   PetscInt       m=A->rmap->n,n=B->cmap->n;
4967   Mat            Cmat;
4968 
4969   PetscFunctionBegin;
4970   if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_ERR_ARG_SIZ,"A->cmap->n %D != B->rmap->n %D",A->cmap->n,B->rmap->n);
4971   ierr = MatCreate(((PetscObject)A)->comm,&Cmat);CHKERRQ(ierr);
4972   ierr = MatSetSizes(Cmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
4973   ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr);
4974   ierr = MatMPIDenseSetPreallocation(Cmat,PETSC_NULL);CHKERRQ(ierr);
4975   ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4976   ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4977   *C   = Cmat;
4978   PetscFunctionReturn(0);
4979 }
4980 
4981 /* ----------------------------------------------------------------*/
4982 #undef __FUNCT__
4983 #define __FUNCT__ "MatMatMult_MPIDense_MPIAIJ"
4984 PetscErrorCode MatMatMult_MPIDense_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C)
4985 {
4986   PetscErrorCode ierr;
4987 
4988   PetscFunctionBegin;
4989   if (scall == MAT_INITIAL_MATRIX){
4990     ierr = MatMatMultSymbolic_MPIDense_MPIAIJ(A,B,fill,C);CHKERRQ(ierr);
4991   }
4992   ierr = MatMatMultNumeric_MPIDense_MPIAIJ(A,B,*C);CHKERRQ(ierr);
4993   PetscFunctionReturn(0);
4994 }
4995 
4996 EXTERN_C_BEGIN
4997 #if defined(PETSC_HAVE_MUMPS)
4998 extern PetscErrorCode MatGetFactor_mpiaij_mumps(Mat,MatFactorType,Mat*);
4999 #endif
5000 #if defined(PETSC_HAVE_PASTIX)
5001 extern PetscErrorCode MatGetFactor_mpiaij_pastix(Mat,MatFactorType,Mat*);
5002 #endif
5003 #if defined(PETSC_HAVE_SUPERLU_DIST)
5004 extern PetscErrorCode MatGetFactor_mpiaij_superlu_dist(Mat,MatFactorType,Mat*);
5005 #endif
5006 #if defined(PETSC_HAVE_SPOOLES)
5007 extern PetscErrorCode MatGetFactor_mpiaij_spooles(Mat,MatFactorType,Mat*);
5008 #endif
5009 EXTERN_C_END
5010 
5011 /*MC
5012    MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.
5013 
5014    Options Database Keys:
5015 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions()
5016 
5017   Level: beginner
5018 
5019 .seealso: MatCreateMPIAIJ()
5020 M*/
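
/*
   A minimal sketch of selecting this type at runtime (illustrative only; assumes a
   standard PETSc main program and omits error checking):

      Mat A;
      ierr = MatCreate(comm,&A);CHKERRQ(ierr);
      ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,M,N);CHKERRQ(ierr);
      ierr = MatSetFromOptions(A);CHKERRQ(ierr);
      ierr = MatMPIAIJSetPreallocation(A,5,PETSC_NULL,2,PETSC_NULL);CHKERRQ(ierr);

   and run with -mat_type mpiaij.
*/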
5021 
5022 EXTERN_C_BEGIN
5023 #undef __FUNCT__
5024 #define __FUNCT__ "MatCreate_MPIAIJ"
5025 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_MPIAIJ(Mat B)
5026 {
5027   Mat_MPIAIJ     *b;
5028   PetscErrorCode ierr;
5029   PetscMPIInt    size;
5030 
5031   PetscFunctionBegin;
5032   ierr = MPI_Comm_size(((PetscObject)B)->comm,&size);CHKERRQ(ierr);
5033 
5034   ierr            = PetscNewLog(B,Mat_MPIAIJ,&b);CHKERRQ(ierr);
5035   B->data         = (void*)b;
5036   ierr            = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
5037   B->rmap->bs     = 1;
5038   B->assembled    = PETSC_FALSE;
5039   B->mapping      = 0;
5040 
5041   B->insertmode   = NOT_SET_VALUES;
5042   b->size         = size;
5043   ierr = MPI_Comm_rank(((PetscObject)B)->comm,&b->rank);CHKERRQ(ierr);
5044 
5045   /* build cache for off array entries formed */
5046   ierr = MatStashCreate_Private(((PetscObject)B)->comm,1,&B->stash);CHKERRQ(ierr);
5047   b->donotstash  = PETSC_FALSE;
5048   b->colmap      = 0;
5049   b->garray      = 0;
5050   b->roworiented = PETSC_TRUE;
5051 
5052   /* stuff used for matrix vector multiply */
5053   b->lvec      = PETSC_NULL;
5054   b->Mvctx     = PETSC_NULL;
5055 
5056   /* stuff for MatGetRow() */
5057   b->rowindices   = 0;
5058   b->rowvalues    = 0;
5059   b->getrowactive = PETSC_FALSE;
5060 
5061 #if defined(PETSC_HAVE_SPOOLES)
5062   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_spooles_C",
5063                                      "MatGetFactor_mpiaij_spooles",
5064                                      MatGetFactor_mpiaij_spooles);CHKERRQ(ierr);
5065 #endif
5066 #if defined(PETSC_HAVE_MUMPS)
5067   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_mumps_C",
5068                                      "MatGetFactor_mpiaij_mumps",
5069                                      MatGetFactor_mpiaij_mumps);CHKERRQ(ierr);
5070 #endif
5071 #if defined(PETSC_HAVE_PASTIX)
5072   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_pastix_C",
5073 					   "MatGetFactor_mpiaij_pastix",
5074 					   MatGetFactor_mpiaij_pastix);CHKERRQ(ierr);
5075 #endif
5076 #if defined(PETSC_HAVE_SUPERLU_DIST)
5077   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_superlu_dist_C",
5078                                      "MatGetFactor_mpiaij_superlu_dist",
5079                                      MatGetFactor_mpiaij_superlu_dist);CHKERRQ(ierr);
5080 #endif
5081   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
5082                                      "MatStoreValues_MPIAIJ",
5083                                      MatStoreValues_MPIAIJ);CHKERRQ(ierr);
5084   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
5085                                      "MatRetrieveValues_MPIAIJ",
5086                                      MatRetrieveValues_MPIAIJ);CHKERRQ(ierr);
5087   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
5088 				     "MatGetDiagonalBlock_MPIAIJ",
5089                                      MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr);
5090   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatIsTranspose_C",
5091 				     "MatIsTranspose_MPIAIJ",
5092 				     MatIsTranspose_MPIAIJ);CHKERRQ(ierr);
5093   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocation_C",
5094 				     "MatMPIAIJSetPreallocation_MPIAIJ",
5095 				     MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr);
5096   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",
5097 				     "MatMPIAIJSetPreallocationCSR_MPIAIJ",
5098 				     MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr);
5099   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C",
5100 				     "MatDiagonalScaleLocal_MPIAIJ",
5101 				     MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr);
5102   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpicsrperm_C",
5103                                      "MatConvert_MPIAIJ_MPICSRPERM",
5104                                       MatConvert_MPIAIJ_MPICSRPERM);CHKERRQ(ierr);
5105   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpicrl_C",
5106                                      "MatConvert_MPIAIJ_MPICRL",
5107                                       MatConvert_MPIAIJ_MPICRL);CHKERRQ(ierr);
5108   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C",
5109                                      "MatConvert_MPIAIJ_MPISBAIJ",
5110                                       MatConvert_MPIAIJ_MPISBAIJ);CHKERRQ(ierr);
5111   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMult_mpidense_mpiaij_C",
5112                                      "MatMatMult_MPIDense_MPIAIJ",
5113                                       MatMatMult_MPIDense_MPIAIJ);CHKERRQ(ierr);
5114   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMultSymbolic_mpidense_mpiaij_C",
5115                                      "MatMatMultSymbolic_MPIDense_MPIAIJ",
5116                                       MatMatMultSymbolic_MPIDense_MPIAIJ);CHKERRQ(ierr);
5117   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMultNumeric_mpidense_mpiaij_C",
5118                                      "MatMatMultNumeric_MPIDense_MPIAIJ",
5119                                       MatMatMultNumeric_MPIDense_MPIAIJ);CHKERRQ(ierr);
5120   ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr);
5121   PetscFunctionReturn(0);
5122 }
5123 EXTERN_C_END
5124 
5125 #undef __FUNCT__
5126 #define __FUNCT__ "MatCreateMPIAIJWithSplitArrays"
5127 /*@
5128      MatCreateMPIAIJWithSplitArrays - creates an MPI AIJ matrix using arrays that contain the "diagonal"
5129          and "off-diagonal" parts of the matrix in CSR format.
5130 
5131    Collective on MPI_Comm
5132 
5133    Input Parameters:
5134 +  comm - MPI communicator
5135 .  m - number of local rows (Cannot be PETSC_DECIDE)
5136 .  n - This value should be the same as the local size used in creating the
5137        x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have
5138        it calculated if N is given). For square matrices n is almost always m.
5139 .  M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given)
5140 .  N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given)
5141 .   i - row indices for "diagonal" portion of matrix
5142 .   j - column indices for "diagonal" portion (local, i.e. relative to the first column of the diagonal block)
5143 .   a - matrix values for "diagonal" portion
5144 .   oi - row indices for "off-diagonal" portion of matrix
5145 .   oj - column indices for "off-diagonal" portion (global column numbers)
5146 -   oa - matrix values for "off-diagonal" portion
5147 
5148    Output Parameter:
5149 .   mat - the matrix
5150 
5151    Level: advanced
5152 
5153    Notes:
5154        The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc.
5155 
5156        The i and j indices are 0 based
5157 
5158        See MatCreateMPIAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix
5159 
5160        This sets local rows and cannot be used to set off-processor values.
5161 
5162        You cannot later use MatSetValues() to change values in this matrix.
5163 
5164 .keywords: matrix, aij, compressed row, sparse, parallel
5165 
5166 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
5167           MPIAIJ, MatCreateMPIAIJ(), MatCreateMPIAIJWithArrays()
5168 @*/
5169 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],
5170 								PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat)
5171 {
5172   PetscErrorCode ierr;
5173   Mat_MPIAIJ     *maij;
5174 
5175   PetscFunctionBegin;
5176   if (m < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
5177   if (i[0]) {
5178     SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
5179   }
5180   if (oi[0]) {
5181     SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0");
5182   }
5183   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
5184   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
5185   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
5186   maij = (Mat_MPIAIJ*) (*mat)->data;
5187   maij->donotstash     = PETSC_TRUE;
5188   (*mat)->preallocated = PETSC_TRUE;
5189 
5190   ierr = PetscLayoutSetBlockSize((*mat)->rmap,1);CHKERRQ(ierr);
5191   ierr = PetscLayoutSetBlockSize((*mat)->cmap,1);CHKERRQ(ierr);
5192   ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr);
5193   ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr);
5194 
5195   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr);
5196   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr);
5197 
5198   ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5199   ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5200   ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5201   ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5202 
5203   ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5204   ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5205   PetscFunctionReturn(0);
5206 }
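
/*
   A small worked sketch for MatCreateMPIAIJWithSplitArrays() (illustrative only;
   two processes, global 2x2 matrix [1 2; 3 4], each process owns one row and one
   column; the arrays must outlive the matrix since they are not copied):

      rank 0 (owns row 0, column 0):
        diagonal part      i[]  = {0,1}, j[]  = {0}, a[]  = {1.0}     (local column index)
        off-diagonal part  oi[] = {0,1}, oj[] = {1}, oa[] = {2.0}     (global column index)
      rank 1 (owns row 1, column 1):
        diagonal part      i[]  = {0,1}, j[]  = {0}, a[]  = {4.0}
        off-diagonal part  oi[] = {0,1}, oj[] = {0}, oa[] = {3.0}

      ierr = MatCreateMPIAIJWithSplitArrays(comm,1,1,2,2,i,j,a,oi,oj,oa,&A);CHKERRQ(ierr);
*/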
5207 
5208 /*
5209     Special version for direct calls from Fortran
5210 */
5211 #if defined(PETSC_HAVE_FORTRAN_CAPS)
5212 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ
5213 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
5214 #define matsetvaluesmpiaij_ matsetvaluesmpiaij
5215 #endif
5216 
5217 /* Change these macros so that they can be used in a function that returns void (on error, abort instead of returning an error code) */
5218 #undef CHKERRQ
5219 #define CHKERRQ(ierr) CHKERRABORT(((PetscObject)mat)->comm,ierr)
5220 #undef SETERRQ2
5221 #define SETERRQ2(ierr,b,c,d) CHKERRABORT(((PetscObject)mat)->comm,ierr)
5222 #undef SETERRQ
5223 #define SETERRQ(ierr,b) CHKERRABORT(((PetscObject)mat)->comm,ierr)
5224 
5225 EXTERN_C_BEGIN
5226 #undef __FUNCT__
5227 #define __FUNCT__ "matsetvaluesmpiaij_"
5228 void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr)
5229 {
5230   Mat             mat = *mmat;
5231   PetscInt        m = *mm, n = *mn;
5232   InsertMode      addv = *maddv;
5233   Mat_MPIAIJ      *aij = (Mat_MPIAIJ*)mat->data;
5234   PetscScalar     value;
5235   PetscErrorCode  ierr;
5236 
5237   ierr = MatPreallocated(mat);CHKERRQ(ierr);
5238   if (mat->insertmode == NOT_SET_VALUES) {
5239     mat->insertmode = addv;
5240   }
5241 #if defined(PETSC_USE_DEBUG)
5242   else if (mat->insertmode != addv) {
5243     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
5244   }
5245 #endif
5246   {
5247   PetscInt        i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
5248   PetscInt        cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
5249   PetscTruth      roworiented = aij->roworiented;
5250 
5251   /* Some Variables required in the macro */
5252   Mat             A = aij->A;
5253   Mat_SeqAIJ      *a = (Mat_SeqAIJ*)A->data;
5254   PetscInt        *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
5255   MatScalar       *aa = a->a;
5256   PetscTruth      ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES))?PETSC_TRUE:PETSC_FALSE);
5257   Mat             B = aij->B;
5258   Mat_SeqAIJ      *b = (Mat_SeqAIJ*)B->data;
5259   PetscInt        *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
5260   MatScalar       *ba = b->a;
5261 
5262   PetscInt        *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
5263   PetscInt        nonew = a->nonew;
5264   MatScalar       *ap1,*ap2;
5265 
5266   PetscFunctionBegin;
5267   for (i=0; i<m; i++) {
5268     if (im[i] < 0) continue;
5269 #if defined(PETSC_USE_DEBUG)
5270     if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
5271 #endif
5272     if (im[i] >= rstart && im[i] < rend) {
5273       row      = im[i] - rstart;
5274       lastcol1 = -1;
5275       rp1      = aj + ai[row];
5276       ap1      = aa + ai[row];
5277       rmax1    = aimax[row];
5278       nrow1    = ailen[row];
5279       low1     = 0;
5280       high1    = nrow1;
5281       lastcol2 = -1;
5282       rp2      = bj + bi[row];
5283       ap2      = ba + bi[row];
5284       rmax2    = bimax[row];
5285       nrow2    = bilen[row];
5286       low2     = 0;
5287       high2    = nrow2;
5288 
5289       for (j=0; j<n; j++) {
5290         if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
5291         if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
5292         if (in[j] >= cstart && in[j] < cend){
5293           col = in[j] - cstart;
5294           MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
5295         } else if (in[j] < 0) continue;
5296 #if defined(PETSC_USE_DEBUG)
5297         else if (in[j] >= mat->cmap->N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);}
5298 #endif
5299         else {
5300           if (mat->was_assembled) {
5301             if (!aij->colmap) {
5302               ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
5303             }
5304 #if defined (PETSC_USE_CTABLE)
5305             ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
5306 	    col--;
5307 #else
5308             col = aij->colmap[in[j]] - 1;
5309 #endif
5310             if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
5311               ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
5312               col =  in[j];
5313               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
5314               B = aij->B;
5315               b = (Mat_SeqAIJ*)B->data;
5316               bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
5317               rp2      = bj + bi[row];
5318               ap2      = ba + bi[row];
5319               rmax2    = bimax[row];
5320               nrow2    = bilen[row];
5321               low2     = 0;
5322               high2    = nrow2;
5323               bm       = aij->B->rmap->n;
5324               ba = b->a;
5325             }
5326           } else col = in[j];
5327           MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
5328         }
5329       }
5330     } else {
5331       if (!aij->donotstash) {
5332         if (roworiented) {
5333           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscTruth)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
5334         } else {
5335           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscTruth)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
5336         }
5337       }
5338     }
5339   }}
5340   PetscFunctionReturnVoid();
5341 }
5342 EXTERN_C_END
5343 
5344