/* /petsc/src/mat/impls/aij/mpi/mpiaij.c (revision f3fe499b4cc4d64bf04aa4f5e4963dcc4eb56541) */
#define PETSCMAT_DLL

#include "../src/mat/impls/aij/mpi/mpiaij.h"   /*I "petscmat.h" I*/
#include "petscblaslapack.h"

#undef __FUNCT__
#define __FUNCT__ "MatDistribute_MPIAIJ"
/*
    Distributes a SeqAIJ matrix across a set of processes. Code stolen from
    MatLoad_MPIAIJ(); horrible lack of reuse. Should be a routine for each matrix type.

    Only for square matrices.
*/
PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat)
{
  PetscMPIInt    rank,size;
  PetscInt       *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz,*gmataj,cnt,row,*ld;
  PetscErrorCode ierr;
  Mat            mat;
  Mat_SeqAIJ     *gmata;
  PetscMPIInt    tag;
  MPI_Status     status;
  PetscTruth     aij;
  MatScalar      *gmataa,*ao,*ad,*gmataarestore=0;

  PetscFunctionBegin;
  CHKMEMQ;
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  if (!rank) {
    ierr = PetscTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);CHKERRQ(ierr);
    if (!aij) SETERRQ1(((PetscObject)gmat)->comm,PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name);
  }
  if (reuse == MAT_INITIAL_MATRIX) {
    ierr = MatCreate(comm,&mat);CHKERRQ(ierr);
    ierr = MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
    ierr = MatSetType(mat,MATAIJ);CHKERRQ(ierr);
    ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr);
    ierr = PetscMalloc2(m,PetscInt,&dlens,m,PetscInt,&olens);CHKERRQ(ierr);
    ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
    rowners[0] = 0;
    for (i=2; i<=size; i++) {
      rowners[i] += rowners[i-1];
    }
    rstart = rowners[rank];
    rend   = rowners[rank+1];
    ierr   = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
    if (!rank) {
      gmata = (Mat_SeqAIJ*) gmat->data;
      /* send row lengths to all processors */
      for (i=0; i<m; i++) dlens[i] = gmata->ilen[i];
      for (i=1; i<size; i++) {
        ierr = MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
      }
      /* determine the diagonal and off-diagonal counts for each row */
      ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
      ierr = PetscMalloc(m*sizeof(PetscInt),&ld);CHKERRQ(ierr);
      ierr = PetscMemzero(ld,m*sizeof(PetscInt));CHKERRQ(ierr);
      jj = 0;
      for (i=0; i<m; i++) {
        for (j=0; j<dlens[i]; j++) {
          if (gmata->j[jj] < rstart) ld[i]++;
          if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++;
          jj++;
        }
      }
      /* send column indices to other processes */
      for (i=1; i<size; i++) {
        nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
        ierr = MPI_Send(&nz,1,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
        ierr = MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
      }

      /* send numerical values to other processes */
      for (i=1; i<size; i++) {
        nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
        ierr = MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
      }
      gmataa = gmata->a;
      gmataj = gmata->j;

    } else {
      /* receive row lengths */
      ierr = MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
      /* receive column indices */
      ierr = MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
      ierr = PetscMalloc2(nz,PetscScalar,&gmataa,nz,PetscInt,&gmataj);CHKERRQ(ierr);
      ierr = MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
      /* determine the diagonal and off-diagonal counts for each row */
      ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
      ierr = PetscMalloc(m*sizeof(PetscInt),&ld);CHKERRQ(ierr);
      ierr = PetscMemzero(ld,m*sizeof(PetscInt));CHKERRQ(ierr);
      jj = 0;
      for (i=0; i<m; i++) {
        for (j=0; j<dlens[i]; j++) {
          if (gmataj[jj] < rstart) ld[i]++;
          if (gmataj[jj] < rstart || gmataj[jj] >= rend) olens[i]++;
          jj++;
        }
      }
      /* receive numerical values */
      ierr = PetscMemzero(gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr);
      ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
    }
    /* set preallocation */
    for (i=0; i<m; i++) {
      dlens[i] -= olens[i];
    }
    ierr = MatSeqAIJSetPreallocation(mat,0,dlens);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);CHKERRQ(ierr);

    for (i=0; i<m; i++) {
      dlens[i] += olens[i];
    }
    cnt  = 0;
    for (i=0; i<m; i++) {
      row  = rstart + i;
      ierr = MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);CHKERRQ(ierr);
      cnt += dlens[i];
    }
    if (rank) {
      ierr = PetscFree2(gmataa,gmataj);CHKERRQ(ierr);
    }
    ierr = PetscFree2(dlens,olens);CHKERRQ(ierr);
    ierr = PetscFree(rowners);CHKERRQ(ierr);
    ((Mat_MPIAIJ*)(mat->data))->ld = ld;
    *inmat = mat;
  } else {   /* column indices are already set; only need to move over the numerical values from process 0 */
    Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data;
    Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data;
    mat  = *inmat;
    ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
    if (!rank) {
      /* send numerical values to other processes */
      gmata  = (Mat_SeqAIJ*) gmat->data;
      ierr   = MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);CHKERRQ(ierr);
      gmataa = gmata->a;
      for (i=1; i<size; i++) {
        nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
        ierr = MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
      }
      nz = gmata->i[rowners[1]]-gmata->i[rowners[0]];
    } else {
      /* receive numerical values from process 0 */
      nz   = Ad->nz + Ao->nz;
      ierr = PetscMalloc(nz*sizeof(PetscScalar),&gmataa);CHKERRQ(ierr); gmataarestore = gmataa;
      ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
    }
    /* transfer the numerical values into the diagonal A and off-diagonal B parts of mat */
    ld = ((Mat_MPIAIJ*)(mat->data))->ld;
    ad = Ad->a;
    ao = Ao->a;
    if (mat->rmap->n) {
      i  = 0;
      nz = ld[i];                                   ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
      nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
    }
    for (i=1; i<mat->rmap->n; i++) {
      nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
      nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
    }
    i--;
    if (mat->rmap->n) {
      nz = Ao->i[i+1] - Ao->i[i] - ld[i];           ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
    }
    if (rank) {
      ierr = PetscFree(gmataarestore);CHKERRQ(ierr);
    }
  }
  ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  CHKMEMQ;
  PetscFunctionReturn(0);
}

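/*
  A minimal usage sketch for the routine above (hedged; "gseq" and "mlocal" are
  illustrative names, not part of this file). It assumes a SeqAIJ matrix gseq has
  been built on process 0 and that mlocal is the number of rows this process is
  to own; passing PETSC_NULL for gseq on the other processes appears safe since
  the routine only dereferences gmat on process 0:

      Mat gseq,Adist;
      ierr = MatDistribute_MPIAIJ(PETSC_COMM_WORLD,gseq,mlocal,MAT_INITIAL_MATRIX,&Adist);CHKERRQ(ierr);
      ...
      ierr = MatDistribute_MPIAIJ(PETSC_COMM_WORLD,gseq,mlocal,MAT_REUSE_MATRIX,&Adist);CHKERRQ(ierr);

  The MAT_REUSE_MATRIX call moves over only the numerical values, so the nonzero
  pattern of gseq must not change between the two calls.
*/
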
/*
  Local utility routine that creates a mapping from the global column
  number to the local number in the off-diagonal part of the local
  storage of the matrix.  When PETSC_USE_CTABLE is defined this is scalable at
  a slightly higher hash-table cost; without it, it is not scalable (each
  process has an order-N integer array) but is fast to access.
*/
#undef __FUNCT__
#define __FUNCT__ "CreateColmap_MPIAIJ_Private"
PetscErrorCode CreateColmap_MPIAIJ_Private(Mat mat)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       n = aij->B->cmap->n,i;

  PetscFunctionBegin;
#if defined (PETSC_USE_CTABLE)
  ierr = PetscTableCreate(n,&aij->colmap);CHKERRQ(ierr);
  for (i=0; i<n; i++){
    ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1);CHKERRQ(ierr);
  }
#else
  ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscInt),&aij->colmap);CHKERRQ(ierr);
  ierr = PetscLogObjectMemory(mat,mat->cmap->N*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = PetscMemzero(aij->colmap,mat->cmap->N*sizeof(PetscInt));CHKERRQ(ierr);
  for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
#endif
  PetscFunctionReturn(0);
}

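/*
  A worked example of the mapping built above: if this process holds off-diagonal
  (B) columns with global indices garray[] = {3,7,12}, then global column 7 maps
  to local B column 1, stored as 1+1 = 2 so that 0 can mean "not present".  A
  hedged sketch of the reverse lookup, mirroring the two builds above (gcol and
  lcol are illustrative names):

      PetscInt lcol;
#if defined (PETSC_USE_CTABLE)
      ierr = PetscTableFind(aij->colmap,gcol+1,&lcol);CHKERRQ(ierr);
      lcol--;                        after this, -1 means "not in B"
#else
      lcol = aij->colmap[gcol] - 1;
#endif
*/
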
#define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \
{ \
    if (col <= lastcol1) low1 = 0; else high1 = nrow1; \
    lastcol1 = col; \
    while (high1-low1 > 5) { \
      t = (low1+high1)/2; \
      if (rp1[t] > col) high1 = t; \
      else              low1  = t; \
    } \
    for (_i=low1; _i<high1; _i++) { \
      if (rp1[_i] > col) break; \
      if (rp1[_i] == col) { \
        if (addv == ADD_VALUES) ap1[_i] += value; \
        else                    ap1[_i]  = value; \
        goto a_noinsert; \
      } \
    } \
    if (value == 0.0 && ignorezeroentries) {low1 = 0; high1 = nrow1; goto a_noinsert;} \
    if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;} \
    if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
    MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \
    N = nrow1++ - 1; a->nz++; high1++; \
    /* shift up all the later entries in this row */ \
    for (ii=N; ii>=_i; ii--) { \
      rp1[ii+1] = rp1[ii]; \
      ap1[ii+1] = ap1[ii]; \
    } \
    rp1[_i] = col; \
    ap1[_i] = value; \
    a_noinsert: ; \
    ailen[row] = nrow1; \
}

#define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \
{ \
    if (col <= lastcol2) low2 = 0; else high2 = nrow2; \
    lastcol2 = col; \
    while (high2-low2 > 5) { \
      t = (low2+high2)/2; \
      if (rp2[t] > col) high2 = t; \
      else              low2  = t; \
    } \
    for (_i=low2; _i<high2; _i++) { \
      if (rp2[_i] > col) break; \
      if (rp2[_i] == col) { \
        if (addv == ADD_VALUES) ap2[_i] += value; \
        else                    ap2[_i]  = value; \
        goto b_noinsert; \
      } \
    } \
    if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
    if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
    if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
    MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \
    N = nrow2++ - 1; b->nz++; high2++; \
    /* shift up all the later entries in this row */ \
    for (ii=N; ii>=_i; ii--) { \
      rp2[ii+1] = rp2[ii]; \
      ap2[ii+1] = ap2[ii]; \
    } \
    rp2[_i] = col; \
    ap2[_i] = value; \
    b_noinsert: ; \
    bilen[row] = nrow2; \
}

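/*
  Both macros above implement the same sorted-row update: a guarded binary search
  narrows [low,high) to at most five entries, a linear scan finds the column, and
  on a miss the tail of the row is shifted up one slot to make room.  A hedged,
  stand-alone sketch of that logic on a plain sorted row (all names here are
  illustrative; the real macros also handle reallocation and the nonew options):

      static void InsertSorted(PetscInt *cols,MatScalar *vals,PetscInt *nrow,
                               PetscInt col,MatScalar value,InsertMode addv)
      {
        PetscInt low = 0,high = *nrow,t,k,kk;
        while (high - low > 5) {
          t = (low + high)/2;
          if (cols[t] > col) high = t;
          else               low  = t;
        }
        for (k=low; k<high; k++) {
          if (cols[k] == col) {
            if (addv == ADD_VALUES) vals[k] += value;
            else                    vals[k]  = value;
            return;
          }
          if (cols[k] > col) break;
        }
        for (kk=*nrow; kk>k; kk--) { cols[kk] = cols[kk-1]; vals[kk] = vals[kk-1]; }
        cols[k] = col; vals[k] = value; (*nrow)++;
      }
*/
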
#undef __FUNCT__
#define __FUNCT__ "MatSetValuesRow_MPIAIJ"
PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[])
{
  Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)A->data;
  Mat_SeqAIJ     *a = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data;
  PetscErrorCode ierr;
  PetscInt       l,*garray = mat->garray,diag;

  PetscFunctionBegin;
  /* this code only works for square matrices A */

  /* find the size of the portion of the row to the left of the diagonal part */
  ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr);
  row  = row - diag;
  for (l=0; l<b->i[row+1]-b->i[row]; l++) {
    if (garray[b->j[b->i[row]+l]] > diag) break;
  }
  ierr = PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));CHKERRQ(ierr);

  /* diagonal part */
  ierr = PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));CHKERRQ(ierr);

  /* portion of the row to the right of the diagonal part */
  ierr = PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

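/*
  The three copies above rely on the local storage layout of an MPIAIJ row: the
  off-diagonal (B) entries whose global columns lie left of the diagonal block
  come first, then the diagonal (A) block entries, then the remaining B entries.
  Schematically, with l left entries and nd = a->i[row+1]-a->i[row] diagonal
  entries, the input array v is consumed as

      v:  [ B-left (l) | A-diagonal (nd) | B-right (rest of the row) ]
*/
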
#undef __FUNCT__
#define __FUNCT__ "MatSetValues_MPIAIJ"
PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscScalar    value;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
  PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
  PetscTruth     roworiented = aij->roworiented;

  /* Some variables required by the MatSetValues_SeqAIJ_*_Private() macros */
  Mat            A = aij->A;
  Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data;
  PetscInt       *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
  MatScalar      *aa = a->a;
  PetscTruth     ignorezeroentries = a->ignorezeroentries;
  Mat            B = aij->B;
  Mat_SeqAIJ     *b = (Mat_SeqAIJ*)B->data;
  PetscInt       *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
  MatScalar      *ba = b->a;

  PetscInt       *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
  PetscInt       nonew = a->nonew;
  MatScalar      *ap1,*ap2;

  PetscFunctionBegin;
  if (v) PetscValidScalarPointer(v,6);
  for (i=0; i<m; i++) {
    if (im[i] < 0) continue;
#if defined(PETSC_USE_DEBUG)
    if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
#endif
    if (im[i] >= rstart && im[i] < rend) {
      row      = im[i] - rstart;
      lastcol1 = -1;
      rp1      = aj + ai[row];
      ap1      = aa + ai[row];
      rmax1    = aimax[row];
      nrow1    = ailen[row];
      low1     = 0;
      high1    = nrow1;
      lastcol2 = -1;
      rp2      = bj + bi[row];
      ap2      = ba + bi[row];
      rmax2    = bimax[row];
      nrow2    = bilen[row];
      low2     = 0;
      high2    = nrow2;

      for (j=0; j<n; j++) {
        if (v) {if (roworiented) value = v[i*n+j]; else value = v[i+j*m];} else value = 0.0;
        if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
        if (in[j] >= cstart && in[j] < cend){
          col = in[j] - cstart;
          MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
        } else if (in[j] < 0) continue;
#if defined(PETSC_USE_DEBUG)
        else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
#endif
        else {
          if (mat->was_assembled) {
            if (!aij->colmap) {
              ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
            }
#if defined (PETSC_USE_CTABLE)
            ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
            col--;
#else
            col = aij->colmap[in[j]] - 1;
#endif
            if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
              ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
              col  = in[j];
              /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
              B     = aij->B;
              b     = (Mat_SeqAIJ*)B->data;
              bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a;
              rp2   = bj + bi[row];
              ap2   = ba + bi[row];
              rmax2 = bimax[row];
              nrow2 = bilen[row];
              low2  = 0;
              high2 = nrow2;
              bm    = aij->B->rmap->n;
            }
          } else col = in[j];
          MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
        }
      }
    } else {
      if (!aij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscTruth)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscTruth)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}

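/*
  A minimal usage sketch of the stashing behavior above: any process may set
  entries in rows it does not own; they are stashed and communicated to the
  owner during assembly (the indices below are illustrative only):

      PetscInt    row = 0,col = 0;    row 0 is possibly owned by another process
      PetscScalar v   = 1.0;
      ierr = MatSetValues(mat,1,&row,1,&col,&v,ADD_VALUES);CHKERRQ(ierr);
      ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
      ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
*/
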
#undef __FUNCT__
#define __FUNCT__ "MatGetValues_MPIAIJ"
PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
  PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) continue; /* negative rows are ignored */
    if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      for (j=0; j<n; j++) {
        if (idxn[j] < 0) continue; /* negative columns are ignored */
        if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
        if (idxn[j] >= cstart && idxn[j] < cend){
          col  = idxn[j] - cstart;
          ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
        } else {
          if (!aij->colmap) {
            ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
          }
#if defined (PETSC_USE_CTABLE)
          ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
          col--;
#else
          col = aij->colmap[idxn[j]] - 1;
#endif
          if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
          else {
            ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
          }
        }
      }
    } else {
      SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
    }
  }
  PetscFunctionReturn(0);
}

extern PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat,Vec,Vec);

#undef __FUNCT__
#define __FUNCT__ "MatAssemblyBegin_MPIAIJ"
PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       nstash,reallocs;
  InsertMode     addv;

  PetscFunctionBegin;
  if (aij->donotstash) {
    PetscFunctionReturn(0);
  }

  /* make sure all processors are in the same INSERT_VALUES or ADD_VALUES mode */
  ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,((PetscObject)mat)->comm);CHKERRQ(ierr);
  if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted while others added");
  mat->insertmode = addv; /* in case this processor had no cache */

  ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
  ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatAssemblyEnd_MPIAIJ"
PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  Mat_SeqAIJ     *a = (Mat_SeqAIJ*)aij->A->data;
  PetscErrorCode ierr;
  PetscMPIInt    n;
  PetscInt       i,j,rstart,ncols,flg;
  PetscInt       *row,*col;
  PetscTruth     other_disassembled;
  PetscScalar    *val;
  InsertMode     addv = mat->insertmode;

  /* do not use 'b = (Mat_SeqAIJ*)aij->B->data' as B can be reset in disassembly */
  PetscFunctionBegin;
  if (!aij->donotstash) {
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n;) {
        /* identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        /* assemble all these values with a single function call */
        ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
        i = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
  }
  a->compressedrow.use = PETSC_FALSE;
  ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);

  /* determine if any processor has disassembled; if so we must
     also disassemble ourselves, in order that we may reassemble */
  /*
     if the nonzero structure of submatrix B cannot change then we know that
     no processor disassembled, thus we can skip this stuff
  */
  if (!((Mat_SeqAIJ*)aij->B->data)->nonew)  {
    ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,((PetscObject)mat)->comm);CHKERRQ(ierr);
    if (mat->was_assembled && !other_disassembled) {
      ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
    }
  }
  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
  }
  ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr);
  ((Mat_SeqAIJ*)aij->B->data)->compressedrow.use = PETSC_TRUE; /* b->compressedrow.use */
  ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);

  ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
  aij->rowvalues = 0;

  /* used by MatAXPY() */
  a->xtoy = 0; ((Mat_SeqAIJ*)aij->B->data)->xtoy = 0;  /* b->xtoy = 0 */
  a->XtoY = 0; ((Mat_SeqAIJ*)aij->B->data)->XtoY = 0;  /* b->XtoY = 0 */

  if (aij->diag) {ierr = VecDestroy(aij->diag);CHKERRQ(ierr);aij->diag = 0;}
  if (a->inode.size) mat->ops->multdiagonalblock = MatMultDiagonalBlock_MPIAIJ;
  PetscFunctionReturn(0);
}

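/*
  Worked example of the batching loop in MatAssemblyEnd_MPIAIJ() above: if one
  received stash message carries entries for rows {2,2,2,5,5}, the inner scan
  groups them into two MatSetValues_MPIAIJ() calls, one with ncols = 3 for row 2
  and one with ncols = 2 for row 5, instead of five single-entry calls.
*/
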
#undef __FUNCT__
#define __FUNCT__ "MatZeroEntries_MPIAIJ"
PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
{
  Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
  ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatZeroRows_MPIAIJ"
PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag)
{
  Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscMPIInt    size = l->size,imdex,n,rank = l->rank,tag = ((PetscObject)A)->tag,lastidx = -1;
  PetscInt       i,*owners = A->rmap->range;
  PetscInt       *nprocs,j,idx,nsends,row;
  PetscInt       nmax,*svalues,*starts,*owner,nrecvs;
  PetscInt       *rvalues,count,base,slen,*source;
  PetscInt       *lens,*lrows,*values,rstart=A->rmap->rstart;
  MPI_Comm       comm = ((PetscObject)A)->comm;
  MPI_Request    *send_waits,*recv_waits;
  MPI_Status     recv_status,*send_status;
#if defined(PETSC_USE_DEBUG)
  PetscTruth     found = PETSC_FALSE;
#endif

  PetscFunctionBegin;
  /* first count the number of contributors to each processor */
  ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
  ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr);
  j = 0;
  for (i=0; i<N; i++) {
    if (lastidx > (idx = rows[i])) j = 0;
    lastidx = idx;
    for (; j<size; j++) {
      if (idx >= owners[j] && idx < owners[j+1]) {
        nprocs[2*j]++;
        nprocs[2*j+1] = 1;
        owner[i] = j;
#if defined(PETSC_USE_DEBUG)
        found = PETSC_TRUE;
#endif
        break;
      }
    }
#if defined(PETSC_USE_DEBUG)
    if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
    found = PETSC_FALSE;
#endif
  }
  nsends = 0;  for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}

  /* inform other processors of the number of messages and the maximum length */
  ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);

  /* post receives */
  ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr);
  ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
  for (i=0; i<nrecvs; i++) {
    ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
  }

  /* do sends:
      1) starts[i] gives the starting index in svalues for stuff going to
         the ith processor
  */
  ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr);
  ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
  ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr);
  starts[0] = 0;
  for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
  for (i=0; i<N; i++) {
    svalues[starts[owner[i]]++] = rows[i];
  }

  starts[0] = 0;
  for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
  count = 0;
  for (i=0; i<size; i++) {
    if (nprocs[2*i+1]) {
      ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
    }
  }
  ierr = PetscFree(starts);CHKERRQ(ierr);

  base = owners[rank];

  /* wait on the receives */
  ierr  = PetscMalloc2(nrecvs,PetscInt,&lens,nrecvs,PetscInt,&source);CHKERRQ(ierr);
  count = nrecvs; slen = 0;
  while (count) {
    ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
    /* unpack receives into our local space */
    ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);
    source[imdex]  = recv_status.MPI_SOURCE;
    lens[imdex]    = n;
    slen          += n;
    count--;
  }
  ierr = PetscFree(recv_waits);CHKERRQ(ierr);

  /* convert the received global row numbers into local row numbers */
  ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr);
  count = 0;
  for (i=0; i<nrecvs; i++) {
    values = rvalues + i*nmax;
    for (j=0; j<lens[i]; j++) {
      lrows[count++] = values[j] - base;
    }
  }
  ierr = PetscFree(rvalues);CHKERRQ(ierr);
  ierr = PetscFree2(lens,source);CHKERRQ(ierr);
  ierr = PetscFree(owner);CHKERRQ(ierr);
  ierr = PetscFree(nprocs);CHKERRQ(ierr);

  /* actually zap the local rows */
  /*
     Zero the required rows. If the "diagonal block" of the matrix
     is square and the user wishes to set the diagonal, we use separate
     code so that MatSetValues() is not called for each diagonal entry,
     allocating new memory each time and thus calling lots of mallocs
     and slowing things down.
  */
  /* must zero l->B before l->A because the (diag) case below may put values into l->B */
  ierr = MatZeroRows(l->B,slen,lrows,0.0);CHKERRQ(ierr);
  if ((diag != 0.0) && (l->A->rmap->N == l->A->cmap->N)) {
    ierr = MatZeroRows(l->A,slen,lrows,diag);CHKERRQ(ierr);
  } else if (diag != 0.0) {
    ierr = MatZeroRows(l->A,slen,lrows,0.0);CHKERRQ(ierr);
    if (((Mat_SeqAIJ*)l->A->data)->nonew) {
      SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\n\
MAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
    }
    for (i = 0; i < slen; i++) {
      row  = lrows[i] + rstart;
      ierr = MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr);
    }
    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  } else {
    ierr = MatZeroRows(l->A,slen,lrows,0.0);CHKERRQ(ierr);
  }
  ierr = PetscFree(lrows);CHKERRQ(ierr);

  /* wait on the sends */
  if (nsends) {
    ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
    ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
    ierr = PetscFree(send_status);CHKERRQ(ierr);
  }
  ierr = PetscFree(send_waits);CHKERRQ(ierr);
  ierr = PetscFree(svalues);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}

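/*
  Worked example of the communication pattern above: with two processes owning
  rows [0,4) and [4,8), a call on process 1 with rows {0,5} sends global row 0
  to process 0 (its owner) and global row 5 to itself (the code does not
  special-case locally owned rows), after which row 5 becomes local row
  5 - base = 1 in lrows[].  PetscMaxSum() is what tells each process how many
  such messages to expect and how long the longest one is.
*/
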
#undef __FUNCT__
#define __FUNCT__ "MatMult_MPIAIJ"
PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       nt;

  PetscFunctionBegin;
  ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
  if (nt != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt);
  ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

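/*
  MatMult_MPIAIJ() above computes yy = A_d*xx + A_o*x_ghost, where A_d (a->A)
  holds the diagonal block and A_o (a->B) the off-diagonal block in compressed
  local column numbering.  The scatter that fills the ghost vector a->lvec is
  started before the local product and finished only after it, so communication
  overlaps with the local computation.
*/
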
#undef __FUNCT__
#define __FUNCT__ "MatMultDiagonalBlock_MPIAIJ"
PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatMultDiagonalBlock(a->A,bb,xx);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultAdd_MPIAIJ"
PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultTranspose_MPIAIJ"
PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscTruth     merged;

  PetscFunctionBegin;
  ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr);
  /* do the nondiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  if (!merged) {
    /* send it on its way */
    ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
    /* do the local part */
    ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
    /* receive the remote parts: note this assumes the values are not actually
       added into yy until the VecScatterEnd() below */
    ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  } else {
    /* do the local part */
    ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
    /* send it on its way */
    ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
    /* the values were actually received in the Begin(), but we still need to call this no-op */
    ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatIsTranspose_MPIAIJ"
PetscErrorCode PETSCMAT_DLLEXPORT MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscTruth *f)
{
  MPI_Comm       comm;
  Mat_MPIAIJ     *Aij = (Mat_MPIAIJ*) Amat->data, *Bij;
  Mat            Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs;
  IS             Me,Notme;
  PetscErrorCode ierr;
  PetscInt       M,N,first,last,*notme,i;
  PetscMPIInt    size;

  PetscFunctionBegin;

  /* Easy test: symmetric diagonal block */
  Bij  = (Mat_MPIAIJ*) Bmat->data; Bdia = Bij->A;
  ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr);
  if (!*f) PetscFunctionReturn(0);
  ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  if (size == 1) PetscFunctionReturn(0);

  /* Hard test: the off-diagonal block. This takes a call to MatGetSubMatrices(). */
  ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr);
  ierr = PetscMalloc((N-last+first)*sizeof(PetscInt),&notme);CHKERRQ(ierr);
  for (i=0; i<first; i++) notme[i] = i;
  for (i=last; i<M; i++) notme[i-last+first] = i;
  ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,&Notme);CHKERRQ(ierr);
  ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr);
  ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr);
  Aoff = Aoffs[0];
  ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr);
  Boff = Boffs[0];
  ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr);
  ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr);
  ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr);
  ierr = ISDestroy(Me);CHKERRQ(ierr);
  ierr = ISDestroy(Notme);CHKERRQ(ierr);
  ierr = PetscFree(notme);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}
EXTERN_C_END

#undef __FUNCT__
#define __FUNCT__ "MatMultTransposeAdd_MPIAIJ"
PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* do the nondiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  /* send it on its way */
  ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  /* do the local part */
  ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  /* receive the remote parts */
  ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
  This only works correctly for square matrices where the subblock A->A is the
  diagonal block
*/
#undef __FUNCT__
#define __FUNCT__ "MatGetDiagonal_MPIAIJ"
PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;

  PetscFunctionBegin;
  if (A->rmap->N != A->cmap->N) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_SUP,"Supports only square matrices where A->A is the diagonal block");
  if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"The row partition must equal the column partition");
  ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatScale_MPIAIJ"
PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatScale(a->A,aa);CHKERRQ(ierr);
  ierr = MatScale(a->B,aa);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatDestroy_MPIAIJ"
PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
#if defined(PETSC_USE_LOG)
  PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N);
#endif
  ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
  if (aij->diag) {ierr = VecDestroy(aij->diag);CHKERRQ(ierr);}
  ierr = MatDestroy(aij->A);CHKERRQ(ierr);
  ierr = MatDestroy(aij->B);CHKERRQ(ierr);
#if defined (PETSC_USE_CTABLE)
  if (aij->colmap) {ierr = PetscTableDestroy(aij->colmap);CHKERRQ(ierr);}
#else
  ierr = PetscFree(aij->colmap);CHKERRQ(ierr);
#endif
  ierr = PetscFree(aij->garray);CHKERRQ(ierr);
  if (aij->lvec)   {ierr = VecDestroy(aij->lvec);CHKERRQ(ierr);}
  if (aij->Mvctx)  {ierr = VecScatterDestroy(aij->Mvctx);CHKERRQ(ierr);}
  ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
  ierr = PetscFree(aij->ld);CHKERRQ(ierr);
  ierr = PetscFree(aij);CHKERRQ(ierr);

  ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C","",PETSC_NULL);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatView_MPIAIJ_Binary"
PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
{
  Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
  Mat_SeqAIJ        *A = (Mat_SeqAIJ*)aij->A->data;
  Mat_SeqAIJ        *B = (Mat_SeqAIJ*)aij->B->data;
  PetscErrorCode    ierr;
  PetscMPIInt       rank,size,tag = ((PetscObject)viewer)->tag;
  int               fd;
  PetscInt          nz,header[4],*row_lengths,*range=0,rlen,i;
  PetscInt          nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap->rstart,rnz;
  PetscScalar       *column_values;

  PetscFunctionBegin;
  ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(((PetscObject)mat)->comm,&size);CHKERRQ(ierr);
  nz   = A->nz + B->nz;
  if (!rank) {
    header[0] = MAT_FILE_CLASSID;
    header[1] = mat->rmap->N;
    header[2] = mat->cmap->N;
    ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
    ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
    ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    /* get the largest number of rows any processor has */
    rlen  = mat->rmap->n;
    range = mat->rmap->range;
    for (i=1; i<size; i++) {
      rlen = PetscMax(rlen,range[i+1] - range[i]);
    }
  } else {
    ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
    rlen = mat->rmap->n;
  }

  /* load up the local row counts */
  ierr = PetscMalloc((rlen+1)*sizeof(PetscInt),&row_lengths);CHKERRQ(ierr);
  for (i=0; i<mat->rmap->n; i++) {
    row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
  }

  /* store the row lengths to the file */
  if (!rank) {
    MPI_Status status;
    ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap->n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      rlen = range[i+1] - range[i];
      ierr = MPI_Recv(row_lengths,rlen,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    }
  } else {
    ierr = MPI_Send(row_lengths,mat->rmap->n,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
  }
  ierr = PetscFree(row_lengths);CHKERRQ(ierr);

  /* load up the local column indices */
  nzmax = nz; /* the reduce below only sets nzmax on process 0, so each other process needs space for its own nonzeros */
  ierr = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
  ierr = PetscMalloc((nzmax+1)*sizeof(PetscInt),&column_indices);CHKERRQ(ierr);
  cnt  = 0;
  for (i=0; i<mat->rmap->n; i++) {
    for (j=B->i[i]; j<B->i[i+1]; j++) {
      if ( (col = garray[B->j[j]]) > cstart) break;
      column_indices[cnt++] = col;
    }
    for (k=A->i[i]; k<A->i[i+1]; k++) {
      column_indices[cnt++] = A->j[k] + cstart;
    }
    for (; j<B->i[i+1]; j++) {
      column_indices[cnt++] = garray[B->j[j]];
    }
  }
  if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);

  /* store the column indices to the file */
  if (!rank) {
    MPI_Status status;
    ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
      if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: rnz = %D nzmax = %D",rnz,nzmax);
      ierr = MPI_Recv(column_indices,rnz,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    }
  } else {
    ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
    ierr = MPI_Send(column_indices,nz,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
  }
  ierr = PetscFree(column_indices);CHKERRQ(ierr);

  /* load up the local column values */
  ierr = PetscMalloc((nzmax+1)*sizeof(PetscScalar),&column_values);CHKERRQ(ierr);
  cnt  = 0;
  for (i=0; i<mat->rmap->n; i++) {
    for (j=B->i[i]; j<B->i[i+1]; j++) {
      if ( garray[B->j[j]] > cstart) break;
      column_values[cnt++] = B->a[j];
    }
    for (k=A->i[i]; k<A->i[i+1]; k++) {
      column_values[cnt++] = A->a[k];
    }
    for (; j<B->i[i+1]; j++) {
      column_values[cnt++] = B->a[j];
    }
  }
  if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);

  /* store the column values to the file */
  if (!rank) {
    MPI_Status status;
    ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
      if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: rnz = %D nzmax = %D",rnz,nzmax);
      ierr = MPI_Recv(column_values,rnz,MPIU_SCALAR,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
    }
  } else {
    ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
    ierr = MPI_Send(column_values,nz,MPIU_SCALAR,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
  }
  ierr = PetscFree(column_values);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

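/*
  The binary format written above is, in order: a four-entry header
  {MAT_FILE_CLASSID, M, N, total nnz}, then the M row lengths, then all column
  indices in global numbering row by row, then all numerical values in the same
  order.  Process 0 interleaves its own data with the data received from
  processes 1..size-1, so the resulting file is identical to what a single
  process holding the whole matrix would write.
*/
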
#undef __FUNCT__
#define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket"
PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
{
  Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode    ierr;
  PetscMPIInt       rank = aij->rank,size = aij->size;
  PetscTruth        isdraw,iascii,isbinary;
  PetscViewer       sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
  if (iascii) {
    ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
    if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      MatInfo    info;
      PetscTruth inodes;

      ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr);
      ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = MatInodeGetInodeSizes(aij->A,PETSC_NULL,(PetscInt**)&inodes,PETSC_NULL);CHKERRQ(ierr);
      if (!inodes) {
        ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, not using I-node routines\n",
                                                  rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
      } else {
        ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, using I-node routines\n",
                                                  rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
      }
      ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
      ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
      ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
      ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_INFO) {
      PetscInt inodecount,inodelimit,*inodes;
      ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr);
      if (inodes) {
        ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr);
      } else {
        ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr);
      }
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
      PetscFunctionReturn(0);
    }
  } else if (isbinary) {
    if (size == 1) {
      ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
      ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
    } else {
      ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr);
    }
    PetscFunctionReturn(0);
  } else if (isdraw) {
    PetscDraw  draw;
    PetscTruth isnull;
    ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
    ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
  }

  if (size == 1) {
    ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
    ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
  } else {
    /* assemble the entire matrix onto the first processor */
    Mat        A;
    Mat_SeqAIJ *Aloc;
    PetscInt   M = mat->rmap->N,N = mat->cmap->N,m,*ai,*aj,row,*cols,i,*ct;
    MatScalar  *a;

    if (mat->rmap->N > 1024) {
      PetscTruth flg = PETSC_FALSE;

      ierr = PetscOptionsGetTruth(((PetscObject) mat)->prefix, "-mat_ascii_output_large", &flg,PETSC_NULL);CHKERRQ(ierr);
      if (!flg) {
        SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_OUTOFRANGE,"ASCII matrix output not allowed for matrices with more than 1024 rows, use binary format instead.\nYou can override this restriction using -mat_ascii_output_large.");
      }
    }

    ierr = MatCreate(((PetscObject)mat)->comm,&A);CHKERRQ(ierr);
    if (!rank) {
      ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
    } else {
      ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
    }
    /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */
    ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(A,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
    ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr);

    /* copy over the A part */
    Aloc = (Mat_SeqAIJ*)aij->A->data;
    m = aij->A->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
    row = mat->rmap->rstart;
    for (i=0; i<ai[m]; i++) {aj[i] += mat->cmap->rstart;}
    for (i=0; i<m; i++) {
      ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
      row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
    }
    aj = Aloc->j;
    for (i=0; i<ai[m]; i++) {aj[i] -= mat->cmap->rstart;}

    /* copy over the B part */
    Aloc = (Mat_SeqAIJ*)aij->B->data;
    m    = aij->B->rmap->n;  ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
    row  = mat->rmap->rstart;
    ierr = PetscMalloc((ai[m]+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr);
    ct   = cols;
    for (i=0; i<ai[m]; i++) {cols[i] = aij->garray[aj[i]];}
    for (i=0; i<m; i++) {
      ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
      row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
    }
    ierr = PetscFree(ct);CHKERRQ(ierr);
    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    /*
       Everyone has to call to draw the matrix since the graphics waits are
       synchronized across all processors that share the PetscDraw object
    */
    ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
    if (!rank) {
      ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr);
      ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
    }
    ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
    ierr = MatDestroy(A);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatView_MPIAIJ"
PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer)
{
  PetscErrorCode ierr;
  PetscTruth     iascii,isdraw,issocket,isbinary;

  PetscFunctionBegin;
  ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
  if (iascii || isdraw || isbinary || issocket) {
    ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
  } else {
    SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Viewer type %s not supported by MPIAIJ matrices",((PetscObject)viewer)->type_name);
  }
  PetscFunctionReturn(0);
}

1201 #undef __FUNCT__
1202 #define __FUNCT__ "MatSOR_MPIAIJ"
1203 PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
1204 {
1205   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1206   PetscErrorCode ierr;
1207   Vec            bb1 = 0;
1208   PetscTruth     hasop;
1209 
1210   PetscFunctionBegin;
1211   if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) {
1212     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
1213   }
1214 
1215   if (flag == SOR_APPLY_UPPER) {
1216     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1217     PetscFunctionReturn(0);
1218   }
1219 
1220   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
1221     if (flag & SOR_ZERO_INITIAL_GUESS) {
1222       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1223       its--;
1224     }
1225 
1226     while (its--) {
1227       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1228       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1229 
1230       /* update rhs: bb1 = bb - B*x */
1231       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1232       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1233 
1234       /* local sweep */
1235       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1236     }
1237   } else if (flag & SOR_LOCAL_FORWARD_SWEEP){
1238     if (flag & SOR_ZERO_INITIAL_GUESS) {
1239       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1240       its--;
1241     }
1242     while (its--) {
1243       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1244       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1245 
1246       /* update rhs: bb1 = bb - B*x */
1247       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1248       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1249 
1250       /* local sweep */
1251       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1252     }
1253   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){
1254     if (flag & SOR_ZERO_INITIAL_GUESS) {
1255       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1256       its--;
1257     }
1258     while (its--) {
1259       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1260       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1261 
1262       /* update rhs: bb1 = bb - B*x */
1263       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1264       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1265 
1266       /* local sweep */
1267       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1268     }
1269   }  else if (flag & SOR_EISENSTAT) {
1270     Vec         xx1;
1271 
1272     ierr = VecDuplicate(bb,&xx1);CHKERRQ(ierr);
1273     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr);
1274 
1275     ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1276     ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1277     if (!mat->diag) {
1278       ierr = MatGetVecs(matin,&mat->diag,PETSC_NULL);CHKERRQ(ierr);
1279       ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr);
1280     }
1281     ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr);
1282     if (hasop) {
1283       ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr);
1284     } else {
1285       ierr = VecPointwiseMult(bb1,mat->diag,xx);CHKERRQ(ierr);
1286     }
1287     ierr = VecAYPX(bb1,(omega-2.0)/omega,bb);CHKERRQ(ierr);
1288 
1289     ierr = MatMultAdd(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr);
1290 
1291     /* local sweep */
1292     ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr);
1293     ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr);
1294     ierr = VecDestroy(xx1);CHKERRQ(ierr);
1295   } else {
1296     SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_SUP,"Parallel SOR not supported");
1297   }
1298 
1299   if (bb1) {ierr = VecDestroy(bb1);CHKERRQ(ierr);}
1300   PetscFunctionReturn(0);
1301 }
1302 
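/*
   Usage sketch for MatSOR_MPIAIJ() above: it is normally reached through the
   PCSOR preconditioner rather than called directly.  A minimal, illustrative
   caller (ksp, b and x are assumed to exist and be set up):

       PC pc;
       ierr = KSPGetPC(ksp,&pc);CHKERRQ(ierr);
       ierr = PCSetType(pc,PCSOR);CHKERRQ(ierr);
       ierr = PCSORSetSymmetric(pc,SOR_LOCAL_SYMMETRIC_SWEEP);CHKERRQ(ierr);
       ierr = KSPSolve(ksp,b,x);CHKERRQ(ierr);

   Only the SOR_LOCAL_* (block Jacobi style) variants are supported in
   parallel; a true global sweep falls through to the final SETERRQ above.
*/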
1303 #undef __FUNCT__
1304 #define __FUNCT__ "MatPermute_MPIAIJ"
1305 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B)
1306 {
1307   MPI_Comm       comm,pcomm;
1308   PetscInt       first,local_size,nrows;
1309   const PetscInt *rows;
1310   PetscMPIInt    size;
1311   IS             crowp,growp,irowp,lrowp,lcolp,icolp;
1312   PetscErrorCode ierr;
1313 
1314   PetscFunctionBegin;
1315   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
1316   /* make a collective version of 'rowp' */
1317   ierr = PetscObjectGetComm((PetscObject)rowp,&pcomm);CHKERRQ(ierr);
1318   if (pcomm==comm) {
1319     crowp = rowp;
1320   } else {
1321     ierr = ISGetSize(rowp,&nrows);CHKERRQ(ierr);
1322     ierr = ISGetIndices(rowp,&rows);CHKERRQ(ierr);
1323     ierr = ISCreateGeneral(comm,nrows,rows,&crowp);CHKERRQ(ierr);
1324     ierr = ISRestoreIndices(rowp,&rows);CHKERRQ(ierr);
1325   }
1326   /* collect the global row permutation and invert it */
1327   ierr = ISAllGather(crowp,&growp);CHKERRQ(ierr);
1328   ierr = ISSetPermutation(growp);CHKERRQ(ierr);
1329   if (pcomm!=comm) {
1330     ierr = ISDestroy(crowp);CHKERRQ(ierr);
1331   }
1332   ierr = ISInvertPermutation(growp,PETSC_DECIDE,&irowp);CHKERRQ(ierr);
1333   /* get the local target indices */
1334   ierr = MatGetOwnershipRange(A,&first,PETSC_NULL);CHKERRQ(ierr);
1335   ierr = MatGetLocalSize(A,&local_size,PETSC_NULL);CHKERRQ(ierr);
1336   ierr = ISGetIndices(irowp,&rows);CHKERRQ(ierr);
1337   ierr = ISCreateGeneral(MPI_COMM_SELF,local_size,rows+first,&lrowp);CHKERRQ(ierr);
1338   ierr = ISRestoreIndices(irowp,&rows);CHKERRQ(ierr);
1339   ierr = ISDestroy(irowp);CHKERRQ(ierr);
1340   /* the column permutation is so much easier;
1341      make a local version of 'colp' and invert it */
1342   ierr = PetscObjectGetComm((PetscObject)colp,&pcomm);CHKERRQ(ierr);
1343   ierr = MPI_Comm_size(pcomm,&size);CHKERRQ(ierr);
1344   if (size==1) {
1345     lcolp = colp;
1346   } else {
1347     ierr = ISGetSize(colp,&nrows);CHKERRQ(ierr);
1348     ierr = ISGetIndices(colp,&rows);CHKERRQ(ierr);
1349     ierr = ISCreateGeneral(MPI_COMM_SELF,nrows,rows,&lcolp);CHKERRQ(ierr);
1350   }
1351   ierr = ISSetPermutation(lcolp);CHKERRQ(ierr);
1352   ierr = ISInvertPermutation(lcolp,PETSC_DECIDE,&icolp);CHKERRQ(ierr);
1353   ierr = ISSetPermutation(icolp);CHKERRQ(ierr);
1354   if (size>1) {
1355     ierr = ISRestoreIndices(colp,&rows);CHKERRQ(ierr);
1356     ierr = ISDestroy(lcolp);CHKERRQ(ierr);
1357   }
1358   /* now we just get the submatrix */
1359   ierr = MatGetSubMatrix_MPIAIJ_Private(A,lrowp,icolp,local_size,MAT_INITIAL_MATRIX,B);CHKERRQ(ierr);
1360   /* clean up */
1361   ierr = ISDestroy(lrowp);CHKERRQ(ierr);
1362   ierr = ISDestroy(icolp);CHKERRQ(ierr);
1363   PetscFunctionReturn(0);
1364 }
1365 
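/*
   Usage sketch for MatPermute_MPIAIJ() above (illustrative; A, nlocal and the
   index arrays rowidx/colidx are assumed to come from the caller):

       IS  rowp,colp;
       Mat B;
       ierr = ISCreateGeneral(PETSC_COMM_WORLD,nlocal,rowidx,&rowp);CHKERRQ(ierr);
       ierr = ISCreateGeneral(PETSC_COMM_WORLD,nlocal,colidx,&colp);CHKERRQ(ierr);
       ierr = MatPermute(A,rowp,colp,&B);CHKERRQ(ierr);

   Both index sets must describe permutations; the routine gathers the row
   permutation onto every process before extracting the local submatrix.
*/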
1366 #undef __FUNCT__
1367 #define __FUNCT__ "MatGetInfo_MPIAIJ"
1368 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1369 {
1370   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1371   Mat            A = mat->A,B = mat->B;
1372   PetscErrorCode ierr;
1373   PetscReal      isend[5],irecv[5];
1374 
1375   PetscFunctionBegin;
1376   info->block_size     = 1.0;
1377   ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1378   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1379   isend[3] = info->memory;  isend[4] = info->mallocs;
1380   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1381   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1382   isend[3] += info->memory;  isend[4] += info->mallocs;
1383   if (flag == MAT_LOCAL) {
1384     info->nz_used      = isend[0];
1385     info->nz_allocated = isend[1];
1386     info->nz_unneeded  = isend[2];
1387     info->memory       = isend[3];
1388     info->mallocs      = isend[4];
1389   } else if (flag == MAT_GLOBAL_MAX) {
1390     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,((PetscObject)matin)->comm);CHKERRQ(ierr);
1391     info->nz_used      = irecv[0];
1392     info->nz_allocated = irecv[1];
1393     info->nz_unneeded  = irecv[2];
1394     info->memory       = irecv[3];
1395     info->mallocs      = irecv[4];
1396   } else if (flag == MAT_GLOBAL_SUM) {
1397     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,((PetscObject)matin)->comm);CHKERRQ(ierr);
1398     info->nz_used      = irecv[0];
1399     info->nz_allocated = irecv[1];
1400     info->nz_unneeded  = irecv[2];
1401     info->memory       = irecv[3];
1402     info->mallocs      = irecv[4];
1403   }
1404   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1405   info->fill_ratio_needed = 0;
1406   info->factor_mallocs    = 0;
1407 
1408   PetscFunctionReturn(0);
1409 }
1410 
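/*
   Usage sketch: the MatInfo fields filled in above can be inspected as
   follows (illustrative only):

       MatInfo info;
       ierr = MatGetInfo(A,MAT_GLOBAL_SUM,&info);CHKERRQ(ierr);
       ierr = PetscPrintf(PETSC_COMM_WORLD,"nz used %g, allocated %g\n",
                          info.nz_used,info.nz_allocated);CHKERRQ(ierr);

   MAT_LOCAL reports per-process numbers; MAT_GLOBAL_MAX and MAT_GLOBAL_SUM
   reduce them over the matrix communicator as coded above.
*/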
1411 #undef __FUNCT__
1412 #define __FUNCT__ "MatSetOption_MPIAIJ"
1413 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscTruth flg)
1414 {
1415   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1416   PetscErrorCode ierr;
1417 
1418   PetscFunctionBegin;
1419   switch (op) {
1420   case MAT_NEW_NONZERO_LOCATIONS:
1421   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1422   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1423   case MAT_KEEP_NONZERO_PATTERN:
1424   case MAT_NEW_NONZERO_LOCATION_ERR:
1425   case MAT_USE_INODES:
1426   case MAT_IGNORE_ZERO_ENTRIES:
1427     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1428     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1429     break;
1430   case MAT_ROW_ORIENTED:
1431     a->roworiented = flg;
1432     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1433     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1434     break;
1435   case MAT_NEW_DIAGONALS:
1436     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1437     break;
1438   case MAT_IGNORE_OFF_PROC_ENTRIES:
1439     a->donotstash = PETSC_TRUE;
1440     break;
1441   case MAT_SPD:
1442     A->spd_set                         = PETSC_TRUE;
1443     A->spd                             = flg;
1444     if (flg) {
1445       A->symmetric                     = PETSC_TRUE;
1446       A->structurally_symmetric        = PETSC_TRUE;
1447       A->symmetric_set                 = PETSC_TRUE;
1448       A->structurally_symmetric_set    = PETSC_TRUE;
1449     }
1450     break;
1451   case MAT_SYMMETRIC:
1452     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1453     break;
1454   case MAT_STRUCTURALLY_SYMMETRIC:
1455     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1456     break;
1457   case MAT_HERMITIAN:
1458     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1459     break;
1460   case MAT_SYMMETRY_ETERNAL:
1461     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1462     break;
1463   default:
1464     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op);
1465   }
1466   PetscFunctionReturn(0);
1467 }
1468 
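/*
   Usage sketch: options are forwarded to the sequential diagonal (A) and
   off-diagonal (B) blocks as appropriate.  Typical assembly-time calls
   (illustrative):

       ierr = MatSetOption(A,MAT_IGNORE_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr);
       ierr = MatSetOption(A,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);

   The first avoids communication by dropping entries destined for other
   processes; the second turns a preallocation overflow into a hard error.
*/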
1469 #undef __FUNCT__
1470 #define __FUNCT__ "MatGetRow_MPIAIJ"
1471 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1472 {
1473   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1474   PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
1475   PetscErrorCode ierr;
1476   PetscInt       i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart;
1477   PetscInt       nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend;
1478   PetscInt       *cmap,*idx_p;
1479 
1480   PetscFunctionBegin;
1481   if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
1482   mat->getrowactive = PETSC_TRUE;
1483 
1484   if (!mat->rowvalues && (idx || v)) {
1485     /*
1486         allocate enough space to hold information from the longest row.
1487     */
1488     Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data;
1489     PetscInt   max = 1,tmp;
1490     for (i=0; i<matin->rmap->n; i++) {
1491       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1492       if (max < tmp) { max = tmp; }
1493     }
1494     ierr = PetscMalloc2(max,PetscScalar,&mat->rowvalues,max,PetscInt,&mat->rowindices);CHKERRQ(ierr);
1495   }
1496 
1497   if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows");
1498   lrow = row - rstart;
1499 
1500   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1501   if (!v)   {pvA = 0; pvB = 0;}
1502   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1503   ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1504   ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1505   nztot = nzA + nzB;
1506 
1507   cmap  = mat->garray;
1508   if (v  || idx) {
1509     if (nztot) {
1510       /* Sort by increasing column numbers, assuming A and B already sorted */
1511       PetscInt imark = -1;
1512       if (v) {
1513         *v = v_p = mat->rowvalues;
1514         for (i=0; i<nzB; i++) {
1515           if (cmap[cworkB[i]] < cstart)   v_p[i] = vworkB[i];
1516           else break;
1517         }
1518         imark = i;
1519         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1520         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1521       }
1522       if (idx) {
1523         *idx = idx_p = mat->rowindices;
1524         if (imark > -1) {
1525           for (i=0; i<imark; i++) {
1526             idx_p[i] = cmap[cworkB[i]];
1527           }
1528         } else {
1529           for (i=0; i<nzB; i++) {
1530             if (cmap[cworkB[i]] < cstart)   idx_p[i] = cmap[cworkB[i]];
1531             else break;
1532           }
1533           imark = i;
1534         }
1535         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart + cworkA[i];
1536         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]];
1537       }
1538     } else {
1539       if (idx) *idx = 0;
1540       if (v)   *v   = 0;
1541     }
1542   }
1543   *nz = nztot;
1544   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1545   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1546   PetscFunctionReturn(0);
1547 }
1548 
1549 #undef __FUNCT__
1550 #define __FUNCT__ "MatRestoreRow_MPIAIJ"
1551 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1552 {
1553   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1554 
1555   PetscFunctionBegin;
1556   if (!aij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
1557   aij->getrowactive = PETSC_FALSE;
1558   PetscFunctionReturn(0);
1559 }
1560 
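/*
   Usage sketch for the MatGetRow()/MatRestoreRow() pair above: only locally
   owned rows may be queried, and each get must be paired with a restore
   before the next row is requested (illustrative):

       PetscInt          row,rstart,rend,ncols;
       const PetscInt    *cols;
       const PetscScalar *vals;
       ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
       for (row=rstart; row<rend; row++) {
         ierr = MatGetRow(A,row,&ncols,&cols,&vals);CHKERRQ(ierr);
         (read cols[0..ncols-1] and vals[0..ncols-1] here)
         ierr = MatRestoreRow(A,row,&ncols,&cols,&vals);CHKERRQ(ierr);
       }
*/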
1561 #undef __FUNCT__
1562 #define __FUNCT__ "MatNorm_MPIAIJ"
1563 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm)
1564 {
1565   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1566   Mat_SeqAIJ     *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data;
1567   PetscErrorCode ierr;
1568   PetscInt       i,j,cstart = mat->cmap->rstart;
1569   PetscReal      sum = 0.0;
1570   MatScalar      *v;
1571 
1572   PetscFunctionBegin;
1573   if (aij->size == 1) {
1574     ierr =  MatNorm(aij->A,type,norm);CHKERRQ(ierr);
1575   } else {
1576     if (type == NORM_FROBENIUS) {
1577       v = amat->a;
1578       for (i=0; i<amat->nz; i++) {
1579 #if defined(PETSC_USE_COMPLEX)
1580         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1581 #else
1582         sum += (*v)*(*v); v++;
1583 #endif
1584       }
1585       v = bmat->a;
1586       for (i=0; i<bmat->nz; i++) {
1587 #if defined(PETSC_USE_COMPLEX)
1588         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1589 #else
1590         sum += (*v)*(*v); v++;
1591 #endif
1592       }
1593       ierr = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPI_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr);
1594       *norm = sqrt(*norm);
1595     } else if (type == NORM_1) { /* max column norm */
1596       PetscReal *tmp,*tmp2;
1597       PetscInt  *jj,*garray = aij->garray;
1598       ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr);
1599       ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscReal),&tmp2);CHKERRQ(ierr);
1600       ierr = PetscMemzero(tmp,mat->cmap->N*sizeof(PetscReal));CHKERRQ(ierr);
1601       *norm = 0.0;
1602       v = amat->a; jj = amat->j;
1603       for (j=0; j<amat->nz; j++) {
1604         tmp[cstart + *jj++ ] += PetscAbsScalar(*v);  v++;
1605       }
1606       v = bmat->a; jj = bmat->j;
1607       for (j=0; j<bmat->nz; j++) {
1608         tmp[garray[*jj++]] += PetscAbsScalar(*v); v++;
1609       }
1610       ierr = MPI_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPI_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr);
1611       for (j=0; j<mat->cmap->N; j++) {
1612         if (tmp2[j] > *norm) *norm = tmp2[j];
1613       }
1614       ierr = PetscFree(tmp);CHKERRQ(ierr);
1615       ierr = PetscFree(tmp2);CHKERRQ(ierr);
1616     } else if (type == NORM_INFINITY) { /* max row norm */
1617       PetscReal ntemp = 0.0;
1618       for (j=0; j<aij->A->rmap->n; j++) {
1619         v = amat->a + amat->i[j];
1620         sum = 0.0;
1621         for (i=0; i<amat->i[j+1]-amat->i[j]; i++) {
1622           sum += PetscAbsScalar(*v); v++;
1623         }
1624         v = bmat->a + bmat->i[j];
1625         for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) {
1626           sum += PetscAbsScalar(*v); v++;
1627         }
1628         if (sum > ntemp) ntemp = sum;
1629       }
1630       ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPI_MAX,((PetscObject)mat)->comm);CHKERRQ(ierr);
1631     } else {
1632       SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_SUP,"No support for two norm");
1633     }
1634   }
1635   PetscFunctionReturn(0);
1636 }
1637 
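/*
   Usage sketch (illustrative):

       PetscReal nrm;
       ierr = MatNorm(A,NORM_FROBENIUS,&nrm);CHKERRQ(ierr);

   NORM_1 (max column sum) and NORM_INFINITY (max row sum) are also handled
   above; the 2-norm is not available for this format, as the final SETERRQ
   shows.
*/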
1638 #undef __FUNCT__
1639 #define __FUNCT__ "MatTranspose_MPIAIJ"
1640 PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout)
1641 {
1642   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1643   Mat_SeqAIJ     *Aloc=(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data;
1644   PetscErrorCode ierr;
1645   PetscInt       M = A->rmap->N,N = A->cmap->N,ma,na,mb,*ai,*aj,*bi,*bj,row,*cols,*cols_tmp,i,*d_nnz;
1646   PetscInt       cstart=A->cmap->rstart,ncol;
1647   Mat            B;
1648   MatScalar      *array;
1649 
1650   PetscFunctionBegin;
1651   if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_ARG_SIZ,"In-place transpose requires a square matrix");
1652 
1653   ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n;
1654   ai = Aloc->i; aj = Aloc->j;
1655   bi = Bloc->i; bj = Bloc->j;
1656   if (reuse == MAT_INITIAL_MATRIX || *matout == A) {
1657     /* compute d_nnz for preallocation; o_nnz is approximated by d_nnz to avoid communication */
1658     ierr = PetscMalloc((1+na)*sizeof(PetscInt),&d_nnz);CHKERRQ(ierr);
1659     ierr = PetscMemzero(d_nnz,(1+na)*sizeof(PetscInt));CHKERRQ(ierr);
1660     for (i=0; i<ai[ma]; i++){
1661       d_nnz[aj[i]] ++;
1662       aj[i] += cstart; /* global col index to be used by MatSetValues() */
1663     }
1664 
1665     ierr = MatCreate(((PetscObject)A)->comm,&B);CHKERRQ(ierr);
1666     ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
1667     ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
1668     ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,d_nnz);CHKERRQ(ierr);
1669     ierr = PetscFree(d_nnz);CHKERRQ(ierr);
1670   } else {
1671     B = *matout;
1672   }
1673 
1674   /* copy over the A part */
1675   array = Aloc->a;
1676   row = A->rmap->rstart;
1677   for (i=0; i<ma; i++) {
1678     ncol = ai[i+1]-ai[i];
1679     ierr = MatSetValues(B,ncol,aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1680     row++; array += ncol; aj += ncol;
1681   }
1682   aj = Aloc->j;
1683   for (i=0; i<ai[ma]; i++) aj[i] -= cstart; /* restore the local col indices */
1684 
1685   /* copy over the B part */
1686   ierr = PetscMalloc(bi[mb]*sizeof(PetscInt),&cols);CHKERRQ(ierr);
1687   ierr = PetscMemzero(cols,bi[mb]*sizeof(PetscInt));CHKERRQ(ierr);
1688   array = Bloc->a;
1689   row = A->rmap->rstart;
1690   for (i=0; i<bi[mb]; i++) {cols[i] = a->garray[bj[i]];}
1691   cols_tmp = cols;
1692   for (i=0; i<mb; i++) {
1693     ncol = bi[i+1]-bi[i];
1694     ierr = MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1695     row++; array += ncol; cols_tmp += ncol;
1696   }
1697   ierr = PetscFree(cols);CHKERRQ(ierr);
1698 
1699   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1700   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1701   if (reuse == MAT_INITIAL_MATRIX || *matout != A) {
1702     *matout = B;
1703   } else {
1704     ierr = MatHeaderCopy(A,B);CHKERRQ(ierr);
1705   }
1706   PetscFunctionReturn(0);
1707 }
1708 
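/*
   Usage sketch (illustrative):

       Mat At;
       ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr);
       (use At, then)
       ierr = MatDestroy(At);CHKERRQ(ierr);

   In-place transposition, MatTranspose(A,MAT_REUSE_MATRIX,&A), is only
   allowed for square matrices, as checked at the top of the routine.
*/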
1709 #undef __FUNCT__
1710 #define __FUNCT__ "MatDiagonalScale_MPIAIJ"
1711 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
1712 {
1713   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1714   Mat            a = aij->A,b = aij->B;
1715   PetscErrorCode ierr;
1716   PetscInt       s1,s2,s3;
1717 
1718   PetscFunctionBegin;
1719   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
1720   if (rr) {
1721     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
1722     if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
1723     /* Overlap communication with computation. */
1724     ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1725   }
1726   if (ll) {
1727     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
1728     if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
1729     ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr);
1730   }
1731   /* scale the diagonal block */
1732   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
1733 
1734   if (rr) {
1735     /* Do a scatter end and then right scale the off-diagonal block */
1736     ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1737     ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr);
1738   }
1739 
1740   PetscFunctionReturn(0);
1741 }
1742 
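/*
   Usage sketch: computes diag(ll)*A*diag(rr) with the diagonal matrices
   stored as vectors (illustrative; ll must match the row layout and rr the
   column layout, which MatGetVecs() provides):

       Vec l,r;
       ierr = MatGetVecs(A,&r,&l);CHKERRQ(ierr);
       ierr = VecSet(l,2.0);CHKERRQ(ierr);
       ierr = VecSet(r,0.5);CHKERRQ(ierr);
       ierr = MatDiagonalScale(A,l,r);CHKERRQ(ierr);

   Either argument may be PETSC_NULL to scale on one side only.
*/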
1743 #undef __FUNCT__
1744 #define __FUNCT__ "MatSetBlockSize_MPIAIJ"
1745 PetscErrorCode MatSetBlockSize_MPIAIJ(Mat A,PetscInt bs)
1746 {
1747   Mat_MPIAIJ     *a   = (Mat_MPIAIJ*)A->data;
1748   PetscErrorCode ierr;
1749 
1750   PetscFunctionBegin;
1751   ierr = MatSetBlockSize(a->A,bs);CHKERRQ(ierr);
1752   ierr = MatSetBlockSize(a->B,bs);CHKERRQ(ierr);
1753   ierr = PetscLayoutSetBlockSize(A->rmap,bs);CHKERRQ(ierr);
1754   ierr = PetscLayoutSetBlockSize(A->cmap,bs);CHKERRQ(ierr);
1755   PetscFunctionReturn(0);
1756 }
1757 #undef __FUNCT__
1758 #define __FUNCT__ "MatSetUnfactored_MPIAIJ"
1759 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A)
1760 {
1761   Mat_MPIAIJ     *a   = (Mat_MPIAIJ*)A->data;
1762   PetscErrorCode ierr;
1763 
1764   PetscFunctionBegin;
1765   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
1766   PetscFunctionReturn(0);
1767 }
1768 
1769 #undef __FUNCT__
1770 #define __FUNCT__ "MatEqual_MPIAIJ"
1771 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscTruth *flag)
1772 {
1773   Mat_MPIAIJ     *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data;
1774   Mat            a,b,c,d;
1775   PetscTruth     flg;
1776   PetscErrorCode ierr;
1777 
1778   PetscFunctionBegin;
1779   a = matA->A; b = matA->B;
1780   c = matB->A; d = matB->B;
1781 
1782   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
1783   if (flg) {
1784     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
1785   }
1786   ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,((PetscObject)A)->comm);CHKERRQ(ierr);
1787   PetscFunctionReturn(0);
1788 }
1789 
1790 #undef __FUNCT__
1791 #define __FUNCT__ "MatCopy_MPIAIJ"
1792 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
1793 {
1794   PetscErrorCode ierr;
1795   Mat_MPIAIJ     *a = (Mat_MPIAIJ *)A->data;
1796   Mat_MPIAIJ     *b = (Mat_MPIAIJ *)B->data;
1797 
1798   PetscFunctionBegin;
1799   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
1800   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
1801     /* because of the column compression in the off-processor part of the matrix a->B,
1802        the number of columns in a->B and b->B may be different, hence we cannot call
1803        the MatCopy() directly on the two parts. If need be, we can provide a more
1804        efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices
1805        then copying the submatrices */
1806     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
1807   } else {
1808     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
1809     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
1810   }
1811   PetscFunctionReturn(0);
1812 }
1813 
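/*
   Usage sketch (illustrative): when B was created with MatDuplicate() from A
   the nonzero patterns match and the fast block-wise path above is taken:

       Mat B;
       ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr);
       (later, refresh B with the current values of A)
       ierr = MatCopy(A,B,SAME_NONZERO_PATTERN);CHKERRQ(ierr);

   With DIFFERENT_NONZERO_PATTERN the generic MatCopy_Basic() path is used.
*/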
1814 #undef __FUNCT__
1815 #define __FUNCT__ "MatSetUpPreallocation_MPIAIJ"
1816 PetscErrorCode MatSetUpPreallocation_MPIAIJ(Mat A)
1817 {
1818   PetscErrorCode ierr;
1819 
1820   PetscFunctionBegin;
1821   ierr =  MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
1822   PetscFunctionReturn(0);
1823 }
1824 
1825 #undef __FUNCT__
1826 #define __FUNCT__ "MatAXPY_MPIAIJ"
1827 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
1828 {
1829   PetscErrorCode ierr;
1830   PetscInt       i;
1831   Mat_MPIAIJ     *xx = (Mat_MPIAIJ *)X->data,*yy = (Mat_MPIAIJ *)Y->data;
1832   PetscBLASInt   bnz,one=1;
1833   Mat_SeqAIJ     *x,*y;
1834 
1835   PetscFunctionBegin;
1836   if (str == SAME_NONZERO_PATTERN) {
1837     PetscScalar alpha = a;
1838     x = (Mat_SeqAIJ *)xx->A->data;
1839     y = (Mat_SeqAIJ *)yy->A->data;
1840     bnz = PetscBLASIntCast(x->nz);
1841     BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
1842     x = (Mat_SeqAIJ *)xx->B->data;
1843     y = (Mat_SeqAIJ *)yy->B->data;
1844     bnz = PetscBLASIntCast(x->nz);
1845     BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
1846   } else if (str == SUBSET_NONZERO_PATTERN) {
1847     ierr = MatAXPY_SeqAIJ(yy->A,a,xx->A,str);CHKERRQ(ierr);
1848 
1849     x = (Mat_SeqAIJ *)xx->B->data;
1850     y = (Mat_SeqAIJ *)yy->B->data;
1851     if (y->xtoy && y->XtoY != xx->B) {
1852       ierr = PetscFree(y->xtoy);CHKERRQ(ierr);
1853       ierr = MatDestroy(y->XtoY);CHKERRQ(ierr);
1854     }
1855     if (!y->xtoy) { /* get xtoy */
1856       ierr = MatAXPYGetxtoy_Private(xx->B->rmap->n,x->i,x->j,xx->garray,y->i,y->j,yy->garray,&y->xtoy);CHKERRQ(ierr);
1857       y->XtoY = xx->B;
1858       ierr = PetscObjectReference((PetscObject)xx->B);CHKERRQ(ierr);
1859     }
1860     for (i=0; i<x->nz; i++) y->a[y->xtoy[i]] += a*(x->a[i]);
1861   } else {
1862     ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
1863   }
1864   PetscFunctionReturn(0);
1865 }
1866 
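/*
   Usage sketch: Y = a*X + Y (illustrative):

       ierr = MatAXPY(Y,2.0,X,SAME_NONZERO_PATTERN);CHKERRQ(ierr);

   SAME_NONZERO_PATTERN reduces to two BLAS axpy calls on the stored values;
   SUBSET_NONZERO_PATTERN builds (and caches) the x-to-y index map used above;
   anything else falls back to MatAXPY_Basic().
*/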
1867 EXTERN PetscErrorCode PETSCMAT_DLLEXPORT MatConjugate_SeqAIJ(Mat);
1868 
1869 #undef __FUNCT__
1870 #define __FUNCT__ "MatConjugate_MPIAIJ"
1871 PetscErrorCode PETSCMAT_DLLEXPORT MatConjugate_MPIAIJ(Mat mat)
1872 {
1873 #if defined(PETSC_USE_COMPLEX)
1874   PetscErrorCode ierr;
1875   Mat_MPIAIJ     *aij = (Mat_MPIAIJ *)mat->data;
1876 
1877   PetscFunctionBegin;
1878   ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr);
1879   ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr);
1880 #else
1881   PetscFunctionBegin;
1882 #endif
1883   PetscFunctionReturn(0);
1884 }
1885 
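/*
   Usage sketch (illustrative): replaces every entry by its complex conjugate;
   with real scalars the call is a no-op, as the #if above shows:

       ierr = MatConjugate(A);CHKERRQ(ierr);
*/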
1886 #undef __FUNCT__
1887 #define __FUNCT__ "MatRealPart_MPIAIJ"
1888 PetscErrorCode MatRealPart_MPIAIJ(Mat A)
1889 {
1890   Mat_MPIAIJ   *a = (Mat_MPIAIJ*)A->data;
1891   PetscErrorCode ierr;
1892 
1893   PetscFunctionBegin;
1894   ierr = MatRealPart(a->A);CHKERRQ(ierr);
1895   ierr = MatRealPart(a->B);CHKERRQ(ierr);
1896   PetscFunctionReturn(0);
1897 }
1898 
1899 #undef __FUNCT__
1900 #define __FUNCT__ "MatImaginaryPart_MPIAIJ"
1901 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A)
1902 {
1903   Mat_MPIAIJ   *a = (Mat_MPIAIJ*)A->data;
1904   PetscErrorCode ierr;
1905 
1906   PetscFunctionBegin;
1907   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
1908   ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
1909   PetscFunctionReturn(0);
1910 }
1911 
1912 #ifdef PETSC_HAVE_PBGL
1913 
1914 #include <boost/parallel/mpi/bsp_process_group.hpp>
1915 #include <boost/graph/distributed/ilu_default_graph.hpp>
1916 #include <boost/graph/distributed/ilu_0_block.hpp>
1917 #include <boost/graph/distributed/ilu_preconditioner.hpp>
1918 #include <boost/graph/distributed/petsc/interface.hpp>
1919 #include <boost/multi_array.hpp>
1920 #include <boost/parallel/distributed_property_map.hpp>
1921 
1922 #undef __FUNCT__
1923 #define __FUNCT__ "MatILUFactorSymbolic_MPIAIJ"
1924 /*
1925   This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu>
1926 */
1927 PetscErrorCode MatILUFactorSymbolic_MPIAIJ(Mat fact,Mat A, IS isrow, IS iscol, const MatFactorInfo *info)
1928 {
1929   namespace petsc = boost::distributed::petsc;
1930 
1931   namespace graph_dist = boost::graph::distributed;
1932   using boost::graph::distributed::ilu_default::process_group_type;
1933   using boost::graph::ilu_permuted;
1934 
1935   PetscTruth      row_identity, col_identity;
1936   PetscContainer  c;
1937   PetscInt        m, n, M, N;
1938   PetscErrorCode  ierr;
1939 
1940   PetscFunctionBegin;
1941   if (info->levels != 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only levels = 0 supported for parallel ilu");
1942   ierr = ISIdentity(isrow, &row_identity);CHKERRQ(ierr);
1943   ierr = ISIdentity(iscol, &col_identity);CHKERRQ(ierr);
1944   if (!row_identity || !col_identity) {
1945     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for parallel ILU");
1946   }
1947 
1948   process_group_type pg;
1949   typedef graph_dist::ilu_default::ilu_level_graph_type  lgraph_type;
1950   lgraph_type*   lgraph_p = new lgraph_type(petsc::num_global_vertices(A), pg, petsc::matrix_distribution(A, pg));
1951   lgraph_type&   level_graph = *lgraph_p;
1952   graph_dist::ilu_default::graph_type&            graph(level_graph.graph);
1953 
1954   petsc::read_matrix(A, graph, get(boost::edge_weight, graph));
1955   ilu_permuted(level_graph);
1956 
1957   /* put together the new matrix */
1958   ierr = MatCreate(((PetscObject)A)->comm, fact);CHKERRQ(ierr);
1959   ierr = MatGetLocalSize(A, &m, &n);CHKERRQ(ierr);
1960   ierr = MatGetSize(A, &M, &N);CHKERRQ(ierr);
1961   ierr = MatSetSizes(fact, m, n, M, N);CHKERRQ(ierr);
1962   ierr = MatSetType(fact, ((PetscObject)A)->type_name);CHKERRQ(ierr);
1963   ierr = MatAssemblyBegin(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1964   ierr = MatAssemblyEnd(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1965 
1966   ierr = PetscContainerCreate(((PetscObject)A)->comm, &c);CHKERRQ(ierr);
1967   ierr = PetscContainerSetPointer(c, lgraph_p);CHKERRQ(ierr);
1968   ierr = PetscObjectCompose((PetscObject) (fact), "graph", (PetscObject) c);CHKERRQ(ierr);
1969   PetscFunctionReturn(0);
1970 }
1971 
1972 #undef __FUNCT__
1973 #define __FUNCT__ "MatLUFactorNumeric_MPIAIJ"
1974 PetscErrorCode MatLUFactorNumeric_MPIAIJ(Mat B,Mat A, const MatFactorInfo *info)
1975 {
1976   PetscFunctionBegin;
1977   PetscFunctionReturn(0);
1978 }
1979 
1980 #undef __FUNCT__
1981 #define __FUNCT__ "MatSolve_MPIAIJ"
1982 /*
1983   This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu>
1984 */
1985 PetscErrorCode MatSolve_MPIAIJ(Mat A, Vec b, Vec x)
1986 {
1987   namespace graph_dist = boost::graph::distributed;
1988 
1989   typedef graph_dist::ilu_default::ilu_level_graph_type  lgraph_type;
1990   lgraph_type*   lgraph_p;
1991   PetscContainer c;
1992   PetscErrorCode ierr;
1993 
1994   PetscFunctionBegin;
1995   ierr = PetscObjectQuery((PetscObject) A, "graph", (PetscObject *) &c);CHKERRQ(ierr);
1996   ierr = PetscContainerGetPointer(c, (void **) &lgraph_p);CHKERRQ(ierr);
1997   ierr = VecCopy(b, x);CHKERRQ(ierr);
1998 
1999   PetscScalar* array_x;
2000   ierr = VecGetArray(x, &array_x);CHKERRQ(ierr);
2001   PetscInt sx;
2002   ierr = VecGetSize(x, &sx);CHKERRQ(ierr);
2003 
2004   PetscScalar* array_b;
2005   ierr = VecGetArray(b, &array_b);CHKERRQ(ierr);
2006   PetscInt sb;
2007   ierr = VecGetSize(b, &sb);CHKERRQ(ierr);
2008 
2009   lgraph_type&   level_graph = *lgraph_p;
2010   graph_dist::ilu_default::graph_type&            graph(level_graph.graph);
2011 
2012   typedef boost::multi_array_ref<PetscScalar, 1> array_ref_type;
2013   array_ref_type                                 ref_b(array_b, boost::extents[num_vertices(graph)]),
2014                                                  ref_x(array_x, boost::extents[num_vertices(graph)]);
2015 
2016   typedef boost::iterator_property_map<array_ref_type::iterator,
2017                                 boost::property_map<graph_dist::ilu_default::graph_type, boost::vertex_index_t>::type>  gvector_type;
2018   gvector_type                                   vector_b(ref_b.begin(), get(boost::vertex_index, graph)),
2019                                                  vector_x(ref_x.begin(), get(boost::vertex_index, graph));
2020 
2021   ilu_set_solve(*lgraph_p, vector_b, vector_x);
2022 
2023   PetscFunctionReturn(0);
2024 }
2025 #endif
2026 
2027 typedef struct { /* used by MatGetRedundantMatrix() for reusing matredundant */
2028   PetscInt       nzlocal,nsends,nrecvs;
2029   PetscMPIInt    *send_rank,*recv_rank;
2030   PetscInt       *sbuf_nz,*rbuf_nz,*sbuf_j,**rbuf_j;
2031   PetscScalar    *sbuf_a,**rbuf_a;
2032   PetscErrorCode (*MatDestroy)(Mat);
2033 } Mat_Redundant;
2034 
2035 #undef __FUNCT__
2036 #define __FUNCT__ "PetscContainerDestroy_MatRedundant"
2037 PetscErrorCode PetscContainerDestroy_MatRedundant(void *ptr)
2038 {
2039   PetscErrorCode       ierr;
2040   Mat_Redundant        *redund=(Mat_Redundant*)ptr;
2041   PetscInt             i;
2042 
2043   PetscFunctionBegin;
2044   ierr = PetscFree2(redund->send_rank,redund->recv_rank);CHKERRQ(ierr);
2045   ierr = PetscFree(redund->sbuf_j);CHKERRQ(ierr);
2046   ierr = PetscFree(redund->sbuf_a);CHKERRQ(ierr);
2047   for (i=0; i<redund->nrecvs; i++){
2048     ierr = PetscFree(redund->rbuf_j[i]);CHKERRQ(ierr);
2049     ierr = PetscFree(redund->rbuf_a[i]);CHKERRQ(ierr);
2050   }
2051   ierr = PetscFree4(redund->sbuf_nz,redund->rbuf_nz,redund->rbuf_j,redund->rbuf_a);CHKERRQ(ierr);
2052   ierr = PetscFree(redund);CHKERRQ(ierr);
2053   PetscFunctionReturn(0);
2054 }
2055 
2056 #undef __FUNCT__
2057 #define __FUNCT__ "MatDestroy_MatRedundant"
2058 PetscErrorCode MatDestroy_MatRedundant(Mat A)
2059 {
2060   PetscErrorCode  ierr;
2061   PetscContainer  container;
2062   Mat_Redundant   *redund=PETSC_NULL;
2063 
2064   PetscFunctionBegin;
2065   ierr = PetscObjectQuery((PetscObject)A,"Mat_Redundant",(PetscObject *)&container);CHKERRQ(ierr);
2066   if (container) {
2067     ierr = PetscContainerGetPointer(container,(void **)&redund);CHKERRQ(ierr);
2068   } else {
2069     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Container does not exist");
2070   }
2071   A->ops->destroy = redund->MatDestroy;
2072   ierr = PetscObjectCompose((PetscObject)A,"Mat_Redundant",0);CHKERRQ(ierr);
2073   ierr = (*A->ops->destroy)(A);CHKERRQ(ierr);
2074   ierr = PetscContainerDestroy(container);CHKERRQ(ierr);
2075   PetscFunctionReturn(0);
2076 }
2077 
2078 #undef __FUNCT__
2079 #define __FUNCT__ "MatGetRedundantMatrix_MPIAIJ"
2080 PetscErrorCode MatGetRedundantMatrix_MPIAIJ(Mat mat,PetscInt nsubcomm,MPI_Comm subcomm,PetscInt mlocal_sub,MatReuse reuse,Mat *matredundant)
2081 {
2082   PetscMPIInt    rank,size;
2083   MPI_Comm       comm=((PetscObject)mat)->comm;
2084   PetscErrorCode ierr;
2085   PetscInt       nsends=0,nrecvs=0,i,rownz_max=0;
2086   PetscMPIInt    *send_rank=PETSC_NULL,*recv_rank=PETSC_NULL;
2087   PetscInt       *rowrange=mat->rmap->range;
2088   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2089   Mat            A=aij->A,B=aij->B,C=*matredundant;
2090   Mat_SeqAIJ     *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data;
2091   PetscScalar    *sbuf_a;
2092   PetscInt       nzlocal=a->nz+b->nz;
2093   PetscInt       j,cstart=mat->cmap->rstart,cend=mat->cmap->rend,row,nzA,nzB,ncols,*cworkA,*cworkB;
2094   PetscInt       rstart=mat->rmap->rstart,rend=mat->rmap->rend,*bmap=aij->garray,M,N;
2095   PetscInt       *cols,ctmp,lwrite,*rptr,l,*sbuf_j;
2096   MatScalar      *aworkA,*aworkB;
2097   PetscScalar    *vals;
2098   PetscMPIInt    tag1,tag2,tag3,imdex;
2099   MPI_Request    *s_waits1=PETSC_NULL,*s_waits2=PETSC_NULL,*s_waits3=PETSC_NULL,
2100                  *r_waits1=PETSC_NULL,*r_waits2=PETSC_NULL,*r_waits3=PETSC_NULL;
2101   MPI_Status     recv_status,*send_status;
2102   PetscInt       *sbuf_nz=PETSC_NULL,*rbuf_nz=PETSC_NULL,count;
2103   PetscInt       **rbuf_j=PETSC_NULL;
2104   PetscScalar    **rbuf_a=PETSC_NULL;
2105   Mat_Redundant  *redund=PETSC_NULL;
2106   PetscContainer container;
2107 
2108   PetscFunctionBegin;
2109   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2110   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2111 
2112   if (reuse == MAT_REUSE_MATRIX) {
2113     ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr);
2114     if (M != N || M != mat->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong global size");
2115     ierr = MatGetLocalSize(C,&M,&N);CHKERRQ(ierr);
2116     if (M != N || M != mlocal_sub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong local size");
2117     ierr = PetscObjectQuery((PetscObject)C,"Mat_Redundant",(PetscObject *)&container);CHKERRQ(ierr);
2118     if (container) {
2119       ierr = PetscContainerGetPointer(container,(void **)&redund);CHKERRQ(ierr);
2120     } else {
2121       SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Container does not exist");
2122     }
2123     if (nzlocal != redund->nzlocal) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong nzlocal");
2124 
2125     nsends    = redund->nsends;
2126     nrecvs    = redund->nrecvs;
2127     send_rank = redund->send_rank;
2128     recv_rank = redund->recv_rank;
2129     sbuf_nz   = redund->sbuf_nz;
2130     rbuf_nz   = redund->rbuf_nz;
2131     sbuf_j    = redund->sbuf_j;
2132     sbuf_a    = redund->sbuf_a;
2133     rbuf_j    = redund->rbuf_j;
2134     rbuf_a    = redund->rbuf_a;
2135   }
2136 
2137   if (reuse == MAT_INITIAL_MATRIX){
2138     PetscMPIInt  subrank,subsize;
2139     PetscInt     nleftover,np_subcomm;
2140     /* get the destination processors' id send_rank, nsends and nrecvs */
2141     ierr = MPI_Comm_rank(subcomm,&subrank);CHKERRQ(ierr);
2142     ierr = MPI_Comm_size(subcomm,&subsize);CHKERRQ(ierr);
2143     ierr = PetscMalloc2(size,PetscMPIInt,&send_rank,size,PetscMPIInt,&recv_rank);CHKERRQ(ierr);
2144     np_subcomm = size/nsubcomm;
2145     nleftover  = size - nsubcomm*np_subcomm;
2146     nsends = 0; nrecvs = 0;
2147     for (i=0; i<size; i++){ /* i is the global rank of the candidate peer */
2148       if (subrank == i/nsubcomm && rank != i){ /* my_subrank == other's subrank */
2149         send_rank[nsends] = i; nsends++;
2150         recv_rank[nrecvs++] = i;
2151       }
2152     }
2153     if (rank >= size - nleftover){/* this proc is a leftover processor */
2154       i = size-nleftover-1;
2155       j = 0;
2156       while (j < nsubcomm - nleftover){
2157         send_rank[nsends++] = i;
2158         i--; j++;
2159       }
2160     }
2161 
2162     if (nleftover && subsize == size/nsubcomm && subrank==subsize-1){ /* this proc recvs from leftover processors */
2163       for (i=0; i<nleftover; i++){
2164         recv_rank[nrecvs++] = size-nleftover+i;
2165       }
2166     }
2167 
2168     /* allocate sbuf_j, sbuf_a */
2169     i = nzlocal + rowrange[rank+1] - rowrange[rank] + 2;
2170     ierr = PetscMalloc(i*sizeof(PetscInt),&sbuf_j);CHKERRQ(ierr);
2171     ierr = PetscMalloc((nzlocal+1)*sizeof(PetscScalar),&sbuf_a);CHKERRQ(ierr);
2172   } /* end of if (reuse == MAT_INITIAL_MATRIX) */
2173 
2174   /* copy mat's local entries into the buffers */
2175   if (reuse == MAT_INITIAL_MATRIX){
2176     rownz_max = 0;
2177     rptr = sbuf_j;
2178     cols = sbuf_j + rend-rstart + 1;
2179     vals = sbuf_a;
2180     rptr[0] = 0;
2181     for (i=0; i<rend-rstart; i++){
2182       row = i + rstart;
2183       nzA    = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i];
2184       ncols  = nzA + nzB;
2185       cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i];
2186       aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i];
2187       /* load the column indices for this row into cols */
2188       lwrite = 0;
2189       for (l=0; l<nzB; l++) {
2190         if ((ctmp = bmap[cworkB[l]]) < cstart){
2191           vals[lwrite]   = aworkB[l];
2192           cols[lwrite++] = ctmp;
2193         }
2194       }
2195       for (l=0; l<nzA; l++){
2196         vals[lwrite]   = aworkA[l];
2197         cols[lwrite++] = cstart + cworkA[l];
2198       }
2199       for (l=0; l<nzB; l++) {
2200         if ((ctmp = bmap[cworkB[l]]) >= cend){
2201           vals[lwrite]   = aworkB[l];
2202           cols[lwrite++] = ctmp;
2203         }
2204       }
2205       vals += ncols;
2206       cols += ncols;
2207       rptr[i+1] = rptr[i] + ncols;
2208       if (rownz_max < ncols) rownz_max = ncols;
2209     }
2210     if (rptr[rend-rstart] != a->nz + b->nz) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB, "rptr[%d] %d != %d + %d",rend-rstart,rptr[rend-rstart],a->nz,b->nz);
2211   } else { /* only copy matrix values into sbuf_a */
2212     rptr = sbuf_j;
2213     vals = sbuf_a;
2214     rptr[0] = 0;
2215     for (i=0; i<rend-rstart; i++){
2216       row = i + rstart;
2217       nzA    = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i];
2218       ncols  = nzA + nzB;
2219       cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i];
2220       aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i];
2221       lwrite = 0;
2222       for (l=0; l<nzB; l++) {
2223         if ((ctmp = bmap[cworkB[l]]) < cstart) vals[lwrite++] = aworkB[l];
2224       }
2225       for (l=0; l<nzA; l++) vals[lwrite++] = aworkA[l];
2226       for (l=0; l<nzB; l++) {
2227         if ((ctmp = bmap[cworkB[l]]) >= cend) vals[lwrite++] = aworkB[l];
2228       }
2229       vals += ncols;
2230       rptr[i+1] = rptr[i] + ncols;
2231     }
2232   } /* end of if (reuse == MAT_INITIAL_MATRIX) */
2233 
2234   /* send nzlocal to others, and recv other's nzlocal */
2235   /*--------------------------------------------------*/
2236   if (reuse == MAT_INITIAL_MATRIX){
2237     ierr = PetscMalloc2(3*(nsends + nrecvs)+1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr);
2238     s_waits2 = s_waits3 + nsends;
2239     s_waits1 = s_waits2 + nsends;
2240     r_waits1 = s_waits1 + nsends;
2241     r_waits2 = r_waits1 + nrecvs;
2242     r_waits3 = r_waits2 + nrecvs;
2243   } else {
2244     ierr = PetscMalloc2(nsends + nrecvs +1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr);
2245     r_waits3 = s_waits3 + nsends;
2246   }
2247 
2248   ierr = PetscObjectGetNewTag((PetscObject)mat,&tag3);CHKERRQ(ierr);
2249   if (reuse == MAT_INITIAL_MATRIX){
2250     /* get new tags to keep the communication clean */
2251     ierr = PetscObjectGetNewTag((PetscObject)mat,&tag1);CHKERRQ(ierr);
2252     ierr = PetscObjectGetNewTag((PetscObject)mat,&tag2);CHKERRQ(ierr);
2253     ierr = PetscMalloc4(nsends,PetscInt,&sbuf_nz,nrecvs,PetscInt,&rbuf_nz,nrecvs,PetscInt*,&rbuf_j,nrecvs,PetscScalar*,&rbuf_a);CHKERRQ(ierr);
2254 
2255     /* post receives of other's nzlocal */
2256     for (i=0; i<nrecvs; i++){
2257       ierr = MPI_Irecv(rbuf_nz+i,1,MPIU_INT,MPI_ANY_SOURCE,tag1,comm,r_waits1+i);CHKERRQ(ierr);
2258     }
2259     /* send nzlocal to others */
2260     for (i=0; i<nsends; i++){
2261       sbuf_nz[i] = nzlocal;
2262       ierr = MPI_Isend(sbuf_nz+i,1,MPIU_INT,send_rank[i],tag1,comm,s_waits1+i);CHKERRQ(ierr);
2263     }
2264     /* wait on receives of nzlocal; allocate space for rbuf_j, rbuf_a */
2265     count = nrecvs;
2266     while (count) {
2267       ierr = MPI_Waitany(nrecvs,r_waits1,&imdex,&recv_status);CHKERRQ(ierr);
2268       recv_rank[imdex] = recv_status.MPI_SOURCE;
2269       /* allocate rbuf_a and rbuf_j; then post receives of rbuf_j */
2270       ierr = PetscMalloc((rbuf_nz[imdex]+1)*sizeof(PetscScalar),&rbuf_a[imdex]);CHKERRQ(ierr);
2271 
2272       i = rowrange[recv_status.MPI_SOURCE+1] - rowrange[recv_status.MPI_SOURCE]; /* number of row pointers expected from this source */
2273       rbuf_nz[imdex] += i + 2;
2274       ierr = PetscMalloc(rbuf_nz[imdex]*sizeof(PetscInt),&rbuf_j[imdex]);CHKERRQ(ierr);
2275       ierr = MPI_Irecv(rbuf_j[imdex],rbuf_nz[imdex],MPIU_INT,recv_status.MPI_SOURCE,tag2,comm,r_waits2+imdex);CHKERRQ(ierr);
2276       count--;
2277     }
2278     /* wait on sends of nzlocal */
2279     if (nsends) {ierr = MPI_Waitall(nsends,s_waits1,send_status);CHKERRQ(ierr);}
2280     /* send mat->i,j to others, and recv from other's */
2281     /*------------------------------------------------*/
2282     for (i=0; i<nsends; i++){
2283       j = nzlocal + rowrange[rank+1] - rowrange[rank] + 1;
2284       ierr = MPI_Isend(sbuf_j,j,MPIU_INT,send_rank[i],tag2,comm,s_waits2+i);CHKERRQ(ierr);
2285     }
2286     /* wait on receives of mat->i,j */
2287     /*------------------------------*/
2288     count = nrecvs;
2289     while (count) {
2290       ierr = MPI_Waitany(nrecvs,r_waits2,&imdex,&recv_status);CHKERRQ(ierr);
2291       if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE);
2292       count--;
2293     }
2294     /* wait on sends of mat->i,j */
2295     /*---------------------------*/
2296     if (nsends) {
2297       ierr = MPI_Waitall(nsends,s_waits2,send_status);CHKERRQ(ierr);
2298     }
2299   } /* end of if (reuse == MAT_INITIAL_MATRIX) */
2300 
2301   /* post receives, send and receive mat->a */
2302   /*----------------------------------------*/
2303   for (imdex=0; imdex<nrecvs; imdex++) {
2304     ierr = MPI_Irecv(rbuf_a[imdex],rbuf_nz[imdex],MPIU_SCALAR,recv_rank[imdex],tag3,comm,r_waits3+imdex);CHKERRQ(ierr);
2305   }
2306   for (i=0; i<nsends; i++){
2307     ierr = MPI_Isend(sbuf_a,nzlocal,MPIU_SCALAR,send_rank[i],tag3,comm,s_waits3+i);CHKERRQ(ierr);
2308   }
2309   count = nrecvs;
2310   while (count) {
2311     ierr = MPI_Waitany(nrecvs,r_waits3,&imdex,&recv_status);CHKERRQ(ierr);
2312     if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE);
2313     count--;
2314   }
2315   if (nsends) {
2316     ierr = MPI_Waitall(nsends,s_waits3,send_status);CHKERRQ(ierr);
2317   }
2318 
2319   ierr = PetscFree2(s_waits3,send_status);CHKERRQ(ierr);
2320 
2321   /* create redundant matrix */
2322   /*-------------------------*/
2323   if (reuse == MAT_INITIAL_MATRIX){
2324     /* compute rownz_max for preallocation */
2325     for (imdex=0; imdex<nrecvs; imdex++){
2326       j = rowrange[recv_rank[imdex]+1] - rowrange[recv_rank[imdex]];
2327       rptr = rbuf_j[imdex];
2328       for (i=0; i<j; i++){
2329         ncols = rptr[i+1] - rptr[i];
2330         if (rownz_max < ncols) rownz_max = ncols;
2331       }
2332     }
2333 
2334     ierr = MatCreate(subcomm,&C);CHKERRQ(ierr);
2335     ierr = MatSetSizes(C,mlocal_sub,mlocal_sub,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr);
2336     ierr = MatSetFromOptions(C);CHKERRQ(ierr);
2337     ierr = MatSeqAIJSetPreallocation(C,rownz_max,PETSC_NULL);CHKERRQ(ierr);
2338     ierr = MatMPIAIJSetPreallocation(C,rownz_max,PETSC_NULL,rownz_max,PETSC_NULL);CHKERRQ(ierr);
2339   } else {
2340     C = *matredundant;
2341   }
2342 
2343   /* insert local matrix entries */
2344   rptr = sbuf_j;
2345   cols = sbuf_j + rend-rstart + 1;
2346   vals = sbuf_a;
2347   for (i=0; i<rend-rstart; i++){
2348     row   = i + rstart;
2349     ncols = rptr[i+1] - rptr[i];
2350     ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
2351     vals += ncols;
2352     cols += ncols;
2353   }
2354   /* insert received matrix entries */
2355   for (imdex=0; imdex<nrecvs; imdex++){
2356     rstart = rowrange[recv_rank[imdex]];
2357     rend   = rowrange[recv_rank[imdex]+1];
2358     rptr = rbuf_j[imdex];
2359     cols = rbuf_j[imdex] + rend-rstart + 1;
2360     vals = rbuf_a[imdex];
2361     for (i=0; i<rend-rstart; i++){
2362       row   = i + rstart;
2363       ncols = rptr[i+1] - rptr[i];
2364       ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
2365       vals += ncols;
2366       cols += ncols;
2367     }
2368   }
2369   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2370   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2371   ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr);
2372   if (M != mat->rmap->N || N != mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"redundant mat size %d != input mat size %d",M,mat->rmap->N);
2373   if (reuse == MAT_INITIAL_MATRIX){
2374     PetscContainer container;
2375     *matredundant = C;
2376     /* create a supporting struct and attach it to C for reuse */
2377     ierr = PetscNewLog(C,Mat_Redundant,&redund);CHKERRQ(ierr);
2378     ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
2379     ierr = PetscContainerSetPointer(container,redund);CHKERRQ(ierr);
2380     ierr = PetscObjectCompose((PetscObject)C,"Mat_Redundant",(PetscObject)container);CHKERRQ(ierr);
2381     ierr = PetscContainerSetUserDestroy(container,PetscContainerDestroy_MatRedundant);CHKERRQ(ierr);
2382 
2383     redund->nzlocal = nzlocal;
2384     redund->nsends  = nsends;
2385     redund->nrecvs  = nrecvs;
2386     redund->send_rank = send_rank;
2387     redund->recv_rank = recv_rank;
2388     redund->sbuf_nz = sbuf_nz;
2389     redund->rbuf_nz = rbuf_nz;
2390     redund->sbuf_j  = sbuf_j;
2391     redund->sbuf_a  = sbuf_a;
2392     redund->rbuf_j  = rbuf_j;
2393     redund->rbuf_a  = rbuf_a;
2394 
2395     redund->MatDestroy = C->ops->destroy;
2396     C->ops->destroy    = MatDestroy_MatRedundant;
2397   }
2398   PetscFunctionReturn(0);
2399 }
2400 
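/*
   Usage sketch for MatGetRedundantMatrix_MPIAIJ() above (illustrative):
   gather nsubcomm redundant copies of mat, one per sub-communicator, e.g.
   for a redundant direct solve.  subcomm and mlocal_sub are assumed to have
   been built by the caller (for instance with the PetscSubcomm utilities):

       Mat C;
       ierr = MatGetRedundantMatrix(mat,nsubcomm,subcomm,mlocal_sub,MAT_INITIAL_MATRIX,&C);CHKERRQ(ierr);
       (when only the numerical values of mat change, reuse C and its
        communication pattern)
       ierr = MatGetRedundantMatrix(mat,nsubcomm,subcomm,mlocal_sub,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr);
*/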
2401 #undef __FUNCT__
2402 #define __FUNCT__ "MatGetRowMaxAbs_MPIAIJ"
2403 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2404 {
2405   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2406   PetscErrorCode ierr;
2407   PetscInt       i,*idxb = 0;
2408   PetscScalar    *va,*vb;
2409   Vec            vtmp;
2410 
2411   PetscFunctionBegin;
2412   ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr);
2413   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2414   if (idx) {
2415     for (i=0; i<A->rmap->n; i++) {
2416       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2417     }
2418   }
2419 
2420   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
2421   if (idx) {
2422     ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr);
2423   }
2424   ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
2425   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
2426 
2427   for (i=0; i<A->rmap->n; i++){
2428     if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
2429       va[i] = vb[i];
2430       if (idx) idx[i] = a->garray[idxb[i]];
2431     }
2432   }
2433 
2434   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2435   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
2436   if (idxb) {
2437     ierr = PetscFree(idxb);CHKERRQ(ierr);
2438   }
2439   ierr = VecDestroy(vtmp);CHKERRQ(ierr);
2440   PetscFunctionReturn(0);
2441 }
2442 
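/*
   Usage sketch (illustrative; m is the number of local rows): v receives,
   for each local row, the entry of largest absolute value and idx its global
   column index:

       Vec      v;
       PetscInt *idx;
       ierr = MatGetVecs(A,PETSC_NULL,&v);CHKERRQ(ierr);
       ierr = PetscMalloc(m*sizeof(PetscInt),&idx);CHKERRQ(ierr);
       ierr = MatGetRowMaxAbs(A,v,idx);CHKERRQ(ierr);

   Pass PETSC_NULL for idx when the column locations are not needed.
*/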
2443 #undef __FUNCT__
2444 #define __FUNCT__ "MatGetRowMinAbs_MPIAIJ"
2445 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2446 {
2447   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2448   PetscErrorCode ierr;
2449   PetscInt       i,*idxb = 0;
2450   PetscScalar    *va,*vb;
2451   Vec            vtmp;
2452 
2453   PetscFunctionBegin;
2454   ierr = MatGetRowMinAbs(a->A,v,idx);CHKERRQ(ierr);
2455   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2456   if (idx) {
2457     for (i=0; i<A->rmap->n; i++) {
2458       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2459     }
2460   }
2461 
2462   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
2463   if (idx) {
2464     ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr);
2465   }
2466   ierr = MatGetRowMinAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
2467   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
2468 
2469   for (i=0; i<A->rmap->n; i++){
2470     if (PetscAbsScalar(va[i]) > PetscAbsScalar(vb[i])) {
2471       va[i] = vb[i];
2472       if (idx) idx[i] = a->garray[idxb[i]];
2473     }
2474   }
2475 
2476   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2477   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
2478   if (idxb) {
2479     ierr = PetscFree(idxb);CHKERRQ(ierr);
2480   }
2481   ierr = VecDestroy(vtmp);CHKERRQ(ierr);
2482   PetscFunctionReturn(0);
2483 }
2484 
2485 #undef __FUNCT__
2486 #define __FUNCT__ "MatGetRowMin_MPIAIJ"
2487 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2488 {
2489   Mat_MPIAIJ    *mat    = (Mat_MPIAIJ *) A->data;
2490   PetscInt       n      = A->rmap->n;
2491   PetscInt       cstart = A->cmap->rstart;
2492   PetscInt      *cmap   = mat->garray;
2493   PetscInt      *diagIdx, *offdiagIdx;
2494   Vec            diagV, offdiagV;
2495   PetscScalar   *a, *diagA, *offdiagA;
2496   PetscInt       r;
2497   PetscErrorCode ierr;
2498 
2499   PetscFunctionBegin;
2500   ierr = PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);CHKERRQ(ierr);
2501   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr);
2502   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr);
2503   ierr = MatGetRowMin(mat->A, diagV,    diagIdx);CHKERRQ(ierr);
2504   ierr = MatGetRowMin(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
2505   ierr = VecGetArray(v,        &a);CHKERRQ(ierr);
2506   ierr = VecGetArray(diagV,    &diagA);CHKERRQ(ierr);
2507   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2508   for(r = 0; r < n; ++r) {
2509     if (PetscAbsScalar(diagA[r]) <= PetscAbsScalar(offdiagA[r])) {
2510       a[r]   = diagA[r];
2511       idx[r] = cstart + diagIdx[r];
2512     } else {
2513       a[r]   = offdiagA[r];
2514       idx[r] = cmap[offdiagIdx[r]];
2515     }
2516   }
2517   ierr = VecRestoreArray(v,        &a);CHKERRQ(ierr);
2518   ierr = VecRestoreArray(diagV,    &diagA);CHKERRQ(ierr);
2519   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2520   ierr = VecDestroy(diagV);CHKERRQ(ierr);
2521   ierr = VecDestroy(offdiagV);CHKERRQ(ierr);
2522   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
2523   PetscFunctionReturn(0);
2524 }
2525 
2526 #undef __FUNCT__
2527 #define __FUNCT__ "MatGetRowMax_MPIAIJ"
2528 PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2529 {
2530   Mat_MPIAIJ    *mat    = (Mat_MPIAIJ *) A->data;
2531   PetscInt       n      = A->rmap->n;
2532   PetscInt       cstart = A->cmap->rstart;
2533   PetscInt      *cmap   = mat->garray;
2534   PetscInt      *diagIdx, *offdiagIdx;
2535   Vec            diagV, offdiagV;
2536   PetscScalar   *a, *diagA, *offdiagA;
2537   PetscInt       r;
2538   PetscErrorCode ierr;
2539 
2540   PetscFunctionBegin;
2541   ierr = PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);CHKERRQ(ierr);
2542   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr);
2543   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr);
2544   ierr = MatGetRowMax(mat->A, diagV,    diagIdx);CHKERRQ(ierr);
2545   ierr = MatGetRowMax(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
2546   ierr = VecGetArray(v,        &a);CHKERRQ(ierr);
2547   ierr = VecGetArray(diagV,    &diagA);CHKERRQ(ierr);
2548   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2549   for(r = 0; r < n; ++r) {
2550     if (PetscAbsScalar(diagA[r]) >= PetscAbsScalar(offdiagA[r])) {
2551       a[r]   = diagA[r];
2552       idx[r] = cstart + diagIdx[r];
2553     } else {
2554       a[r]   = offdiagA[r];
2555       idx[r] = cmap[offdiagIdx[r]];
2556     }
2557   }
2558   ierr = VecRestoreArray(v,        &a);CHKERRQ(ierr);
2559   ierr = VecRestoreArray(diagV,    &diagA);CHKERRQ(ierr);
2560   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2561   ierr = VecDestroy(diagV);CHKERRQ(ierr);
2562   ierr = VecDestroy(offdiagV);CHKERRQ(ierr);
2563   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
2564   PetscFunctionReturn(0);
2565 }
2566 
2567 #undef __FUNCT__
2568 #define __FUNCT__ "MatGetSeqNonzerostructure_MPIAIJ"
2569 PetscErrorCode MatGetSeqNonzerostructure_MPIAIJ(Mat mat,Mat *newmat)
2570 {
2571   PetscErrorCode ierr;
2572   Mat            *dummy;
2573 
2574   PetscFunctionBegin;
2575   ierr = MatGetSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy);CHKERRQ(ierr);
2576   *newmat = *dummy;
2577   ierr = PetscFree(dummy);CHKERRQ(ierr);
2578   PetscFunctionReturn(0);
2579 }
2580 
2581 extern PetscErrorCode PETSCMAT_DLLEXPORT MatFDColoringApply_AIJ(Mat,MatFDColoring,Vec,MatStructure*,void*);
2582 /* -------------------------------------------------------------------*/
2583 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
2584        MatGetRow_MPIAIJ,
2585        MatRestoreRow_MPIAIJ,
2586        MatMult_MPIAIJ,
2587 /* 4*/ MatMultAdd_MPIAIJ,
2588        MatMultTranspose_MPIAIJ,
2589        MatMultTransposeAdd_MPIAIJ,
2590 #ifdef PETSC_HAVE_PBGL
2591        MatSolve_MPIAIJ,
2592 #else
2593        0,
2594 #endif
2595        0,
2596        0,
2597 /*10*/ 0,
2598        0,
2599        0,
2600        MatSOR_MPIAIJ,
2601        MatTranspose_MPIAIJ,
2602 /*15*/ MatGetInfo_MPIAIJ,
2603        MatEqual_MPIAIJ,
2604        MatGetDiagonal_MPIAIJ,
2605        MatDiagonalScale_MPIAIJ,
2606        MatNorm_MPIAIJ,
2607 /*20*/ MatAssemblyBegin_MPIAIJ,
2608        MatAssemblyEnd_MPIAIJ,
2609        MatSetOption_MPIAIJ,
2610        MatZeroEntries_MPIAIJ,
2611 /*24*/ MatZeroRows_MPIAIJ,
2612        0,
2613 #ifdef PETSC_HAVE_PBGL
2614        0,
2615 #else
2616        0,
2617 #endif
2618        0,
2619        0,
2620 /*29*/ MatSetUpPreallocation_MPIAIJ,
2621 #ifdef PETSC_HAVE_PBGL
2622        0,
2623 #else
2624        0,
2625 #endif
2626        0,
2627        0,
2628        0,
2629 /*34*/ MatDuplicate_MPIAIJ,
2630        0,
2631        0,
2632        0,
2633        0,
2634 /*39*/ MatAXPY_MPIAIJ,
2635        MatGetSubMatrices_MPIAIJ,
2636        MatIncreaseOverlap_MPIAIJ,
2637        MatGetValues_MPIAIJ,
2638        MatCopy_MPIAIJ,
2639 /*44*/ MatGetRowMax_MPIAIJ,
2640        MatScale_MPIAIJ,
2641        0,
2642        0,
2643        0,
2644 /*49*/ MatSetBlockSize_MPIAIJ,
2645        0,
2646        0,
2647        0,
2648        0,
2649 /*54*/ MatFDColoringCreate_MPIAIJ,
2650        0,
2651        MatSetUnfactored_MPIAIJ,
2652        MatPermute_MPIAIJ,
2653        0,
2654 /*59*/ MatGetSubMatrix_MPIAIJ,
2655        MatDestroy_MPIAIJ,
2656        MatView_MPIAIJ,
2657        0,
2658        0,
2659 /*64*/ 0,
2660        0,
2661        0,
2662        0,
2663        0,
2664 /*69*/ MatGetRowMaxAbs_MPIAIJ,
2665        MatGetRowMinAbs_MPIAIJ,
2666        0,
2667        MatSetColoring_MPIAIJ,
2668 #if defined(PETSC_HAVE_ADIC)
2669        MatSetValuesAdic_MPIAIJ,
2670 #else
2671        0,
2672 #endif
2673        MatSetValuesAdifor_MPIAIJ,
2674 /*75*/ MatFDColoringApply_AIJ,
2675        0,
2676        0,
2677        0,
2678        0,
2679 /*80*/ 0,
2680        0,
2681        0,
2682 /*83*/ MatLoad_MPIAIJ,
2683        0,
2684        0,
2685        0,
2686        0,
2687        0,
2688 /*89*/ MatMatMult_MPIAIJ_MPIAIJ,
2689        MatMatMultSymbolic_MPIAIJ_MPIAIJ,
2690        MatMatMultNumeric_MPIAIJ_MPIAIJ,
2691        MatPtAP_Basic,
2692        MatPtAPSymbolic_MPIAIJ,
2693 /*94*/ MatPtAPNumeric_MPIAIJ,
2694        0,
2695        0,
2696        0,
2697        0,
2698 /*99*/ 0,
2699        MatPtAPSymbolic_MPIAIJ_MPIAIJ,
2700        MatPtAPNumeric_MPIAIJ_MPIAIJ,
2701        MatConjugate_MPIAIJ,
2702        0,
2703 /*104*/MatSetValuesRow_MPIAIJ,
2704        MatRealPart_MPIAIJ,
2705        MatImaginaryPart_MPIAIJ,
2706        0,
2707        0,
2708 /*109*/0,
2709        MatGetRedundantMatrix_MPIAIJ,
2710        MatGetRowMin_MPIAIJ,
2711        0,
2712        0,
2713 /*114*/MatGetSeqNonzerostructure_MPIAIJ,
2714        0,
2715        0,
2716        0,
2717        0,
2718 /*119*/0,
2719        0,
2720        0,
2721        0
2722 };
2723 
2724 /* ----------------------------------------------------------------------------------------*/
2725 
2726 EXTERN_C_BEGIN
2727 #undef __FUNCT__
2728 #define __FUNCT__ "MatStoreValues_MPIAIJ"
2729 PetscErrorCode PETSCMAT_DLLEXPORT MatStoreValues_MPIAIJ(Mat mat)
2730 {
2731   Mat_MPIAIJ     *aij = (Mat_MPIAIJ *)mat->data;
2732   PetscErrorCode ierr;
2733 
2734   PetscFunctionBegin;
2735   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
2736   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
2737   PetscFunctionReturn(0);
2738 }
2739 EXTERN_C_END
2740 
2741 EXTERN_C_BEGIN
2742 #undef __FUNCT__
2743 #define __FUNCT__ "MatRetrieveValues_MPIAIJ"
2744 PetscErrorCode PETSCMAT_DLLEXPORT MatRetrieveValues_MPIAIJ(Mat mat)
2745 {
2746   Mat_MPIAIJ     *aij = (Mat_MPIAIJ *)mat->data;
2747   PetscErrorCode ierr;
2748 
2749   PetscFunctionBegin;
2750   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
2751   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
2752   PetscFunctionReturn(0);
2753 }
2754 EXTERN_C_END
2755 
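/*
   A minimal usage sketch for the store/retrieve pair above (error checking
   omitted). The assumption is an already assembled MPIAIJ matrix A whose
   nonzero structure has been frozen, since new nonzero locations would
   invalidate the saved values:

     MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);
     MatStoreValues(A);       save the current numerical values
       ... overwrite the values of A ...
     MatRetrieveValues(A);    restore the saved values
*/
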
2756 EXTERN_C_BEGIN
2757 #undef __FUNCT__
2758 #define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ"
2759 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
2760 {
2761   Mat_MPIAIJ     *b;
2762   PetscErrorCode ierr;
2763   PetscInt       i;
2764 
2765   PetscFunctionBegin;
2766   if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5;
2767   if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2;
2768   if (d_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz);
2769   if (o_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz);
2770 
2771   ierr = PetscLayoutSetBlockSize(B->rmap,1);CHKERRQ(ierr);
2772   ierr = PetscLayoutSetBlockSize(B->cmap,1);CHKERRQ(ierr);
2773   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
2774   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
2775   if (d_nnz) {
2776     for (i=0; i<B->rmap->n; i++) {
2777       if (d_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %D value %D",i,d_nnz[i]);
2778     }
2779   }
2780   if (o_nnz) {
2781     for (i=0; i<B->rmap->n; i++) {
2782       if (o_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %D value %D",i,o_nnz[i]);
2783     }
2784   }
2785   b = (Mat_MPIAIJ*)B->data;
2786 
2787   if (!B->preallocated) {
2788     /* Explicitly create 2 MATSEQAIJ matrices. */
2789     ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
2790     ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr);
2791     ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr);
2792     ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr);
2793     ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
2794     ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr);
2795     ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr);
2796     ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr);
2797   }
2798 
2799   ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr);
2800   ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr);
2801   B->preallocated = PETSC_TRUE;
2802   PetscFunctionReturn(0);
2803 }
2804 EXTERN_C_END
2805 
2806 #undef __FUNCT__
2807 #define __FUNCT__ "MatDuplicate_MPIAIJ"
2808 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
2809 {
2810   Mat            mat;
2811   Mat_MPIAIJ     *a,*oldmat = (Mat_MPIAIJ*)matin->data;
2812   PetscErrorCode ierr;
2813 
2814   PetscFunctionBegin;
2815   *newmat       = 0;
2816   ierr = MatCreate(((PetscObject)matin)->comm,&mat);CHKERRQ(ierr);
2817   ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr);
2818   ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr);
2819   ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
2820   a    = (Mat_MPIAIJ*)mat->data;
2821 
2822   mat->factortype    = matin->factortype;
2823   mat->rmap->bs      = matin->rmap->bs;
2824   mat->assembled     = PETSC_TRUE;
2825   mat->insertmode    = NOT_SET_VALUES;
2826   mat->preallocated  = PETSC_TRUE;
2827 
2828   a->size           = oldmat->size;
2829   a->rank           = oldmat->rank;
2830   a->donotstash     = oldmat->donotstash;
2831   a->roworiented    = oldmat->roworiented;
2832   a->rowindices     = 0;
2833   a->rowvalues      = 0;
2834   a->getrowactive   = PETSC_FALSE;
2835 
2836   ierr = PetscLayoutCopy(matin->rmap,&mat->rmap);CHKERRQ(ierr);
2837   ierr = PetscLayoutCopy(matin->cmap,&mat->cmap);CHKERRQ(ierr);
2838 
2839   if (oldmat->colmap) {
2840 #if defined (PETSC_USE_CTABLE)
2841     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
2842 #else
2843     ierr = PetscMalloc((mat->cmap->N)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr);
2844     ierr = PetscLogObjectMemory(mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
2845     ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
2846 #endif
2847   } else a->colmap = 0;
2848   if (oldmat->garray) {
2849     PetscInt len;
2850     len  = oldmat->B->cmap->n;
2851     ierr = PetscMalloc((len+1)*sizeof(PetscInt),&a->garray);CHKERRQ(ierr);
2852     ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr);
2853     if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); }
2854   } else a->garray = 0;
2855 
2856   ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
2857   ierr = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr);
2858   ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
2859   ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr);
2860   ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
2861   ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr);
2862   ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
2863   ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr);
2864   ierr = PetscFListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr);
2865   *newmat = mat;
2866   PetscFunctionReturn(0);
2867 }
2868 
2869 #undef __FUNCT__
2870 #define __FUNCT__ "MatLoad_MPIAIJ"
2871 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer)
2872 {
2873   PetscScalar    *vals,*svals;
2874   MPI_Comm       comm = ((PetscObject)viewer)->comm;
2875   MPI_Status     status;
2876   PetscErrorCode ierr;
2877   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag,mpicnt,mpimaxnz;
2878   PetscInt       i,nz,j,rstart,rend,mmax,maxnz = 0,grows,gcols;
2879   PetscInt       header[4],*rowlengths = 0,M,N,m,*cols;
2880   PetscInt       *ourlens = PETSC_NULL,*procsnz = PETSC_NULL,*offlens = PETSC_NULL,jj,*mycols,*smycols;
2881   PetscInt       cend,cstart,n,*rowners,sizesset=1;
2882   int            fd;
2883 
2884   PetscFunctionBegin;
2885   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2886   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2887   if (!rank) {
2888     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
2889     ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
2890     if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
2891   }
2892 
2893   if (newMat->rmap->n < 0 && newMat->rmap->N < 0 && newMat->cmap->n < 0 && newMat->cmap->N < 0) sizesset = 0;
2894 
2895   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
2896   M = header[1]; N = header[2];
2897   /* If global rows/cols are set to PETSC_DECIDE, set it to the sizes given in the file */
2898   if (sizesset && newMat->rmap->N < 0) newMat->rmap->N = M;
2899   if (sizesset && newMat->cmap->N < 0) newMat->cmap->N = N;
2900 
2901   /* If global sizes are set, check if they are consistent with that given in the file */
2902   if (sizesset) {
2903     ierr = MatGetSize(newMat,&grows,&gcols);CHKERRQ(ierr);
2904   }
2905   if (sizesset && newMat->rmap->N != grows) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows: Matrix in file has (%D) and input matrix has (%D)",M,grows);
2906   if (sizesset && newMat->cmap->N != gcols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols: Matrix in file has (%D) and input matrix has (%D)",N,gcols);
2907 
2908   /* determine ownership of all rows */
2909   if (newMat->rmap->n < 0 ) m    = M/size + ((M % size) > rank); /* PETSC_DECIDE */
2910   else m = newMat->rmap->n; /* Set by user */
2911 
2912   ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr);
2913   ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
2914 
2915   /* First process needs enough room for process with most rows */
2916   if (!rank) {
2917     mmax       = rowners[1];
2918     for (i=2; i<=size; i++) {
2919       mmax = PetscMax(mmax,rowners[i]);
2920     }
2921   } else mmax = m;
2922 
2923   rowners[0] = 0;
2924   for (i=2; i<=size; i++) {
2925     rowners[i] += rowners[i-1];
2926   }
2927   rstart = rowners[rank];
2928   rend   = rowners[rank+1];
2929 
2930   /* distribute row lengths to all processors */
2931   ierr    = PetscMalloc2(mmax,PetscInt,&ourlens,mmax,PetscInt,&offlens);CHKERRQ(ierr);
2932   if (!rank) {
2933     ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr);
2934     ierr = PetscMalloc((mmax+1)*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
2935     ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr);
2936     ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr);
2937     for (j=0; j<m; j++) {
2938       procsnz[0] += ourlens[j];
2939     }
2940     for (i=1; i<size; i++) {
2941       ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr);
2942       /* calculate the number of nonzeros on each processor */
2943       for (j=0; j<rowners[i+1]-rowners[i]; j++) {
2944         procsnz[i] += rowlengths[j];
2945       }
2946       mpicnt = PetscMPIIntCast(rowners[i+1]-rowners[i]);
2947       ierr   = MPI_Send(rowlengths,mpicnt,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
2948     }
2949     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
2950   } else {
2951     mpicnt = PetscMPIIntCast(m);
2952     ierr   = MPI_Recv(ourlens,mpicnt,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
2953   }
2954 
2955   if (!rank) {
2956     /* determine max buffer needed and allocate it */
2957     maxnz = 0;
2958     for (i=0; i<size; i++) {
2959       maxnz = PetscMax(maxnz,procsnz[i]);
2960     }
2961     ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr);
2962 
2963     /* read in my part of the matrix column indices  */
2964     nz   = procsnz[0];
2965     ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr);
2966     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
2967 
2968     /* read in everyone else's and ship off */
2969     for (i=1; i<size; i++) {
2970       nz     = procsnz[i];
2971       ierr   = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
2972       mpicnt = PetscMPIIntCast(nz);
2973       ierr   = MPI_Send(cols,mpicnt,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
2974     }
2975     ierr = PetscFree(cols);CHKERRQ(ierr);
2976   } else {
2977     /* determine buffer space needed for message */
2978     nz = 0;
2979     for (i=0; i<m; i++) {
2980       nz += ourlens[i];
2981     }
2982     ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr);
2983 
2984     /* receive message of column indices */
2985     mpicnt = PetscMPIIntCast(nz);
2986     ierr = MPI_Recv(mycols,mpicnt,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
2987     ierr = MPI_Get_count(&status,MPIU_INT,&mpimaxnz);CHKERRQ(ierr);
2988     if (mpimaxnz == MPI_UNDEFINED) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"MPI_Get_count() returned MPI_UNDEFINED, expected %d",mpicnt);
2989     else if (mpimaxnz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"MPI_Get_count() returned impossible negative value %d, expected %d",mpimaxnz,mpicnt);
2990     else if (mpimaxnz != mpicnt) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file: expected %d received %d",mpicnt,mpimaxnz);
2991   }
2992 
2993   /* determine column ownership if matrix is not square */
2994   if (N != M) {
2995     if (newMat->cmap->n < 0) n      = N/size + ((N % size) > rank);
2996     else n = newMat->cmap->n;
2997     ierr   = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
2998     cstart = cend - n;
2999   } else {
3000     cstart = rstart;
3001     cend   = rend;
3002     n      = cend - cstart;
3003   }
3004 
3005   /* loop over local rows, determining number of off diagonal entries */
3006   ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr);
3007   jj = 0;
3008   for (i=0; i<m; i++) {
3009     for (j=0; j<ourlens[i]; j++) {
3010       if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++;
3011       jj++;
3012     }
3013   }
3014 
3015   for (i=0; i<m; i++) {
3016     ourlens[i] -= offlens[i];
3017   }
3018   if (!sizesset) {
3019     ierr = MatSetSizes(newMat,m,n,M,N);CHKERRQ(ierr);
3020   }
3021   ierr = MatMPIAIJSetPreallocation(newMat,0,ourlens,0,offlens);CHKERRQ(ierr);
3022 
3023   for (i=0; i<m; i++) {
3024     ourlens[i] += offlens[i];
3025   }
3026 
3027   if (!rank) {
3028     ierr = PetscMalloc((maxnz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
3029 
3030     /* read in my part of the matrix numerical values  */
3031     nz   = procsnz[0];
3032     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3033 
3034     /* insert into matrix */
3035     jj      = rstart;
3036     smycols = mycols;
3037     svals   = vals;
3038     for (i=0; i<m; i++) {
3039       ierr = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
3040       smycols += ourlens[i];
3041       svals   += ourlens[i];
3042       jj++;
3043     }
3044 
3045     /* read in other processors and ship out */
3046     for (i=1; i<size; i++) {
3047       nz     = procsnz[i];
3048       ierr   = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3049       mpicnt = PetscMPIIntCast(nz);
3050       ierr   = MPI_Send(vals,mpicnt,MPIU_SCALAR,i,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr);
3051     }
3052     ierr = PetscFree(procsnz);CHKERRQ(ierr);
3053   } else {
3054     /* receive numeric values */
3055     ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
3056 
3057     /* receive message of values */
3058     mpicnt = PetscMPIIntCast(nz);
3059     ierr   = MPI_Recv(vals,mpicnt,MPIU_SCALAR,0,((PetscObject)newMat)->tag,comm,&status);CHKERRQ(ierr);
3060     ierr   = MPI_Get_count(&status,MPIU_SCALAR,&mpimaxnz);CHKERRQ(ierr);
3061     if (mpimaxnz == MPI_UNDEFINED) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"MPI_Get_count() returned MPI_UNDEFINED, expected %d",mpicnt);
3062     else if (mpimaxnz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"MPI_Get_count() returned impossible negative value %d, expected %d",mpimaxnz,mpicnt);
3063     else if (mpimaxnz != mpicnt) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file: expected %d received %d",mpicnt,mpimaxnz);
3064 
3065     /* insert into matrix */
3066     jj      = rstart;
3067     smycols = mycols;
3068     svals   = vals;
3069     for (i=0; i<m; i++) {
3070       ierr     = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
3071       smycols += ourlens[i];
3072       svals   += ourlens[i];
3073       jj++;
3074     }
3075   }
3076   ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr);
3077   ierr = PetscFree(vals);CHKERRQ(ierr);
3078   ierr = PetscFree(mycols);CHKERRQ(ierr);
3079   ierr = PetscFree(rowners);CHKERRQ(ierr);
3080 
3081   ierr = MatAssemblyBegin(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3082   ierr = MatAssemblyEnd(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3083   PetscFunctionReturn(0);
3084 }
3085 
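/*
   A minimal sketch of driving the loader above through the public interface,
   assuming a hypothetical binary file "matrix.dat" written earlier with
   MatView() on a binary viewer (error checking omitted):

     Mat         A;
     PetscViewer fd;
     PetscViewerBinaryOpen(PETSC_COMM_WORLD,"matrix.dat",FILE_MODE_READ,&fd);
     MatCreate(PETSC_COMM_WORLD,&A);
     MatSetType(A,MATMPIAIJ);
     MatLoad(A,fd);
     PetscViewerDestroy(fd);
*/
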
3086 #undef __FUNCT__
3087 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ"
3088 PetscErrorCode MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat)
3089 {
3090   PetscErrorCode ierr;
3091   IS             iscol_local;
3092   PetscInt       csize;
3093 
3094   PetscFunctionBegin;
3095   ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
3096   if (call == MAT_REUSE_MATRIX) {
3097     ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr);
3098     if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3099   } else {
3100     ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
3101   }
3102   ierr = MatGetSubMatrix_MPIAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr);
3103   if (call == MAT_INITIAL_MATRIX) {
3104     ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
3105     ierr = ISDestroy(iscol_local);CHKERRQ(ierr);
3106   }
3107   PetscFunctionReturn(0);
3108 }
3109 
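/*
   A sketch of the public call that reaches the routine above, assuming A is an
   assembled parallel matrix and every process supplies its local piece of the
   row and column index sets (error checking omitted):

     IS       rows,cols;
     Mat      sub;
     PetscInt rstart,rend;
     MatGetOwnershipRange(A,&rstart,&rend);
     ISCreateStride(PETSC_COMM_WORLD,rend-rstart,rstart,1,&rows);
     ISCreateStride(PETSC_COMM_WORLD,rend-rstart,rstart,1,&cols);
     MatGetSubMatrix(A,rows,cols,MAT_INITIAL_MATRIX,&sub);

   A later call with MAT_REUSE_MATRIX and the same index sets reuses the
   "ISAllGather" object composed on the submatrix above.
*/
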
3110 #undef __FUNCT__
3111 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ_Private"
3112 /*
3113     Not great since it makes two copies of the submatrix: first a local SeqAIJ
3114   matrix, and then the end result obtained by concatenating the local matrices.
3115   Writing it directly would be much like MatGetSubMatrices_MPIAIJ().
3116 
3117   Note: This requires a sequential iscol with all indices.
3118 */
3119 PetscErrorCode MatGetSubMatrix_MPIAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat)
3120 {
3121   PetscErrorCode ierr;
3122   PetscMPIInt    rank,size;
3123   PetscInt       i,m,n,rstart,row,rend,nz,*cwork,j;
3124   PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal;
3125   Mat            *local,M,Mreuse;
3126   MatScalar      *vwork,*aa;
3127   MPI_Comm       comm = ((PetscObject)mat)->comm;
3128   Mat_SeqAIJ     *aij;
3129 
3130 
3131   PetscFunctionBegin;
3132   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3133   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3134 
3135   if (call ==  MAT_REUSE_MATRIX) {
3136     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr);
3137     if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3138     local = &Mreuse;
3139     ierr  = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);CHKERRQ(ierr);
3140   } else {
3141     ierr   = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr);
3142     Mreuse = *local;
3143     ierr   = PetscFree(local);CHKERRQ(ierr);
3144   }
3145 
3146   /*
3147       m - number of local rows
3148       n - number of columns (same on all processors)
3149       rstart - first row in new global matrix generated
3150   */
3151   ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
3152   if (call == MAT_INITIAL_MATRIX) {
3153     aij = (Mat_SeqAIJ*)(Mreuse)->data;
3154     ii  = aij->i;
3155     jj  = aij->j;
3156 
3157     /*
3158         Determine the number of non-zeros in the diagonal and off-diagonal
3159         portions of the matrix in order to do correct preallocation
3160     */
3161 
3162     /* first get start and end of "diagonal" columns */
3163     if (csize == PETSC_DECIDE) {
3164       ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
3165       if (mglobal == n) { /* square matrix */
3166 	nlocal = m;
3167       } else {
3168         nlocal = n/size + ((n % size) > rank);
3169       }
3170     } else {
3171       nlocal = csize;
3172     }
3173     ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3174     rstart = rend - nlocal;
3175     if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n);
3176 
3177     /* next, compute all the lengths */
3178     ierr  = PetscMalloc((2*m+1)*sizeof(PetscInt),&dlens);CHKERRQ(ierr);
3179     olens = dlens + m;
3180     for (i=0; i<m; i++) {
3181       jend = ii[i+1] - ii[i];
3182       olen = 0;
3183       dlen = 0;
3184       for (j=0; j<jend; j++) {
3185         if (*jj < rstart || *jj >= rend) olen++;
3186         else dlen++;
3187         jj++;
3188       }
3189       olens[i] = olen;
3190       dlens[i] = dlen;
3191     }
3192     ierr = MatCreate(comm,&M);CHKERRQ(ierr);
3193     ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr);
3194     ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr);
3195     ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr);
3196     ierr = PetscFree(dlens);CHKERRQ(ierr);
3197   } else {
3198     PetscInt ml,nl;
3199 
3200     M = *newmat;
3201     ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
3202     if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
3203     ierr = MatZeroEntries(M);CHKERRQ(ierr);
3204     /*
3205          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
3206        rather than the slower MatSetValues().
3207     */
3208     M->was_assembled = PETSC_TRUE;
3209     M->assembled     = PETSC_FALSE;
3210   }
3211   ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
3212   aij = (Mat_SeqAIJ*)(Mreuse)->data;
3213   ii  = aij->i;
3214   jj  = aij->j;
3215   aa  = aij->a;
3216   for (i=0; i<m; i++) {
3217     row   = rstart + i;
3218     nz    = ii[i+1] - ii[i];
3219     cwork = jj;     jj += nz;
3220     vwork = aa;     aa += nz;
3221     ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
3222   }
3223 
3224   ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3225   ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3226   *newmat = M;
3227 
3228   /* save submatrix used in processor for next request */
3229   if (call ==  MAT_INITIAL_MATRIX) {
3230     ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
3231     ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr);
3232   }
3233 
3234   PetscFunctionReturn(0);
3235 }
3236 
3237 EXTERN_C_BEGIN
3238 #undef __FUNCT__
3239 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR_MPIAIJ"
3240 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[])
3241 {
3242   PetscInt       m,cstart, cend,j,nnz,i,d;
3243   PetscInt       *d_nnz,*o_nnz,nnz_max = 0,rstart,ii;
3244   const PetscInt *JJ;
3245   PetscScalar    *values;
3246   PetscErrorCode ierr;
3247 
3248   PetscFunctionBegin;
3249   if (Ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0 it is %D",Ii[0]);
3250 
3251   ierr = PetscLayoutSetBlockSize(B->rmap,1);CHKERRQ(ierr);
3252   ierr = PetscLayoutSetBlockSize(B->cmap,1);CHKERRQ(ierr);
3253   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3254   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3255   m      = B->rmap->n;
3256   cstart = B->cmap->rstart;
3257   cend   = B->cmap->rend;
3258   rstart = B->rmap->rstart;
3259 
3260   ierr  = PetscMalloc2(m,PetscInt,&d_nnz,m,PetscInt,&o_nnz);CHKERRQ(ierr);
3261 
3262 #if defined(PETSC_USE_DEBUG)
3263   for (i=0; i<m; i++) {
3264     nnz     = Ii[i+1]- Ii[i];
3265     JJ      = J + Ii[i];
3266     if (nnz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative number of columns %D",i,nnz);
3267     if (nnz && (JJ[0] < 0)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index %D",i,JJ[0]);
3268     if (nnz && (JJ[nnz-1] >= B->cmap->N)) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N);
3269   }
3270 #endif
3271 
3272   for (i=0; i<m; i++) {
3273     nnz     = Ii[i+1]- Ii[i];
3274     JJ      = J + Ii[i];
3275     nnz_max = PetscMax(nnz_max,nnz);
3276     d       = 0;
3277     for (j=0; j<nnz; j++) {
3278       if (cstart <= JJ[j] && JJ[j] < cend) d++;
3279     }
3280     d_nnz[i] = d;
3281     o_nnz[i] = nnz - d;
3282   }
3283   ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
3284   ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);
3285 
3286   if (v) values = (PetscScalar*)v;
3287   else {
3288     ierr = PetscMalloc((nnz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr);
3289     ierr = PetscMemzero(values,nnz_max*sizeof(PetscScalar));CHKERRQ(ierr);
3290   }
3291 
3292   for (i=0; i<m; i++) {
3293     ii   = i + rstart;
3294     nnz  = Ii[i+1]- Ii[i];
3295     ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ? Ii[i] : 0),INSERT_VALUES);CHKERRQ(ierr);
3296   }
3297   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3298   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3299 
3300   if (!v) {
3301     ierr = PetscFree(values);CHKERRQ(ierr);
3302   }
3303   PetscFunctionReturn(0);
3304 }
3305 EXTERN_C_END
3306 
3307 #undef __FUNCT__
3308 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR"
3309 /*@
3310    MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format
3311    (the default parallel PETSc format).
3312 
3313    Collective on MPI_Comm
3314 
3315    Input Parameters:
3316 +  B - the matrix
3317 .  i - the indices into j for the start of each local row (starts with zero)
3318 .  j - the column indices for each local row (starts with zero)
3319 -  v - optional values in the matrix
3320 
3321    Level: developer
3322 
3323    Notes:
3324        The i, j, and v arrays ARE copied by this routine into the internal format used by PETSc;
3325      thus you CANNOT change the matrix entries by changing the values of v[] after you have
3326      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3327 
3328        The i and j indices are 0 based, and the entries of i are offsets into the local j array.
3329 
3330        The format used for the sparse matrix input is equivalent to a
3331     row-major ordering, i.e. for the following matrix, the input data expected is
3332     as shown:
3333 
3334         1 0 0
3335         2 0 3     P0
3336        -------
3337         4 5 6     P1
3338 
3339      Process0 [P0]: rows_owned=[0,1]
3340         i =  {0,1,3}  [size = nrow+1  = 2+1]
3341         j =  {0,0,2}  [size = nz = 3]
3342         v =  {1,2,3}  [size = nz = 3]
3343 
3344      Process1 [P1]: rows_owned=[2]
3345         i =  {0,3}    [size = nrow+1  = 1+1]
3346         j =  {0,1,2}  [size = nz = 3]
3347         v =  {4,5,6}  [size = nz = 3]
3348 
3349 .keywords: matrix, aij, compressed row, sparse, parallel
3350 
3351 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateMPIAIJ(), MPIAIJ,
3352           MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays()
3353 @*/
3354 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
3355 {
3356   PetscErrorCode ierr,(*f)(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]);
3357 
3358   PetscFunctionBegin;
3359   ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",(void (**)(void))&f);CHKERRQ(ierr);
3360   if (f) {
3361     ierr = (*f)(B,i,j,v);CHKERRQ(ierr);
3362   }
3363   PetscFunctionReturn(0);
3364 }
3365 
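/*
   A sketch of calling the routine above with the two-process example from the
   man page: rank 0 owns rows 0-1 of the 3x3 matrix, rank 1 owns row 2. The
   arrays shown are rank 0's (error checking omitted):

     Mat         B;
     PetscInt    i[] = {0,1,3};         row offsets for the 2 local rows
     PetscInt    j[] = {0,0,2};         global column indices
     PetscScalar v[] = {1.0,2.0,3.0};
     MatCreate(PETSC_COMM_WORLD,&B);
     MatSetSizes(B,2,PETSC_DECIDE,3,3);
     MatSetType(B,MATMPIAIJ);
     MatMPIAIJSetPreallocationCSR(B,i,j,v);
*/
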
3366 #undef __FUNCT__
3367 #define __FUNCT__ "MatMPIAIJSetPreallocation"
3368 /*@C
3369    MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format
3370    (the default parallel PETSc format).  For good matrix assembly performance
3371    the user should preallocate the matrix storage by setting the parameters
3372    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3373    performance can be increased by more than a factor of 50.
3374 
3375    Collective on MPI_Comm
3376 
3377    Input Parameters:
3378 +  A - the matrix
3379 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
3380            (same value is used for all local rows)
3381 .  d_nnz - array containing the number of nonzeros in the various rows of the
3382            DIAGONAL portion of the local submatrix (possibly different for each row)
3383            or PETSC_NULL, if d_nz is used to specify the nonzero structure.
3384            The size of this array is equal to the number of local rows, i.e 'm'.
3385            You must leave room for the diagonal entry even if it is zero.
3386 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
3387            submatrix (same value is used for all local rows).
3388 -  o_nnz - array containing the number of nonzeros in the various rows of the
3389            OFF-DIAGONAL portion of the local submatrix (possibly different for
3390            each row) or PETSC_NULL, if o_nz is used to specify the nonzero
3391            structure. The size of this array is equal to the number
3392            of local rows, i.e 'm'.
3393 
3394    If the *_nnz parameter is given then the *_nz parameter is ignored
3395 
3396    The AIJ format (also called the Yale sparse matrix format or
3397    compressed row storage (CSR)), is fully compatible with standard Fortran 77
3398    storage.  The stored row and column indices begin with zero.
3399    See the <A href="../../docs/manual.pdf#nameddest=ch_mat">Mat chapter of the users manual</A> for details.
3400 
3401    The parallel matrix is partitioned such that the first m0 rows belong to
3402    process 0, the next m1 rows belong to process 1, the next m2 rows belong
3403    to process 2 etc.. where m0,m1,m2... are the input parameter 'm'.
3404 
3405    The DIAGONAL portion of the local submatrix of a processor can be defined
3406    as the submatrix obtained by extracting the part corresponding
3407    to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the
3408    first row that belongs to the processor, and r2 is the last row belonging
3409    to this processor. This is a square mxm matrix. The remaining portion
3410    of the local submatrix (mxN) constitutes the OFF-DIAGONAL portion.
3411 
3412    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
3413 
3414    You can call MatGetInfo() to get information on how effective the preallocation was;
3415    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
3416    You can also run with the option -info and look for messages with the string
3417    malloc in them to see if additional memory allocation was needed.
3418 
3419    Example usage:
3420 
3421    Consider the following 8x8 matrix with 34 non-zero values, that is
3422    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
3423    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
3424    as follows:
3425 
3426 .vb
3427             1  2  0  |  0  3  0  |  0  4
3428     Proc0   0  5  6  |  7  0  0  |  8  0
3429             9  0 10  | 11  0  0  | 12  0
3430     -------------------------------------
3431            13  0 14  | 15 16 17  |  0  0
3432     Proc1   0 18  0  | 19 20 21  |  0  0
3433             0  0  0  | 22 23  0  | 24  0
3434     -------------------------------------
3435     Proc2  25 26 27  |  0  0 28  | 29  0
3436            30  0  0  | 31 32 33  |  0 34
3437 .ve
3438 
3439    This can be represented as a collection of submatrices as:
3440 
3441 .vb
3442       A B C
3443       D E F
3444       G H I
3445 .ve
3446 
3447    Where the submatrices A,B,C are owned by proc0, D,E,F are
3448    owned by proc1, G,H,I are owned by proc2.
3449 
3450    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3451    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3452    The 'M','N' parameters are 8,8, and have the same values on all procs.
3453 
3454    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
3455    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
3456    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
3457    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
3458    part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
3459    matrix, and [DF] as another SeqAIJ matrix.
3460 
3461    When d_nz, o_nz parameters are specified, d_nz storage elements are
3462    allocated for every row of the local diagonal submatrix, and o_nz
3463    storage locations are allocated for every row of the OFF-DIAGONAL submat.
3464    One way to choose d_nz and o_nz is to use the maximum number of nonzeros
3465    per local row for each of the local DIAGONAL and OFF-DIAGONAL submatrices.
3466    In this case, the values of d_nz,o_nz are:
3467 .vb
3468      proc0 : dnz = 2, o_nz = 2
3469      proc1 : dnz = 3, o_nz = 2
3470      proc2 : dnz = 1, o_nz = 4
3471 .ve
3472    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
3473    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
3474    for proc2, i.e. we are using 12+15+10=37 storage locations to store
3475    34 values.
3476 
3477    When d_nnz, o_nnz parameters are specified, the storage is specified
3478    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
3479    In the above case the values for d_nnz,o_nnz are:
3480 .vb
3481      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
3482      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
3483      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
3484 .ve
3485    Here the space allocated is the sum of all the above values, i.e. 34, and
3486    hence pre-allocation is perfect.
3487 
3488    Level: intermediate
3489 
3490 .keywords: matrix, aij, compressed row, sparse, parallel
3491 
3492 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIAIJ(), MatMPIAIJSetPreallocationCSR(),
3493           MPIAIJ, MatGetInfo()
3494 @*/
3495 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
3496 {
3497   PetscErrorCode ierr,(*f)(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]);
3498 
3499   PetscFunctionBegin;
3500   ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr);
3501   if (f) {
3502     ierr = (*f)(B,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
3503   }
3504   PetscFunctionReturn(0);
3505 }
3506 
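/*
   A sketch of the exact preallocation for the 8x8 example in the man page
   above, as seen from proc0, which owns rows 0-2 (error checking omitted):

     Mat      A;
     PetscInt d_nnz[] = {2,2,2};        per-row nonzeros in the DIAGONAL block [A]
     PetscInt o_nnz[] = {2,2,2};        per-row nonzeros in the OFF-DIAGONAL block [BC]
     MatCreate(PETSC_COMM_WORLD,&A);
     MatSetSizes(A,3,3,8,8);
     MatSetType(A,MATMPIAIJ);
     MatMPIAIJSetPreallocation(A,0,d_nnz,0,o_nnz);
*/
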
3507 #undef __FUNCT__
3508 #define __FUNCT__ "MatCreateMPIAIJWithArrays"
3509 /*@
3510      MatCreateMPIAIJWithArrays - creates a MPI AIJ matrix using arrays that contain in standard
3511          CSR format the local rows.
3512 
3513    Collective on MPI_Comm
3514 
3515    Input Parameters:
3516 +  comm - MPI communicator
3517 .  m - number of local rows (Cannot be PETSC_DECIDE)
3518 .  n - This value should be the same as the local size used in creating the
3519        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
3520        calculated if N is given) For square matrices n is almost always m.
3521 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
3522 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
3523 .   i - row indices
3524 .   j - column indices
3525 -   a - matrix values
3526 
3527    Output Parameter:
3528 .   mat - the matrix
3529 
3530    Level: intermediate
3531 
3532    Notes:
3533        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
3534      thus you CANNOT change the matrix entries by changing the values of a[] after you have
3535      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3536 
3537        The i and j indices are 0 based, and the entries of i are offsets into the local j array.
3538 
3539        The format used for the sparse matrix input is equivalent to a
3540     row-major ordering, i.e. for the following matrix, the input data expected is
3541     as shown:
3542 
3543         1 0 0
3544         2 0 3     P0
3545        -------
3546         4 5 6     P1
3547 
3548      Process0 [P0]: rows_owned=[0,1]
3549         i =  {0,1,3}  [size = nrow+1  = 2+1]
3550         j =  {0,0,2}  [size = nz = 3]
3551         v =  {1,2,3}  [size = nz = 3]
3552 
3553      Process1 [P1]: rows_owned=[2]
3554         i =  {0,3}    [size = nrow+1  = 1+1]
3555         j =  {0,1,2}  [size = nz = 3]
3556         v =  {4,5,6}  [size = nz = 3]
3557 
3558 .keywords: matrix, aij, compressed row, sparse, parallel
3559 
3560 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
3561           MPIAIJ, MatCreateMPIAIJ(), MatCreateMPIAIJWithSplitArrays()
3562 @*/
3563 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
3564 {
3565   PetscErrorCode ierr;
3566 
3567   PetscFunctionBegin;
3568   if (i[0]) {
3569     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
3570   }
3571   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
3572   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
3573   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
3574   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
3575   ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr);
3576   PetscFunctionReturn(0);
3577 }
3578 
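/*
   A sketch of the routine above with the same two-process example as in the
   man page; rank 0 passes its 2 local rows of the 3x3 matrix (error checking
   omitted):

     Mat         A;
     PetscInt    i[] = {0,1,3};
     PetscInt    j[] = {0,0,2};
     PetscScalar a[] = {1.0,2.0,3.0};
     MatCreateMPIAIJWithArrays(PETSC_COMM_WORLD,2,PETSC_DECIDE,3,3,i,j,a,&A);
*/
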
3579 #undef __FUNCT__
3580 #define __FUNCT__ "MatCreateMPIAIJ"
3581 /*@C
3582    MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format
3583    (the default parallel PETSc format).  For good matrix assembly performance
3584    the user should preallocate the matrix storage by setting the parameters
3585    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3586    performance can be increased by more than a factor of 50.
3587 
3588    Collective on MPI_Comm
3589 
3590    Input Parameters:
3591 +  comm - MPI communicator
3592 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
3593            This value should be the same as the local size used in creating the
3594            y vector for the matrix-vector product y = Ax.
3595 .  n - This value should be the same as the local size used in creating the
3596        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
3597        calculated if N is given) For square matrices n is almost always m.
3598 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
3599 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
3600 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
3601            (same value is used for all local rows)
3602 .  d_nnz - array containing the number of nonzeros in the various rows of the
3603            DIAGONAL portion of the local submatrix (possibly different for each row)
3604            or PETSC_NULL, if d_nz is used to specify the nonzero structure.
3605            The size of this array is equal to the number of local rows, i.e 'm'.
3606            You must leave room for the diagonal entry even if it is zero.
3607 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
3608            submatrix (same value is used for all local rows).
3609 -  o_nnz - array containing the number of nonzeros in the various rows of the
3610            OFF-DIAGONAL portion of the local submatrix (possibly different for
3611            each row) or PETSC_NULL, if o_nz is used to specify the nonzero
3612            structure. The size of this array is equal to the number
3613            of local rows, i.e 'm'.
3614 
3615    Output Parameter:
3616 .  A - the matrix
3617 
3618    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
3619    MatXXXXSetPreallocation() paradigm instead of this routine directly.
3620    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
3621 
3622    Notes:
3623    If the *_nnz parameter is given then the *_nz parameter is ignored
3624 
3625    m,n,M,N parameters specify the size of the matrix, and its partitioning across
3626    processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
3627    storage requirements for this matrix.
3628 
3629    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one
3630    processor then it must be used on all processors that share the object for
3631    that argument.
3632 
3633    The user MUST specify either the local or global matrix dimensions
3634    (possibly both).
3635 
3636    The parallel matrix is partitioned across processors such that the
3637    first m0 rows belong to process 0, the next m1 rows belong to
3638    process 1, the next m2 rows belong to process 2 etc.. where
3639    m0,m1,m2,.. are the input parameter 'm', i.e. each processor stores
3640    values corresponding to an [m x N] submatrix.
3641 
3642    The columns are logically partitioned with the n0 columns belonging
3643    to 0th partition, the next n1 columns belonging to the next
3644    partition etc.. where n0,n1,n2... are the input parameter 'n'.
3645 
3646    The DIAGONAL portion of the local submatrix on any given processor
3647    is the submatrix corresponding to the rows and columns m,n
3648    owned by the given processor, i.e. the diagonal matrix on
3649    process 0 is [m0 x n0], diagonal matrix on process 1 is [m1 x n1]
3650    etc. The remaining portion of the local submatrix [m x (N-n)]
3651    constitute the OFF-DIAGONAL portion. The example below better
3652    illustrates this concept.
3653 
3654    For a square global matrix we define each processor's diagonal portion
3655    to be its local rows and the corresponding columns (a square submatrix);
3656    each processor's off-diagonal portion encompasses the remainder of the
3657    local matrix (a rectangular submatrix).
3658 
3659    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
3660 
3661    When calling this routine with a single process communicator, a matrix of
3662    type SEQAIJ is returned.  If a matrix of type MPIAIJ is desired for this
3663    type of communicator, use the construction mechanism:
3664      MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatSetSizes(A, m,n,M,N); MatMPIAIJSetPreallocation(A,...);
3665 
3666    By default, this format uses inodes (identical nodes) when possible.
3667    We search for consecutive rows with the same nonzero structure, thereby
3668    reusing matrix information to achieve increased efficiency.
3669 
3670    Options Database Keys:
3671 +  -mat_no_inode  - Do not use inodes
3672 .  -mat_inode_limit <limit> - Sets inode limit (max limit=5)
3673 -  -mat_aij_oneindex - Internally use indexing starting at 1
3674         rather than 0.  Note that when calling MatSetValues(),
3675         the user still MUST index entries starting at 0!
3676 
3677 
3678    Example usage:
3679 
3680    Consider the following 8x8 matrix with 34 non-zero values, that is
3681    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
3682    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
3683    as follows:
3684 
3685 .vb
3686             1  2  0  |  0  3  0  |  0  4
3687     Proc0   0  5  6  |  7  0  0  |  8  0
3688             9  0 10  | 11  0  0  | 12  0
3689     -------------------------------------
3690            13  0 14  | 15 16 17  |  0  0
3691     Proc1   0 18  0  | 19 20 21  |  0  0
3692             0  0  0  | 22 23  0  | 24  0
3693     -------------------------------------
3694     Proc2  25 26 27  |  0  0 28  | 29  0
3695            30  0  0  | 31 32 33  |  0 34
3696 .ve
3697 
3698    This can be represented as a collection of submatrices as:
3699 
3700 .vb
3701       A B C
3702       D E F
3703       G H I
3704 .ve
3705 
3706    Where the submatrices A,B,C are owned by proc0, D,E,F are
3707    owned by proc1, G,H,I are owned by proc2.
3708 
3709    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3710    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3711    The 'M','N' parameters are 8,8, and have the same values on all procs.
3712 
3713    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
3714    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
3715    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
3716    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
3717    part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
3718    matrix, and [DF] as another SeqAIJ matrix.
3719 
3720    When d_nz, o_nz parameters are specified, d_nz storage elements are
3721    allocated for every row of the local diagonal submatrix, and o_nz
3722    storage locations are allocated for every row of the OFF-DIAGONAL submat.
3723    One way to choose d_nz and o_nz is to use the maximum number of nonzeros
3724    per local row for each of the local DIAGONAL and OFF-DIAGONAL submatrices.
3725    In this case, the values of d_nz,o_nz are:
3726 .vb
3727      proc0 : dnz = 2, o_nz = 2
3728      proc1 : dnz = 3, o_nz = 2
3729      proc2 : dnz = 1, o_nz = 4
3730 .ve
3731    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
3732    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
3733    for proc2, i.e. we are using 12+15+10=37 storage locations to store
3734    34 values.
3735 
3736    When d_nnz, o_nnz parameters are specified, the storage is specified
3737    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
3738    In the above case the values for d_nnz,o_nnz are:
3739 .vb
3740      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
3741      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
3742      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
3743 .ve
3744    Here the space allocated is the sum of all the above values, i.e. 34, and
3745    hence pre-allocation is perfect.
3746 
3747    Level: intermediate
3748 
3749 .keywords: matrix, aij, compressed row, sparse, parallel
3750 
3751 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
3752           MPIAIJ, MatCreateMPIAIJWithArrays()
3753 @*/
3754 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
3755 {
3756   PetscErrorCode ierr;
3757   PetscMPIInt    size;
3758 
3759   PetscFunctionBegin;
3760   ierr = MatCreate(comm,A);CHKERRQ(ierr);
3761   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
3762   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3763   if (size > 1) {
3764     ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr);
3765     ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
3766   } else {
3767     ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr);
3768     ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr);
3769   }
3770   PetscFunctionReturn(0);
3771 }
3772 
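/*
   A sketch of the MatCreate()/MatSetType()/preallocation paradigm that the man
   page above recommends over calling MatCreateMPIAIJ() directly; comm, m,n,M,N
   and the preallocation arguments are placeholders (error checking omitted):

     Mat A;
     MatCreate(comm,&A);
     MatSetSizes(A,m,n,M,N);
     MatSetType(A,MATMPIAIJ);
     MatMPIAIJSetPreallocation(A,d_nz,d_nnz,o_nz,o_nnz);
*/
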
3773 #undef __FUNCT__
3774 #define __FUNCT__ "MatMPIAIJGetSeqAIJ"
3775 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,PetscInt *colmap[])
3776 {
3777   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
3778 
3779   PetscFunctionBegin;
3780   *Ad     = a->A;
3781   *Ao     = a->B;
3782   *colmap = a->garray;
3783   PetscFunctionReturn(0);
3784 }
3785 
3786 #undef __FUNCT__
3787 #define __FUNCT__ "MatSetColoring_MPIAIJ"
3788 PetscErrorCode MatSetColoring_MPIAIJ(Mat A,ISColoring coloring)
3789 {
3790   PetscErrorCode ierr;
3791   PetscInt       i;
3792   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
3793 
3794   PetscFunctionBegin;
3795   if (coloring->ctype == IS_COLORING_GLOBAL) {
3796     ISColoringValue *allcolors,*colors;
3797     ISColoring      ocoloring;
3798 
3799     /* set coloring for diagonal portion */
3800     ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr);
3801 
3802     /* set coloring for off-diagonal portion */
3803     ierr = ISAllGatherColors(((PetscObject)A)->comm,coloring->n,coloring->colors,PETSC_NULL,&allcolors);CHKERRQ(ierr);
3804     ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
3805     for (i=0; i<a->B->cmap->n; i++) {
3806       colors[i] = allcolors[a->garray[i]];
3807     }
3808     ierr = PetscFree(allcolors);CHKERRQ(ierr);
3809     ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
3810     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
3811     ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
3812   } else if (coloring->ctype == IS_COLORING_GHOSTED) {
3813     ISColoringValue *colors;
3814     PetscInt        *larray;
3815     ISColoring      ocoloring;
3816 
3817     /* set coloring for diagonal portion */
3818     ierr = PetscMalloc((a->A->cmap->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr);
3819     for (i=0; i<a->A->cmap->n; i++) {
3820       larray[i] = i + A->cmap->rstart;
3821     }
3822     ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->A->cmap->n,larray,PETSC_NULL,larray);CHKERRQ(ierr);
3823     ierr = PetscMalloc((a->A->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
3824     for (i=0; i<a->A->cmap->n; i++) {
3825       colors[i] = coloring->colors[larray[i]];
3826     }
3827     ierr = PetscFree(larray);CHKERRQ(ierr);
3828     ierr = ISColoringCreate(PETSC_COMM_SELF,coloring->n,a->A->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
3829     ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr);
3830     ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
3831 
3832     /* set coloring for off-diagonal portion */
3833     ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr);
3834     ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->B->cmap->n,a->garray,PETSC_NULL,larray);CHKERRQ(ierr);
3835     ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
3836     for (i=0; i<a->B->cmap->n; i++) {
3837       colors[i] = coloring->colors[larray[i]];
3838     }
3839     ierr = PetscFree(larray);CHKERRQ(ierr);
3840     ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
3841     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
3842     ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
3843   } else {
3844     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support ISColoringType %d",(int)coloring->ctype);
3845   }
3846 
3847   PetscFunctionReturn(0);
3848 }
3849 
3850 #if defined(PETSC_HAVE_ADIC)
3851 #undef __FUNCT__
3852 #define __FUNCT__ "MatSetValuesAdic_MPIAIJ"
3853 PetscErrorCode MatSetValuesAdic_MPIAIJ(Mat A,void *advalues)
3854 {
3855   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
3856   PetscErrorCode ierr;
3857 
3858   PetscFunctionBegin;
3859   ierr = MatSetValuesAdic_SeqAIJ(a->A,advalues);CHKERRQ(ierr);
3860   ierr = MatSetValuesAdic_SeqAIJ(a->B,advalues);CHKERRQ(ierr);
3861   PetscFunctionReturn(0);
3862 }
3863 #endif
3864 
3865 #undef __FUNCT__
3866 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ"
3867 PetscErrorCode MatSetValuesAdifor_MPIAIJ(Mat A,PetscInt nl,void *advalues)
3868 {
3869   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
3870   PetscErrorCode ierr;
3871 
3872   PetscFunctionBegin;
3873   ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr);
3874   ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr);
3875   PetscFunctionReturn(0);
3876 }
3877 
3878 #undef __FUNCT__
3879 #define __FUNCT__ "MatMerge"
3880 /*@
3881       MatMerge - Creates a single large PETSc matrix by concatenating sequential
3882                  matrices from each processor
3883 
3884     Collective on MPI_Comm
3885 
3886    Input Parameters:
3887 +    comm - the communicators the parallel matrix will live on
3888 .    inmat - the input sequential matrices
3889 .    n - number of local columns (or PETSC_DECIDE)
3890 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
3891 
3892    Output Parameter:
3893 .    outmat - the parallel matrix generated
3894 
3895     Level: advanced
3896 
3897    Notes: The number of columns of the matrix in EACH processor MUST be the same.
3898 
3899 @*/
3900 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
3901 {
3902   PetscErrorCode ierr;
3903   PetscInt       m,N,i,rstart,nnz,Ii,*dnz,*onz;
3904   PetscInt       *indx;
3905   PetscScalar    *values;
3906 
3907   PetscFunctionBegin;
3908   ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
3909   if (scall == MAT_INITIAL_MATRIX){
3910     /* count nonzeros in each row, for diagonal and off diagonal portion of matrix */
3911     if (n == PETSC_DECIDE){
3912       ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr);
3913     }
3914     ierr = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3915     rstart -= m;
3916 
3917     ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
3918     for (i=0;i<m;i++) {
3919       ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr);
3920       ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr);
3921       ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr);
3922     }
3923     /* This routine will ONLY return MPIAIJ type matrix */
3924     ierr = MatCreate(comm,outmat);CHKERRQ(ierr);
3925     ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
3926     ierr = MatSetType(*outmat,MATMPIAIJ);CHKERRQ(ierr);
3927     ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr);
3928     ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
3929 
3930   } else if (scall == MAT_REUSE_MATRIX){
3931     ierr = MatGetOwnershipRange(*outmat,&rstart,PETSC_NULL);CHKERRQ(ierr);
3932   } else {
3933     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
3934   }
3935 
3936   for (i=0;i<m;i++) {
3937     ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
3938     Ii    = i + rstart;
3939     ierr = MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
3940     ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
3941   }
3942   ierr = MatDestroy(inmat);CHKERRQ(ierr);
3943   ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3944   ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3945 
3946   PetscFunctionReturn(0);
3947 }
3948 
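/*
   A sketch of using MatMerge(), assuming each process holds a SeqAIJ matrix
   seq with the same number of columns; note that the routine destroys the
   input matrix once the parallel matrix is assembled (error checking omitted):

     Mat seq;      local SeqAIJ piece, built on PETSC_COMM_SELF
     Mat par;
     ... assemble seq ...
     MatMerge(PETSC_COMM_WORLD,seq,PETSC_DECIDE,MAT_INITIAL_MATRIX,&par);
*/
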
3949 #undef __FUNCT__
3950 #define __FUNCT__ "MatFileSplit"
3951 PetscErrorCode MatFileSplit(Mat A,char *outfile)
3952 {
3953   PetscErrorCode    ierr;
3954   PetscMPIInt       rank;
3955   PetscInt          m,N,i,rstart,nnz;
3956   size_t            len;
3957   const PetscInt    *indx;
3958   PetscViewer       out;
3959   char              *name;
3960   Mat               B;
3961   const PetscScalar *values;
3962 
3963   PetscFunctionBegin;
3964   ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr);
3965   ierr = MatGetSize(A,0,&N);CHKERRQ(ierr);
3966   /* Should this be the type of the diagonal block of A? */
3967   ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr);
3968   ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr);
3969   ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr);
3970   ierr = MatSeqAIJSetPreallocation(B,0,PETSC_NULL);CHKERRQ(ierr);
3971   ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr);
3972   for (i=0;i<m;i++) {
3973     ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
3974     ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
3975     ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
3976   }
3977   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3978   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3979 
3980   ierr = MPI_Comm_rank(((PetscObject)A)->comm,&rank);CHKERRQ(ierr);
3981   ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr);
3982   ierr = PetscMalloc((len+16)*sizeof(char),&name);CHKERRQ(ierr); /* room for the ".<rank>" suffix */
3983   sprintf(name,"%s.%d",outfile,rank);
3984   ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr);
3985   ierr = PetscFree(name);
3986   ierr = MatView(B,out);CHKERRQ(ierr);
3987   ierr = PetscViewerDestroy(out);CHKERRQ(ierr);
3988   ierr = MatDestroy(B);CHKERRQ(ierr);
3989   PetscFunctionReturn(0);
3990 }
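
/*
   Example usage (an editorial sketch, not part of the library; "A" is any
   assembled parallel matrix and "mysplit" is a hypothetical base file name):

     ierr = MatFileSplit(A,"mysplit");CHKERRQ(ierr);

   Afterwards each process has appended its local rows of A, stored as a SeqAIJ
   matrix, to the binary file mysplit.<rank>.
*/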
3991 
3992 EXTERN PetscErrorCode MatDestroy_MPIAIJ(Mat);
3993 #undef __FUNCT__
3994 #define __FUNCT__ "MatDestroy_MPIAIJ_SeqsToMPI"
3995 PetscErrorCode PETSCMAT_DLLEXPORT MatDestroy_MPIAIJ_SeqsToMPI(Mat A)
3996 {
3997   PetscErrorCode       ierr;
3998   Mat_Merge_SeqsToMPI  *merge = PETSC_NULL; /* initialized so the unconditional PetscFree() below is safe when no container is attached */
3999   PetscContainer       container;
4000 
4001   PetscFunctionBegin;
4002   ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr);
4003   if (container) {
4004     ierr = PetscContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr);
4005     ierr = PetscFree(merge->id_r);CHKERRQ(ierr);
4006     ierr = PetscFree(merge->len_s);CHKERRQ(ierr);
4007     ierr = PetscFree(merge->len_r);CHKERRQ(ierr);
4008     ierr = PetscFree(merge->bi);CHKERRQ(ierr);
4009     ierr = PetscFree(merge->bj);CHKERRQ(ierr);
4010     ierr = PetscFree(merge->buf_ri[0]);CHKERRQ(ierr);
4011     ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr);
4012     ierr = PetscFree(merge->buf_rj[0]);CHKERRQ(ierr);
4013     ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr);
4014     ierr = PetscFree(merge->coi);CHKERRQ(ierr);
4015     ierr = PetscFree(merge->coj);CHKERRQ(ierr);
4016     ierr = PetscFree(merge->owners_co);CHKERRQ(ierr);
4017     ierr = PetscLayoutDestroy(merge->rowmap);CHKERRQ(ierr);
4018 
4019     ierr = PetscContainerDestroy(container);CHKERRQ(ierr);
4020     ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr);
4021   }
4022   ierr = PetscFree(merge);CHKERRQ(ierr);
4023 
4024   ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr);
4025   PetscFunctionReturn(0);
4026 }
4027 
4028 #include "../src/mat/utils/freespace.h"
4029 #include "petscbt.h"
4030 
4031 #undef __FUNCT__
4032 #define __FUNCT__ "MatMerge_SeqsToMPINumeric"
4033 /*@C
4034       MatMerge_SeqsToMPINumeric - Fills in the numerical values of an MPIAIJ matrix
4035                  created with MatMerge_SeqsToMPISymbolic() by adding together the
4036                  sequential matrices from each processor
4037 
4038     Collective on Mat
4039 
4040    Input Parameters:
4041 +    seqmat - the input sequential matrix; its dimensions MUST be the same on every process
4042 -    mpimat - the parallel matrix created by MatMerge_SeqsToMPISymbolic()
4043 
4044     Level: advanced
4045 
4046    Notes:
4047      The merge information is stored in the container "MatMergeSeqsToMPI" attached
4048      to mpimat and is freed when mpimat is destroyed; call PetscObjectQuery() to
4049      access it.
4050 @*/
4056 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPINumeric(Mat seqmat,Mat mpimat)
4057 {
4058   PetscErrorCode       ierr;
4059   MPI_Comm             comm=((PetscObject)mpimat)->comm;
4060   Mat_SeqAIJ           *a=(Mat_SeqAIJ*)seqmat->data;
4061   PetscMPIInt          size,rank,taga,*len_s;
4062   PetscInt             N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj=a->j;
4063   PetscInt             proc,m;
4064   PetscInt             **buf_ri,**buf_rj;
4065   PetscInt             k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj;
4066   PetscInt             nrows,**buf_ri_k,**nextrow,**nextai;
4067   MPI_Request          *s_waits,*r_waits;
4068   MPI_Status           *status;
4069   MatScalar            *aa=a->a;
4070   MatScalar            **abuf_r,*ba_i;
4071   Mat_Merge_SeqsToMPI  *merge;
4072   PetscContainer       container;
4073 
4074   PetscFunctionBegin;
4075   ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
4076 
4077   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4078   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4079 
4080   ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr);
4081   if (container) {
4082     ierr  = PetscContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr);
4083   } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"mpimat was not created with MatMerge_SeqsToMPISymbolic()");
4084   bi     = merge->bi;
4085   bj     = merge->bj;
4086   buf_ri = merge->buf_ri;
4087   buf_rj = merge->buf_rj;
4088 
4089   ierr   = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr);
4090   owners = merge->rowmap->range;
4091   len_s  = merge->len_s;
4092 
4093   /* send and recv matrix values */
4094   /*-----------------------------*/
4095   ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr);
4096   ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr);
4097 
4098   ierr = PetscMalloc((merge->nsend+1)*sizeof(MPI_Request),&s_waits);CHKERRQ(ierr);
4099   for (proc=0,k=0; proc<size; proc++){
4100     if (!len_s[proc]) continue;
4101     i = owners[proc];
4102     ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr);
4103     k++;
4104   }
4105 
4106   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);}
4107   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);}
4108   ierr = PetscFree(status);CHKERRQ(ierr);
4109 
4110   ierr = PetscFree(s_waits);CHKERRQ(ierr);
4111   ierr = PetscFree(r_waits);CHKERRQ(ierr);
4112 
4113   /* insert mat values of mpimat */
4114   /*----------------------------*/
4115   ierr = PetscMalloc(N*sizeof(PetscScalar),&ba_i);CHKERRQ(ierr);
4116   ierr = PetscMalloc3(merge->nrecv,PetscInt*,&buf_ri_k,merge->nrecv,PetscInt*,&nextrow,merge->nrecv,PetscInt*,&nextai);CHKERRQ(ierr);
4117 
4118   for (k=0; k<merge->nrecv; k++){
4119     buf_ri_k[k] = buf_ri[k]; /* beginning of the k-th received i-structure */
4120     nrows = *(buf_ri_k[k]);
4121     nextrow[k]  = buf_ri_k[k]+1;  /* next row number of the k-th received i-structure */
4122     nextai[k]   = buf_ri_k[k] + (nrows + 1);/* points to the next i-structure of the k-th received i-structure */
4123   }
4124 
4125   /* set values of ba */
4126   m = merge->rowmap->n;
4127   for (i=0; i<m; i++) {
4128     arow = owners[rank] + i;
4129     bj_i = bj+bi[i];  /* col indices of the i-th row of mpimat */
4130     bnzi = bi[i+1] - bi[i];
4131     ierr = PetscMemzero(ba_i,bnzi*sizeof(PetscScalar));CHKERRQ(ierr);
4132 
4133     /* add local non-zero vals of this proc's seqmat into ba */
4134     anzi = ai[arow+1] - ai[arow];
4135     aj   = a->j + ai[arow];
4136     aa   = a->a + ai[arow];
4137     nextaj = 0;
4138     for (j=0; nextaj<anzi; j++){
4139       if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */
4140         ba_i[j] += aa[nextaj++];
4141       }
4142     }
4143 
4144     /* add received vals into ba */
4145     for (k=0; k<merge->nrecv; k++){ /* k-th received message */
4146       /* i-th row */
4147       if (i == *nextrow[k]) {
4148         anzi = *(nextai[k]+1) - *nextai[k];
4149         aj   = buf_rj[k] + *(nextai[k]);
4150         aa   = abuf_r[k] + *(nextai[k]);
4151         nextaj = 0;
4152         for (j=0; nextaj<anzi; j++){
4153           if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */
4154             ba_i[j] += aa[nextaj++];
4155           }
4156         }
4157         nextrow[k]++; nextai[k]++;
4158       }
4159     }
4160     ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr);
4161   }
4162   ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4163   ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4164 
4165   ierr = PetscFree(abuf_r[0]);CHKERRQ(ierr);
4166   ierr = PetscFree(abuf_r);CHKERRQ(ierr);
4167   ierr = PetscFree(ba_i);CHKERRQ(ierr);
4168   ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
4169   ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
4170   PetscFunctionReturn(0);
4171 }
4172 
4173 #undef __FUNCT__
4174 #define __FUNCT__ "MatMerge_SeqsToMPISymbolic"
4175 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPISymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat)
4176 {
4177   PetscErrorCode       ierr;
4178   Mat                  B_mpi;
4179   Mat_SeqAIJ           *a=(Mat_SeqAIJ*)seqmat->data;
4180   PetscMPIInt          size,rank,tagi,tagj,*len_s,*len_si,*len_ri;
4181   PetscInt             **buf_rj,**buf_ri,**buf_ri_k;
4182   PetscInt             M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j;
4183   PetscInt             len,proc,*dnz,*onz;
4184   PetscInt             k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0;
4185   PetscInt             nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai;
4186   MPI_Request          *si_waits,*sj_waits,*ri_waits,*rj_waits;
4187   MPI_Status           *status;
4188   PetscFreeSpaceList   free_space=PETSC_NULL,current_space=PETSC_NULL;
4189   PetscBT              lnkbt;
4190   Mat_Merge_SeqsToMPI  *merge;
4191   PetscContainer       container;
4192 
4193   PetscFunctionBegin;
4194   ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4195 
4196   /* make sure it is a PETSc comm */
4197   ierr = PetscCommDuplicate(comm,&comm,PETSC_NULL);CHKERRQ(ierr);
4198   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4199   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4200 
4201   ierr = PetscNew(Mat_Merge_SeqsToMPI,&merge);CHKERRQ(ierr);
4202   ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr);
4203 
4204   /* determine row ownership */
4205   /*---------------------------------------------------------*/
4206   ierr = PetscLayoutCreate(comm,&merge->rowmap);CHKERRQ(ierr);
4207   ierr = PetscLayoutSetLocalSize(merge->rowmap,m);CHKERRQ(ierr);
4208   ierr = PetscLayoutSetSize(merge->rowmap,M);CHKERRQ(ierr);
4209   ierr = PetscLayoutSetBlockSize(merge->rowmap,1);CHKERRQ(ierr);
4210   ierr = PetscLayoutSetUp(merge->rowmap);CHKERRQ(ierr);
4211   ierr = PetscMalloc(size*sizeof(PetscMPIInt),&len_si);CHKERRQ(ierr);
4212   ierr = PetscMalloc(size*sizeof(PetscMPIInt),&merge->len_s);CHKERRQ(ierr);
4213 
4214   m      = merge->rowmap->n;
4215   M      = merge->rowmap->N;
4216   owners = merge->rowmap->range;
4217 
4218   /* determine the number of messages to send, their lengths */
4219   /*---------------------------------------------------------*/
4220   len_s  = merge->len_s;
4221 
4222   len = 0;  /* length of buf_si[] */
4223   merge->nsend = 0;
4224   for (proc=0; proc<size; proc++){
4225     len_si[proc] = 0;
4226     if (proc == rank){
4227       len_s[proc] = 0;
4228     } else {
4229       len_si[proc] = owners[proc+1] - owners[proc] + 1;
4230       len_s[proc] = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */
4231     }
4232     if (len_s[proc]) {
4233       merge->nsend++;
4234       nrows = 0;
4235       for (i=owners[proc]; i<owners[proc+1]; i++){
4236         if (ai[i+1] > ai[i]) nrows++;
4237       }
4238       len_si[proc] = 2*(nrows+1);
4239       len += len_si[proc];
4240     }
4241   }
4242 
4243   /* determine the number and length of messages to receive for ij-structure */
4244   /*-------------------------------------------------------------------------*/
4245   ierr = PetscGatherNumberOfMessages(comm,PETSC_NULL,len_s,&merge->nrecv);CHKERRQ(ierr);
4246   ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr);
4247 
4248   /* post the Irecv of j-structure */
4249   /*-------------------------------*/
4250   ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr);
4251   ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr);
4252 
4253   /* post the Isend of j-structure */
4254   /*--------------------------------*/
4255   ierr = PetscMalloc2(merge->nsend,MPI_Request,&si_waits,merge->nsend,MPI_Request,&sj_waits);CHKERRQ(ierr);
4256 
4257   for (proc=0, k=0; proc<size; proc++){
4258     if (!len_s[proc]) continue;
4259     i = owners[proc];
4260     ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr);
4261     k++;
4262   }
4263 
4264   /* receives and sends of j-structure are complete */
4265   /*------------------------------------------------*/
4266   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);}
4267   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);}
4268 
4269   /* send and recv i-structure */
4270   /*---------------------------*/
4271   ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr);
4272   ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr);
4273 
4274   ierr = PetscMalloc((len+1)*sizeof(PetscInt),&buf_s);CHKERRQ(ierr);
4275   buf_si = buf_s;  /* points to the beginning of k-th msg to be sent */
4276   for (proc=0,k=0; proc<size; proc++){
4277     if (!len_s[proc]) continue;
4278     /* form outgoing message for i-structure:
4279          buf_si[0]:                 nrows to be sent
4280                [1:nrows]:           row index (global)
4281                [nrows+1:2*nrows+1]: i-structure index
4282     */
4283     /*-------------------------------------------*/
4284     nrows = len_si[proc]/2 - 1;
4285     buf_si_i    = buf_si + nrows+1;
4286     buf_si[0]   = nrows;
4287     buf_si_i[0] = 0;
4288     nrows = 0;
4289     for (i=owners[proc]; i<owners[proc+1]; i++){
4290       anzi = ai[i+1] - ai[i];
4291       if (anzi) {
4292         buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */
4293         buf_si[nrows+1] = i-owners[proc]; /* local row index */
4294         nrows++;
4295       }
4296     }
4297     ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr);
4298     k++;
4299     buf_si += len_si[proc];
4300   }
4301 
4302   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);}
4303   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);}
4304 
4305   ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr);
4306   for (i=0; i<merge->nrecv; i++){
4307     ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr);
4308   }
4309 
4310   ierr = PetscFree(len_si);CHKERRQ(ierr);
4311   ierr = PetscFree(len_ri);CHKERRQ(ierr);
4312   ierr = PetscFree(rj_waits);CHKERRQ(ierr);
4313   ierr = PetscFree2(si_waits,sj_waits);CHKERRQ(ierr);
4314   ierr = PetscFree(ri_waits);CHKERRQ(ierr);
4315   ierr = PetscFree(buf_s);CHKERRQ(ierr);
4316   ierr = PetscFree(status);CHKERRQ(ierr);
4317 
4318   /* compute a local seq matrix in each processor */
4319   /*----------------------------------------------*/
4320   /* allocate bi array and free space for accumulating nonzero column info */
4321   ierr = PetscMalloc((m+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr);
4322   bi[0] = 0;
4323 
4324   /* create and initialize a linked list */
4325   nlnk = N+1;
4326   ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4327 
4328   /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */
4329   len  = ai[owners[rank+1]] - ai[owners[rank]];
4331   ierr = PetscFreeSpaceGet((PetscInt)(2*len+1),&free_space);CHKERRQ(ierr);
4332   current_space = free_space;
4333 
4334   /* determine symbolic info for each local row */
4335   ierr = PetscMalloc3(merge->nrecv,PetscInt*,&buf_ri_k,merge->nrecv,PetscInt*,&nextrow,merge->nrecv,PetscInt*,&nextai);CHKERRQ(ierr);
4336 
4337   for (k=0; k<merge->nrecv; k++){
4338     buf_ri_k[k] = buf_ri[k]; /* beginning of the k-th received i-structure */
4339     nrows = *buf_ri_k[k];
4340     nextrow[k]  = buf_ri_k[k] + 1;  /* next row number of the k-th received i-structure */
4341     nextai[k]   = buf_ri_k[k] + (nrows + 1);/* points to the next i-structure of the k-th received i-structure */
4342   }
4343 
4344   ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
4345   len = 0;
4346   for (i=0;i<m;i++) {
4347     bnzi   = 0;
4348     /* add local non-zero cols of this proc's seqmat into lnk */
4349     arow   = owners[rank] + i;
4350     anzi   = ai[arow+1] - ai[arow];
4351     aj     = a->j + ai[arow];
4352     ierr = PetscLLAdd(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4353     bnzi += nlnk;
4354     /* add received col data into lnk */
4355     for (k=0; k<merge->nrecv; k++){ /* k-th received message */
4356       if (i == *nextrow[k]) { /* i-th row */
4357         anzi = *(nextai[k]+1) - *nextai[k];
4358         aj   = buf_rj[k] + *nextai[k];
4359         ierr = PetscLLAdd(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4360         bnzi += nlnk;
4361         nextrow[k]++; nextai[k]++;
4362       }
4363     }
4364     if (len < bnzi) len = bnzi;  /* =max(bnzi) */
4365 
4366     /* if free space is not available, make more free space */
4367     if (current_space->local_remaining<bnzi) {
4368       ierr = PetscFreeSpaceGet(bnzi+current_space->total_array_size,&current_space);CHKERRQ(ierr);
4369       nspacedouble++;
4370     }
4371     /* copy data into free space, then initialize lnk */
4372     ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
4373     ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr);
4374 
4375     current_space->array           += bnzi;
4376     current_space->local_used      += bnzi;
4377     current_space->local_remaining -= bnzi;
4378 
4379     bi[i+1] = bi[i] + bnzi;
4380   }
4381 
4382   ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
4383 
4384   ierr = PetscMalloc((bi[m]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr);
4385   ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr);
4386   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
4387 
4388   /* create symbolic parallel matrix B_mpi */
4389   /*---------------------------------------*/
4390   ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr);
4391   if (n==PETSC_DECIDE) {
4392     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr);
4393   } else {
4394     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
4395   }
4396   ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr);
4397   ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr);
4398   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
4399 
4400   /* B_mpi is not ready for use - assembly will be done by MatMerge_SeqsToMPINumeric() */
4401   B_mpi->assembled     = PETSC_FALSE;
4402   B_mpi->ops->destroy  = MatDestroy_MPIAIJ_SeqsToMPI;
4403   merge->bi            = bi;
4404   merge->bj            = bj;
4405   merge->buf_ri        = buf_ri;
4406   merge->buf_rj        = buf_rj;
4407   merge->coi           = PETSC_NULL;
4408   merge->coj           = PETSC_NULL;
4409   merge->owners_co     = PETSC_NULL;
4410 
4411   /* attach the supporting struct to B_mpi for reuse */
4412   ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
4413   ierr = PetscContainerSetPointer(container,merge);CHKERRQ(ierr);
4414   ierr = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr);
4415   *mpimat = B_mpi;
4416 
4417   ierr = PetscCommDestroy(&comm);CHKERRQ(ierr);
4418   ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4419   PetscFunctionReturn(0);
4420 }
4421 
4422 #undef __FUNCT__
4423 #define __FUNCT__ "MatMerge_SeqsToMPI"
4424 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPI(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat)
4425 {
4426   PetscErrorCode   ierr;
4427 
4428   PetscFunctionBegin;
4429   ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4430   if (scall == MAT_INITIAL_MATRIX){
4431     ierr = MatMerge_SeqsToMPISymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr);
4432   }
4433   ierr = MatMerge_SeqsToMPINumeric(seqmat,*mpimat);CHKERRQ(ierr);
4434   ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4435   PetscFunctionReturn(0);
4436 }
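
/*
   Example usage (an editorial sketch; "comm" and the per-process SeqAIJ matrix
   "seqmat" are assumed to exist, with the same global dimensions everywhere):

     Mat mpimat;
     ierr = MatMerge_SeqsToMPI(comm,seqmat,PETSC_DECIDE,PETSC_DECIDE,MAT_INITIAL_MATRIX,&mpimat);CHKERRQ(ierr);
     ...change the numerical values of seqmat, keeping its nonzero pattern...
     ierr = MatMerge_SeqsToMPI(comm,seqmat,PETSC_DECIDE,PETSC_DECIDE,MAT_REUSE_MATRIX,&mpimat);CHKERRQ(ierr);
*/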
4437 
4438 #undef __FUNCT__
4439 #define __FUNCT__ "MatGetLocalMat"
4440 /*@
4441      MatGetLocalMat - Creates a SeqAIJ matrix containing all the local rows (diagonal and off-diagonal parts) of a parallel matrix
4442 
4443     Not Collective
4444 
4445    Input Parameters:
4446 +    A - the matrix
4447 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4448 
4449    Output Parameter:
4450 .    A_loc - the local sequential matrix generated
4451 
4452     Level: developer
4453 
4454 @*/
4455 PetscErrorCode PETSCMAT_DLLEXPORT MatGetLocalMat(Mat A,MatReuse scall,Mat *A_loc)
4456 {
4457   PetscErrorCode  ierr;
4458   Mat_MPIAIJ      *mpimat=(Mat_MPIAIJ*)A->data;
4459   Mat_SeqAIJ      *mat,*a=(Mat_SeqAIJ*)(mpimat->A)->data,*b=(Mat_SeqAIJ*)(mpimat->B)->data;
4460   PetscInt        *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*cmap=mpimat->garray;
4461   MatScalar       *aa=a->a,*ba=b->a,*cam;
4462   PetscScalar     *ca;
4463   PetscInt        am=A->rmap->n,i,j,k,cstart=A->cmap->rstart;
4464   PetscInt        *ci,*cj,col,ncols_d,ncols_o,jo;
4465 
4466   PetscFunctionBegin;
4467   ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
4468   if (scall == MAT_INITIAL_MATRIX){
4469     ierr = PetscMalloc((1+am)*sizeof(PetscInt),&ci);CHKERRQ(ierr);
4470     ci[0] = 0;
4471     for (i=0; i<am; i++){
4472       ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]);
4473     }
4474     ierr = PetscMalloc((1+ci[am])*sizeof(PetscInt),&cj);CHKERRQ(ierr);
4475     ierr = PetscMalloc((1+ci[am])*sizeof(PetscScalar),&ca);CHKERRQ(ierr);
4476     k = 0;
4477     for (i=0; i<am; i++) {
4478       ncols_o = bi[i+1] - bi[i];
4479       ncols_d = ai[i+1] - ai[i];
4480       /* off-diagonal portion of A: columns to the left of the diagonal block */
4481       for (jo=0; jo<ncols_o; jo++) {
4482         col = cmap[*bj];
4483         if (col >= cstart) break;
4484         cj[k]   = col; bj++;
4485         ca[k++] = *ba++;
4486       }
4487       /* diagonal portion of A */
4488       for (j=0; j<ncols_d; j++) {
4489         cj[k]   = cstart + *aj++;
4490         ca[k++] = *aa++;
4491       }
4492       /* off-diagonal portion of A: columns to the right of the diagonal block */
4493       for (j=jo; j<ncols_o; j++) {
4494         cj[k]   = cmap[*bj++];
4495         ca[k++] = *ba++;
4496       }
4497     }
4498     /* put together the new matrix */
4499     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr);
4500     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
4501     /* Since these are PETSc arrays, change flags to free them as necessary. */
4502     mat          = (Mat_SeqAIJ*)(*A_loc)->data;
4503     mat->free_a  = PETSC_TRUE;
4504     mat->free_ij = PETSC_TRUE;
4505     mat->nonew   = 0;
4506   } else if (scall == MAT_REUSE_MATRIX){
4507     mat=(Mat_SeqAIJ*)(*A_loc)->data;
4508     ci = mat->i; cj = mat->j; cam = mat->a;
4509     for (i=0; i<am; i++) {
4510       /* off-diagonal portion of A: columns to the left of the diagonal block */
4511       ncols_o = bi[i+1] - bi[i];
4512       for (jo=0; jo<ncols_o; jo++) {
4513         col = cmap[*bj];
4514         if (col >= cstart) break;
4515         *cam++ = *ba++; bj++;
4516       }
4517       /* diagonal portion of A */
4518       ncols_d = ai[i+1] - ai[i];
4519       for (j=0; j<ncols_d; j++) *cam++ = *aa++;
4520       /* off-diagonal portion of A: columns to the right of the diagonal block */
4521       for (j=jo; j<ncols_o; j++) {
4522         *cam++ = *ba++; bj++;
4523       }
4524     }
4525   } else {
4526     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
4527   }
4528 
4529   ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
4530   PetscFunctionReturn(0);
4531 }
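
/*
   Example usage (an editorial sketch; "A" is an assembled MPIAIJ matrix):

     Mat A_loc;
     ierr = MatGetLocalMat(A,MAT_INITIAL_MATRIX,&A_loc);CHKERRQ(ierr);
     ...A_loc holds this process's rows of A with global column indices...
     ierr = MatGetLocalMat(A,MAT_REUSE_MATRIX,&A_loc);CHKERRQ(ierr); /* refresh the numerical values only */
     ierr = MatDestroy(A_loc);CHKERRQ(ierr);
*/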
4532 
4533 #undef __FUNCT__
4534 #define __FUNCT__ "MatGetLocalMatCondensed"
4535 /*@C
4536      MatGetLocalMatCondensed - Creates a SeqAIJ matrix from the local rows of a parallel matrix, keeping only its NON-ZERO columns
4537 
4538     Not Collective
4539 
4540    Input Parameters:
4541 +    A - the matrix
4542 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4543 -    row, col - index sets of rows and columns to extract (or PETSC_NULL)
4544 
4545    Output Parameter:
4546 .    A_loc - the local sequential matrix generated
4547 
4548     Level: developer
4549 
4550 @*/
4551 PetscErrorCode PETSCMAT_DLLEXPORT MatGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc)
4552 {
4553   Mat_MPIAIJ        *a=(Mat_MPIAIJ*)A->data;
4554   PetscErrorCode    ierr;
4555   PetscInt          i,start,end,ncols,nzA,nzB,*cmap,imark,*idx;
4556   IS                isrowa,iscola;
4557   Mat               *aloc;
4558 
4559   PetscFunctionBegin;
4560   ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
4561   if (!row){
4562     start = A->rmap->rstart; end = A->rmap->rend;
4563     ierr = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr);
4564   } else {
4565     isrowa = *row;
4566   }
4567   if (!col){
4568     start = A->cmap->rstart;
4569     cmap  = a->garray;
4570     nzA   = a->A->cmap->n;
4571     nzB   = a->B->cmap->n;
4572     ierr  = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr);
4573     ncols = 0;
4574     for (i=0; i<nzB; i++) {
4575       if (cmap[i] < start) idx[ncols++] = cmap[i];
4576       else break;
4577     }
4578     imark = i;
4579     for (i=0; i<nzA; i++) idx[ncols++] = start + i;
4580     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i];
4581     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,&iscola);CHKERRQ(ierr);
4582     ierr = PetscFree(idx);CHKERRQ(ierr);
4583   } else {
4584     iscola = *col;
4585   }
4586   if (scall != MAT_INITIAL_MATRIX){
4587     ierr = PetscMalloc(sizeof(Mat),&aloc);CHKERRQ(ierr);
4588     aloc[0] = *A_loc;
4589   }
4590   ierr = MatGetSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr);
4591   *A_loc = aloc[0];
4592   ierr = PetscFree(aloc);CHKERRQ(ierr);
4593   if (!row){
4594     ierr = ISDestroy(isrowa);CHKERRQ(ierr);
4595   }
4596   if (!col){
4597     ierr = ISDestroy(iscola);CHKERRQ(ierr);
4598   }
4599   ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
4600   PetscFunctionReturn(0);
4601 }
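
/*
   Example usage (an editorial sketch; "A" is an assembled MPIAIJ matrix; passing
   PETSC_NULL for row and col lets the routine use the local rows and the nonzero
   columns of A):

     Mat A_loc;
     ierr = MatGetLocalMatCondensed(A,MAT_INITIAL_MATRIX,PETSC_NULL,PETSC_NULL,&A_loc);CHKERRQ(ierr);
     ...
     ierr = MatDestroy(A_loc);CHKERRQ(ierr);
*/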
4602 
4603 #undef __FUNCT__
4604 #define __FUNCT__ "MatGetBrowsOfAcols"
4605 /*@C
4606     MatGetBrowsOfAcols - Creates a SeqAIJ matrix holding the rows of B that correspond to the nonzero columns of the local part of A
4607 
4608     Collective on Mat
4609 
4610    Input Parameters:
4611 +    A,B - the matrices in mpiaij format
4612 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4613 -    rowb, colb - index sets of rows and columns of B to extract (or PETSC_NULL)
4614 
4615    Output Parameter:
4616 +    rowb, colb - index sets of rows and columns of B to extract
4617 .    brstart - row index of B_seq at which the B->rmap->n local rows of B begin
4618 -    B_seq - the sequential matrix generated
4619 
4620     Level: developer
4621 
4622 @*/
4623 PetscErrorCode PETSCMAT_DLLEXPORT MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,PetscInt *brstart,Mat *B_seq)
4624 {
4625   Mat_MPIAIJ        *a=(Mat_MPIAIJ*)A->data;
4626   PetscErrorCode    ierr;
4627   PetscInt          *idx,i,start,ncols,nzA,nzB,*cmap,imark;
4628   IS                isrowb,iscolb;
4629   Mat               *bseq;
4630 
4631   PetscFunctionBegin;
4632   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend){
4633     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
4634   }
4635   ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
4636 
4637   if (scall == MAT_INITIAL_MATRIX){
4638     start = A->cmap->rstart;
4639     cmap  = a->garray;
4640     nzA   = a->A->cmap->n;
4641     nzB   = a->B->cmap->n;
4642     ierr  = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr);
4643     ncols = 0;
4644     for (i=0; i<nzB; i++) {  /* row < local row index */
4645       if (cmap[i] < start) idx[ncols++] = cmap[i];
4646       else break;
4647     }
4648     imark = i;
4649     for (i=0; i<nzA; i++) idx[ncols++] = start + i;  /* local rows */
4650     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */
4651     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,&isrowb);CHKERRQ(ierr);
4652     ierr = PetscFree(idx);CHKERRQ(ierr);
4653     *brstart = imark;
4654     ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr);
4655   } else {
4656     if (!rowb || !colb) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX");
4657     isrowb = *rowb; iscolb = *colb;
4658     ierr = PetscMalloc(sizeof(Mat),&bseq);CHKERRQ(ierr);
4659     bseq[0] = *B_seq;
4660   }
4661   ierr = MatGetSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr);
4662   *B_seq = bseq[0];
4663   ierr = PetscFree(bseq);CHKERRQ(ierr);
4664   if (!rowb){
4665     ierr = ISDestroy(isrowb);CHKERRQ(ierr);
4666   } else {
4667     *rowb = isrowb;
4668   }
4669   if (!colb){
4670     ierr = ISDestroy(iscolb);CHKERRQ(ierr);
4671   } else {
4672     *colb = iscolb;
4673   }
4674   ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
4675   PetscFunctionReturn(0);
4676 }
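
/*
   Example usage (an editorial sketch; A and B are assembled MPIAIJ matrices whose
   local dimensions are compatible, as checked at the top of the routine):

     Mat      B_seq;
     IS       rowb,colb;
     PetscInt brstart;
     ierr = MatGetBrowsOfAcols(A,B,MAT_INITIAL_MATRIX,&rowb,&colb,&brstart,&B_seq);CHKERRQ(ierr);
     ...use B_seq; later, after B's values change but not its nonzero pattern:...
     ierr = MatGetBrowsOfAcols(A,B,MAT_REUSE_MATRIX,&rowb,&colb,&brstart,&B_seq);CHKERRQ(ierr);
*/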
4677 
4678 #undef __FUNCT__
4679 #define __FUNCT__ "MatGetBrowsOfAoCols"
4680 /*@C
4681     MatGetBrowsOfAoCols - Creates a SeqAIJ matrix holding the rows of B that correspond to the nonzero columns
4682     of the OFF-DIAGONAL portion of the local part of A
4683 
4684     Collective on Mat
4685 
4686    Input Parameters:
4687 +    A,B - the matrices in mpiaij format
4688 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4689 .    startsj - starting point in B's sending and receiving j-arrays, saved for MAT_REUSE (or PETSC_NULL)
4690 .    startsj_r - similar to startsj for receives
4691 -    bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or PETSC_NULL)
4692 
4693    Output Parameter:
4694 .    B_oth - the sequential matrix generated
4695 
4696     Level: developer
4697 
4698 @*/
4699 PetscErrorCode PETSCMAT_DLLEXPORT MatGetBrowsOfAoCols(Mat A,Mat B,MatReuse scall,PetscInt **startsj,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth)
4700 {
4701   VecScatter_MPI_General *gen_to,*gen_from;
4702   PetscErrorCode         ierr;
4703   Mat_MPIAIJ             *a=(Mat_MPIAIJ*)A->data;
4704   Mat_SeqAIJ             *b_oth;
4705   VecScatter             ctx=a->Mvctx;
4706   MPI_Comm               comm=((PetscObject)ctx)->comm;
4707   PetscMPIInt            *rprocs,*sprocs,tag=((PetscObject)ctx)->tag,rank;
4708   PetscInt               *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap->n,row,*b_othi,*b_othj;
4709   PetscScalar            *rvalues,*svalues;
4710   MatScalar              *b_otha,*bufa,*bufA;
4711   PetscInt               i,j,k,l,ll,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len;
4712   MPI_Request            *rwaits = PETSC_NULL,*swaits = PETSC_NULL;
4713   MPI_Status             *sstatus,rstatus;
4714   PetscMPIInt            jj;
4715   PetscInt               *cols,sbs,rbs;
4716   PetscScalar            *vals;
4717 
4718   PetscFunctionBegin;
4719   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend){
4720     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
4721   }
4722   ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
4723   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4724 
4725   gen_to   = (VecScatter_MPI_General*)ctx->todata;
4726   gen_from = (VecScatter_MPI_General*)ctx->fromdata;
4727   rvalues  = gen_from->values; /* reused below to hold the lengths of the rows to be received */
4728   svalues  = gen_to->values;   /* reused below to hold the lengths of the rows to be sent */
4729   nrecvs   = gen_from->n;
4730   nsends   = gen_to->n;
4731 
4732   ierr = PetscMalloc2(nrecvs,MPI_Request,&rwaits,nsends,MPI_Request,&swaits);CHKERRQ(ierr);
4733   srow     = gen_to->indices;   /* local row index to be sent */
4734   sstarts  = gen_to->starts;
4735   sprocs   = gen_to->procs;
4736   sstatus  = gen_to->sstatus;
4737   sbs      = gen_to->bs;
4738   rstarts  = gen_from->starts;
4739   rprocs   = gen_from->procs;
4740   rbs      = gen_from->bs;
4741 
4742   if (!startsj || !bufa_ptr) scall = MAT_INITIAL_MATRIX;
4743   if (scall == MAT_INITIAL_MATRIX){
4744     /* i-array */
4745     /*---------*/
4746     /*  post receives */
4747     for (i=0; i<nrecvs; i++){
4748       rowlen = (PetscInt*)rvalues + rstarts[i]*rbs;
4749       nrows = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */
4750       ierr = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
4751     }
4752 
4753     /* pack the outgoing message */
4754     ierr = PetscMalloc2(nsends+1,PetscInt,&sstartsj,nrecvs+1,PetscInt,&rstartsj);CHKERRQ(ierr);
4755     sstartsj[0] = 0;  rstartsj[0] = 0;
4756     len = 0; /* total length of j or a array to be sent */
4757     k = 0;
4758     for (i=0; i<nsends; i++){
4759       rowlen = (PetscInt*)svalues + sstarts[i]*sbs;
4760       nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
4761       for (j=0; j<nrows; j++) {
4762         row = srow[k] + B->rmap->range[rank]; /* global row idx */
4763         for (l=0; l<sbs; l++){
4764           ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); /* rowlength */
4765           rowlen[j*sbs+l] = ncols;
4766           len += ncols;
4767           ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr);
4768         }
4769         k++;
4770       }
4771       ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
4772       sstartsj[i+1] = len;  /* starting point of (i+1)-th outgoing msg in bufj and bufa */
4773     }
4774     /* recvs and sends of i-array are completed */
4775     i = nrecvs;
4776     while (i--) {
4777       ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
4778     }
4779     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
4780 
4781     /* allocate buffers for sending j and a arrays */
4782     ierr = PetscMalloc((len+1)*sizeof(PetscInt),&bufj);CHKERRQ(ierr);
4783     ierr = PetscMalloc((len+1)*sizeof(PetscScalar),&bufa);CHKERRQ(ierr);
4784 
4785     /* create i-array of B_oth */
4786     ierr = PetscMalloc((aBn+2)*sizeof(PetscInt),&b_othi);CHKERRQ(ierr);
4787     b_othi[0] = 0;
4788     len = 0; /* total length of j or a array to be received */
4789     k = 0;
4790     for (i=0; i<nrecvs; i++){
4791       rowlen = (PetscInt*)rvalues + rstarts[i]*rbs;
4792       nrows = rbs*(rstarts[i+1]-rstarts[i]); /* num of rows to be received */
4793       for (j=0; j<nrows; j++) {
4794         b_othi[k+1] = b_othi[k] + rowlen[j];
4795         len += rowlen[j]; k++;
4796       }
4797       rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */
4798     }
4799 
4800     /* allocate space for the j and a arrays of B_oth */
4801     ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(PetscInt),&b_othj);CHKERRQ(ierr);
4802     ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(MatScalar),&b_otha);CHKERRQ(ierr);
4803 
4804     /* j-array */
4805     /*---------*/
4806     /*  post receives of j-array */
4807     for (i=0; i<nrecvs; i++){
4808       nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
4809       ierr = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
4810     }
4811 
4812     /* pack the outgoing message j-array */
4813     k = 0;
4814     for (i=0; i<nsends; i++){
4815       nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
4816       bufJ = bufj+sstartsj[i];
4817       for (j=0; j<nrows; j++) {
4818         row  = srow[k++] + B->rmap->range[rank]; /* global row idx */
4819         for (ll=0; ll<sbs; ll++){
4820           ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr);
4821           for (l=0; l<ncols; l++){
4822             *bufJ++ = cols[l];
4823           }
4824           ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr);
4825         }
4826       }
4827       ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
4828     }
4829 
4830     /* recvs and sends of j-array are completed */
4831     i = nrecvs;
4832     while (i--) {
4833       ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
4834     }
4835     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
4836   } else if (scall == MAT_REUSE_MATRIX){
4837     sstartsj = *startsj;
4838     rstartsj = *startsj_r;
4839     bufa     = *bufa_ptr;
4840     b_oth    = (Mat_SeqAIJ*)(*B_oth)->data;
4841     b_otha   = b_oth->a;
4842   } else {
4843     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
4844   }
4845 
4846   /* a-array */
4847   /*---------*/
4848   /*  post receives of a-array */
4849   for (i=0; i<nrecvs; i++){
4850     nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
4851     ierr = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
4852   }
4853 
4854   /* pack the outgoing message a-array */
4855   k = 0;
4856   for (i=0; i<nsends; i++){
4857     nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
4858     bufA = bufa+sstartsj[i];
4859     for (j=0; j<nrows; j++) {
4860       row  = srow[k++] + B->rmap->range[rank]; /* global row idx */
4861       for (ll=0; ll<sbs; ll++){
4862         ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr);
4863         for (l=0; l<ncols; l++){
4864           *bufA++ = vals[l];
4865         }
4866         ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr);
4867       }
4868     }
4869     ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
4870   }
4871   /* recvs and sends of a-array are completed */
4872   i = nrecvs;
4873   while (i--) {
4874     ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
4875   }
4876   if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
4877   ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr);
4878 
4879   if (scall == MAT_INITIAL_MATRIX){
4880     /* put together the new matrix */
4881     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr);
4882 
4883     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
4884     /* Since these are PETSc arrays, change flags to free them as necessary. */
4885     b_oth          = (Mat_SeqAIJ *)(*B_oth)->data;
4886     b_oth->free_a  = PETSC_TRUE;
4887     b_oth->free_ij = PETSC_TRUE;
4888     b_oth->nonew   = 0;
4889 
4890     ierr = PetscFree(bufj);CHKERRQ(ierr);
4891     if (!startsj || !bufa_ptr){
4892       ierr = PetscFree2(sstartsj,rstartsj);CHKERRQ(ierr);
4893       ierr = PetscFree(bufa);CHKERRQ(ierr); /* free the send buffer itself; bufa_ptr is the caller's (possibly null) pointer argument */
4894     } else {
4895       *startsj   = sstartsj;
4896       *startsj_r = rstartsj;
4897       *bufa_ptr  = bufa;
4898     }
4899   }
4900   ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
4901   PetscFunctionReturn(0);
4902 }
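
/*
   Example usage (an editorial sketch; A and B as in MatGetBrowsOfAcols(); passing
   the addresses of null-initialized buffers lets the routine allocate them and
   save them for reuse):

     Mat       B_oth;
     PetscInt  *startsj = PETSC_NULL,*startsj_r = PETSC_NULL;
     MatScalar *bufa = PETSC_NULL;
     ierr = MatGetBrowsOfAoCols(A,B,MAT_INITIAL_MATRIX,&startsj,&startsj_r,&bufa,&B_oth);CHKERRQ(ierr);
     ...
     ierr = MatGetBrowsOfAoCols(A,B,MAT_REUSE_MATRIX,&startsj,&startsj_r,&bufa,&B_oth);CHKERRQ(ierr);
*/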
4903 
4904 #undef __FUNCT__
4905 #define __FUNCT__ "MatGetCommunicationStructs"
4906 /*@C
4907   MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication.
4908 
4909   Not Collective
4910 
4911   Input Parameters:
4912 . A - The matrix in mpiaij format
4913 
4914   Output Parameter:
4915 + lvec - The local vector holding off-process values from the argument to a matrix-vector product
4916 . colmap - A map from global column index to local index into lvec
4917 - multScatter - A scatter from the argument of a matrix-vector product to lvec
4918 
4919   Level: developer
4920 
4921 @*/
4922 #if defined (PETSC_USE_CTABLE)
4923 PetscErrorCode PETSCMAT_DLLEXPORT MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter)
4924 #else
4925 PetscErrorCode PETSCMAT_DLLEXPORT MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter)
4926 #endif
4927 {
4928   Mat_MPIAIJ *a;
4929 
4930   PetscFunctionBegin;
4931   PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
4932   PetscValidPointer(lvec, 2);
4933   PetscValidPointer(colmap, 3);
4934   PetscValidPointer(multScatter, 4);
4935   a = (Mat_MPIAIJ *) A->data;
4936   if (lvec) *lvec = a->lvec;
4937   if (colmap) *colmap = a->colmap;
4938   if (multScatter) *multScatter = a->Mvctx;
4939   PetscFunctionReturn(0);
4940 }
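
/*
   Example usage (an editorial sketch; "A" is an assembled MPIAIJ matrix and the
   PETSC_USE_CTABLE variant of colmap is assumed not to be in effect):

     Vec        lvec;
     PetscInt   *colmap;
     VecScatter mvctx;
     ierr = MatGetCommunicationStructs(A,&lvec,&colmap,&mvctx);CHKERRQ(ierr);
     ...these objects remain owned by A and must not be destroyed by the caller...
*/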
4941 
4942 EXTERN_C_BEGIN
4943 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_MPIAIJ_MPICRL(Mat,const MatType,MatReuse,Mat*);
4944 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_MPIAIJ_MPICSRPERM(Mat,const MatType,MatReuse,Mat*);
4945 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_MPIAIJ_MPISBAIJ(Mat,const MatType,MatReuse,Mat*);
4946 EXTERN_C_END
4947 
4948 #undef __FUNCT__
4949 #define __FUNCT__ "MatMatMultNumeric_MPIDense_MPIAIJ"
4950 /*
4951     Computes C = A*B as (B'*A')', since forming the dense-times-sparse product directly is untenable
4952 
4953                n                       p                          p
4954         (              )       (              )         (                  )
4955       m (      A       )  *  n (       B      )   =   m (         C        )
4956         (              )       (              )         (                  )
4957 
4958 */
4959 PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C)
4960 {
4961   PetscErrorCode     ierr;
4962   Mat                At,Bt,Ct;
4963 
4964   PetscFunctionBegin;
4965   ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr);
4966   ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr);
4967   ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,1.0,&Ct);CHKERRQ(ierr);
4968   ierr = MatDestroy(At);CHKERRQ(ierr);
4969   ierr = MatDestroy(Bt);CHKERRQ(ierr);
4970   ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr);
4971   ierr = MatDestroy(Ct);CHKERRQ(ierr);
4972   PetscFunctionReturn(0);
4973 }
4974 
4975 #undef __FUNCT__
4976 #define __FUNCT__ "MatMatMultSymbolic_MPIDense_MPIAIJ"
4977 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat *C)
4978 {
4979   PetscErrorCode ierr;
4980   PetscInt       m=A->rmap->n,n=B->cmap->n;
4981   Mat            Cmat;
4982 
4983   PetscFunctionBegin;
4984   if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %d != B->rmap->n %d\n",A->cmap->n,B->rmap->n);
4985   ierr = MatCreate(((PetscObject)A)->comm,&Cmat);CHKERRQ(ierr);
4986   ierr = MatSetSizes(Cmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
4987   ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr);
4988   ierr = MatMPIDenseSetPreallocation(Cmat,PETSC_NULL);CHKERRQ(ierr);
4989   ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4990   ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4991   *C   = Cmat;
4992   PetscFunctionReturn(0);
4993 }
4994 
4995 /* ----------------------------------------------------------------*/
4996 #undef __FUNCT__
4997 #define __FUNCT__ "MatMatMult_MPIDense_MPIAIJ"
4998 PetscErrorCode MatMatMult_MPIDense_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C)
4999 {
5000   PetscErrorCode ierr;
5001 
5002   PetscFunctionBegin;
5003   if (scall == MAT_INITIAL_MATRIX){
5004     ierr = MatMatMultSymbolic_MPIDense_MPIAIJ(A,B,fill,C);CHKERRQ(ierr);
5005   }
5006   ierr = MatMatMultNumeric_MPIDense_MPIAIJ(A,B,*C);CHKERRQ(ierr);
5007   PetscFunctionReturn(0);
5008 }
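
/*
   Example usage (an editorial sketch; A is an assembled MPIDENSE matrix and B an
   assembled MPIAIJ matrix with A->cmap->n == B->rmap->n):

     Mat C;
     ierr = MatMatMult(A,B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&C);CHKERRQ(ierr);
     ...
     ierr = MatDestroy(C);CHKERRQ(ierr);
*/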
5009 
5010 EXTERN_C_BEGIN
5011 #if defined(PETSC_HAVE_MUMPS)
5012 extern PetscErrorCode MatGetFactor_aij_mumps(Mat,MatFactorType,Mat*);
5013 #endif
5014 #if defined(PETSC_HAVE_PASTIX)
5015 extern PetscErrorCode MatGetFactor_mpiaij_pastix(Mat,MatFactorType,Mat*);
5016 #endif
5017 #if defined(PETSC_HAVE_SUPERLU_DIST)
5018 extern PetscErrorCode MatGetFactor_mpiaij_superlu_dist(Mat,MatFactorType,Mat*);
5019 #endif
5020 #if defined(PETSC_HAVE_SPOOLES)
5021 extern PetscErrorCode MatGetFactor_mpiaij_spooles(Mat,MatFactorType,Mat*);
5022 #endif
5023 EXTERN_C_END
5024 
5025 /*MC
5026    MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.
5027 
5028    Options Database Keys:
5029 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions()
5030 
5031   Level: beginner
5032 
5033 .seealso: MatCreateMPIAIJ()
5034 M*/
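
/*
   Example usage (an editorial sketch; comm, m and n are assumed to be given, and
   the preallocation counts 5 and 2 are purely illustrative):

     Mat A;
     ierr = MatCreate(comm,&A);CHKERRQ(ierr);
     ierr = MatSetSizes(A,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr); /* or MatSetFromOptions(A) with -mat_type mpiaij */
     ierr = MatMPIAIJSetPreallocation(A,5,PETSC_NULL,2,PETSC_NULL);CHKERRQ(ierr);
*/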
5035 
5036 EXTERN_C_BEGIN
5037 #undef __FUNCT__
5038 #define __FUNCT__ "MatCreate_MPIAIJ"
5039 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_MPIAIJ(Mat B)
5040 {
5041   Mat_MPIAIJ     *b;
5042   PetscErrorCode ierr;
5043   PetscMPIInt    size;
5044 
5045   PetscFunctionBegin;
5046   ierr = MPI_Comm_size(((PetscObject)B)->comm,&size);CHKERRQ(ierr);
5047 
5048   ierr            = PetscNewLog(B,Mat_MPIAIJ,&b);CHKERRQ(ierr);
5049   B->data         = (void*)b;
5050   ierr            = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
5051   B->rmap->bs     = 1;
5052   B->assembled    = PETSC_FALSE;
5053   B->mapping      = 0;
5054 
5055   B->insertmode   = NOT_SET_VALUES;
5056   b->size         = size;
5057   ierr = MPI_Comm_rank(((PetscObject)B)->comm,&b->rank);CHKERRQ(ierr);
5058 
5059   /* build cache for off array entries formed */
5060   ierr = MatStashCreate_Private(((PetscObject)B)->comm,1,&B->stash);CHKERRQ(ierr);
5061   b->donotstash  = PETSC_FALSE;
5062   b->colmap      = 0;
5063   b->garray      = 0;
5064   b->roworiented = PETSC_TRUE;
5065 
5066   /* stuff used for matrix vector multiply */
5067   b->lvec      = PETSC_NULL;
5068   b->Mvctx     = PETSC_NULL;
5069 
5070   /* stuff for MatGetRow() */
5071   b->rowindices   = 0;
5072   b->rowvalues    = 0;
5073   b->getrowactive = PETSC_FALSE;
5074 
5075 #if defined(PETSC_HAVE_SPOOLES)
5076   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_spooles_C",
5077                                      "MatGetFactor_mpiaij_spooles",
5078                                      MatGetFactor_mpiaij_spooles);CHKERRQ(ierr);
5079 #endif
5080 #if defined(PETSC_HAVE_MUMPS)
5081   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_mumps_C",
5082                                      "MatGetFactor_aij_mumps",
5083                                      MatGetFactor_aij_mumps);CHKERRQ(ierr);
5084 #endif
5085 #if defined(PETSC_HAVE_PASTIX)
5086   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_pastix_C",
5087 					   "MatGetFactor_mpiaij_pastix",
5088 					   MatGetFactor_mpiaij_pastix);CHKERRQ(ierr);
5089 #endif
5090 #if defined(PETSC_HAVE_SUPERLU_DIST)
5091   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_superlu_dist_C",
5092                                      "MatGetFactor_mpiaij_superlu_dist",
5093                                      MatGetFactor_mpiaij_superlu_dist);CHKERRQ(ierr);
5094 #endif
5095   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
5096                                      "MatStoreValues_MPIAIJ",
5097                                      MatStoreValues_MPIAIJ);CHKERRQ(ierr);
5098   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
5099                                      "MatRetrieveValues_MPIAIJ",
5100                                      MatRetrieveValues_MPIAIJ);CHKERRQ(ierr);
5101   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
5102 				     "MatGetDiagonalBlock_MPIAIJ",
5103                                      MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr);
5104   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatIsTranspose_C",
5105 				     "MatIsTranspose_MPIAIJ",
5106 				     MatIsTranspose_MPIAIJ);CHKERRQ(ierr);
5107   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocation_C",
5108 				     "MatMPIAIJSetPreallocation_MPIAIJ",
5109 				     MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr);
5110   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",
5111 				     "MatMPIAIJSetPreallocationCSR_MPIAIJ",
5112 				     MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr);
5113   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C",
5114 				     "MatDiagonalScaleLocal_MPIAIJ",
5115 				     MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr);
5116   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpicsrperm_C",
5117                                      "MatConvert_MPIAIJ_MPICSRPERM",
5118                                       MatConvert_MPIAIJ_MPICSRPERM);CHKERRQ(ierr);
5119   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpicrl_C",
5120                                      "MatConvert_MPIAIJ_MPICRL",
5121                                       MatConvert_MPIAIJ_MPICRL);CHKERRQ(ierr);
5122   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C",
5123                                      "MatConvert_MPIAIJ_MPISBAIJ",
5124                                       MatConvert_MPIAIJ_MPISBAIJ);CHKERRQ(ierr);
5125   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMult_mpidense_mpiaij_C",
5126                                      "MatMatMult_MPIDense_MPIAIJ",
5127                                       MatMatMult_MPIDense_MPIAIJ);CHKERRQ(ierr);
5128   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMultSymbolic_mpidense_mpiaij_C",
5129                                      "MatMatMultSymbolic_MPIDense_MPIAIJ",
5130                                      MatMatMultSymbolic_MPIDense_MPIAIJ);CHKERRQ(ierr);
5131   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMultNumeric_mpidense_mpiaij_C",
5132                                      "MatMatMultNumeric_MPIDense_MPIAIJ",
5133                                       MatMatMultNumeric_MPIDense_MPIAIJ);CHKERRQ(ierr);
5134   ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr);
5135   PetscFunctionReturn(0);
5136 }
5137 EXTERN_C_END
5138 
5139 #undef __FUNCT__
5140 #define __FUNCT__ "MatCreateMPIAIJWithSplitArrays"
5141 /*@
5142      MatCreateMPIAIJWithSplitArrays - creates a MPI AIJ matrix using arrays that contain the "diagonal"
5143          and "off-diagonal" part of the matrix in CSR format.
5144 
5145    Collective on MPI_Comm
5146 
5147    Input Parameters:
5148 +  comm - MPI communicator
5149 .  m - number of local rows (Cannot be PETSC_DECIDE)
5150 .  n - number of local columns; this value should be the same as the local size used in creating the
5151        x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have it
5152        calculated if N is given). For square matrices n is almost always m.
5153 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
5154 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
5155 .   i - row indices for "diagonal" portion of matrix
5156 .   j - column indices
5157 .   a - matrix values
5158 .   oi - row indices for "off-diagonal" portion of matrix
5159 .   oj - column indices
5160 -   oa - matrix values
5161 
5162    Output Parameter:
5163 .   mat - the matrix
5164 
5165    Level: advanced
5166 
5167    Notes:
5168        The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc.
5169 
5170        The i and j indices are 0 based
5171 
5172        See MatCreateMPIAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix
5173 
5174        This sets local rows and cannot be used to set off-processor values.
5175 
5176        You cannot later use MatSetValues() to change values in this matrix.
5177 
5178 .keywords: matrix, aij, compressed row, sparse, parallel
5179 
5180 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
5181           MPIAIJ, MatCreateMPIAIJ(), MatCreateMPIAIJWithArrays()
5182 @*/
5183 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],
5184 								PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat)
5185 {
5186   PetscErrorCode ierr;
5187   Mat_MPIAIJ     *maij;
5188 
5189   PetscFunctionBegin;
5190   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
5191   if (i[0]) {
5192     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
5193   }
5194   if (oi[0]) {
5195     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0");
5196   }
5197   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
5198   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
5199   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
5200   maij = (Mat_MPIAIJ*) (*mat)->data;
5201   maij->donotstash     = PETSC_TRUE;
5202   (*mat)->preallocated = PETSC_TRUE;
5203 
5204   ierr = PetscLayoutSetBlockSize((*mat)->rmap,1);CHKERRQ(ierr);
5205   ierr = PetscLayoutSetBlockSize((*mat)->cmap,1);CHKERRQ(ierr);
5206   ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr);
5207   ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr);
5208 
5209   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr);
5210   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr);
5211 
5212   ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5213   ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5214   ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5215   ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5216 
5217   ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5218   ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5219   PetscFunctionReturn(0);
5220 }
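
/*
   Example usage (an editorial sketch with one local row and column per process;
   "comm" is assumed, the nonzero value is illustrative, and the arrays must stay
   alive as long as the matrix since they are NOT copied):

     PetscInt    i[2]  = {0,1},j[1] = {0};
     PetscScalar a[1]  = {2.0};
     PetscInt    oi[2] = {0,0},oj[1];   /* off-diagonal part is empty */
     PetscScalar oa[1];
     Mat         A;
     ierr = MatCreateMPIAIJWithSplitArrays(comm,1,1,PETSC_DETERMINE,PETSC_DETERMINE,i,j,a,oi,oj,oa,&A);CHKERRQ(ierr);
*/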
5221 
5222 /*
5223     Special version for direct calls from Fortran
5224 */
5225 #if defined(PETSC_HAVE_FORTRAN_CAPS)
5226 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ
5227 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
5228 #define matsetvaluesmpiaij_ matsetvaluesmpiaij
5229 #endif
5230 
5231 /* Change these macros so they can be used in a void function */
5232 #undef CHKERRQ
5233 #define CHKERRQ(ierr) CHKERRABORT(PETSC_COMM_WORLD,ierr)
5234 #undef SETERRQ2
5235 #define SETERRQ2(comm,ierr,b,c,d) CHKERRABORT(comm,ierr)
5236 #undef SETERRQ
5237 #define SETERRQ(c,ierr,b) CHKERRABORT(c,ierr)
5238 
5239 EXTERN_C_BEGIN
5240 #undef __FUNCT__
5241 #define __FUNCT__ "matsetvaluesmpiaij_"
5242 void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr)
5243 {
5244   Mat             mat = *mmat;
5245   PetscInt        m = *mm, n = *mn;
5246   InsertMode      addv = *maddv;
5247   Mat_MPIAIJ      *aij = (Mat_MPIAIJ*)mat->data;
5248   PetscScalar     value;
5249   PetscErrorCode  ierr;
5250 
5251   ierr = MatPreallocated(mat);CHKERRQ(ierr);
5252   if (mat->insertmode == NOT_SET_VALUES) {
5253     mat->insertmode = addv;
5254   }
5255 #if defined(PETSC_USE_DEBUG)
5256   else if (mat->insertmode != addv) {
5257     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
5258   }
5259 #endif
5260   {
5261   PetscInt        i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
5262   PetscInt        cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
5263   PetscTruth      roworiented = aij->roworiented;
5264 
5265   /* Some Variables required in the macro */
5266   Mat             A = aij->A;
5267   Mat_SeqAIJ      *a = (Mat_SeqAIJ*)A->data;
5268   PetscInt        *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
5269   MatScalar       *aa = a->a;
5270   PetscTruth      ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES))?PETSC_TRUE:PETSC_FALSE);
5271   Mat             B = aij->B;
5272   Mat_SeqAIJ      *b = (Mat_SeqAIJ*)B->data;
5273   PetscInt        *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
5274   MatScalar       *ba = b->a;
5275 
5276   PetscInt        *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
5277   PetscInt        nonew = a->nonew;
5278   MatScalar       *ap1,*ap2;
5279 
5280   PetscFunctionBegin;
5281   for (i=0; i<m; i++) {
5282     if (im[i] < 0) continue;
5283 #if defined(PETSC_USE_DEBUG)
5284     if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
5285 #endif
5286     if (im[i] >= rstart && im[i] < rend) {
5287       row      = im[i] - rstart;
5288       lastcol1 = -1;
5289       rp1      = aj + ai[row];
5290       ap1      = aa + ai[row];
5291       rmax1    = aimax[row];
5292       nrow1    = ailen[row];
5293       low1     = 0;
5294       high1    = nrow1;
5295       lastcol2 = -1;
5296       rp2      = bj + bi[row];
5297       ap2      = ba + bi[row];
5298       rmax2    = bimax[row];
5299       nrow2    = bilen[row];
5300       low2     = 0;
5301       high2    = nrow2;
5302 
5303       for (j=0; j<n; j++) {
5304         if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
5305         if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
5306         if (in[j] >= cstart && in[j] < cend){
5307           col = in[j] - cstart;
5308           MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
5309         } else if (in[j] < 0) continue;
5310 #if defined(PETSC_USE_DEBUG)
5311         else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
5312 #endif
5313         else {
5314           if (mat->was_assembled) {
5315             if (!aij->colmap) {
5316               ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
5317             }
5318 #if defined (PETSC_USE_CTABLE)
5319             ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
5320 	    col--;
5321 #else
5322             col = aij->colmap[in[j]] - 1;
5323 #endif
5324             if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
5325               ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
5326               col =  in[j];
5327               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
5328               B = aij->B;
5329               b = (Mat_SeqAIJ*)B->data;
5330               bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
5331               rp2      = bj + bi[row];
5332               ap2      = ba + bi[row];
5333               rmax2    = bimax[row];
5334               nrow2    = bilen[row];
5335               low2     = 0;
5336               high2    = nrow2;
5337               bm       = aij->B->rmap->n;
5338               ba = b->a;
5339             }
5340           } else col = in[j];
5341           MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
5342         }
5343       }
5344     } else {
5345       if (!aij->donotstash) {
5346         if (roworiented) {
5347           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscTruth)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
5348         } else {
5349           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscTruth)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
5350         }
5351       }
5352     }
5353   }}
5354   PetscFunctionReturnVoid();
5355 }
5356 EXTERN_C_END
5357 
5358