xref: /petsc/src/mat/impls/aij/mpi/mpiaij.c (revision 42eaa7f3b58bc3af4b9ef28b03ccaa8b4c1282ac)
1 #define PETSCMAT_DLL
2 
3 #include "../src/mat/impls/aij/mpi/mpiaij.h"   /*I "petscmat.h" I*/
4 #include "petscblaslapack.h"
5 
6 #undef __FUNCT__
7 #define __FUNCT__ "MatDistribute_MPIAIJ"
8 /*
9     Distributes a SeqAIJ matrix across a set of processes. Code stolen from
10     MatLoad_MPIAIJ(). Horrible lack of reuse. Should be a routine for each matrix type.
11 
12     Only for square matrices
13 */
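/*
   A minimal usage sketch (hypothetical variable names, for illustration only):
   rank 0 holds a square SeqAIJ matrix gmat and every rank passes the number
   of rows m it is to own.

     Mat      gmat;     (SeqAIJ matrix, significant only on rank 0)
     Mat      dmat;     (resulting distributed matrix)
     PetscInt m;        (number of locally owned rows)
     ierr = MatDistribute_MPIAIJ(PETSC_COMM_WORLD,gmat,m,MAT_INITIAL_MATRIX,&dmat);CHKERRQ(ierr);
     (later: keep the layout, only move over new numerical values from rank 0)
     ierr = MatDistribute_MPIAIJ(PETSC_COMM_WORLD,gmat,m,MAT_REUSE_MATRIX,&dmat);CHKERRQ(ierr);
*/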
14 PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat)
15 {
16   PetscMPIInt    rank,size;
17   PetscInt       *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz,*gmataj,cnt,row,*ld;
18   PetscErrorCode ierr;
19   Mat            mat;
20   Mat_SeqAIJ     *gmata;
21   PetscMPIInt    tag;
22   MPI_Status     status;
23   PetscTruth     aij;
24   MatScalar      *gmataa,*ao,*ad,*gmataarestore=0;
25 
26   PetscFunctionBegin;
27   CHKMEMQ;
28   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
29   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
30   if (!rank) {
31     ierr = PetscTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);CHKERRQ(ierr);
32     if (!aij) SETERRQ1(((PetscObject)gmat)->comm,PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name);
33   }
34   if (reuse == MAT_INITIAL_MATRIX) {
35     ierr = MatCreate(comm,&mat);CHKERRQ(ierr);
36     ierr = MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
37     ierr = MatSetType(mat,MATAIJ);CHKERRQ(ierr);
38     ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr);
39     ierr = PetscMalloc2(m,PetscInt,&dlens,m,PetscInt,&olens);CHKERRQ(ierr);
40     ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
41     rowners[0] = 0;
42     for (i=2; i<=size; i++) {
43       rowners[i] += rowners[i-1];
44     }
45     rstart = rowners[rank];
46     rend   = rowners[rank+1];
47     ierr   = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
48     if (!rank) {
49       gmata = (Mat_SeqAIJ*) gmat->data;
50       /* send each remaining process its portion of the row lengths */
51       for (i=0; i<m; i++) dlens[i] = gmata->ilen[i];
52       for (i=1; i<size; i++) {
53 	ierr = MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
54       }
55       /* determine the number of diagonal and off-diagonal entries in each row */
56       ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
57       ierr = PetscMalloc(m*sizeof(PetscInt),&ld);CHKERRQ(ierr);
58       ierr = PetscMemzero(ld,m*sizeof(PetscInt));CHKERRQ(ierr);
59       jj = 0;
60       for (i=0; i<m; i++) {
61 	for (j=0; j<dlens[i]; j++) {
62           if (gmata->j[jj] < rstart) ld[i]++;
63 	  if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++;
64 	  jj++;
65 	}
66       }
67       /* send column indices to other processes */
68       for (i=1; i<size; i++) {
69 	nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
70 	ierr = MPI_Send(&nz,1,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
71 	ierr = MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
72       }
73 
74       /* send numerical values to other processes */
75       for (i=1; i<size; i++) {
76         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
77         ierr = MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
78       }
79       gmataa = gmata->a;
80       gmataj = gmata->j;
81 
82     } else {
83       /* receive row lengths */
84       ierr = MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
85       /* receive column indices */
86       ierr = MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
87       ierr = PetscMalloc2(nz,PetscScalar,&gmataa,nz,PetscInt,&gmataj);CHKERRQ(ierr);
88       ierr = MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
89       /* determine the number of diagonal and off-diagonal entries in each row */
90       ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
91       ierr = PetscMalloc(m*sizeof(PetscInt),&ld);CHKERRQ(ierr);
92       ierr = PetscMemzero(ld,m*sizeof(PetscInt));CHKERRQ(ierr);
93       jj = 0;
94       for (i=0; i<m; i++) {
95 	for (j=0; j<dlens[i]; j++) {
96           if (gmataj[jj] < rstart) ld[i]++;
97 	  if (gmataj[jj] < rstart || gmataj[jj] >= rend) olens[i]++;
98 	  jj++;
99 	}
100       }
101       /* receive numerical values */
102       ierr = PetscMemzero(gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr);
103       ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
104     }
105     /* set preallocation */
106     for (i=0; i<m; i++) {
107       dlens[i] -= olens[i];
108     }
109     ierr = MatSeqAIJSetPreallocation(mat,0,dlens);CHKERRQ(ierr);
110     ierr = MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);CHKERRQ(ierr);
111 
112     for (i=0; i<m; i++) {
113       dlens[i] += olens[i];
114     }
115     cnt  = 0;
116     for (i=0; i<m; i++) {
117       row  = rstart + i;
118       ierr = MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);CHKERRQ(ierr);
119       cnt += dlens[i];
120     }
121     if (rank) {
122       ierr = PetscFree2(gmataa,gmataj);CHKERRQ(ierr);
123     }
124     ierr = PetscFree2(dlens,olens);CHKERRQ(ierr);
125     ierr = PetscFree(rowners);CHKERRQ(ierr);
126     ((Mat_MPIAIJ*)(mat->data))->ld = ld;
127     *inmat = mat;
128   } else {   /* column indices are already set; only need to move over numerical values from process 0 */
129     Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data;
130     Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data;
131     mat   = *inmat;
132     ierr  = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
133     if (!rank) {
134       /* send numerical values to other processes */
135       gmata = (Mat_SeqAIJ*) gmat->data;
136       ierr   = MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);CHKERRQ(ierr);
137       gmataa = gmata->a;
138       for (i=1; i<size; i++) {
139         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
140         ierr = MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
141       }
142       nz   = gmata->i[rowners[1]]-gmata->i[rowners[0]];
143     } else {
144       /* receive numerical values from process 0 */
145       nz   = Ad->nz + Ao->nz;
146       ierr = PetscMalloc(nz*sizeof(PetscScalar),&gmataa);CHKERRQ(ierr); gmataarestore = gmataa;
147       ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
148     }
149     /* transfer numerical values into the diagonal A and off-diagonal B parts of mat */
150     ld = ((Mat_MPIAIJ*)(mat->data))->ld;
151     ad = Ad->a;
152     ao = Ao->a;
153     if (mat->rmap->n) {
154       i  = 0;
155       nz = ld[i];                                   ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
156       nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
157     }
158     for (i=1; i<mat->rmap->n; i++) {
159       nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
160       nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
161     }
162     i--;
163     if (mat->rmap->n) {
164       nz = Ao->i[i+1] - Ao->i[i] - ld[i];           ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
165     }
166     if (rank) {
167       ierr = PetscFree(gmataarestore);CHKERRQ(ierr);
168     }
169   }
170   ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
171   ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
172   CHKMEMQ;
173   PetscFunctionReturn(0);
174 }
175 
176 /*
177   Local utility routine that creates a mapping from the global column
178 number to the local number in the off-diagonal part of the local
179 storage of the matrix.  When PETSC_USE_CTABLE is used this is scalable at
180 a slightly higher hash table cost; without it, it is not scalable (each process
181 has an order-N integer array) but is fast to access.
182 */
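/*
   Illustration with assumed values: if the off-diagonal block B has
   garray = {3, 17, 42}, then after this routine, for a global column c,

     colmap[3]  == 1    (local column 0 of B)
     colmap[17] == 2    (local column 1 of B)
     colmap[42] == 3    (local column 2 of B)
     colmap[c]  == 0    (c not present in B)

   so callers recover the local index as col = colmap[c] - 1 and treat
   col < 0 as "column not in the off-diagonal block"; with PETSC_USE_CTABLE
   the same shift by one is applied to the key and value in PetscTableFind().
*/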
183 #undef __FUNCT__
184 #define __FUNCT__ "CreateColmap_MPIAIJ_Private"
185 PetscErrorCode CreateColmap_MPIAIJ_Private(Mat mat)
186 {
187   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
188   PetscErrorCode ierr;
189   PetscInt       n = aij->B->cmap->n,i;
190 
191   PetscFunctionBegin;
192 #if defined (PETSC_USE_CTABLE)
193   ierr = PetscTableCreate(n,&aij->colmap);CHKERRQ(ierr);
194   for (i=0; i<n; i++){
195     ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1);CHKERRQ(ierr);
196   }
197 #else
198   ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscInt),&aij->colmap);CHKERRQ(ierr);
199   ierr = PetscLogObjectMemory(mat,mat->cmap->N*sizeof(PetscInt));CHKERRQ(ierr);
200   ierr = PetscMemzero(aij->colmap,mat->cmap->N*sizeof(PetscInt));CHKERRQ(ierr);
201   for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
202 #endif
203   PetscFunctionReturn(0);
204 }
205 
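/*
   The two macros below inline the inner loop of MatSetValues() for the
   diagonal (A) and off-diagonal (B) blocks: a binary search narrows the
   candidate range within the sorted row to at most five entries, a linear
   scan finishes the lookup, and on a miss the later entries are shifted up
   to make room for the new nonzero.  A sketch of the search, with assumed
   names rp (column indices of the row) and ap (values of the row):

     while (high - low > 5) {
       t = (low + high)/2;
       if (rp[t] > col) high = t;    (col must lie in rp[low..t))
       else             low  = t;    (col must lie in rp[t..high))
     }
     for (i=low; i<high; i++) {
       if (rp[i] == col) { ap[i] = value; break; }   (found: overwrite or add)
     }
*/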
206 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \
207 { \
208     if (col <= lastcol1) low1 = 0; else high1 = nrow1; \
209     lastcol1 = col;\
210     while (high1-low1 > 5) { \
211       t = (low1+high1)/2; \
212       if (rp1[t] > col) high1 = t; \
213       else             low1  = t; \
214     } \
215       for (_i=low1; _i<high1; _i++) { \
216         if (rp1[_i] > col) break; \
217         if (rp1[_i] == col) { \
218           if (addv == ADD_VALUES) ap1[_i] += value;   \
219           else                    ap1[_i] = value; \
220           goto a_noinsert; \
221         } \
222       }  \
223       if (value == 0.0 && ignorezeroentries) {low1 = 0; high1 = nrow1;goto a_noinsert;} \
224       if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;}		\
225       if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
226       MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \
227       N = nrow1++ - 1; a->nz++; high1++; \
228       /* shift up all the later entries in this row */ \
229       for (ii=N; ii>=_i; ii--) { \
230         rp1[ii+1] = rp1[ii]; \
231         ap1[ii+1] = ap1[ii]; \
232       } \
233       rp1[_i] = col;  \
234       ap1[_i] = value;  \
235       a_noinsert: ; \
236       ailen[row] = nrow1; \
237 }
238 
239 
240 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \
241 { \
242     if (col <= lastcol2) low2 = 0; else high2 = nrow2; \
243     lastcol2 = col;\
244     while (high2-low2 > 5) { \
245       t = (low2+high2)/2; \
246       if (rp2[t] > col) high2 = t; \
247       else             low2  = t; \
248     } \
249     for (_i=low2; _i<high2; _i++) {		\
250       if (rp2[_i] > col) break;			\
251       if (rp2[_i] == col) {			      \
252 	if (addv == ADD_VALUES) ap2[_i] += value;     \
253 	else                    ap2[_i] = value;      \
254 	goto b_noinsert;			      \
255       }						      \
256     }							      \
257     if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
258     if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;}		\
259     if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
260     MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \
261     N = nrow2++ - 1; b->nz++; high2++;					\
262     /* shift up all the later entries in this row */			\
263     for (ii=N; ii>=_i; ii--) {						\
264       rp2[ii+1] = rp2[ii];						\
265       ap2[ii+1] = ap2[ii];						\
266     }									\
267     rp2[_i] = col;							\
268     ap2[_i] = value;							\
269     b_noinsert: ;								\
270     bilen[row] = nrow2;							\
271 }
272 
273 #undef __FUNCT__
274 #define __FUNCT__ "MatSetValuesRow_MPIAIJ"
275 PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[])
276 {
277   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)A->data;
278   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data;
279   PetscErrorCode ierr;
280   PetscInt       l,*garray = mat->garray,diag;
281 
282   PetscFunctionBegin;
283   /* code only works for square matrices A */
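  /*
     v is assumed to hold the entire compressed row in increasing global
     column order: [off-diagonal entries left of the diagonal block |
     diagonal-block entries | off-diagonal entries right of it]; the three
     PetscMemcpy() calls below split it accordingly
  */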
284 
285   /* find the size of the part of the row to the left of the diagonal block */
286   ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr);
287   row  = row - diag;
288   for (l=0; l<b->i[row+1]-b->i[row]; l++) {
289     if (garray[b->j[b->i[row]+l]] > diag) break;
290   }
291   ierr = PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));CHKERRQ(ierr);
292 
293   /* diagonal part */
294   ierr = PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));CHKERRQ(ierr);
295 
296   /* right of diagonal part */
297   ierr = PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));CHKERRQ(ierr);
298   PetscFunctionReturn(0);
299 }
300 
301 #undef __FUNCT__
302 #define __FUNCT__ "MatSetValues_MPIAIJ"
303 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
304 {
305   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
306   PetscScalar    value;
307   PetscErrorCode ierr;
308   PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
309   PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
310   PetscTruth     roworiented = aij->roworiented;
311 
312   /* Some variables required by the MatSetValues_SeqAIJ_A/B_Private() macros */
313   Mat            A = aij->A;
314   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data;
315   PetscInt       *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
316   MatScalar      *aa = a->a;
317   PetscTruth     ignorezeroentries = a->ignorezeroentries;
318   Mat            B = aij->B;
319   Mat_SeqAIJ     *b = (Mat_SeqAIJ*)B->data;
320   PetscInt       *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
321   MatScalar      *ba = b->a;
322 
323   PetscInt       *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
324   PetscInt       nonew = a->nonew;
325   MatScalar      *ap1,*ap2;
326 
327   PetscFunctionBegin;
328   if (v) PetscValidScalarPointer(v,6);
329   for (i=0; i<m; i++) {
330     if (im[i] < 0) continue;
331 #if defined(PETSC_USE_DEBUG)
332     if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
333 #endif
334     if (im[i] >= rstart && im[i] < rend) {
335       row      = im[i] - rstart;
336       lastcol1 = -1;
337       rp1      = aj + ai[row];
338       ap1      = aa + ai[row];
339       rmax1    = aimax[row];
340       nrow1    = ailen[row];
341       low1     = 0;
342       high1    = nrow1;
343       lastcol2 = -1;
344       rp2      = bj + bi[row];
345       ap2      = ba + bi[row];
346       rmax2    = bimax[row];
347       nrow2    = bilen[row];
348       low2     = 0;
349       high2    = nrow2;
350 
351       for (j=0; j<n; j++) {
352         if (v) {if (roworiented) value = v[i*n+j]; else value = v[i+j*m];} else value = 0.0;
353         if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
354         if (in[j] >= cstart && in[j] < cend){
355           col = in[j] - cstart;
356           MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
357         } else if (in[j] < 0) continue;
358 #if defined(PETSC_USE_DEBUG)
359         else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
360 #endif
361         else {
362           if (mat->was_assembled) {
363             if (!aij->colmap) {
364               ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
365             }
366 #if defined (PETSC_USE_CTABLE)
367             ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
368 	    col--;
369 #else
370             col = aij->colmap[in[j]] - 1;
371 #endif
372             if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
373               ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
374               col =  in[j];
375               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
376               B = aij->B;
377               b = (Mat_SeqAIJ*)B->data;
378               bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a;
379               rp2      = bj + bi[row];
380               ap2      = ba + bi[row];
381               rmax2    = bimax[row];
382               nrow2    = bilen[row];
383               low2     = 0;
384               high2    = nrow2;
385               bm       = aij->B->rmap->n;
386               ba = b->a;
387             }
388           } else col = in[j];
389           MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
390         }
391       }
392     } else {
393       if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
394       if (!aij->donotstash) {
395         if (roworiented) {
396           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscTruth)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
397         } else {
398           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscTruth)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
399         }
400       }
401     }
402   }
403   PetscFunctionReturn(0);
404 }
405 
406 #undef __FUNCT__
407 #define __FUNCT__ "MatGetValues_MPIAIJ"
408 PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
409 {
410   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
411   PetscErrorCode ierr;
412   PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
413   PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
414 
415   PetscFunctionBegin;
416   for (i=0; i<m; i++) {
417     if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/
418     if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
419     if (idxm[i] >= rstart && idxm[i] < rend) {
420       row = idxm[i] - rstart;
421       for (j=0; j<n; j++) {
422         if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */
423         if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
424         if (idxn[j] >= cstart && idxn[j] < cend){
425           col = idxn[j] - cstart;
426           ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
427         } else {
428           if (!aij->colmap) {
429             ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
430           }
431 #if defined (PETSC_USE_CTABLE)
432           ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
433           col --;
434 #else
435           col = aij->colmap[idxn[j]] - 1;
436 #endif
437           if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
438           else {
439             ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
440           }
441         }
442       }
443     } else {
444       SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
445     }
446   }
447   PetscFunctionReturn(0);
448 }
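/*
   As the error branch above shows, only locally owned rows may be queried.
   A hedged usage sketch (names assumed for illustration):

     PetscInt    rstart,rend,row,col = 0;
     PetscScalar val;
     ierr = MatGetOwnershipRange(mat,&rstart,&rend);CHKERRQ(ierr);
     row  = rstart;    (any row with rstart <= row < rend works)
     ierr = MatGetValues(mat,1,&row,1,&col,&val);CHKERRQ(ierr);
*/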
449 
450 extern PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat,Vec,Vec);
451 
452 #undef __FUNCT__
453 #define __FUNCT__ "MatAssemblyBegin_MPIAIJ"
454 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
455 {
456   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
457   PetscErrorCode ierr;
458   PetscInt       nstash,reallocs;
459   InsertMode     addv;
460 
461   PetscFunctionBegin;
462   if (aij->donotstash || mat->nooffprocentries) {
463     PetscFunctionReturn(0);
464   }
465 
466   /* make sure all processes are using either INSERT_VALUES or ADD_VALUES */
467   ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,((PetscObject)mat)->comm);CHKERRQ(ierr);
468   if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
469   mat->insertmode = addv; /* in case this processor had no cache */
470 
471   ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
472   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
473   ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
474   PetscFunctionReturn(0);
475 }
476 
477 #undef __FUNCT__
478 #define __FUNCT__ "MatAssemblyEnd_MPIAIJ"
479 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
480 {
481   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
482   Mat_SeqAIJ     *a=(Mat_SeqAIJ *)aij->A->data;
483   PetscErrorCode ierr;
484   PetscMPIInt    n;
485   PetscInt       i,j,rstart,ncols,flg;
486   PetscInt       *row,*col;
487   PetscTruth     other_disassembled;
488   PetscScalar    *val;
489   InsertMode     addv = mat->insertmode;
490 
491   /* do not use 'b = (Mat_SeqAIJ *)aij->B->data' as B can be reset in disassembly */
492   PetscFunctionBegin;
493   if (!aij->donotstash && !mat->nooffprocentries) {
494     while (1) {
495       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
496       if (!flg) break;
497 
498       for (i=0; i<n;) {
499         /* Now identify the consecutive vals belonging to the same row */
500         for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
501         if (j < n) ncols = j-i;
502         else       ncols = n-i;
503         /* Now assemble all these values with a single function call */
504         ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
505         i = j;
506       }
507     }
508     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
509   }
510   a->compressedrow.use     = PETSC_FALSE;
511   ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
512   ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);
513 
514   /* determine if any process has disassembled; if so we must
515      also disassemble ourselves, in order that we may reassemble */
516   /*
517      if the nonzero structure of submatrix B cannot change then we know that
518      no process disassembled, thus we can skip this step
519   */
520   if (!((Mat_SeqAIJ*)aij->B->data)->nonew)  {
521     ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,((PetscObject)mat)->comm);CHKERRQ(ierr);
522     if (mat->was_assembled && !other_disassembled) {
523       ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
524     }
525   }
526   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
527     ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
528   }
529   ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr);
530   ((Mat_SeqAIJ *)aij->B->data)->compressedrow.use = PETSC_TRUE; /* b->compressedrow.use */
531   ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
532   ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);
533 
534   ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
535   aij->rowvalues = 0;
536 
537   /* used by MatAXPY() */
538   a->xtoy = 0; ((Mat_SeqAIJ *)aij->B->data)->xtoy = 0;  /* b->xtoy = 0 */
539   a->XtoY = 0; ((Mat_SeqAIJ *)aij->B->data)->XtoY = 0;  /* b->XtoY = 0 */
540 
541   if (aij->diag) {ierr = VecDestroy(aij->diag);CHKERRQ(ierr);aij->diag = 0;}
542   if (a->inode.size) mat->ops->multdiagonalblock = MatMultDiagonalBlock_MPIAIJ;
543   PetscFunctionReturn(0);
544 }
545 
546 #undef __FUNCT__
547 #define __FUNCT__ "MatZeroEntries_MPIAIJ"
548 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
549 {
550   Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
551   PetscErrorCode ierr;
552 
553   PetscFunctionBegin;
554   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
555   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
556   PetscFunctionReturn(0);
557 }
558 
559 #undef __FUNCT__
560 #define __FUNCT__ "MatZeroRows_MPIAIJ"
561 PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag)
562 {
563   Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
564   PetscErrorCode ierr;
565   PetscMPIInt    size = l->size,imdex,n,rank = l->rank,tag = ((PetscObject)A)->tag,lastidx = -1;
566   PetscInt       i,*owners = A->rmap->range;
567   PetscInt       *nprocs,j,idx,nsends,row;
568   PetscInt       nmax,*svalues,*starts,*owner,nrecvs;
569   PetscInt       *rvalues,count,base,slen,*source;
570   PetscInt       *lens,*lrows,*values,rstart=A->rmap->rstart;
571   MPI_Comm       comm = ((PetscObject)A)->comm;
572   MPI_Request    *send_waits,*recv_waits;
573   MPI_Status     recv_status,*send_status;
574 #if defined(PETSC_DEBUG)
575   PetscTruth     found = PETSC_FALSE;
576 #endif
577 
578   PetscFunctionBegin;
579   /*  first count the number of rows destined for each process */
580   ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
581   ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
582   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/
583   j = 0;
584   for (i=0; i<N; i++) {
585     if (lastidx > (idx = rows[i])) j = 0;
586     lastidx = idx;
587     for (; j<size; j++) {
588       if (idx >= owners[j] && idx < owners[j+1]) {
589         nprocs[2*j]++;
590         nprocs[2*j+1] = 1;
591         owner[i] = j;
592 #if defined(PETSC_DEBUG)
593         found = PETSC_TRUE;
594 #endif
595         break;
596       }
597     }
598 #if defined(PETSC_DEBUG)
599     if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
600     found = PETSC_FALSE;
601 #endif
602   }
603   nsends = 0;  for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}
604 
605   if (A->nooffproczerorows) {
606     if (nsends > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"You called MatSetOption(,MAT_NO_OFF_PROC_ZERO_ROWS,PETSC_TRUE) but set an off process zero row");
607     nrecvs = nsends;
608     nmax   = N;
609   } else {
610     /* inform other processes of the number of messages and max length */
611     ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);
612   }
613 
614   /* post receives:   */
615   ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr);
616   ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
617   for (i=0; i<nrecvs; i++) {
618     ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
619   }
620 
621   /* do sends:
622       1) starts[i] gives the starting index in svalues for stuff going to
623          the ith processor
624   */
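  /*
     worked example with assumed counts: for size = 3 and per-process row
     counts nprocs[0],nprocs[2],nprocs[4] = 2,0,3 the prefix sums give
     starts = {0,2,2}, so rows owned by process 0 are packed into
     svalues[0..1] and rows owned by process 2 into svalues[2..4]
  */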
625   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr);
626   ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
627   ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr);
628   starts[0] = 0;
629   for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
630   for (i=0; i<N; i++) {
631     svalues[starts[owner[i]]++] = rows[i];
632   }
633 
634   starts[0] = 0;
635   for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
636   count = 0;
637   for (i=0; i<size; i++) {
638     if (nprocs[2*i+1]) {
639       ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
640     }
641   }
642   ierr = PetscFree(starts);CHKERRQ(ierr);
643 
644   base = owners[rank];
645 
646   /*  wait on receives */
647   ierr   = PetscMalloc2(nrecvs,PetscInt,&lens,nrecvs,PetscInt,&source);CHKERRQ(ierr);
648   count  = nrecvs; slen = 0;
649   while (count) {
650     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
651     /* unpack receives into our local space */
652     ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);
653     source[imdex]  = recv_status.MPI_SOURCE;
654     lens[imdex]    = n;
655     slen          += n;
656     count--;
657   }
658   ierr = PetscFree(recv_waits);CHKERRQ(ierr);
659 
660   /* unpack the received row indices, converting them to local numbering */
661   ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr);
662   count = 0;
663   for (i=0; i<nrecvs; i++) {
664     values = rvalues + i*nmax;
665     for (j=0; j<lens[i]; j++) {
666       lrows[count++] = values[j] - base;
667     }
668   }
669   ierr = PetscFree(rvalues);CHKERRQ(ierr);
670   ierr = PetscFree2(lens,source);CHKERRQ(ierr);
671   ierr = PetscFree(owner);CHKERRQ(ierr);
672   ierr = PetscFree(nprocs);CHKERRQ(ierr);
673 
674   /* actually zap the local rows */
675   /*
676         Zero the required rows. If the "diagonal block" of the matrix
677      is square and the user wishes to set the diagonal we use separate
678      code so that MatSetValues() is not called for each diagonal entry,
679      which would allocate new memory with many mallocs and slow things
680      down.
681   */
682   /* must zero l->B before l->A because the (diag) case below may put values into l->B*/
683   ierr = MatZeroRows(l->B,slen,lrows,0.0);CHKERRQ(ierr);
684   if ((diag != 0.0) && (l->A->rmap->N == l->A->cmap->N)) {
685     ierr      = MatZeroRows(l->A,slen,lrows,diag);CHKERRQ(ierr);
686   } else if (diag != 0.0) {
687     ierr = MatZeroRows(l->A,slen,lrows,0.0);CHKERRQ(ierr);
688     if (((Mat_SeqAIJ*)l->A->data)->nonew) {
689       SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\n\
690 MAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
691     }
692     for (i = 0; i < slen; i++) {
693       row  = lrows[i] + rstart;
694       ierr = MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr);
695     }
696     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
697     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
698   } else {
699     ierr = MatZeroRows(l->A,slen,lrows,0.0);CHKERRQ(ierr);
700   }
701   ierr = PetscFree(lrows);CHKERRQ(ierr);
702 
703   /* wait on sends */
704   if (nsends) {
705     ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
706     ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
707     ierr = PetscFree(send_status);CHKERRQ(ierr);
708   }
709   ierr = PetscFree(send_waits);CHKERRQ(ierr);
710   ierr = PetscFree(svalues);CHKERRQ(ierr);
711 
712   PetscFunctionReturn(0);
713 }
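/*
   A hedged usage sketch for the routine above (global row numbers, names
   assumed for illustration): zero two rows and put 1.0 on their diagonal.

     PetscInt rows[2] = {0, 5};
     ierr = MatZeroRows(A,2,rows,1.0);CHKERRQ(ierr);

   Rows owned by other processes are forwarded through the sends and receives
   above, unless MAT_NO_OFF_PROC_ZERO_ROWS was set.
*/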
714 
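/*
   MatMult for MPIAIJ computes y = A_d x_local + A_o x_ghost, overlapping
   communication with computation: VecScatterBegin() starts gathering the
   ghost values into lvec, the diagonal-block multiply runs while messages
   are in flight, VecScatterEnd() completes the scatter, and the off-diagonal
   block is applied as a multiply-add into yy.
*/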
715 #undef __FUNCT__
716 #define __FUNCT__ "MatMult_MPIAIJ"
717 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
718 {
719   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
720   PetscErrorCode ierr;
721   PetscInt       nt;
722 
723   PetscFunctionBegin;
724   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
725   if (nt != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt);
726   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
727   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
728   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
729   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
730   PetscFunctionReturn(0);
731 }
732 
733 #undef __FUNCT__
734 #define __FUNCT__ "MatMultDiagonalBlock_MPIAIJ"
735 PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx)
736 {
737   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
738   PetscErrorCode ierr;
739 
740   PetscFunctionBegin;
741   ierr = MatMultDiagonalBlock(a->A,bb,xx);CHKERRQ(ierr);
742   PetscFunctionReturn(0);
743 }
744 
745 #undef __FUNCT__
746 #define __FUNCT__ "MatMultAdd_MPIAIJ"
747 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
748 {
749   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
750   PetscErrorCode ierr;
751 
752   PetscFunctionBegin;
753   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
754   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
755   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
756   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
757   PetscFunctionReturn(0);
758 }
759 
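/*
   The transpose product runs the communication in the reverse direction:
   the off-diagonal contribution B^T x is computed into lvec and scattered
   back with ADD_VALUES while the diagonal block A^T x is applied directly
   into yy; whether the local multiply happens between the scatter Begin()
   and End() depends on whether the two are merged (see
   VecScatterGetMerged() below).
*/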
760 #undef __FUNCT__
761 #define __FUNCT__ "MatMultTranspose_MPIAIJ"
762 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
763 {
764   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
765   PetscErrorCode ierr;
766   PetscTruth     merged;
767 
768   PetscFunctionBegin;
769   ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr);
770   /* do nondiagonal part */
771   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
772   if (!merged) {
773     /* send it on its way */
774     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
775     /* do local part */
776     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
777     /* receive remote parts: note this assumes the values are not actually */
778     /* added into yy until the next line */
779     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
780   } else {
781     /* do local part */
782     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
783     /* send it on its way */
784     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
785     /* values were actually received in the Begin() but we still need to call this no-op */
786     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
787   }
788   PetscFunctionReturn(0);
789 }
790 
791 EXTERN_C_BEGIN
792 #undef __FUNCT__
793 #define __FUNCT__ "MatIsTranspose_MPIAIJ"
794 PetscErrorCode PETSCMAT_DLLEXPORT MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscTruth *f)
795 {
796   MPI_Comm       comm;
797   Mat_MPIAIJ     *Aij = (Mat_MPIAIJ *) Amat->data, *Bij;
798   Mat            Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs;
799   IS             Me,Notme;
800   PetscErrorCode ierr;
801   PetscInt       M,N,first,last,*notme,i;
802   PetscMPIInt    size;
803 
804   PetscFunctionBegin;
805 
806   /* Easy test: symmetric diagonal block */
807   Bij = (Mat_MPIAIJ *) Bmat->data; Bdia = Bij->A;
808   ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr);
809   if (!*f) PetscFunctionReturn(0);
810   ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr);
811   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
812   if (size == 1) PetscFunctionReturn(0);
813 
814   /* Hard test: off-diagonal block. This takes a MatGetSubMatrix. */
815   ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr);
816   ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr);
817   ierr = PetscMalloc((N-last+first)*sizeof(PetscInt),&notme);CHKERRQ(ierr);
818   for (i=0; i<first; i++) notme[i] = i;
819   for (i=last; i<M; i++) notme[i-last+first] = i;
820   ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,&Notme);CHKERRQ(ierr);
821   ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr);
822   ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr);
823   Aoff = Aoffs[0];
824   ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr);
825   Boff = Boffs[0];
826   ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr);
827   ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr);
828   ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr);
829   ierr = ISDestroy(Me);CHKERRQ(ierr);
830   ierr = ISDestroy(Notme);CHKERRQ(ierr);
831 
832   PetscFunctionReturn(0);
833 }
834 EXTERN_C_END
835 
836 #undef __FUNCT__
837 #define __FUNCT__ "MatMultTransposeAdd_MPIAIJ"
838 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
839 {
840   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
841   PetscErrorCode ierr;
842 
843   PetscFunctionBegin;
844   /* do nondiagonal part */
845   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
846   /* send it on its way */
847   ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
848   /* do local part */
849   ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
850   /* receive remote parts */
851   ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
852   PetscFunctionReturn(0);
853 }
854 
855 /*
856   This only works correctly for square matrices where the subblock A->A is the
857    diagonal block
858 */
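/*
   A hedged usage sketch (names assumed for illustration), mirroring the
   MatGetVecs()/MatGetDiagonal() pairing used by MatSOR_MPIAIJ() below:

     Vec d;
     ierr = MatGetVecs(A,&d,PETSC_NULL);CHKERRQ(ierr);
     ierr = MatGetDiagonal(A,d);CHKERRQ(ierr);
*/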
859 #undef __FUNCT__
860 #define __FUNCT__ "MatGetDiagonal_MPIAIJ"
861 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v)
862 {
863   PetscErrorCode ierr;
864   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
865 
866   PetscFunctionBegin;
867   if (A->rmap->N != A->cmap->N) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
868   if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
869   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
870   PetscFunctionReturn(0);
871 }
872 
873 #undef __FUNCT__
874 #define __FUNCT__ "MatScale_MPIAIJ"
875 PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa)
876 {
877   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
878   PetscErrorCode ierr;
879 
880   PetscFunctionBegin;
881   ierr = MatScale(a->A,aa);CHKERRQ(ierr);
882   ierr = MatScale(a->B,aa);CHKERRQ(ierr);
883   PetscFunctionReturn(0);
884 }
885 
886 #undef __FUNCT__
887 #define __FUNCT__ "MatDestroy_MPIAIJ"
888 PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
889 {
890   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
891   PetscErrorCode ierr;
892 
893   PetscFunctionBegin;
894 #if defined(PETSC_USE_LOG)
895   PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N);
896 #endif
897   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
898   if (aij->diag) {ierr = VecDestroy(aij->diag);CHKERRQ(ierr);}
899   if (aij->A){ierr = MatDestroy(aij->A);CHKERRQ(ierr);}
900   if (aij->B){ierr = MatDestroy(aij->B);CHKERRQ(ierr);}
901 #if defined (PETSC_USE_CTABLE)
902   if (aij->colmap) {ierr = PetscTableDestroy(aij->colmap);CHKERRQ(ierr);}
903 #else
904   ierr = PetscFree(aij->colmap);CHKERRQ(ierr);
905 #endif
906   ierr = PetscFree(aij->garray);CHKERRQ(ierr);
907   if (aij->lvec)   {ierr = VecDestroy(aij->lvec);CHKERRQ(ierr);}
908   if (aij->Mvctx)  {ierr = VecScatterDestroy(aij->Mvctx);CHKERRQ(ierr);}
909   ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
910   ierr = PetscFree(aij->ld);CHKERRQ(ierr);
911   ierr = PetscFree(aij);CHKERRQ(ierr);
912 
913   ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
914   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr);
915   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr);
916   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr);
917   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C","",PETSC_NULL);CHKERRQ(ierr);
918   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
919   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr);
920   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C","",PETSC_NULL);CHKERRQ(ierr);
921   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C","",PETSC_NULL);CHKERRQ(ierr);
922   PetscFunctionReturn(0);
923 }
924 
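/*
   The binary format written below is, in order: a four-entry header
   {MAT_FILE_CLASSID, M, N, total nonzeros}, the length of every row, the
   global column indices of every nonzero, and finally the numerical values;
   process 0 gathers each piece from the other processes one at a time and
   streams it to the file, so no process ever holds the entire matrix.
*/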
925 #undef __FUNCT__
926 #define __FUNCT__ "MatView_MPIAIJ_Binary"
927 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
928 {
929   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
930   Mat_SeqAIJ*       A = (Mat_SeqAIJ*)aij->A->data;
931   Mat_SeqAIJ*       B = (Mat_SeqAIJ*)aij->B->data;
932   PetscErrorCode    ierr;
933   PetscMPIInt       rank,size,tag = ((PetscObject)viewer)->tag;
934   int               fd;
935   PetscInt          nz,header[4],*row_lengths,*range=0,rlen,i;
936   PetscInt          nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap->rstart,rnz;
937   PetscScalar       *column_values;
938 
939   PetscFunctionBegin;
940   ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr);
941   ierr = MPI_Comm_size(((PetscObject)mat)->comm,&size);CHKERRQ(ierr);
942   nz   = A->nz + B->nz;
943   if (!rank) {
944     header[0] = MAT_FILE_CLASSID;
945     header[1] = mat->rmap->N;
946     header[2] = mat->cmap->N;
947     ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
948     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
949     ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
950     /* get largest number of rows any processor has */
951     rlen = mat->rmap->n;
952     range = mat->rmap->range;
953     for (i=1; i<size; i++) {
954       rlen = PetscMax(rlen,range[i+1] - range[i]);
955     }
956   } else {
957     ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
958     rlen = mat->rmap->n;
959   }
960 
961   /* load up the local row counts */
962   ierr = PetscMalloc((rlen+1)*sizeof(PetscInt),&row_lengths);CHKERRQ(ierr);
963   for (i=0; i<mat->rmap->n; i++) {
964     row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
965   }
966 
967   /* store the row lengths to the file */
968   if (!rank) {
969     MPI_Status status;
970     ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap->n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
971     for (i=1; i<size; i++) {
972       rlen = range[i+1] - range[i];
973       ierr = MPI_Recv(row_lengths,rlen,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
974       ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
975     }
976   } else {
977     ierr = MPI_Send(row_lengths,mat->rmap->n,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
978   }
979   ierr = PetscFree(row_lengths);CHKERRQ(ierr);
980 
981   /* load up the local column indices */
982   nzmax = nz; /* 0th process needs as much space as the largest process needs */
983   ierr = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
984   ierr = PetscMalloc((nzmax+1)*sizeof(PetscInt),&column_indices);CHKERRQ(ierr);
985   cnt  = 0;
986   for (i=0; i<mat->rmap->n; i++) {
987     for (j=B->i[i]; j<B->i[i+1]; j++) {
988       if ( (col = garray[B->j[j]]) > cstart) break;
989       column_indices[cnt++] = col;
990     }
991     for (k=A->i[i]; k<A->i[i+1]; k++) {
992       column_indices[cnt++] = A->j[k] + cstart;
993     }
994     for (; j<B->i[i+1]; j++) {
995       column_indices[cnt++] = garray[B->j[j]];
996     }
997   }
998   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
999 
1000   /* store the column indices to the file */
1001   if (!rank) {
1002     MPI_Status status;
1003     ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1004     for (i=1; i<size; i++) {
1005       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
1006       if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: rnz = %D nzmax = %D",rnz,nzmax);
1007       ierr = MPI_Recv(column_indices,rnz,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
1008       ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1009     }
1010   } else {
1011     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1012     ierr = MPI_Send(column_indices,nz,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1013   }
1014   ierr = PetscFree(column_indices);CHKERRQ(ierr);
1015 
1016   /* load up the local column values */
1017   ierr = PetscMalloc((nzmax+1)*sizeof(PetscScalar),&column_values);CHKERRQ(ierr);
1018   cnt  = 0;
1019   for (i=0; i<mat->rmap->n; i++) {
1020     for (j=B->i[i]; j<B->i[i+1]; j++) {
1021       if ( garray[B->j[j]] > cstart) break;
1022       column_values[cnt++] = B->a[j];
1023     }
1024     for (k=A->i[i]; k<A->i[i+1]; k++) {
1025       column_values[cnt++] = A->a[k];
1026     }
1027     for (; j<B->i[i+1]; j++) {
1028       column_values[cnt++] = B->a[j];
1029     }
1030   }
1031   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
1032 
1033   /* store the column values to the file */
1034   if (!rank) {
1035     MPI_Status status;
1036     ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1037     for (i=1; i<size; i++) {
1038       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
1039       if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: rnz = %D nzmax = %D",rnz,nzmax);
1040       ierr = MPI_Recv(column_values,rnz,MPIU_SCALAR,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
1041       ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1042     }
1043   } else {
1044     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1045     ierr = MPI_Send(column_values,nz,MPIU_SCALAR,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1046   }
1047   ierr = PetscFree(column_values);CHKERRQ(ierr);
1048   PetscFunctionReturn(0);
1049 }
1050 
1051 #undef __FUNCT__
1052 #define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket"
1053 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
1054 {
1055   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
1056   PetscErrorCode    ierr;
1057   PetscMPIInt       rank = aij->rank,size = aij->size;
1058   PetscTruth        isdraw,iascii,isbinary;
1059   PetscViewer       sviewer;
1060   PetscViewerFormat format;
1061 
1062   PetscFunctionBegin;
1063   ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1064   ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1065   ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1066   if (iascii) {
1067     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1068     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1069       MatInfo    info;
1070       PetscTruth inodes;
1071 
1072       ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr);
1073       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
1074       ierr = MatInodeGetInodeSizes(aij->A,PETSC_NULL,(PetscInt **)&inodes,PETSC_NULL);CHKERRQ(ierr);
1075       if (!inodes) {
1076         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, not using I-node routines\n",
1077 					      rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
1078       } else {
1079         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, using I-node routines\n",
1080 		    rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
1081       }
1082       ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
1083       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1084       ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
1085       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1086       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1087       ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
1088       ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr);
1089       PetscFunctionReturn(0);
1090     } else if (format == PETSC_VIEWER_ASCII_INFO) {
1091       PetscInt   inodecount,inodelimit,*inodes;
1092       ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr);
1093       if (inodes) {
1094         ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr);
1095       } else {
1096         ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr);
1097       }
1098       PetscFunctionReturn(0);
1099     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1100       PetscFunctionReturn(0);
1101     }
1102   } else if (isbinary) {
1103     if (size == 1) {
1104       ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1105       ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
1106     } else {
1107       ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr);
1108     }
1109     PetscFunctionReturn(0);
1110   } else if (isdraw) {
1111     PetscDraw  draw;
1112     PetscTruth isnull;
1113     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
1114     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
1115   }
1116 
1117   if (size == 1) {
1118     ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1119     ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
1120   } else {
1121     /* assemble the entire matrix onto the first processor */
1122     Mat         A;
1123     Mat_SeqAIJ  *Aloc;
1124     PetscInt    M = mat->rmap->N,N = mat->cmap->N,m,*ai,*aj,row,*cols,i,*ct;
1125     MatScalar   *a;
1126 
1127     if (mat->rmap->N > 1024) {
1128       PetscTruth flg = PETSC_FALSE;
1129 
1130       ierr = PetscOptionsGetTruth(((PetscObject) mat)->prefix, "-mat_ascii_output_large", &flg,PETSC_NULL);CHKERRQ(ierr);
1131       if (!flg) {
1132         SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_OUTOFRANGE,"ASCII matrix output not allowed for matrices with more than 1024 rows, use binary format instead.\nYou can override this restriction using -mat_ascii_output_large.");
1133       }
1134     }
1135 
1136     ierr = MatCreate(((PetscObject)mat)->comm,&A);CHKERRQ(ierr);
1137     if (!rank) {
1138       ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
1139     } else {
1140       ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
1141     }
1142     /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */
1143     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
1144     ierr = MatMPIAIJSetPreallocation(A,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
1145     ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr);
1146 
1147     /* copy over the A part */
1148     Aloc = (Mat_SeqAIJ*)aij->A->data;
1149     m = aij->A->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1150     row = mat->rmap->rstart;
1151     for (i=0; i<ai[m]; i++) {aj[i] += mat->cmap->rstart ;}
1152     for (i=0; i<m; i++) {
1153       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
1154       row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
1155     }
1156     aj = Aloc->j;
1157     for (i=0; i<ai[m]; i++) {aj[i] -= mat->cmap->rstart;}
1158 
1159     /* copy over the B part */
1160     Aloc = (Mat_SeqAIJ*)aij->B->data;
1161     m    = aij->B->rmap->n;  ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1162     row  = mat->rmap->rstart;
1163     ierr = PetscMalloc((ai[m]+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr);
1164     ct   = cols;
1165     for (i=0; i<ai[m]; i++) {cols[i] = aij->garray[aj[i]];}
1166     for (i=0; i<m; i++) {
1167       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
1168       row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
1169     }
1170     ierr = PetscFree(ct);CHKERRQ(ierr);
1171     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1172     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1173     /*
1174        Everyone has to participate in drawing the matrix since the graphics
1175        waits are synchronized across all processes that share the PetscDraw object
1176     */
1177     ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
1178     if (!rank) {
1179       ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1180       ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
1181     }
1182     ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
1183     ierr = MatDestroy(A);CHKERRQ(ierr);
1184   }
1185   PetscFunctionReturn(0);
1186 }
1187 
1188 #undef __FUNCT__
1189 #define __FUNCT__ "MatView_MPIAIJ"
1190 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer)
1191 {
1192   PetscErrorCode ierr;
1193   PetscTruth     iascii,isdraw,issocket,isbinary;
1194 
1195   PetscFunctionBegin;
1196   ierr  = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1197   ierr  = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1198   ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1199   ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
1200   if (iascii || isdraw || isbinary || issocket) {
1201     ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
1202   } else {
1203     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Viewer type %s not supported by MPIAIJ matrices",((PetscObject)viewer)->type_name);
1204   }
1205   PetscFunctionReturn(0);
1206 }
1207 
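/*
   The "local" SOR variants below implement a block Jacobi outer iteration
   with SOR as the subdomain solve: each sweep scatters the current iterate
   into lvec, forms the updated right-hand side bb1 = bb - B*x using the
   off-diagonal block, and runs the requested SOR sweep of the diagonal block
   against bb1.  The SOR_EISENSTAT branch instead applies a variant of
   Eisenstat's trick, combining a local backward and a local forward sweep
   with a diagonal scaling of the intermediate iterate.
*/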
1208 #undef __FUNCT__
1209 #define __FUNCT__ "MatSOR_MPIAIJ"
1210 PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
1211 {
1212   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1213   PetscErrorCode ierr;
1214   Vec            bb1 = 0;
1215   PetscTruth     hasop;
1216 
1217   PetscFunctionBegin;
1218   if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) {
1219     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
1220   }
1221 
1222   if (flag == SOR_APPLY_UPPER) {
1223     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1224     PetscFunctionReturn(0);
1225   }
1226 
1227   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
1228     if (flag & SOR_ZERO_INITIAL_GUESS) {
1229       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1230       its--;
1231     }
1232 
1233     while (its--) {
1234       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1235       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1236 
1237       /* update rhs: bb1 = bb - B*x */
1238       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1239       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1240 
1241       /* local sweep */
1242       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1243     }
1244   } else if (flag & SOR_LOCAL_FORWARD_SWEEP){
1245     if (flag & SOR_ZERO_INITIAL_GUESS) {
1246       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1247       its--;
1248     }
1249     while (its--) {
1250       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1251       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1252 
1253       /* update rhs: bb1 = bb - B*x */
1254       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1255       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1256 
1257       /* local sweep */
1258       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1259     }
1260   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){
1261     if (flag & SOR_ZERO_INITIAL_GUESS) {
1262       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1263       its--;
1264     }
1265     while (its--) {
1266       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1267       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1268 
1269       /* update rhs: bb1 = bb - B*x */
1270       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1271       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1272 
1273       /* local sweep */
1274       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1275     }
1276   }  else if (flag & SOR_EISENSTAT) {
1277     Vec         xx1;
1278 
1279     ierr = VecDuplicate(bb,&xx1);CHKERRQ(ierr);
1280     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr);
1281 
1282     ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1283     ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1284     if (!mat->diag) {
1285       ierr = MatGetVecs(matin,&mat->diag,PETSC_NULL);CHKERRQ(ierr);
1286       ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr);
1287     }
1288     ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr);
1289     if (hasop) {
1290       ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr);
1291     } else {
1292       ierr = VecPointwiseMult(bb1,mat->diag,xx);CHKERRQ(ierr);
1293     }
1294     ierr = VecAYPX(bb1,(omega-2.0)/omega,bb);CHKERRQ(ierr);
1295 
1296     ierr = MatMultAdd(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr);
1297 
1298     /* local sweep */
1299     ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr);
1300     ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr);
1301     ierr = VecDestroy(xx1);CHKERRQ(ierr);
1302   } else {
1303     SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_SUP,"Parallel SOR not supported");
1304   }
1305 
1306   if (bb1) {ierr = VecDestroy(bb1);CHKERRQ(ierr);}
1307   PetscFunctionReturn(0);
1308 }
1309 
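/*
   Usage sketch (hypothetical driver code, not part of this file): the local
   relaxations implemented above are reached through the public MatSOR()
   interface. Assumes A is an assembled square MPIAIJ matrix; the vector
   names are illustrative only.

     Vec       b,x;
     PetscReal omega = 1.0;                              omega = 1.0 gives Gauss-Seidel
     ierr = MatGetVecs(A,&x,&b);CHKERRQ(ierr);
     ierr = VecSet(b,1.0);CHKERRQ(ierr);
     ierr = MatSOR(A,b,omega,(MatSORType)(SOR_LOCAL_SYMMETRIC_SWEEP | SOR_ZERO_INITIAL_GUESS),
                   0.0,1,1,x);CHKERRQ(ierr);             one symmetric local sweep
*/
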
1310 #undef __FUNCT__
1311 #define __FUNCT__ "MatPermute_MPIAIJ"
1312 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B)
1313 {
1314   MPI_Comm       comm,pcomm;
1315   PetscInt       first,local_size,nrows;
1316   const PetscInt *rows;
1317   PetscMPIInt    size;
1318   IS             crowp,growp,irowp,lrowp,lcolp,icolp;
1319   PetscErrorCode ierr;
1320 
1321   PetscFunctionBegin;
1322   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
1323   /* make a collective version of 'rowp' */
1324   ierr = PetscObjectGetComm((PetscObject)rowp,&pcomm);CHKERRQ(ierr);
1325   if (pcomm==comm) {
1326     crowp = rowp;
1327   } else {
1328     ierr = ISGetSize(rowp,&nrows);CHKERRQ(ierr);
1329     ierr = ISGetIndices(rowp,&rows);CHKERRQ(ierr);
1330     ierr = ISCreateGeneral(comm,nrows,rows,&crowp);CHKERRQ(ierr);
1331     ierr = ISRestoreIndices(rowp,&rows);CHKERRQ(ierr);
1332   }
1333   /* collect the global row permutation and invert it */
1334   ierr = ISAllGather(crowp,&growp);CHKERRQ(ierr);
1335   ierr = ISSetPermutation(growp);CHKERRQ(ierr);
1336   if (pcomm!=comm) {
1337     ierr = ISDestroy(crowp);CHKERRQ(ierr);
1338   }
1339   ierr = ISInvertPermutation(growp,PETSC_DECIDE,&irowp);CHKERRQ(ierr);
1340   /* get the local target indices */
1341   ierr = MatGetOwnershipRange(A,&first,PETSC_NULL);CHKERRQ(ierr);
1342   ierr = MatGetLocalSize(A,&local_size,PETSC_NULL);CHKERRQ(ierr);
1343   ierr = ISGetIndices(irowp,&rows);CHKERRQ(ierr);
1344   ierr = ISCreateGeneral(MPI_COMM_SELF,local_size,rows+first,&lrowp);CHKERRQ(ierr);
1345   ierr = ISRestoreIndices(irowp,&rows);CHKERRQ(ierr);
1346   ierr = ISDestroy(irowp);CHKERRQ(ierr);
1347   /* the column permutation is simpler:
1348      make a local version of 'colp' and invert it */
1349   ierr = PetscObjectGetComm((PetscObject)colp,&pcomm);CHKERRQ(ierr);
1350   ierr = MPI_Comm_size(pcomm,&size);CHKERRQ(ierr);
1351   if (size==1) {
1352     lcolp = colp;
1353   } else {
1354     ierr = ISGetSize(colp,&nrows);CHKERRQ(ierr);
1355     ierr = ISGetIndices(colp,&rows);CHKERRQ(ierr);
1356     ierr = ISCreateGeneral(MPI_COMM_SELF,nrows,rows,&lcolp);CHKERRQ(ierr);
1357   }
1358   ierr = ISSetPermutation(lcolp);CHKERRQ(ierr);
1359   ierr = ISInvertPermutation(lcolp,PETSC_DECIDE,&icolp);CHKERRQ(ierr);
1360   ierr = ISSetPermutation(icolp);CHKERRQ(ierr);
1361   if (size>1) {
1362     ierr = ISRestoreIndices(colp,&rows);CHKERRQ(ierr);
1363     ierr = ISDestroy(lcolp);CHKERRQ(ierr);
1364   }
1365   /* now we just get the submatrix */
1366   ierr = MatGetSubMatrix_MPIAIJ_Private(A,lrowp,icolp,local_size,MAT_INITIAL_MATRIX,B);CHKERRQ(ierr);
1367   /* clean up */
1368   ierr = ISDestroy(lrowp);CHKERRQ(ierr);
1369   ierr = ISDestroy(icolp);CHKERRQ(ierr);
1370   PetscFunctionReturn(0);
1371 }
1372 
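/*
   Usage sketch (hypothetical: rowidx/colidx are caller-provided arrays that
   together form permutations of the global row and column numbering, each
   process supplying its local share; the implementation above marks them as
   permutations internally):

     IS  rowp,colp;
     Mat B;
     ierr = ISCreateGeneral(PETSC_COMM_WORLD,nlocalrows,rowidx,&rowp);CHKERRQ(ierr);
     ierr = ISCreateGeneral(PETSC_COMM_WORLD,nlocalcols,colidx,&colp);CHKERRQ(ierr);
     ierr = MatPermute(A,rowp,colp,&B);CHKERRQ(ierr);
     ierr = ISDestroy(rowp);CHKERRQ(ierr);
     ierr = ISDestroy(colp);CHKERRQ(ierr);
*/
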
1373 #undef __FUNCT__
1374 #define __FUNCT__ "MatGetInfo_MPIAIJ"
1375 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1376 {
1377   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1378   Mat            A = mat->A,B = mat->B;
1379   PetscErrorCode ierr;
1380   PetscReal      isend[5],irecv[5];
1381 
1382   PetscFunctionBegin;
1383   info->block_size     = 1.0;
1384   ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1385   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1386   isend[3] = info->memory;  isend[4] = info->mallocs;
1387   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1388   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1389   isend[3] += info->memory;  isend[4] += info->mallocs;
1390   if (flag == MAT_LOCAL) {
1391     info->nz_used      = isend[0];
1392     info->nz_allocated = isend[1];
1393     info->nz_unneeded  = isend[2];
1394     info->memory       = isend[3];
1395     info->mallocs      = isend[4];
1396   } else if (flag == MAT_GLOBAL_MAX) {
1397     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,((PetscObject)matin)->comm);CHKERRQ(ierr);
1398     info->nz_used      = irecv[0];
1399     info->nz_allocated = irecv[1];
1400     info->nz_unneeded  = irecv[2];
1401     info->memory       = irecv[3];
1402     info->mallocs      = irecv[4];
1403   } else if (flag == MAT_GLOBAL_SUM) {
1404     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,((PetscObject)matin)->comm);CHKERRQ(ierr);
1405     info->nz_used      = irecv[0];
1406     info->nz_allocated = irecv[1];
1407     info->nz_unneeded  = irecv[2];
1408     info->memory       = irecv[3];
1409     info->mallocs      = irecv[4];
1410   }
1411   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1412   info->fill_ratio_needed = 0;
1413   info->factor_mallocs    = 0;
1414 
1415   PetscFunctionReturn(0);
1416 }
1417 
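/*
   Usage sketch: the flag argument selects the reduction performed above.
   MAT_LOCAL reports this process only; MAT_GLOBAL_MAX and MAT_GLOBAL_SUM
   reduce over the matrix's communicator. The MatInfo fields are doubles.

     MatInfo info;
     ierr = MatGetInfo(A,MAT_GLOBAL_SUM,&info);CHKERRQ(ierr);
     ierr = PetscPrintf(PETSC_COMM_WORLD,"nz used %g, nz allocated %g\n",
                        info.nz_used,info.nz_allocated);CHKERRQ(ierr);
*/
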
1418 #undef __FUNCT__
1419 #define __FUNCT__ "MatSetOption_MPIAIJ"
1420 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscTruth flg)
1421 {
1422   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1423   PetscErrorCode ierr;
1424 
1425   PetscFunctionBegin;
1426   switch (op) {
1427   case MAT_NEW_NONZERO_LOCATIONS:
1428   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1429   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1430   case MAT_KEEP_NONZERO_PATTERN:
1431   case MAT_NEW_NONZERO_LOCATION_ERR:
1432   case MAT_USE_INODES:
1433   case MAT_IGNORE_ZERO_ENTRIES:
1434     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1435     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1436     break;
1437   case MAT_ROW_ORIENTED:
1438     a->roworiented = flg;
1439     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1440     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1441     break;
1442   case MAT_NEW_DIAGONALS:
1443     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1444     break;
1445   case MAT_IGNORE_OFF_PROC_ENTRIES:
1446     a->donotstash = PETSC_TRUE;
1447     break;
1448   case MAT_SPD:
1449     A->spd_set                         = PETSC_TRUE;
1450     A->spd                             = flg;
1451     if (flg) {
1452       A->symmetric                     = PETSC_TRUE;
1453       A->structurally_symmetric        = PETSC_TRUE;
1454       A->symmetric_set                 = PETSC_TRUE;
1455       A->structurally_symmetric_set    = PETSC_TRUE;
1456     }
1457     break;
1458   case MAT_SYMMETRIC:
1459     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1460     break;
1461   case MAT_STRUCTURALLY_SYMMETRIC:
1462     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1463     break;
1464   case MAT_HERMITIAN:
1465     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1466     break;
1467   case MAT_SYMMETRY_ETERNAL:
1468     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1469     break;
1470   default:
1471     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op);
1472   }
1473   PetscFunctionReturn(0);
1474 }
1475 
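/*
   Usage sketch: as the MAT_SPD case above shows, declaring a matrix SPD also
   marks it symmetric and structurally symmetric, so one call suffices:

     ierr = MatSetOption(A,MAT_SPD,PETSC_TRUE);CHKERRQ(ierr);
*/
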
1476 #undef __FUNCT__
1477 #define __FUNCT__ "MatGetRow_MPIAIJ"
1478 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1479 {
1480   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1481   PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
1482   PetscErrorCode ierr;
1483   PetscInt       i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart;
1484   PetscInt       nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend;
1485   PetscInt       *cmap,*idx_p;
1486 
1487   PetscFunctionBegin;
1488   if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
1489   mat->getrowactive = PETSC_TRUE;
1490 
1491   if (!mat->rowvalues && (idx || v)) {
1492     /*
1493         allocate enough space to hold information from the longest row.
1494     */
1495     Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data;
1496     PetscInt   max = 1,tmp;
1497     for (i=0; i<matin->rmap->n; i++) {
1498       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1499       if (max < tmp) { max = tmp; }
1500     }
1501     ierr = PetscMalloc2(max,PetscScalar,&mat->rowvalues,max,PetscInt,&mat->rowindices);CHKERRQ(ierr);
1502   }
1503 
1504   if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows");
1505   lrow = row - rstart;
1506 
1507   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1508   if (!v)   {pvA = 0; pvB = 0;}
1509   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1510   ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1511   ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1512   nztot = nzA + nzB;
1513 
1514   cmap  = mat->garray;
1515   if (v  || idx) {
1516     if (nztot) {
1517       /* Sort by increasing column numbers, assuming A and B already sorted */
1518       PetscInt imark = -1;
1519       if (v) {
1520         *v = v_p = mat->rowvalues;
1521         for (i=0; i<nzB; i++) {
1522           if (cmap[cworkB[i]] < cstart)   v_p[i] = vworkB[i];
1523           else break;
1524         }
1525         imark = i;
1526         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1527         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1528       }
1529       if (idx) {
1530         *idx = idx_p = mat->rowindices;
1531         if (imark > -1) {
1532           for (i=0; i<imark; i++) {
1533             idx_p[i] = cmap[cworkB[i]];
1534           }
1535         } else {
1536           for (i=0; i<nzB; i++) {
1537             if (cmap[cworkB[i]] < cstart)   idx_p[i] = cmap[cworkB[i]];
1538             else break;
1539           }
1540           imark = i;
1541         }
1542         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart + cworkA[i];
1543         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]];
1544       }
1545     } else {
1546       if (idx) *idx = 0;
1547       if (v)   *v   = 0;
1548     }
1549   }
1550   *nz = nztot;
1551   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1552   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1553   PetscFunctionReturn(0);
1554 }
1555 
1556 #undef __FUNCT__
1557 #define __FUNCT__ "MatRestoreRow_MPIAIJ"
1558 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1559 {
1560   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1561 
1562   PetscFunctionBegin;
1563   if (!aij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
1564   aij->getrowactive = PETSC_FALSE;
1565   PetscFunctionReturn(0);
1566 }
1567 
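/*
   Usage sketch: MatGetRow() may only be called for locally owned rows, and
   for one row at a time (see the getrowactive guard above); each call must
   be paired with MatRestoreRow() before the next row is requested.

     PetscInt          row,rstart,rend,ncols;
     const PetscInt    *cols;
     const PetscScalar *vals;
     ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
     for (row=rstart; row<rend; row++) {
       ierr = MatGetRow(A,row,&ncols,&cols,&vals);CHKERRQ(ierr);
       ... examine the ncols global column indices and values ...
       ierr = MatRestoreRow(A,row,&ncols,&cols,&vals);CHKERRQ(ierr);
     }
*/
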
1568 #undef __FUNCT__
1569 #define __FUNCT__ "MatNorm_MPIAIJ"
1570 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm)
1571 {
1572   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1573   Mat_SeqAIJ     *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data;
1574   PetscErrorCode ierr;
1575   PetscInt       i,j,cstart = mat->cmap->rstart;
1576   PetscReal      sum = 0.0;
1577   MatScalar      *v;
1578 
1579   PetscFunctionBegin;
1580   if (aij->size == 1) {
1581     ierr =  MatNorm(aij->A,type,norm);CHKERRQ(ierr);
1582   } else {
1583     if (type == NORM_FROBENIUS) {
1584       v = amat->a;
1585       for (i=0; i<amat->nz; i++) {
1586 #if defined(PETSC_USE_COMPLEX)
1587         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1588 #else
1589         sum += (*v)*(*v); v++;
1590 #endif
1591       }
1592       v = bmat->a;
1593       for (i=0; i<bmat->nz; i++) {
1594 #if defined(PETSC_USE_COMPLEX)
1595         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1596 #else
1597         sum += (*v)*(*v); v++;
1598 #endif
1599       }
1600       ierr = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPI_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr);
1601       *norm = sqrt(*norm);
1602     } else if (type == NORM_1) { /* max column norm */
1603       PetscReal *tmp,*tmp2;
1604       PetscInt  *jj,*garray = aij->garray;
1605       ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr);
1606       ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscReal),&tmp2);CHKERRQ(ierr);
1607       ierr = PetscMemzero(tmp,mat->cmap->N*sizeof(PetscReal));CHKERRQ(ierr);
1608       *norm = 0.0;
1609       v = amat->a; jj = amat->j;
1610       for (j=0; j<amat->nz; j++) {
1611         tmp[cstart + *jj++ ] += PetscAbsScalar(*v);  v++;
1612       }
1613       v = bmat->a; jj = bmat->j;
1614       for (j=0; j<bmat->nz; j++) {
1615         tmp[garray[*jj++]] += PetscAbsScalar(*v); v++;
1616       }
1617       ierr = MPI_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPI_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr);
1618       for (j=0; j<mat->cmap->N; j++) {
1619         if (tmp2[j] > *norm) *norm = tmp2[j];
1620       }
1621       ierr = PetscFree(tmp);CHKERRQ(ierr);
1622       ierr = PetscFree(tmp2);CHKERRQ(ierr);
1623     } else if (type == NORM_INFINITY) { /* max row norm */
1624       PetscReal ntemp = 0.0;
1625       for (j=0; j<aij->A->rmap->n; j++) {
1626         v = amat->a + amat->i[j];
1627         sum = 0.0;
1628         for (i=0; i<amat->i[j+1]-amat->i[j]; i++) {
1629           sum += PetscAbsScalar(*v); v++;
1630         }
1631         v = bmat->a + bmat->i[j];
1632         for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) {
1633           sum += PetscAbsScalar(*v); v++;
1634         }
1635         if (sum > ntemp) ntemp = sum;
1636       }
1637       ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPI_MAX,((PetscObject)mat)->comm);CHKERRQ(ierr);
1638     } else {
1639       SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_SUP,"No support for two norm");
1640     }
1641   }
1642   PetscFunctionReturn(0);
1643 }
1644 
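/*
   The three norms computed above, written out (a_ij denotes a global entry):
     NORM_FROBENIUS:  ||A||_F   = sqrt(sum_ij |a_ij|^2)
     NORM_1:          ||A||_1   = max_j sum_i |a_ij|    (largest column sum)
     NORM_INFINITY:   ||A||_inf = max_i sum_j |a_ij|    (largest row sum)
   Usage sketch:
     PetscReal nrm;
     ierr = MatNorm(A,NORM_INFINITY,&nrm);CHKERRQ(ierr);
*/
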
1645 #undef __FUNCT__
1646 #define __FUNCT__ "MatTranspose_MPIAIJ"
1647 PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout)
1648 {
1649   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1650   Mat_SeqAIJ     *Aloc=(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data;
1651   PetscErrorCode ierr;
1652   PetscInt       M = A->rmap->N,N = A->cmap->N,ma,na,mb,*ai,*aj,*bi,*bj,row,*cols,*cols_tmp,i,*d_nnz;
1653   PetscInt       cstart=A->cmap->rstart,ncol;
1654   Mat            B;
1655   MatScalar      *array;
1656 
1657   PetscFunctionBegin;
1658   if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_ARG_SIZ,"Square matrix only for in-place transpose");
1659 
1660   ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n;
1661   ai = Aloc->i; aj = Aloc->j;
1662   bi = Bloc->i; bj = Bloc->j;
1663   if (reuse == MAT_INITIAL_MATRIX || *matout == A) {
1664     /* compute d_nnz for preallocation; o_nnz is approximated by d_nnz to avoid communication */
1665     ierr = PetscMalloc((1+na)*sizeof(PetscInt),&d_nnz);CHKERRQ(ierr);
1666     ierr = PetscMemzero(d_nnz,(1+na)*sizeof(PetscInt));CHKERRQ(ierr);
1667     for (i=0; i<ai[ma]; i++){
1668       d_nnz[aj[i]] ++;
1669       aj[i] += cstart; /* global col index to be used by MatSetValues() */
1670     }
1671 
1672     ierr = MatCreate(((PetscObject)A)->comm,&B);CHKERRQ(ierr);
1673     ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
1674     ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
1675     ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,d_nnz);CHKERRQ(ierr);
1676     ierr = PetscFree(d_nnz);CHKERRQ(ierr);
1677   } else {
1678     B = *matout;
1679   }
1680 
1681   /* copy over the A part */
1682   array = Aloc->a;
1683   row = A->rmap->rstart;
1684   for (i=0; i<ma; i++) {
1685     ncol = ai[i+1]-ai[i];
1686     ierr = MatSetValues(B,ncol,aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1687     row++; array += ncol; aj += ncol;
1688   }
1689   aj = Aloc->j;
1690   for (i=0; i<ai[ma]; i++) aj[i] -= cstart; /* restore local col indices */
1691 
1692   /* copy over the B part */
1693   ierr = PetscMalloc(bi[mb]*sizeof(PetscInt),&cols);CHKERRQ(ierr);
1694   ierr = PetscMemzero(cols,bi[mb]*sizeof(PetscInt));CHKERRQ(ierr);
1695   array = Bloc->a;
1696   row = A->rmap->rstart;
1697   for (i=0; i<bi[mb]; i++) {cols[i] = a->garray[bj[i]];}
1698   cols_tmp = cols;
1699   for (i=0; i<mb; i++) {
1700     ncol = bi[i+1]-bi[i];
1701     ierr = MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1702     row++; array += ncol; cols_tmp += ncol;
1703   }
1704   ierr = PetscFree(cols);CHKERRQ(ierr);
1705 
1706   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1707   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1708   if (reuse == MAT_INITIAL_MATRIX || *matout != A) {
1709     *matout = B;
1710   } else {
1711     ierr = MatHeaderMerge(A,B);CHKERRQ(ierr);
1712   }
1713   PetscFunctionReturn(0);
1714 }
1715 
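/*
   Usage sketch: out-of-place and (square matrices only, per the check above)
   in-place transposes through the public interface:

     Mat At;
     ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr);
     ierr = MatTranspose(A,MAT_REUSE_MATRIX,&A);CHKERRQ(ierr);
*/
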
1716 #undef __FUNCT__
1717 #define __FUNCT__ "MatDiagonalScale_MPIAIJ"
1718 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
1719 {
1720   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1721   Mat            a = aij->A,b = aij->B;
1722   PetscErrorCode ierr;
1723   PetscInt       s1,s2,s3;
1724 
1725   PetscFunctionBegin;
1726   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
1727   if (rr) {
1728     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
1729     if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
1730     /* Overlap communication with computation. */
1731     ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1732   }
1733   if (ll) {
1734     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
1735     if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
1736     ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr);
1737   }
1738   /* scale  the diagonal block */
1739   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
1740 
1741   if (rr) {
1742     /* Do a scatter end and then right scale the off-diagonal block */
1743     ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1744     ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr);
1745   }
1746 
1747   PetscFunctionReturn(0);
1748 }
1749 
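/*
   Usage sketch: MatDiagonalScale() computes A <- diag(ll) A diag(rr); either
   vector may be PETSC_NULL to skip that side. ll must conform to the rows
   and rr to the columns, which MatGetVecs() provides:

     Vec l,r;
     ierr = MatGetVecs(A,&r,&l);CHKERRQ(ierr);
     ierr = VecSet(l,2.0);CHKERRQ(ierr);
     ierr = VecSet(r,0.5);CHKERRQ(ierr);
     ierr = MatDiagonalScale(A,l,r);CHKERRQ(ierr);
     ierr = VecDestroy(l);CHKERRQ(ierr);
     ierr = VecDestroy(r);CHKERRQ(ierr);
*/
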
1750 #undef __FUNCT__
1751 #define __FUNCT__ "MatSetBlockSize_MPIAIJ"
1752 PetscErrorCode MatSetBlockSize_MPIAIJ(Mat A,PetscInt bs)
1753 {
1754   Mat_MPIAIJ     *a   = (Mat_MPIAIJ*)A->data;
1755   PetscErrorCode ierr;
1756 
1757   PetscFunctionBegin;
1758   ierr = MatSetBlockSize(a->A,bs);CHKERRQ(ierr);
1759   ierr = MatSetBlockSize(a->B,bs);CHKERRQ(ierr);
1760   ierr = PetscLayoutSetBlockSize(A->rmap,bs);CHKERRQ(ierr);
1761   ierr = PetscLayoutSetBlockSize(A->cmap,bs);CHKERRQ(ierr);
1762   PetscFunctionReturn(0);
1763 }
1764 #undef __FUNCT__
1765 #define __FUNCT__ "MatSetUnfactored_MPIAIJ"
1766 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A)
1767 {
1768   Mat_MPIAIJ     *a   = (Mat_MPIAIJ*)A->data;
1769   PetscErrorCode ierr;
1770 
1771   PetscFunctionBegin;
1772   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
1773   PetscFunctionReturn(0);
1774 }
1775 
1776 #undef __FUNCT__
1777 #define __FUNCT__ "MatEqual_MPIAIJ"
1778 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscTruth *flag)
1779 {
1780   Mat_MPIAIJ     *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data;
1781   Mat            a,b,c,d;
1782   PetscTruth     flg;
1783   PetscErrorCode ierr;
1784 
1785   PetscFunctionBegin;
1786   a = matA->A; b = matA->B;
1787   c = matB->A; d = matB->B;
1788 
1789   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
1790   if (flg) {
1791     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
1792   }
1793   ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,((PetscObject)A)->comm);CHKERRQ(ierr);
1794   PetscFunctionReturn(0);
1795 }
1796 
1797 #undef __FUNCT__
1798 #define __FUNCT__ "MatCopy_MPIAIJ"
1799 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
1800 {
1801   PetscErrorCode ierr;
1802   Mat_MPIAIJ     *a = (Mat_MPIAIJ *)A->data;
1803   Mat_MPIAIJ     *b = (Mat_MPIAIJ *)B->data;
1804 
1805   PetscFunctionBegin;
1806   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
1807   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
1808     /* because of the column compression in the off-processor part of the matrix a->B,
1809        the number of columns in a->B and b->B may differ, hence we cannot call
1810        MatCopy() directly on the two parts. If need be, a copy more efficient than
1811        MatCopy_Basic() could be provided by first uncompressing the a->B matrices
1812        and then copying the submatrices */
1813     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
1814   } else {
1815     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
1816     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
1817   }
1818   PetscFunctionReturn(0);
1819 }
1820 
1821 #undef __FUNCT__
1822 #define __FUNCT__ "MatSetUpPreallocation_MPIAIJ"
1823 PetscErrorCode MatSetUpPreallocation_MPIAIJ(Mat A)
1824 {
1825   PetscErrorCode ierr;
1826 
1827   PetscFunctionBegin;
1828   ierr =  MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
1829   PetscFunctionReturn(0);
1830 }
1831 
1832 #undef __FUNCT__
1833 #define __FUNCT__ "MatAXPY_MPIAIJ"
1834 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
1835 {
1836   PetscErrorCode ierr;
1837   PetscInt       i;
1838   Mat_MPIAIJ     *xx = (Mat_MPIAIJ *)X->data,*yy = (Mat_MPIAIJ *)Y->data;
1839   PetscBLASInt   bnz,one=1;
1840   Mat_SeqAIJ     *x,*y;
1841 
1842   PetscFunctionBegin;
1843   if (str == SAME_NONZERO_PATTERN) {
1844     PetscScalar alpha = a;
1845     x = (Mat_SeqAIJ *)xx->A->data;
1846     y = (Mat_SeqAIJ *)yy->A->data;
1847     bnz = PetscBLASIntCast(x->nz);
1848     BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
1849     x = (Mat_SeqAIJ *)xx->B->data;
1850     y = (Mat_SeqAIJ *)yy->B->data;
1851     bnz = PetscBLASIntCast(x->nz);
1852     BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
1853   } else if (str == SUBSET_NONZERO_PATTERN) {
1854     ierr = MatAXPY_SeqAIJ(yy->A,a,xx->A,str);CHKERRQ(ierr);
1855 
1856     x = (Mat_SeqAIJ *)xx->B->data;
1857     y = (Mat_SeqAIJ *)yy->B->data;
1858     if (y->xtoy && y->XtoY != xx->B) {
1859       ierr = PetscFree(y->xtoy);CHKERRQ(ierr);
1860       ierr = MatDestroy(y->XtoY);CHKERRQ(ierr);
1861     }
1862     if (!y->xtoy) { /* get xtoy */
1863       ierr = MatAXPYGetxtoy_Private(xx->B->rmap->n,x->i,x->j,xx->garray,y->i,y->j,yy->garray,&y->xtoy);CHKERRQ(ierr);
1864       y->XtoY = xx->B;
1865       ierr = PetscObjectReference((PetscObject)xx->B);CHKERRQ(ierr);
1866     }
1867     for (i=0; i<x->nz; i++) y->a[y->xtoy[i]] += a*(x->a[i]);
1868   } else {
1869     ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
1870   }
1871   PetscFunctionReturn(0);
1872 }
1873 
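/*
   Usage sketch: Y <- a*X + Y. Passing SAME_NONZERO_PATTERN takes the fast
   BLAS axpy path above; SUBSET_NONZERO_PATTERN builds and caches the xtoy
   index map on first use; anything else falls back to MatAXPY_Basic().

     ierr = MatAXPY(Y,2.0,X,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
*/
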
1874 EXTERN PetscErrorCode PETSCMAT_DLLEXPORT MatConjugate_SeqAIJ(Mat);
1875 
1876 #undef __FUNCT__
1877 #define __FUNCT__ "MatConjugate_MPIAIJ"
1878 PetscErrorCode PETSCMAT_DLLEXPORT MatConjugate_MPIAIJ(Mat mat)
1879 {
1880 #if defined(PETSC_USE_COMPLEX)
1881   PetscErrorCode ierr;
1882   Mat_MPIAIJ     *aij = (Mat_MPIAIJ *)mat->data;
1883 
1884   PetscFunctionBegin;
1885   ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr);
1886   ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr);
1887 #else
1888   PetscFunctionBegin;
1889 #endif
1890   PetscFunctionReturn(0);
1891 }
1892 
1893 #undef __FUNCT__
1894 #define __FUNCT__ "MatRealPart_MPIAIJ"
1895 PetscErrorCode MatRealPart_MPIAIJ(Mat A)
1896 {
1897   Mat_MPIAIJ   *a = (Mat_MPIAIJ*)A->data;
1898   PetscErrorCode ierr;
1899 
1900   PetscFunctionBegin;
1901   ierr = MatRealPart(a->A);CHKERRQ(ierr);
1902   ierr = MatRealPart(a->B);CHKERRQ(ierr);
1903   PetscFunctionReturn(0);
1904 }
1905 
1906 #undef __FUNCT__
1907 #define __FUNCT__ "MatImaginaryPart_MPIAIJ"
1908 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A)
1909 {
1910   Mat_MPIAIJ   *a = (Mat_MPIAIJ*)A->data;
1911   PetscErrorCode ierr;
1912 
1913   PetscFunctionBegin;
1914   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
1915   ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
1916   PetscFunctionReturn(0);
1917 }
1918 
1919 #ifdef PETSC_HAVE_PBGL
1920 
1921 #include <boost/parallel/mpi/bsp_process_group.hpp>
1922 #include <boost/graph/distributed/ilu_default_graph.hpp>
1923 #include <boost/graph/distributed/ilu_0_block.hpp>
1924 #include <boost/graph/distributed/ilu_preconditioner.hpp>
1925 #include <boost/graph/distributed/petsc/interface.hpp>
1926 #include <boost/multi_array.hpp>
1927 #include <boost/parallel/distributed_property_map.hpp>
1928 
1929 #undef __FUNCT__
1930 #define __FUNCT__ "MatILUFactorSymbolic_MPIAIJ"
1931 /*
1932   This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu>
1933 */
1934 PetscErrorCode MatILUFactorSymbolic_MPIAIJ(Mat fact,Mat A, IS isrow, IS iscol, const MatFactorInfo *info)
1935 {
1936   namespace petsc = boost::distributed::petsc;
1937 
1938   namespace graph_dist = boost::graph::distributed;
1939   using boost::graph::distributed::ilu_default::process_group_type;
1940   using boost::graph::ilu_permuted;
1941 
1942   PetscTruth      row_identity, col_identity;
1943   PetscContainer  c;
1944   PetscInt        m, n, M, N;
1945   PetscErrorCode  ierr;
1946 
1947   PetscFunctionBegin;
1948   if (info->levels != 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only levels = 0 supported for parallel ilu");
1949   ierr = ISIdentity(isrow, &row_identity);CHKERRQ(ierr);
1950   ierr = ISIdentity(iscol, &col_identity);CHKERRQ(ierr);
1951   if (!row_identity || !col_identity) {
1952     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for parallel ILU");
1953   }
1954 
1955   process_group_type pg;
1956   typedef graph_dist::ilu_default::ilu_level_graph_type  lgraph_type;
1957   lgraph_type*   lgraph_p = new lgraph_type(petsc::num_global_vertices(A), pg, petsc::matrix_distribution(A, pg));
1958   lgraph_type&   level_graph = *lgraph_p;
1959   graph_dist::ilu_default::graph_type&            graph(level_graph.graph);
1960 
1961   petsc::read_matrix(A, graph, get(boost::edge_weight, graph));
1962   ilu_permuted(level_graph);
1963 
1964   /* put together the new matrix */
1965   ierr = MatCreate(((PetscObject)A)->comm, fact);CHKERRQ(ierr);
1966   ierr = MatGetLocalSize(A, &m, &n);CHKERRQ(ierr);
1967   ierr = MatGetSize(A, &M, &N);CHKERRQ(ierr);
1968   ierr = MatSetSizes(fact, m, n, M, N);CHKERRQ(ierr);
1969   ierr = MatSetType(fact, ((PetscObject)A)->type_name);CHKERRQ(ierr);
1970   ierr = MatAssemblyBegin(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1971   ierr = MatAssemblyEnd(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1972 
1973   ierr = PetscContainerCreate(((PetscObject)A)->comm, &c);CHKERRQ(ierr);
1974   ierr = PetscContainerSetPointer(c, lgraph_p);CHKERRQ(ierr);
1975   ierr = PetscObjectCompose((PetscObject) (fact), "graph", (PetscObject) c);CHKERRQ(ierr);
1976   PetscFunctionReturn(0);
1977 }
1978 
1979 #undef __FUNCT__
1980 #define __FUNCT__ "MatLUFactorNumeric_MPIAIJ"
1981 PetscErrorCode MatLUFactorNumeric_MPIAIJ(Mat B,Mat A, const MatFactorInfo *info)
1982 {
1983   PetscFunctionBegin;
1984   PetscFunctionReturn(0);
1985 }
1986 
1987 #undef __FUNCT__
1988 #define __FUNCT__ "MatSolve_MPIAIJ"
1989 /*
1990   This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu>
1991 */
1992 PetscErrorCode MatSolve_MPIAIJ(Mat A, Vec b, Vec x)
1993 {
1994   namespace graph_dist = boost::graph::distributed;
1995 
1996   typedef graph_dist::ilu_default::ilu_level_graph_type  lgraph_type;
1997   lgraph_type*   lgraph_p;
1998   PetscContainer c;
1999   PetscErrorCode ierr;
2000 
2001   PetscFunctionBegin;
2002   ierr = PetscObjectQuery((PetscObject) A, "graph", (PetscObject *) &c);CHKERRQ(ierr);
2003   ierr = PetscContainerGetPointer(c, (void **) &lgraph_p);CHKERRQ(ierr);
2004   ierr = VecCopy(b, x);CHKERRQ(ierr);
2005 
2006   PetscScalar* array_x;
2007   ierr = VecGetArray(x, &array_x);CHKERRQ(ierr);
2008   PetscInt sx;
2009   ierr = VecGetSize(x, &sx);CHKERRQ(ierr);
2010 
2011   PetscScalar* array_b;
2012   ierr = VecGetArray(b, &array_b);CHKERRQ(ierr);
2013   PetscInt sb;
2014   ierr = VecGetSize(b, &sb);CHKERRQ(ierr);
2015 
2016   lgraph_type&   level_graph = *lgraph_p;
2017   graph_dist::ilu_default::graph_type&            graph(level_graph.graph);
2018 
2019   typedef boost::multi_array_ref<PetscScalar, 1> array_ref_type;
2020   array_ref_type                                 ref_b(array_b, boost::extents[num_vertices(graph)]),
2021                                                  ref_x(array_x, boost::extents[num_vertices(graph)]);
2022 
2023   typedef boost::iterator_property_map<array_ref_type::iterator,
2024                                 boost::property_map<graph_dist::ilu_default::graph_type, boost::vertex_index_t>::type>  gvector_type;
2025   gvector_type                                   vector_b(ref_b.begin(), get(boost::vertex_index, graph)),
2026                                                  vector_x(ref_x.begin(), get(boost::vertex_index, graph));
2027 
2028   ilu_set_solve(*lgraph_p, vector_b, vector_x);
2029 
2030   PetscFunctionReturn(0);
2031 }
2032 #endif
2033 
2034 typedef struct { /* used by MatGetRedundantMatrix() for reusing matredundant */
2035   PetscInt       nzlocal,nsends,nrecvs;
2036   PetscMPIInt    *send_rank,*recv_rank;
2037   PetscInt       *sbuf_nz,*rbuf_nz,*sbuf_j,**rbuf_j;
2038   PetscScalar    *sbuf_a,**rbuf_a;
2039   PetscErrorCode (*MatDestroy)(Mat);
2040 } Mat_Redundant;
2041 
2042 #undef __FUNCT__
2043 #define __FUNCT__ "PetscContainerDestroy_MatRedundant"
2044 PetscErrorCode PetscContainerDestroy_MatRedundant(void *ptr)
2045 {
2046   PetscErrorCode       ierr;
2047   Mat_Redundant        *redund=(Mat_Redundant*)ptr;
2048   PetscInt             i;
2049 
2050   PetscFunctionBegin;
2051   ierr = PetscFree2(redund->send_rank,redund->recv_rank);CHKERRQ(ierr);
2052   ierr = PetscFree(redund->sbuf_j);CHKERRQ(ierr);
2053   ierr = PetscFree(redund->sbuf_a);CHKERRQ(ierr);
2054   for (i=0; i<redund->nrecvs; i++){
2055     ierr = PetscFree(redund->rbuf_j[i]);CHKERRQ(ierr);
2056     ierr = PetscFree(redund->rbuf_a[i]);CHKERRQ(ierr);
2057   }
2058   ierr = PetscFree4(redund->sbuf_nz,redund->rbuf_nz,redund->rbuf_j,redund->rbuf_a);CHKERRQ(ierr);
2059   ierr = PetscFree(redund);CHKERRQ(ierr);
2060   PetscFunctionReturn(0);
2061 }
2062 
2063 #undef __FUNCT__
2064 #define __FUNCT__ "MatDestroy_MatRedundant"
2065 PetscErrorCode MatDestroy_MatRedundant(Mat A)
2066 {
2067   PetscErrorCode  ierr;
2068   PetscContainer  container;
2069   Mat_Redundant   *redund=PETSC_NULL;
2070 
2071   PetscFunctionBegin;
2072   ierr = PetscObjectQuery((PetscObject)A,"Mat_Redundant",(PetscObject *)&container);CHKERRQ(ierr);
2073   if (container) {
2074     ierr = PetscContainerGetPointer(container,(void **)&redund);CHKERRQ(ierr);
2075   } else {
2076     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Container does not exist");
2077   }
2078   A->ops->destroy = redund->MatDestroy;
2079   ierr = PetscObjectCompose((PetscObject)A,"Mat_Redundant",0);CHKERRQ(ierr);
2080   ierr = (*A->ops->destroy)(A);CHKERRQ(ierr);
2081   ierr = PetscContainerDestroy(container);CHKERRQ(ierr);
2082   PetscFunctionReturn(0);
2083 }
2084 
2085 #undef __FUNCT__
2086 #define __FUNCT__ "MatGetRedundantMatrix_MPIAIJ"
2087 PetscErrorCode MatGetRedundantMatrix_MPIAIJ(Mat mat,PetscInt nsubcomm,MPI_Comm subcomm,PetscInt mlocal_sub,MatReuse reuse,Mat *matredundant)
2088 {
2089   PetscMPIInt    rank,size;
2090   MPI_Comm       comm=((PetscObject)mat)->comm;
2091   PetscErrorCode ierr;
2092   PetscInt       nsends=0,nrecvs=0,i,rownz_max=0;
2093   PetscMPIInt    *send_rank=PETSC_NULL,*recv_rank=PETSC_NULL;
2094   PetscInt       *rowrange=mat->rmap->range;
2095   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2096   Mat            A=aij->A,B=aij->B,C=*matredundant;
2097   Mat_SeqAIJ     *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data;
2098   PetscScalar    *sbuf_a;
2099   PetscInt       nzlocal=a->nz+b->nz;
2100   PetscInt       j,cstart=mat->cmap->rstart,cend=mat->cmap->rend,row,nzA,nzB,ncols,*cworkA,*cworkB;
2101   PetscInt       rstart=mat->rmap->rstart,rend=mat->rmap->rend,*bmap=aij->garray,M,N;
2102   PetscInt       *cols,ctmp,lwrite,*rptr,l,*sbuf_j;
2103   MatScalar      *aworkA,*aworkB;
2104   PetscScalar    *vals;
2105   PetscMPIInt    tag1,tag2,tag3,imdex;
2106   MPI_Request    *s_waits1=PETSC_NULL,*s_waits2=PETSC_NULL,*s_waits3=PETSC_NULL,
2107                  *r_waits1=PETSC_NULL,*r_waits2=PETSC_NULL,*r_waits3=PETSC_NULL;
2108   MPI_Status     recv_status,*send_status;
2109   PetscInt       *sbuf_nz=PETSC_NULL,*rbuf_nz=PETSC_NULL,count;
2110   PetscInt       **rbuf_j=PETSC_NULL;
2111   PetscScalar    **rbuf_a=PETSC_NULL;
2112   Mat_Redundant  *redund=PETSC_NULL;
2113   PetscContainer container;
2114 
2115   PetscFunctionBegin;
2116   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2117   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2118 
2119   if (reuse == MAT_REUSE_MATRIX) {
2120     ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr);
2121     if (M != N || M != mat->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong global size");
2122     ierr = MatGetLocalSize(C,&M,&N);CHKERRQ(ierr);
2123     if (M != N || M != mlocal_sub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong local size");
2124     ierr = PetscObjectQuery((PetscObject)C,"Mat_Redundant",(PetscObject *)&container);CHKERRQ(ierr);
2125     if (container) {
2126       ierr = PetscContainerGetPointer(container,(void **)&redund);CHKERRQ(ierr);
2127     } else {
2128       SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Container does not exist");
2129     }
2130     if (nzlocal != redund->nzlocal) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong nzlocal");
2131 
2132     nsends    = redund->nsends;
2133     nrecvs    = redund->nrecvs;
2134     send_rank = redund->send_rank;
2135     recv_rank = redund->recv_rank;
2136     sbuf_nz   = redund->sbuf_nz;
2137     rbuf_nz   = redund->rbuf_nz;
2138     sbuf_j    = redund->sbuf_j;
2139     sbuf_a    = redund->sbuf_a;
2140     rbuf_j    = redund->rbuf_j;
2141     rbuf_a    = redund->rbuf_a;
2142   }
2143 
2144   if (reuse == MAT_INITIAL_MATRIX){
2145     PetscMPIInt  subrank,subsize;
2146     PetscInt     nleftover,np_subcomm;
2147     /* determine the destination ranks (send_rank) and the counts nsends and nrecvs */
2148     ierr = MPI_Comm_rank(subcomm,&subrank);CHKERRQ(ierr);
2149     ierr = MPI_Comm_size(subcomm,&subsize);CHKERRQ(ierr);
2150     ierr = PetscMalloc2(size,PetscMPIInt,&send_rank,size,PetscMPIInt,&recv_rank);CHKERRQ(ierr);
2151     np_subcomm = size/nsubcomm;
2152     nleftover  = size - nsubcomm*np_subcomm;
2153     nsends = 0; nrecvs = 0;
2154     for (i=0; i<size; i++){ /* i=rank*/
2155       if (subrank == i/nsubcomm && rank != i){ /* my_subrank == other's subrank */
2156         send_rank[nsends] = i; nsends++;
2157         recv_rank[nrecvs++] = i;
2158       }
2159     }
2160     if (rank >= size - nleftover){/* this proc is a leftover processor */
2161       i = size-nleftover-1;
2162       j = 0;
2163       while (j < nsubcomm - nleftover){
2164         send_rank[nsends++] = i;
2165         i--; j++;
2166       }
2167     }
2168 
2169     if (nleftover && subsize == size/nsubcomm && subrank==subsize-1){ /* this proc recvs from leftover processors */
2170       for (i=0; i<nleftover; i++){
2171         recv_rank[nrecvs++] = size-nleftover+i;
2172       }
2173     }
2174 
2175     /* allocate sbuf_j, sbuf_a */
2176     i = nzlocal + rowrange[rank+1] - rowrange[rank] + 2;
2177     ierr = PetscMalloc(i*sizeof(PetscInt),&sbuf_j);CHKERRQ(ierr);
2178     ierr = PetscMalloc((nzlocal+1)*sizeof(PetscScalar),&sbuf_a);CHKERRQ(ierr);
2179   } /* end of if (reuse == MAT_INITIAL_MATRIX) */
2180 
2181   /* copy mat's local entries into the buffers */
2182   if (reuse == MAT_INITIAL_MATRIX){
2183     rownz_max = 0;
2184     rptr = sbuf_j;
2185     cols = sbuf_j + rend-rstart + 1;
2186     vals = sbuf_a;
2187     rptr[0] = 0;
2188     for (i=0; i<rend-rstart; i++){
2189       row = i + rstart;
2190       nzA    = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i];
2191       ncols  = nzA + nzB;
2192       cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i];
2193       aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i];
2194       /* load the column indices for this row into cols */
2195       lwrite = 0;
2196       for (l=0; l<nzB; l++) {
2197         if ((ctmp = bmap[cworkB[l]]) < cstart){
2198           vals[lwrite]   = aworkB[l];
2199           cols[lwrite++] = ctmp;
2200         }
2201       }
2202       for (l=0; l<nzA; l++){
2203         vals[lwrite]   = aworkA[l];
2204         cols[lwrite++] = cstart + cworkA[l];
2205       }
2206       for (l=0; l<nzB; l++) {
2207         if ((ctmp = bmap[cworkB[l]]) >= cend){
2208           vals[lwrite]   = aworkB[l];
2209           cols[lwrite++] = ctmp;
2210         }
2211       }
2212       vals += ncols;
2213       cols += ncols;
2214       rptr[i+1] = rptr[i] + ncols;
2215       if (rownz_max < ncols) rownz_max = ncols;
2216     }
2217     if (rptr[rend-rstart] != a->nz + b->nz) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB, "rptr[%d] %d != %d + %d",rend-rstart,rptr[rend-rstart],a->nz,b->nz);
2218   } else { /* only copy matrix values into sbuf_a */
2219     rptr = sbuf_j;
2220     vals = sbuf_a;
2221     rptr[0] = 0;
2222     for (i=0; i<rend-rstart; i++){
2223       row = i + rstart;
2224       nzA    = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i];
2225       ncols  = nzA + nzB;
2226       cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i];
2227       aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i];
2228       lwrite = 0;
2229       for (l=0; l<nzB; l++) {
2230         if ((ctmp = bmap[cworkB[l]]) < cstart) vals[lwrite++] = aworkB[l];
2231       }
2232       for (l=0; l<nzA; l++) vals[lwrite++] = aworkA[l];
2233       for (l=0; l<nzB; l++) {
2234         if ((ctmp = bmap[cworkB[l]]) >= cend) vals[lwrite++] = aworkB[l];
2235       }
2236       vals += ncols;
2237       rptr[i+1] = rptr[i] + ncols;
2238     }
2239   } /* end of if (reuse == MAT_INITIAL_MATRIX) */
2240 
2241   /* send nzlocal to others, and recv other's nzlocal */
2242   /*--------------------------------------------------*/
2243   if (reuse == MAT_INITIAL_MATRIX){
2244     ierr = PetscMalloc2(3*(nsends + nrecvs)+1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr);
2245     s_waits2 = s_waits3 + nsends;
2246     s_waits1 = s_waits2 + nsends;
2247     r_waits1 = s_waits1 + nsends;
2248     r_waits2 = r_waits1 + nrecvs;
2249     r_waits3 = r_waits2 + nrecvs;
2250   } else {
2251     ierr = PetscMalloc2(nsends + nrecvs +1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr);
2252     r_waits3 = s_waits3 + nsends;
2253   }
2254 
2255   ierr = PetscObjectGetNewTag((PetscObject)mat,&tag3);CHKERRQ(ierr);
2256   if (reuse == MAT_INITIAL_MATRIX){
2257     /* get new tags to keep the communication clean */
2258     ierr = PetscObjectGetNewTag((PetscObject)mat,&tag1);CHKERRQ(ierr);
2259     ierr = PetscObjectGetNewTag((PetscObject)mat,&tag2);CHKERRQ(ierr);
2260     ierr = PetscMalloc4(nsends,PetscInt,&sbuf_nz,nrecvs,PetscInt,&rbuf_nz,nrecvs,PetscInt*,&rbuf_j,nrecvs,PetscScalar*,&rbuf_a);CHKERRQ(ierr);
2261 
2262     /* post receives of other's nzlocal */
2263     for (i=0; i<nrecvs; i++){
2264       ierr = MPI_Irecv(rbuf_nz+i,1,MPIU_INT,MPI_ANY_SOURCE,tag1,comm,r_waits1+i);CHKERRQ(ierr);
2265     }
2266     /* send nzlocal to others */
2267     for (i=0; i<nsends; i++){
2268       sbuf_nz[i] = nzlocal;
2269       ierr = MPI_Isend(sbuf_nz+i,1,MPIU_INT,send_rank[i],tag1,comm,s_waits1+i);CHKERRQ(ierr);
2270     }
2271     /* wait on receives of nzlocal; allocate space for rbuf_j, rbuf_a */
2272     count = nrecvs;
2273     while (count) {
2274       ierr = MPI_Waitany(nrecvs,r_waits1,&imdex,&recv_status);CHKERRQ(ierr);
2275       recv_rank[imdex] = recv_status.MPI_SOURCE;
2276       /* allocate rbuf_a and rbuf_j; then post receives of rbuf_j */
2277       ierr = PetscMalloc((rbuf_nz[imdex]+1)*sizeof(PetscScalar),&rbuf_a[imdex]);CHKERRQ(ierr);
2278 
2279       i = rowrange[recv_status.MPI_SOURCE+1] - rowrange[recv_status.MPI_SOURCE]; /* number of row pointer entries expected from this source */
2280       rbuf_nz[imdex] += i + 2;
2281       ierr = PetscMalloc(rbuf_nz[imdex]*sizeof(PetscInt),&rbuf_j[imdex]);CHKERRQ(ierr);
2282       ierr = MPI_Irecv(rbuf_j[imdex],rbuf_nz[imdex],MPIU_INT,recv_status.MPI_SOURCE,tag2,comm,r_waits2+imdex);CHKERRQ(ierr);
2283       count--;
2284     }
2285     /* wait on sends of nzlocal */
2286     if (nsends) {ierr = MPI_Waitall(nsends,s_waits1,send_status);CHKERRQ(ierr);}
2287     /* send mat->i,j to others, and recv from other's */
2288     /*------------------------------------------------*/
2289     for (i=0; i<nsends; i++){
2290       j = nzlocal + rowrange[rank+1] - rowrange[rank] + 1;
2291       ierr = MPI_Isend(sbuf_j,j,MPIU_INT,send_rank[i],tag2,comm,s_waits2+i);CHKERRQ(ierr);
2292     }
2293     /* wait on receives of mat->i,j */
2294     /*------------------------------*/
2295     count = nrecvs;
2296     while (count) {
2297       ierr = MPI_Waitany(nrecvs,r_waits2,&imdex,&recv_status);CHKERRQ(ierr);
2298       if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE);
2299       count--;
2300     }
2301     /* wait on sends of mat->i,j */
2302     /*---------------------------*/
2303     if (nsends) {
2304       ierr = MPI_Waitall(nsends,s_waits2,send_status);CHKERRQ(ierr);
2305     }
2306   } /* end of if (reuse == MAT_INITIAL_MATRIX) */
2307 
2308   /* post receives, send and receive mat->a */
2309   /*----------------------------------------*/
2310   for (imdex=0; imdex<nrecvs; imdex++) {
2311     ierr = MPI_Irecv(rbuf_a[imdex],rbuf_nz[imdex],MPIU_SCALAR,recv_rank[imdex],tag3,comm,r_waits3+imdex);CHKERRQ(ierr);
2312   }
2313   for (i=0; i<nsends; i++){
2314     ierr = MPI_Isend(sbuf_a,nzlocal,MPIU_SCALAR,send_rank[i],tag3,comm,s_waits3+i);CHKERRQ(ierr);
2315   }
2316   count = nrecvs;
2317   while (count) {
2318     ierr = MPI_Waitany(nrecvs,r_waits3,&imdex,&recv_status);CHKERRQ(ierr);
2319     if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE);
2320     count--;
2321   }
2322   if (nsends) {
2323     ierr = MPI_Waitall(nsends,s_waits3,send_status);CHKERRQ(ierr);
2324   }
2325 
2326   ierr = PetscFree2(s_waits3,send_status);CHKERRQ(ierr);
2327 
2328   /* create redundant matrix */
2329   /*-------------------------*/
2330   if (reuse == MAT_INITIAL_MATRIX){
2331     /* compute rownz_max for preallocation */
2332     for (imdex=0; imdex<nrecvs; imdex++){
2333       j = rowrange[recv_rank[imdex]+1] - rowrange[recv_rank[imdex]];
2334       rptr = rbuf_j[imdex];
2335       for (i=0; i<j; i++){
2336         ncols = rptr[i+1] - rptr[i];
2337         if (rownz_max < ncols) rownz_max = ncols;
2338       }
2339     }
2340 
2341     ierr = MatCreate(subcomm,&C);CHKERRQ(ierr);
2342     ierr = MatSetSizes(C,mlocal_sub,mlocal_sub,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr);
2343     ierr = MatSetFromOptions(C);CHKERRQ(ierr);
2344     ierr = MatSeqAIJSetPreallocation(C,rownz_max,PETSC_NULL);CHKERRQ(ierr);
2345     ierr = MatMPIAIJSetPreallocation(C,rownz_max,PETSC_NULL,rownz_max,PETSC_NULL);CHKERRQ(ierr);
2346   } else {
2347     C = *matredundant;
2348   }
2349 
2350   /* insert local matrix entries */
2351   rptr = sbuf_j;
2352   cols = sbuf_j + rend-rstart + 1;
2353   vals = sbuf_a;
2354   for (i=0; i<rend-rstart; i++){
2355     row   = i + rstart;
2356     ncols = rptr[i+1] - rptr[i];
2357     ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
2358     vals += ncols;
2359     cols += ncols;
2360   }
2361   /* insert received matrix entries */
2362   for (imdex=0; imdex<nrecvs; imdex++){
2363     rstart = rowrange[recv_rank[imdex]];
2364     rend   = rowrange[recv_rank[imdex]+1];
2365     rptr = rbuf_j[imdex];
2366     cols = rbuf_j[imdex] + rend-rstart + 1;
2367     vals = rbuf_a[imdex];
2368     for (i=0; i<rend-rstart; i++){
2369       row   = i + rstart;
2370       ncols = rptr[i+1] - rptr[i];
2371       ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
2372       vals += ncols;
2373       cols += ncols;
2374     }
2375   }
2376   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2377   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2378   ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr);
2379   if (M != mat->rmap->N || N != mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"redundant mat size %d != input mat size %d",M,mat->rmap->N);
2380   if (reuse == MAT_INITIAL_MATRIX){
2381     PetscContainer container;
2382     *matredundant = C;
2383     /* create a supporting struct and attach it to C for reuse */
2384     ierr = PetscNewLog(C,Mat_Redundant,&redund);CHKERRQ(ierr);
2385     ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
2386     ierr = PetscContainerSetPointer(container,redund);CHKERRQ(ierr);
2387     ierr = PetscObjectCompose((PetscObject)C,"Mat_Redundant",(PetscObject)container);CHKERRQ(ierr);
2388     ierr = PetscContainerSetUserDestroy(container,PetscContainerDestroy_MatRedundant);CHKERRQ(ierr);
2389 
2390     redund->nzlocal = nzlocal;
2391     redund->nsends  = nsends;
2392     redund->nrecvs  = nrecvs;
2393     redund->send_rank = send_rank;
2394     redund->recv_rank = recv_rank;
2395     redund->sbuf_nz = sbuf_nz;
2396     redund->rbuf_nz = rbuf_nz;
2397     redund->sbuf_j  = sbuf_j;
2398     redund->sbuf_a  = sbuf_a;
2399     redund->rbuf_j  = rbuf_j;
2400     redund->rbuf_a  = rbuf_a;
2401 
2402     redund->MatDestroy = C->ops->destroy;
2403     C->ops->destroy    = MatDestroy_MatRedundant;
2404   }
2405   PetscFunctionReturn(0);
2406 }
2407 
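/*
   Usage sketch (hypothetical: nsubcomm, subcomm and mlocal are assumed to
   have been set up by the caller, e.g. with the PetscSubcomm facilities):
   the first call builds the redundant matrix and attaches the Mat_Redundant
   buffers to it; later calls with MAT_REUSE_MATRIX resend only the
   numerical values.

     Mat C = PETSC_NULL;
     ierr = MatGetRedundantMatrix(A,nsubcomm,subcomm,mlocal,MAT_INITIAL_MATRIX,&C);CHKERRQ(ierr);
     ... change the values (not the pattern) of A, then:
     ierr = MatGetRedundantMatrix(A,nsubcomm,subcomm,mlocal,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr);
*/
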
2408 #undef __FUNCT__
2409 #define __FUNCT__ "MatGetRowMaxAbs_MPIAIJ"
2410 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2411 {
2412   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2413   PetscErrorCode ierr;
2414   PetscInt       i,*idxb = 0;
2415   PetscScalar    *va,*vb;
2416   Vec            vtmp;
2417 
2418   PetscFunctionBegin;
2419   ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr);
2420   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2421   if (idx) {
2422     for (i=0; i<A->rmap->n; i++) {
2423       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2424     }
2425   }
2426 
2427   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
2428   if (idx) {
2429     ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr);
2430   }
2431   ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
2432   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
2433 
2434   for (i=0; i<A->rmap->n; i++){
2435     if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
2436       va[i] = vb[i];
2437       if (idx) idx[i] = a->garray[idxb[i]];
2438     }
2439   }
2440 
2441   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2442   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
2443   if (idxb) {
2444     ierr = PetscFree(idxb);CHKERRQ(ierr);
2445   }
2446   ierr = VecDestroy(vtmp);CHKERRQ(ierr);
2447   PetscFunctionReturn(0);
2448 }
2449 
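/*
   Usage sketch: v receives, for each local row, the entry of largest
   absolute value; the optional idx array receives that entry's global
   column index.

     Vec      vmax;
     PetscInt m,*loc;
     ierr = MatGetLocalSize(A,&m,PETSC_NULL);CHKERRQ(ierr);
     ierr = MatGetVecs(A,PETSC_NULL,&vmax);CHKERRQ(ierr);
     ierr = PetscMalloc(m*sizeof(PetscInt),&loc);CHKERRQ(ierr);
     ierr = MatGetRowMaxAbs(A,vmax,loc);CHKERRQ(ierr);
     ierr = PetscFree(loc);CHKERRQ(ierr);
     ierr = VecDestroy(vmax);CHKERRQ(ierr);
*/
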
2450 #undef __FUNCT__
2451 #define __FUNCT__ "MatGetRowMinAbs_MPIAIJ"
2452 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2453 {
2454   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2455   PetscErrorCode ierr;
2456   PetscInt       i,*idxb = 0;
2457   PetscScalar    *va,*vb;
2458   Vec            vtmp;
2459 
2460   PetscFunctionBegin;
2461   ierr = MatGetRowMinAbs(a->A,v,idx);CHKERRQ(ierr);
2462   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2463   if (idx) {
2464     for (i=0; i<A->rmap->n; i++) {
2465       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2466     }
2467   }
2468 
2469   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
2470   if (idx) {
2471     ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr);
2472   }
2473   ierr = MatGetRowMinAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
2474   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
2475 
2476   for (i=0; i<A->rmap->n; i++){
2477     if (PetscAbsScalar(va[i]) > PetscAbsScalar(vb[i])) {
2478       va[i] = vb[i];
2479       if (idx) idx[i] = a->garray[idxb[i]];
2480     }
2481   }
2482 
2483   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2484   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
2485   if (idxb) {
2486     ierr = PetscFree(idxb);CHKERRQ(ierr);
2487   }
2488   ierr = VecDestroy(vtmp);CHKERRQ(ierr);
2489   PetscFunctionReturn(0);
2490 }
2491 
2492 #undef __FUNCT__
2493 #define __FUNCT__ "MatGetRowMin_MPIAIJ"
2494 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2495 {
2496   Mat_MPIAIJ    *mat    = (Mat_MPIAIJ *) A->data;
2497   PetscInt       n      = A->rmap->n;
2498   PetscInt       cstart = A->cmap->rstart;
2499   PetscInt      *cmap   = mat->garray;
2500   PetscInt      *diagIdx, *offdiagIdx;
2501   Vec            diagV, offdiagV;
2502   PetscScalar   *a, *diagA, *offdiagA;
2503   PetscInt       r;
2504   PetscErrorCode ierr;
2505 
2506   PetscFunctionBegin;
2507   ierr = PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);CHKERRQ(ierr);
2508   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr);
2509   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr);
2510   ierr = MatGetRowMin(mat->A, diagV,    diagIdx);CHKERRQ(ierr);
2511   ierr = MatGetRowMin(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
2512   ierr = VecGetArray(v,        &a);CHKERRQ(ierr);
2513   ierr = VecGetArray(diagV,    &diagA);CHKERRQ(ierr);
2514   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2515   for(r = 0; r < n; ++r) {
2516     if (PetscAbsScalar(diagA[r]) <= PetscAbsScalar(offdiagA[r])) {
2517       a[r]   = diagA[r];
2518       idx[r] = cstart + diagIdx[r];
2519     } else {
2520       a[r]   = offdiagA[r];
2521       idx[r] = cmap[offdiagIdx[r]];
2522     }
2523   }
2524   ierr = VecRestoreArray(v,        &a);CHKERRQ(ierr);
2525   ierr = VecRestoreArray(diagV,    &diagA);CHKERRQ(ierr);
2526   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2527   ierr = VecDestroy(diagV);CHKERRQ(ierr);
2528   ierr = VecDestroy(offdiagV);CHKERRQ(ierr);
2529   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
2530   PetscFunctionReturn(0);
2531 }
2532 
2533 #undef __FUNCT__
2534 #define __FUNCT__ "MatGetRowMax_MPIAIJ"
2535 PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2536 {
2537   Mat_MPIAIJ    *mat    = (Mat_MPIAIJ *) A->data;
2538   PetscInt       n      = A->rmap->n;
2539   PetscInt       cstart = A->cmap->rstart;
2540   PetscInt      *cmap   = mat->garray;
2541   PetscInt      *diagIdx, *offdiagIdx;
2542   Vec            diagV, offdiagV;
2543   PetscScalar   *a, *diagA, *offdiagA;
2544   PetscInt       r;
2545   PetscErrorCode ierr;
2546 
2547   PetscFunctionBegin;
2548   ierr = PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);CHKERRQ(ierr);
2549   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr);
2550   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr);
2551   ierr = MatGetRowMax(mat->A, diagV,    diagIdx);CHKERRQ(ierr);
2552   ierr = MatGetRowMax(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
2553   ierr = VecGetArray(v,        &a);CHKERRQ(ierr);
2554   ierr = VecGetArray(diagV,    &diagA);CHKERRQ(ierr);
2555   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2556   for(r = 0; r < n; ++r) {
2557     if (PetscAbsScalar(diagA[r]) >= PetscAbsScalar(offdiagA[r])) {
2558       a[r]   = diagA[r];
2559       idx[r] = cstart + diagIdx[r];
2560     } else {
2561       a[r]   = offdiagA[r];
2562       idx[r] = cmap[offdiagIdx[r]];
2563     }
2564   }
2565   ierr = VecRestoreArray(v,        &a);CHKERRQ(ierr);
2566   ierr = VecRestoreArray(diagV,    &diagA);CHKERRQ(ierr);
2567   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2568   ierr = VecDestroy(diagV);CHKERRQ(ierr);
2569   ierr = VecDestroy(offdiagV);CHKERRQ(ierr);
2570   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
2571   PetscFunctionReturn(0);
2572 }
2573 
2574 #undef __FUNCT__
2575 #define __FUNCT__ "MatGetSeqNonzerostructure_MPIAIJ"
2576 PetscErrorCode MatGetSeqNonzerostructure_MPIAIJ(Mat mat,Mat *newmat)
2577 {
2578   PetscErrorCode ierr;
2579   Mat            *dummy;
2580 
2581   PetscFunctionBegin;
2582   ierr = MatGetSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy);CHKERRQ(ierr);
2583   *newmat = *dummy;
2584   ierr = PetscFree(dummy);CHKERRQ(ierr);
2585   PetscFunctionReturn(0);
2586 }
2587 
2588 extern PetscErrorCode PETSCMAT_DLLEXPORT MatFDColoringApply_AIJ(Mat,MatFDColoring,Vec,MatStructure*,void*);
2589 /* -------------------------------------------------------------------*/
2590 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
2591        MatGetRow_MPIAIJ,
2592        MatRestoreRow_MPIAIJ,
2593        MatMult_MPIAIJ,
2594 /* 4*/ MatMultAdd_MPIAIJ,
2595        MatMultTranspose_MPIAIJ,
2596        MatMultTransposeAdd_MPIAIJ,
2597 #ifdef PETSC_HAVE_PBGL
2598        MatSolve_MPIAIJ,
2599 #else
2600        0,
2601 #endif
2602        0,
2603        0,
2604 /*10*/ 0,
2605        0,
2606        0,
2607        MatSOR_MPIAIJ,
2608        MatTranspose_MPIAIJ,
2609 /*15*/ MatGetInfo_MPIAIJ,
2610        MatEqual_MPIAIJ,
2611        MatGetDiagonal_MPIAIJ,
2612        MatDiagonalScale_MPIAIJ,
2613        MatNorm_MPIAIJ,
2614 /*20*/ MatAssemblyBegin_MPIAIJ,
2615        MatAssemblyEnd_MPIAIJ,
2616        MatSetOption_MPIAIJ,
2617        MatZeroEntries_MPIAIJ,
2618 /*24*/ MatZeroRows_MPIAIJ,
2619        0,
2620 #ifdef PETSC_HAVE_PBGL
2621        0,
2622 #else
2623        0,
2624 #endif
2625        0,
2626        0,
2627 /*29*/ MatSetUpPreallocation_MPIAIJ,
2628 #ifdef PETSC_HAVE_PBGL
2629        0,
2630 #else
2631        0,
2632 #endif
2633        0,
2634        0,
2635        0,
2636 /*34*/ MatDuplicate_MPIAIJ,
2637        0,
2638        0,
2639        0,
2640        0,
2641 /*39*/ MatAXPY_MPIAIJ,
2642        MatGetSubMatrices_MPIAIJ,
2643        MatIncreaseOverlap_MPIAIJ,
2644        MatGetValues_MPIAIJ,
2645        MatCopy_MPIAIJ,
2646 /*44*/ MatGetRowMax_MPIAIJ,
2647        MatScale_MPIAIJ,
2648        0,
2649        0,
2650        0,
2651 /*49*/ MatSetBlockSize_MPIAIJ,
2652        0,
2653        0,
2654        0,
2655        0,
2656 /*54*/ MatFDColoringCreate_MPIAIJ,
2657        0,
2658        MatSetUnfactored_MPIAIJ,
2659        MatPermute_MPIAIJ,
2660        0,
2661 /*59*/ MatGetSubMatrix_MPIAIJ,
2662        MatDestroy_MPIAIJ,
2663        MatView_MPIAIJ,
2664        0,
2665        0,
2666 /*64*/ 0,
2667        0,
2668        0,
2669        0,
2670        0,
2671 /*69*/ MatGetRowMaxAbs_MPIAIJ,
2672        MatGetRowMinAbs_MPIAIJ,
2673        0,
2674        MatSetColoring_MPIAIJ,
2675 #if defined(PETSC_HAVE_ADIC)
2676        MatSetValuesAdic_MPIAIJ,
2677 #else
2678        0,
2679 #endif
2680        MatSetValuesAdifor_MPIAIJ,
2681 /*75*/ MatFDColoringApply_AIJ,
2682        0,
2683        0,
2684        0,
2685        0,
2686 /*80*/ 0,
2687        0,
2688        0,
2689 /*83*/ MatLoad_MPIAIJ,
2690        0,
2691        0,
2692        0,
2693        0,
2694        0,
2695 /*89*/ MatMatMult_MPIAIJ_MPIAIJ,
2696        MatMatMultSymbolic_MPIAIJ_MPIAIJ,
2697        MatMatMultNumeric_MPIAIJ_MPIAIJ,
2698        MatPtAP_Basic,
2699        MatPtAPSymbolic_MPIAIJ,
2700 /*94*/ MatPtAPNumeric_MPIAIJ,
2701        0,
2702        0,
2703        0,
2704        0,
2705 /*99*/ 0,
2706        MatPtAPSymbolic_MPIAIJ_MPIAIJ,
2707        MatPtAPNumeric_MPIAIJ_MPIAIJ,
2708        MatConjugate_MPIAIJ,
2709        0,
2710 /*104*/MatSetValuesRow_MPIAIJ,
2711        MatRealPart_MPIAIJ,
2712        MatImaginaryPart_MPIAIJ,
2713        0,
2714        0,
2715 /*109*/0,
2716        MatGetRedundantMatrix_MPIAIJ,
2717        MatGetRowMin_MPIAIJ,
2718        0,
2719        0,
2720 /*114*/MatGetSeqNonzerostructure_MPIAIJ,
2721        0,
2722        0,
2723        0,
2724        0,
2725 /*119*/0,
2726        0,
2727        0,
2728        0,
2729        MatGetMultiProcBlock_MPIAIJ
2730 };
2731 
2732 /* ----------------------------------------------------------------------------------------*/
2733 
2734 EXTERN_C_BEGIN
2735 #undef __FUNCT__
2736 #define __FUNCT__ "MatStoreValues_MPIAIJ"
2737 PetscErrorCode PETSCMAT_DLLEXPORT MatStoreValues_MPIAIJ(Mat mat)
2738 {
2739   Mat_MPIAIJ     *aij = (Mat_MPIAIJ *)mat->data;
2740   PetscErrorCode ierr;
2741 
2742   PetscFunctionBegin;
2743   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
2744   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
2745   PetscFunctionReturn(0);
2746 }
2747 EXTERN_C_END
2748 
2749 EXTERN_C_BEGIN
2750 #undef __FUNCT__
2751 #define __FUNCT__ "MatRetrieveValues_MPIAIJ"
2752 PetscErrorCode PETSCMAT_DLLEXPORT MatRetrieveValues_MPIAIJ(Mat mat)
2753 {
2754   Mat_MPIAIJ     *aij = (Mat_MPIAIJ *)mat->data;
2755   PetscErrorCode ierr;
2756 
2757   PetscFunctionBegin;
2758   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
2759   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
2760   PetscFunctionReturn(0);
2761 }
2762 EXTERN_C_END
2763 
2764 EXTERN_C_BEGIN
2765 #undef __FUNCT__
2766 #define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ"
2767 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
2768 {
2769   Mat_MPIAIJ     *b;
2770   PetscErrorCode ierr;
2771   PetscInt       i;
2772 
2773   PetscFunctionBegin;
2774   if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5;
2775   if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2;
2776   if (d_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz);
2777   if (o_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz);
2778 
2779   ierr = PetscLayoutSetBlockSize(B->rmap,1);CHKERRQ(ierr);
2780   ierr = PetscLayoutSetBlockSize(B->cmap,1);CHKERRQ(ierr);
2781   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
2782   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
2783   if (d_nnz) {
2784     for (i=0; i<B->rmap->n; i++) {
2785       if (d_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %D value %D",i,d_nnz[i]);
2786     }
2787   }
2788   if (o_nnz) {
2789     for (i=0; i<B->rmap->n; i++) {
2790       if (o_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %D value %D",i,o_nnz[i]);
2791     }
2792   }
2793   b = (Mat_MPIAIJ*)B->data;
2794 
2795   if (!B->preallocated) {
    /* Explicitly create 2 MATSEQAIJ matrices: b->A holds the "diagonal" block
       (local rows x local columns) and b->B holds the off-diagonal block, which is
       created with the full global column width and compacted during assembly. */
2797     ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
2798     ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr);
2799     ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr);
2800     ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr);
2801     ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
2802     ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr);
2803     ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr);
2804     ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr);
2805   }
2806 
2807   ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr);
2808   ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr);
2809   B->preallocated = PETSC_TRUE;
2810   PetscFunctionReturn(0);
2811 }
2812 EXTERN_C_END
2813 
2814 #undef __FUNCT__
2815 #define __FUNCT__ "MatDuplicate_MPIAIJ"
2816 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
2817 {
2818   Mat            mat;
2819   Mat_MPIAIJ     *a,*oldmat = (Mat_MPIAIJ*)matin->data;
2820   PetscErrorCode ierr;
2821 
2822   PetscFunctionBegin;
2823   *newmat       = 0;
2824   ierr = MatCreate(((PetscObject)matin)->comm,&mat);CHKERRQ(ierr);
2825   ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr);
2826   ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr);
2827   ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
2828   a    = (Mat_MPIAIJ*)mat->data;
2829 
2830   mat->factortype    = matin->factortype;
2831   mat->rmap->bs      = matin->rmap->bs;
2832   mat->assembled    = PETSC_TRUE;
2833   mat->insertmode   = NOT_SET_VALUES;
2834   mat->preallocated = PETSC_TRUE;
2835 
2836   a->size           = oldmat->size;
2837   a->rank           = oldmat->rank;
2838   a->donotstash     = oldmat->donotstash;
2839   a->roworiented    = oldmat->roworiented;
2840   a->rowindices     = 0;
2841   a->rowvalues      = 0;
2842   a->getrowactive   = PETSC_FALSE;
2843 
2844   ierr = PetscLayoutCopy(matin->rmap,&mat->rmap);CHKERRQ(ierr);
2845   ierr = PetscLayoutCopy(matin->cmap,&mat->cmap);CHKERRQ(ierr);
2846 
2847   if (oldmat->colmap) {
2848 #if defined (PETSC_USE_CTABLE)
2849     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
2850 #else
2851     ierr = PetscMalloc((mat->cmap->N)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr);
2852     ierr = PetscLogObjectMemory(mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
2853     ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
2854 #endif
2855   } else a->colmap = 0;
2856   if (oldmat->garray) {
2857     PetscInt len;
2858     len  = oldmat->B->cmap->n;
2859     ierr = PetscMalloc((len+1)*sizeof(PetscInt),&a->garray);CHKERRQ(ierr);
2860     ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr);
2861     if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); }
2862   } else a->garray = 0;
2863 
2864   ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
2865   ierr = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr);
2866   ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
2867   ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr);
2868   ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
2869   ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr);
2870   ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
2871   ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr);
2872   ierr = PetscFListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr);
2873   *newmat = mat;
2874   PetscFunctionReturn(0);
2875 }
2876 
2877 /*
    Allows sending/receiving messages larger than 2 gigabytes in a single call;
    MPI_Send()/MPI_Recv() take an int message count, so a long message is broken
    into chunks of at most CHUNKSIZE entries, e.g. cnt = 600,000,000 entries is
    sent as three chunks of 250M, 250M, and 100M entries.
2879 */
2880 static int MPILong_Send(void *mess,PetscInt cnt, MPI_Datatype type,int to, int tag, MPI_Comm comm)
2881 {
2882   int             ierr;
2883   static PetscInt CHUNKSIZE = 250000000; /* 250,000,000 */
2884   PetscInt        i,numchunks;
2885   PetscMPIInt     icnt;
2886 
2887   numchunks = cnt/CHUNKSIZE + 1;
2888   for (i=0; i<numchunks; i++) {
2889     icnt = PetscMPIIntCast((i < numchunks-1) ? CHUNKSIZE : cnt - (numchunks-1)*CHUNKSIZE);
    ierr = MPI_Send(mess,icnt,type,to,tag,comm);CHKERRQ(ierr);
2891     if (type == MPIU_INT) {
2892       mess = (void*) (((PetscInt*)mess) + CHUNKSIZE);
2893     } else if (type == MPIU_SCALAR) {
2894       mess = (void*) (((PetscScalar*)mess) + CHUNKSIZE);
2895     } else SETERRQ(comm,PETSC_ERR_SUP,"No support for this datatype");
2896   }
2897   return 0;
2898 }
2899 static int MPILong_Recv(void *mess,PetscInt cnt, MPI_Datatype type,int from, int tag, MPI_Comm comm)
2900 {
2901   int             ierr;
2902   static PetscInt CHUNKSIZE = 250000000; /* 250,000,000 */
2903   MPI_Status      status;
2904   PetscInt        i,numchunks;
2905   PetscMPIInt     icnt;
2906 
2907   numchunks = cnt/CHUNKSIZE + 1;
2908   for (i=0; i<numchunks; i++) {
2909     icnt = PetscMPIIntCast((i < numchunks-1) ? CHUNKSIZE : cnt - (numchunks-1)*CHUNKSIZE);
    ierr = MPI_Recv(mess,icnt,type,from,tag,comm,&status);CHKERRQ(ierr);
2911     if (type == MPIU_INT) {
2912       mess = (void*) (((PetscInt*)mess) + CHUNKSIZE);
2913     } else if (type == MPIU_SCALAR) {
2914       mess = (void*) (((PetscScalar*)mess) + CHUNKSIZE);
2915     } else SETERRQ(comm,PETSC_ERR_SUP,"No support for this datatype");
2916   }
2917   return 0;
2918 }
2919 
2920 #undef __FUNCT__
2921 #define __FUNCT__ "MatLoad_MPIAIJ"
2922 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer)
2923 {
2924   PetscScalar    *vals,*svals;
2925   MPI_Comm       comm = ((PetscObject)viewer)->comm;
2926   PetscErrorCode ierr;
2927   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
2928   PetscInt       i,nz,j,rstart,rend,mmax,maxnz = 0,grows,gcols;
2929   PetscInt       header[4],*rowlengths = 0,M,N,m,*cols;
2930   PetscInt       *ourlens = PETSC_NULL,*procsnz = PETSC_NULL,*offlens = PETSC_NULL,jj,*mycols,*smycols;
2931   PetscInt       cend,cstart,n,*rowners,sizesset=1;
2932   int            fd;
2933 
2934   PetscFunctionBegin;
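  /*
     The viewer is expected to contain a matrix in the standard PETSc binary
     format: an integer header [MAT_FILE_CLASSID, M, N, total nnz], followed by
     the M row lengths, then all column indices, then all numerical values.
     Only the first three header entries are used here.
  */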
2935   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2936   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2937   if (!rank) {
2938     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
2939     ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
2940     if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
2941   }
2942 
2943   if (newMat->rmap->n < 0 && newMat->rmap->N < 0 && newMat->cmap->n < 0 && newMat->cmap->N < 0) sizesset = 0;
2944 
2945   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
2946   M = header[1]; N = header[2];
  /* If the global rows/cols are set to PETSC_DECIDE, set them to the sizes given in the file */
2948   if (sizesset && newMat->rmap->N < 0) newMat->rmap->N = M;
2949   if (sizesset && newMat->cmap->N < 0) newMat->cmap->N = N;
2950 
2951   /* If global sizes are set, check if they are consistent with that given in the file */
2952   if (sizesset) {
2953     ierr = MatGetSize(newMat,&grows,&gcols);CHKERRQ(ierr);
2954   }
  if (sizesset && newMat->rmap->N != grows) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows: Matrix in file has (%D) and input matrix has (%D)",M,grows);
  if (sizesset && newMat->cmap->N != gcols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols: Matrix in file has (%D) and input matrix has (%D)",N,gcols);
2957 
2958   /* determine ownership of all rows */
2959   if (newMat->rmap->n < 0 ) m    = M/size + ((M % size) > rank); /* PETSC_DECIDE */
2960   else m = newMat->rmap->n; /* Set by user */
2961 
2962   ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr);
2963   ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
2964 
2965   /* First process needs enough room for process with most rows */
2966   if (!rank) {
2967     mmax       = rowners[1];
    for (i=2; i<=size; i++) {   /* rowners[1..size] hold each process's row count at this point */
2969       mmax = PetscMax(mmax,rowners[i]);
2970     }
2971   } else mmax = m;
2972 
2973   rowners[0] = 0;
2974   for (i=2; i<=size; i++) {
2975     rowners[i] += rowners[i-1];
2976   }
2977   rstart = rowners[rank];
2978   rend   = rowners[rank+1];
2979 
2980   /* distribute row lengths to all processors */
2981   ierr    = PetscMalloc2(mmax,PetscInt,&ourlens,mmax,PetscInt,&offlens);CHKERRQ(ierr);
2982   if (!rank) {
2983     ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr);
    ierr = PetscMalloc((mmax+1)*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr); /* large enough for the biggest process's row block */
2985     ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr);
2986     ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr);
2987     for (j=0; j<m; j++) {
2988       procsnz[0] += ourlens[j];
2989     }
2990     for (i=1; i<size; i++) {
2991       ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr);
2992       /* calculate the number of nonzeros on each processor */
2993       for (j=0; j<rowners[i+1]-rowners[i]; j++) {
2994         procsnz[i] += rowlengths[j];
2995       }
2996       ierr = MPILong_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
2997     }
2998     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
2999   } else {
3000     ierr = MPILong_Recv(ourlens,m,MPIU_INT,0,tag,comm);CHKERRQ(ierr);
3001   }
3002 
3003   if (!rank) {
3004     /* determine max buffer needed and allocate it */
3005     maxnz = 0;
3006     for (i=0; i<size; i++) {
3007       maxnz = PetscMax(maxnz,procsnz[i]);
3008     }
3009     ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr);
3010 
3011     /* read in my part of the matrix column indices  */
3012     nz   = procsnz[0];
3013     ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr);
3014     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
3015 
    /* read in everyone else's and ship it off */
3017     for (i=1; i<size; i++) {
3018       nz     = procsnz[i];
3019       ierr   = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
3020       ierr   = MPILong_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
3021     }
3022     ierr = PetscFree(cols);CHKERRQ(ierr);
3023   } else {
3024     /* determine buffer space needed for message */
3025     nz = 0;
3026     for (i=0; i<m; i++) {
3027       nz += ourlens[i];
3028     }
3029     ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr);
3030 
    /* receive message of column indices */
3032     ierr = MPILong_Recv(mycols,nz,MPIU_INT,0,tag,comm);CHKERRQ(ierr);
3033   }
3034 
3035   /* determine column ownership if matrix is not square */
3036   if (N != M) {
3037     if (newMat->cmap->n < 0) n      = N/size + ((N % size) > rank);
3038     else n = newMat->cmap->n;
3039     ierr   = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3040     cstart = cend - n;
3041   } else {
3042     cstart = rstart;
3043     cend   = rend;
3044     n      = cend - cstart;
3045   }
3046 
3047   /* loop over local rows, determining number of off diagonal entries */
3048   ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr);
3049   jj = 0;
3050   for (i=0; i<m; i++) {
3051     for (j=0; j<ourlens[i]; j++) {
3052       if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++;
3053       jj++;
3054     }
3055   }
3056 
3057   for (i=0; i<m; i++) {
3058     ourlens[i] -= offlens[i];
3059   }
3060   if (!sizesset) {
3061     ierr = MatSetSizes(newMat,m,n,M,N);CHKERRQ(ierr);
3062   }
3063   ierr = MatMPIAIJSetPreallocation(newMat,0,ourlens,0,offlens);CHKERRQ(ierr);
3064 
3065   for (i=0; i<m; i++) {
3066     ourlens[i] += offlens[i];
3067   }
3068 
3069   if (!rank) {
3070     ierr = PetscMalloc((maxnz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
3071 
3072     /* read in my part of the matrix numerical values  */
3073     nz   = procsnz[0];
3074     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3075 
3076     /* insert into matrix */
3077     jj      = rstart;
3078     smycols = mycols;
3079     svals   = vals;
3080     for (i=0; i<m; i++) {
3081       ierr = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
3082       smycols += ourlens[i];
3083       svals   += ourlens[i];
3084       jj++;
3085     }
3086 
3087     /* read in other processors and ship out */
3088     for (i=1; i<size; i++) {
3089       nz     = procsnz[i];
3090       ierr   = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3091       ierr   = MPILong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr);
3092     }
3093     ierr = PetscFree(procsnz);CHKERRQ(ierr);
3094   } else {
3095     /* receive numeric values */
3096     ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
3097 
    /* receive message of values */
3099     ierr   = MPILong_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr);
3100 
3101     /* insert into matrix */
3102     jj      = rstart;
3103     smycols = mycols;
3104     svals   = vals;
3105     for (i=0; i<m; i++) {
3106       ierr     = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
3107       smycols += ourlens[i];
3108       svals   += ourlens[i];
3109       jj++;
3110     }
3111   }
3112   ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr);
3113   ierr = PetscFree(vals);CHKERRQ(ierr);
3114   ierr = PetscFree(mycols);CHKERRQ(ierr);
3115   ierr = PetscFree(rowners);CHKERRQ(ierr);
3116 
3117   ierr = MatAssemblyBegin(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3118   ierr = MatAssemblyEnd(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3119   PetscFunctionReturn(0);
3120 }
3121 
3122 #undef __FUNCT__
3123 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ"
3124 PetscErrorCode MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat)
3125 {
3126   PetscErrorCode ierr;
3127   IS             iscol_local;
3128   PetscInt       csize;
3129 
3130   PetscFunctionBegin;
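  /* All indices of iscol are gathered onto every process; for MAT_INITIAL_MATRIX the
     gathered IS is composed with the new matrix so that a later MAT_REUSE_MATRIX call
     can query it back instead of regathering */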
3131   ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
3132   if (call == MAT_REUSE_MATRIX) {
3133     ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr);
3134     if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3135   } else {
3136     ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
3137   }
3138   ierr = MatGetSubMatrix_MPIAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr);
3139   if (call == MAT_INITIAL_MATRIX) {
3140     ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
3141     ierr = ISDestroy(iscol_local);CHKERRQ(ierr);
3142   }
3143   PetscFunctionReturn(0);
3144 }
3145 
3146 #undef __FUNCT__
3147 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ_Private"
3148 /*
    Not great since it makes two copies of the submatrix: first each process
  extracts a local SeqAIJ submatrix, then those local matrices are concatenated
  into the final parallel result. Writing it directly would be much like
  MatGetSubMatrices_MPIAIJ().
3152 
3153   Note: This requires a sequential iscol with all indices.
3154 */
3155 PetscErrorCode MatGetSubMatrix_MPIAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat)
3156 {
3157   PetscErrorCode ierr;
3158   PetscMPIInt    rank,size;
3159   PetscInt       i,m,n,rstart,row,rend,nz,*cwork,j;
3160   PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal;
3161   Mat            *local,M,Mreuse;
3162   MatScalar      *vwork,*aa;
3163   MPI_Comm       comm = ((PetscObject)mat)->comm;
3164   Mat_SeqAIJ     *aij;
3165 
3166 
3167   PetscFunctionBegin;
3168   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3169   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3170 
3171   if (call ==  MAT_REUSE_MATRIX) {
3172     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr);
3173     if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3174     local = &Mreuse;
3175     ierr  = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);CHKERRQ(ierr);
3176   } else {
3177     ierr   = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr);
3178     Mreuse = *local;
3179     ierr   = PetscFree(local);CHKERRQ(ierr);
3180   }
3181 
3182   /*
3183       m - number of local rows
3184       n - number of columns (same on all processors)
3185       rstart - first row in new global matrix generated
3186   */
3187   ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
3188   if (call == MAT_INITIAL_MATRIX) {
3189     aij = (Mat_SeqAIJ*)(Mreuse)->data;
3190     ii  = aij->i;
3191     jj  = aij->j;
3192 
3193     /*
3194         Determine the number of non-zeros in the diagonal and off-diagonal
3195         portions of the matrix in order to do correct preallocation
3196     */
3197 
3198     /* first get start and end of "diagonal" columns */
3199     if (csize == PETSC_DECIDE) {
3200       ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
3201       if (mglobal == n) { /* square matrix */
3202 	nlocal = m;
3203       } else {
3204         nlocal = n/size + ((n % size) > rank);
3205       }
3206     } else {
3207       nlocal = csize;
3208     }
3209     ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3210     rstart = rend - nlocal;
3211     if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n);
3212 
3213     /* next, compute all the lengths */
3214     ierr  = PetscMalloc((2*m+1)*sizeof(PetscInt),&dlens);CHKERRQ(ierr);
3215     olens = dlens + m;
3216     for (i=0; i<m; i++) {
3217       jend = ii[i+1] - ii[i];
3218       olen = 0;
3219       dlen = 0;
3220       for (j=0; j<jend; j++) {
3221         if (*jj < rstart || *jj >= rend) olen++;
3222         else dlen++;
3223         jj++;
3224       }
3225       olens[i] = olen;
3226       dlens[i] = dlen;
3227     }
3228     ierr = MatCreate(comm,&M);CHKERRQ(ierr);
3229     ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr);
3230     ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr);
3231     ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr);
3232     ierr = PetscFree(dlens);CHKERRQ(ierr);
3233   } else {
3234     PetscInt ml,nl;
3235 
3236     M = *newmat;
3237     ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
3238     if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
3239     ierr = MatZeroEntries(M);CHKERRQ(ierr);
3240     /*
3241          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
3242        rather than the slower MatSetValues().
3243     */
3244     M->was_assembled = PETSC_TRUE;
3245     M->assembled     = PETSC_FALSE;
3246   }
3247   ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
3248   aij = (Mat_SeqAIJ*)(Mreuse)->data;
3249   ii  = aij->i;
3250   jj  = aij->j;
3251   aa  = aij->a;
3252   for (i=0; i<m; i++) {
3253     row   = rstart + i;
3254     nz    = ii[i+1] - ii[i];
3255     cwork = jj;     jj += nz;
3256     vwork = aa;     aa += nz;
3257     ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
3258   }
3259 
3260   ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3261   ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3262   *newmat = M;
3263 
3264   /* save submatrix used in processor for next request */
3265   if (call ==  MAT_INITIAL_MATRIX) {
3266     ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
3267     ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr);
3268   }
3269 
3270   PetscFunctionReturn(0);
3271 }
3272 
3273 EXTERN_C_BEGIN
3274 #undef __FUNCT__
3275 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR_MPIAIJ"
3276 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[])
3277 {
3278   PetscInt       m,cstart, cend,j,nnz,i,d;
3279   PetscInt       *d_nnz,*o_nnz,nnz_max = 0,rstart,ii;
3280   const PetscInt *JJ;
3281   PetscScalar    *values;
3282   PetscErrorCode ierr;
3283 
3284   PetscFunctionBegin;
3285   if (Ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0 it is %D",Ii[0]);
3286 
3287   ierr = PetscLayoutSetBlockSize(B->rmap,1);CHKERRQ(ierr);
3288   ierr = PetscLayoutSetBlockSize(B->cmap,1);CHKERRQ(ierr);
3289   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3290   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3291   m      = B->rmap->n;
3292   cstart = B->cmap->rstart;
3293   cend   = B->cmap->rend;
3294   rstart = B->rmap->rstart;
3295 
3296   ierr  = PetscMalloc2(m,PetscInt,&d_nnz,m,PetscInt,&o_nnz);CHKERRQ(ierr);
3297 
#if defined(PETSC_USE_DEBUG)
  for (i=0; i<m; i++) {
    nnz     = Ii[i+1]- Ii[i];
    JJ      = J + Ii[i];
    if (nnz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative number of columns %D",i,nnz);
    if (nnz && (JJ[0] < 0)) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index",i);
    if (nnz && (JJ[nnz-1] >= B->cmap->N)) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N);
  }
#endif
3307 
3308   for (i=0; i<m; i++) {
3309     nnz     = Ii[i+1]- Ii[i];
3310     JJ      = J + Ii[i];
3311     nnz_max = PetscMax(nnz_max,nnz);
3312     d       = 0;
3313     for (j=0; j<nnz; j++) {
3314       if (cstart <= JJ[j] && JJ[j] < cend) d++;
3315     }
3316     d_nnz[i] = d;
3317     o_nnz[i] = nnz - d;
3318   }
3319   ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
3320   ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);
3321 
3322   if (v) values = (PetscScalar*)v;
3323   else {
3324     ierr = PetscMalloc((nnz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr);
3325     ierr = PetscMemzero(values,nnz_max*sizeof(PetscScalar));CHKERRQ(ierr);
3326   }
3327 
3328   for (i=0; i<m; i++) {
3329     ii   = i + rstart;
3330     nnz  = Ii[i+1]- Ii[i];
3331     ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ? Ii[i] : 0),INSERT_VALUES);CHKERRQ(ierr);
3332   }
3333   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3334   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3335 
3336   if (!v) {
3337     ierr = PetscFree(values);CHKERRQ(ierr);
3338   }
3339   PetscFunctionReturn(0);
3340 }
3341 EXTERN_C_END
3342 
3343 #undef __FUNCT__
3344 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR"
3345 /*@
3346    MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format
3347    (the default parallel PETSc format).
3348 
3349    Collective on MPI_Comm
3350 
3351    Input Parameters:
3352 +  B - the matrix
3353 .  i - the indices into j for the start of each local row (starts with zero)
3354 .  j - the column indices for each local row (starts with zero)
3355 -  v - optional values in the matrix
3356 
3357    Level: developer
3358 
3359    Notes:
       The i, j, and v arrays ARE copied by this routine into the internal format used by PETSc;
     thus you CANNOT change the matrix entries by changing the values of v[] after you have
     called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.

       The i and j indices are 0 based, and the i indices are offsets into the local j array.

       The format used for the sparse matrix input is equivalent to a
    row-major ordering, i.e. for the following matrix, the input data expected is
    as shown:
3369 
3370         1 0 0
3371         2 0 3     P0
3372        -------
3373         4 5 6     P1
3374 
     Process0 [P0]: rows_owned=[0,1]
        i =  {0,1,3}  [size = nrow+1  = 2+1]
        j =  {0,0,2}  [size = local nz = 3]
        v =  {1,2,3}  [size = local nz = 3]

     Process1 [P1]: rows_owned=[2]
        i =  {0,3}    [size = nrow+1  = 1+1]
        j =  {0,1,2}  [size = local nz = 3]
        v =  {4,5,6}  [size = local nz = 3]
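
     A minimal calling sketch for process 0 of the layout above (error checking
     omitted; the variable and communicator names are illustrative only, and
     process 1 makes the matching calls with its own arrays):
.vb
        Mat         B;
        PetscInt    i[] = {0,1,3}, j[] = {0,0,2};
        PetscScalar v[] = {1.0,2.0,3.0};
        MatCreate(comm,&B);
        MatSetSizes(B,2,PETSC_DECIDE,3,3);   /* P0 owns 2 rows */
        MatSetType(B,MATMPIAIJ);
        MatMPIAIJSetPreallocationCSR(B,i,j,v);
.ve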
3384 
3385 .keywords: matrix, aij, compressed row, sparse, parallel
3386 
3387 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateMPIAIJ(), MPIAIJ,
3388           MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays()
3389 @*/
3390 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
3391 {
3392   PetscErrorCode ierr,(*f)(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]);
3393 
3394   PetscFunctionBegin;
3395   ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",(void (**)(void))&f);CHKERRQ(ierr);
3396   if (f) {
3397     ierr = (*f)(B,i,j,v);CHKERRQ(ierr);
3398   }
3399   PetscFunctionReturn(0);
3400 }
3401 
3402 #undef __FUNCT__
3403 #define __FUNCT__ "MatMPIAIJSetPreallocation"
3404 /*@C
3405    MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format
3406    (the default parallel PETSc format).  For good matrix assembly performance
3407    the user should preallocate the matrix storage by setting the parameters
3408    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3409    performance can be increased by more than a factor of 50.
3410 
3411    Collective on MPI_Comm
3412 
3413    Input Parameters:
3414 +  A - the matrix
3415 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
3416            (same value is used for all local rows)
3417 .  d_nnz - array containing the number of nonzeros in the various rows of the
3418            DIAGONAL portion of the local submatrix (possibly different for each row)
3419            or PETSC_NULL, if d_nz is used to specify the nonzero structure.
3420            The size of this array is equal to the number of local rows, i.e 'm'.
3421            You must leave room for the diagonal entry even if it is zero.
3422 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
3423            submatrix (same value is used for all local rows).
3424 -  o_nnz - array containing the number of nonzeros in the various rows of the
3425            OFF-DIAGONAL portion of the local submatrix (possibly different for
3426            each row) or PETSC_NULL, if o_nz is used to specify the nonzero
3427            structure. The size of this array is equal to the number
3428            of local rows, i.e 'm'.
3429 
3430    If the *_nnz parameter is given then the *_nz parameter is ignored
3431 
3432    The AIJ format (also called the Yale sparse matrix format or
   compressed row storage (CSR)) is fully compatible with standard Fortran 77
3434    storage.  The stored row and column indices begin with zero.
3435    See the <A href="../../docs/manual.pdf#nameddest=ch_mat">Mat chapter of the users manual</A> for details.
3436 
3437    The parallel matrix is partitioned such that the first m0 rows belong to
3438    process 0, the next m1 rows belong to process 1, the next m2 rows belong
3439    to process 2 etc.. where m0,m1,m2... are the input parameter 'm'.
3440 
   The DIAGONAL portion of the local submatrix of a processor can be defined
   as the submatrix obtained by extracting the part corresponding to
   the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the
   first row that belongs to the processor, r2 is the last row belonging to
   this processor, and c1-c2 is the range of indices of the local part of a
   vector suitable for applying the matrix to.  This is an mxn matrix.  In the
   common case of a square matrix, the row and column ranges are the same and
   the DIAGONAL part is also square. The remaining portion of the local
   submatrix (mxN) constitutes the OFF-DIAGONAL portion.
3450 
3451    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
3452 
3453    You can call MatGetInfo() to get information on how effective the preallocation was;
3454    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
3455    You can also run with the option -info and look for messages with the string
3456    malloc in them to see if additional memory allocation was needed.
3457 
3458    Example usage:
3459 
3460    Consider the following 8x8 matrix with 34 non-zero values, that is
   assembled across 3 processors. Let's assume that proc0 owns 3 rows,
3462    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
3463    as follows:
3464 
3465 .vb
3466             1  2  0  |  0  3  0  |  0  4
3467     Proc0   0  5  6  |  7  0  0  |  8  0
3468             9  0 10  | 11  0  0  | 12  0
3469     -------------------------------------
3470            13  0 14  | 15 16 17  |  0  0
3471     Proc1   0 18  0  | 19 20 21  |  0  0
3472             0  0  0  | 22 23  0  | 24  0
3473     -------------------------------------
3474     Proc2  25 26 27  |  0  0 28  | 29  0
3475            30  0  0  | 31 32 33  |  0 34
3476 .ve
3477 
3478    This can be represented as a collection of submatrices as:
3479 
3480 .vb
3481       A B C
3482       D E F
3483       G H I
3484 .ve
3485 
3486    Where the submatrices A,B,C are owned by proc0, D,E,F are
3487    owned by proc1, G,H,I are owned by proc2.
3488 
3489    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3490    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3491    The 'M','N' parameters are 8,8, and have the same values on all procs.
3492 
3493    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
3494    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
3495    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
3496    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
   part as SeqAIJ matrices, e.g., proc1 will store [E] as a SeqAIJ
   matrix, and [DF] as another SeqAIJ matrix.
3499 
3500    When d_nz, o_nz parameters are specified, d_nz storage elements are
3501    allocated for every row of the local diagonal submatrix, and o_nz
3502    storage locations are allocated for every row of the OFF-DIAGONAL submat.
   One way to choose d_nz and o_nz is to use the maximum number of nonzeros over
   the local rows of the DIAGONAL and the OFF-DIAGONAL submatrices, respectively.
3505    In this case, the values of d_nz,o_nz are:
3506 .vb
     proc0 : d_nz = 2, o_nz = 2
     proc1 : d_nz = 3, o_nz = 2
     proc2 : d_nz = 1, o_nz = 4
3510 .ve
3511    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
3512    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
   for proc2, i.e. we are using 12+15+10=37 storage locations to store
3514    34 values.
3515 
3516    When d_nnz, o_nnz parameters are specified, the storage is specified
   for every row, corresponding to both the DIAGONAL and OFF-DIAGONAL submatrices.
3518    In the above case the values for d_nnz,o_nnz are:
3519 .vb
3520      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
3521      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
3522      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
3523 .ve
   Here the space allocated is the sum of all the above values, i.e. 34, and
   hence the preallocation is perfect.
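
   For instance, process 0 of the example above could preallocate with
   (a sketch; error checking omitted):
.vb
      PetscInt d_nnz[] = {2,2,2}, o_nnz[] = {2,2,2};
      MatMPIAIJSetPreallocation(A,0,d_nnz,0,o_nnz);
.ve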
3526 
3527    Level: intermediate
3528 
3529 .keywords: matrix, aij, compressed row, sparse, parallel
3530 
3531 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIAIJ(), MatMPIAIJSetPreallocationCSR(),
3532           MPIAIJ, MatGetInfo()
3533 @*/
3534 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
3535 {
3536   PetscErrorCode ierr,(*f)(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]);
3537 
3538   PetscFunctionBegin;
3539   ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr);
3540   if (f) {
3541     ierr = (*f)(B,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
3542   }
3543   PetscFunctionReturn(0);
3544 }
3545 
3546 #undef __FUNCT__
3547 #define __FUNCT__ "MatCreateMPIAIJWithArrays"
3548 /*@
     MatCreateMPIAIJWithArrays - creates an MPI AIJ matrix using arrays that contain
         the local rows in standard CSR format.
3551 
3552    Collective on MPI_Comm
3553 
3554    Input Parameters:
3555 +  comm - MPI communicator
3556 .  m - number of local rows (Cannot be PETSC_DECIDE)
3557 .  n - This value should be the same as the local size used in creating the
3558        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
3559        calculated if N is given) For square matrices n is almost always m.
3560 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
3561 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
3562 .   i - row indices
3563 .   j - column indices
3564 -   a - matrix values
3565 
3566    Output Parameter:
3567 .   mat - the matrix
3568 
3569    Level: intermediate
3570 
3571    Notes:
3572        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
3573      thus you CANNOT change the matrix entries by changing the values of a[] after you have
3574      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3575 
       The i and j indices are 0 based, and the i indices are offsets into the local j array.

       The format used for the sparse matrix input is equivalent to a
    row-major ordering, i.e. for the following matrix, the input data expected is
3580     as shown:
3581 
3582         1 0 0
3583         2 0 3     P0
3584        -------
3585         4 5 6     P1
3586 
     Process0 [P0]: rows_owned=[0,1]
        i =  {0,1,3}  [size = nrow+1  = 2+1]
        j =  {0,0,2}  [size = local nz = 3]
        v =  {1,2,3}  [size = local nz = 3]

     Process1 [P1]: rows_owned=[2]
        i =  {0,3}    [size = nrow+1  = 1+1]
        j =  {0,1,2}  [size = local nz = 3]
        v =  {4,5,6}  [size = local nz = 3]
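
     A minimal calling sketch for the layout above (error checking omitted; each
     process passes its own row count and its own i, j, and a arrays):
.vb
        MatCreateMPIAIJWithArrays(comm,2,PETSC_DECIDE,3,3,i,j,a,&mat);  /* on P0 */
        MatCreateMPIAIJWithArrays(comm,1,PETSC_DECIDE,3,3,i,j,a,&mat);  /* on P1 */
.ve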
3596 
3597 .keywords: matrix, aij, compressed row, sparse, parallel
3598 
3599 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
3600           MPIAIJ, MatCreateMPIAIJ(), MatCreateMPIAIJWithSplitArrays()
3601 @*/
3602 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
3603 {
3604   PetscErrorCode ierr;
3605 
  PetscFunctionBegin;
  if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
3610   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
3611   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
3612   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
3613   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
3614   ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr);
3615   PetscFunctionReturn(0);
3616 }
3617 
3618 #undef __FUNCT__
3619 #define __FUNCT__ "MatCreateMPIAIJ"
3620 /*@C
3621    MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format
3622    (the default parallel PETSc format).  For good matrix assembly performance
3623    the user should preallocate the matrix storage by setting the parameters
3624    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3625    performance can be increased by more than a factor of 50.
3626 
3627    Collective on MPI_Comm
3628 
3629    Input Parameters:
3630 +  comm - MPI communicator
3631 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
3632            This value should be the same as the local size used in creating the
3633            y vector for the matrix-vector product y = Ax.
3634 .  n - This value should be the same as the local size used in creating the
3635        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
3636        calculated if N is given) For square matrices n is almost always m.
3637 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
3638 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
3639 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
3640            (same value is used for all local rows)
3641 .  d_nnz - array containing the number of nonzeros in the various rows of the
3642            DIAGONAL portion of the local submatrix (possibly different for each row)
3643            or PETSC_NULL, if d_nz is used to specify the nonzero structure.
3644            The size of this array is equal to the number of local rows, i.e 'm'.
3645            You must leave room for the diagonal entry even if it is zero.
3646 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
3647            submatrix (same value is used for all local rows).
3648 -  o_nnz - array containing the number of nonzeros in the various rows of the
3649            OFF-DIAGONAL portion of the local submatrix (possibly different for
3650            each row) or PETSC_NULL, if o_nz is used to specify the nonzero
3651            structure. The size of this array is equal to the number
3652            of local rows, i.e 'm'.
3653 
3654    Output Parameter:
3655 .  A - the matrix
3656 
3657    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
   MatXXXXSetPreallocation() paradigm instead of this routine directly.
3659    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
3660 
3661    Notes:
3662    If the *_nnz parameter is given then the *_nz parameter is ignored
3663 
3664    m,n,M,N parameters specify the size of the matrix, and its partitioning across
3665    processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
3666    storage requirements for this matrix.
3667 
3668    If PETSC_DECIDE or  PETSC_DETERMINE is used for a particular argument on one
3669    processor than it must be used on all processors that share the object for
3670    that argument.
3671 
3672    The user MUST specify either the local or global matrix dimensions
3673    (possibly both).
3674 
3675    The parallel matrix is partitioned across processors such that the
3676    first m0 rows belong to process 0, the next m1 rows belong to
3677    process 1, the next m2 rows belong to process 2 etc.. where
   m0,m1,m2,.. are the input parameter 'm', i.e. each processor stores
   values corresponding to an [m x N] submatrix.
3680 
3681    The columns are logically partitioned with the n0 columns belonging
3682    to 0th partition, the next n1 columns belonging to the next
   partition etc., where n0,n1,n2... are the input parameter 'n'.
3684 
   The DIAGONAL portion of the local submatrix on any given processor
   is the submatrix corresponding to the locally owned rows and columns m,n
   of that processor, i.e. the diagonal matrix on
   process 0 is [m0 x n0], the diagonal matrix on process 1 is [m1 x n1],
   etc. The remaining portion of the local submatrix [m x (N-n)]
   constitutes the OFF-DIAGONAL portion. The example below better
   illustrates this concept.
3692 
3693    For a square global matrix we define each processor's diagonal portion
3694    to be its local rows and the corresponding columns (a square submatrix);
3695    each processor's off-diagonal portion encompasses the remainder of the
3696    local matrix (a rectangular submatrix).
3697 
3698    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
3699 
3700    When calling this routine with a single process communicator, a matrix of
3701    type SEQAIJ is returned.  If a matrix of type MPIAIJ is desired for this
3702    type of communicator, use the construction mechanism:
3703      MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatSetSizes(A, m,n,M,N); MatMPIAIJSetPreallocation(A,...);
3704 
3705    By default, this format uses inodes (identical nodes) when possible.
3706    We search for consecutive rows with the same nonzero structure, thereby
3707    reusing matrix information to achieve increased efficiency.
3708 
3709    Options Database Keys:
3710 +  -mat_no_inode  - Do not use inodes
3711 .  -mat_inode_limit <limit> - Sets inode limit (max limit=5)
3712 -  -mat_aij_oneindex - Internally use indexing starting at 1
3713         rather than 0.  Note that when calling MatSetValues(),
3714         the user still MUST index entries starting at 0!
3715 
3716 
3717    Example usage:
3718 
3719    Consider the following 8x8 matrix with 34 non-zero values, that is
   assembled across 3 processors. Let's assume that proc0 owns 3 rows,
3721    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
3722    as follows:
3723 
3724 .vb
3725             1  2  0  |  0  3  0  |  0  4
3726     Proc0   0  5  6  |  7  0  0  |  8  0
3727             9  0 10  | 11  0  0  | 12  0
3728     -------------------------------------
3729            13  0 14  | 15 16 17  |  0  0
3730     Proc1   0 18  0  | 19 20 21  |  0  0
3731             0  0  0  | 22 23  0  | 24  0
3732     -------------------------------------
3733     Proc2  25 26 27  |  0  0 28  | 29  0
3734            30  0  0  | 31 32 33  |  0 34
3735 .ve
3736 
3737    This can be represented as a collection of submatrices as:
3738 
3739 .vb
3740       A B C
3741       D E F
3742       G H I
3743 .ve
3744 
3745    Where the submatrices A,B,C are owned by proc0, D,E,F are
3746    owned by proc1, G,H,I are owned by proc2.
3747 
3748    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3749    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3750    The 'M','N' parameters are 8,8, and have the same values on all procs.
3751 
3752    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
3753    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
3754    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
3755    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
   part as SeqAIJ matrices, e.g., proc1 will store [E] as a SeqAIJ
   matrix, and [DF] as another SeqAIJ matrix.
3758 
3759    When d_nz, o_nz parameters are specified, d_nz storage elements are
3760    allocated for every row of the local diagonal submatrix, and o_nz
3761    storage locations are allocated for every row of the OFF-DIAGONAL submat.
   One way to choose d_nz and o_nz is to use the maximum number of nonzeros over
   the local rows of the DIAGONAL and the OFF-DIAGONAL submatrices, respectively.
3764    In this case, the values of d_nz,o_nz are:
3765 .vb
     proc0 : d_nz = 2, o_nz = 2
     proc1 : d_nz = 3, o_nz = 2
     proc2 : d_nz = 1, o_nz = 4
3769 .ve
3770    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
3771    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
   for proc2, i.e. we are using 12+15+10=37 storage locations to store
3773    34 values.
3774 
3775    When d_nnz, o_nnz parameters are specified, the storage is specified
   for every row, corresponding to both the DIAGONAL and OFF-DIAGONAL submatrices.
3777    In the above case the values for d_nnz,o_nnz are:
3778 .vb
3779      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
3780      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
3781      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
3782 .ve
   Here the space allocated is the sum of all the above values, i.e. 34, and
   hence the preallocation is perfect.
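
   For instance, the matrix above could be created with (a sketch; error checking
   omitted, and each process passes its own m, n, d_nz, and o_nz values):
.vb
      MatCreateMPIAIJ(comm,3,3,8,8,2,PETSC_NULL,2,PETSC_NULL,&A);   /* on proc0 */
.ve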
3785 
3786    Level: intermediate
3787 
3788 .keywords: matrix, aij, compressed row, sparse, parallel
3789 
3790 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
3791           MPIAIJ, MatCreateMPIAIJWithArrays()
3792 @*/
3793 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
3794 {
3795   PetscErrorCode ierr;
3796   PetscMPIInt    size;
3797 
3798   PetscFunctionBegin;
3799   ierr = MatCreate(comm,A);CHKERRQ(ierr);
3800   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
3801   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3802   if (size > 1) {
3803     ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr);
3804     ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
3805   } else {
3806     ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr);
3807     ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr);
3808   }
3809   PetscFunctionReturn(0);
3810 }
3811 
3812 #undef __FUNCT__
3813 #define __FUNCT__ "MatMPIAIJGetSeqAIJ"
3814 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,PetscInt *colmap[])
3815 {
3816   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
3817 
3818   PetscFunctionBegin;
3819   *Ad     = a->A;
3820   *Ao     = a->B;
3821   *colmap = a->garray;
3822   PetscFunctionReturn(0);
3823 }
3824 
3825 #undef __FUNCT__
3826 #define __FUNCT__ "MatSetColoring_MPIAIJ"
3827 PetscErrorCode MatSetColoring_MPIAIJ(Mat A,ISColoring coloring)
3828 {
3829   PetscErrorCode ierr;
3830   PetscInt       i;
3831   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
3832 
3833   PetscFunctionBegin;
3834   if (coloring->ctype == IS_COLORING_GLOBAL) {
3835     ISColoringValue *allcolors,*colors;
3836     ISColoring      ocoloring;
3837 
3838     /* set coloring for diagonal portion */
3839     ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr);
3840 
3841     /* set coloring for off-diagonal portion */
3842     ierr = ISAllGatherColors(((PetscObject)A)->comm,coloring->n,coloring->colors,PETSC_NULL,&allcolors);CHKERRQ(ierr);
3843     ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
3844     for (i=0; i<a->B->cmap->n; i++) {
3845       colors[i] = allcolors[a->garray[i]];
3846     }
3847     ierr = PetscFree(allcolors);CHKERRQ(ierr);
3848     ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
3849     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
3850     ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
3851   } else if (coloring->ctype == IS_COLORING_GHOSTED) {
3852     ISColoringValue *colors;
3853     PetscInt        *larray;
3854     ISColoring      ocoloring;
3855 
3856     /* set coloring for diagonal portion */
3857     ierr = PetscMalloc((a->A->cmap->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr);
3858     for (i=0; i<a->A->cmap->n; i++) {
3859       larray[i] = i + A->cmap->rstart;
3860     }
3861     ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->A->cmap->n,larray,PETSC_NULL,larray);CHKERRQ(ierr);
3862     ierr = PetscMalloc((a->A->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
3863     for (i=0; i<a->A->cmap->n; i++) {
3864       colors[i] = coloring->colors[larray[i]];
3865     }
3866     ierr = PetscFree(larray);CHKERRQ(ierr);
3867     ierr = ISColoringCreate(PETSC_COMM_SELF,coloring->n,a->A->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
3868     ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr);
3869     ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
3870 
3871     /* set coloring for off-diagonal portion */
3872     ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr);
3873     ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->B->cmap->n,a->garray,PETSC_NULL,larray);CHKERRQ(ierr);
3874     ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
3875     for (i=0; i<a->B->cmap->n; i++) {
3876       colors[i] = coloring->colors[larray[i]];
3877     }
3878     ierr = PetscFree(larray);CHKERRQ(ierr);
3879     ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
3880     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
3881     ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
3882   } else {
3883     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support ISColoringType %d",(int)coloring->ctype);
3884   }
3885 
3886   PetscFunctionReturn(0);
3887 }
3888 
3889 #if defined(PETSC_HAVE_ADIC)
3890 #undef __FUNCT__
3891 #define __FUNCT__ "MatSetValuesAdic_MPIAIJ"
3892 PetscErrorCode MatSetValuesAdic_MPIAIJ(Mat A,void *advalues)
3893 {
3894   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
3895   PetscErrorCode ierr;
3896 
3897   PetscFunctionBegin;
3898   ierr = MatSetValuesAdic_SeqAIJ(a->A,advalues);CHKERRQ(ierr);
3899   ierr = MatSetValuesAdic_SeqAIJ(a->B,advalues);CHKERRQ(ierr);
3900   PetscFunctionReturn(0);
3901 }
3902 #endif
3903 
3904 #undef __FUNCT__
3905 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ"
3906 PetscErrorCode MatSetValuesAdifor_MPIAIJ(Mat A,PetscInt nl,void *advalues)
3907 {
3908   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
3909   PetscErrorCode ierr;
3910 
3911   PetscFunctionBegin;
3912   ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr);
3913   ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr);
3914   PetscFunctionReturn(0);
3915 }
3916 
3917 #undef __FUNCT__
3918 #define __FUNCT__ "MatMerge"
3919 /*@
      MatMerge - Creates a single large PETSc matrix by concatenating sequential
3921                  matrices from each processor
3922 
3923     Collective on MPI_Comm
3924 
3925    Input Parameters:
+    comm - the communicator the parallel matrix will live on
.    inmat - the input sequential matrix (one per process)
3928 .    n - number of local columns (or PETSC_DECIDE)
3929 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
3930 
3931    Output Parameter:
3932 .    outmat - the parallel matrix generated
3933 
3934     Level: advanced
3935 
   Notes: The number of columns of the matrix in EACH processor MUST be the same. The
   input matrix inmat is destroyed (dereferenced) by this routine; see the sketch below.
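
   A typical use (a sketch; error checking omitted):
.vb
      Mat seq,par;
      /* ... each process assembles its own SeqAIJ matrix seq, all with the same number of columns ... */
      MatMerge(comm,seq,PETSC_DECIDE,MAT_INITIAL_MATRIX,&par);  /* seq is destroyed by the call */
.ve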
3937 
3938 @*/
3939 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
3940 {
3941   PetscErrorCode ierr;
3942   PetscInt       m,N,i,rstart,nnz,Ii,*dnz,*onz;
3943   PetscInt       *indx;
3944   PetscScalar    *values;
3945 
3946   PetscFunctionBegin;
3947   ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
3948   if (scall == MAT_INITIAL_MATRIX){
3949     /* count nonzeros in each row, for diagonal and off diagonal portion of matrix */
3950     if (n == PETSC_DECIDE){
3951       ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr);
3952     }
3953     ierr = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3954     rstart -= m;
3955 
3956     ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
3957     for (i=0;i<m;i++) {
3958       ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr);
3959       ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr);
3960       ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr);
3961     }
3962     /* This routine will ONLY return MPIAIJ type matrix */
3963     ierr = MatCreate(comm,outmat);CHKERRQ(ierr);
3964     ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
3965     ierr = MatSetType(*outmat,MATMPIAIJ);CHKERRQ(ierr);
3966     ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr);
3967     ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
3968 
3969   } else if (scall == MAT_REUSE_MATRIX){
3970     ierr = MatGetOwnershipRange(*outmat,&rstart,PETSC_NULL);CHKERRQ(ierr);
3971   } else {
3972     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
3973   }
3974 
3975   for (i=0;i<m;i++) {
3976     ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
3977     Ii    = i + rstart;
3978     ierr = MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
3979     ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
3980   }
3981   ierr = MatDestroy(inmat);CHKERRQ(ierr);
3982   ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3983   ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3984 
3985   PetscFunctionReturn(0);
3986 }
3987 
3988 #undef __FUNCT__
3989 #define __FUNCT__ "MatFileSplit"
3990 PetscErrorCode MatFileSplit(Mat A,char *outfile)
3991 {
3992   PetscErrorCode    ierr;
3993   PetscMPIInt       rank;
3994   PetscInt          m,N,i,rstart,nnz;
3995   size_t            len;
3996   const PetscInt    *indx;
3997   PetscViewer       out;
3998   char              *name;
3999   Mat               B;
4000   const PetscScalar *values;
4001 
4002   PetscFunctionBegin;
4003   ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr);
4004   ierr = MatGetSize(A,0,&N);CHKERRQ(ierr);
4005   /* Should this be the type of the diagonal block of A? */
4006   ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr);
4007   ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr);
4008   ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr);
4009   ierr = MatSeqAIJSetPreallocation(B,0,PETSC_NULL);CHKERRQ(ierr);
4010   ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr);
4011   for (i=0;i<m;i++) {
4012     ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
4013     ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
4014     ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
4015   }
4016   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4017   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4018 
4019   ierr = MPI_Comm_rank(((PetscObject)A)->comm,&rank);CHKERRQ(ierr);
4020   ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr);
4021   ierr = PetscMalloc((len+5)*sizeof(char),&name);CHKERRQ(ierr);
4022   sprintf(name,"%s.%d",outfile,rank);
4023   ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr);
4024   ierr = PetscFree(name);CHKERRQ(ierr);
4025   ierr = MatView(B,out);CHKERRQ(ierr);
4026   ierr = PetscViewerDestroy(out);CHKERRQ(ierr);
4027   ierr = MatDestroy(B);CHKERRQ(ierr);
4028   PetscFunctionReturn(0);
4029 }
4030 
4031 EXTERN PetscErrorCode MatDestroy_MPIAIJ(Mat);
4032 #undef __FUNCT__
4033 #define __FUNCT__ "MatDestroy_MPIAIJ_SeqsToMPI"
4034 PetscErrorCode PETSCMAT_DLLEXPORT MatDestroy_MPIAIJ_SeqsToMPI(Mat A)
4035 {
4036   PetscErrorCode       ierr;
4037   Mat_Merge_SeqsToMPI  *merge;
4038   PetscContainer       container;
4039 
4040   PetscFunctionBegin;
4041   ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr);
4042   if (container) {
4043     ierr = PetscContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr);
4044     ierr = PetscFree(merge->id_r);CHKERRQ(ierr);
4045     ierr = PetscFree(merge->len_s);CHKERRQ(ierr);
4046     ierr = PetscFree(merge->len_r);CHKERRQ(ierr);
4047     ierr = PetscFree(merge->bi);CHKERRQ(ierr);
4048     ierr = PetscFree(merge->bj);CHKERRQ(ierr);
4049     ierr = PetscFree(merge->buf_ri[0]);CHKERRQ(ierr);
4050     ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr);
4051     ierr = PetscFree(merge->buf_rj[0]);CHKERRQ(ierr);
4052     ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr);
4053     ierr = PetscFree(merge->coi);CHKERRQ(ierr);
4054     ierr = PetscFree(merge->coj);CHKERRQ(ierr);
4055     ierr = PetscFree(merge->owners_co);CHKERRQ(ierr);
4056     ierr = PetscLayoutDestroy(merge->rowmap);CHKERRQ(ierr);
4057 
4058     ierr = PetscContainerDestroy(container);CHKERRQ(ierr);
4059     ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr);
4060     ierr = PetscFree(merge);CHKERRQ(ierr); /* merge is uninitialized when no container is attached, so only free it here */
4061   }
4062 
4063   ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr);
4064   PetscFunctionReturn(0);
4065 }
4066 
4067 #include "../src/mat/utils/freespace.h"
4068 #include "petscbt.h"
4069 
4070 #undef __FUNCT__
4071 #define __FUNCT__ "MatMerge_SeqsToMPINumeric"
4072 /*@C
4073       MatMerge_SeqsToMPI - Creates an MPIAIJ matrix by adding together the sequential
4074                  matrices from each processor
4075 
4076     Collective on MPI_Comm
4077 
4078    Input Parameters:
4079 +    comm - the communicator the parallel matrix will live on
4080 .    seqmat - the input sequential matrix (one per process)
4081 .    m - number of local rows (or PETSC_DECIDE)
4082 .    n - number of local columns (or PETSC_DECIDE)
4083 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4084 
4085    Output Parameter:
4086 .    mpimat - the parallel matrix generated
4087 
4088     Level: advanced
4089 
4090    Notes:
4091      The dimensions of the sequential matrix MUST be the same on each processor.
4092      The input seqmat is included in the container "Mat_Merge_SeqsToMPI", and will be
4093      destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat.
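
   Example usage (a minimal sketch; assumes each process holds an assembled SeqAIJ
   matrix Aloc of identical dimensions; per the Notes above, Aloc is kept with the
   result and freed when the parallel matrix is destroyed):
.vb
      Mat Aloc,C;
      ierr = MatMerge_SeqsToMPI(PETSC_COMM_WORLD,Aloc,PETSC_DECIDE,PETSC_DECIDE,MAT_INITIAL_MATRIX,&C);CHKERRQ(ierr);
      ... use C; a later call with MAT_REUSE_MATRIX updates the values only ...
      ierr = MatDestroy(C);CHKERRQ(ierr);
.ve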
4094 @*/
4095 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPINumeric(Mat seqmat,Mat mpimat)
4096 {
4097   PetscErrorCode       ierr;
4098   MPI_Comm             comm=((PetscObject)mpimat)->comm;
4099   Mat_SeqAIJ           *a=(Mat_SeqAIJ*)seqmat->data;
4100   PetscMPIInt          size,rank,taga,*len_s;
4101   PetscInt             N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj=a->j;
4102   PetscInt             proc,m;
4103   PetscInt             **buf_ri,**buf_rj;
4104   PetscInt             k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj;
4105   PetscInt             nrows,**buf_ri_k,**nextrow,**nextai;
4106   MPI_Request          *s_waits,*r_waits;
4107   MPI_Status           *status;
4108   MatScalar            *aa=a->a;
4109   MatScalar            **abuf_r,*ba_i;
4110   Mat_Merge_SeqsToMPI  *merge;
4111   PetscContainer       container;
4112 
4113   PetscFunctionBegin;
4114   ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
4115 
4116   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4117   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4118 
4119   ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr);
4120   if (container) {
4121     ierr = PetscContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr);
4122   } else SETERRQ(comm,PETSC_ERR_ARG_WRONGSTATE,"Matrix does not have the MatMergeSeqsToMPI container; call MatMerge_SeqsToMPISymbolic() first");
4123   bi     = merge->bi;
4124   bj     = merge->bj;
4125   buf_ri = merge->buf_ri;
4126   buf_rj = merge->buf_rj;
4127 
4128   ierr   = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr);
4129   owners = merge->rowmap->range;
4130   len_s  = merge->len_s;
4131 
4132   /* send and recv matrix values */
4133   /*-----------------------------*/
4134   ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr);
4135   ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr);
4136 
4137   ierr = PetscMalloc((merge->nsend+1)*sizeof(MPI_Request),&s_waits);CHKERRQ(ierr);
4138   for (proc=0,k=0; proc<size; proc++){
4139     if (!len_s[proc]) continue;
4140     i = owners[proc];
4141     ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr);
4142     k++;
4143   }
4144 
4145   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);}
4146   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);}
4147   ierr = PetscFree(status);CHKERRQ(ierr);
4148 
4149   ierr = PetscFree(s_waits);CHKERRQ(ierr);
4150   ierr = PetscFree(r_waits);CHKERRQ(ierr);
4151 
4152   /* insert mat values of mpimat */
4153   /*----------------------------*/
4154   ierr = PetscMalloc(N*sizeof(PetscScalar),&ba_i);CHKERRQ(ierr);
4155   ierr = PetscMalloc3(merge->nrecv,PetscInt*,&buf_ri_k,merge->nrecv,PetscInt*,&nextrow,merge->nrecv,PetscInt*,&nextai);CHKERRQ(ierr);
4156 
4157   for (k=0; k<merge->nrecv; k++){
4158     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
4159     nrows = *(buf_ri_k[k]);
4160     nextrow[k]  = buf_ri_k[k]+1;  /* next row number of k-th recved i-structure */
4161     nextai[k]   = buf_ri_k[k] + (nrows + 1);/* points to the i-structure (row offsets) of the k-th recved message */
4162   }
4163 
4164   /* set values of ba */
4165   m = merge->rowmap->n;
4166   for (i=0; i<m; i++) {
4167     arow = owners[rank] + i;
4168     bj_i = bj+bi[i];  /* col indices of the i-th row of mpimat */
4169     bnzi = bi[i+1] - bi[i];
4170     ierr = PetscMemzero(ba_i,bnzi*sizeof(PetscScalar));CHKERRQ(ierr);
4171 
4172     /* add local non-zero vals of this proc's seqmat into ba */
4173     anzi = ai[arow+1] - ai[arow];
4174     aj   = a->j + ai[arow];
4175     aa   = a->a + ai[arow];
4176     nextaj = 0;
4177     for (j=0; nextaj<anzi; j++){
4178       if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */
4179         ba_i[j] += aa[nextaj++];
4180       }
4181     }
4182 
4183     /* add received vals into ba */
4184     for (k=0; k<merge->nrecv; k++){ /* k-th received message */
4185       /* i-th row */
4186       if (i == *nextrow[k]) {
4187         anzi = *(nextai[k]+1) - *nextai[k];
4188         aj   = buf_rj[k] + *(nextai[k]);
4189         aa   = abuf_r[k] + *(nextai[k]);
4190         nextaj = 0;
4191         for (j=0; nextaj<anzi; j++){
4192           if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */
4193             ba_i[j] += aa[nextaj++];
4194           }
4195         }
4196         nextrow[k]++; nextai[k]++;
4197       }
4198     }
4199     ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr);
4200   }
4201   ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4202   ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4203 
4204   ierr = PetscFree(abuf_r[0]);CHKERRQ(ierr);
4205   ierr = PetscFree(abuf_r);CHKERRQ(ierr);
4206   ierr = PetscFree(ba_i);CHKERRQ(ierr);
4207   ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
4208   ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
4209   PetscFunctionReturn(0);
4210 }
4211 
4212 #undef __FUNCT__
4213 #define __FUNCT__ "MatMerge_SeqsToMPISymbolic"
4214 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPISymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat)
4215 {
4216   PetscErrorCode       ierr;
4217   Mat                  B_mpi;
4218   Mat_SeqAIJ           *a=(Mat_SeqAIJ*)seqmat->data;
4219   PetscMPIInt          size,rank,tagi,tagj,*len_s,*len_si,*len_ri;
4220   PetscInt             **buf_rj,**buf_ri,**buf_ri_k;
4221   PetscInt             M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j;
4222   PetscInt             len,proc,*dnz,*onz;
4223   PetscInt             k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0;
4224   PetscInt             nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai;
4225   MPI_Request          *si_waits,*sj_waits,*ri_waits,*rj_waits;
4226   MPI_Status           *status;
4227   PetscFreeSpaceList   free_space=PETSC_NULL,current_space=PETSC_NULL;
4228   PetscBT              lnkbt;
4229   Mat_Merge_SeqsToMPI  *merge;
4230   PetscContainer       container;
4231 
4232   PetscFunctionBegin;
4233   ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4234 
4235   /* make sure it is a PETSc comm */
4236   ierr = PetscCommDuplicate(comm,&comm,PETSC_NULL);CHKERRQ(ierr);
4237   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4238   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4239 
4240   ierr = PetscNew(Mat_Merge_SeqsToMPI,&merge);CHKERRQ(ierr);
4241   ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr);
4242 
4243   /* determine row ownership */
4244   /*---------------------------------------------------------*/
4245   ierr = PetscLayoutCreate(comm,&merge->rowmap);CHKERRQ(ierr);
4246   ierr = PetscLayoutSetLocalSize(merge->rowmap,m);CHKERRQ(ierr);
4247   ierr = PetscLayoutSetSize(merge->rowmap,M);CHKERRQ(ierr);
4248   ierr = PetscLayoutSetBlockSize(merge->rowmap,1);CHKERRQ(ierr);
4249   ierr = PetscLayoutSetUp(merge->rowmap);CHKERRQ(ierr);
4250   ierr = PetscMalloc(size*sizeof(PetscMPIInt),&len_si);CHKERRQ(ierr);
4251   ierr = PetscMalloc(size*sizeof(PetscMPIInt),&merge->len_s);CHKERRQ(ierr);
4252 
4253   m      = merge->rowmap->n;
4254   M      = merge->rowmap->N;
4255   owners = merge->rowmap->range;
4256 
4257   /* determine the number of messages to send, their lengths */
4258   /*---------------------------------------------------------*/
4259   len_s  = merge->len_s;
4260 
4261   len = 0;  /* length of buf_si[] */
4262   merge->nsend = 0;
4263   for (proc=0; proc<size; proc++){
4264     len_si[proc] = 0;
4265     if (proc == rank){
4266       len_s[proc] = 0;
4267     } else {
4268       len_si[proc] = owners[proc+1] - owners[proc] + 1;
4269       len_s[proc] = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */
4270     }
4271     if (len_s[proc]) {
4272       merge->nsend++;
4273       nrows = 0;
4274       for (i=owners[proc]; i<owners[proc+1]; i++){
4275         if (ai[i+1] > ai[i]) nrows++;
4276       }
4277       len_si[proc] = 2*(nrows+1);
4278       len += len_si[proc];
4279     }
4280   }
4281 
4282   /* determine the number and length of messages to receive for ij-structure */
4283   /*-------------------------------------------------------------------------*/
4284   ierr = PetscGatherNumberOfMessages(comm,PETSC_NULL,len_s,&merge->nrecv);CHKERRQ(ierr);
4285   ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr);
4286 
4287   /* post the Irecv of j-structure */
4288   /*-------------------------------*/
4289   ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr);
4290   ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr);
4291 
4292   /* post the Isend of j-structure */
4293   /*--------------------------------*/
4294   ierr = PetscMalloc2(merge->nsend,MPI_Request,&si_waits,merge->nsend,MPI_Request,&sj_waits);CHKERRQ(ierr);
4295 
4296   for (proc=0, k=0; proc<size; proc++){
4297     if (!len_s[proc]) continue;
4298     i = owners[proc];
4299     ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr);
4300     k++;
4301   }
4302 
4303   /* receives and sends of j-structure are complete */
4304   /*------------------------------------------------*/
4305   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);}
4306   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);}
4307 
4308   /* send and recv i-structure */
4309   /*---------------------------*/
4310   ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr);
4311   ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr);
4312 
4313   ierr = PetscMalloc((len+1)*sizeof(PetscInt),&buf_s);CHKERRQ(ierr);
4314   buf_si = buf_s;  /* points to the beginning of k-th msg to be sent */
4315   for (proc=0,k=0; proc<size; proc++){
4316     if (!len_s[proc]) continue;
4317     /* form outgoing message for i-structure:
4318          buf_si[0]:                 nrows to be sent
4319                [1:nrows]:           row index (global)
4320                [nrows+1:2*nrows+1]: i-structure index
4321     */
4322     /*-------------------------------------------*/
4323     nrows = len_si[proc]/2 - 1;
4324     buf_si_i    = buf_si + nrows+1;
4325     buf_si[0]   = nrows;
4326     buf_si_i[0] = 0;
4327     nrows = 0;
4328     for (i=owners[proc]; i<owners[proc+1]; i++){
4329       anzi = ai[i+1] - ai[i];
4330       if (anzi) {
4331         buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */
4332         buf_si[nrows+1] = i-owners[proc]; /* local row index */
4333         nrows++;
4334       }
4335     }
4336     ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr);
4337     k++;
4338     buf_si += len_si[proc];
4339   }
4340 
4341   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);}
4342   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);}
4343 
4344   ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr);
4345   for (i=0; i<merge->nrecv; i++){
4346     ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr);
4347   }
4348 
4349   ierr = PetscFree(len_si);CHKERRQ(ierr);
4350   ierr = PetscFree(len_ri);CHKERRQ(ierr);
4351   ierr = PetscFree(rj_waits);CHKERRQ(ierr);
4352   ierr = PetscFree2(si_waits,sj_waits);CHKERRQ(ierr);
4353   ierr = PetscFree(ri_waits);CHKERRQ(ierr);
4354   ierr = PetscFree(buf_s);CHKERRQ(ierr);
4355   ierr = PetscFree(status);CHKERRQ(ierr);
4356 
4357   /* compute a local seq matrix in each processor */
4358   /*----------------------------------------------*/
4359   /* allocate bi array and free space for accumulating nonzero column info */
4360   ierr = PetscMalloc((m+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr);
4361   bi[0] = 0;
4362 
4363   /* create and initialize a linked list */
4364   nlnk = N+1;
4365   ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4366 
4367   /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */
4368   len  = ai[owners[rank+1]] - ai[owners[rank]];
4370   ierr = PetscFreeSpaceGet((PetscInt)(2*len+1),&free_space);CHKERRQ(ierr);
4371   current_space = free_space;
4372 
4373   /* determine symbolic info for each local row */
4374   ierr = PetscMalloc3(merge->nrecv,PetscInt*,&buf_ri_k,merge->nrecv,PetscInt*,&nextrow,merge->nrecv,PetscInt*,&nextai);CHKERRQ(ierr);
4375 
4376   for (k=0; k<merge->nrecv; k++){
4377     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
4378     nrows = *buf_ri_k[k];
4379     nextrow[k]  = buf_ri_k[k] + 1;  /* next row number of k-th recved i-structure */
4380     nextai[k]   = buf_ri_k[k] + (nrows + 1);/* points to the i-structure (row offsets) of the k-th recved message */
4381   }
4382 
4383   ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
4384   len = 0;
4385   for (i=0;i<m;i++) {
4386     bnzi   = 0;
4387     /* add local non-zero cols of this proc's seqmat into lnk */
4388     arow   = owners[rank] + i;
4389     anzi   = ai[arow+1] - ai[arow];
4390     aj     = a->j + ai[arow];
4391     ierr = PetscLLAdd(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4392     bnzi += nlnk;
4393     /* add received col data into lnk */
4394     for (k=0; k<merge->nrecv; k++){ /* k-th received message */
4395       if (i == *nextrow[k]) { /* i-th row */
4396         anzi = *(nextai[k]+1) - *nextai[k];
4397         aj   = buf_rj[k] + *nextai[k];
4398         ierr = PetscLLAdd(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4399         bnzi += nlnk;
4400         nextrow[k]++; nextai[k]++;
4401       }
4402     }
4403     if (len < bnzi) len = bnzi;  /* =max(bnzi) */
4404 
4405     /* if free space is not available, make more free space */
4406     if (current_space->local_remaining<bnzi) {
4407       ierr = PetscFreeSpaceGet(bnzi+current_space->total_array_size,&current_space);CHKERRQ(ierr);
4408       nspacedouble++;
4409     }
4410     /* copy data into free space, then initialize lnk */
4411     ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
4412     ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr);
4413 
4414     current_space->array           += bnzi;
4415     current_space->local_used      += bnzi;
4416     current_space->local_remaining -= bnzi;
4417 
4418     bi[i+1] = bi[i] + bnzi;
4419   }
4420 
4421   ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
4422 
4423   ierr = PetscMalloc((bi[m]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr);
4424   ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr);
4425   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
4426 
4427   /* create symbolic parallel matrix B_mpi */
4428   /*---------------------------------------*/
4429   ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr);
4430   if (n==PETSC_DECIDE) {
4431     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr);
4432   } else {
4433     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
4434   }
4435   ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr);
4436   ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr);
4437   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
4438 
4439   /* B_mpi is not ready for use - assembly will be done by MatMerge_SeqsToMPINumeric() */
4440   B_mpi->assembled     = PETSC_FALSE;
4441   B_mpi->ops->destroy  = MatDestroy_MPIAIJ_SeqsToMPI;
4442   merge->bi            = bi;
4443   merge->bj            = bj;
4444   merge->buf_ri        = buf_ri;
4445   merge->buf_rj        = buf_rj;
4446   merge->coi           = PETSC_NULL;
4447   merge->coj           = PETSC_NULL;
4448   merge->owners_co     = PETSC_NULL;
4449 
4450   /* attach the supporting struct to B_mpi for reuse */
4451   ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
4452   ierr = PetscContainerSetPointer(container,merge);CHKERRQ(ierr);
4453   ierr = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr);
4454   *mpimat = B_mpi;
4455 
4456   ierr = PetscCommDestroy(&comm);CHKERRQ(ierr);
4457   ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4458   PetscFunctionReturn(0);
4459 }
4460 
4461 #undef __FUNCT__
4462 #define __FUNCT__ "MatMerge_SeqsToMPI"
4463 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPI(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat)
4464 {
4465   PetscErrorCode   ierr;
4466 
4467   PetscFunctionBegin;
4468   ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4469   if (scall == MAT_INITIAL_MATRIX){
4470     ierr = MatMerge_SeqsToMPISymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr);
4471   }
4472   ierr = MatMerge_SeqsToMPINumeric(seqmat,*mpimat);CHKERRQ(ierr);
4473   ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4474   PetscFunctionReturn(0);
4475 }
4476 
4477 #undef __FUNCT__
4478 #define __FUNCT__ "MatGetLocalMat"
4479 /*@
4480      MatGetLocalMat - Creates a SeqAIJ matrix that contains all the local rows of the parallel matrix A
4481 
4482     Not Collective
4483 
4484    Input Parameters:
4485 +    A - the matrix
4486 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4487 
4488    Output Parameter:
4489 .    A_loc - the local sequential matrix generated
4490 
4491     Level: developer
4492 
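   Example usage (a minimal sketch; assumes A is an assembled MPIAIJ matrix whose
   nonzero pattern does not change between the two calls):
.vb
      Mat A_loc;
      ierr = MatGetLocalMat(A,MAT_INITIAL_MATRIX,&A_loc);CHKERRQ(ierr);
      ... numerical values of A change, sparsity pattern does not ...
      ierr = MatGetLocalMat(A,MAT_REUSE_MATRIX,&A_loc);CHKERRQ(ierr);
      ierr = MatDestroy(A_loc);CHKERRQ(ierr);
.ve
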
4493 @*/
4494 PetscErrorCode PETSCMAT_DLLEXPORT MatGetLocalMat(Mat A,MatReuse scall,Mat *A_loc)
4495 {
4496   PetscErrorCode  ierr;
4497   Mat_MPIAIJ      *mpimat=(Mat_MPIAIJ*)A->data;
4498   Mat_SeqAIJ      *mat,*a=(Mat_SeqAIJ*)(mpimat->A)->data,*b=(Mat_SeqAIJ*)(mpimat->B)->data;
4499   PetscInt        *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*cmap=mpimat->garray;
4500   MatScalar       *aa=a->a,*ba=b->a,*cam;
4501   PetscScalar     *ca;
4502   PetscInt        am=A->rmap->n,i,j,k,cstart=A->cmap->rstart;
4503   PetscInt        *ci,*cj,col,ncols_d,ncols_o,jo;
4504 
4505   PetscFunctionBegin;
4506   ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
4507   if (scall == MAT_INITIAL_MATRIX){
4508     ierr = PetscMalloc((1+am)*sizeof(PetscInt),&ci);CHKERRQ(ierr);
4509     ci[0] = 0;
4510     for (i=0; i<am; i++){
4511       ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]);
4512     }
4513     ierr = PetscMalloc((1+ci[am])*sizeof(PetscInt),&cj);CHKERRQ(ierr);
4514     ierr = PetscMalloc((1+ci[am])*sizeof(PetscScalar),&ca);CHKERRQ(ierr);
4515     k = 0;
4516     for (i=0; i<am; i++) {
4517       ncols_o = bi[i+1] - bi[i];
4518       ncols_d = ai[i+1] - ai[i];
4519       /* off-diagonal portion of A */
4520       for (jo=0; jo<ncols_o; jo++) {
4521         col = cmap[*bj];
4522         if (col >= cstart) break;
4523         cj[k]   = col; bj++;
4524         ca[k++] = *ba++;
4525       }
4526       /* diagonal portion of A */
4527       for (j=0; j<ncols_d; j++) {
4528         cj[k]   = cstart + *aj++;
4529         ca[k++] = *aa++;
4530       }
4531       /* off-diagonal portion of A */
4532       for (j=jo; j<ncols_o; j++) {
4533         cj[k]   = cmap[*bj++];
4534         ca[k++] = *ba++;
4535       }
4536     }
4537     /* put together the new matrix */
4538     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr);
4539     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
4540     /* Since these are PETSc arrays, change flags to free them as necessary. */
4541     mat          = (Mat_SeqAIJ*)(*A_loc)->data;
4542     mat->free_a  = PETSC_TRUE;
4543     mat->free_ij = PETSC_TRUE;
4544     mat->nonew   = 0;
4545   } else if (scall == MAT_REUSE_MATRIX){
4546     mat=(Mat_SeqAIJ*)(*A_loc)->data;
4547     ci = mat->i; cj = mat->j; cam = mat->a;
4548     for (i=0; i<am; i++) {
4549       /* off-diagonal portion of A */
4550       ncols_o = bi[i+1] - bi[i];
4551       for (jo=0; jo<ncols_o; jo++) {
4552         col = cmap[*bj];
4553         if (col >= cstart) break;
4554         *cam++ = *ba++; bj++;
4555       }
4556       /* diagonal portion of A */
4557       ncols_d = ai[i+1] - ai[i];
4558       for (j=0; j<ncols_d; j++) *cam++ = *aa++;
4559       /* off-diagonal portion of A */
4560       for (j=jo; j<ncols_o; j++) {
4561         *cam++ = *ba++; bj++;
4562       }
4563     }
4564   } else {
4565     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
4566   }
4567 
4568   ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
4569   PetscFunctionReturn(0);
4570 }
4571 
4572 #undef __FUNCT__
4573 #define __FUNCT__ "MatGetLocalMatCondensed"
4574 /*@C
4575      MatGetLocalMatCondensed - Creates a SeqAIJ matrix from the local rows and NON-ZERO columns of the parallel matrix A
4576 
4577     Not Collective
4578 
4579    Input Parameters:
4580 +    A - the matrix
4581 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4582 -    row, col - index sets of rows and columns to extract (or PETSC_NULL)
4583 
4584    Output Parameter:
4585 .    A_loc - the local sequential matrix generated
4586 
4587     Level: developer
4588 
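   Example usage (a minimal sketch; assumes A is an assembled MPIAIJ matrix; passing
   PETSC_NULL for row and col selects all local rows and all nonzero columns):
.vb
      Mat A_loc;
      ierr = MatGetLocalMatCondensed(A,MAT_INITIAL_MATRIX,PETSC_NULL,PETSC_NULL,&A_loc);CHKERRQ(ierr);
      ... use A_loc ...
      ierr = MatDestroy(A_loc);CHKERRQ(ierr);
.ve
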
4589 @*/
4590 PetscErrorCode PETSCMAT_DLLEXPORT MatGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc)
4591 {
4592   Mat_MPIAIJ        *a=(Mat_MPIAIJ*)A->data;
4593   PetscErrorCode    ierr;
4594   PetscInt          i,start,end,ncols,nzA,nzB,*cmap,imark,*idx;
4595   IS                isrowa,iscola;
4596   Mat               *aloc;
4597 
4598   PetscFunctionBegin;
4599   ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
4600   if (!row){
4601     start = A->rmap->rstart; end = A->rmap->rend;
4602     ierr = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr);
4603   } else {
4604     isrowa = *row;
4605   }
4606   if (!col){
4607     start = A->cmap->rstart;
4608     cmap  = a->garray;
4609     nzA   = a->A->cmap->n;
4610     nzB   = a->B->cmap->n;
4611     ierr  = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr);
4612     ncols = 0;
4613     for (i=0; i<nzB; i++) {
4614       if (cmap[i] < start) idx[ncols++] = cmap[i];
4615       else break;
4616     }
4617     imark = i;
4618     for (i=0; i<nzA; i++) idx[ncols++] = start + i;
4619     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i];
4620     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,&iscola);CHKERRQ(ierr);
4621     ierr = PetscFree(idx);CHKERRQ(ierr);
4622   } else {
4623     iscola = *col;
4624   }
4625   if (scall != MAT_INITIAL_MATRIX){
4626     ierr = PetscMalloc(sizeof(Mat),&aloc);CHKERRQ(ierr);
4627     aloc[0] = *A_loc;
4628   }
4629   ierr = MatGetSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr);
4630   *A_loc = aloc[0];
4631   ierr = PetscFree(aloc);CHKERRQ(ierr);
4632   if (!row){
4633     ierr = ISDestroy(isrowa);CHKERRQ(ierr);
4634   }
4635   if (!col){
4636     ierr = ISDestroy(iscola);CHKERRQ(ierr);
4637   }
4638   ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
4639   PetscFunctionReturn(0);
4640 }
4641 
4642 #undef __FUNCT__
4643 #define __FUNCT__ "MatGetBrowsOfAcols"
4644 /*@C
4645     MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns of the local part of A
4646 
4647     Collective on Mat
4648 
4649    Input Parameters:
4650 +    A,B - the matrices in mpiaij format
4651 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4652 -    rowb, colb - index sets of rows and columns of B to extract (or PETSC_NULL)
4653 
4654    Output Parameter:
4655 +    rowb, colb - index sets of rows and columns of B to extract
4656 .    brstart - row index in B_seq at which B's local rows begin (the next B->rmap->n rows of B_seq come from B's local rows)
4657 -    B_seq - the sequential matrix generated
4658 
4659     Level: developer
4660 
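   Example usage (a minimal sketch; assumes A and B are assembled MPIAIJ matrices with
   compatible local dimensions; the index sets returned by the first call are reused
   by the second):
.vb
      Mat      B_seq;
      IS       rowb,colb;
      PetscInt brstart;
      ierr = MatGetBrowsOfAcols(A,B,MAT_INITIAL_MATRIX,&rowb,&colb,&brstart,&B_seq);CHKERRQ(ierr);
      ... values of B change, sparsity pattern does not ...
      ierr = MatGetBrowsOfAcols(A,B,MAT_REUSE_MATRIX,&rowb,&colb,&brstart,&B_seq);CHKERRQ(ierr);
      ierr = ISDestroy(rowb);CHKERRQ(ierr);
      ierr = ISDestroy(colb);CHKERRQ(ierr);
      ierr = MatDestroy(B_seq);CHKERRQ(ierr);
.ve
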
4661 @*/
4662 PetscErrorCode PETSCMAT_DLLEXPORT MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,PetscInt *brstart,Mat *B_seq)
4663 {
4664   Mat_MPIAIJ        *a=(Mat_MPIAIJ*)A->data;
4665   PetscErrorCode    ierr;
4666   PetscInt          *idx,i,start,ncols,nzA,nzB,*cmap,imark;
4667   IS                isrowb,iscolb;
4668   Mat               *bseq;
4669 
4670   PetscFunctionBegin;
4671   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend){
4672     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
4673   }
4674   ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
4675 
4676   if (scall == MAT_INITIAL_MATRIX){
4677     start = A->cmap->rstart;
4678     cmap  = a->garray;
4679     nzA   = a->A->cmap->n;
4680     nzB   = a->B->cmap->n;
4681     ierr  = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr);
4682     ncols = 0;
4683     for (i=0; i<nzB; i++) {  /* row < local row index */
4684       if (cmap[i] < start) idx[ncols++] = cmap[i];
4685       else break;
4686     }
4687     imark = i;
4688     for (i=0; i<nzA; i++) idx[ncols++] = start + i;  /* local rows */
4689     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */
4690     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,&isrowb);CHKERRQ(ierr);
4691     ierr = PetscFree(idx);CHKERRQ(ierr);
4692     *brstart = imark;
4693     ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr);
4694   } else {
4695     if (!rowb || !colb) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX");
4696     isrowb = *rowb; iscolb = *colb;
4697     ierr = PetscMalloc(sizeof(Mat),&bseq);CHKERRQ(ierr);
4698     bseq[0] = *B_seq;
4699   }
4700   ierr = MatGetSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr);
4701   *B_seq = bseq[0];
4702   ierr = PetscFree(bseq);CHKERRQ(ierr);
4703   if (!rowb){
4704     ierr = ISDestroy(isrowb);CHKERRQ(ierr);
4705   } else {
4706     *rowb = isrowb;
4707   }
4708   if (!colb){
4709     ierr = ISDestroy(iscolb);CHKERRQ(ierr);
4710   } else {
4711     *colb = iscolb;
4712   }
4713   ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
4714   PetscFunctionReturn(0);
4715 }
4716 
4717 #undef __FUNCT__
4718 #define __FUNCT__ "MatGetBrowsOfAoCols"
4719 /*@C
4720     MatGetBrowsOfAoCols - Creates a SeqAIJ matrix by taking the rows of B that correspond to
4721     the nonzero columns of the OFF-DIAGONAL portion of the local part of A
4722 
4723     Collective on Mat
4724 
4725    Input Parameters:
4726 +    A,B - the matrices in mpiaij format
4727 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4728 .    startsj - starting points of the outgoing j- and a-array messages, saved for MAT_REUSE_MATRIX (or PETSC_NULL)
4729 .    startsj_r - starting points of the incoming messages, saved for MAT_REUSE_MATRIX
4730 -    bufa_ptr - buffer for the outgoing matrix values, saved for MAT_REUSE_MATRIX (or PETSC_NULL)
4731 
4732    Output Parameter:
4733 .    B_oth - the sequential matrix generated
4734 
4735     Level: developer
4736 
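   Example usage (a minimal sketch; assumes A and B are assembled MPIAIJ matrices with
   compatible local dimensions; the communication buffers returned by the first call
   are reused by the second):
.vb
      Mat       B_oth;
      PetscInt  *startsj,*startsj_r;
      MatScalar *bufa;
      ierr = MatGetBrowsOfAoCols(A,B,MAT_INITIAL_MATRIX,&startsj,&startsj_r,&bufa,&B_oth);CHKERRQ(ierr);
      ... values of B change, sparsity pattern does not ...
      ierr = MatGetBrowsOfAoCols(A,B,MAT_REUSE_MATRIX,&startsj,&startsj_r,&bufa,&B_oth);CHKERRQ(ierr);
.ve
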
4737 @*/
4738 PetscErrorCode PETSCMAT_DLLEXPORT MatGetBrowsOfAoCols(Mat A,Mat B,MatReuse scall,PetscInt **startsj,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth)
4739 {
4740   VecScatter_MPI_General *gen_to,*gen_from;
4741   PetscErrorCode         ierr;
4742   Mat_MPIAIJ             *a=(Mat_MPIAIJ*)A->data;
4743   Mat_SeqAIJ             *b_oth;
4744   VecScatter             ctx=a->Mvctx;
4745   MPI_Comm               comm=((PetscObject)ctx)->comm;
4746   PetscMPIInt            *rprocs,*sprocs,tag=((PetscObject)ctx)->tag,rank;
4747   PetscInt               *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap->n,row,*b_othi,*b_othj;
4748   PetscScalar            *rvalues,*svalues;
4749   MatScalar              *b_otha,*bufa,*bufA;
4750   PetscInt               i,j,k,l,ll,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len;
4751   MPI_Request            *rwaits = PETSC_NULL,*swaits = PETSC_NULL;
4752   MPI_Status             *sstatus,rstatus;
4753   PetscMPIInt            jj;
4754   PetscInt               *cols,sbs,rbs;
4755   PetscScalar            *vals;
4756 
4757   PetscFunctionBegin;
4758   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend){
4759     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
4760   }
4761   ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
4762   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4763 
4764   gen_to   = (VecScatter_MPI_General*)ctx->todata;
4765   gen_from = (VecScatter_MPI_General*)ctx->fromdata;
4766   rvalues  = gen_from->values; /* holds the length of receiving row */
4767   svalues  = gen_to->values;   /* holds the length of sending row */
4768   nrecvs   = gen_from->n;
4769   nsends   = gen_to->n;
4770 
4771   ierr = PetscMalloc2(nrecvs,MPI_Request,&rwaits,nsends,MPI_Request,&swaits);CHKERRQ(ierr);
4772   srow     = gen_to->indices;   /* local row index to be sent */
4773   sstarts  = gen_to->starts;
4774   sprocs   = gen_to->procs;
4775   sstatus  = gen_to->sstatus;
4776   sbs      = gen_to->bs;
4777   rstarts  = gen_from->starts;
4778   rprocs   = gen_from->procs;
4779   rbs      = gen_from->bs;
4780 
4781   if (!startsj || !bufa_ptr) scall = MAT_INITIAL_MATRIX;
4782   if (scall == MAT_INITIAL_MATRIX){
4783     /* i-array */
4784     /*---------*/
4785     /*  post receives */
4786     for (i=0; i<nrecvs; i++){
4787       rowlen = (PetscInt*)rvalues + rstarts[i]*rbs;
4788       nrows = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */
4789       ierr = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
4790     }
4791 
4792     /* pack the outgoing message */
4793     ierr = PetscMalloc2(nsends+1,PetscInt,&sstartsj,nrecvs+1,PetscInt,&rstartsj);CHKERRQ(ierr);
4794     sstartsj[0] = 0;  rstartsj[0] = 0;
4795     len = 0; /* total length of j or a array to be sent */
4796     k = 0;
4797     for (i=0; i<nsends; i++){
4798       rowlen = (PetscInt*)svalues + sstarts[i]*sbs;
4799       nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
4800       for (j=0; j<nrows; j++) {
4801         row = srow[k] + B->rmap->range[rank]; /* global row idx */
4802         for (l=0; l<sbs; l++){
4803           ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); /* rowlength */
4804           rowlen[j*sbs+l] = ncols;
4805           len += ncols;
4806           ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr);
4807         }
4808         k++;
4809       }
4810       ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
4811       sstartsj[i+1] = len;  /* starting point of (i+1)-th outgoing msg in bufj and bufa */
4812     }
4813     /* recvs and sends of i-array are completed */
4814     i = nrecvs;
4815     while (i--) {
4816       ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
4817     }
4818     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
4819 
4820     /* allocate buffers for sending j and a arrays */
4821     ierr = PetscMalloc((len+1)*sizeof(PetscInt),&bufj);CHKERRQ(ierr);
4822     ierr = PetscMalloc((len+1)*sizeof(PetscScalar),&bufa);CHKERRQ(ierr);
4823 
4824     /* create i-array of B_oth */
4825     ierr = PetscMalloc((aBn+2)*sizeof(PetscInt),&b_othi);CHKERRQ(ierr);
4826     b_othi[0] = 0;
4827     len = 0; /* total length of j or a array to be received */
4828     k = 0;
4829     for (i=0; i<nrecvs; i++){
4830       rowlen = (PetscInt*)rvalues + rstarts[i]*rbs;
4831       nrows = rbs*(rstarts[i+1]-rstarts[i]); /* num of rows to be received */
4832       for (j=0; j<nrows; j++) {
4833         b_othi[k+1] = b_othi[k] + rowlen[j];
4834         len += rowlen[j]; k++;
4835       }
4836       rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */
4837     }
4838 
4839     /* allocate space for the j and a arrays of B_oth */
4840     ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(PetscInt),&b_othj);CHKERRQ(ierr);
4841     ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(MatScalar),&b_otha);CHKERRQ(ierr);
4842 
4843     /* j-array */
4844     /*---------*/
4845     /*  post receives of j-array */
4846     for (i=0; i<nrecvs; i++){
4847       nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
4848       ierr = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
4849     }
4850 
4851     /* pack the outgoing message j-array */
4852     k = 0;
4853     for (i=0; i<nsends; i++){
4854       nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
4855       bufJ = bufj+sstartsj[i];
4856       for (j=0; j<nrows; j++) {
4857         row  = srow[k++] + B->rmap->range[rank]; /* global row idx */
4858         for (ll=0; ll<sbs; ll++){
4859           ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr);
4860           for (l=0; l<ncols; l++){
4861             *bufJ++ = cols[l];
4862           }
4863           ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr);
4864         }
4865       }
4866       ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
4867     }
4868 
4869     /* recvs and sends of j-array are completed */
4870     i = nrecvs;
4871     while (i--) {
4872       ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
4873     }
4874     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
4875   } else if (scall == MAT_REUSE_MATRIX){
4876     sstartsj = *startsj;
4877     rstartsj = *startsj_r;
4878     bufa     = *bufa_ptr;
4879     b_oth    = (Mat_SeqAIJ*)(*B_oth)->data;
4880     b_otha   = b_oth->a;
4881   } else {
4882     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
4883   }
4884 
4885   /* a-array */
4886   /*---------*/
4887   /*  post receives of a-array */
4888   for (i=0; i<nrecvs; i++){
4889     nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
4890     ierr = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
4891   }
4892 
4893   /* pack the outgoing message a-array */
4894   k = 0;
4895   for (i=0; i<nsends; i++){
4896     nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
4897     bufA = bufa+sstartsj[i];
4898     for (j=0; j<nrows; j++) {
4899       row  = srow[k++] + B->rmap->range[rank]; /* global row idx */
4900       for (ll=0; ll<sbs; ll++){
4901         ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr);
4902         for (l=0; l<ncols; l++){
4903           *bufA++ = vals[l];
4904         }
4905         ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr);
4906       }
4907     }
4908     ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
4909   }
4910   /* recvs and sends of a-array are completed */
4911   i = nrecvs;
4912   while (i--) {
4913     ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
4914   }
4915   if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
4916   ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr);
4917 
4918   if (scall == MAT_INITIAL_MATRIX){
4919     /* put together the new matrix */
4920     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr);
4921 
4922     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
4923     /* Since these are PETSc arrays, change flags to free them as necessary. */
4924     b_oth          = (Mat_SeqAIJ *)(*B_oth)->data;
4925     b_oth->free_a  = PETSC_TRUE;
4926     b_oth->free_ij = PETSC_TRUE;
4927     b_oth->nonew   = 0;
4928 
4929     ierr = PetscFree(bufj);CHKERRQ(ierr);
4930     if (!startsj || !bufa_ptr){
4931       ierr = PetscFree2(sstartsj,rstartsj);CHKERRQ(ierr);
4932       ierr = PetscFree(bufa);CHKERRQ(ierr); /* free the send buffer itself, not the caller's pointer */
4933     } else {
4934       *startsj   = sstartsj;
4935       *startsj_r = rstartsj;
4936       *bufa_ptr  = bufa;
4937     }
4938   }
4939   ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
4940   PetscFunctionReturn(0);
4941 }
4942 
4943 #undef __FUNCT__
4944 #define __FUNCT__ "MatGetCommunicationStructs"
4945 /*@C
4946   MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication.
4947 
4948   Not Collective
4949 
4950   Input Parameter:
4951 . A - The matrix in mpiaij format
4952 
4953   Output Parameters:
4954 + lvec - The local vector holding off-process values from the argument to a matrix-vector product
4955 . colmap - A map from global column index to local index into lvec
4956 - multScatter - A scatter from the argument of a matrix-vector product to lvec
4957 
4958   Level: developer
4959 
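  Example usage (a minimal sketch; assumes A is an assembled MPIAIJ matrix; note that
  the type of colmap depends on whether PETSc was configured with PETSC_USE_CTABLE):
.vb
      Vec        lvec;
      VecScatter mvctx;
 #if defined(PETSC_USE_CTABLE)
      PetscTable colmap;
 #else
      PetscInt   *colmap;
 #endif
      ierr = MatGetCommunicationStructs(A,&lvec,&colmap,&mvctx);CHKERRQ(ierr);
      ... the structures are owned by A and must not be destroyed by the caller ...
.ve
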
4960 @*/
4961 #if defined (PETSC_USE_CTABLE)
4962 PetscErrorCode PETSCMAT_DLLEXPORT MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter)
4963 #else
4964 PetscErrorCode PETSCMAT_DLLEXPORT MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter)
4965 #endif
4966 {
4967   Mat_MPIAIJ *a;
4968 
4969   PetscFunctionBegin;
4970   PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
4971   PetscValidPointer(lvec, 2);
4972   PetscValidPointer(colmap, 3);
4973   PetscValidPointer(multScatter, 4);
4974   a = (Mat_MPIAIJ *) A->data;
4975   if (lvec) *lvec = a->lvec;
4976   if (colmap) *colmap = a->colmap;
4977   if (multScatter) *multScatter = a->Mvctx;
4978   PetscFunctionReturn(0);
4979 }
4980 
4981 EXTERN_C_BEGIN
4982 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_MPIAIJ_MPIAIJCRL(Mat,const MatType,MatReuse,Mat*);
4983 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_MPIAIJ_MPIAIJPERM(Mat,const MatType,MatReuse,Mat*);
4984 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_MPIAIJ_MPISBAIJ(Mat,const MatType,MatReuse,Mat*);
4985 EXTERN_C_END
4986 
4987 #undef __FUNCT__
4988 #define __FUNCT__ "MatMatMultNumeric_MPIDense_MPIAIJ"
4989 /*
4990     Computes (B'*A')' since computing B*A directly is untenable
4991 
4992                n                       p                          p
4993         (              )       (              )         (                  )
4994       m (      A       )  *  n (       B      )   =   m (         C        )
4995         (              )       (              )         (                  )
4996 
4997 */
4998 PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C)
4999 {
5000   PetscErrorCode     ierr;
5001   Mat                At,Bt,Ct;
5002 
5003   PetscFunctionBegin;
5004   ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr);
5005   ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr);
5006   ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,1.0,&Ct);CHKERRQ(ierr);
5007   ierr = MatDestroy(At);CHKERRQ(ierr);
5008   ierr = MatDestroy(Bt);CHKERRQ(ierr);
5009   ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr);
5010   ierr = MatDestroy(Ct);CHKERRQ(ierr);
5011   PetscFunctionReturn(0);
5012 }
5013 
5014 #undef __FUNCT__
5015 #define __FUNCT__ "MatMatMultSymbolic_MPIDense_MPIAIJ"
5016 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat *C)
5017 {
5018   PetscErrorCode ierr;
5019   PetscInt       m=A->rmap->n,n=B->cmap->n;
5020   Mat            Cmat;
5021 
5022   PetscFunctionBegin;
5023   if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %d != B->rmap->n %d\n",A->cmap->n,B->rmap->n);
5024   ierr = MatCreate(((PetscObject)A)->comm,&Cmat);CHKERRQ(ierr);
5025   ierr = MatSetSizes(Cmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
5026   ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr);
5027   ierr = MatMPIDenseSetPreallocation(Cmat,PETSC_NULL);CHKERRQ(ierr);
5028   ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5029   ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5030   *C   = Cmat;
5031   PetscFunctionReturn(0);
5032 }
5033 
5034 /* ----------------------------------------------------------------*/
5035 #undef __FUNCT__
5036 #define __FUNCT__ "MatMatMult_MPIDense_MPIAIJ"
5037 PetscErrorCode MatMatMult_MPIDense_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C)
5038 {
5039   PetscErrorCode ierr;
5040 
5041   PetscFunctionBegin;
5042   if (scall == MAT_INITIAL_MATRIX){
5043     ierr = MatMatMultSymbolic_MPIDense_MPIAIJ(A,B,fill,C);CHKERRQ(ierr);
5044   }
5045   ierr = MatMatMultNumeric_MPIDense_MPIAIJ(A,B,*C);CHKERRQ(ierr);
5046   PetscFunctionReturn(0);
5047 }
5048 
5049 EXTERN_C_BEGIN
5050 #if defined(PETSC_HAVE_MUMPS)
5051 extern PetscErrorCode MatGetFactor_aij_mumps(Mat,MatFactorType,Mat*);
5052 #endif
5053 #if defined(PETSC_HAVE_PASTIX)
5054 extern PetscErrorCode MatGetFactor_mpiaij_pastix(Mat,MatFactorType,Mat*);
5055 #endif
5056 #if defined(PETSC_HAVE_SUPERLU_DIST)
5057 extern PetscErrorCode MatGetFactor_mpiaij_superlu_dist(Mat,MatFactorType,Mat*);
5058 #endif
5059 #if defined(PETSC_HAVE_SPOOLES)
5060 extern PetscErrorCode MatGetFactor_mpiaij_spooles(Mat,MatFactorType,Mat*);
5061 #endif
5062 EXTERN_C_END
5063 
5064 /*MC
5065    MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.
5066 
5067    Options Database Keys:
5068 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions()
5069 
5070   Level: beginner
5071 
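  Example usage (a minimal sketch; comm, m, n, M, N and the preallocation counts
  d_nz and o_nz are application-dependent placeholders):
.vb
      Mat A;
      ierr = MatCreate(comm,&A);CHKERRQ(ierr);
      ierr = MatSetSizes(A,m,n,M,N);CHKERRQ(ierr);
      ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
      ierr = MatMPIAIJSetPreallocation(A,d_nz,PETSC_NULL,o_nz,PETSC_NULL);CHKERRQ(ierr);
.ve
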
5072 .seealso: MatCreateMPIAIJ()
5073 M*/
5074 
5075 EXTERN_C_BEGIN
5076 #undef __FUNCT__
5077 #define __FUNCT__ "MatCreate_MPIAIJ"
5078 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_MPIAIJ(Mat B)
5079 {
5080   Mat_MPIAIJ     *b;
5081   PetscErrorCode ierr;
5082   PetscMPIInt    size;
5083 
5084   PetscFunctionBegin;
5085   ierr = MPI_Comm_size(((PetscObject)B)->comm,&size);CHKERRQ(ierr);
5086 
5087   ierr            = PetscNewLog(B,Mat_MPIAIJ,&b);CHKERRQ(ierr);
5088   B->data         = (void*)b;
5089   ierr            = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
5090   B->rmap->bs     = 1;
5091   B->assembled    = PETSC_FALSE;
5092   B->mapping      = 0;
5093 
5094   B->insertmode   = NOT_SET_VALUES;
5095   b->size         = size;
5096   ierr = MPI_Comm_rank(((PetscObject)B)->comm,&b->rank);CHKERRQ(ierr);
5097 
5098   /* build cache for off array entries formed */
5099   ierr = MatStashCreate_Private(((PetscObject)B)->comm,1,&B->stash);CHKERRQ(ierr);
5100   b->donotstash  = PETSC_FALSE;
5101   b->colmap      = 0;
5102   b->garray      = 0;
5103   b->roworiented = PETSC_TRUE;
5104 
5105   /* stuff used for matrix vector multiply */
5106   b->lvec      = PETSC_NULL;
5107   b->Mvctx     = PETSC_NULL;
5108 
5109   /* stuff for MatGetRow() */
5110   b->rowindices   = 0;
5111   b->rowvalues    = 0;
5112   b->getrowactive = PETSC_FALSE;
5113 
5114 #if defined(PETSC_HAVE_SPOOLES)
5115   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_spooles_C",
5116                                      "MatGetFactor_mpiaij_spooles",
5117                                      MatGetFactor_mpiaij_spooles);CHKERRQ(ierr);
5118 #endif
5119 #if defined(PETSC_HAVE_MUMPS)
5120   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_mumps_C",
5121                                      "MatGetFactor_aij_mumps",
5122                                      MatGetFactor_aij_mumps);CHKERRQ(ierr);
5123 #endif
5124 #if defined(PETSC_HAVE_PASTIX)
5125   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_pastix_C",
5126 					   "MatGetFactor_mpiaij_pastix",
5127 					   MatGetFactor_mpiaij_pastix);CHKERRQ(ierr);
5128 #endif
5129 #if defined(PETSC_HAVE_SUPERLU_DIST)
5130   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_superlu_dist_C",
5131                                      "MatGetFactor_mpiaij_superlu_dist",
5132                                      MatGetFactor_mpiaij_superlu_dist);CHKERRQ(ierr);
5133 #endif
5134   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
5135                                      "MatStoreValues_MPIAIJ",
5136                                      MatStoreValues_MPIAIJ);CHKERRQ(ierr);
5137   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
5138                                      "MatRetrieveValues_MPIAIJ",
5139                                      MatRetrieveValues_MPIAIJ);CHKERRQ(ierr);
5140   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
5141 				     "MatGetDiagonalBlock_MPIAIJ",
5142                                      MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr);
5143   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatIsTranspose_C",
5144 				     "MatIsTranspose_MPIAIJ",
5145 				     MatIsTranspose_MPIAIJ);CHKERRQ(ierr);
5146   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocation_C",
5147 				     "MatMPIAIJSetPreallocation_MPIAIJ",
5148 				     MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr);
5149   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",
5150 				     "MatMPIAIJSetPreallocationCSR_MPIAIJ",
5151 				     MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr);
5152   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C",
5153 				     "MatDiagonalScaleLocal_MPIAIJ",
5154 				     MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr);
5155   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpiaijperm_C",
5156                                      "MatConvert_MPIAIJ_MPIAIJPERM",
5157                                       MatConvert_MPIAIJ_MPIAIJPERM);CHKERRQ(ierr);
5158   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpiaijcrl_C",
5159                                      "MatConvert_MPIAIJ_MPIAIJCRL",
5160                                       MatConvert_MPIAIJ_MPIAIJCRL);CHKERRQ(ierr);
5161   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C",
5162                                      "MatConvert_MPIAIJ_MPISBAIJ",
5163                                       MatConvert_MPIAIJ_MPISBAIJ);CHKERRQ(ierr);
5164   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMult_mpidense_mpiaij_C",
5165                                      "MatMatMult_MPIDense_MPIAIJ",
5166                                       MatMatMult_MPIDense_MPIAIJ);CHKERRQ(ierr);
5167   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMultSymbolic_mpidense_mpiaij_C",
5168                                      "MatMatMultSymbolic_MPIDense_MPIAIJ",
5169                                      MatMatMultSymbolic_MPIDense_MPIAIJ);CHKERRQ(ierr);
5170   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMultNumeric_mpidense_mpiaij_C",
5171                                      "MatMatMultNumeric_MPIDense_MPIAIJ",
5172                                       MatMatMultNumeric_MPIDense_MPIAIJ);CHKERRQ(ierr);
5173   ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr);
5174   PetscFunctionReturn(0);
5175 }
5176 EXTERN_C_END
5177 
5178 #undef __FUNCT__
5179 #define __FUNCT__ "MatCreateMPIAIJWithSplitArrays"
5180 /*@
5181      MatCreateMPIAIJWithSplitArrays - creates an MPIAIJ matrix using arrays that contain the "diagonal"
5182          and "off-diagonal" part of the matrix in CSR format.
5183 
5184    Collective on MPI_Comm
5185 
5186    Input Parameters:
5187 +  comm - MPI communicator
5188 .  m - number of local rows (Cannot be PETSC_DECIDE)
5189 .  n - This value should be the same as the local size used in creating the
5190        x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have it
5191        calculated if N is given). For square matrices n is almost always m.
5192 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
5193 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
5194 .   i - row indices for "diagonal" portion of matrix
5195 .   j - column indices
5196 .   a - matrix values
5197 .   oi - row indices for "off-diagonal" portion of matrix
5198 .   oj - column indices
5199 -   oa - matrix values
5200 
5201    Output Parameter:
5202 .   mat - the matrix
5203 
5204    Level: advanced
5205 
5206    Notes:
5207        The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc.
5208 
5209        The i and j indices are 0 based
5210 
5211        See MatCreateMPIAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix
5212 
5213        This sets local rows and cannot be used to set off-processor values.
5214 
5215        You cannot later use MatSetValues() to change values in this matrix.
5216 
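       Example usage (a minimal sketch; each process owns one row holding a single
       diagonal entry and an empty off-diagonal block; because the arrays are not
       copied, they must remain valid for the life of the matrix, so automatic
       storage as sketched here is only safe if the matrix is destroyed in scope):
.vb
      PetscInt    i[2]  = {0,1},  j[1]  = {0};
      PetscScalar a[1]  = {1.0};
      PetscInt    oi[2] = {0,0},  oj[1] = {0};
      PetscScalar oa[1] = {0.0};
      Mat         A;
      ierr = MatCreateMPIAIJWithSplitArrays(PETSC_COMM_WORLD,1,1,PETSC_DETERMINE,PETSC_DETERMINE,i,j,a,oi,oj,oa,&A);CHKERRQ(ierr);
.ve
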
5217 .keywords: matrix, aij, compressed row, sparse, parallel
5218 
5219 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
5220           MPIAIJ, MatCreateMPIAIJ(), MatCreateMPIAIJWithArrays()
5221 @*/
5222 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],
5223 								PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat)
5224 {
5225   PetscErrorCode ierr;
5226   Mat_MPIAIJ     *maij;
5227 
5228   PetscFunctionBegin;
5229   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
5230   if (i[0]) {
5231     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
5232   }
5233   if (oi[0]) {
5234     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0");
5235   }
5236   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
5237   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
5238   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
5239   maij = (Mat_MPIAIJ*) (*mat)->data;
5240   maij->donotstash     = PETSC_TRUE;
5241   (*mat)->preallocated = PETSC_TRUE;
5242 
5243   ierr = PetscLayoutSetBlockSize((*mat)->rmap,1);CHKERRQ(ierr);
5244   ierr = PetscLayoutSetBlockSize((*mat)->cmap,1);CHKERRQ(ierr);
5245   ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr);
5246   ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr);
5247 
5248   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr);
5249   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr);
5250 
5251   ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5252   ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5253   ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5254   ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5255 
5256   ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5257   ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5258   PetscFunctionReturn(0);
5259 }
5260 
5261 /*
5262     Special version for direct calls from Fortran
5263 */
5264 #if defined(PETSC_HAVE_FORTRAN_CAPS)
5265 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ
5266 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
5267 #define matsetvaluesmpiaij_ matsetvaluesmpiaij
5268 #endif
5269 
5270 /* Change these macros so they can be used in a void function */
5271 #undef CHKERRQ
5272 #define CHKERRQ(ierr) CHKERRABORT(PETSC_COMM_WORLD,ierr)
5273 #undef SETERRQ2
5274 #define SETERRQ2(comm,ierr,b,c,d) CHKERRABORT(comm,ierr)
5275 #undef SETERRQ
5276 #define SETERRQ(c,ierr,b) CHKERRABORT(c,ierr)
5277 
5278 EXTERN_C_BEGIN
5279 #undef __FUNCT__
5280 #define __FUNCT__ "matsetvaluesmpiaij_"
5281 void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr)
5282 {
5283   Mat             mat = *mmat;
5284   PetscInt        m = *mm, n = *mn;
5285   InsertMode      addv = *maddv;
5286   Mat_MPIAIJ      *aij = (Mat_MPIAIJ*)mat->data;
5287   PetscScalar     value;
5288   PetscErrorCode  ierr;
5289 
5290   ierr = MatPreallocated(mat);CHKERRQ(ierr);
5291   if (mat->insertmode == NOT_SET_VALUES) {
5292     mat->insertmode = addv;
5293   }
5294 #if defined(PETSC_USE_DEBUG)
5295   else if (mat->insertmode != addv) {
5296     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
5297   }
5298 #endif
  {
  PetscInt        i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
  PetscInt        cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
  PetscTruth      roworiented = aij->roworiented;

  /* Some variables required by the MatSetValues_SeqAIJ_A/B_Private() macros */
  Mat             A = aij->A;
  Mat_SeqAIJ      *a = (Mat_SeqAIJ*)A->data;
  PetscInt        *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
  MatScalar       *aa = a->a;
  PetscTruth      ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES))?PETSC_TRUE:PETSC_FALSE);
  Mat             B = aij->B;
  Mat_SeqAIJ      *b = (Mat_SeqAIJ*)B->data;
  PetscInt        *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
  MatScalar       *ba = b->a;

  PetscInt        *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
  PetscInt        nonew = a->nonew;
  MatScalar       *ap1,*ap2;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (im[i] < 0) continue;
#if defined(PETSC_USE_DEBUG)
    if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
#endif
    if (im[i] >= rstart && im[i] < rend) {
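      /* locally owned row: entries whose columns are also owned go into the
         diagonal block A (rp1/ap1), all others into the off-diagonal block B
         (rp2/ap2); the low/high pairs bracket the binary search in each row */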
      row      = im[i] - rstart;
      lastcol1 = -1;
      rp1      = aj + ai[row];
      ap1      = aa + ai[row];
      rmax1    = aimax[row];
      nrow1    = ailen[row];
      low1     = 0;
      high1    = nrow1;
      lastcol2 = -1;
      rp2      = bj + bi[row];
      ap2      = ba + bi[row];
      rmax2    = bimax[row];
      nrow2    = bilen[row];
      low2     = 0;
      high2    = nrow2;

      for (j=0; j<n; j++) {
        if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
        if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
        if (in[j] >= cstart && in[j] < cend) {
          col = in[j] - cstart;
          MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
        } else if (in[j] < 0) continue;
#if defined(PETSC_USE_DEBUG)
        else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
#endif
        else {
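          /* off-process column: translate the global index into B's local
             numbering via aij->colmap, which is built on demand */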
          if (mat->was_assembled) {
            if (!aij->colmap) {
              ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
            }
#if defined (PETSC_USE_CTABLE)
            ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
            col--;
#else
            col = aij->colmap[in[j]] - 1;
#endif
            if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
              ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
              col  = in[j];
              /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private();
                 ba must be refreshed before ap2 is recomputed, since DisAssemble_MPIAIJ()
                 replaced B and its arrays */
              B  = aij->B;
              b  = (Mat_SeqAIJ*)B->data;
              bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a;
              rp2   = bj + bi[row];
              ap2   = ba + bi[row];
              rmax2 = bimax[row];
              nrow2 = bilen[row];
              low2  = 0;
              high2 = nrow2;
              bm    = aij->B->rmap->n;
            }
          } else col = in[j];
          MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
        }
      }
    } else {
      if (!aij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscTruth)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscTruth)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
        }
      }
    }
  }
  }
  PetscFunctionReturnVoid();
}
EXTERN_C_END
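
/*
    For comparison (a sketch, not code from this file), the generic C path is

       ierr = MatSetValues(mat,m,im,n,in,v,addv);CHKERRQ(ierr);

    which dispatches through mat->ops->setvalues on every call; the wrapper
    above lets Fortran reach the MPIAIJ implementation directly and avoid that
    indirection in tight insertion loops.
*/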