xref: /petsc/src/mat/impls/aij/mpi/mpiaij.c (revision d61e7eb36d836a8e2bf1ad3f41344afcd0d5cf20)
1 #define PETSCMAT_DLL
2 
3 #include "../src/mat/impls/aij/mpi/mpiaij.h"   /*I "petscmat.h" I*/
4 #include "petscblaslapack.h"
5 
6 #undef __FUNCT__
7 #define __FUNCT__ "MatDistribute_MPIAIJ"
8 /*
9     Distributes a SeqAIJ matrix across a set of processes. Code stolen from
10     MatLoad_MPIAIJ(). Horrible lack of reuse. Should be a routine for each matrix type.
11 
12     Only for square matrices
13 */
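/*
   Editor's sketch (not part of the original source): a typical call sequence, assuming a
   SeqAIJ matrix gseq fully populated on rank 0 and a local row count mlocal chosen
   consistently on every rank (gseq, mlocal, and Adist are hypothetical names):

     Mat Adist;
     ierr = MatDistribute_MPIAIJ(PETSC_COMM_WORLD,gseq,mlocal,MAT_INITIAL_MATRIX,&Adist);CHKERRQ(ierr);

   and later, after refilling gseq on rank 0 with new values (same nonzero pattern):

     ierr = MatDistribute_MPIAIJ(PETSC_COMM_WORLD,gseq,mlocal,MAT_REUSE_MATRIX,&Adist);CHKERRQ(ierr);
*/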
14 PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat)
15 {
16   PetscMPIInt    rank,size;
17   PetscInt       *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz,*gmataj,cnt,row,*ld;
18   PetscErrorCode ierr;
19   Mat            mat;
20   Mat_SeqAIJ     *gmata;
21   PetscMPIInt    tag;
22   MPI_Status     status;
23   PetscBool      aij;
24   MatScalar      *gmataa,*ao,*ad,*gmataarestore=0;
25 
26   PetscFunctionBegin;
27   CHKMEMQ;
28   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
29   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
30   if (!rank) {
31     ierr = PetscTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);CHKERRQ(ierr);
32     if (!aij) SETERRQ1(((PetscObject)gmat)->comm,PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name);
33   }
34   if (reuse == MAT_INITIAL_MATRIX) {
35     ierr = MatCreate(comm,&mat);CHKERRQ(ierr);
36     ierr = MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
37     ierr = MatSetType(mat,MATAIJ);CHKERRQ(ierr);
38     ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr);
39     ierr = PetscMalloc2(m,PetscInt,&dlens,m,PetscInt,&olens);CHKERRQ(ierr);
40     ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
41     rowners[0] = 0;
42     for (i=2; i<=size; i++) {
43       rowners[i] += rowners[i-1];
44     }
45     rstart = rowners[rank];
46     rend   = rowners[rank+1];
47     ierr   = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
48     if (!rank) {
49       gmata = (Mat_SeqAIJ*) gmat->data;
50       /* send row lengths to all processors */
51       for (i=0; i<m; i++) dlens[i] = gmata->ilen[i];
52       for (i=1; i<size; i++) {
53 	ierr = MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
54       }
55       /* determine the number of diagonal and off-diagonal entries in each row */
56       ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
57       ierr = PetscMalloc(m*sizeof(PetscInt),&ld);CHKERRQ(ierr);
58       ierr = PetscMemzero(ld,m*sizeof(PetscInt));CHKERRQ(ierr);
59       jj = 0;
60       for (i=0; i<m; i++) {
61 	for (j=0; j<dlens[i]; j++) {
62           if (gmata->j[jj] < rstart) ld[i]++;
63 	  if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++;
64 	  jj++;
65 	}
66       }
67       /* send column indices to other processes */
68       for (i=1; i<size; i++) {
69 	nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
70 	ierr = MPI_Send(&nz,1,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
71 	ierr = MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
72       }
73 
74       /* send numerical values to other processes */
75       for (i=1; i<size; i++) {
76         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
77         ierr = MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
78       }
79       gmataa = gmata->a;
80       gmataj = gmata->j;
81 
82     } else {
83       /* receive row lengths */
84       ierr = MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
85       /* receive column indices */
86       ierr = MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
87       ierr = PetscMalloc2(nz,PetscScalar,&gmataa,nz,PetscInt,&gmataj);CHKERRQ(ierr);
88       ierr = MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
89       /* determine the number of diagonal and off-diagonal entries in each row */
90       ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
91       ierr = PetscMalloc(m*sizeof(PetscInt),&ld);CHKERRQ(ierr);
92       ierr = PetscMemzero(ld,m*sizeof(PetscInt));CHKERRQ(ierr);
93       jj = 0;
94       for (i=0; i<m; i++) {
95 	for (j=0; j<dlens[i]; j++) {
96           if (gmataj[jj] < rstart) ld[i]++;
97 	  if (gmataj[jj] < rstart || gmataj[jj] >= rend) olens[i]++;
98 	  jj++;
99 	}
100       }
101       /* receive numerical values */
102       ierr = PetscMemzero(gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr);
103       ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
104     }
105     /* set preallocation */
106     for (i=0; i<m; i++) {
107       dlens[i] -= olens[i];
108     }
109     ierr = MatSeqAIJSetPreallocation(mat,0,dlens);CHKERRQ(ierr);
110     ierr = MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);CHKERRQ(ierr);
111 
112     for (i=0; i<m; i++) {
113       dlens[i] += olens[i];
114     }
115     cnt  = 0;
116     for (i=0; i<m; i++) {
117       row  = rstart + i;
118       ierr = MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);CHKERRQ(ierr);
119       cnt += dlens[i];
120     }
121     if (rank) {
122       ierr = PetscFree2(gmataa,gmataj);CHKERRQ(ierr);
123     }
124     ierr = PetscFree2(dlens,olens);CHKERRQ(ierr);
125     ierr = PetscFree(rowners);CHKERRQ(ierr);
126     ((Mat_MPIAIJ*)(mat->data))->ld = ld;
127     *inmat = mat;
128   } else {   /* column indices are already set; only need to move over numerical values from process 0 */
129     Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data;
130     Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data;
131     mat   = *inmat;
132     ierr  = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
133     if (!rank) {
134       /* send numerical values to other processes */
135       gmata = (Mat_SeqAIJ*) gmat->data;
136       ierr   = MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);CHKERRQ(ierr);
137       gmataa = gmata->a;
138       for (i=1; i<size; i++) {
139         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
140         ierr = MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
141       }
142       nz   = gmata->i[rowners[1]]-gmata->i[rowners[0]];
143     } else {
144       /* receive numerical values from process 0 */
145       nz   = Ad->nz + Ao->nz;
146       ierr = PetscMalloc(nz*sizeof(PetscScalar),&gmataa);CHKERRQ(ierr); gmataarestore = gmataa;
147       ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
148     }
149     /* transfer the numerical values into the diagonal (A) and off-diagonal (B) parts of mat */
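    /* (editor's note) ld[i] holds the number of off-diagonal entries of local row i that
       lie to the LEFT of the diagonal block, so each received row is split below as
       [B-left | A | B-right] */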
150     ld = ((Mat_MPIAIJ*)(mat->data))->ld;
151     ad = Ad->a;
152     ao = Ao->a;
153     if (mat->rmap->n) {
154       i  = 0;
155       nz = ld[i];                                   ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
156       nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
157     }
158     for (i=1; i<mat->rmap->n; i++) {
159       nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
160       nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
161     }
162     i--;
163     if (mat->rmap->n) {
164       nz = Ao->i[i+1] - Ao->i[i] - ld[i];           ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
165     }
166     if (rank) {
167       ierr = PetscFree(gmataarestore);CHKERRQ(ierr);
168     }
169   }
170   ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
171   ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
172   CHKMEMQ;
173   PetscFunctionReturn(0);
174 }
175 
176 /*
177   Local utility routine that creates a mapping from the global column
178 number to the local number in the off-diagonal part of the local
179 storage of the matrix.  When PETSC_USE_CTABLE is used this is scalable at
180 a slightly higher hash-table cost; without it, it is not scalable (each process
181 has an order-N integer array, but access is fast).
182 */
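/*
   Editor's sketch: once built, the map is queried (see MatSetValues_MPIAIJ() and
   MatGetValues_MPIAIJ() below) roughly as follows, where gcol is a global column index
   and lcol the resulting local index in the off-diagonal block (both hypothetical names):

   #if defined(PETSC_USE_CTABLE)
     ierr = PetscTableFind(aij->colmap,gcol+1,&lcol);CHKERRQ(ierr);
     lcol--;
   #else
     lcol = aij->colmap[gcol] - 1;
   #endif

   The +1/-1 shift keeps 0 available as the "column not present" value, so lcol < 0
   after the lookup means gcol does not appear in the off-diagonal part.
*/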
183 #undef __FUNCT__
184 #define __FUNCT__ "CreateColmap_MPIAIJ_Private"
185 PetscErrorCode CreateColmap_MPIAIJ_Private(Mat mat)
186 {
187   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
188   PetscErrorCode ierr;
189   PetscInt       n = aij->B->cmap->n,i;
190 
191   PetscFunctionBegin;
192 #if defined (PETSC_USE_CTABLE)
193   ierr = PetscTableCreate(n,&aij->colmap);CHKERRQ(ierr);
194   for (i=0; i<n; i++){
195     ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1);CHKERRQ(ierr);
196   }
197 #else
198   ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscInt),&aij->colmap);CHKERRQ(ierr);
199   ierr = PetscLogObjectMemory(mat,mat->cmap->N*sizeof(PetscInt));CHKERRQ(ierr);
200   ierr = PetscMemzero(aij->colmap,mat->cmap->N*sizeof(PetscInt));CHKERRQ(ierr);
201   for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
202 #endif
203   PetscFunctionReturn(0);
204 }
205 
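/*
   Editor's note: the two macros below insert (or add to) a single value in the local
   diagonal block A or off-diagonal block B of a Mat_MPIAIJ. Each does a short binary
   search for the column within the row, reallocates the row via
   MatSeqXAIJReallocateAIJ() if a new nonzero must be created, and shifts the later
   entries of the row up to make room for it.
*/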
206 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \
207 { \
208     if (col <= lastcol1) low1 = 0; else high1 = nrow1; \
209     lastcol1 = col;\
210     while (high1-low1 > 5) { \
211       t = (low1+high1)/2; \
212       if (rp1[t] > col) high1 = t; \
213       else             low1  = t; \
214     } \
215       for (_i=low1; _i<high1; _i++) { \
216         if (rp1[_i] > col) break; \
217         if (rp1[_i] == col) { \
218           if (addv == ADD_VALUES) ap1[_i] += value;   \
219           else                    ap1[_i] = value; \
220           goto a_noinsert; \
221         } \
222       }  \
223       if (value == 0.0 && ignorezeroentries) {low1 = 0; high1 = nrow1;goto a_noinsert;} \
224       if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;}		\
225       if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
226       MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \
227       N = nrow1++ - 1; a->nz++; high1++; \
228       /* shift up all the later entries in this row */ \
229       for (ii=N; ii>=_i; ii--) { \
230         rp1[ii+1] = rp1[ii]; \
231         ap1[ii+1] = ap1[ii]; \
232       } \
233       rp1[_i] = col;  \
234       ap1[_i] = value;  \
235       a_noinsert: ; \
236       ailen[row] = nrow1; \
237 }
238 
239 
240 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \
241 { \
242     if (col <= lastcol2) low2 = 0; else high2 = nrow2; \
243     lastcol2 = col;\
244     while (high2-low2 > 5) { \
245       t = (low2+high2)/2; \
246       if (rp2[t] > col) high2 = t; \
247       else             low2  = t; \
248     } \
249     for (_i=low2; _i<high2; _i++) {		\
250       if (rp2[_i] > col) break;			\
251       if (rp2[_i] == col) {			      \
252 	if (addv == ADD_VALUES) ap2[_i] += value;     \
253 	else                    ap2[_i] = value;      \
254 	goto b_noinsert;			      \
255       }						      \
256     }							      \
257     if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
258     if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;}		\
259     if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
260     MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \
261     N = nrow2++ - 1; b->nz++; high2++;					\
262     /* shift up all the later entries in this row */			\
263     for (ii=N; ii>=_i; ii--) {						\
264       rp2[ii+1] = rp2[ii];						\
265       ap2[ii+1] = ap2[ii];						\
266     }									\
267     rp2[_i] = col;							\
268     ap2[_i] = value;							\
269     b_noinsert: ;								\
270     bilen[row] = nrow2;							\
271 }
272 
273 #undef __FUNCT__
274 #define __FUNCT__ "MatSetValuesRow_MPIAIJ"
275 PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[])
276 {
277   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)A->data;
278   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data;
279   PetscErrorCode ierr;
280   PetscInt       l,*garray = mat->garray,diag;
281 
282   PetscFunctionBegin;
283   /* code only works for square matrices A */
284 
285   /* find size of row to the left of the diagonal part */
286   ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr);
287   row  = row - diag;
288   for (l=0; l<b->i[row+1]-b->i[row]; l++) {
289     if (garray[b->j[b->i[row]+l]] > diag) break;
290   }
291   ierr = PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));CHKERRQ(ierr);
292 
293   /* diagonal part */
294   ierr = PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));CHKERRQ(ierr);
295 
296   /* right of diagonal part */
297   ierr = PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));CHKERRQ(ierr);
298   PetscFunctionReturn(0);
299 }
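/*
   Editor's note: MatSetValuesRow_MPIAIJ() above assumes v[] holds the entire local row
   ordered by global column index, i.e.

     [ off-diagonal entries left of the diagonal block | diagonal-block entries | remaining off-diagonal entries ]

   which is why the three PetscMemcpy() calls copy l values into B, a->i[row+1]-a->i[row]
   values into A, and the rest back into B.
*/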
300 
301 #undef __FUNCT__
302 #define __FUNCT__ "MatSetValues_MPIAIJ"
303 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
304 {
305   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
306   PetscScalar    value;
307   PetscErrorCode ierr;
308   PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
309   PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
310   PetscBool      roworiented = aij->roworiented;
311 
312   /* Some variables required by the MatSetValues_SeqAIJ_A/B_Private() macros */
313   Mat            A = aij->A;
314   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data;
315   PetscInt       *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
316   MatScalar      *aa = a->a;
317   PetscBool      ignorezeroentries = a->ignorezeroentries;
318   Mat            B = aij->B;
319   Mat_SeqAIJ     *b = (Mat_SeqAIJ*)B->data;
320   PetscInt       *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
321   MatScalar      *ba = b->a;
322 
323   PetscInt       *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
324   PetscInt       nonew = a->nonew;
325   MatScalar      *ap1,*ap2;
326 
327   PetscFunctionBegin;
328   if (v) PetscValidScalarPointer(v,6);
329   for (i=0; i<m; i++) {
330     if (im[i] < 0) continue;
331 #if defined(PETSC_USE_DEBUG)
332     if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
333 #endif
334     if (im[i] >= rstart && im[i] < rend) {
335       row      = im[i] - rstart;
336       lastcol1 = -1;
337       rp1      = aj + ai[row];
338       ap1      = aa + ai[row];
339       rmax1    = aimax[row];
340       nrow1    = ailen[row];
341       low1     = 0;
342       high1    = nrow1;
343       lastcol2 = -1;
344       rp2      = bj + bi[row];
345       ap2      = ba + bi[row];
346       rmax2    = bimax[row];
347       nrow2    = bilen[row];
348       low2     = 0;
349       high2    = nrow2;
350 
351       for (j=0; j<n; j++) {
352         if (v) {if (roworiented) value = v[i*n+j]; else value = v[i+j*m];} else value = 0.0;
353         if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
354         if (in[j] >= cstart && in[j] < cend){
355           col = in[j] - cstart;
356           MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
357         } else if (in[j] < 0) continue;
358 #if defined(PETSC_USE_DEBUG)
359         else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
360 #endif
361         else {
362           if (mat->was_assembled) {
363             if (!aij->colmap) {
364               ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
365             }
366 #if defined (PETSC_USE_CTABLE)
367             ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
368 	    col--;
369 #else
370             col = aij->colmap[in[j]] - 1;
371 #endif
372             if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
373               ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
374               col =  in[j];
375               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
376               B = aij->B;
377               b = (Mat_SeqAIJ*)B->data;
378               bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a;
379               rp2      = bj + bi[row];
380               ap2      = ba + bi[row];
381               rmax2    = bimax[row];
382               nrow2    = bilen[row];
383               low2     = 0;
384               high2    = nrow2;
385               bm       = aij->B->rmap->n;
386               ba = b->a;
387             }
388           } else col = in[j];
389           MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
390         }
391       }
392     } else {
393       if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
394       if (!aij->donotstash) {
395         if (roworiented) {
396           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
397         } else {
398           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
399         }
400       }
401     }
402   }
403   PetscFunctionReturn(0);
404 }
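/*
   Editor's sketch: values may be set for any global row; entries for rows owned by other
   processes are stashed here and exchanged during assembly. A typical usage (Adist and
   the indices are hypothetical):

     PetscInt    i = 3,j = 7;
     PetscScalar v = 42.0;
     ierr = MatSetValues(Adist,1,&i,1,&j,&v,ADD_VALUES);CHKERRQ(ierr);
     ierr = MatAssemblyBegin(Adist,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
     ierr = MatAssemblyEnd(Adist,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
*/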
405 
406 #undef __FUNCT__
407 #define __FUNCT__ "MatGetValues_MPIAIJ"
408 PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
409 {
410   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
411   PetscErrorCode ierr;
412   PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
413   PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
414 
415   PetscFunctionBegin;
416   for (i=0; i<m; i++) {
417     if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/
418     if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
419     if (idxm[i] >= rstart && idxm[i] < rend) {
420       row = idxm[i] - rstart;
421       for (j=0; j<n; j++) {
422         if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */
423         if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
424         if (idxn[j] >= cstart && idxn[j] < cend){
425           col = idxn[j] - cstart;
426           ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
427         } else {
428           if (!aij->colmap) {
429             ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
430           }
431 #if defined (PETSC_USE_CTABLE)
432           ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
433           col --;
434 #else
435           col = aij->colmap[idxn[j]] - 1;
436 #endif
437           if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
438           else {
439             ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
440           }
441         }
442       }
443     } else {
444       SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
445     }
446   }
447   PetscFunctionReturn(0);
448 }
449 
450 extern PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat,Vec,Vec);
451 
452 #undef __FUNCT__
453 #define __FUNCT__ "MatAssemblyBegin_MPIAIJ"
454 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
455 {
456   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
457   PetscErrorCode ierr;
458   PetscInt       nstash,reallocs;
459   InsertMode     addv;
460 
461   PetscFunctionBegin;
462   if (aij->donotstash || mat->nooffprocentries) {
463     PetscFunctionReturn(0);
464   }
465 
466   /* make sure all processes are using either INSERT_VALUES or ADD_VALUES */
467   ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,((PetscObject)mat)->comm);CHKERRQ(ierr);
468   if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
469   mat->insertmode = addv; /* in case this processor had no cache */
470 
471   ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
472   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
473   ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
474   PetscFunctionReturn(0);
475 }
476 
477 #undef __FUNCT__
478 #define __FUNCT__ "MatAssemblyEnd_MPIAIJ"
479 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
480 {
481   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
482   Mat_SeqAIJ     *a=(Mat_SeqAIJ *)aij->A->data;
483   PetscErrorCode ierr;
484   PetscMPIInt    n;
485   PetscInt       i,j,rstart,ncols,flg;
486   PetscInt       *row,*col;
487   PetscBool      other_disassembled;
488   PetscScalar    *val;
489   InsertMode     addv = mat->insertmode;
490 
491   /* do not use 'b = (Mat_SeqAIJ *)aij->B->data' as B can be reset in disassembly */
492   PetscFunctionBegin;
493   if (!aij->donotstash && !mat->nooffprocentries) {
494     while (1) {
495       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
496       if (!flg) break;
497 
498       for (i=0; i<n;) {
499         /* Now identify the consecutive vals belonging to the same row */
500         for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
501         if (j < n) ncols = j-i;
502         else       ncols = n-i;
503         /* Now assemble all these values with a single function call */
504         ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
505         i = j;
506       }
507     }
508     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
509   }
510   a->compressedrow.use     = PETSC_FALSE;
511   ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
512   ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);
513 
514   /* determine if any process has disassembled; if so, we must
515      also disassemble ourselves so that we may reassemble. */
516   /*
517      if the nonzero structure of the submatrix B cannot change, then we know that
518      no process disassembled and we can skip this step
519   */
520   if (!((Mat_SeqAIJ*)aij->B->data)->nonew)  {
521     ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,((PetscObject)mat)->comm);CHKERRQ(ierr);
522     if (mat->was_assembled && !other_disassembled) {
523       ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
524     }
525   }
526   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
527     ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
528   }
529   ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr);
530   ((Mat_SeqAIJ *)aij->B->data)->compressedrow.use = PETSC_TRUE; /* b->compressedrow.use */
531   ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
532   ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);
533 
534   ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
535   aij->rowvalues = 0;
536 
537   /* used by MatAXPY() */
538   a->xtoy = 0; ((Mat_SeqAIJ *)aij->B->data)->xtoy = 0;  /* b->xtoy = 0 */
539   a->XtoY = 0; ((Mat_SeqAIJ *)aij->B->data)->XtoY = 0;  /* b->XtoY = 0 */
540 
541   if (aij->diag) {ierr = VecDestroy(aij->diag);CHKERRQ(ierr);aij->diag = 0;}
542   if (a->inode.size) mat->ops->multdiagonalblock = MatMultDiagonalBlock_MPIAIJ;
543   PetscFunctionReturn(0);
544 }
545 
546 #undef __FUNCT__
547 #define __FUNCT__ "MatZeroEntries_MPIAIJ"
548 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
549 {
550   Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
551   PetscErrorCode ierr;
552 
553   PetscFunctionBegin;
554   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
555   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
556   PetscFunctionReturn(0);
557 }
558 
559 #undef __FUNCT__
560 #define __FUNCT__ "MatZeroRows_MPIAIJ"
561 PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag)
562 {
563   Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
564   PetscErrorCode ierr;
565   PetscMPIInt    size = l->size,imdex,n,rank = l->rank,tag = ((PetscObject)A)->tag,lastidx = -1;
566   PetscInt       i,*owners = A->rmap->range;
567   PetscInt       *nprocs,j,idx,nsends,row;
568   PetscInt       nmax,*svalues,*starts,*owner,nrecvs;
569   PetscInt       *rvalues,count,base,slen,*source;
570   PetscInt       *lens,*lrows,*values,rstart=A->rmap->rstart;
571   MPI_Comm       comm = ((PetscObject)A)->comm;
572   MPI_Request    *send_waits,*recv_waits;
573   MPI_Status     recv_status,*send_status;
574 #if defined(PETSC_DEBUG)
575   PetscBool      found = PETSC_FALSE;
576 #endif
577 
578   PetscFunctionBegin;
579   /*  first count the number of rows going to each process */
580   ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
581   ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
582   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/
583   j = 0;
584   for (i=0; i<N; i++) {
585     if (lastidx > (idx = rows[i])) j = 0;
586     lastidx = idx;
587     for (; j<size; j++) {
588       if (idx >= owners[j] && idx < owners[j+1]) {
589         nprocs[2*j]++;
590         nprocs[2*j+1] = 1;
591         owner[i] = j;
592 #if defined(PETSC_DEBUG)
593         found = PETSC_TRUE;
594 #endif
595         break;
596       }
597     }
598 #if defined(PETSC_DEBUG)
599     if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
600     found = PETSC_FALSE;
601 #endif
602   }
603   nsends = 0;  for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}
604 
605   if (A->nooffproczerorows) {
606     if (nsends > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"You called MatSetOption(,MAT_NO_OFF_PROC_ZERO_ROWS,PETSC_TRUE) but set an off process zero row");
607     nrecvs = nsends;
608     nmax   = N;
609   } else {
610     /* inform the other processes of the number of messages and the maximum length */
611     ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);
612   }
613 
614   /* post receives:   */
615   ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr);
616   ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
617   for (i=0; i<nrecvs; i++) {
618     ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
619   }
620 
621   /* do sends:
622       1) starts[i] gives the starting index in svalues for the rows going to
623          the ith process
624   */
625   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr);
626   ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
627   ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr);
628   starts[0] = 0;
629   for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
630   for (i=0; i<N; i++) {
631     svalues[starts[owner[i]]++] = rows[i];
632   }
633 
634   starts[0] = 0;
635   for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
636   count = 0;
637   for (i=0; i<size; i++) {
638     if (nprocs[2*i+1]) {
639       ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
640     }
641   }
642   ierr = PetscFree(starts);CHKERRQ(ierr);
643 
644   base = owners[rank];
645 
646   /*  wait on receives */
647   ierr   = PetscMalloc2(nrecvs,PetscInt,&lens,nrecvs,PetscInt,&source);CHKERRQ(ierr);
648   count  = nrecvs; slen = 0;
649   while (count) {
650     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
651     /* unpack receives into our local space */
652     ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);
653     source[imdex]  = recv_status.MPI_SOURCE;
654     lens[imdex]    = n;
655     slen          += n;
656     count--;
657   }
658   ierr = PetscFree(recv_waits);CHKERRQ(ierr);
659 
660   /* move the data into the send scatter */
661   ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr);
662   count = 0;
663   for (i=0; i<nrecvs; i++) {
664     values = rvalues + i*nmax;
665     for (j=0; j<lens[i]; j++) {
666       lrows[count++] = values[j] - base;
667     }
668   }
669   ierr = PetscFree(rvalues);CHKERRQ(ierr);
670   ierr = PetscFree2(lens,source);CHKERRQ(ierr);
671   ierr = PetscFree(owner);CHKERRQ(ierr);
672   ierr = PetscFree(nprocs);CHKERRQ(ierr);
673 
674   /* actually zap the local rows */
675   /*
676         Zero the required rows. If the "diagonal block" of the matrix
677      is square and the user wishes to set the diagonal, we use separate
678      code so that MatSetValues() is not called for each diagonal entry,
679      which would allocate new memory repeatedly and slow things down.
680 
681   */
682   /* must zero l->B before l->A because the (diag) case below may put values into l->B */
683   ierr = MatZeroRows(l->B,slen,lrows,0.0);CHKERRQ(ierr);
684   if ((diag != 0.0) && (l->A->rmap->N == l->A->cmap->N)) {
685     ierr      = MatZeroRows(l->A,slen,lrows,diag);CHKERRQ(ierr);
686   } else if (diag != 0.0) {
687     ierr = MatZeroRows(l->A,slen,lrows,0.0);CHKERRQ(ierr);
688     if (((Mat_SeqAIJ*)l->A->data)->nonew) {
689       SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\n\
690 MAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
691     }
692     for (i = 0; i < slen; i++) {
693       row  = lrows[i] + rstart;
694       ierr = MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr);
695     }
696     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
697     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
698   } else {
699     ierr = MatZeroRows(l->A,slen,lrows,0.0);CHKERRQ(ierr);
700   }
701   ierr = PetscFree(lrows);CHKERRQ(ierr);
702 
703   /* wait on sends */
704   if (nsends) {
705     ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
706     ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
707     ierr = PetscFree(send_status);CHKERRQ(ierr);
708   }
709   ierr = PetscFree(send_waits);CHKERRQ(ierr);
710   ierr = PetscFree(svalues);CHKERRQ(ierr);
711 
712   PetscFunctionReturn(0);
713 }
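/*
   Editor's sketch: rows are given by global index and may be owned by other processes;
   they are routed to their owners above before the local zeroing. For example, zeroing
   two (hypothetical) rows and placing 1.0 on their diagonals:

     PetscInt rows[2] = {0,5};
     ierr = MatZeroRows(A,2,rows,1.0);CHKERRQ(ierr);
*/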
714 
715 #undef __FUNCT__
716 #define __FUNCT__ "MatMult_MPIAIJ"
717 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
718 {
719   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
720   PetscErrorCode ierr;
721   PetscInt       nt;
722 
723   PetscFunctionBegin;
724   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
725   if (nt != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt);
726   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
727   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
728   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
729   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
730   PetscFunctionReturn(0);
731 }
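/*
   Editor's note: MatMult_MPIAIJ() above overlaps communication with computation: the
   ghost values of xx are scattered into a->lvec while the diagonal block is applied,
   so effectively

     yy = A_diag * xx_local;   yy += B_off * xx_ghost;

   where A_diag = a->A and B_off = a->B (the notation is the editor's, not PETSc's).
*/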
732 
733 #undef __FUNCT__
734 #define __FUNCT__ "MatMultDiagonalBlock_MPIAIJ"
735 PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx)
736 {
737   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
738   PetscErrorCode ierr;
739 
740   PetscFunctionBegin;
741   ierr = MatMultDiagonalBlock(a->A,bb,xx);CHKERRQ(ierr);
742   PetscFunctionReturn(0);
743 }
744 
745 #undef __FUNCT__
746 #define __FUNCT__ "MatMultAdd_MPIAIJ"
747 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
748 {
749   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
750   PetscErrorCode ierr;
751 
752   PetscFunctionBegin;
753   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
754   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
755   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
756   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
757   PetscFunctionReturn(0);
758 }
759 
760 #undef __FUNCT__
761 #define __FUNCT__ "MatMultTranspose_MPIAIJ"
762 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
763 {
764   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
765   PetscErrorCode ierr;
766   PetscBool      merged;
767 
768   PetscFunctionBegin;
769   ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr);
770   /* do nondiagonal part */
771   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
772   if (!merged) {
773     /* send it on its way */
774     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
775     /* do local part */
776     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
777     /* receive remote parts: note this assumes the values are not actually */
778     /* added into yy until the VecScatterEnd() on the next line */
779     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
780   } else {
781     /* do local part */
782     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
783     /* send it on its way */
784     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
785     /* the values were actually received in the Begin(), but we still need to call this no-op End() */
786     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
787   }
788   PetscFunctionReturn(0);
789 }
790 
791 EXTERN_C_BEGIN
792 #undef __FUNCT__
793 #define __FUNCT__ "MatIsTranspose_MPIAIJ"
794 PetscErrorCode PETSCMAT_DLLEXPORT MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscBool  *f)
795 {
796   MPI_Comm       comm;
797   Mat_MPIAIJ     *Aij = (Mat_MPIAIJ *) Amat->data, *Bij;
798   Mat            Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs;
799   IS             Me,Notme;
800   PetscErrorCode ierr;
801   PetscInt       M,N,first,last,*notme,i;
802   PetscMPIInt    size;
803 
804   PetscFunctionBegin;
805 
806   /* Easy test: symmetric diagonal block */
807   Bij = (Mat_MPIAIJ *) Bmat->data; Bdia = Bij->A;
808   ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr);
809   if (!*f) PetscFunctionReturn(0);
810   ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr);
811   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
812   if (size == 1) PetscFunctionReturn(0);
813 
814   /* Hard test: off-diagonal block. This takes a MatGetSubMatrix. */
815   ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr);
816   ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr);
817   ierr = PetscMalloc((N-last+first)*sizeof(PetscInt),&notme);CHKERRQ(ierr);
818   for (i=0; i<first; i++) notme[i] = i;
819   for (i=last; i<M; i++) notme[i-last+first] = i;
820   ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,PETSC_COPY_VALUES,&Notme);CHKERRQ(ierr);
821   ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr);
822   ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr);
823   Aoff = Aoffs[0];
824   ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr);
825   Boff = Boffs[0];
826   ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr);
827   ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr);
828   ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr);
829   ierr = ISDestroy(Me);CHKERRQ(ierr);
830   ierr = ISDestroy(Notme);CHKERRQ(ierr);
831 
832   PetscFunctionReturn(0);
833 }
834 EXTERN_C_END
835 
836 #undef __FUNCT__
837 #define __FUNCT__ "MatMultTransposeAdd_MPIAIJ"
838 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
839 {
840   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
841   PetscErrorCode ierr;
842 
843   PetscFunctionBegin;
844   /* do nondiagonal part */
845   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
846   /* send it on its way */
847   ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
848   /* do local part */
849   ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
850   /* receive remote parts */
851   ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
852   PetscFunctionReturn(0);
853 }
854 
855 /*
856   This only works correctly for square matrices where the subblock A->A is the
857    diagonal block
858 */
859 #undef __FUNCT__
860 #define __FUNCT__ "MatGetDiagonal_MPIAIJ"
861 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v)
862 {
863   PetscErrorCode ierr;
864   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
865 
866   PetscFunctionBegin;
867   if (A->rmap->N != A->cmap->N) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
868   if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
869   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
870   PetscFunctionReturn(0);
871 }
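/*
   Editor's sketch: a compatible vector for the diagonal can be obtained from the matrix
   itself (the name d is hypothetical):

     Vec d;
     ierr = MatGetVecs(A,&d,PETSC_NULL);CHKERRQ(ierr);
     ierr = MatGetDiagonal(A,d);CHKERRQ(ierr);
*/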
872 
873 #undef __FUNCT__
874 #define __FUNCT__ "MatScale_MPIAIJ"
875 PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa)
876 {
877   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
878   PetscErrorCode ierr;
879 
880   PetscFunctionBegin;
881   ierr = MatScale(a->A,aa);CHKERRQ(ierr);
882   ierr = MatScale(a->B,aa);CHKERRQ(ierr);
883   PetscFunctionReturn(0);
884 }
885 
886 #undef __FUNCT__
887 #define __FUNCT__ "MatDestroy_MPIAIJ"
888 PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
889 {
890   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
891   PetscErrorCode ierr;
892 
893   PetscFunctionBegin;
894 #if defined(PETSC_USE_LOG)
895   PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N);
896 #endif
897   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
898   if (aij->diag) {ierr = VecDestroy(aij->diag);CHKERRQ(ierr);}
899   if (aij->A){ierr = MatDestroy(aij->A);CHKERRQ(ierr);}
900   if (aij->B){ierr = MatDestroy(aij->B);CHKERRQ(ierr);}
901 #if defined (PETSC_USE_CTABLE)
902   if (aij->colmap) {ierr = PetscTableDestroy(aij->colmap);CHKERRQ(ierr);}
903 #else
904   ierr = PetscFree(aij->colmap);CHKERRQ(ierr);
905 #endif
906   ierr = PetscFree(aij->garray);CHKERRQ(ierr);
907   if (aij->lvec)   {ierr = VecDestroy(aij->lvec);CHKERRQ(ierr);}
908   if (aij->Mvctx)  {ierr = VecScatterDestroy(aij->Mvctx);CHKERRQ(ierr);}
909   ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
910   ierr = PetscFree(aij->ld);CHKERRQ(ierr);
911   ierr = PetscFree(aij);CHKERRQ(ierr);
912 
913   ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
914   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr);
915   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr);
916   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr);
917   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C","",PETSC_NULL);CHKERRQ(ierr);
918   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
919   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr);
920   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C","",PETSC_NULL);CHKERRQ(ierr);
921   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C","",PETSC_NULL);CHKERRQ(ierr);
922   PetscFunctionReturn(0);
923 }
924 
925 #undef __FUNCT__
926 #define __FUNCT__ "MatView_MPIAIJ_Binary"
927 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
928 {
929   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
930   Mat_SeqAIJ*       A = (Mat_SeqAIJ*)aij->A->data;
931   Mat_SeqAIJ*       B = (Mat_SeqAIJ*)aij->B->data;
932   PetscErrorCode    ierr;
933   PetscMPIInt       rank,size,tag = ((PetscObject)viewer)->tag;
934   int               fd;
935   PetscInt          nz,header[4],*row_lengths,*range=0,rlen,i;
936   PetscInt          nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap->rstart,rnz;
937   PetscScalar       *column_values;
938 
939   PetscFunctionBegin;
940   ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr);
941   ierr = MPI_Comm_size(((PetscObject)mat)->comm,&size);CHKERRQ(ierr);
942   nz   = A->nz + B->nz;
943   if (!rank) {
944     header[0] = MAT_FILE_CLASSID;
945     header[1] = mat->rmap->N;
946     header[2] = mat->cmap->N;
947     ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
948     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
949     ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
950     /* get largest number of rows any processor has */
951     rlen = mat->rmap->n;
952     range = mat->rmap->range;
953     for (i=1; i<size; i++) {
954       rlen = PetscMax(rlen,range[i+1] - range[i]);
955     }
956   } else {
957     ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
958     rlen = mat->rmap->n;
959   }
960 
961   /* load up the local row counts */
962   ierr = PetscMalloc((rlen+1)*sizeof(PetscInt),&row_lengths);CHKERRQ(ierr);
963   for (i=0; i<mat->rmap->n; i++) {
964     row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
965   }
966 
967   /* store the row lengths to the file */
968   if (!rank) {
969     MPI_Status status;
970     ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap->n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
971     for (i=1; i<size; i++) {
972       rlen = range[i+1] - range[i];
973       ierr = MPI_Recv(row_lengths,rlen,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
974       ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
975     }
976   } else {
977     ierr = MPI_Send(row_lengths,mat->rmap->n,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
978   }
979   ierr = PetscFree(row_lengths);CHKERRQ(ierr);
980 
981   /* load up the local column indices */
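  /* (editor's note) each local row is emitted in global column order: the off-diagonal
     entries left of the diagonal block, then the diagonal block, then the remaining
     off-diagonal entries, which is what the three inner loops below do */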
982   nzmax = nz; /* process 0 needs as much space as the largest process needs */
983   ierr = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
984   ierr = PetscMalloc((nzmax+1)*sizeof(PetscInt),&column_indices);CHKERRQ(ierr);
985   cnt  = 0;
986   for (i=0; i<mat->rmap->n; i++) {
987     for (j=B->i[i]; j<B->i[i+1]; j++) {
988       if ( (col = garray[B->j[j]]) > cstart) break;
989       column_indices[cnt++] = col;
990     }
991     for (k=A->i[i]; k<A->i[i+1]; k++) {
992       column_indices[cnt++] = A->j[k] + cstart;
993     }
994     for (; j<B->i[i+1]; j++) {
995       column_indices[cnt++] = garray[B->j[j]];
996     }
997   }
998   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
999 
1000   /* store the column indices to the file */
1001   if (!rank) {
1002     MPI_Status status;
1003     ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1004     for (i=1; i<size; i++) {
1005       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
1006       if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: rnz = %D nzmax = %D",rnz,nzmax);
1007       ierr = MPI_Recv(column_indices,rnz,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
1008       ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1009     }
1010   } else {
1011     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1012     ierr = MPI_Send(column_indices,nz,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1013   }
1014   ierr = PetscFree(column_indices);CHKERRQ(ierr);
1015 
1016   /* load up the local column values */
1017   ierr = PetscMalloc((nzmax+1)*sizeof(PetscScalar),&column_values);CHKERRQ(ierr);
1018   cnt  = 0;
1019   for (i=0; i<mat->rmap->n; i++) {
1020     for (j=B->i[i]; j<B->i[i+1]; j++) {
1021       if ( garray[B->j[j]] > cstart) break;
1022       column_values[cnt++] = B->a[j];
1023     }
1024     for (k=A->i[i]; k<A->i[i+1]; k++) {
1025       column_values[cnt++] = A->a[k];
1026     }
1027     for (; j<B->i[i+1]; j++) {
1028       column_values[cnt++] = B->a[j];
1029     }
1030   }
1031   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
1032 
1033   /* store the column values to the file */
1034   if (!rank) {
1035     MPI_Status status;
1036     ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1037     for (i=1; i<size; i++) {
1038       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
1039       if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: rnz = %D nzmax = %D",rnz,nzmax);
1040       ierr = MPI_Recv(column_values,rnz,MPIU_SCALAR,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
1041       ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1042     }
1043   } else {
1044     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1045     ierr = MPI_Send(column_values,nz,MPIU_SCALAR,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1046   }
1047   ierr = PetscFree(column_values);CHKERRQ(ierr);
1048   PetscFunctionReturn(0);
1049 }
1050 
1051 #undef __FUNCT__
1052 #define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket"
1053 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
1054 {
1055   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
1056   PetscErrorCode    ierr;
1057   PetscMPIInt       rank = aij->rank,size = aij->size;
1058   PetscBool         isdraw,iascii,isbinary;
1059   PetscViewer       sviewer;
1060   PetscViewerFormat format;
1061 
1062   PetscFunctionBegin;
1063   ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1064   ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1065   ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1066   if (iascii) {
1067     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1068     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1069       MatInfo    info;
1070       PetscBool  inodes;
1071 
1072       ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr);
1073       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
1074       ierr = MatInodeGetInodeSizes(aij->A,PETSC_NULL,(PetscInt **)&inodes,PETSC_NULL);CHKERRQ(ierr);
1075       if (!inodes) {
1076         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, not using I-node routines\n",
1077 					      rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
1078       } else {
1079         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, using I-node routines\n",
1080 		    rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
1081       }
1082       ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
1083       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1084       ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
1085       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1086       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1087       ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
1088       ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr);
1089       PetscFunctionReturn(0);
1090     } else if (format == PETSC_VIEWER_ASCII_INFO) {
1091       PetscInt   inodecount,inodelimit,*inodes;
1092       ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr);
1093       if (inodes) {
1094         ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr);
1095       } else {
1096         ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr);
1097       }
1098       PetscFunctionReturn(0);
1099     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1100       PetscFunctionReturn(0);
1101     }
1102   } else if (isbinary) {
1103     if (size == 1) {
1104       ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1105       ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
1106     } else {
1107       ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr);
1108     }
1109     PetscFunctionReturn(0);
1110   } else if (isdraw) {
1111     PetscDraw  draw;
1112     PetscBool  isnull;
1113     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
1114     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
1115   }
1116 
1117   if (size == 1) {
1118     ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1119     ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
1120   } else {
1121     /* assemble the entire matrix onto first processor. */
1122     Mat         A;
1123     Mat_SeqAIJ  *Aloc;
1124     PetscInt    M = mat->rmap->N,N = mat->cmap->N,m,*ai,*aj,row,*cols,i,*ct;
1125     MatScalar   *a;
1126 
1127     if (mat->rmap->N > 1024) {
1128       PetscBool  flg = PETSC_FALSE;
1129 
1130       ierr = PetscOptionsGetBool(((PetscObject) mat)->prefix, "-mat_ascii_output_large", &flg,PETSC_NULL);CHKERRQ(ierr);
1131       if (!flg) {
1132         SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_OUTOFRANGE,"ASCII matrix output not allowed for matrices with more than 1024 rows, use binary format instead.\nYou can override this restriction using -mat_ascii_output_large.");
1133       }
1134     }
1135 
1136     ierr = MatCreate(((PetscObject)mat)->comm,&A);CHKERRQ(ierr);
1137     if (!rank) {
1138       ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
1139     } else {
1140       ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
1141     }
1142     /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */
1143     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
1144     ierr = MatMPIAIJSetPreallocation(A,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
1145     ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr);
1146 
1147     /* copy over the A part */
1148     Aloc = (Mat_SeqAIJ*)aij->A->data;
1149     m = aij->A->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1150     row = mat->rmap->rstart;
1151     for (i=0; i<ai[m]; i++) {aj[i] += mat->cmap->rstart ;}
1152     for (i=0; i<m; i++) {
1153       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
1154       row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
1155     }
1156     aj = Aloc->j;
1157     for (i=0; i<ai[m]; i++) {aj[i] -= mat->cmap->rstart;}
1158 
1159     /* copy over the B part */
1160     Aloc = (Mat_SeqAIJ*)aij->B->data;
1161     m    = aij->B->rmap->n;  ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1162     row  = mat->rmap->rstart;
1163     ierr = PetscMalloc((ai[m]+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr);
1164     ct   = cols;
1165     for (i=0; i<ai[m]; i++) {cols[i] = aij->garray[aj[i]];}
1166     for (i=0; i<m; i++) {
1167       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
1168       row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
1169     }
1170     ierr = PetscFree(ct);CHKERRQ(ierr);
1171     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1172     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1173     /*
1174        Everyone has to call to draw the matrix since the graphics waits are
1175        synchronized across all processors that share the PetscDraw object
1176     */
1177     ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
1178     if (!rank) {
1179       ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1180       /* Set the type name to MATMPIAIJ so that the correct type can be printed out by PetscObjectPrintClassNamePrefixType() in MatView_SeqAIJ_ASCII()*/
1181       ierr = PetscStrcpy(((PetscObject)((Mat_MPIAIJ*)(A->data))->A)->type_name,MATMPIAIJ);CHKERRQ(ierr);
1182       ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
1183     }
1184     ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
1185     ierr = MatDestroy(A);CHKERRQ(ierr);
1186   }
1187   PetscFunctionReturn(0);
1188 }
1189 
1190 #undef __FUNCT__
1191 #define __FUNCT__ "MatView_MPIAIJ"
1192 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer)
1193 {
1194   PetscErrorCode ierr;
1195   PetscBool      iascii,isdraw,issocket,isbinary;
1196 
1197   PetscFunctionBegin;
1198   ierr  = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1199   ierr  = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1200   ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1201   ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
1202   if (iascii || isdraw || isbinary || issocket) {
1203     ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
1204   } else {
1205     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Viewer type %s not supported by MPIAIJ matrices",((PetscObject)viewer)->type_name);
1206   }
1207   PetscFunctionReturn(0);
1208 }
1209 
1210 #undef __FUNCT__
1211 #define __FUNCT__ "MatSOR_MPIAIJ"
1212 PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
1213 {
1214   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1215   PetscErrorCode ierr;
1216   Vec            bb1 = 0;
1217   PetscBool      hasop;
1218 
1219   PetscFunctionBegin;
1220   if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) {
1221     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
1222   }
1223 
1224   if (flag == SOR_APPLY_UPPER) {
1225     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1226     PetscFunctionReturn(0);
1227   }
1228 
1229   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
1230     if (flag & SOR_ZERO_INITIAL_GUESS) {
1231       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1232       its--;
1233     }
1234 
1235     while (its--) {
1236       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1237       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1238 
1239       /* update rhs: bb1 = bb - B*x */
1240       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1241       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1242 
1243       /* local sweep */
1244       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1245     }
1246   } else if (flag & SOR_LOCAL_FORWARD_SWEEP){
1247     if (flag & SOR_ZERO_INITIAL_GUESS) {
1248       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1249       its--;
1250     }
1251     while (its--) {
1252       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1253       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1254 
1255       /* update rhs: bb1 = bb - B*x */
1256       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1257       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1258 
1259       /* local sweep */
1260       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1261     }
1262   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){
1263     if (flag & SOR_ZERO_INITIAL_GUESS) {
1264       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1265       its--;
1266     }
1267     while (its--) {
1268       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1269       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1270 
1271       /* update rhs: bb1 = bb - B*x */
1272       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1273       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1274 
1275       /* local sweep */
1276       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1277     }
1278   }  else if (flag & SOR_EISENSTAT) {
1279     Vec         xx1;
1280 
1281     ierr = VecDuplicate(bb,&xx1);CHKERRQ(ierr);
1282     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr);
1283 
1284     ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1285     ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1286     if (!mat->diag) {
1287       ierr = MatGetVecs(matin,&mat->diag,PETSC_NULL);CHKERRQ(ierr);
1288       ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr);
1289     }
1290     ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr);
1291     if (hasop) {
1292       ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr);
1293     } else {
1294       ierr = VecPointwiseMult(bb1,mat->diag,xx);CHKERRQ(ierr);
1295     }
1296     ierr = VecAYPX(bb1,(omega-2.0)/omega,bb);CHKERRQ(ierr);
1297 
1298     ierr = MatMultAdd(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr);
1299 
1300     /* local sweep */
1301     ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr);
1302     ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr);
1303     ierr = VecDestroy(xx1);CHKERRQ(ierr);
1304   } else {
1305     SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_SUP,"Parallel SOR not supported");
1306   }
1307 
1308   if (bb1) {ierr = VecDestroy(bb1);CHKERRQ(ierr);}
1309   PetscFunctionReturn(0);
1310 }
1311 
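/*
   Notes on the routine above (added commentary, not part of the original source):
   the parallel SOR applies SOR only within each process's diagonal block A,
   while the coupling to off-process unknowns held in the off-diagonal block B
   is frozen at the previous iterate.  Each sweep therefore performs

       bb1 = bb - B*x_ghost,   followed by SOR on A with right-hand side bb1,

   i.e. a process-block Jacobi outer iteration with SOR inside each block.

   A minimal caller-side sketch (assumed usage; A, b, x are a previously
   assembled MPIAIJ matrix and conforming vectors):

       PetscReal omega = 1.0, fshift = 0.0;
       ierr = MatSOR(A,b,omega,(MatSORType)(SOR_LOCAL_SYMMETRIC_SWEEP | SOR_ZERO_INITIAL_GUESS),fshift,1,1,x);CHKERRQ(ierr);
*/
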
1312 #undef __FUNCT__
1313 #define __FUNCT__ "MatPermute_MPIAIJ"
1314 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B)
1315 {
1316   MPI_Comm       comm,pcomm;
1317   PetscInt       first,local_size,nrows;
1318   const PetscInt *rows;
1319   PetscMPIInt    size;
1320   IS             crowp,growp,irowp,lrowp,lcolp,icolp;
1321   PetscErrorCode ierr;
1322 
1323   PetscFunctionBegin;
1324   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
1325   /* make a collective version of 'rowp' */
1326   ierr = PetscObjectGetComm((PetscObject)rowp,&pcomm);CHKERRQ(ierr);
1327   if (pcomm==comm) {
1328     crowp = rowp;
1329   } else {
1330     ierr = ISGetSize(rowp,&nrows);CHKERRQ(ierr);
1331     ierr = ISGetIndices(rowp,&rows);CHKERRQ(ierr);
1332     ierr = ISCreateGeneral(comm,nrows,rows,PETSC_COPY_VALUES,&crowp);CHKERRQ(ierr);
1333     ierr = ISRestoreIndices(rowp,&rows);CHKERRQ(ierr);
1334   }
1335   /* collect the global row permutation and invert it */
1336   ierr = ISAllGather(crowp,&growp);CHKERRQ(ierr);
1337   ierr = ISSetPermutation(growp);CHKERRQ(ierr);
1338   if (pcomm!=comm) {
1339     ierr = ISDestroy(crowp);CHKERRQ(ierr);
1340   }
1341   ierr = ISInvertPermutation(growp,PETSC_DECIDE,&irowp);CHKERRQ(ierr);
1342   /* get the local target indices */
1343   ierr = MatGetOwnershipRange(A,&first,PETSC_NULL);CHKERRQ(ierr);
1344   ierr = MatGetLocalSize(A,&local_size,PETSC_NULL);CHKERRQ(ierr);
1345   ierr = ISGetIndices(irowp,&rows);CHKERRQ(ierr);
1346   ierr = ISCreateGeneral(MPI_COMM_SELF,local_size,rows+first,PETSC_COPY_VALUES,&lrowp);CHKERRQ(ierr);
1347   ierr = ISRestoreIndices(irowp,&rows);CHKERRQ(ierr);
1348   ierr = ISDestroy(irowp);CHKERRQ(ierr);
1349   /* the column permutation is so much easier;
1350      make a local version of 'colp' and invert it */
1351   ierr = PetscObjectGetComm((PetscObject)colp,&pcomm);CHKERRQ(ierr);
1352   ierr = MPI_Comm_size(pcomm,&size);CHKERRQ(ierr);
1353   if (size==1) {
1354     lcolp = colp;
1355   } else {
1356     ierr = ISGetSize(colp,&nrows);CHKERRQ(ierr);
1357     ierr = ISGetIndices(colp,&rows);CHKERRQ(ierr);
1358     ierr = ISCreateGeneral(MPI_COMM_SELF,nrows,rows,PETSC_COPY_VALUES,&lcolp);CHKERRQ(ierr);
1359   }
1360   ierr = ISSetPermutation(lcolp);CHKERRQ(ierr);
1361   ierr = ISInvertPermutation(lcolp,PETSC_DECIDE,&icolp);CHKERRQ(ierr);
1362   ierr = ISSetPermutation(icolp);CHKERRQ(ierr);
1363   if (size>1) {
1364     ierr = ISRestoreIndices(colp,&rows);CHKERRQ(ierr);
1365     ierr = ISDestroy(lcolp);CHKERRQ(ierr);
1366   }
1367   /* now we just get the submatrix */
1368   ierr = MatGetSubMatrix_MPIAIJ_Private(A,lrowp,icolp,local_size,MAT_INITIAL_MATRIX,B);CHKERRQ(ierr);
1369   /* clean up */
1370   ierr = ISDestroy(lrowp);CHKERRQ(ierr);
1371   ierr = ISDestroy(icolp);CHKERRQ(ierr);
1372   PetscFunctionReturn(0);
1373 }
1374 
1375 #undef __FUNCT__
1376 #define __FUNCT__ "MatGetInfo_MPIAIJ"
1377 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1378 {
1379   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1380   Mat            A = mat->A,B = mat->B;
1381   PetscErrorCode ierr;
1382   PetscReal      isend[5],irecv[5];
1383 
1384   PetscFunctionBegin;
1385   info->block_size     = 1.0;
1386   ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1387   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1388   isend[3] = info->memory;  isend[4] = info->mallocs;
1389   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1390   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1391   isend[3] += info->memory;  isend[4] += info->mallocs;
1392   if (flag == MAT_LOCAL) {
1393     info->nz_used      = isend[0];
1394     info->nz_allocated = isend[1];
1395     info->nz_unneeded  = isend[2];
1396     info->memory       = isend[3];
1397     info->mallocs      = isend[4];
1398   } else if (flag == MAT_GLOBAL_MAX) {
1399     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,((PetscObject)matin)->comm);CHKERRQ(ierr);
1400     info->nz_used      = irecv[0];
1401     info->nz_allocated = irecv[1];
1402     info->nz_unneeded  = irecv[2];
1403     info->memory       = irecv[3];
1404     info->mallocs      = irecv[4];
1405   } else if (flag == MAT_GLOBAL_SUM) {
1406     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,((PetscObject)matin)->comm);CHKERRQ(ierr);
1407     info->nz_used      = irecv[0];
1408     info->nz_allocated = irecv[1];
1409     info->nz_unneeded  = irecv[2];
1410     info->memory       = irecv[3];
1411     info->mallocs      = irecv[4];
1412   }
1413   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1414   info->fill_ratio_needed = 0;
1415   info->factor_mallocs    = 0;
1416 
1417   PetscFunctionReturn(0);
1418 }
1419 
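/*
   A caller-side sketch for MatGetInfo() above (added commentary, assumed usage):
   per-process numbers are combined with MPI_MAX or MPI_SUM according to the
   MatInfoType argument, so a global nonzero count is obtained with:

       MatInfo info;
       ierr = MatGetInfo(A,MAT_GLOBAL_SUM,&info);CHKERRQ(ierr);
       ierr = PetscPrintf(PETSC_COMM_WORLD,"nz used %g, allocated %g\n",info.nz_used,info.nz_allocated);CHKERRQ(ierr);
*/
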
1420 #undef __FUNCT__
1421 #define __FUNCT__ "MatSetOption_MPIAIJ"
1422 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscBool  flg)
1423 {
1424   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1425   PetscErrorCode ierr;
1426 
1427   PetscFunctionBegin;
1428   switch (op) {
1429   case MAT_NEW_NONZERO_LOCATIONS:
1430   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1431   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1432   case MAT_KEEP_NONZERO_PATTERN:
1433   case MAT_NEW_NONZERO_LOCATION_ERR:
1434   case MAT_USE_INODES:
1435   case MAT_IGNORE_ZERO_ENTRIES:
1436     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1437     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1438     break;
1439   case MAT_ROW_ORIENTED:
1440     a->roworiented = flg;
1441     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1442     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1443     break;
1444   case MAT_NEW_DIAGONALS:
1445     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1446     break;
1447   case MAT_IGNORE_OFF_PROC_ENTRIES:
1448     a->donotstash = PETSC_TRUE;
1449     break;
1450   case MAT_SPD:
1451     A->spd_set                         = PETSC_TRUE;
1452     A->spd                             = flg;
1453     if (flg) {
1454       A->symmetric                     = PETSC_TRUE;
1455       A->structurally_symmetric        = PETSC_TRUE;
1456       A->symmetric_set                 = PETSC_TRUE;
1457       A->structurally_symmetric_set    = PETSC_TRUE;
1458     }
1459     break;
1460   case MAT_SYMMETRIC:
1461     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1462     break;
1463   case MAT_STRUCTURALLY_SYMMETRIC:
1464     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1465     break;
1466   case MAT_HERMITIAN:
1467     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1468     break;
1469   case MAT_SYMMETRY_ETERNAL:
1470     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1471     break;
1472   default:
1473     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op);
1474   }
1475   PetscFunctionReturn(0);
1476 }
1477 
1478 #undef __FUNCT__
1479 #define __FUNCT__ "MatGetRow_MPIAIJ"
1480 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1481 {
1482   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1483   PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
1484   PetscErrorCode ierr;
1485   PetscInt       i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart;
1486   PetscInt       nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend;
1487   PetscInt       *cmap,*idx_p;
1488 
1489   PetscFunctionBegin;
1490   if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
1491   mat->getrowactive = PETSC_TRUE;
1492 
1493   if (!mat->rowvalues && (idx || v)) {
1494     /*
1495         allocate enough space to hold information from the longest row.
1496     */
1497     Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data;
1498     PetscInt   max = 1,tmp;
1499     for (i=0; i<matin->rmap->n; i++) {
1500       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1501       if (max < tmp) { max = tmp; }
1502     }
1503     ierr = PetscMalloc2(max,PetscScalar,&mat->rowvalues,max,PetscInt,&mat->rowindices);CHKERRQ(ierr);
1504   }
1505 
1506   if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows");
1507   lrow = row - rstart;
1508 
1509   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1510   if (!v)   {pvA = 0; pvB = 0;}
1511   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1512   ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1513   ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1514   nztot = nzA + nzB;
1515 
1516   cmap  = mat->garray;
1517   if (v  || idx) {
1518     if (nztot) {
1519       /* Sort by increasing column numbers, assuming A and B already sorted */
1520       PetscInt imark = -1;
1521       if (v) {
1522         *v = v_p = mat->rowvalues;
1523         for (i=0; i<nzB; i++) {
1524           if (cmap[cworkB[i]] < cstart)   v_p[i] = vworkB[i];
1525           else break;
1526         }
1527         imark = i;
1528         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1529         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1530       }
1531       if (idx) {
1532         *idx = idx_p = mat->rowindices;
1533         if (imark > -1) {
1534           for (i=0; i<imark; i++) {
1535             idx_p[i] = cmap[cworkB[i]];
1536           }
1537         } else {
1538           for (i=0; i<nzB; i++) {
1539             if (cmap[cworkB[i]] < cstart)   idx_p[i] = cmap[cworkB[i]];
1540             else break;
1541           }
1542           imark = i;
1543         }
1544         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart + cworkA[i];
1545         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]];
1546       }
1547     } else {
1548       if (idx) *idx = 0;
1549       if (v)   *v   = 0;
1550     }
1551   }
1552   *nz = nztot;
1553   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1554   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1555   PetscFunctionReturn(0);
1556 }
1557 
1558 #undef __FUNCT__
1559 #define __FUNCT__ "MatRestoreRow_MPIAIJ"
1560 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1561 {
1562   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1563 
1564   PetscFunctionBegin;
1565   if (!aij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
1566   aij->getrowactive = PETSC_FALSE;
1567   PetscFunctionReturn(0);
1568 }
1569 
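/*
   A caller-side sketch for MatGetRow()/MatRestoreRow() above (added commentary,
   assumed usage): only locally owned rows may be requested, and each
   MatGetRow() must be matched by MatRestoreRow() before the next row is read.

       PetscInt          rstart,rend,row,ncols;
       const PetscInt    *cols;
       const PetscScalar *vals;
       ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
       for (row=rstart; row<rend; row++) {
         ierr = MatGetRow(A,row,&ncols,&cols,&vals);CHKERRQ(ierr);
         ... inspect the ncols global column indices and values ...
         ierr = MatRestoreRow(A,row,&ncols,&cols,&vals);CHKERRQ(ierr);
       }
*/
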
1570 #undef __FUNCT__
1571 #define __FUNCT__ "MatNorm_MPIAIJ"
1572 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm)
1573 {
1574   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1575   Mat_SeqAIJ     *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data;
1576   PetscErrorCode ierr;
1577   PetscInt       i,j,cstart = mat->cmap->rstart;
1578   PetscReal      sum = 0.0;
1579   MatScalar      *v;
1580 
1581   PetscFunctionBegin;
1582   if (aij->size == 1) {
1583     ierr =  MatNorm(aij->A,type,norm);CHKERRQ(ierr);
1584   } else {
1585     if (type == NORM_FROBENIUS) {
1586       v = amat->a;
1587       for (i=0; i<amat->nz; i++) {
1588 #if defined(PETSC_USE_COMPLEX)
1589         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1590 #else
1591         sum += (*v)*(*v); v++;
1592 #endif
1593       }
1594       v = bmat->a;
1595       for (i=0; i<bmat->nz; i++) {
1596 #if defined(PETSC_USE_COMPLEX)
1597         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1598 #else
1599         sum += (*v)*(*v); v++;
1600 #endif
1601       }
1602       ierr = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPI_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr);
1603       *norm = sqrt(*norm);
1604     } else if (type == NORM_1) { /* max column norm */
1605       PetscReal *tmp,*tmp2;
1606       PetscInt  *jj,*garray = aij->garray;
1607       ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr);
1608       ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscReal),&tmp2);CHKERRQ(ierr);
1609       ierr = PetscMemzero(tmp,mat->cmap->N*sizeof(PetscReal));CHKERRQ(ierr);
1610       *norm = 0.0;
1611       v = amat->a; jj = amat->j;
1612       for (j=0; j<amat->nz; j++) {
1613         tmp[cstart + *jj++ ] += PetscAbsScalar(*v);  v++;
1614       }
1615       v = bmat->a; jj = bmat->j;
1616       for (j=0; j<bmat->nz; j++) {
1617         tmp[garray[*jj++]] += PetscAbsScalar(*v); v++;
1618       }
1619       ierr = MPI_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPI_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr);
1620       for (j=0; j<mat->cmap->N; j++) {
1621         if (tmp2[j] > *norm) *norm = tmp2[j];
1622       }
1623       ierr = PetscFree(tmp);CHKERRQ(ierr);
1624       ierr = PetscFree(tmp2);CHKERRQ(ierr);
1625     } else if (type == NORM_INFINITY) { /* max row norm */
1626       PetscReal ntemp = 0.0;
1627       for (j=0; j<aij->A->rmap->n; j++) {
1628         v = amat->a + amat->i[j];
1629         sum = 0.0;
1630         for (i=0; i<amat->i[j+1]-amat->i[j]; i++) {
1631           sum += PetscAbsScalar(*v); v++;
1632         }
1633         v = bmat->a + bmat->i[j];
1634         for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) {
1635           sum += PetscAbsScalar(*v); v++;
1636         }
1637         if (sum > ntemp) ntemp = sum;
1638       }
1639       ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPI_MAX,((PetscObject)mat)->comm);CHKERRQ(ierr);
1640     } else {
1641       SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_SUP,"No support for two norm");
1642     }
1643   }
1644   PetscFunctionReturn(0);
1645 }
1646 
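/*
   The norms computed above are, for a matrix with entries a_ij,

       NORM_FROBENIUS:  sqrt( sum_ij |a_ij|^2 )
       NORM_1:          max_j sum_i |a_ij|   (largest column sum)
       NORM_INFINITY:   max_i sum_j |a_ij|   (largest row sum)

   A caller-side sketch (added commentary, assumed usage):

       PetscReal nrm;
       ierr = MatNorm(A,NORM_INFINITY,&nrm);CHKERRQ(ierr);
*/
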
1647 #undef __FUNCT__
1648 #define __FUNCT__ "MatTranspose_MPIAIJ"
1649 PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout)
1650 {
1651   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1652   Mat_SeqAIJ     *Aloc=(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data;
1653   PetscErrorCode ierr;
1654   PetscInt       M = A->rmap->N,N = A->cmap->N,ma,na,mb,*ai,*aj,*bi,*bj,row,*cols,*cols_tmp,i,*d_nnz;
1655   PetscInt       cstart=A->cmap->rstart,ncol;
1656   Mat            B;
1657   MatScalar      *array;
1658 
1659   PetscFunctionBegin;
1660   if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
1661 
1662   ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n;
1663   ai = Aloc->i; aj = Aloc->j;
1664   bi = Bloc->i; bj = Bloc->j;
1665   if (reuse == MAT_INITIAL_MATRIX || *matout == A) {
1666     /* compute d_nnz for preallocation; o_nnz is approximated by d_nnz to avoid communication */
1667     ierr = PetscMalloc((1+na)*sizeof(PetscInt),&d_nnz);CHKERRQ(ierr);
1668     ierr = PetscMemzero(d_nnz,(1+na)*sizeof(PetscInt));CHKERRQ(ierr);
1669     for (i=0; i<ai[ma]; i++){
1670       d_nnz[aj[i]] ++;
1671       aj[i] += cstart; /* global col index to be used by MatSetValues() */
1672     }
1673 
1674     ierr = MatCreate(((PetscObject)A)->comm,&B);CHKERRQ(ierr);
1675     ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
1676     ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
1677     ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,d_nnz);CHKERRQ(ierr);
1678     ierr = PetscFree(d_nnz);CHKERRQ(ierr);
1679   } else {
1680     B = *matout;
1681   }
1682 
1683   /* copy over the A part */
1684   array = Aloc->a;
1685   row = A->rmap->rstart;
1686   for (i=0; i<ma; i++) {
1687     ncol = ai[i+1]-ai[i];
1688     ierr = MatSetValues(B,ncol,aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1689     row++; array += ncol; aj += ncol;
1690   }
1691   aj = Aloc->j;
1692   for (i=0; i<ai[ma]; i++) aj[i] -= cstart; /* restore local col index */
1693 
1694   /* copy over the B part */
1695   ierr = PetscMalloc(bi[mb]*sizeof(PetscInt),&cols);CHKERRQ(ierr);
1696   ierr = PetscMemzero(cols,bi[mb]*sizeof(PetscInt));CHKERRQ(ierr);
1697   array = Bloc->a;
1698   row = A->rmap->rstart;
1699   for (i=0; i<bi[mb]; i++) {cols[i] = a->garray[bj[i]];}
1700   cols_tmp = cols;
1701   for (i=0; i<mb; i++) {
1702     ncol = bi[i+1]-bi[i];
1703     ierr = MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1704     row++; array += ncol; cols_tmp += ncol;
1705   }
1706   ierr = PetscFree(cols);CHKERRQ(ierr);
1707 
1708   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1709   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1710   if (reuse == MAT_INITIAL_MATRIX || *matout != A) {
1711     *matout = B;
1712   } else {
1713     ierr = MatHeaderMerge(A,B);CHKERRQ(ierr);
1714   }
1715   PetscFunctionReturn(0);
1716 }
1717 
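/*
   A caller-side sketch for the transpose above (added commentary, assumed
   usage): the out-of-place form creates a new parallel matrix, while the
   in-place form is restricted to square matrices, matching the size check
   at the top of the routine.

       Mat At;
       ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr);    (out of place)
       ierr = MatTranspose(A,MAT_REUSE_MATRIX,&A);CHKERRQ(ierr);       (in place, square only)
*/
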
1718 #undef __FUNCT__
1719 #define __FUNCT__ "MatDiagonalScale_MPIAIJ"
1720 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
1721 {
1722   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1723   Mat            a = aij->A,b = aij->B;
1724   PetscErrorCode ierr;
1725   PetscInt       s1,s2,s3;
1726 
1727   PetscFunctionBegin;
1728   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
1729   if (rr) {
1730     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
1731     if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
1732     /* Overlap communication with computation. */
1733     ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1734   }
1735   if (ll) {
1736     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
1737     if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
1738     ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr);
1739   }
1740   /* scale the diagonal block */
1741   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
1742 
1743   if (rr) {
1744     /* Do a scatter end and then right scale the off-diagonal block */
1745     ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1746     ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr);
1747   }
1748 
1749   PetscFunctionReturn(0);
1750 }
1751 
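/*
   The scaling above forms  mat <- diag(ll) * mat * diag(rr):  the left
   scaling is applied to both the diagonal block A and the off-diagonal
   block B, while the right scaling reaches B through the scattered ghost
   values of rr.  A caller-side sketch (added commentary, assumed usage;
   either vector may be PETSC_NULL to skip that side):

       ierr = MatDiagonalScale(A,left,right);CHKERRQ(ierr);
*/
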
1752 #undef __FUNCT__
1753 #define __FUNCT__ "MatSetBlockSize_MPIAIJ"
1754 PetscErrorCode MatSetBlockSize_MPIAIJ(Mat A,PetscInt bs)
1755 {
1756   Mat_MPIAIJ     *a   = (Mat_MPIAIJ*)A->data;
1757   PetscErrorCode ierr;
1758 
1759   PetscFunctionBegin;
1760   ierr = MatSetBlockSize(a->A,bs);CHKERRQ(ierr);
1761   ierr = MatSetBlockSize(a->B,bs);CHKERRQ(ierr);
1762   ierr = PetscLayoutSetBlockSize(A->rmap,bs);CHKERRQ(ierr);
1763   ierr = PetscLayoutSetBlockSize(A->cmap,bs);CHKERRQ(ierr);
1764   PetscFunctionReturn(0);
1765 }
1766 #undef __FUNCT__
1767 #define __FUNCT__ "MatSetUnfactored_MPIAIJ"
1768 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A)
1769 {
1770   Mat_MPIAIJ     *a   = (Mat_MPIAIJ*)A->data;
1771   PetscErrorCode ierr;
1772 
1773   PetscFunctionBegin;
1774   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
1775   PetscFunctionReturn(0);
1776 }
1777 
1778 #undef __FUNCT__
1779 #define __FUNCT__ "MatEqual_MPIAIJ"
1780 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscBool  *flag)
1781 {
1782   Mat_MPIAIJ     *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data;
1783   Mat            a,b,c,d;
1784   PetscBool      flg;
1785   PetscErrorCode ierr;
1786 
1787   PetscFunctionBegin;
1788   a = matA->A; b = matA->B;
1789   c = matB->A; d = matB->B;
1790 
1791   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
1792   if (flg) {
1793     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
1794   }
1795   ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,((PetscObject)A)->comm);CHKERRQ(ierr);
1796   PetscFunctionReturn(0);
1797 }
1798 
1799 #undef __FUNCT__
1800 #define __FUNCT__ "MatCopy_MPIAIJ"
1801 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
1802 {
1803   PetscErrorCode ierr;
1804   Mat_MPIAIJ     *a = (Mat_MPIAIJ *)A->data;
1805   Mat_MPIAIJ     *b = (Mat_MPIAIJ *)B->data;
1806 
1807   PetscFunctionBegin;
1808   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
1809   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
1810     /* because of the column compression in the off-processor part of the matrix a->B,
1811        the number of columns in a->B and b->B may be different, hence we cannot call
1812        the MatCopy() directly on the two parts. If need be, we can provide a more
1813        efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices
1814        then copying the submatrices */
1815     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
1816   } else {
1817     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
1818     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
1819   }
1820   PetscFunctionReturn(0);
1821 }
1822 
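/*
   A caller-side sketch for the copy above (added commentary, assumed usage):
   the fast path copies the A and B blocks separately and is taken only when
   both matrices share the nonzero pattern and the same copy implementation;
   otherwise the generic MatCopy_Basic() path is used.

       ierr = MatCopy(X,Y,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
*/
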
1823 #undef __FUNCT__
1824 #define __FUNCT__ "MatSetUpPreallocation_MPIAIJ"
1825 PetscErrorCode MatSetUpPreallocation_MPIAIJ(Mat A)
1826 {
1827   PetscErrorCode ierr;
1828 
1829   PetscFunctionBegin;
1830   ierr =  MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
1831   PetscFunctionReturn(0);
1832 }
1833 
1834 #undef __FUNCT__
1835 #define __FUNCT__ "MatAXPY_MPIAIJ"
1836 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
1837 {
1838   PetscErrorCode ierr;
1839   PetscInt       i;
1840   Mat_MPIAIJ     *xx = (Mat_MPIAIJ *)X->data,*yy = (Mat_MPIAIJ *)Y->data;
1841   PetscBLASInt   bnz,one=1;
1842   Mat_SeqAIJ     *x,*y;
1843 
1844   PetscFunctionBegin;
1845   if (str == SAME_NONZERO_PATTERN) {
1846     PetscScalar alpha = a;
1847     x = (Mat_SeqAIJ *)xx->A->data;
1848     y = (Mat_SeqAIJ *)yy->A->data;
1849     bnz = PetscBLASIntCast(x->nz);
1850     BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
1851     x = (Mat_SeqAIJ *)xx->B->data;
1852     y = (Mat_SeqAIJ *)yy->B->data;
1853     bnz = PetscBLASIntCast(x->nz);
1854     BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
1855   } else if (str == SUBSET_NONZERO_PATTERN) {
1856     ierr = MatAXPY_SeqAIJ(yy->A,a,xx->A,str);CHKERRQ(ierr);
1857 
1858     x = (Mat_SeqAIJ *)xx->B->data;
1859     y = (Mat_SeqAIJ *)yy->B->data;
1860     if (y->xtoy && y->XtoY != xx->B) {
1861       ierr = PetscFree(y->xtoy);CHKERRQ(ierr);
1862       ierr = MatDestroy(y->XtoY);CHKERRQ(ierr);
1863     }
1864     if (!y->xtoy) { /* get xtoy */
1865       ierr = MatAXPYGetxtoy_Private(xx->B->rmap->n,x->i,x->j,xx->garray,y->i,y->j,yy->garray,&y->xtoy);CHKERRQ(ierr);
1866       y->XtoY = xx->B;
1867       ierr = PetscObjectReference((PetscObject)xx->B);CHKERRQ(ierr);
1868     }
1869     for (i=0; i<x->nz; i++) y->a[y->xtoy[i]] += a*(x->a[i]);
1870   } else {
1871     Mat B;
1872     PetscInt *nnz_d,*nnz_o;
1873     ierr = PetscMalloc(yy->A->rmap->N*sizeof(PetscInt),&nnz_d);CHKERRQ(ierr);
1874     ierr = PetscMalloc(yy->B->rmap->N*sizeof(PetscInt),&nnz_o);CHKERRQ(ierr);
1875     ierr = MatCreate(((PetscObject)Y)->comm,&B);CHKERRQ(ierr);
1876     ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr);
1877     ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr);
1878     ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr);
1879     ierr = MatAXPYGetPreallocation_SeqAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr);
1880     ierr = MatAXPYGetPreallocation_SeqAIJ(yy->B,xx->B,nnz_o);CHKERRQ(ierr);
1881     ierr = MatMPIAIJSetPreallocation(B,PETSC_NULL,nnz_d,PETSC_NULL,nnz_o);CHKERRQ(ierr);
1882     ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr);
1883     ierr = MatHeaderReplace(Y,B);CHKERRQ(ierr);
1884     ierr = PetscFree(nnz_d);CHKERRQ(ierr);
1885     ierr = PetscFree(nnz_o);CHKERRQ(ierr);
1886   }
1887   PetscFunctionReturn(0);
1888 }
1889 
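/*
   A caller-side sketch for MatAXPY() above (added commentary, assumed usage):
   Y <- Y + a*X.  SAME_NONZERO_PATTERN reduces to two BLAS axpy calls on the
   stored values, SUBSET_NONZERO_PATTERN maps X's entries into Y through the
   cached xtoy table, and any other pattern rebuilds Y with fresh preallocation.

       ierr = MatAXPY(Y,2.0,X,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
*/
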
1890 EXTERN PetscErrorCode PETSCMAT_DLLEXPORT MatConjugate_SeqAIJ(Mat);
1891 
1892 #undef __FUNCT__
1893 #define __FUNCT__ "MatConjugate_MPIAIJ"
1894 PetscErrorCode PETSCMAT_DLLEXPORT MatConjugate_MPIAIJ(Mat mat)
1895 {
1896 #if defined(PETSC_USE_COMPLEX)
1897   PetscErrorCode ierr;
1898   Mat_MPIAIJ     *aij = (Mat_MPIAIJ *)mat->data;
1899 
1900   PetscFunctionBegin;
1901   ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr);
1902   ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr);
1903 #else
1904   PetscFunctionBegin;
1905 #endif
1906   PetscFunctionReturn(0);
1907 }
1908 
1909 #undef __FUNCT__
1910 #define __FUNCT__ "MatRealPart_MPIAIJ"
1911 PetscErrorCode MatRealPart_MPIAIJ(Mat A)
1912 {
1913   Mat_MPIAIJ   *a = (Mat_MPIAIJ*)A->data;
1914   PetscErrorCode ierr;
1915 
1916   PetscFunctionBegin;
1917   ierr = MatRealPart(a->A);CHKERRQ(ierr);
1918   ierr = MatRealPart(a->B);CHKERRQ(ierr);
1919   PetscFunctionReturn(0);
1920 }
1921 
1922 #undef __FUNCT__
1923 #define __FUNCT__ "MatImaginaryPart_MPIAIJ"
1924 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A)
1925 {
1926   Mat_MPIAIJ   *a = (Mat_MPIAIJ*)A->data;
1927   PetscErrorCode ierr;
1928 
1929   PetscFunctionBegin;
1930   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
1931   ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
1932   PetscFunctionReturn(0);
1933 }
1934 
1935 #ifdef PETSC_HAVE_PBGL
1936 
1937 #include <boost/parallel/mpi/bsp_process_group.hpp>
1938 #include <boost/graph/distributed/ilu_default_graph.hpp>
1939 #include <boost/graph/distributed/ilu_0_block.hpp>
1940 #include <boost/graph/distributed/ilu_preconditioner.hpp>
1941 #include <boost/graph/distributed/petsc/interface.hpp>
1942 #include <boost/multi_array.hpp>
1943 #include <boost/parallel/distributed_property_map.hpp>
1944 
1945 #undef __FUNCT__
1946 #define __FUNCT__ "MatILUFactorSymbolic_MPIAIJ"
1947 /*
1948   This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu>
1949 */
1950 PetscErrorCode MatILUFactorSymbolic_MPIAIJ(Mat fact,Mat A, IS isrow, IS iscol, const MatFactorInfo *info)
1951 {
1952   namespace petsc = boost::distributed::petsc;
1953 
1954   namespace graph_dist = boost::graph::distributed;
1955   using boost::graph::distributed::ilu_default::process_group_type;
1956   using boost::graph::ilu_permuted;
1957 
1958   PetscBool       row_identity, col_identity;
1959   PetscContainer  c;
1960   PetscInt        m, n, M, N;
1961   PetscErrorCode  ierr;
1962 
1963   PetscFunctionBegin;
1964   if (info->levels != 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only levels = 0 supported for parallel ilu");
1965   ierr = ISIdentity(isrow, &row_identity);CHKERRQ(ierr);
1966   ierr = ISIdentity(iscol, &col_identity);CHKERRQ(ierr);
1967   if (!row_identity || !col_identity) {
1968     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for parallel ILU");
1969   }
1970 
1971   process_group_type pg;
1972   typedef graph_dist::ilu_default::ilu_level_graph_type  lgraph_type;
1973   lgraph_type*   lgraph_p = new lgraph_type(petsc::num_global_vertices(A), pg, petsc::matrix_distribution(A, pg));
1974   lgraph_type&   level_graph = *lgraph_p;
1975   graph_dist::ilu_default::graph_type&            graph(level_graph.graph);
1976 
1977   petsc::read_matrix(A, graph, get(boost::edge_weight, graph));
1978   ilu_permuted(level_graph);
1979 
1980   /* put together the new matrix */
1981   ierr = MatCreate(((PetscObject)A)->comm, fact);CHKERRQ(ierr);
1982   ierr = MatGetLocalSize(A, &m, &n);CHKERRQ(ierr);
1983   ierr = MatGetSize(A, &M, &N);CHKERRQ(ierr);
1984   ierr = MatSetSizes(fact, m, n, M, N);CHKERRQ(ierr);
1985   ierr = MatSetType(fact, ((PetscObject)A)->type_name);CHKERRQ(ierr);
1986   ierr = MatAssemblyBegin(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1987   ierr = MatAssemblyEnd(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1988 
1989   ierr = PetscContainerCreate(((PetscObject)A)->comm, &c);CHKERRQ(ierr);
1990   ierr = PetscContainerSetPointer(c, lgraph_p);CHKERRQ(ierr);
1991   ierr = PetscObjectCompose((PetscObject) (fact), "graph", (PetscObject) c);CHKERRQ(ierr);
1992   PetscFunctionReturn(0);
1993 }
1994 
1995 #undef __FUNCT__
1996 #define __FUNCT__ "MatLUFactorNumeric_MPIAIJ"
1997 PetscErrorCode MatLUFactorNumeric_MPIAIJ(Mat B,Mat A, const MatFactorInfo *info)
1998 {
1999   PetscFunctionBegin;
2000   PetscFunctionReturn(0);
2001 }
2002 
2003 #undef __FUNCT__
2004 #define __FUNCT__ "MatSolve_MPIAIJ"
2005 /*
2006   This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu>
2007 */
2008 PetscErrorCode MatSolve_MPIAIJ(Mat A, Vec b, Vec x)
2009 {
2010   namespace graph_dist = boost::graph::distributed;
2011 
2012   typedef graph_dist::ilu_default::ilu_level_graph_type  lgraph_type;
2013   lgraph_type*   lgraph_p;
2014   PetscContainer c;
2015   PetscErrorCode ierr;
2016 
2017   PetscFunctionBegin;
2018   ierr = PetscObjectQuery((PetscObject) A, "graph", (PetscObject *) &c);CHKERRQ(ierr);
2019   ierr = PetscContainerGetPointer(c, (void **) &lgraph_p);CHKERRQ(ierr);
2020   ierr = VecCopy(b, x);CHKERRQ(ierr);
2021 
2022   PetscScalar* array_x;
2023   ierr = VecGetArray(x, &array_x);CHKERRQ(ierr);
2024   PetscInt sx;
2025   ierr = VecGetSize(x, &sx);CHKERRQ(ierr);
2026 
2027   PetscScalar* array_b;
2028   ierr = VecGetArray(b, &array_b);CHKERRQ(ierr);
2029   PetscInt sb;
2030   ierr = VecGetSize(b, &sb);CHKERRQ(ierr);
2031 
2032   lgraph_type&   level_graph = *lgraph_p;
2033   graph_dist::ilu_default::graph_type&            graph(level_graph.graph);
2034 
2035   typedef boost::multi_array_ref<PetscScalar, 1> array_ref_type;
2036   array_ref_type                                 ref_b(array_b, boost::extents[num_vertices(graph)]),
2037                                                  ref_x(array_x, boost::extents[num_vertices(graph)]);
2038 
2039   typedef boost::iterator_property_map<array_ref_type::iterator,
2040                                 boost::property_map<graph_dist::ilu_default::graph_type, boost::vertex_index_t>::type>  gvector_type;
2041   gvector_type                                   vector_b(ref_b.begin(), get(boost::vertex_index, graph)),
2042                                                  vector_x(ref_x.begin(), get(boost::vertex_index, graph));
2043 
2044   ilu_set_solve(*lgraph_p, vector_b, vector_x);
2045 
2046   PetscFunctionReturn(0);
2047 }
2048 #endif
2049 
2050 typedef struct { /* used by MatGetRedundantMatrix() for reusing matredundant */
2051   PetscInt       nzlocal,nsends,nrecvs;
2052   PetscMPIInt    *send_rank,*recv_rank;
2053   PetscInt       *sbuf_nz,*rbuf_nz,*sbuf_j,**rbuf_j;
2054   PetscScalar    *sbuf_a,**rbuf_a;
2055   PetscErrorCode (*MatDestroy)(Mat);
2056 } Mat_Redundant;
2057 
2058 #undef __FUNCT__
2059 #define __FUNCT__ "PetscContainerDestroy_MatRedundant"
2060 PetscErrorCode PetscContainerDestroy_MatRedundant(void *ptr)
2061 {
2062   PetscErrorCode       ierr;
2063   Mat_Redundant        *redund=(Mat_Redundant*)ptr;
2064   PetscInt             i;
2065 
2066   PetscFunctionBegin;
2067   ierr = PetscFree2(redund->send_rank,redund->recv_rank);CHKERRQ(ierr);
2068   ierr = PetscFree(redund->sbuf_j);CHKERRQ(ierr);
2069   ierr = PetscFree(redund->sbuf_a);CHKERRQ(ierr);
2070   for (i=0; i<redund->nrecvs; i++){
2071     ierr = PetscFree(redund->rbuf_j[i]);CHKERRQ(ierr);
2072     ierr = PetscFree(redund->rbuf_a[i]);CHKERRQ(ierr);
2073   }
2074   ierr = PetscFree4(redund->sbuf_nz,redund->rbuf_nz,redund->rbuf_j,redund->rbuf_a);CHKERRQ(ierr);
2075   ierr = PetscFree(redund);CHKERRQ(ierr);
2076   PetscFunctionReturn(0);
2077 }
2078 
2079 #undef __FUNCT__
2080 #define __FUNCT__ "MatDestroy_MatRedundant"
2081 PetscErrorCode MatDestroy_MatRedundant(Mat A)
2082 {
2083   PetscErrorCode  ierr;
2084   PetscContainer  container;
2085   Mat_Redundant   *redund=PETSC_NULL;
2086 
2087   PetscFunctionBegin;
2088   ierr = PetscObjectQuery((PetscObject)A,"Mat_Redundant",(PetscObject *)&container);CHKERRQ(ierr);
2089   if (container) {
2090     ierr = PetscContainerGetPointer(container,(void **)&redund);CHKERRQ(ierr);
2091   } else {
2092     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Container does not exist");
2093   }
2094   A->ops->destroy = redund->MatDestroy;
2095   ierr = PetscObjectCompose((PetscObject)A,"Mat_Redundant",0);CHKERRQ(ierr);
2096   ierr = (*A->ops->destroy)(A);CHKERRQ(ierr);
2097   ierr = PetscContainerDestroy(container);CHKERRQ(ierr);
2098   PetscFunctionReturn(0);
2099 }
2100 
2101 #undef __FUNCT__
2102 #define __FUNCT__ "MatGetRedundantMatrix_MPIAIJ"
2103 PetscErrorCode MatGetRedundantMatrix_MPIAIJ(Mat mat,PetscInt nsubcomm,MPI_Comm subcomm,PetscInt mlocal_sub,MatReuse reuse,Mat *matredundant)
2104 {
2105   PetscMPIInt    rank,size;
2106   MPI_Comm       comm=((PetscObject)mat)->comm;
2107   PetscErrorCode ierr;
2108   PetscInt       nsends=0,nrecvs=0,i,rownz_max=0;
2109   PetscMPIInt    *send_rank=PETSC_NULL,*recv_rank=PETSC_NULL;
2110   PetscInt       *rowrange=mat->rmap->range;
2111   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2112   Mat            A=aij->A,B=aij->B,C=*matredundant;
2113   Mat_SeqAIJ     *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data;
2114   PetscScalar    *sbuf_a;
2115   PetscInt       nzlocal=a->nz+b->nz;
2116   PetscInt       j,cstart=mat->cmap->rstart,cend=mat->cmap->rend,row,nzA,nzB,ncols,*cworkA,*cworkB;
2117   PetscInt       rstart=mat->rmap->rstart,rend=mat->rmap->rend,*bmap=aij->garray,M,N;
2118   PetscInt       *cols,ctmp,lwrite,*rptr,l,*sbuf_j;
2119   MatScalar      *aworkA,*aworkB;
2120   PetscScalar    *vals;
2121   PetscMPIInt    tag1,tag2,tag3,imdex;
2122   MPI_Request    *s_waits1=PETSC_NULL,*s_waits2=PETSC_NULL,*s_waits3=PETSC_NULL,
2123                  *r_waits1=PETSC_NULL,*r_waits2=PETSC_NULL,*r_waits3=PETSC_NULL;
2124   MPI_Status     recv_status,*send_status;
2125   PetscInt       *sbuf_nz=PETSC_NULL,*rbuf_nz=PETSC_NULL,count;
2126   PetscInt       **rbuf_j=PETSC_NULL;
2127   PetscScalar    **rbuf_a=PETSC_NULL;
2128   Mat_Redundant  *redund=PETSC_NULL;
2129   PetscContainer container;
2130 
2131   PetscFunctionBegin;
2132   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2133   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2134 
2135   if (reuse == MAT_REUSE_MATRIX) {
2136     ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr);
2137     if (M != N || M != mat->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong global size");
2138     ierr = MatGetLocalSize(C,&M,&N);CHKERRQ(ierr);
2139     if (M != N || M != mlocal_sub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong local size");
2140     ierr = PetscObjectQuery((PetscObject)C,"Mat_Redundant",(PetscObject *)&container);CHKERRQ(ierr);
2141     if (container) {
2142       ierr = PetscContainerGetPointer(container,(void **)&redund);CHKERRQ(ierr);
2143     } else {
2144       SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Container does not exist");
2145     }
2146     if (nzlocal != redund->nzlocal) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong nzlocal");
2147 
2148     nsends    = redund->nsends;
2149     nrecvs    = redund->nrecvs;
2150     send_rank = redund->send_rank;
2151     recv_rank = redund->recv_rank;
2152     sbuf_nz   = redund->sbuf_nz;
2153     rbuf_nz   = redund->rbuf_nz;
2154     sbuf_j    = redund->sbuf_j;
2155     sbuf_a    = redund->sbuf_a;
2156     rbuf_j    = redund->rbuf_j;
2157     rbuf_a    = redund->rbuf_a;
2158   }
2159 
2160   if (reuse == MAT_INITIAL_MATRIX){
2161     PetscMPIInt  subrank,subsize;
2162     PetscInt     nleftover,np_subcomm;
2163     /* get the destination processors' id send_rank, nsends and nrecvs */
2164     ierr = MPI_Comm_rank(subcomm,&subrank);CHKERRQ(ierr);
2165     ierr = MPI_Comm_size(subcomm,&subsize);CHKERRQ(ierr);
2166     ierr = PetscMalloc2(size,PetscMPIInt,&send_rank,size,PetscMPIInt,&recv_rank);CHKERRQ(ierr);
2167     np_subcomm = size/nsubcomm;
2168     nleftover  = size - nsubcomm*np_subcomm;
2169     nsends = 0; nrecvs = 0;
2170     for (i=0; i<size; i++){ /* i=rank*/
2171       if (subrank == i/nsubcomm && rank != i){ /* my_subrank == other's subrank */
2172         send_rank[nsends] = i; nsends++;
2173         recv_rank[nrecvs++] = i;
2174       }
2175     }
2176     if (rank >= size - nleftover){/* this proc is a leftover processor */
2177       i = size-nleftover-1;
2178       j = 0;
2179       while (j < nsubcomm - nleftover){
2180         send_rank[nsends++] = i;
2181         i--; j++;
2182       }
2183     }
2184 
2185     if (nleftover && subsize == size/nsubcomm && subrank==subsize-1){ /* this proc recvs from leftover processors */
2186       for (i=0; i<nleftover; i++){
2187         recv_rank[nrecvs++] = size-nleftover+i;
2188       }
2189     }
2190 
2191     /* allocate sbuf_j, sbuf_a */
2192     i = nzlocal + rowrange[rank+1] - rowrange[rank] + 2;
2193     ierr = PetscMalloc(i*sizeof(PetscInt),&sbuf_j);CHKERRQ(ierr);
2194     ierr = PetscMalloc((nzlocal+1)*sizeof(PetscScalar),&sbuf_a);CHKERRQ(ierr);
2195   } /* end of if (reuse == MAT_INITIAL_MATRIX) */
2196 
2197   /* copy mat's local entries into the buffers */
2198   if (reuse == MAT_INITIAL_MATRIX){
2199     rownz_max = 0;
2200     rptr = sbuf_j;
2201     cols = sbuf_j + rend-rstart + 1;
2202     vals = sbuf_a;
2203     rptr[0] = 0;
2204     for (i=0; i<rend-rstart; i++){
2205       row = i + rstart;
2206       nzA    = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i];
2207       ncols  = nzA + nzB;
2208       cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i];
2209       aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i];
2210       /* load the column indices for this row into cols */
2211       lwrite = 0;
2212       for (l=0; l<nzB; l++) {
2213         if ((ctmp = bmap[cworkB[l]]) < cstart){
2214           vals[lwrite]   = aworkB[l];
2215           cols[lwrite++] = ctmp;
2216         }
2217       }
2218       for (l=0; l<nzA; l++){
2219         vals[lwrite]   = aworkA[l];
2220         cols[lwrite++] = cstart + cworkA[l];
2221       }
2222       for (l=0; l<nzB; l++) {
2223         if ((ctmp = bmap[cworkB[l]]) >= cend){
2224           vals[lwrite]   = aworkB[l];
2225           cols[lwrite++] = ctmp;
2226         }
2227       }
2228       vals += ncols;
2229       cols += ncols;
2230       rptr[i+1] = rptr[i] + ncols;
2231       if (rownz_max < ncols) rownz_max = ncols;
2232     }
2233     if (rptr[rend-rstart] != a->nz + b->nz) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB, "rptr[%d] %d != %d + %d",rend-rstart,rptr[rend-rstart],a->nz,b->nz);
2234   } else { /* only copy matrix values into sbuf_a */
2235     rptr = sbuf_j;
2236     vals = sbuf_a;
2237     rptr[0] = 0;
2238     for (i=0; i<rend-rstart; i++){
2239       row = i + rstart;
2240       nzA    = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i];
2241       ncols  = nzA + nzB;
2242       cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i];
2243       aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i];
2244       lwrite = 0;
2245       for (l=0; l<nzB; l++) {
2246         if ((ctmp = bmap[cworkB[l]]) < cstart) vals[lwrite++] = aworkB[l];
2247       }
2248       for (l=0; l<nzA; l++) vals[lwrite++] = aworkA[l];
2249       for (l=0; l<nzB; l++) {
2250         if ((ctmp = bmap[cworkB[l]]) >= cend) vals[lwrite++] = aworkB[l];
2251       }
2252       vals += ncols;
2253       rptr[i+1] = rptr[i] + ncols;
2254     }
2255   } /* end of if (reuse == MAT_INITIAL_MATRIX) */
2256 
2257   /* send nzlocal to others, and recv other's nzlocal */
2258   /*--------------------------------------------------*/
2259   if (reuse == MAT_INITIAL_MATRIX){
2260     ierr = PetscMalloc2(3*(nsends + nrecvs)+1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr);
2261     s_waits2 = s_waits3 + nsends;
2262     s_waits1 = s_waits2 + nsends;
2263     r_waits1 = s_waits1 + nsends;
2264     r_waits2 = r_waits1 + nrecvs;
2265     r_waits3 = r_waits2 + nrecvs;
2266   } else {
2267     ierr = PetscMalloc2(nsends + nrecvs +1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr);
2268     r_waits3 = s_waits3 + nsends;
2269   }
2270 
2271   ierr = PetscObjectGetNewTag((PetscObject)mat,&tag3);CHKERRQ(ierr);
2272   if (reuse == MAT_INITIAL_MATRIX){
2273     /* get new tags to keep the communication clean */
2274     ierr = PetscObjectGetNewTag((PetscObject)mat,&tag1);CHKERRQ(ierr);
2275     ierr = PetscObjectGetNewTag((PetscObject)mat,&tag2);CHKERRQ(ierr);
2276     ierr = PetscMalloc4(nsends,PetscInt,&sbuf_nz,nrecvs,PetscInt,&rbuf_nz,nrecvs,PetscInt*,&rbuf_j,nrecvs,PetscScalar*,&rbuf_a);CHKERRQ(ierr);
2277 
2278     /* post receives of other's nzlocal */
2279     for (i=0; i<nrecvs; i++){
2280       ierr = MPI_Irecv(rbuf_nz+i,1,MPIU_INT,MPI_ANY_SOURCE,tag1,comm,r_waits1+i);CHKERRQ(ierr);
2281     }
2282     /* send nzlocal to others */
2283     for (i=0; i<nsends; i++){
2284       sbuf_nz[i] = nzlocal;
2285       ierr = MPI_Isend(sbuf_nz+i,1,MPIU_INT,send_rank[i],tag1,comm,s_waits1+i);CHKERRQ(ierr);
2286     }
2287     /* wait on receives of nzlocal; allocate space for rbuf_j, rbuf_a */
2288     count = nrecvs;
2289     while (count) {
2290       ierr = MPI_Waitany(nrecvs,r_waits1,&imdex,&recv_status);CHKERRQ(ierr);
2291       recv_rank[imdex] = recv_status.MPI_SOURCE;
2292       /* allocate rbuf_a and rbuf_j; then post receives of rbuf_j */
2293       ierr = PetscMalloc((rbuf_nz[imdex]+1)*sizeof(PetscScalar),&rbuf_a[imdex]);CHKERRQ(ierr);
2294 
2295       i = rowrange[recv_status.MPI_SOURCE+1] - rowrange[recv_status.MPI_SOURCE]; /* number of expected mat->i */
2296       rbuf_nz[imdex] += i + 2;
2297       ierr = PetscMalloc(rbuf_nz[imdex]*sizeof(PetscInt),&rbuf_j[imdex]);CHKERRQ(ierr);
2298       ierr = MPI_Irecv(rbuf_j[imdex],rbuf_nz[imdex],MPIU_INT,recv_status.MPI_SOURCE,tag2,comm,r_waits2+imdex);CHKERRQ(ierr);
2299       count--;
2300     }
2301     /* wait on sends of nzlocal */
2302     if (nsends) {ierr = MPI_Waitall(nsends,s_waits1,send_status);CHKERRQ(ierr);}
2303     /* send mat->i,j to others, and recv from other's */
2304     /*------------------------------------------------*/
2305     for (i=0; i<nsends; i++){
2306       j = nzlocal + rowrange[rank+1] - rowrange[rank] + 1;
2307       ierr = MPI_Isend(sbuf_j,j,MPIU_INT,send_rank[i],tag2,comm,s_waits2+i);CHKERRQ(ierr);
2308     }
2309     /* wait on receives of mat->i,j */
2310     /*------------------------------*/
2311     count = nrecvs;
2312     while (count) {
2313       ierr = MPI_Waitany(nrecvs,r_waits2,&imdex,&recv_status);CHKERRQ(ierr);
2314       if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE);
2315       count--;
2316     }
2317     /* wait on sends of mat->i,j */
2318     /*---------------------------*/
2319     if (nsends) {
2320       ierr = MPI_Waitall(nsends,s_waits2,send_status);CHKERRQ(ierr);
2321     }
2322   } /* end of if (reuse == MAT_INITIAL_MATRIX) */
2323 
2324   /* post receives, send and receive mat->a */
2325   /*----------------------------------------*/
2326   for (imdex=0; imdex<nrecvs; imdex++) {
2327     ierr = MPI_Irecv(rbuf_a[imdex],rbuf_nz[imdex],MPIU_SCALAR,recv_rank[imdex],tag3,comm,r_waits3+imdex);CHKERRQ(ierr);
2328   }
2329   for (i=0; i<nsends; i++){
2330     ierr = MPI_Isend(sbuf_a,nzlocal,MPIU_SCALAR,send_rank[i],tag3,comm,s_waits3+i);CHKERRQ(ierr);
2331   }
2332   count = nrecvs;
2333   while (count) {
2334     ierr = MPI_Waitany(nrecvs,r_waits3,&imdex,&recv_status);CHKERRQ(ierr);
2335     if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE);
2336     count--;
2337   }
2338   if (nsends) {
2339     ierr = MPI_Waitall(nsends,s_waits3,send_status);CHKERRQ(ierr);
2340   }
2341 
2342   ierr = PetscFree2(s_waits3,send_status);CHKERRQ(ierr);
2343 
2344   /* create redundant matrix */
2345   /*-------------------------*/
2346   if (reuse == MAT_INITIAL_MATRIX){
2347     /* compute rownz_max for preallocation */
2348     for (imdex=0; imdex<nrecvs; imdex++){
2349       j = rowrange[recv_rank[imdex]+1] - rowrange[recv_rank[imdex]];
2350       rptr = rbuf_j[imdex];
2351       for (i=0; i<j; i++){
2352         ncols = rptr[i+1] - rptr[i];
2353         if (rownz_max < ncols) rownz_max = ncols;
2354       }
2355     }
2356 
2357     ierr = MatCreate(subcomm,&C);CHKERRQ(ierr);
2358     ierr = MatSetSizes(C,mlocal_sub,mlocal_sub,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr);
2359     ierr = MatSetFromOptions(C);CHKERRQ(ierr);
2360     ierr = MatSeqAIJSetPreallocation(C,rownz_max,PETSC_NULL);CHKERRQ(ierr);
2361     ierr = MatMPIAIJSetPreallocation(C,rownz_max,PETSC_NULL,rownz_max,PETSC_NULL);CHKERRQ(ierr);
2362   } else {
2363     C = *matredundant;
2364   }
2365 
2366   /* insert local matrix entries */
2367   rptr = sbuf_j;
2368   cols = sbuf_j + rend-rstart + 1;
2369   vals = sbuf_a;
2370   for (i=0; i<rend-rstart; i++){
2371     row   = i + rstart;
2372     ncols = rptr[i+1] - rptr[i];
2373     ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
2374     vals += ncols;
2375     cols += ncols;
2376   }
2377   /* insert received matrix entries */
2378   for (imdex=0; imdex<nrecvs; imdex++){
2379     rstart = rowrange[recv_rank[imdex]];
2380     rend   = rowrange[recv_rank[imdex]+1];
2381     rptr = rbuf_j[imdex];
2382     cols = rbuf_j[imdex] + rend-rstart + 1;
2383     vals = rbuf_a[imdex];
2384     for (i=0; i<rend-rstart; i++){
2385       row   = i + rstart;
2386       ncols = rptr[i+1] - rptr[i];
2387       ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
2388       vals += ncols;
2389       cols += ncols;
2390     }
2391   }
2392   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2393   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2394   ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr);
2395   if (M != mat->rmap->N || N != mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"redundant mat size %d != input mat size %d",M,mat->rmap->N);
2396   if (reuse == MAT_INITIAL_MATRIX){
2397     PetscContainer container;
2398     *matredundant = C;
2399     /* create a supporting struct and attach it to C for reuse */
2400     ierr = PetscNewLog(C,Mat_Redundant,&redund);CHKERRQ(ierr);
2401     ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
2402     ierr = PetscContainerSetPointer(container,redund);CHKERRQ(ierr);
2403     ierr = PetscObjectCompose((PetscObject)C,"Mat_Redundant",(PetscObject)container);CHKERRQ(ierr);
2404     ierr = PetscContainerSetUserDestroy(container,PetscContainerDestroy_MatRedundant);CHKERRQ(ierr);
2405 
2406     redund->nzlocal = nzlocal;
2407     redund->nsends  = nsends;
2408     redund->nrecvs  = nrecvs;
2409     redund->send_rank = send_rank;
2410     redund->recv_rank = recv_rank;
2411     redund->sbuf_nz = sbuf_nz;
2412     redund->rbuf_nz = rbuf_nz;
2413     redund->sbuf_j  = sbuf_j;
2414     redund->sbuf_a  = sbuf_a;
2415     redund->rbuf_j  = rbuf_j;
2416     redund->rbuf_a  = rbuf_a;
2417 
2418     redund->MatDestroy = C->ops->destroy;
2419     C->ops->destroy    = MatDestroy_MatRedundant;
2420   }
2421   PetscFunctionReturn(0);
2422 }
2423 
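/*
   A caller-side sketch for MatGetRedundantMatrix() above (added commentary,
   assumed usage): every sub-communicator receives a complete copy of the
   parallel matrix; passing the copy back with MAT_REUSE_MATRIX reuses the
   communication buffers cached in the attached Mat_Redundant container,
   which requires an unchanged nonzero pattern.

       Mat Cred;
       ierr = MatGetRedundantMatrix(mat,nsubcomm,subcomm,mlocal_sub,MAT_INITIAL_MATRIX,&Cred);CHKERRQ(ierr);
       ... later, after the values of mat change ...
       ierr = MatGetRedundantMatrix(mat,nsubcomm,subcomm,mlocal_sub,MAT_REUSE_MATRIX,&Cred);CHKERRQ(ierr);
*/
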
2424 #undef __FUNCT__
2425 #define __FUNCT__ "MatGetRowMaxAbs_MPIAIJ"
2426 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2427 {
2428   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2429   PetscErrorCode ierr;
2430   PetscInt       i,*idxb = 0;
2431   PetscScalar    *va,*vb;
2432   Vec            vtmp;
2433 
2434   PetscFunctionBegin;
2435   ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr);
2436   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2437   if (idx) {
2438     for (i=0; i<A->rmap->n; i++) {
2439       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2440     }
2441   }
2442 
2443   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
2444   if (idx) {
2445     ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr);
2446   }
2447   ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
2448   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
2449 
2450   for (i=0; i<A->rmap->n; i++){
2451     if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
2452       va[i] = vb[i];
2453       if (idx) idx[i] = a->garray[idxb[i]];
2454     }
2455   }
2456 
2457   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2458   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
2459   if (idxb) {
2460     ierr = PetscFree(idxb);CHKERRQ(ierr);
2461   }
2462   ierr = VecDestroy(vtmp);CHKERRQ(ierr);
2463   PetscFunctionReturn(0);
2464 }
2465 
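/*
   A caller-side sketch for MatGetRowMaxAbs() above (added commentary, assumed
   usage): v receives, for each locally owned row, the entry of largest
   absolute value, and the optional idx array receives the corresponding
   global column indices.

       Vec      rowmax;
       PetscInt m,*loc;
       ierr = MatGetLocalSize(A,&m,PETSC_NULL);CHKERRQ(ierr);
       ierr = MatGetVecs(A,PETSC_NULL,&rowmax);CHKERRQ(ierr);
       ierr = PetscMalloc(m*sizeof(PetscInt),&loc);CHKERRQ(ierr);
       ierr = MatGetRowMaxAbs(A,rowmax,loc);CHKERRQ(ierr);
*/
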
2466 #undef __FUNCT__
2467 #define __FUNCT__ "MatGetRowMinAbs_MPIAIJ"
2468 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2469 {
2470   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2471   PetscErrorCode ierr;
2472   PetscInt       i,*idxb = 0;
2473   PetscScalar    *va,*vb;
2474   Vec            vtmp;
2475 
2476   PetscFunctionBegin;
2477   ierr = MatGetRowMinAbs(a->A,v,idx);CHKERRQ(ierr);
2478   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2479   if (idx) {
2480     for (i=0; i<A->rmap->n; i++) {
2481       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2482     }
2483   }
2484 
2485   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
2486   if (idx) {
2487     ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr);
2488   }
2489   ierr = MatGetRowMinAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
2490   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
2491 
2492   for (i=0; i<A->rmap->n; i++){
2493     if (PetscAbsScalar(va[i]) > PetscAbsScalar(vb[i])) {
2494       va[i] = vb[i];
2495       if (idx) idx[i] = a->garray[idxb[i]];
2496     }
2497   }
2498 
2499   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2500   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
2501   if (idxb) {
2502     ierr = PetscFree(idxb);CHKERRQ(ierr);
2503   }
2504   ierr = VecDestroy(vtmp);CHKERRQ(ierr);
2505   PetscFunctionReturn(0);
2506 }
2507 
2508 #undef __FUNCT__
2509 #define __FUNCT__ "MatGetRowMin_MPIAIJ"
2510 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2511 {
2512   Mat_MPIAIJ    *mat    = (Mat_MPIAIJ *) A->data;
2513   PetscInt       n      = A->rmap->n;
2514   PetscInt       cstart = A->cmap->rstart;
2515   PetscInt      *cmap   = mat->garray;
2516   PetscInt      *diagIdx, *offdiagIdx;
2517   Vec            diagV, offdiagV;
2518   PetscScalar   *a, *diagA, *offdiagA;
2519   PetscInt       r;
2520   PetscErrorCode ierr;
2521 
2522   PetscFunctionBegin;
2523   ierr = PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);CHKERRQ(ierr);
2524   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr);
2525   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr);
2526   ierr = MatGetRowMin(mat->A, diagV,    diagIdx);CHKERRQ(ierr);
2527   ierr = MatGetRowMin(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
2528   ierr = VecGetArray(v,        &a);CHKERRQ(ierr);
2529   ierr = VecGetArray(diagV,    &diagA);CHKERRQ(ierr);
2530   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2531   for(r = 0; r < n; ++r) {
2532     if (PetscAbsScalar(diagA[r]) <= PetscAbsScalar(offdiagA[r])) {
2533       a[r]   = diagA[r];
2534       idx[r] = cstart + diagIdx[r];
2535     } else {
2536       a[r]   = offdiagA[r];
2537       idx[r] = cmap[offdiagIdx[r]];
2538     }
2539   }
2540   ierr = VecRestoreArray(v,        &a);CHKERRQ(ierr);
2541   ierr = VecRestoreArray(diagV,    &diagA);CHKERRQ(ierr);
2542   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2543   ierr = VecDestroy(diagV);CHKERRQ(ierr);
2544   ierr = VecDestroy(offdiagV);CHKERRQ(ierr);
2545   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
2546   PetscFunctionReturn(0);
2547 }
2548 
2549 #undef __FUNCT__
2550 #define __FUNCT__ "MatGetRowMax_MPIAIJ"
2551 PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2552 {
2553   Mat_MPIAIJ    *mat    = (Mat_MPIAIJ *) A->data;
2554   PetscInt       n      = A->rmap->n;
2555   PetscInt       cstart = A->cmap->rstart;
2556   PetscInt      *cmap   = mat->garray;
2557   PetscInt      *diagIdx, *offdiagIdx;
2558   Vec            diagV, offdiagV;
2559   PetscScalar   *a, *diagA, *offdiagA;
2560   PetscInt       r;
2561   PetscErrorCode ierr;
2562 
2563   PetscFunctionBegin;
2564   ierr = PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);CHKERRQ(ierr);
2565   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr);
2566   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr);
2567   ierr = MatGetRowMax(mat->A, diagV,    diagIdx);CHKERRQ(ierr);
2568   ierr = MatGetRowMax(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
2569   ierr = VecGetArray(v,        &a);CHKERRQ(ierr);
2570   ierr = VecGetArray(diagV,    &diagA);CHKERRQ(ierr);
2571   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2572   for(r = 0; r < n; ++r) {
2573     if (PetscAbsScalar(diagA[r]) >= PetscAbsScalar(offdiagA[r])) {
2574       a[r]   = diagA[r];
2575       idx[r] = cstart + diagIdx[r];
2576     } else {
2577       a[r]   = offdiagA[r];
2578       idx[r] = cmap[offdiagIdx[r]];
2579     }
2580   }
2581   ierr = VecRestoreArray(v,        &a);CHKERRQ(ierr);
2582   ierr = VecRestoreArray(diagV,    &diagA);CHKERRQ(ierr);
2583   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2584   ierr = VecDestroy(diagV);CHKERRQ(ierr);
2585   ierr = VecDestroy(offdiagV);CHKERRQ(ierr);
2586   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
2587   PetscFunctionReturn(0);
2588 }
2589 
2590 #undef __FUNCT__
2591 #define __FUNCT__ "MatGetSeqNonzerostructure_MPIAIJ"
2592 PetscErrorCode MatGetSeqNonzerostructure_MPIAIJ(Mat mat,Mat *newmat)
2593 {
2594   PetscErrorCode ierr;
2595   Mat            *dummy;
2596 
2597   PetscFunctionBegin;
2598   ierr = MatGetSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy);CHKERRQ(ierr);
2599   *newmat = *dummy;
2600   ierr = PetscFree(dummy);CHKERRQ(ierr);
2601   PetscFunctionReturn(0);
2602 }
2603 
2604 extern PetscErrorCode PETSCMAT_DLLEXPORT MatFDColoringApply_AIJ(Mat,MatFDColoring,Vec,MatStructure*,void*);
2605 /* -------------------------------------------------------------------*/
2606 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
2607        MatGetRow_MPIAIJ,
2608        MatRestoreRow_MPIAIJ,
2609        MatMult_MPIAIJ,
2610 /* 4*/ MatMultAdd_MPIAIJ,
2611        MatMultTranspose_MPIAIJ,
2612        MatMultTransposeAdd_MPIAIJ,
2613 #ifdef PETSC_HAVE_PBGL
2614        MatSolve_MPIAIJ,
2615 #else
2616        0,
2617 #endif
2618        0,
2619        0,
2620 /*10*/ 0,
2621        0,
2622        0,
2623        MatSOR_MPIAIJ,
2624        MatTranspose_MPIAIJ,
2625 /*15*/ MatGetInfo_MPIAIJ,
2626        MatEqual_MPIAIJ,
2627        MatGetDiagonal_MPIAIJ,
2628        MatDiagonalScale_MPIAIJ,
2629        MatNorm_MPIAIJ,
2630 /*20*/ MatAssemblyBegin_MPIAIJ,
2631        MatAssemblyEnd_MPIAIJ,
2632        MatSetOption_MPIAIJ,
2633        MatZeroEntries_MPIAIJ,
2634 /*24*/ MatZeroRows_MPIAIJ,
2635        0,
2636 #ifdef PETSC_HAVE_PBGL
2637        0,
2638 #else
2639        0,
2640 #endif
2641        0,
2642        0,
2643 /*29*/ MatSetUpPreallocation_MPIAIJ,
2644 #ifdef PETSC_HAVE_PBGL
2645        0,
2646 #else
2647        0,
2648 #endif
2649        0,
2650        0,
2651        0,
2652 /*34*/ MatDuplicate_MPIAIJ,
2653        0,
2654        0,
2655        0,
2656        0,
2657 /*39*/ MatAXPY_MPIAIJ,
2658        MatGetSubMatrices_MPIAIJ,
2659        MatIncreaseOverlap_MPIAIJ,
2660        MatGetValues_MPIAIJ,
2661        MatCopy_MPIAIJ,
2662 /*44*/ MatGetRowMax_MPIAIJ,
2663        MatScale_MPIAIJ,
2664        0,
2665        0,
2666        0,
2667 /*49*/ MatSetBlockSize_MPIAIJ,
2668        0,
2669        0,
2670        0,
2671        0,
2672 /*54*/ MatFDColoringCreate_MPIAIJ,
2673        0,
2674        MatSetUnfactored_MPIAIJ,
2675        MatPermute_MPIAIJ,
2676        0,
2677 /*59*/ MatGetSubMatrix_MPIAIJ,
2678        MatDestroy_MPIAIJ,
2679        MatView_MPIAIJ,
2680        0,
2681        0,
2682 /*64*/ 0,
2683        0,
2684        0,
2685        0,
2686        0,
2687 /*69*/ MatGetRowMaxAbs_MPIAIJ,
2688        MatGetRowMinAbs_MPIAIJ,
2689        0,
2690        MatSetColoring_MPIAIJ,
2691 #if defined(PETSC_HAVE_ADIC)
2692        MatSetValuesAdic_MPIAIJ,
2693 #else
2694        0,
2695 #endif
2696        MatSetValuesAdifor_MPIAIJ,
2697 /*75*/ MatFDColoringApply_AIJ,
2698        0,
2699        0,
2700        0,
2701        0,
2702 /*80*/ 0,
2703        0,
2704        0,
2705 /*83*/ MatLoad_MPIAIJ,
2706        0,
2707        0,
2708        0,
2709        0,
2710        0,
2711 /*89*/ MatMatMult_MPIAIJ_MPIAIJ,
2712        MatMatMultSymbolic_MPIAIJ_MPIAIJ,
2713        MatMatMultNumeric_MPIAIJ_MPIAIJ,
2714        MatPtAP_Basic,
2715        MatPtAPSymbolic_MPIAIJ,
2716 /*94*/ MatPtAPNumeric_MPIAIJ,
2717        0,
2718        0,
2719        0,
2720        0,
2721 /*99*/ 0,
2722        MatPtAPSymbolic_MPIAIJ_MPIAIJ,
2723        MatPtAPNumeric_MPIAIJ_MPIAIJ,
2724        MatConjugate_MPIAIJ,
2725        0,
2726 /*104*/MatSetValuesRow_MPIAIJ,
2727        MatRealPart_MPIAIJ,
2728        MatImaginaryPart_MPIAIJ,
2729        0,
2730        0,
2731 /*109*/0,
2732        MatGetRedundantMatrix_MPIAIJ,
2733        MatGetRowMin_MPIAIJ,
2734        0,
2735        0,
2736 /*114*/MatGetSeqNonzerostructure_MPIAIJ,
2737        0,
2738        0,
2739        0,
2740        0,
2741 /*119*/0,
2742        0,
2743        0,
2744        0,
2745        MatGetMultiProcBlock_MPIAIJ
2746 };
2747 
2748 /* ----------------------------------------------------------------------------------------*/
2749 
2750 EXTERN_C_BEGIN
2751 #undef __FUNCT__
2752 #define __FUNCT__ "MatStoreValues_MPIAIJ"
2753 PetscErrorCode PETSCMAT_DLLEXPORT MatStoreValues_MPIAIJ(Mat mat)
2754 {
2755   Mat_MPIAIJ     *aij = (Mat_MPIAIJ *)mat->data;
2756   PetscErrorCode ierr;
2757 
2758   PetscFunctionBegin;
2759   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
2760   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
2761   PetscFunctionReturn(0);
2762 }
2763 EXTERN_C_END
2764 
2765 EXTERN_C_BEGIN
2766 #undef __FUNCT__
2767 #define __FUNCT__ "MatRetrieveValues_MPIAIJ"
2768 PetscErrorCode PETSCMAT_DLLEXPORT MatRetrieveValues_MPIAIJ(Mat mat)
2769 {
2770   Mat_MPIAIJ     *aij = (Mat_MPIAIJ *)mat->data;
2771   PetscErrorCode ierr;
2772 
2773   PetscFunctionBegin;
2774   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
2775   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
2776   PetscFunctionReturn(0);
2777 }
2778 EXTERN_C_END
2779 
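/*
    Usage sketch (illustrative only; the matrix A is an assumption for the example): the two
    routines above are reached through the public MatStoreValues()/MatRetrieveValues()
    interface when a matrix with a fixed nonzero pattern has its values overwritten and
    later needs them back.

      ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);CHKERRQ(ierr);
      ierr = MatStoreValues(A);CHKERRQ(ierr);        <-- snapshot the current numerical values
      ...  operations that overwrite the values of A  ...
      ierr = MatRetrieveValues(A);CHKERRQ(ierr);     <-- restore the snapshot
*/
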
2780 EXTERN_C_BEGIN
2781 #undef __FUNCT__
2782 #define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ"
2783 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
2784 {
2785   Mat_MPIAIJ     *b;
2786   PetscErrorCode ierr;
2787   PetscInt       i;
2788 
2789   PetscFunctionBegin;
2790   if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5;
2791   if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2;
2792   if (d_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz);
2793   if (o_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz);
2794 
2795   ierr = PetscLayoutSetBlockSize(B->rmap,1);CHKERRQ(ierr);
2796   ierr = PetscLayoutSetBlockSize(B->cmap,1);CHKERRQ(ierr);
2797   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
2798   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
2799   if (d_nnz) {
2800     for (i=0; i<B->rmap->n; i++) {
2801       if (d_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %D value %D",i,d_nnz[i]);
2802     }
2803   }
2804   if (o_nnz) {
2805     for (i=0; i<B->rmap->n; i++) {
2806       if (o_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %D value %D",i,o_nnz[i]);
2807     }
2808   }
2809   b = (Mat_MPIAIJ*)B->data;
2810 
2811   if (!B->preallocated) {
2812     /* Explicitly create 2 MATSEQAIJ matrices. */
2813     ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
2814     ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr);
2815     ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr);
2816     ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr);
2817     ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
2818     ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr);
2819     ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr);
2820     ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr);
2821   }
2822 
2823   ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr);
2824   ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr);
2825   B->preallocated = PETSC_TRUE;
2826   PetscFunctionReturn(0);
2827 }
2828 EXTERN_C_END
2829 
2830 #undef __FUNCT__
2831 #define __FUNCT__ "MatDuplicate_MPIAIJ"
2832 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
2833 {
2834   Mat            mat;
2835   Mat_MPIAIJ     *a,*oldmat = (Mat_MPIAIJ*)matin->data;
2836   PetscErrorCode ierr;
2837 
2838   PetscFunctionBegin;
2839   *newmat       = 0;
2840   ierr = MatCreate(((PetscObject)matin)->comm,&mat);CHKERRQ(ierr);
2841   ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr);
2842   ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr);
2843   ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
2844   a    = (Mat_MPIAIJ*)mat->data;
2845 
2846   mat->factortype    = matin->factortype;
2847   mat->rmap->bs      = matin->rmap->bs;
2848   mat->assembled    = PETSC_TRUE;
2849   mat->insertmode   = NOT_SET_VALUES;
2850   mat->preallocated = PETSC_TRUE;
2851 
2852   a->size           = oldmat->size;
2853   a->rank           = oldmat->rank;
2854   a->donotstash     = oldmat->donotstash;
2855   a->roworiented    = oldmat->roworiented;
2856   a->rowindices     = 0;
2857   a->rowvalues      = 0;
2858   a->getrowactive   = PETSC_FALSE;
2859 
2860   ierr = PetscLayoutCopy(matin->rmap,&mat->rmap);CHKERRQ(ierr);
2861   ierr = PetscLayoutCopy(matin->cmap,&mat->cmap);CHKERRQ(ierr);
2862 
2863   if (oldmat->colmap) {
2864 #if defined (PETSC_USE_CTABLE)
2865     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
2866 #else
2867     ierr = PetscMalloc((mat->cmap->N)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr);
2868     ierr = PetscLogObjectMemory(mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
2869     ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
2870 #endif
2871   } else a->colmap = 0;
2872   if (oldmat->garray) {
2873     PetscInt len;
2874     len  = oldmat->B->cmap->n;
2875     ierr = PetscMalloc((len+1)*sizeof(PetscInt),&a->garray);CHKERRQ(ierr);
2876     ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr);
2877     if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); }
2878   } else a->garray = 0;
2879 
2880   ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
2881   ierr = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr);
2882   ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
2883   ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr);
2884   ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
2885   ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr);
2886   ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
2887   ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr);
2888   ierr = PetscFListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr);
2889   *newmat = mat;
2890   PetscFunctionReturn(0);
2891 }
2892 
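/*
    Usage sketch (illustrative only): MatDuplicate_MPIAIJ() above is reached through the
    public MatDuplicate() interface; B receives the same parallel layout and nonzero
    structure as A, and with MAT_COPY_VALUES also the same numerical values.

      Mat B;
      ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr);
      ...
      ierr = MatDestroy(B);CHKERRQ(ierr);

    Passing MAT_DO_NOT_COPY_VALUES instead duplicates only the structure.
*/
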
2893 /*
2894     Allows sending/receiving messages larger than 2 gigabytes in a single call
2895 */
2896 static int MPILong_Send(void *mess,PetscInt cnt, MPI_Datatype type,int to, int tag, MPI_Comm comm)
2897 {
2898   int             ierr;
2899   static PetscInt CHUNKSIZE = 250000000; /* 250,000,000 */
2900   PetscInt        i,numchunks;
2901   PetscMPIInt     icnt;
2902 
2903   numchunks = cnt/CHUNKSIZE + 1;
2904   for (i=0; i<numchunks; i++) {
2905     icnt = PetscMPIIntCast((i < numchunks-1) ? CHUNKSIZE : cnt - (numchunks-1)*CHUNKSIZE);
2906     ierr = MPI_Send(mess,icnt,type,to,tag,comm);if (ierr) return ierr;
2907     if (type == MPIU_INT) {
2908       mess = (void*) (((PetscInt*)mess) + CHUNKSIZE);
2909     } else if (type == MPIU_SCALAR) {
2910       mess = (void*) (((PetscScalar*)mess) + CHUNKSIZE);
2911     } else SETERRQ(comm,PETSC_ERR_SUP,"No support for this datatype");
2912   }
2913   return 0;
2914 }
2915 static int MPILong_Recv(void *mess,PetscInt cnt, MPI_Datatype type,int from, int tag, MPI_Comm comm)
2916 {
2917   int             ierr;
2918   static PetscInt CHUNKSIZE = 250000000; /* 250,000,000 */
2919   MPI_Status      status;
2920   PetscInt        i,numchunks;
2921   PetscMPIInt     icnt;
2922 
2923   numchunks = cnt/CHUNKSIZE + 1;
2924   for (i=0; i<numchunks; i++) {
2925     icnt = PetscMPIIntCast((i < numchunks-1) ? CHUNKSIZE : cnt - (numchunks-1)*CHUNKSIZE);
2926     ierr = MPI_Recv(mess,icnt,type,from,tag,comm,&status);if (ierr) return ierr;
2927     if (type == MPIU_INT) {
2928       mess = (void*) (((PetscInt*)mess) + CHUNKSIZE);
2929     } else if (type == MPIU_SCALAR) {
2930       mess = (void*) (((PetscScalar*)mess) + CHUNKSIZE);
2931     } else SETERRQ(comm,PETSC_ERR_SUP,"No support for this datatype");
2932   }
2933   return 0;
2934 }
2935 
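/*
    Worked example of the chunking arithmetic above (illustrative only): with
    CHUNKSIZE = 250,000,000 and cnt = 600,000,000 entries, numchunks = cnt/CHUNKSIZE + 1 = 3,
    so three MPI calls are issued with counts 250,000,000, 250,000,000 and 100,000,000; each
    individual count fits in a PetscMPIInt even though the total may not.  When cnt is an
    exact multiple of CHUNKSIZE the final message has length zero, which is harmless because
    the receiving side performs the identical arithmetic and posts a matching zero-length
    receive.
*/
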
2936 #undef __FUNCT__
2937 #define __FUNCT__ "MatLoad_MPIAIJ"
2938 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer)
2939 {
2940   PetscScalar    *vals,*svals;
2941   MPI_Comm       comm = ((PetscObject)viewer)->comm;
2942   PetscErrorCode ierr;
2943   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
2944   PetscInt       i,nz,j,rstart,rend,mmax,maxnz = 0,grows,gcols;
2945   PetscInt       header[4],*rowlengths = 0,M,N,m,*cols;
2946   PetscInt       *ourlens = PETSC_NULL,*procsnz = PETSC_NULL,*offlens = PETSC_NULL,jj,*mycols,*smycols;
2947   PetscInt       cend,cstart,n,*rowners,sizesset=1;
2948   int            fd;
2949 
2950   PetscFunctionBegin;
2951   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2952   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2953   if (!rank) {
2954     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
2955     ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
2956     if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
2957   }
2958 
2959   if (newMat->rmap->n < 0 && newMat->rmap->N < 0 && newMat->cmap->n < 0 && newMat->cmap->N < 0) sizesset = 0;
2960 
2961   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
2962   M = header[1]; N = header[2];
2963   /* If global rows/cols are set to PETSC_DECIDE, set it to the sizes given in the file */
2964   if (sizesset && newMat->rmap->N < 0) newMat->rmap->N = M;
2965   if (sizesset && newMat->cmap->N < 0) newMat->cmap->N = N;
2966 
2967   /* If global sizes are set, check if they are consistent with that given in the file */
2968   if (sizesset) {
2969     ierr = MatGetSize(newMat,&grows,&gcols);CHKERRQ(ierr);
2970   }
2971   if (sizesset && newMat->rmap->N != grows) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows: Matrix in file has (%D) and input matrix has (%D)",M,grows);
2972   if (sizesset && newMat->cmap->N != gcols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols: Matrix in file has (%D) and input matrix has (%D)",N,gcols);
2973 
2974   /* determine ownership of all rows */
2975   if (newMat->rmap->n < 0) m = M/size + ((M % size) > rank); /* PETSC_DECIDE */
2976   else m = newMat->rmap->n; /* Set by user */
2977 
2978   ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr);
2979   ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
2980 
2981   /* First process needs enough room for process with most rows */
2982   if (!rank) {
2983     mmax       = rowners[1];
2984     for (i=2; i<=size; i++) {
2985       mmax = PetscMax(mmax,rowners[i]);
2986     }
2987   } else mmax = m;
2988 
2989   rowners[0] = 0;
2990   for (i=2; i<=size; i++) {
2991     rowners[i] += rowners[i-1];
2992   }
2993   rstart = rowners[rank];
2994   rend   = rowners[rank+1];
2995 
2996   /* distribute row lengths to all processors */
2997   ierr    = PetscMalloc2(mmax,PetscInt,&ourlens,mmax,PetscInt,&offlens);CHKERRQ(ierr);
2998   if (!rank) {
2999     ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr);
3000     ierr = PetscMalloc(mmax*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
3001     ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr);
3002     ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr);
3003     for (j=0; j<m; j++) {
3004       procsnz[0] += ourlens[j];
3005     }
3006     for (i=1; i<size; i++) {
3007       ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr);
3008       /* calculate the number of nonzeros on each processor */
3009       for (j=0; j<rowners[i+1]-rowners[i]; j++) {
3010         procsnz[i] += rowlengths[j];
3011       }
3012       ierr = MPILong_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
3013     }
3014     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
3015   } else {
3016     ierr = MPILong_Recv(ourlens,m,MPIU_INT,0,tag,comm);CHKERRQ(ierr);
3017   }
3018 
3019   if (!rank) {
3020     /* determine max buffer needed and allocate it */
3021     maxnz = 0;
3022     for (i=0; i<size; i++) {
3023       maxnz = PetscMax(maxnz,procsnz[i]);
3024     }
3025     ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr);
3026 
3027     /* read in my part of the matrix column indices  */
3028     nz   = procsnz[0];
3029     ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr);
3030     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
3031 
3032     /* read in everyone else's and ship off */
3033     for (i=1; i<size; i++) {
3034       nz     = procsnz[i];
3035       ierr   = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
3036       ierr   = MPILong_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
3037     }
3038     ierr = PetscFree(cols);CHKERRQ(ierr);
3039   } else {
3040     /* determine buffer space needed for message */
3041     nz = 0;
3042     for (i=0; i<m; i++) {
3043       nz += ourlens[i];
3044     }
3045     ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr);
3046 
3047     /* receive message of column indices*/
3048     ierr = MPILong_Recv(mycols,nz,MPIU_INT,0,tag,comm);CHKERRQ(ierr);
3049   }
3050 
3051   /* determine column ownership if matrix is not square */
3052   if (N != M) {
3053     if (newMat->cmap->n < 0) n      = N/size + ((N % size) > rank);
3054     else n = newMat->cmap->n;
3055     ierr   = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3056     cstart = cend - n;
3057   } else {
3058     cstart = rstart;
3059     cend   = rend;
3060     n      = cend - cstart;
3061   }
3062 
3063   /* loop over local rows, determining number of off diagonal entries */
3064   ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr);
3065   jj = 0;
3066   for (i=0; i<m; i++) {
3067     for (j=0; j<ourlens[i]; j++) {
3068       if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++;
3069       jj++;
3070     }
3071   }
3072 
3073   for (i=0; i<m; i++) {
3074     ourlens[i] -= offlens[i];
3075   }
3076   if (!sizesset) {
3077     ierr = MatSetSizes(newMat,m,n,M,N);CHKERRQ(ierr);
3078   }
3079   ierr = MatMPIAIJSetPreallocation(newMat,0,ourlens,0,offlens);CHKERRQ(ierr);
3080 
3081   for (i=0; i<m; i++) {
3082     ourlens[i] += offlens[i];
3083   }
3084 
3085   if (!rank) {
3086     ierr = PetscMalloc((maxnz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
3087 
3088     /* read in my part of the matrix numerical values  */
3089     nz   = procsnz[0];
3090     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3091 
3092     /* insert into matrix */
3093     jj      = rstart;
3094     smycols = mycols;
3095     svals   = vals;
3096     for (i=0; i<m; i++) {
3097       ierr = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
3098       smycols += ourlens[i];
3099       svals   += ourlens[i];
3100       jj++;
3101     }
3102 
3103     /* read in other processors and ship out */
3104     for (i=1; i<size; i++) {
3105       nz     = procsnz[i];
3106       ierr   = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3107       ierr   = MPILong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr);
3108     }
3109     ierr = PetscFree(procsnz);CHKERRQ(ierr);
3110   } else {
3111     /* receive numeric values */
3112     ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
3113 
3114     /* receive message of values*/
3115     ierr   = MPILong_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr);
3116 
3117     /* insert into matrix */
3118     jj      = rstart;
3119     smycols = mycols;
3120     svals   = vals;
3121     for (i=0; i<m; i++) {
3122       ierr     = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
3123       smycols += ourlens[i];
3124       svals   += ourlens[i];
3125       jj++;
3126     }
3127   }
3128   ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr);
3129   ierr = PetscFree(vals);CHKERRQ(ierr);
3130   ierr = PetscFree(mycols);CHKERRQ(ierr);
3131   ierr = PetscFree(rowners);CHKERRQ(ierr);
3132 
3133   ierr = MatAssemblyBegin(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3134   ierr = MatAssemblyEnd(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3135   PetscFunctionReturn(0);
3136 }
3137 
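/*
    Usage sketch (illustrative only; the file name "matrix.dat" is an assumption for the
    example): MatLoad_MPIAIJ() above is reached through the public MatLoad() interface.

      Mat         A;
      PetscViewer viewer;
      ierr = PetscViewerBinaryOpen(PETSC_COMM_WORLD,"matrix.dat",FILE_MODE_READ,&viewer);CHKERRQ(ierr);
      ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
      ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
      ierr = MatLoad(A,viewer);CHKERRQ(ierr);
      ierr = PetscViewerDestroy(viewer);CHKERRQ(ierr);

    Local sizes may be fixed with MatSetSizes() before MatLoad() when a particular row
    distribution is wanted; otherwise they are chosen as in the routine above.
*/
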
3138 #undef __FUNCT__
3139 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ"
3140 PetscErrorCode MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat)
3141 {
3142   PetscErrorCode ierr;
3143   IS             iscol_local;
3144   PetscInt       csize;
3145 
3146   PetscFunctionBegin;
3147   ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
3148   if (call == MAT_REUSE_MATRIX) {
3149     ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr);
3150     if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3151   } else {
3152     ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
3153   }
3154   ierr = MatGetSubMatrix_MPIAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr);
3155   if (call == MAT_INITIAL_MATRIX) {
3156     ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
3157     ierr = ISDestroy(iscol_local);CHKERRQ(ierr);
3158   }
3159   PetscFunctionReturn(0);
3160 }
3161 
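/*
    Usage sketch (illustrative only; nlocalrows, firstrow, nlocalcols and firstcol are
    assumptions for the example): extracting a parallel submatrix through the public
    MatGetSubMatrix() interface, which dispatches to MatGetSubMatrix_MPIAIJ() above.

      IS  isrow,iscol;
      Mat S;
      ierr = ISCreateStride(PETSC_COMM_WORLD,nlocalrows,firstrow,1,&isrow);CHKERRQ(ierr);
      ierr = ISCreateStride(PETSC_COMM_WORLD,nlocalcols,firstcol,1,&iscol);CHKERRQ(ierr);
      ierr = MatGetSubMatrix(A,isrow,iscol,MAT_INITIAL_MATRIX,&S);CHKERRQ(ierr);
      ...  change the numerical values of A  ...
      ierr = MatGetSubMatrix(A,isrow,iscol,MAT_REUSE_MATRIX,&S);CHKERRQ(ierr);
      ierr = ISDestroy(isrow);CHKERRQ(ierr);
      ierr = ISDestroy(iscol);CHKERRQ(ierr);
      ierr = MatDestroy(S);CHKERRQ(ierr);

    The "ISAllGather" object composed on S above is what lets the MAT_REUSE_MATRIX call
    avoid regathering the column index set.
*/
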
3162 #undef __FUNCT__
3163 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ_Private"
3164 /*
3165     Not great since it makes two copies of the submatrix: first a SeqAIJ
3166   on each process, and then the final result by concatenating the local matrices.
3167   Writing it directly would be much like MatGetSubMatrices_MPIAIJ()
3168 
3169   Note: This requires a sequential iscol with all indices.
3170 */
3171 PetscErrorCode MatGetSubMatrix_MPIAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat)
3172 {
3173   PetscErrorCode ierr;
3174   PetscMPIInt    rank,size;
3175   PetscInt       i,m,n,rstart,row,rend,nz,*cwork,j;
3176   PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal;
3177   Mat            *local,M,Mreuse;
3178   MatScalar      *vwork,*aa;
3179   MPI_Comm       comm = ((PetscObject)mat)->comm;
3180   Mat_SeqAIJ     *aij;
3181 
3182 
3183   PetscFunctionBegin;
3184   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3185   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3186 
3187   if (call ==  MAT_REUSE_MATRIX) {
3188     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr);
3189     if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3190     local = &Mreuse;
3191     ierr  = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);CHKERRQ(ierr);
3192   } else {
3193     ierr   = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr);
3194     Mreuse = *local;
3195     ierr   = PetscFree(local);CHKERRQ(ierr);
3196   }
3197 
3198   /*
3199       m - number of local rows
3200       n - number of columns (same on all processors)
3201       rstart - first row in new global matrix generated
3202   */
3203   ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
3204   if (call == MAT_INITIAL_MATRIX) {
3205     aij = (Mat_SeqAIJ*)(Mreuse)->data;
3206     ii  = aij->i;
3207     jj  = aij->j;
3208 
3209     /*
3210         Determine the number of non-zeros in the diagonal and off-diagonal
3211         portions of the matrix in order to do correct preallocation
3212     */
3213 
3214     /* first get start and end of "diagonal" columns */
3215     if (csize == PETSC_DECIDE) {
3216       ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
3217       if (mglobal == n) { /* square matrix */
3218 	nlocal = m;
3219       } else {
3220         nlocal = n/size + ((n % size) > rank);
3221       }
3222     } else {
3223       nlocal = csize;
3224     }
3225     ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3226     rstart = rend - nlocal;
3227     if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n);
3228 
3229     /* next, compute all the lengths */
3230     ierr  = PetscMalloc((2*m+1)*sizeof(PetscInt),&dlens);CHKERRQ(ierr);
3231     olens = dlens + m;
3232     for (i=0; i<m; i++) {
3233       jend = ii[i+1] - ii[i];
3234       olen = 0;
3235       dlen = 0;
3236       for (j=0; j<jend; j++) {
3237         if (*jj < rstart || *jj >= rend) olen++;
3238         else dlen++;
3239         jj++;
3240       }
3241       olens[i] = olen;
3242       dlens[i] = dlen;
3243     }
3244     ierr = MatCreate(comm,&M);CHKERRQ(ierr);
3245     ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr);
3246     ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr);
3247     ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr);
3248     ierr = PetscFree(dlens);CHKERRQ(ierr);
3249   } else {
3250     PetscInt ml,nl;
3251 
3252     M = *newmat;
3253     ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
3254     if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
3255     ierr = MatZeroEntries(M);CHKERRQ(ierr);
3256     /*
3257          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
3258        rather than the slower MatSetValues().
3259     */
3260     M->was_assembled = PETSC_TRUE;
3261     M->assembled     = PETSC_FALSE;
3262   }
3263   ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
3264   aij = (Mat_SeqAIJ*)(Mreuse)->data;
3265   ii  = aij->i;
3266   jj  = aij->j;
3267   aa  = aij->a;
3268   for (i=0; i<m; i++) {
3269     row   = rstart + i;
3270     nz    = ii[i+1] - ii[i];
3271     cwork = jj;     jj += nz;
3272     vwork = aa;     aa += nz;
3273     ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
3274   }
3275 
3276   ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3277   ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3278   *newmat = M;
3279 
3280   /* save submatrix used in processor for next request */
3281   if (call ==  MAT_INITIAL_MATRIX) {
3282     ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
3283     ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr);
3284   }
3285 
3286   PetscFunctionReturn(0);
3287 }
3288 
3289 EXTERN_C_BEGIN
3290 #undef __FUNCT__
3291 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR_MPIAIJ"
3292 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[])
3293 {
3294   PetscInt       m,cstart, cend,j,nnz,i,d;
3295   PetscInt       *d_nnz,*o_nnz,nnz_max = 0,rstart,ii;
3296   const PetscInt *JJ;
3297   PetscScalar    *values;
3298   PetscErrorCode ierr;
3299 
3300   PetscFunctionBegin;
3301   if (Ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0 it is %D",Ii[0]);
3302 
3303   ierr = PetscLayoutSetBlockSize(B->rmap,1);CHKERRQ(ierr);
3304   ierr = PetscLayoutSetBlockSize(B->cmap,1);CHKERRQ(ierr);
3305   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3306   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3307   m      = B->rmap->n;
3308   cstart = B->cmap->rstart;
3309   cend   = B->cmap->rend;
3310   rstart = B->rmap->rstart;
3311 
3312   ierr  = PetscMalloc2(m,PetscInt,&d_nnz,m,PetscInt,&o_nnz);CHKERRQ(ierr);
3313 
3314 #if defined(PETSC_USE_DEBUG)
3315   for (i=0; i<m; i++) {
3316     nnz     = Ii[i+1]- Ii[i];
3317     JJ      = J + Ii[i];
3318     if (nnz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative %D number of columns",i,nnz);
3319     if (nnz && (JJ[0] < 0)) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index",i);
3320     if (nnz && (JJ[nnz-1] >= B->cmap->N)) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N);
3321   }
3322 #endif
3323 
3324   for (i=0; i<m; i++) {
3325     nnz     = Ii[i+1]- Ii[i];
3326     JJ      = J + Ii[i];
3327     nnz_max = PetscMax(nnz_max,nnz);
3328     d       = 0;
3329     for (j=0; j<nnz; j++) {
3330       if (cstart <= JJ[j] && JJ[j] < cend) d++;
3331     }
3332     d_nnz[i] = d;
3333     o_nnz[i] = nnz - d;
3334   }
3335   ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
3336   ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);
3337 
3338   if (v) values = (PetscScalar*)v;
3339   else {
3340     ierr = PetscMalloc((nnz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr);
3341     ierr = PetscMemzero(values,nnz_max*sizeof(PetscScalar));CHKERRQ(ierr);
3342   }
3343 
3344   for (i=0; i<m; i++) {
3345     ii   = i + rstart;
3346     nnz  = Ii[i+1]- Ii[i];
3347     ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ? Ii[i] : 0),INSERT_VALUES);CHKERRQ(ierr);
3348   }
3349   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3350   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3351 
3352   if (!v) {
3353     ierr = PetscFree(values);CHKERRQ(ierr);
3354   }
3355   PetscFunctionReturn(0);
3356 }
3357 EXTERN_C_END
3358 
3359 #undef __FUNCT__
3360 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR"
3361 /*@
3362    MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format
3363    (the default parallel PETSc format).
3364 
3365    Collective on MPI_Comm
3366 
3367    Input Parameters:
3368 +  B - the matrix
3369 .  i - the indices into j for the start of each local row (starts with zero)
3370 .  j - the column indices for each local row (starts with zero)
3371 -  v - optional values in the matrix
3372 
3373    Level: developer
3374 
3375    Notes:
3376        The i, j, and v arrays ARE copied by this routine into the internal format used by PETSc;
3377      thus you CANNOT change the matrix entries by changing the values of v[] after you have
3378      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3379 
3380        The i and j indices are 0 based, and the i indices are offsets into the local j array.
3381 
3382        The format used for the sparse matrix input is equivalent to a
3383     row-major ordering, i.e. for the following matrix the expected input data is
3384     as shown:
3385 
3386         1 0 0
3387         2 0 3     P0
3388        -------
3389         4 5 6     P1
3390 
3391      Process0 [P0]: rows_owned=[0,1]
3392         i =  {0,1,3}  [size = nrow+1  = 2+1]
3393         j =  {0,0,2}  [size = nz = 3]
3394         v =  {1,2,3}  [size = nz = 3]
3395 
3396      Process1 [P1]: rows_owned=[2]
3397         i =  {0,3}    [size = nrow+1  = 1+1]
3398         j =  {0,1,2}  [size = nz = 3]
3399         v =  {4,5,6}  [size = nz = 3]
3400 
3401 .keywords: matrix, aij, compressed row, sparse, parallel
3402 
3403 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateMPIAIJ(), MPIAIJ,
3404           MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays()
3405 @*/
3406 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
3407 {
3408   PetscErrorCode ierr;
3409 
3410   PetscFunctionBegin;
3411   ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocationCSR_C",(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,i,j,v));CHKERRQ(ierr);
3412   PetscFunctionReturn(0);
3413 }
3414 
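/*
    Usage sketch (illustrative only), using the 3x3 example from the manual page above as
    seen from process 0, which owns rows 0 and 1:

      Mat         B;
      PetscInt    i[] = {0,1,3};
      PetscInt    j[] = {0,0,2};
      PetscScalar v[] = {1.0,2.0,3.0};
      ierr = MatCreate(PETSC_COMM_WORLD,&B);CHKERRQ(ierr);
      ierr = MatSetSizes(B,2,PETSC_DECIDE,3,3);CHKERRQ(ierr);
      ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr);
      ierr = MatMPIAIJSetPreallocationCSR(B,i,j,v);CHKERRQ(ierr);

    Process 1 makes the same calls with local row count 1 and its own arrays i = {0,3},
    j = {0,1,2}, v = {4,5,6}.  After the call B is preallocated, filled, and assembled.
*/
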
3415 #undef __FUNCT__
3416 #define __FUNCT__ "MatMPIAIJSetPreallocation"
3417 /*@C
3418    MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format
3419    (the default parallel PETSc format).  For good matrix assembly performance
3420    the user should preallocate the matrix storage by setting the parameters
3421    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3422    performance can be increased by more than a factor of 50.
3423 
3424    Collective on MPI_Comm
3425 
3426    Input Parameters:
3427 +  A - the matrix
3428 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
3429            (same value is used for all local rows)
3430 .  d_nnz - array containing the number of nonzeros in the various rows of the
3431            DIAGONAL portion of the local submatrix (possibly different for each row)
3432            or PETSC_NULL, if d_nz is used to specify the nonzero structure.
3433            The size of this array is equal to the number of local rows, i.e 'm'.
3434            You must leave room for the diagonal entry even if it is zero.
3435 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
3436            submatrix (same value is used for all local rows).
3437 -  o_nnz - array containing the number of nonzeros in the various rows of the
3438            OFF-DIAGONAL portion of the local submatrix (possibly different for
3439            each row) or PETSC_NULL, if o_nz is used to specify the nonzero
3440            structure. The size of this array is equal to the number
3441            of local rows, i.e 'm'.
3442 
3443    If the *_nnz parameter is given then the *_nz parameter is ignored
3444 
3445    The AIJ format (also called the Yale sparse matrix format or
3446    compressed row storage (CSR)), is fully compatible with standard Fortran 77
3447    storage.  The stored row and column indices begin with zero.
3448    See the <A href="../../docs/manual.pdf#nameddest=ch_mat">Mat chapter of the users manual</A> for details.
3449 
3450    The parallel matrix is partitioned such that the first m0 rows belong to
3451    process 0, the next m1 rows belong to process 1, the next m2 rows belong
3452    to process 2 etc., where m0,m1,m2,... are the input parameter 'm'.
3453 
3454    The DIAGONAL portion of the local submatrix of a processor can be defined
3455    as the submatrix which is obtained by extracting the part corresponding to
3456    the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the
3457    first row that belongs to the processor, r2 is the last row belonging to
3458    this processor, and c1-c2 is the range of indices of the local part of a
3459    vector suitable for applying the matrix to.  This is an mxn matrix.  In the
3460    common case of a square matrix, the row and column ranges are the same and
3461    the DIAGONAL part is also square. The remaining portion of the local
3462    submatrix (mxN) constitutes the OFF-DIAGONAL portion.
3463 
3464    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
3465 
3466    You can call MatGetInfo() to get information on how effective the preallocation was;
3467    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
3468    You can also run with the option -info and look for messages with the string
3469    malloc in them to see if additional memory allocation was needed.
3470 
3471    Example usage:
3472 
3473    Consider the following 8x8 matrix with 34 non-zero values, that is
3474    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
3475    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
3476    as follows:
3477 
3478 .vb
3479             1  2  0  |  0  3  0  |  0  4
3480     Proc0   0  5  6  |  7  0  0  |  8  0
3481             9  0 10  | 11  0  0  | 12  0
3482     -------------------------------------
3483            13  0 14  | 15 16 17  |  0  0
3484     Proc1   0 18  0  | 19 20 21  |  0  0
3485             0  0  0  | 22 23  0  | 24  0
3486     -------------------------------------
3487     Proc2  25 26 27  |  0  0 28  | 29  0
3488            30  0  0  | 31 32 33  |  0 34
3489 .ve
3490 
3491    This can be represented as a collection of submatrices as:
3492 
3493 .vb
3494       A B C
3495       D E F
3496       G H I
3497 .ve
3498 
3499    Where the submatrices A,B,C are owned by proc0, D,E,F are
3500    owned by proc1, G,H,I are owned by proc2.
3501 
3502    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3503    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3504    The 'M','N' parameters are 8,8, and have the same values on all procs.
3505 
3506    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
3507    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
3508    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
3509    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
3510    part as SeqAIJ matrices; e.g., proc1 will store [E] as a SeqAIJ
3511    matrix, and [DF] as another SeqAIJ matrix.
3512 
3513    When d_nz, o_nz parameters are specified, d_nz storage elements are
3514    allocated for every row of the local diagonal submatrix, and o_nz
3515    storage locations are allocated for every row of the OFF-DIAGONAL submat.
3516    One way to choose d_nz and o_nz is to use the maximum number of nonzeros
3517    per local row in the local DIAGONAL and OFF-DIAGONAL submatrices, respectively.
3518    In this case, the values of d_nz,o_nz are:
3519 .vb
3520      proc0 : dnz = 2, o_nz = 2
3521      proc1 : dnz = 3, o_nz = 2
3522      proc2 : dnz = 1, o_nz = 4
3523 .ve
3524    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
3525    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
3526    for proc2, i.e. we are using 12+15+10=37 storage locations to store
3527    34 values.
3528 
3529    When d_nnz, o_nnz parameters are specified, the storage is specified
3530    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
3531    In the above case the values for d_nnz,o_nnz are:
3532 .vb
3533      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
3534      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
3535      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
3536 .ve
3537    Here the space allocated is the sum of all the above values, i.e. 34, and
3538    hence the preallocation is perfect.
3539 
3540    Level: intermediate
3541 
3542 .keywords: matrix, aij, compressed row, sparse, parallel
3543 
3544 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIAIJ(), MatMPIAIJSetPreallocationCSR(),
3545           MPIAIJ, MatGetInfo()
3546 @*/
3547 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
3548 {
3549   PetscErrorCode ierr;
3550 
3551   PetscFunctionBegin;
3552   ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocation_C",(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr);
3553   PetscFunctionReturn(0);
3554 }
3555 
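/*
    Usage sketch (illustrative only), following the 8x8 example in the manual page above as
    seen from proc0 (3 local rows and columns):

      Mat      A;
      PetscInt d_nnz[] = {2,2,2},o_nnz[] = {2,2,2};
      ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
      ierr = MatSetSizes(A,3,3,8,8);CHKERRQ(ierr);
      ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
      ierr = MatMPIAIJSetPreallocation(A,0,d_nnz,0,o_nnz);CHKERRQ(ierr);

    The scalar d_nz/o_nz arguments (given as 0 here) are ignored because the nnz arrays are
    supplied.  Assembly then proceeds with the usual MatSetValues()/MatAssemblyBegin()/
    MatAssemblyEnd() sequence; the other processes make the same calls with their own local
    sizes and nnz arrays.
*/
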
3556 #undef __FUNCT__
3557 #define __FUNCT__ "MatCreateMPIAIJWithArrays"
3558 /*@
3559      MatCreateMPIAIJWithArrays - creates a MPI AIJ matrix using arrays that contain in standard
3560          CSR format the local rows.
3561 
3562    Collective on MPI_Comm
3563 
3564    Input Parameters:
3565 +  comm - MPI communicator
3566 .  m - number of local rows (Cannot be PETSC_DECIDE)
3567 .  n - This value should be the same as the local size used in creating the
3568        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
3569        calculated if N is given) For square matrices n is almost always m.
3570 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
3571 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
3572 .   i - row indices
3573 .   j - column indices
3574 -   a - matrix values
3575 
3576    Output Parameter:
3577 .   mat - the matrix
3578 
3579    Level: intermediate
3580 
3581    Notes:
3582        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
3583      thus you CANNOT change the matrix entries by changing the values of a[] after you have
3584      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3585 
3586        The i and j indices are 0 based, and the i indices are offsets into the local j array.
3587 
3588        The format used for the sparse matrix input is equivalent to a
3589     row-major ordering, i.e. for the following matrix the expected input data is
3590     as shown:
3591 
3592         1 0 0
3593         2 0 3     P0
3594        -------
3595         4 5 6     P1
3596 
3597      Process0 [P0]: rows_owned=[0,1]
3598         i =  {0,1,3}  [size = nrow+1  = 2+1]
3599         j =  {0,0,2}  [size = nz = 3]
3600         v =  {1,2,3}  [size = nz = 3]
3601 
3602      Process1 [P1]: rows_owned=[2]
3603         i =  {0,3}    [size = nrow+1  = 1+1]
3604         j =  {0,1,2}  [size = nz = 3]
3605         v =  {4,5,6}  [size = nz = 3]
3606 
3607 .keywords: matrix, aij, compressed row, sparse, parallel
3608 
3609 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
3610           MPIAIJ, MatCreateMPIAIJ(), MatCreateMPIAIJWithSplitArrays()
3611 @*/
3612 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
3613 {
3614   PetscErrorCode ierr;
3615 
3616   PetscFunctionBegin;
3617   if (i[0]) {
3618     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
3619   }
3620   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
3621   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
3622   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
3623   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
3624   ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr);
3625   PetscFunctionReturn(0);
3626 }
3627 
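/*
    Usage sketch (illustrative only), again using the 3x3 example from the manual page above
    as seen from process 0 (rows 0 and 1):

      Mat         A;
      PetscInt    i[] = {0,1,3};
      PetscInt    j[] = {0,0,2};
      PetscScalar a[] = {1.0,2.0,3.0};
      ierr = MatCreateMPIAIJWithArrays(PETSC_COMM_WORLD,2,PETSC_DECIDE,3,3,i,j,a,&A);CHKERRQ(ierr);

    Process 1 passes m = 1 and its own arrays i = {0,3}, j = {0,1,2}, a = {4,5,6}.  Because
    the arrays are copied, they may be reused or freed once the call returns.
*/
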
3628 #undef __FUNCT__
3629 #define __FUNCT__ "MatCreateMPIAIJ"
3630 /*@C
3631    MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format
3632    (the default parallel PETSc format).  For good matrix assembly performance
3633    the user should preallocate the matrix storage by setting the parameters
3634    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3635    performance can be increased by more than a factor of 50.
3636 
3637    Collective on MPI_Comm
3638 
3639    Input Parameters:
3640 +  comm - MPI communicator
3641 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
3642            This value should be the same as the local size used in creating the
3643            y vector for the matrix-vector product y = Ax.
3644 .  n - This value should be the same as the local size used in creating the
3645        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
3646        calculated if N is given) For square matrices n is almost always m.
3647 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
3648 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
3649 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
3650            (same value is used for all local rows)
3651 .  d_nnz - array containing the number of nonzeros in the various rows of the
3652            DIAGONAL portion of the local submatrix (possibly different for each row)
3653            or PETSC_NULL, if d_nz is used to specify the nonzero structure.
3654            The size of this array is equal to the number of local rows, i.e 'm'.
3655            You must leave room for the diagonal entry even if it is zero.
3656 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
3657            submatrix (same value is used for all local rows).
3658 -  o_nnz - array containing the number of nonzeros in the various rows of the
3659            OFF-DIAGONAL portion of the local submatrix (possibly different for
3660            each row) or PETSC_NULL, if o_nz is used to specify the nonzero
3661            structure. The size of this array is equal to the number
3662            of local rows, i.e 'm'.
3663 
3664    Output Parameter:
3665 .  A - the matrix
3666 
3667    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
3668    MatXXXXSetPreallocation() paradigm instead of this routine directly.
3669    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
3670 
3671    Notes:
3672    If the *_nnz parameter is given then the *_nz parameter is ignored
3673 
3674    m,n,M,N parameters specify the size of the matrix, and its partitioning across
3675    processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
3676    storage requirements for this matrix.
3677 
3678    If PETSC_DECIDE or  PETSC_DETERMINE is used for a particular argument on one
3679    processor than it must be used on all processors that share the object for
3680    that argument.
3681 
3682    The user MUST specify either the local or global matrix dimensions
3683    (possibly both).
3684 
3685    The parallel matrix is partitioned across processors such that the
3686    first m0 rows belong to process 0, the next m1 rows belong to
3687    process 1, the next m2 rows belong to process 2 etc., where
3688    m0,m1,m2,... are the input parameter 'm', i.e. each processor stores
3689    values corresponding to an [m x N] submatrix.
3690 
3691    The columns are logically partitioned with the n0 columns belonging
3692    to 0th partition, the next n1 columns belonging to the next
3693    partition etc., where n0,n1,n2,... are the input parameter 'n'.
3694 
3695    The DIAGONAL portion of the local submatrix on any given processor
3696    is the submatrix corresponding to the rows and columns m,n
3697    owned by the given processor, i.e. the diagonal matrix on
3698    process 0 is [m0 x n0], diagonal matrix on process 1 is [m1 x n1]
3699    etc. The remaining portion of the local submatrix [m x (N-n)]
3700    constitutes the OFF-DIAGONAL portion. The example below better
3701    illustrates this concept.
3702 
3703    For a square global matrix we define each processor's diagonal portion
3704    to be its local rows and the corresponding columns (a square submatrix);
3705    each processor's off-diagonal portion encompasses the remainder of the
3706    local matrix (a rectangular submatrix).
3707 
3708    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
3709 
3710    When calling this routine with a single process communicator, a matrix of
3711    type SEQAIJ is returned.  If a matrix of type MPIAIJ is desired for this
3712    type of communicator, use the construction mechanism:
3713      MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatSetSizes(A, m,n,M,N); MatMPIAIJSetPreallocation(A,...);
3714 
3715    By default, this format uses inodes (identical nodes) when possible.
3716    We search for consecutive rows with the same nonzero structure, thereby
3717    reusing matrix information to achieve increased efficiency.
3718 
3719    Options Database Keys:
3720 +  -mat_no_inode  - Do not use inodes
3721 .  -mat_inode_limit <limit> - Sets inode limit (max limit=5)
3722 -  -mat_aij_oneindex - Internally use indexing starting at 1
3723         rather than 0.  Note that when calling MatSetValues(),
3724         the user still MUST index entries starting at 0!
3725 
3726 
3727    Example usage:
3728 
3729    Consider the following 8x8 matrix with 34 non-zero values, that is
3730    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
3731    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
3732    as follows:
3733 
3734 .vb
3735             1  2  0  |  0  3  0  |  0  4
3736     Proc0   0  5  6  |  7  0  0  |  8  0
3737             9  0 10  | 11  0  0  | 12  0
3738     -------------------------------------
3739            13  0 14  | 15 16 17  |  0  0
3740     Proc1   0 18  0  | 19 20 21  |  0  0
3741             0  0  0  | 22 23  0  | 24  0
3742     -------------------------------------
3743     Proc2  25 26 27  |  0  0 28  | 29  0
3744            30  0  0  | 31 32 33  |  0 34
3745 .ve
3746 
3747    This can be represented as a collection of submatrices as:
3748 
3749 .vb
3750       A B C
3751       D E F
3752       G H I
3753 .ve
3754 
3755    Where the submatrices A,B,C are owned by proc0, D,E,F are
3756    owned by proc1, G,H,I are owned by proc2.
3757 
3758    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3759    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3760    The 'M','N' parameters are 8,8, and have the same values on all procs.
3761 
3762    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
3763    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
3764    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
3765    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
3766    part as SeqAIJ matrices; e.g., proc1 will store [E] as a SeqAIJ
3767    matrix, and [DF] as another SeqAIJ matrix.
3768 
3769    When d_nz, o_nz parameters are specified, d_nz storage elements are
3770    allocated for every row of the local diagonal submatrix, and o_nz
3771    storage locations are allocated for every row of the OFF-DIAGONAL submat.
3772    One way to choose d_nz and o_nz is to use the maximum number of nonzeros
3773    per local row in the local DIAGONAL and OFF-DIAGONAL submatrices, respectively.
3774    In this case, the values of d_nz,o_nz are:
3775 .vb
3776      proc0 : dnz = 2, o_nz = 2
3777      proc1 : dnz = 3, o_nz = 2
3778      proc2 : dnz = 1, o_nz = 4
3779 .ve
3780    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
3781    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
3782    for proc2, i.e. we are using 12+15+10=37 storage locations to store
3783    34 values.
3784 
3785    When d_nnz, o_nnz parameters are specified, the storage is specified
3786    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
3787    In the above case the values for d_nnz,o_nnz are:
3788 .vb
3789      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
3790      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
3791      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
3792 .ve
3793    Here the space allocated is the sum of all the above values, i.e. 34, and
3794    hence the preallocation is perfect.
3795 
3796    Level: intermediate
3797 
3798 .keywords: matrix, aij, compressed row, sparse, parallel
3799 
3800 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
3801           MPIAIJ, MatCreateMPIAIJWithArrays()
3802 @*/
3803 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
3804 {
3805   PetscErrorCode ierr;
3806   PetscMPIInt    size;
3807 
3808   PetscFunctionBegin;
3809   ierr = MatCreate(comm,A);CHKERRQ(ierr);
3810   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
3811   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3812   if (size > 1) {
3813     ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr);
3814     ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
3815   } else {
3816     ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr);
3817     ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr);
3818   }
3819   PetscFunctionReturn(0);
3820 }
3821 
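/*
    Usage sketch (illustrative only; the global sizes M and N and the preallocation hints
    are assumptions for the example):

      Mat A;
      ierr = MatCreateMPIAIJ(PETSC_COMM_WORLD,PETSC_DECIDE,PETSC_DECIDE,M,N,5,PETSC_NULL,2,PETSC_NULL,&A);CHKERRQ(ierr);

    or, following the recommended paradigm from the notes above,

      ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
      ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,M,N);CHKERRQ(ierr);
      ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
      ierr = MatMPIAIJSetPreallocation(A,5,PETSC_NULL,2,PETSC_NULL);CHKERRQ(ierr);

    The second form always yields an MPIAIJ matrix, whereas MatCreateMPIAIJ() returns a
    SEQAIJ matrix on a single-process communicator.
*/
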
3822 #undef __FUNCT__
3823 #define __FUNCT__ "MatMPIAIJGetSeqAIJ"
3824 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,PetscInt *colmap[])
3825 {
3826   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
3827 
3828   PetscFunctionBegin;
3829   *Ad     = a->A;
3830   *Ao     = a->B;
3831   *colmap = a->garray;
3832   PetscFunctionReturn(0);
3833 }
3834 
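/*
    Usage sketch (illustrative only): inspecting the two sequential blocks of an assembled
    MPIAIJ matrix A.  Ad is the diagonal block, Ao the off-diagonal block, and colmap maps
    Ao's local column numbers to global column numbers (the garray built during assembly).

      Mat      Ad,Ao;
      PetscInt *colmap;
      ierr = MatMPIAIJGetSeqAIJ(A,&Ad,&Ao,&colmap);CHKERRQ(ierr);

    Ad and Ao are references into A (no new copies are made), so the caller must not
    destroy them.
*/
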
3835 #undef __FUNCT__
3836 #define __FUNCT__ "MatSetColoring_MPIAIJ"
3837 PetscErrorCode MatSetColoring_MPIAIJ(Mat A,ISColoring coloring)
3838 {
3839   PetscErrorCode ierr;
3840   PetscInt       i;
3841   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
3842 
3843   PetscFunctionBegin;
3844   if (coloring->ctype == IS_COLORING_GLOBAL) {
3845     ISColoringValue *allcolors,*colors;
3846     ISColoring      ocoloring;
3847 
3848     /* set coloring for diagonal portion */
3849     ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr);
3850 
3851     /* set coloring for off-diagonal portion */
3852     ierr = ISAllGatherColors(((PetscObject)A)->comm,coloring->n,coloring->colors,PETSC_NULL,&allcolors);CHKERRQ(ierr);
3853     ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
3854     for (i=0; i<a->B->cmap->n; i++) {
3855       colors[i] = allcolors[a->garray[i]];
3856     }
3857     ierr = PetscFree(allcolors);CHKERRQ(ierr);
3858     ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
3859     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
3860     ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
3861   } else if (coloring->ctype == IS_COLORING_GHOSTED) {
3862     ISColoringValue *colors;
3863     PetscInt        *larray;
3864     ISColoring      ocoloring;
3865 
3866     /* set coloring for diagonal portion */
3867     ierr = PetscMalloc((a->A->cmap->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr);
3868     for (i=0; i<a->A->cmap->n; i++) {
3869       larray[i] = i + A->cmap->rstart;
3870     }
3871     ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->A->cmap->n,larray,PETSC_NULL,larray);CHKERRQ(ierr);
3872     ierr = PetscMalloc((a->A->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
3873     for (i=0; i<a->A->cmap->n; i++) {
3874       colors[i] = coloring->colors[larray[i]];
3875     }
3876     ierr = PetscFree(larray);CHKERRQ(ierr);
3877     ierr = ISColoringCreate(PETSC_COMM_SELF,coloring->n,a->A->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
3878     ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr);
3879     ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
3880 
3881     /* set coloring for off-diagonal portion */
3882     ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr);
3883     ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->B->cmap->n,a->garray,PETSC_NULL,larray);CHKERRQ(ierr);
3884     ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
3885     for (i=0; i<a->B->cmap->n; i++) {
3886       colors[i] = coloring->colors[larray[i]];
3887     }
3888     ierr = PetscFree(larray);CHKERRQ(ierr);
3889     ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
3890     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
3891     ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
3892   } else {
3893     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support ISColoringType %d",(int)coloring->ctype);
3894   }
3895 
3896   PetscFunctionReturn(0);
3897 }
3898 
3899 #if defined(PETSC_HAVE_ADIC)
3900 #undef __FUNCT__
3901 #define __FUNCT__ "MatSetValuesAdic_MPIAIJ"
3902 PetscErrorCode MatSetValuesAdic_MPIAIJ(Mat A,void *advalues)
3903 {
3904   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
3905   PetscErrorCode ierr;
3906 
3907   PetscFunctionBegin;
3908   ierr = MatSetValuesAdic_SeqAIJ(a->A,advalues);CHKERRQ(ierr);
3909   ierr = MatSetValuesAdic_SeqAIJ(a->B,advalues);CHKERRQ(ierr);
3910   PetscFunctionReturn(0);
3911 }
3912 #endif
3913 
3914 #undef __FUNCT__
3915 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ"
3916 PetscErrorCode MatSetValuesAdifor_MPIAIJ(Mat A,PetscInt nl,void *advalues)
3917 {
3918   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
3919   PetscErrorCode ierr;
3920 
3921   PetscFunctionBegin;
3922   ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr);
3923   ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr);
3924   PetscFunctionReturn(0);
3925 }
3926 
3927 #undef __FUNCT__
3928 #define __FUNCT__ "MatMerge"
3929 /*@
3930       MatMerge - Creates a single large PETSc matrix by concatenating sequential
3931                  matrices from each processor
3932 
3933     Collective on MPI_Comm
3934 
3935    Input Parameters:
3936 +    comm - the communicator the parallel matrix will live on
3937 .    inmat - the input sequential matrices
3938 .    n - number of local columns (or PETSC_DECIDE)
3939 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
3940 
3941    Output Parameter:
3942 .    outmat - the parallel matrix generated
3943 
3944     Level: advanced
3945 
3946    Notes: The number of columns of the matrix in EACH processor MUST be the same.
3947 
3948 @*/
3949 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
3950 {
3951   PetscErrorCode ierr;
3952   PetscInt       m,N,i,rstart,nnz,Ii,*dnz,*onz;
3953   PetscInt       *indx;
3954   PetscScalar    *values;
3955 
3956   PetscFunctionBegin;
3957   ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
3958   if (scall == MAT_INITIAL_MATRIX){
3959     /* count nonzeros in each row, for diagonal and off diagonal portion of matrix */
3960     if (n == PETSC_DECIDE){
3961       ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr);
3962     }
3963     ierr = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3964     rstart -= m;
3965 
3966     ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
3967     for (i=0;i<m;i++) {
3968       ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr);
3969       ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr);
3970       ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr);
3971     }
3972     /* This routine will ONLY return MPIAIJ type matrix */
3973     ierr = MatCreate(comm,outmat);CHKERRQ(ierr);
3974     ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
3975     ierr = MatSetType(*outmat,MATMPIAIJ);CHKERRQ(ierr);
3976     ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr);
3977     ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
3978 
3979   } else if (scall == MAT_REUSE_MATRIX){
3980     ierr = MatGetOwnershipRange(*outmat,&rstart,PETSC_NULL);CHKERRQ(ierr);
3981   } else {
3982     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
3983   }
3984 
3985   for (i=0;i<m;i++) {
3986     ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
3987     Ii    = i + rstart;
3988     ierr = MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
3989     ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
3990   }
3991   ierr = MatDestroy(inmat);CHKERRQ(ierr);
3992   ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3993   ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3994 
3995   PetscFunctionReturn(0);
3996 }
3997 
3998 #undef __FUNCT__
3999 #define __FUNCT__ "MatFileSplit"
4000 PetscErrorCode MatFileSplit(Mat A,char *outfile)
4001 {
4002   PetscErrorCode    ierr;
4003   PetscMPIInt       rank;
4004   PetscInt          m,N,i,rstart,nnz;
4005   size_t            len;
4006   const PetscInt    *indx;
4007   PetscViewer       out;
4008   char              *name;
4009   Mat               B;
4010   const PetscScalar *values;
4011 
4012   PetscFunctionBegin;
4013   ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr);
4014   ierr = MatGetSize(A,0,&N);CHKERRQ(ierr);
4015   /* Should this be the type of the diagonal block of A? */
4016   ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr);
4017   ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr);
4018   ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr);
4019   ierr = MatSeqAIJSetPreallocation(B,0,PETSC_NULL);CHKERRQ(ierr);
4020   ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr);
4021   for (i=0;i<m;i++) {
4022     ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
4023     ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
4024     ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
4025   }
4026   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4027   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4028 
4029   ierr = MPI_Comm_rank(((PetscObject)A)->comm,&rank);CHKERRQ(ierr);
4030   ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr);
4031   ierr = PetscMalloc((len+5)*sizeof(char),&name);CHKERRQ(ierr);
4032   sprintf(name,"%s.%d",outfile,rank);
4033   ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr);
4034   ierr = PetscFree(name);
4035   ierr = MatView(B,out);CHKERRQ(ierr);
4036   ierr = PetscViewerDestroy(out);CHKERRQ(ierr);
4037   ierr = MatDestroy(B);CHKERRQ(ierr);
4038   PetscFunctionReturn(0);
4039 }
4040 
4041 EXTERN PetscErrorCode MatDestroy_MPIAIJ(Mat);
4042 #undef __FUNCT__
4043 #define __FUNCT__ "MatDestroy_MPIAIJ_SeqsToMPI"
4044 PetscErrorCode PETSCMAT_DLLEXPORT MatDestroy_MPIAIJ_SeqsToMPI(Mat A)
4045 {
4046   PetscErrorCode       ierr;
4047   Mat_Merge_SeqsToMPI  *merge=PETSC_NULL;
4048   PetscContainer       container;
4049 
4050   PetscFunctionBegin;
4051   ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr);
4052   if (container) {
4053     ierr = PetscContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr);
4054     ierr = PetscFree(merge->id_r);CHKERRQ(ierr);
4055     ierr = PetscFree(merge->len_s);CHKERRQ(ierr);
4056     ierr = PetscFree(merge->len_r);CHKERRQ(ierr);
4057     ierr = PetscFree(merge->bi);CHKERRQ(ierr);
4058     ierr = PetscFree(merge->bj);CHKERRQ(ierr);
4059     ierr = PetscFree(merge->buf_ri[0]);CHKERRQ(ierr);
4060     ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr);
4061     ierr = PetscFree(merge->buf_rj[0]);CHKERRQ(ierr);
4062     ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr);
4063     ierr = PetscFree(merge->coi);CHKERRQ(ierr);
4064     ierr = PetscFree(merge->coj);CHKERRQ(ierr);
4065     ierr = PetscFree(merge->owners_co);CHKERRQ(ierr);
4066     ierr = PetscLayoutDestroy(merge->rowmap);CHKERRQ(ierr);
4067 
4068     ierr = PetscContainerDestroy(container);CHKERRQ(ierr);
4069     ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr);
4070   }
4071   ierr = PetscFree(merge);CHKERRQ(ierr);
4072 
4073   ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr);
4074   PetscFunctionReturn(0);
4075 }
4076 
4077 #include "../src/mat/utils/freespace.h"
4078 #include "petscbt.h"
4079 
4080 #undef __FUNCT__
4081 #define __FUNCT__ "MatMerge_SeqsToMPINumeric"
4082 /*@C
4083       MatMerge_SeqsToMPI - Creates an MPIAIJ matrix by adding sequential
4084                  matrices from each processor
4085 
4086     Collective on MPI_Comm
4087 
4088    Input Parameters:
4089 +    comm - the communicator the parallel matrix will live on
4090 .    seqmat - the input sequential matrix on each process
4091 .    m - number of local rows (or PETSC_DECIDE)
4092 .    n - number of local columns (or PETSC_DECIDE)
4093 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4094 
4095    Output Parameter:
4096 .    mpimat - the parallel matrix generated
4097 
4098     Level: advanced
4099 
4100    Notes:
4101      The dimensions of the sequential matrix on each processor MUST be the same.
4102      The input seqmat is included in the container composed with mpimat under the name "MatMergeSeqsToMPI", and will be
4103      destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat.
4104 @*/
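/*
   Illustrative usage sketch (not compiled): the driver MatMerge_SeqsToMPI() below calls the
   symbolic routine once and the numeric routine on every call, so the parallel structure can be
   reused when only the numerical values of seqmat change:

     Mat C;
     ierr = MatMerge_SeqsToMPI(comm,seqmat,PETSC_DECIDE,PETSC_DECIDE,MAT_INITIAL_MATRIX,&C);CHKERRQ(ierr);
     ...  update the values of seqmat, same nonzero pattern  ...
     ierr = MatMerge_SeqsToMPI(comm,seqmat,PETSC_DECIDE,PETSC_DECIDE,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr);
*/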
4105 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPINumeric(Mat seqmat,Mat mpimat)
4106 {
4107   PetscErrorCode       ierr;
4108   MPI_Comm             comm=((PetscObject)mpimat)->comm;
4109   Mat_SeqAIJ           *a=(Mat_SeqAIJ*)seqmat->data;
4110   PetscMPIInt          size,rank,taga,*len_s;
4111   PetscInt             N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj=a->j;
4112   PetscInt             proc,m;
4113   PetscInt             **buf_ri,**buf_rj;
4114   PetscInt             k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj;
4115   PetscInt             nrows,**buf_ri_k,**nextrow,**nextai;
4116   MPI_Request          *s_waits,*r_waits;
4117   MPI_Status           *status;
4118   MatScalar            *aa=a->a;
4119   MatScalar            **abuf_r,*ba_i;
4120   Mat_Merge_SeqsToMPI  *merge;
4121   PetscContainer       container;
4122 
4123   PetscFunctionBegin;
4124   ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
4125 
4126   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4127   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4128 
4129   ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr);
4130   if (!container) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Matrix mpimat does not have an attached MatMergeSeqsToMPI container; call MatMerge_SeqsToMPISymbolic() first");
4131   ierr  = PetscContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr);
4132 
4133   bi     = merge->bi;
4134   bj     = merge->bj;
4135   buf_ri = merge->buf_ri;
4136   buf_rj = merge->buf_rj;
4137 
4138   ierr   = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr);
4139   owners = merge->rowmap->range;
4140   len_s  = merge->len_s;
4141 
4142   /* send and recv matrix values */
4143   /*-----------------------------*/
4144   ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr);
4145   ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr);
4146 
4147   ierr = PetscMalloc((merge->nsend+1)*sizeof(MPI_Request),&s_waits);CHKERRQ(ierr);
4148   for (proc=0,k=0; proc<size; proc++){
4149     if (!len_s[proc]) continue;
4150     i = owners[proc];
4151     ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr);
4152     k++;
4153   }
4154 
4155   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);}
4156   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);}
4157   ierr = PetscFree(status);CHKERRQ(ierr);
4158 
4159   ierr = PetscFree(s_waits);CHKERRQ(ierr);
4160   ierr = PetscFree(r_waits);CHKERRQ(ierr);
4161 
4162   /* insert mat values of mpimat */
4163   /*----------------------------*/
4164   ierr = PetscMalloc(N*sizeof(PetscScalar),&ba_i);CHKERRQ(ierr);
4165   ierr = PetscMalloc3(merge->nrecv,PetscInt*,&buf_ri_k,merge->nrecv,PetscInt*,&nextrow,merge->nrecv,PetscInt*,&nextai);CHKERRQ(ierr);
4166 
4167   for (k=0; k<merge->nrecv; k++){
4168     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th received i-structure */
4169     nrows = *(buf_ri_k[k]);
4170     nextrow[k]  = buf_ri_k[k]+1;  /* next row number of k-th received i-structure */
4171     nextai[k]   = buf_ri_k[k] + (nrows + 1);/* points to the next i-structure of k-th received i-structure  */
4172   }
4173 
4174   /* set values of ba */
4175   m = merge->rowmap->n;
4176   for (i=0; i<m; i++) {
4177     arow = owners[rank] + i;
4178     bj_i = bj+bi[i];  /* col indices of the i-th row of mpimat */
4179     bnzi = bi[i+1] - bi[i];
4180     ierr = PetscMemzero(ba_i,bnzi*sizeof(PetscScalar));CHKERRQ(ierr);
4181 
4182     /* add local non-zero vals of this proc's seqmat into ba */
4183     anzi = ai[arow+1] - ai[arow];
4184     aj   = a->j + ai[arow];
4185     aa   = a->a + ai[arow];
4186     nextaj = 0;
4187     for (j=0; nextaj<anzi; j++){
4188       if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */
4189         ba_i[j] += aa[nextaj++];
4190       }
4191     }
4192 
4193     /* add received vals into ba */
4194     for (k=0; k<merge->nrecv; k++){ /* k-th received message */
4195       /* i-th row */
4196       if (i == *nextrow[k]) {
4197         anzi = *(nextai[k]+1) - *nextai[k];
4198         aj   = buf_rj[k] + *(nextai[k]);
4199         aa   = abuf_r[k] + *(nextai[k]);
4200         nextaj = 0;
4201         for (j=0; nextaj<anzi; j++){
4202           if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */
4203             ba_i[j] += aa[nextaj++];
4204           }
4205         }
4206         nextrow[k]++; nextai[k]++;
4207       }
4208     }
4209     ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr);
4210   }
4211   ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4212   ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4213 
4214   ierr = PetscFree(abuf_r[0]);CHKERRQ(ierr);
4215   ierr = PetscFree(abuf_r);CHKERRQ(ierr);
4216   ierr = PetscFree(ba_i);CHKERRQ(ierr);
4217   ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
4218   ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
4219   PetscFunctionReturn(0);
4220 }
4221 
4222 #undef __FUNCT__
4223 #define __FUNCT__ "MatMerge_SeqsToMPISymbolic"
4224 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPISymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat)
4225 {
4226   PetscErrorCode       ierr;
4227   Mat                  B_mpi;
4228   Mat_SeqAIJ           *a=(Mat_SeqAIJ*)seqmat->data;
4229   PetscMPIInt          size,rank,tagi,tagj,*len_s,*len_si,*len_ri;
4230   PetscInt             **buf_rj,**buf_ri,**buf_ri_k;
4231   PetscInt             M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j;
4232   PetscInt             len,proc,*dnz,*onz;
4233   PetscInt             k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0;
4234   PetscInt             nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai;
4235   MPI_Request          *si_waits,*sj_waits,*ri_waits,*rj_waits;
4236   MPI_Status           *status;
4237   PetscFreeSpaceList   free_space=PETSC_NULL,current_space=PETSC_NULL;
4238   PetscBT              lnkbt;
4239   Mat_Merge_SeqsToMPI  *merge;
4240   PetscContainer       container;
4241 
4242   PetscFunctionBegin;
4243   ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4244 
4245   /* make sure it is a PETSc comm */
4246   ierr = PetscCommDuplicate(comm,&comm,PETSC_NULL);CHKERRQ(ierr);
4247   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4248   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4249 
4250   ierr = PetscNew(Mat_Merge_SeqsToMPI,&merge);CHKERRQ(ierr);
4251   ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr);
4252 
4253   /* determine row ownership */
4254   /*---------------------------------------------------------*/
4255   ierr = PetscLayoutCreate(comm,&merge->rowmap);CHKERRQ(ierr);
4256   ierr = PetscLayoutSetLocalSize(merge->rowmap,m);CHKERRQ(ierr);
4257   ierr = PetscLayoutSetSize(merge->rowmap,M);CHKERRQ(ierr);
4258   ierr = PetscLayoutSetBlockSize(merge->rowmap,1);CHKERRQ(ierr);
4259   ierr = PetscLayoutSetUp(merge->rowmap);CHKERRQ(ierr);
4260   ierr = PetscMalloc(size*sizeof(PetscMPIInt),&len_si);CHKERRQ(ierr);
4261   ierr = PetscMalloc(size*sizeof(PetscMPIInt),&merge->len_s);CHKERRQ(ierr);
4262 
4263   m      = merge->rowmap->n;
4264   M      = merge->rowmap->N;
4265   owners = merge->rowmap->range;
4266 
4267   /* determine the number of messages to send, their lengths */
4268   /*---------------------------------------------------------*/
4269   len_s  = merge->len_s;
4270 
4271   len = 0;  /* length of buf_si[] */
4272   merge->nsend = 0;
4273   for (proc=0; proc<size; proc++){
4274     len_si[proc] = 0;
4275     if (proc == rank){
4276       len_s[proc] = 0;
4277     } else {
4278       len_si[proc] = owners[proc+1] - owners[proc] + 1;
4279       len_s[proc] = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */
4280     }
4281     if (len_s[proc]) {
4282       merge->nsend++;
4283       nrows = 0;
4284       for (i=owners[proc]; i<owners[proc+1]; i++){
4285         if (ai[i+1] > ai[i]) nrows++;
4286       }
4287       len_si[proc] = 2*(nrows+1);
4288       len += len_si[proc];
4289     }
4290   }
4291 
4292   /* determine the number and length of messages to receive for ij-structure */
4293   /*-------------------------------------------------------------------------*/
4294   ierr = PetscGatherNumberOfMessages(comm,PETSC_NULL,len_s,&merge->nrecv);CHKERRQ(ierr);
4295   ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr);
4296 
4297   /* post the Irecv of j-structure */
4298   /*-------------------------------*/
4299   ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr);
4300   ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr);
4301 
4302   /* post the Isend of j-structure */
4303   /*--------------------------------*/
4304   ierr = PetscMalloc2(merge->nsend,MPI_Request,&si_waits,merge->nsend,MPI_Request,&sj_waits);CHKERRQ(ierr);
4305 
4306   for (proc=0, k=0; proc<size; proc++){
4307     if (!len_s[proc]) continue;
4308     i = owners[proc];
4309     ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr);
4310     k++;
4311   }
4312 
4313   /* receives and sends of j-structure are complete */
4314   /*------------------------------------------------*/
4315   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);}
4316   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);}
4317 
4318   /* send and recv i-structure */
4319   /*---------------------------*/
4320   ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr);
4321   ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr);
4322 
4323   ierr = PetscMalloc((len+1)*sizeof(PetscInt),&buf_s);CHKERRQ(ierr);
4324   buf_si = buf_s;  /* points to the beginning of k-th msg to be sent */
4325   for (proc=0,k=0; proc<size; proc++){
4326     if (!len_s[proc]) continue;
4327     /* form outgoing message for i-structure:
4328          buf_si[0]:                 nrows to be sent
4329                [1:nrows]:           row index (local to the owning process)
4330                [nrows+1:2*nrows+1]: i-structure index
4331     */
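    /* Example (values are illustrative only): if local rows 3 and 7, with 2 and 5 nonzeros,
       are the only nonempty rows destined for [proc], then nrows = 2 and
         buf_si = {2, 3, 7, 0, 2, 7}
       i.e. the row count, the two local row indices, and the i-structure {0, 2, 7}. */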
4332     /*-------------------------------------------*/
4333     nrows = len_si[proc]/2 - 1;
4334     buf_si_i    = buf_si + nrows+1;
4335     buf_si[0]   = nrows;
4336     buf_si_i[0] = 0;
4337     nrows = 0;
4338     for (i=owners[proc]; i<owners[proc+1]; i++){
4339       anzi = ai[i+1] - ai[i];
4340       if (anzi) {
4341         buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */
4342         buf_si[nrows+1] = i-owners[proc]; /* local row index */
4343         nrows++;
4344       }
4345     }
4346     ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr);
4347     k++;
4348     buf_si += len_si[proc];
4349   }
4350 
4351   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);}
4352   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);}
4353 
4354   ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr);
4355   for (i=0; i<merge->nrecv; i++){
4356     ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr);
4357   }
4358 
4359   ierr = PetscFree(len_si);CHKERRQ(ierr);
4360   ierr = PetscFree(len_ri);CHKERRQ(ierr);
4361   ierr = PetscFree(rj_waits);CHKERRQ(ierr);
4362   ierr = PetscFree2(si_waits,sj_waits);CHKERRQ(ierr);
4363   ierr = PetscFree(ri_waits);CHKERRQ(ierr);
4364   ierr = PetscFree(buf_s);CHKERRQ(ierr);
4365   ierr = PetscFree(status);CHKERRQ(ierr);
4366 
4367   /* compute a local seq matrix in each processor */
4368   /*----------------------------------------------*/
4369   /* allocate bi array and free space for accumulating nonzero column info */
4370   ierr = PetscMalloc((m+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr);
4371   bi[0] = 0;
4372 
4373   /* create and initialize a linked list */
4374   nlnk = N+1;
4375   ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4376 
4377   /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */
4378   len = 0;
4379   len  = ai[owners[rank+1]] - ai[owners[rank]];
4380   ierr = PetscFreeSpaceGet((PetscInt)(2*len+1),&free_space);CHKERRQ(ierr);
4381   current_space = free_space;
4382 
4383   /* determine symbolic info for each local row */
4384   ierr = PetscMalloc3(merge->nrecv,PetscInt*,&buf_ri_k,merge->nrecv,PetscInt*,&nextrow,merge->nrecv,PetscInt*,&nextai);CHKERRQ(ierr);
4385 
4386   for (k=0; k<merge->nrecv; k++){
4387     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th received i-structure */
4388     nrows = *buf_ri_k[k];
4389     nextrow[k]  = buf_ri_k[k] + 1;  /* next row number of k-th received i-structure */
4390     nextai[k]   = buf_ri_k[k] + (nrows + 1);/* points to the next i-structure of k-th received i-structure  */
4391   }
4392 
4393   ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
4394   len = 0;
4395   for (i=0;i<m;i++) {
4396     bnzi   = 0;
4397     /* add local non-zero cols of this proc's seqmat into lnk */
4398     arow   = owners[rank] + i;
4399     anzi   = ai[arow+1] - ai[arow];
4400     aj     = a->j + ai[arow];
4401     ierr = PetscLLAdd(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4402     bnzi += nlnk;
4403     /* add received col data into lnk */
4404     for (k=0; k<merge->nrecv; k++){ /* k-th received message */
4405       if (i == *nextrow[k]) { /* i-th row */
4406         anzi = *(nextai[k]+1) - *nextai[k];
4407         aj   = buf_rj[k] + *nextai[k];
4408         ierr = PetscLLAdd(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4409         bnzi += nlnk;
4410         nextrow[k]++; nextai[k]++;
4411       }
4412     }
4413     if (len < bnzi) len = bnzi;  /* =max(bnzi) */
4414 
4415     /* if free space is not available, make more free space */
4416     if (current_space->local_remaining<bnzi) {
4417       ierr = PetscFreeSpaceGet(bnzi+current_space->total_array_size,&current_space);CHKERRQ(ierr);
4418       nspacedouble++;
4419     }
4420     /* copy data into free space, then initialize lnk */
4421     ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
4422     ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr);
4423 
4424     current_space->array           += bnzi;
4425     current_space->local_used      += bnzi;
4426     current_space->local_remaining -= bnzi;
4427 
4428     bi[i+1] = bi[i] + bnzi;
4429   }
4430 
4431   ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
4432 
4433   ierr = PetscMalloc((bi[m]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr);
4434   ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr);
4435   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
4436 
4437   /* create symbolic parallel matrix B_mpi */
4438   /*---------------------------------------*/
4439   ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr);
4440   if (n==PETSC_DECIDE) {
4441     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr);
4442   } else {
4443     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
4444   }
4445   ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr);
4446   ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr);
4447   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
4448 
4449   /* B_mpi is not ready for use - assembly will be done by MatMerge_SeqsToMPINumeric() */
4450   B_mpi->assembled     = PETSC_FALSE;
4451   B_mpi->ops->destroy  = MatDestroy_MPIAIJ_SeqsToMPI;
4452   merge->bi            = bi;
4453   merge->bj            = bj;
4454   merge->buf_ri        = buf_ri;
4455   merge->buf_rj        = buf_rj;
4456   merge->coi           = PETSC_NULL;
4457   merge->coj           = PETSC_NULL;
4458   merge->owners_co     = PETSC_NULL;
4459 
4460   /* attach the supporting struct to B_mpi for reuse */
4461   ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
4462   ierr = PetscContainerSetPointer(container,merge);CHKERRQ(ierr);
4463   ierr = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr);
4464   *mpimat = B_mpi;
4465 
4466   ierr = PetscCommDestroy(&comm);CHKERRQ(ierr);
4467   ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4468   PetscFunctionReturn(0);
4469 }
4470 
4471 #undef __FUNCT__
4472 #define __FUNCT__ "MatMerge_SeqsToMPI"
4473 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPI(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat)
4474 {
4475   PetscErrorCode   ierr;
4476 
4477   PetscFunctionBegin;
4478   ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4479   if (scall == MAT_INITIAL_MATRIX){
4480     ierr = MatMerge_SeqsToMPISymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr);
4481   }
4482   ierr = MatMerge_SeqsToMPINumeric(seqmat,*mpimat);CHKERRQ(ierr);
4483   ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4484   PetscFunctionReturn(0);
4485 }
4486 
4487 #undef __FUNCT__
4488 #define __FUNCT__ "MatGetLocalMat"
4489 /*@
4490      MatGetLocalMat - Creates a SeqAIJ matrix by taking all the local rows of A (diagonal and off-diagonal parts combined)
4491 
4492     Not Collective
4493 
4494    Input Parameters:
4495 +    A - the matrix
4496 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4497 
4498    Output Parameter:
4499 .    A_loc - the local sequential matrix generated
4500 
4501     Level: developer
4502 
4503 @*/
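/*
   Illustrative usage sketch (not compiled): extract the local rows of a parallel AIJ matrix A as
   a sequential matrix, and refresh only the numerical values on later calls:

     Mat Aloc;
     ierr = MatGetLocalMat(A,MAT_INITIAL_MATRIX,&Aloc);CHKERRQ(ierr);
     ...  A receives new values with the same nonzero pattern  ...
     ierr = MatGetLocalMat(A,MAT_REUSE_MATRIX,&Aloc);CHKERRQ(ierr);
     ierr = MatDestroy(Aloc);CHKERRQ(ierr);
*/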
4504 PetscErrorCode PETSCMAT_DLLEXPORT MatGetLocalMat(Mat A,MatReuse scall,Mat *A_loc)
4505 {
4506   PetscErrorCode  ierr;
4507   Mat_MPIAIJ      *mpimat=(Mat_MPIAIJ*)A->data;
4508   Mat_SeqAIJ      *mat,*a=(Mat_SeqAIJ*)(mpimat->A)->data,*b=(Mat_SeqAIJ*)(mpimat->B)->data;
4509   PetscInt        *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*cmap=mpimat->garray;
4510   MatScalar       *aa=a->a,*ba=b->a,*cam;
4511   PetscScalar     *ca;
4512   PetscInt        am=A->rmap->n,i,j,k,cstart=A->cmap->rstart;
4513   PetscInt        *ci,*cj,col,ncols_d,ncols_o,jo;
4514 
4515   PetscFunctionBegin;
4516   ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
4517   if (scall == MAT_INITIAL_MATRIX){
4518     ierr = PetscMalloc((1+am)*sizeof(PetscInt),&ci);CHKERRQ(ierr);
4519     ci[0] = 0;
4520     for (i=0; i<am; i++){
4521       ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]);
4522     }
4523     ierr = PetscMalloc((1+ci[am])*sizeof(PetscInt),&cj);CHKERRQ(ierr);
4524     ierr = PetscMalloc((1+ci[am])*sizeof(PetscScalar),&ca);CHKERRQ(ierr);
4525     k = 0;
4526     for (i=0; i<am; i++) {
4527       ncols_o = bi[i+1] - bi[i];
4528       ncols_d = ai[i+1] - ai[i];
4529       /* off-diagonal portion of A */
4530       for (jo=0; jo<ncols_o; jo++) {
4531         col = cmap[*bj];
4532         if (col >= cstart) break;
4533         cj[k]   = col; bj++;
4534         ca[k++] = *ba++;
4535       }
4536       /* diagonal portion of A */
4537       for (j=0; j<ncols_d; j++) {
4538         cj[k]   = cstart + *aj++;
4539         ca[k++] = *aa++;
4540       }
4541       /* off-diagonal portion of A */
4542       for (j=jo; j<ncols_o; j++) {
4543         cj[k]   = cmap[*bj++];
4544         ca[k++] = *ba++;
4545       }
4546     }
4547     /* put together the new matrix */
4548     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr);
4549     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
4550     /* Since these are PETSc arrays, change flags to free them as necessary. */
4551     mat          = (Mat_SeqAIJ*)(*A_loc)->data;
4552     mat->free_a  = PETSC_TRUE;
4553     mat->free_ij = PETSC_TRUE;
4554     mat->nonew   = 0;
4555   } else if (scall == MAT_REUSE_MATRIX){
4556     mat=(Mat_SeqAIJ*)(*A_loc)->data;
4557     ci = mat->i; cj = mat->j; cam = mat->a;
4558     for (i=0; i<am; i++) {
4559       /* off-diagonal portion of A */
4560       ncols_o = bi[i+1] - bi[i];
4561       for (jo=0; jo<ncols_o; jo++) {
4562         col = cmap[*bj];
4563         if (col >= cstart) break;
4564         *cam++ = *ba++; bj++;
4565       }
4566       /* diagonal portion of A */
4567       ncols_d = ai[i+1] - ai[i];
4568       for (j=0; j<ncols_d; j++) *cam++ = *aa++;
4569       /* off-diagonal portion of A */
4570       for (j=jo; j<ncols_o; j++) {
4571         *cam++ = *ba++; bj++;
4572       }
4573     }
4574   } else {
4575     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
4576   }
4577 
4578   ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
4579   PetscFunctionReturn(0);
4580 }
4581 
4582 #undef __FUNCT__
4583 #define __FUNCT__ "MatGetLocalMatCondensed"
4584 /*@C
4585      MatGetLocalMatCondensed - Creates a SeqAIJ matrix by taking all the local rows and NON-ZERO columns of A
4586 
4587     Not Collective
4588 
4589    Input Parameters:
4590 +    A - the matrix
4591 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4592 -    row, col - index sets of rows and columns to extract (or PETSC_NULL)
4593 
4594    Output Parameter:
4595 .    A_loc - the local sequential matrix generated
4596 
4597     Level: developer
4598 
4599 @*/
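/*
   Illustrative usage sketch (not compiled): passing PETSC_NULL for row and col lets the routine
   select all local rows and the nonzero columns of A automatically:

     Mat Aloc;
     ierr = MatGetLocalMatCondensed(A,MAT_INITIAL_MATRIX,PETSC_NULL,PETSC_NULL,&Aloc);CHKERRQ(ierr);
*/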
4600 PetscErrorCode PETSCMAT_DLLEXPORT MatGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc)
4601 {
4602   Mat_MPIAIJ        *a=(Mat_MPIAIJ*)A->data;
4603   PetscErrorCode    ierr;
4604   PetscInt          i,start,end,ncols,nzA,nzB,*cmap,imark,*idx;
4605   IS                isrowa,iscola;
4606   Mat               *aloc;
4607 
4608   PetscFunctionBegin;
4609   ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
4610   if (!row){
4611     start = A->rmap->rstart; end = A->rmap->rend;
4612     ierr = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr);
4613   } else {
4614     isrowa = *row;
4615   }
4616   if (!col){
4617     start = A->cmap->rstart;
4618     cmap  = a->garray;
4619     nzA   = a->A->cmap->n;
4620     nzB   = a->B->cmap->n;
4621     ierr  = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr);
4622     ncols = 0;
4623     for (i=0; i<nzB; i++) {
4624       if (cmap[i] < start) idx[ncols++] = cmap[i];
4625       else break;
4626     }
4627     imark = i;
4628     for (i=0; i<nzA; i++) idx[ncols++] = start + i;
4629     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i];
4630     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&iscola);CHKERRQ(ierr);
4631   } else {
4632     iscola = *col;
4633   }
4634   if (scall != MAT_INITIAL_MATRIX){
4635     ierr = PetscMalloc(sizeof(Mat),&aloc);CHKERRQ(ierr);
4636     aloc[0] = *A_loc;
4637   }
4638   ierr = MatGetSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr);
4639   *A_loc = aloc[0];
4640   ierr = PetscFree(aloc);CHKERRQ(ierr);
4641   if (!row){
4642     ierr = ISDestroy(isrowa);CHKERRQ(ierr);
4643   }
4644   if (!col){
4645     ierr = ISDestroy(iscola);CHKERRQ(ierr);
4646   }
4647   ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
4648   PetscFunctionReturn(0);
4649 }
4650 
4651 #undef __FUNCT__
4652 #define __FUNCT__ "MatGetBrowsOfAcols"
4653 /*@C
4654     MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns of local A
4655 
4656     Collective on Mat
4657 
4658    Input Parameters:
4659 +    A,B - the matrices in mpiaij format
4660 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4661 -    rowb, colb - index sets of rows and columns of B to extract (or PETSC_NULL)
4662 
4663    Output Parameter:
4664 .    brstart - row index of B_seq at which B's own local rows begin (the next B->rmap->n rows of B_seq are taken from B's local rows)
4665 .    brstart - row index of B_seq from which next B->rmap->n rows are taken from B's local rows
4666 -    B_seq - the sequential matrix generated
4667 
4668     Level: developer
4669 
4670 @*/
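/*
   Illustrative usage sketch (not compiled): the index sets and brstart produced by the first call
   are kept by the caller and passed back for a MAT_REUSE_MATRIX call:

     IS       rowb = PETSC_NULL,colb = PETSC_NULL;
     PetscInt brstart;
     Mat      Bseq = PETSC_NULL;
     ierr = MatGetBrowsOfAcols(A,B,MAT_INITIAL_MATRIX,&rowb,&colb,&brstart,&Bseq);CHKERRQ(ierr);
     ...  B receives new values with the same nonzero pattern  ...
     ierr = MatGetBrowsOfAcols(A,B,MAT_REUSE_MATRIX,&rowb,&colb,&brstart,&Bseq);CHKERRQ(ierr);
*/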
4671 PetscErrorCode PETSCMAT_DLLEXPORT MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,PetscInt *brstart,Mat *B_seq)
4672 {
4673   Mat_MPIAIJ        *a=(Mat_MPIAIJ*)A->data;
4674   PetscErrorCode    ierr;
4675   PetscInt          *idx,i,start,ncols,nzA,nzB,*cmap,imark;
4676   IS                isrowb,iscolb;
4677   Mat               *bseq;
4678 
4679   PetscFunctionBegin;
4680   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend){
4681     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
4682   }
4683   ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
4684 
4685   if (scall == MAT_INITIAL_MATRIX){
4686     start = A->cmap->rstart;
4687     cmap  = a->garray;
4688     nzA   = a->A->cmap->n;
4689     nzB   = a->B->cmap->n;
4690     ierr  = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr);
4691     ncols = 0;
4692     for (i=0; i<nzB; i++) {  /* row < local row index */
4693       if (cmap[i] < start) idx[ncols++] = cmap[i];
4694       else break;
4695     }
4696     imark = i;
4697     for (i=0; i<nzA; i++) idx[ncols++] = start + i;  /* local rows */
4698     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */
4699     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&isrowb);CHKERRQ(ierr);
4700     *brstart = imark;
4701     ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr);
4702   } else {
4703     if (!rowb || !colb) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX");
4704     isrowb = *rowb; iscolb = *colb;
4705     ierr = PetscMalloc(sizeof(Mat),&bseq);CHKERRQ(ierr);
4706     bseq[0] = *B_seq;
4707   }
4708   ierr = MatGetSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr);
4709   *B_seq = bseq[0];
4710   ierr = PetscFree(bseq);CHKERRQ(ierr);
4711   if (!rowb){
4712     ierr = ISDestroy(isrowb);CHKERRQ(ierr);
4713   } else {
4714     *rowb = isrowb;
4715   }
4716   if (!colb){
4717     ierr = ISDestroy(iscolb);CHKERRQ(ierr);
4718   } else {
4719     *colb = iscolb;
4720   }
4721   ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
4722   PetscFunctionReturn(0);
4723 }
4724 
4725 #undef __FUNCT__
4726 #define __FUNCT__ "MatGetBrowsOfAoCols"
4727 /*@C
4728     MatGetBrowsOfAoCols - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns
4729     of the OFF-DIAGONAL portion of local A
4730 
4731     Collective on Mat
4732 
4733    Input Parameters:
4734 +    A,B - the matrices in mpiaij format
4735 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4736 .    startsj - starting points of the rows in B's outgoing j/a buffers, saved for MAT_REUSE_MATRIX (or PETSC_NULL)
4737 .    startsj_r - similar to startsj, but for the incoming (receive) buffers
4738 -    bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or PETSC_NULL)
4739 
4740    Output Parameter:
4741 .    B_oth - the sequential matrix generated
4742 
4743     Level: developer
4744 
4745 @*/
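/*
   Illustrative usage sketch (not compiled): the communication offsets and the value buffer
   returned through startsj, startsj_r and bufa_ptr are kept by the caller so that a later
   MAT_REUSE_MATRIX call only exchanges numerical values:

     PetscInt  *startsj = PETSC_NULL,*startsj_r = PETSC_NULL;
     MatScalar *bufa = PETSC_NULL;
     Mat       Both = PETSC_NULL;
     ierr = MatGetBrowsOfAoCols(A,B,MAT_INITIAL_MATRIX,&startsj,&startsj_r,&bufa,&Both);CHKERRQ(ierr);
     ...  B receives new values with the same nonzero pattern  ...
     ierr = MatGetBrowsOfAoCols(A,B,MAT_REUSE_MATRIX,&startsj,&startsj_r,&bufa,&Both);CHKERRQ(ierr);
*/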
4746 PetscErrorCode PETSCMAT_DLLEXPORT MatGetBrowsOfAoCols(Mat A,Mat B,MatReuse scall,PetscInt **startsj,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth)
4747 {
4748   VecScatter_MPI_General *gen_to,*gen_from;
4749   PetscErrorCode         ierr;
4750   Mat_MPIAIJ             *a=(Mat_MPIAIJ*)A->data;
4751   Mat_SeqAIJ             *b_oth;
4752   VecScatter             ctx=a->Mvctx;
4753   MPI_Comm               comm=((PetscObject)ctx)->comm;
4754   PetscMPIInt            *rprocs,*sprocs,tag=((PetscObject)ctx)->tag,rank;
4755   PetscInt               *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap->n,row,*b_othi,*b_othj;
4756   PetscScalar            *rvalues,*svalues;
4757   MatScalar              *b_otha,*bufa,*bufA;
4758   PetscInt               i,j,k,l,ll,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len;
4759   MPI_Request            *rwaits = PETSC_NULL,*swaits = PETSC_NULL;
4760   MPI_Status             *sstatus,rstatus;
4761   PetscMPIInt            jj;
4762   PetscInt               *cols,sbs,rbs;
4763   PetscScalar            *vals;
4764 
4765   PetscFunctionBegin;
4766   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend){
4767     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
4768   }
4769   ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
4770   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4771 
4772   gen_to   = (VecScatter_MPI_General*)ctx->todata;
4773   gen_from = (VecScatter_MPI_General*)ctx->fromdata;
4774   rvalues  = gen_from->values; /* holds the length of receiving row */
4775   svalues  = gen_to->values;   /* holds the length of sending row */
4776   nrecvs   = gen_from->n;
4777   nsends   = gen_to->n;
4778 
4779   ierr = PetscMalloc2(nrecvs,MPI_Request,&rwaits,nsends,MPI_Request,&swaits);CHKERRQ(ierr);
4780   srow     = gen_to->indices;   /* local row index to be sent */
4781   sstarts  = gen_to->starts;
4782   sprocs   = gen_to->procs;
4783   sstatus  = gen_to->sstatus;
4784   sbs      = gen_to->bs;
4785   rstarts  = gen_from->starts;
4786   rprocs   = gen_from->procs;
4787   rbs      = gen_from->bs;
4788 
4789   if (!startsj || !bufa_ptr) scall = MAT_INITIAL_MATRIX;
4790   if (scall == MAT_INITIAL_MATRIX){
4791     /* i-array */
4792     /*---------*/
4793     /*  post receives */
4794     for (i=0; i<nrecvs; i++){
4795       rowlen = (PetscInt*)rvalues + rstarts[i]*rbs;
4796       nrows = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */
4797       ierr = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
4798     }
4799 
4800     /* pack the outgoing message */
4801     ierr = PetscMalloc2(nsends+1,PetscInt,&sstartsj,nrecvs+1,PetscInt,&rstartsj);CHKERRQ(ierr);
4802     sstartsj[0] = 0;  rstartsj[0] = 0;
4803     len = 0; /* total length of j or a array to be sent */
4804     k = 0;
4805     for (i=0; i<nsends; i++){
4806       rowlen = (PetscInt*)svalues + sstarts[i]*sbs;
4807       nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
4808       for (j=0; j<nrows; j++) {
4809         row = srow[k] + B->rmap->range[rank]; /* global row idx */
4810         for (l=0; l<sbs; l++){
4811           ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); /* rowlength */
4812           rowlen[j*sbs+l] = ncols;
4813           len += ncols;
4814           ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr);
4815         }
4816         k++;
4817       }
4818       ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
4819       sstartsj[i+1] = len;  /* starting point of (i+1)-th outgoing msg in bufj and bufa */
4820     }
4821     /* recvs and sends of i-array are completed */
4822     i = nrecvs;
4823     while (i--) {
4824       ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
4825     }
4826     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
4827 
4828     /* allocate buffers for sending j and a arrays */
4829     ierr = PetscMalloc((len+1)*sizeof(PetscInt),&bufj);CHKERRQ(ierr);
4830     ierr = PetscMalloc((len+1)*sizeof(PetscScalar),&bufa);CHKERRQ(ierr);
4831 
4832     /* create i-array of B_oth */
4833     ierr = PetscMalloc((aBn+2)*sizeof(PetscInt),&b_othi);CHKERRQ(ierr);
4834     b_othi[0] = 0;
4835     len = 0; /* total length of j or a array to be received */
4836     k = 0;
4837     for (i=0; i<nrecvs; i++){
4838       rowlen = (PetscInt*)rvalues + rstarts[i]*rbs;
4839       nrows = rbs*(rstarts[i+1]-rstarts[i]); /* num of rows to be received */
4840       for (j=0; j<nrows; j++) {
4841         b_othi[k+1] = b_othi[k] + rowlen[j];
4842         len += rowlen[j]; k++;
4843       }
4844       rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */
4845     }
4846 
4847     /* allocate space for j and a arrays of B_oth */
4848     ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(PetscInt),&b_othj);CHKERRQ(ierr);
4849     ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(MatScalar),&b_otha);CHKERRQ(ierr);
4850 
4851     /* j-array */
4852     /*---------*/
4853     /*  post receives of j-array */
4854     for (i=0; i<nrecvs; i++){
4855       nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
4856       ierr = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
4857     }
4858 
4859     /* pack the outgoing message j-array */
4860     k = 0;
4861     for (i=0; i<nsends; i++){
4862       nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
4863       bufJ = bufj+sstartsj[i];
4864       for (j=0; j<nrows; j++) {
4865         row  = srow[k++] + B->rmap->range[rank]; /* global row idx */
4866         for (ll=0; ll<sbs; ll++){
4867           ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr);
4868           for (l=0; l<ncols; l++){
4869             *bufJ++ = cols[l];
4870           }
4871           ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr);
4872         }
4873       }
4874       ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
4875     }
4876 
4877     /* recvs and sends of j-array are completed */
4878     i = nrecvs;
4879     while (i--) {
4880       ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
4881     }
4882     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
4883   } else if (scall == MAT_REUSE_MATRIX){
4884     sstartsj = *startsj;
4885     rstartsj = *startsj_r;
4886     bufa     = *bufa_ptr;
4887     b_oth    = (Mat_SeqAIJ*)(*B_oth)->data;
4888     b_otha   = b_oth->a;
4889   } else {
4890     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
4891   }
4892 
4893   /* a-array */
4894   /*---------*/
4895   /*  post receives of a-array */
4896   for (i=0; i<nrecvs; i++){
4897     nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
4898     ierr = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
4899   }
4900 
4901   /* pack the outgoing message a-array */
4902   k = 0;
4903   for (i=0; i<nsends; i++){
4904     nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
4905     bufA = bufa+sstartsj[i];
4906     for (j=0; j<nrows; j++) {
4907       row  = srow[k++] + B->rmap->range[rank]; /* global row idx */
4908       for (ll=0; ll<sbs; ll++){
4909         ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr);
4910         for (l=0; l<ncols; l++){
4911           *bufA++ = vals[l];
4912         }
4913         ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr);
4914       }
4915     }
4916     ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
4917   }
4918   /* recvs and sends of a-array are completed */
4919   i = nrecvs;
4920   while (i--) {
4921     ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
4922   }
4923   if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
4924   ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr);
4925 
4926   if (scall == MAT_INITIAL_MATRIX){
4927     /* put together the new matrix */
4928     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr);
4929 
4930     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
4931     /* Since these are PETSc arrays, change flags to free them as necessary. */
4932     b_oth          = (Mat_SeqAIJ *)(*B_oth)->data;
4933     b_oth->free_a  = PETSC_TRUE;
4934     b_oth->free_ij = PETSC_TRUE;
4935     b_oth->nonew   = 0;
4936 
4937     ierr = PetscFree(bufj);CHKERRQ(ierr);
4938     if (!startsj || !bufa_ptr){
4939       ierr = PetscFree2(sstartsj,rstartsj);CHKERRQ(ierr);
4940       ierr = PetscFree(bufa_ptr);CHKERRQ(ierr);
4941     } else {
4942       *startsj   = sstartsj;
4943       *startsj_r = rstartsj;
4944       *bufa_ptr  = bufa;
4945     }
4946   }
4947   ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
4948   PetscFunctionReturn(0);
4949 }
4950 
4951 #undef __FUNCT__
4952 #define __FUNCT__ "MatGetCommunicationStructs"
4953 /*@C
4954   MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication.
4955 
4956   Not Collective
4957 
4958   Input Parameters:
4959 . A - The matrix in mpiaij format
4960 
4961   Output Parameter:
4962 + lvec - The local vector holding off-process values from the argument to a matrix-vector product
4963 . colmap - A map from global column index to local index into lvec
4964 - multScatter - A scatter from the argument of a matrix-vector product to lvec
4965 
4966   Level: developer
4967 
4968 @*/
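/*
   Illustrative usage sketch (not compiled); the type of colmap depends on PETSC_USE_CTABLE, so
   the caller mirrors that configuration:

     Vec        lvec;
     VecScatter Mvctx;
   #if defined(PETSC_USE_CTABLE)
     PetscTable colmap;
   #else
     PetscInt   *colmap;
   #endif
     ierr = MatGetCommunicationStructs(A,&lvec,&colmap,&Mvctx);CHKERRQ(ierr);
*/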
4969 #if defined (PETSC_USE_CTABLE)
4970 PetscErrorCode PETSCMAT_DLLEXPORT MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter)
4971 #else
4972 PetscErrorCode PETSCMAT_DLLEXPORT MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter)
4973 #endif
4974 {
4975   Mat_MPIAIJ *a;
4976 
4977   PetscFunctionBegin;
4978   PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
4979   PetscValidPointer(lvec, 2);
4980   PetscValidPointer(colmap, 3);
4981   PetscValidPointer(multScatter, 4);
4982   a = (Mat_MPIAIJ *) A->data;
4983   if (lvec) *lvec = a->lvec;
4984   if (colmap) *colmap = a->colmap;
4985   if (multScatter) *multScatter = a->Mvctx;
4986   PetscFunctionReturn(0);
4987 }
4988 
4989 EXTERN_C_BEGIN
4990 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_MPIAIJ_MPIAIJCRL(Mat,const MatType,MatReuse,Mat*);
4991 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_MPIAIJ_MPIAIJPERM(Mat,const MatType,MatReuse,Mat*);
4992 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_MPIAIJ_MPISBAIJ(Mat,const MatType,MatReuse,Mat*);
4993 EXTERN_C_END
4994 
4995 #undef __FUNCT__
4996 #define __FUNCT__ "MatMatMultNumeric_MPIDense_MPIAIJ"
4997 /*
4998     Computes C = (B'*A')' since computing A*B directly is untenable
4999 
5000                n                       p                          p
5001         (              )       (              )         (                  )
5002       m (      A       )  *  n (       B      )   =   m (         C        )
5003         (              )       (              )         (                  )
5004 
5005 */
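/*
   In other words, because (A*B)' = B'*A', the product C = A*B is formed as C = (B'*A')':
   transposing swaps the roles of the operands, so the multiply actually performed below is
   Bt*At (an AIJ times Dense product, which MatMatMult() handles), and the final MatTranspose()
   recovers C in its expected m x p layout.
*/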
5006 PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C)
5007 {
5008   PetscErrorCode     ierr;
5009   Mat                At,Bt,Ct;
5010 
5011   PetscFunctionBegin;
5012   ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr);
5013   ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr);
5014   ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,1.0,&Ct);CHKERRQ(ierr);
5015   ierr = MatDestroy(At);CHKERRQ(ierr);
5016   ierr = MatDestroy(Bt);CHKERRQ(ierr);
5017   ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr);
5018   ierr = MatDestroy(Ct);CHKERRQ(ierr);
5019   PetscFunctionReturn(0);
5020 }
5021 
5022 #undef __FUNCT__
5023 #define __FUNCT__ "MatMatMultSymbolic_MPIDense_MPIAIJ"
5024 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat *C)
5025 {
5026   PetscErrorCode ierr;
5027   PetscInt       m=A->rmap->n,n=B->cmap->n;
5028   Mat            Cmat;
5029 
5030   PetscFunctionBegin;
5031   if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %d != B->rmap->n %d\n",A->cmap->n,B->rmap->n);
5032   ierr = MatCreate(((PetscObject)A)->comm,&Cmat);CHKERRQ(ierr);
5033   ierr = MatSetSizes(Cmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
5034   ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr);
5035   ierr = MatMPIDenseSetPreallocation(Cmat,PETSC_NULL);CHKERRQ(ierr);
5036   ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5037   ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5038   *C   = Cmat;
5039   PetscFunctionReturn(0);
5040 }
5041 
5042 /* ----------------------------------------------------------------*/
5043 #undef __FUNCT__
5044 #define __FUNCT__ "MatMatMult_MPIDense_MPIAIJ"
5045 PetscErrorCode MatMatMult_MPIDense_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C)
5046 {
5047   PetscErrorCode ierr;
5048 
5049   PetscFunctionBegin;
5050   if (scall == MAT_INITIAL_MATRIX){
5051     ierr = MatMatMultSymbolic_MPIDense_MPIAIJ(A,B,fill,C);CHKERRQ(ierr);
5052   }
5053   ierr = MatMatMultNumeric_MPIDense_MPIAIJ(A,B,*C);CHKERRQ(ierr);
5054   PetscFunctionReturn(0);
5055 }
5056 
5057 EXTERN_C_BEGIN
5058 #if defined(PETSC_HAVE_MUMPS)
5059 extern PetscErrorCode MatGetFactor_aij_mumps(Mat,MatFactorType,Mat*);
5060 #endif
5061 #if defined(PETSC_HAVE_PASTIX)
5062 extern PetscErrorCode MatGetFactor_mpiaij_pastix(Mat,MatFactorType,Mat*);
5063 #endif
5064 #if defined(PETSC_HAVE_SUPERLU_DIST)
5065 extern PetscErrorCode MatGetFactor_mpiaij_superlu_dist(Mat,MatFactorType,Mat*);
5066 #endif
5067 #if defined(PETSC_HAVE_SPOOLES)
5068 extern PetscErrorCode MatGetFactor_mpiaij_spooles(Mat,MatFactorType,Mat*);
5069 #endif
5070 EXTERN_C_END
5071 
5072 /*MC
5073    MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.
5074 
5075    Options Database Keys:
5076 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions()
5077 
5078   Level: beginner
5079 
5080 .seealso: MatCreateMPIAIJ()
5081 M*/
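/*
   Illustrative usage sketch (not compiled): the type can be selected programmatically or from the
   options database; M, N and the preallocation counts below are placeholders.

     Mat A;
     ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
     ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,M,N);CHKERRQ(ierr);
     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);              (or MatSetFromOptions(A) with -mat_type mpiaij)
     ierr = MatMPIAIJSetPreallocation(A,5,PETSC_NULL,2,PETSC_NULL);CHKERRQ(ierr);
*/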
5082 
5083 EXTERN_C_BEGIN
5084 #undef __FUNCT__
5085 #define __FUNCT__ "MatCreate_MPIAIJ"
5086 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_MPIAIJ(Mat B)
5087 {
5088   Mat_MPIAIJ     *b;
5089   PetscErrorCode ierr;
5090   PetscMPIInt    size;
5091 
5092   PetscFunctionBegin;
5093   ierr = MPI_Comm_size(((PetscObject)B)->comm,&size);CHKERRQ(ierr);
5094 
5095   ierr            = PetscNewLog(B,Mat_MPIAIJ,&b);CHKERRQ(ierr);
5096   B->data         = (void*)b;
5097   ierr            = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
5098   B->rmap->bs     = 1;
5099   B->assembled    = PETSC_FALSE;
5100   B->mapping      = 0;
5101 
5102   B->insertmode   = NOT_SET_VALUES;
5103   b->size         = size;
5104   ierr = MPI_Comm_rank(((PetscObject)B)->comm,&b->rank);CHKERRQ(ierr);
5105 
5106   /* build cache for off array entries formed */
5107   ierr = MatStashCreate_Private(((PetscObject)B)->comm,1,&B->stash);CHKERRQ(ierr);
5108   b->donotstash  = PETSC_FALSE;
5109   b->colmap      = 0;
5110   b->garray      = 0;
5111   b->roworiented = PETSC_TRUE;
5112 
5113   /* stuff used for matrix vector multiply */
5114   b->lvec      = PETSC_NULL;
5115   b->Mvctx     = PETSC_NULL;
5116 
5117   /* stuff for MatGetRow() */
5118   b->rowindices   = 0;
5119   b->rowvalues    = 0;
5120   b->getrowactive = PETSC_FALSE;
5121 
5122 #if defined(PETSC_HAVE_SPOOLES)
5123   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_spooles_C",
5124                                      "MatGetFactor_mpiaij_spooles",
5125                                      MatGetFactor_mpiaij_spooles);CHKERRQ(ierr);
5126 #endif
5127 #if defined(PETSC_HAVE_MUMPS)
5128   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_mumps_C",
5129                                      "MatGetFactor_aij_mumps",
5130                                      MatGetFactor_aij_mumps);CHKERRQ(ierr);
5131 #endif
5132 #if defined(PETSC_HAVE_PASTIX)
5133   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_pastix_C",
5134 					   "MatGetFactor_mpiaij_pastix",
5135 					   MatGetFactor_mpiaij_pastix);CHKERRQ(ierr);
5136 #endif
5137 #if defined(PETSC_HAVE_SUPERLU_DIST)
5138   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_superlu_dist_C",
5139                                      "MatGetFactor_mpiaij_superlu_dist",
5140                                      MatGetFactor_mpiaij_superlu_dist);CHKERRQ(ierr);
5141 #endif
5142   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
5143                                      "MatStoreValues_MPIAIJ",
5144                                      MatStoreValues_MPIAIJ);CHKERRQ(ierr);
5145   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
5146                                      "MatRetrieveValues_MPIAIJ",
5147                                      MatRetrieveValues_MPIAIJ);CHKERRQ(ierr);
5148   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
5149 				     "MatGetDiagonalBlock_MPIAIJ",
5150                                      MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr);
5151   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatIsTranspose_C",
5152 				     "MatIsTranspose_MPIAIJ",
5153 				     MatIsTranspose_MPIAIJ);CHKERRQ(ierr);
5154   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocation_C",
5155 				     "MatMPIAIJSetPreallocation_MPIAIJ",
5156 				     MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr);
5157   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",
5158 				     "MatMPIAIJSetPreallocationCSR_MPIAIJ",
5159 				     MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr);
5160   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C",
5161 				     "MatDiagonalScaleLocal_MPIAIJ",
5162 				     MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr);
5163   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpiaijperm_C",
5164                                      "MatConvert_MPIAIJ_MPIAIJPERM",
5165                                       MatConvert_MPIAIJ_MPIAIJPERM);CHKERRQ(ierr);
5166   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpiaijcrl_C",
5167                                      "MatConvert_MPIAIJ_MPIAIJCRL",
5168                                       MatConvert_MPIAIJ_MPIAIJCRL);CHKERRQ(ierr);
5169   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C",
5170                                      "MatConvert_MPIAIJ_MPISBAIJ",
5171                                       MatConvert_MPIAIJ_MPISBAIJ);CHKERRQ(ierr);
5172   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMult_mpidense_mpiaij_C",
5173                                      "MatMatMult_MPIDense_MPIAIJ",
5174                                       MatMatMult_MPIDense_MPIAIJ);CHKERRQ(ierr);
5175   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMultSymbolic_mpidense_mpiaij_C",
5176                                      "MatMatMultSymbolic_MPIDense_MPIAIJ",
5177                                      MatMatMultSymbolic_MPIDense_MPIAIJ);CHKERRQ(ierr);
5178   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMultNumeric_mpidense_mpiaij_C",
5179                                      "MatMatMultNumeric_MPIDense_MPIAIJ",
5180                                       MatMatMultNumeric_MPIDense_MPIAIJ);CHKERRQ(ierr);
5181   ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr);
5182   PetscFunctionReturn(0);
5183 }
5184 EXTERN_C_END
5185 
5186 #undef __FUNCT__
5187 #define __FUNCT__ "MatCreateMPIAIJWithSplitArrays"
5188 /*@
5189      MatCreateMPIAIJWithSplitArrays - creates an MPIAIJ matrix using arrays that contain the "diagonal"
5190          and "off-diagonal" parts of the matrix in CSR format.
5191 
5192    Collective on MPI_Comm
5193 
5194    Input Parameters:
5195 +  comm - MPI communicator
5196 .  m - number of local rows (Cannot be PETSC_DECIDE)
5197 .  n - This value should be the same as the local size used in creating the
5198        x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have it
5199        calculated if N is given). For square matrices n is almost always m.
5200 .  M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given)
5201 .  N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given)
5202 .   i - row indices for "diagonal" portion of matrix
5203 .   j - column indices
5204 .   a - matrix values
5205 .   oi - row indices for "off-diagonal" portion of matrix
5206 .   oj - column indices
5207 -   oa - matrix values
5208 
5209    Output Parameter:
5210 .   mat - the matrix
5211 
5212    Level: advanced
5213 
5214    Notes:
5215        The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc.
5216 
5217        The i and j indices are 0 based
5218 
5219        See MatCreateMPIAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix
5220 
5221        This sets local rows and cannot be used to set off-processor values.
5222 
5223        You cannot later use MatSetValues() to change values in this matrix.
5224 
5225 .keywords: matrix, aij, compressed row, sparse, parallel
5226 
5227 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
5228           MPIAIJ, MatCreateMPIAIJ(), MatCreateMPIAIJWithArrays()
5229 @*/
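/*
   Illustrative sketch (not compiled), run on a single process so that the off-diagonal block is
   empty: one local row, two local columns, and a single entry 2.0 at column 0.  The arrays are
   not copied, so they must remain valid for the life of the matrix.

     PetscInt    i[]  = {0,1},  j[]  = {0};      diagonal block: row 0 has one entry, column 0
     PetscScalar a[]  = {2.0};
     PetscInt    oi[] = {0,0},  oj[1];           off-diagonal block: no entries
     PetscScalar oa[1];
     Mat         A;
     ierr = MatCreateMPIAIJWithSplitArrays(PETSC_COMM_WORLD,1,2,PETSC_DETERMINE,PETSC_DETERMINE,
                                           i,j,a,oi,oj,oa,&A);CHKERRQ(ierr);
*/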
5230 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],
5231 								PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat)
5232 {
5233   PetscErrorCode ierr;
5234   Mat_MPIAIJ     *maij;
5235 
5236   PetscFunctionBegin;
5237   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
5238   if (i[0]) {
5239     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
5240   }
5241   if (oi[0]) {
5242     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0");
5243   }
5244   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
5245   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
5246   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
5247   maij = (Mat_MPIAIJ*) (*mat)->data;
5248   maij->donotstash     = PETSC_TRUE;
5249   (*mat)->preallocated = PETSC_TRUE;
5250 
5251   ierr = PetscLayoutSetBlockSize((*mat)->rmap,1);CHKERRQ(ierr);
5252   ierr = PetscLayoutSetBlockSize((*mat)->cmap,1);CHKERRQ(ierr);
5253   ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr);
5254   ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr);
5255 
5256   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr);
5257   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr);
5258 
5259   ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5260   ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5261   ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5262   ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5263 
5264   ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5265   ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5266   PetscFunctionReturn(0);
5267 }
5268 
5269 /*
5270     Special version for direct calls from Fortran
5271 */
5272 #if defined(PETSC_HAVE_FORTRAN_CAPS)
5273 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ
5274 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
5275 #define matsetvaluesmpiaij_ matsetvaluesmpiaij
5276 #endif
5277 
5278 /* Change these macros so can be used in void function */
5279 #undef CHKERRQ
5280 #define CHKERRQ(ierr) CHKERRABORT(PETSC_COMM_WORLD,ierr)
5281 #undef SETERRQ2
5282 #define SETERRQ2(comm,ierr,b,c,d) CHKERRABORT(comm,ierr)
5283 #undef SETERRQ
5284 #define SETERRQ(c,ierr,b) CHKERRABORT(c,ierr)
5285 
5286 EXTERN_C_BEGIN
5287 #undef __FUNCT__
5288 #define __FUNCT__ "matsetvaluesmpiaij_"
5289 void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr)
5290 {
5291   Mat             mat = *mmat;
5292   PetscInt        m = *mm, n = *mn;
5293   InsertMode      addv = *maddv;
5294   Mat_MPIAIJ      *aij = (Mat_MPIAIJ*)mat->data;
5295   PetscScalar     value;
5296   PetscErrorCode  ierr;
5297 
5298   ierr = MatPreallocated(mat);CHKERRQ(ierr);
5299   if (mat->insertmode == NOT_SET_VALUES) {
5300     mat->insertmode = addv;
5301   }
5302 #if defined(PETSC_USE_DEBUG)
5303   else if (mat->insertmode != addv) {
5304     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
5305   }
5306 #endif
5307   {
5308   PetscInt        i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
5309   PetscInt        cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
5310   PetscBool       roworiented = aij->roworiented;
5311 
5312   /* Some variables required by the MatSetValues_SeqAIJ_A/B_Private() macros */
5313   Mat             A = aij->A;
5314   Mat_SeqAIJ      *a = (Mat_SeqAIJ*)A->data;
5315   PetscInt        *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
5316   MatScalar       *aa = a->a;
5317   PetscBool       ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES))?PETSC_TRUE:PETSC_FALSE);
5318   Mat             B = aij->B;
5319   Mat_SeqAIJ      *b = (Mat_SeqAIJ*)B->data;
5320   PetscInt        *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
5321   MatScalar       *ba = b->a;
5322 
5323   PetscInt        *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
5324   PetscInt        nonew = a->nonew;
5325   MatScalar       *ap1,*ap2;
5326 
5327   PetscFunctionBegin;
5328   for (i=0; i<m; i++) {
5329     if (im[i] < 0) continue;
5330 #if defined(PETSC_USE_DEBUG)
5331     if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
5332 #endif
5333     if (im[i] >= rstart && im[i] < rend) {
5334       row      = im[i] - rstart;
5335       lastcol1 = -1;
5336       rp1      = aj + ai[row];
5337       ap1      = aa + ai[row];
5338       rmax1    = aimax[row];
5339       nrow1    = ailen[row];
5340       low1     = 0;
5341       high1    = nrow1;
5342       lastcol2 = -1;
5343       rp2      = bj + bi[row];
5344       ap2      = ba + bi[row];
5345       rmax2    = bimax[row];
5346       nrow2    = bilen[row];
5347       low2     = 0;
5348       high2    = nrow2;
5349 
5350       for (j=0; j<n; j++) {
5351         if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
5352         if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
5353         if (in[j] >= cstart && in[j] < cend){
5354           col = in[j] - cstart;
5355           MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
5356         } else if (in[j] < 0) continue;
5357 #if defined(PETSC_USE_DEBUG)
5358         else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
5359 #endif
5360         else {
5361           if (mat->was_assembled) {
5362             if (!aij->colmap) {
5363               ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
5364             }
5365 #if defined (PETSC_USE_CTABLE)
5366             ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
5367             col--;
5368 #else
5369             col = aij->colmap[in[j]] - 1;
5370 #endif
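            /* col < 0 means the global column in[j] is not yet in the off-diagonal column map;
               if new nonzeros are allowed, disassemble the matrix so B reverts to global column
               indices and the entry can be inserted */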
5371             if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
5372               ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
5373               col =  in[j];
5374               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
5375               B = aij->B;
5376               b = (Mat_SeqAIJ*)B->data;
5377               bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
5378               rp2      = bj + bi[row];
5379               ap2      = ba + bi[row];
5380               rmax2    = bimax[row];
5381               nrow2    = bilen[row];
5382               low2     = 0;
5383               high2    = nrow2;
5384               bm       = aij->B->rmap->n;
5385               ba = b->a;
5386             }
5387           } else col = in[j];
5388           MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
5389         }
5390       }
5391     } else {
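      /* row im[i] is owned by another process: unless stashing has been disabled, stash the
         values here and send them to the owning process during MatAssemblyBegin/End() */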
5392       if (!aij->donotstash) {
5393         if (roworiented) {
5394           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
5395         } else {
5396           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
5397         }
5398       }
5399     }
5400   }}
5401   PetscFunctionReturnVoid();
5402 }
5403 EXTERN_C_END
5404 
5405