xref: /petsc/src/mat/impls/aij/mpi/mpiaij.c (revision 5d4804778dfa45ea00bcad8aabdb3eca36eaeac3)
1 
2 #include <../src/mat/impls/aij/mpi/mpiaij.h>   /*I "petscmat.h" I*/
3 #include <petscblaslapack.h>
4 
5 /*MC
6    MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices.
7 
8    This matrix type is identical to MATSEQAIJ when constructed with a single process communicator,
9    and MATMPIAIJ otherwise.  As a result, for single process communicators,
10   MatSeqAIJSetPreallocation is supported, and similarly MatMPIAIJSetPreallocation is supported
11   for communicators controlling multiple processes.  It is recommended that you call both of
12   the above preallocation routines for simplicity.
13 
14    Options Database Keys:
15 . -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions()
16 
17   Developer Notes: Subclasses include MATAIJCUSP, MATAIJPERM, and MATAIJCRL; the type also automatically switches over to using inodes when
18    enough exist.
19 
20   Level: beginner
21 
22 .seealso: MatCreateMPIAIJ(), MatCreateSeqAIJ(), MATSEQAIJ, MATMPIAIJ
23 M*/
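
/*
   Example usage (an illustrative sketch only, not part of this file's compiled code; the
   counts 5 and 2 are placeholder per-row nonzero estimates):

     Mat            A;
     PetscErrorCode ierr;
     ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
     ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,100,100);CHKERRQ(ierr);
     ierr = MatSetType(A,MATAIJ);CHKERRQ(ierr);
     ierr = MatSeqAIJSetPreallocation(A,5,PETSC_NULL);CHKERRQ(ierr);
     ierr = MatMPIAIJSetPreallocation(A,5,PETSC_NULL,2,PETSC_NULL);CHKERRQ(ierr);
*/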
24 
25 /*MC
26    MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices.
27 
28    This matrix type is identical to MATSEQAIJCRL when constructed with a single process communicator,
29    and MATMPIAIJCRL otherwise.  As a result, for single process communicators,
30    MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
31   for communicators controlling multiple processes.  It is recommended that you call both of
32   the above preallocation routines for simplicity (see the example following this block).
33 
34    Options Database Keys:
35 . -mat_type aijcrl - sets the matrix type to "aijcrl" during a call to MatSetFromOptions()
36 
37   Level: beginner
38 
39 .seealso: MatCreateMPIAIJCRL(), MATSEQAIJCRL, MATMPIAIJCRL
40 M*/
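
/*
   Example usage (an illustrative sketch, continuing the MATAIJ example above): the AIJCRL
   format is selected either directly with MatSetType() or, after MatSetFromOptions(), with
   the runtime option -mat_type aijcrl; preallocation is done exactly as for MATAIJ.

     ierr = MatSetType(A,MATAIJCRL);CHKERRQ(ierr);
     ierr = MatSeqAIJSetPreallocation(A,5,PETSC_NULL);CHKERRQ(ierr);
     ierr = MatMPIAIJSetPreallocation(A,5,PETSC_NULL,2,PETSC_NULL);CHKERRQ(ierr);
*/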
41 
42 #undef __FUNCT__
43 #define __FUNCT__ "MatFindNonZeroRows_MPIAIJ"
44 PetscErrorCode MatFindNonZeroRows_MPIAIJ(Mat M,IS *keptrows)
45 {
46   PetscErrorCode  ierr;
47   Mat_MPIAIJ      *mat = (Mat_MPIAIJ*)M->data;
48   Mat_SeqAIJ      *a = (Mat_SeqAIJ*)mat->A->data;
49   Mat_SeqAIJ      *b = (Mat_SeqAIJ*)mat->B->data;
50   const PetscInt  *ia,*ib;
51   const MatScalar *aa,*bb;
52   PetscInt        na,nb,i,j,*rows,cnt=0,n0rows;
53   PetscInt        m = M->rmap->n,rstart = M->rmap->rstart;
54 
55   PetscFunctionBegin;
56   *keptrows = 0;
57   ia = a->i;
58   ib = b->i;
59   for (i=0; i<m; i++) {
60     na = ia[i+1] - ia[i];
61     nb = ib[i+1] - ib[i];
62     if (!na && !nb) {
63       cnt++;
64       goto ok1;
65     }
66     aa = a->a + ia[i];
67     for (j=0; j<na; j++) {
68       if (aa[j] != 0.0) goto ok1;
69     }
70     bb = b->a + ib[i];
71     for (j=0; j <nb; j++) {
72       if (bb[j] != 0.0) goto ok1;
73     }
74     cnt++;
75     ok1:;
76   }
77   ierr = MPI_Allreduce(&cnt,&n0rows,1,MPIU_INT,MPI_SUM,((PetscObject)M)->comm);CHKERRQ(ierr);
78   if (!n0rows) PetscFunctionReturn(0);
79   ierr = PetscMalloc((M->rmap->n-cnt)*sizeof(PetscInt),&rows);CHKERRQ(ierr);
80   cnt = 0;
81   for (i=0; i<m; i++) {
82     na = ia[i+1] - ia[i];
83     nb = ib[i+1] - ib[i];
84     if (!na && !nb) continue;
85     aa = a->a + ia[i];
86     for(j=0; j<na;j++) {
87       if (aa[j] != 0.0) {
88         rows[cnt++] = rstart + i;
89         goto ok2;
90       }
91     }
92     bb = b->a + ib[i];
93     for (j=0; j<nb; j++) {
94       if (bb[j] != 0.0) {
95         rows[cnt++] = rstart + i;
96         goto ok2;
97       }
98     }
99     ok2:;
100   }
101   ierr = ISCreateGeneral(((PetscObject)M)->comm,cnt,rows,PETSC_OWN_POINTER,keptrows);CHKERRQ(ierr);
102   PetscFunctionReturn(0);
103 }
104 
105 #undef __FUNCT__
106 #define __FUNCT__ "MatGetColumnNorms_MPIAIJ"
107 PetscErrorCode MatGetColumnNorms_MPIAIJ(Mat A,NormType type,PetscReal *norms)
108 {
109   PetscErrorCode ierr;
110   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)A->data;
111   PetscInt       i,n,*garray = aij->garray;
112   Mat_SeqAIJ     *a_aij = (Mat_SeqAIJ*) aij->A->data;
113   Mat_SeqAIJ     *b_aij = (Mat_SeqAIJ*) aij->B->data;
114   PetscReal      *work;
115 
116   PetscFunctionBegin;
117   ierr = MatGetSize(A,PETSC_NULL,&n);CHKERRQ(ierr);
118   ierr = PetscMalloc(n*sizeof(PetscReal),&work);CHKERRQ(ierr);
119   ierr = PetscMemzero(work,n*sizeof(PetscReal));CHKERRQ(ierr);
120   if (type == NORM_2) {
121     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
122       work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]*a_aij->a[i]);
123     }
124     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
125       work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]*b_aij->a[i]);
126     }
127   } else if (type == NORM_1) {
128     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
129       work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]);
130     }
131     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
132       work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]);
133     }
134   } else if (type == NORM_INFINITY) {
135     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
136       work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]);
137     }
138     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
139       work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]),work[garray[b_aij->j[i]]]);
140     }
141 
142   } else SETERRQ(((PetscObject)A)->comm,PETSC_ERR_ARG_WRONG,"Unknown NormType");
143   if (type == NORM_INFINITY) {
144     ierr = MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,A->hdr.comm);CHKERRQ(ierr);
145   } else {
146     ierr = MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,A->hdr.comm);CHKERRQ(ierr);
147   }
148   ierr = PetscFree(work);CHKERRQ(ierr);
149   if (type == NORM_2) {
150     for (i=0; i<n; i++) norms[i] = sqrt(norms[i]);
151   }
152   PetscFunctionReturn(0);
153 }
154 
155 #undef __FUNCT__
156 #define __FUNCT__ "MatDistribute_MPIAIJ"
157 /*
158     Distributes a SeqAIJ matrix across a set of processes. Code stolen from
159     MatLoad_MPIAIJ(). Horrible lack of reuse. Should be a routine for each matrix type.
160 
161     Only for square matrices; a usage sketch follows this comment.
162 */
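/*
   Illustrative call sequence (a sketch only; it assumes gmat is a square MATSEQAIJ held on
   rank 0 of comm and m is the number of rows this process is to own):

     Mat dmat;
     ierr = MatDistribute_MPIAIJ(comm,gmat,m,MAT_INITIAL_MATRIX,&dmat);CHKERRQ(ierr);
     ... update the numerical values of gmat on rank 0 ...
     ierr = MatDistribute_MPIAIJ(comm,gmat,m,MAT_REUSE_MATRIX,&dmat);CHKERRQ(ierr);
*/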
163 PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat)
164 {
165   PetscMPIInt    rank,size;
166   PetscInt       *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz,*gmataj,cnt,row,*ld;
167   PetscErrorCode ierr;
168   Mat            mat;
169   Mat_SeqAIJ     *gmata;
170   PetscMPIInt    tag;
171   MPI_Status     status;
172   PetscBool      aij;
173   MatScalar      *gmataa,*ao,*ad,*gmataarestore=0;
174 
175   PetscFunctionBegin;
176   CHKMEMQ;
177   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
178   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
179   if (!rank) {
180     ierr = PetscTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);CHKERRQ(ierr);
181     if (!aij) SETERRQ1(((PetscObject)gmat)->comm,PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name);
182   }
183   if (reuse == MAT_INITIAL_MATRIX) {
184     ierr = MatCreate(comm,&mat);CHKERRQ(ierr);
185     ierr = MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
186     ierr = MatSetType(mat,MATAIJ);CHKERRQ(ierr);
187     ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr);
188     ierr = PetscMalloc2(m,PetscInt,&dlens,m,PetscInt,&olens);CHKERRQ(ierr);
189     ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
190     rowners[0] = 0;
191     for (i=2; i<=size; i++) {
192       rowners[i] += rowners[i-1];
193     }
194     rstart = rowners[rank];
195     rend   = rowners[rank+1];
196     ierr   = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
197     if (!rank) {
198       gmata = (Mat_SeqAIJ*) gmat->data;
199       /* send row lengths to all processors */
200       for (i=0; i<m; i++) dlens[i] = gmata->ilen[i];
201       for (i=1; i<size; i++) {
202 	ierr = MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
203       }
204       /* determine the number of diagonal and off-diagonal entries in each row */
205       ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
206       ierr = PetscMalloc(m*sizeof(PetscInt),&ld);CHKERRQ(ierr);
207       ierr = PetscMemzero(ld,m*sizeof(PetscInt));CHKERRQ(ierr);
208       jj = 0;
209       for (i=0; i<m; i++) {
210 	for (j=0; j<dlens[i]; j++) {
211           if (gmata->j[jj] < rstart) ld[i]++;
212 	  if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++;
213 	  jj++;
214 	}
215       }
216       /* send column indices to other processes */
217       for (i=1; i<size; i++) {
218 	nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
219 	ierr = MPI_Send(&nz,1,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
220 	ierr = MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
221       }
222 
223       /* send numerical values to other processes */
224       for (i=1; i<size; i++) {
225         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
226         ierr = MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
227       }
228       gmataa = gmata->a;
229       gmataj = gmata->j;
230 
231     } else {
232       /* receive row lengths */
233       ierr = MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
234       /* receive column indices */
235       ierr = MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
236       ierr = PetscMalloc2(nz,PetscScalar,&gmataa,nz,PetscInt,&gmataj);CHKERRQ(ierr);
237       ierr = MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
238       /* determine the number of diagonal and off-diagonal entries in each row */
239       ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
240       ierr = PetscMalloc(m*sizeof(PetscInt),&ld);CHKERRQ(ierr);
241       ierr = PetscMemzero(ld,m*sizeof(PetscInt));CHKERRQ(ierr);
242       jj = 0;
243       for (i=0; i<m; i++) {
244 	for (j=0; j<dlens[i]; j++) {
245           if (gmataj[jj] < rstart) ld[i]++;
246 	  if (gmataj[jj] < rstart || gmataj[jj] >= rend) olens[i]++;
247 	  jj++;
248 	}
249       }
250       /* receive numerical values */
251       ierr = PetscMemzero(gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr);
252       ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
253     }
254     /* set preallocation */
255     for (i=0; i<m; i++) {
256       dlens[i] -= olens[i];
257     }
258     ierr = MatSeqAIJSetPreallocation(mat,0,dlens);CHKERRQ(ierr);
259     ierr = MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);CHKERRQ(ierr);
260 
261     for (i=0; i<m; i++) {
262       dlens[i] += olens[i];
263     }
264     cnt  = 0;
265     for (i=0; i<m; i++) {
266       row  = rstart + i;
267       ierr = MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);CHKERRQ(ierr);
268       cnt += dlens[i];
269     }
270     if (rank) {
271       ierr = PetscFree2(gmataa,gmataj);CHKERRQ(ierr);
272     }
273     ierr = PetscFree2(dlens,olens);CHKERRQ(ierr);
274     ierr = PetscFree(rowners);CHKERRQ(ierr);
275     ((Mat_MPIAIJ*)(mat->data))->ld = ld;
276     *inmat = mat;
277   } else {   /* column indices are already set; only need to move over numerical values from process 0 */
278     Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data;
279     Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data;
280     mat   = *inmat;
281     ierr  = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
282     if (!rank) {
283       /* send numerical values to other processes */
284       gmata = (Mat_SeqAIJ*) gmat->data;
285       ierr   = MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);CHKERRQ(ierr);
286       gmataa = gmata->a;
287       for (i=1; i<size; i++) {
288         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
289         ierr = MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
290       }
291       nz   = gmata->i[rowners[1]]-gmata->i[rowners[0]];
292     } else {
293       /* receive numerical values from process 0 */
294       nz   = Ad->nz + Ao->nz;
295       ierr = PetscMalloc(nz*sizeof(PetscScalar),&gmataa);CHKERRQ(ierr); gmataarestore = gmataa;
296       ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
297     }
298     /* transfer numerical values into the diagonal A and off diagonal B parts of mat */
299     ld = ((Mat_MPIAIJ*)(mat->data))->ld;
300     ad = Ad->a;
301     ao = Ao->a;
302     if (mat->rmap->n) {
303       i  = 0;
304       nz = ld[i];                                   ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
305       nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
306     }
307     for (i=1; i<mat->rmap->n; i++) {
308       nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
309       nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
310     }
311     i--;
312     if (mat->rmap->n) {
313       nz = Ao->i[i+1] - Ao->i[i] - ld[i];           ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
314     }
315     if (rank) {
316       ierr = PetscFree(gmataarestore);CHKERRQ(ierr);
317     }
318   }
319   ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
320   ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
321   CHKMEMQ;
322   PetscFunctionReturn(0);
323 }
324 
325 /*
326   Local utility routine that creates a mapping from the global column
327 number to the local number in the off-diagonal part of the local
328 storage of the matrix.  When PETSC_USE_CTABLE is used this is scalable at
329 a slightly higher hash table cost; without it it is not scalable (each processor
330 holds an order N integer array) but access is fast.  A lookup sketch follows this comment.
331 */
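/*
   Illustrative lookup (a sketch mirroring how the map is used later in this file, e.g. in
   MatSetValues_MPIAIJ()): given a global column number gcol,

   #if defined (PETSC_USE_CTABLE)
     ierr = PetscTableFind(aij->colmap,gcol+1,&col);CHKERRQ(ierr);
     col--;
   #else
     col = aij->colmap[gcol] - 1;
   #endif

   leaves col as the local column index in the off-diagonal block, or a negative value if
   gcol does not appear in any off-diagonal entry on this process.
*/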
332 #undef __FUNCT__
333 #define __FUNCT__ "CreateColmap_MPIAIJ_Private"
334 PetscErrorCode CreateColmap_MPIAIJ_Private(Mat mat)
335 {
336   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
337   PetscErrorCode ierr;
338   PetscInt       n = aij->B->cmap->n,i;
339 
340   PetscFunctionBegin;
341 #if defined (PETSC_USE_CTABLE)
342   ierr = PetscTableCreate(n,&aij->colmap);CHKERRQ(ierr);
343   for (i=0; i<n; i++){
344     ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1);CHKERRQ(ierr);
345   }
346 #else
347   ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscInt),&aij->colmap);CHKERRQ(ierr);
348   ierr = PetscLogObjectMemory(mat,mat->cmap->N*sizeof(PetscInt));CHKERRQ(ierr);
349   ierr = PetscMemzero(aij->colmap,mat->cmap->N*sizeof(PetscInt));CHKERRQ(ierr);
350   for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
351 #endif
352   PetscFunctionReturn(0);
353 }
354 
355 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \
356 { \
357     if (col <= lastcol1) low1 = 0; else high1 = nrow1; \
358     lastcol1 = col;\
359     while (high1-low1 > 5) { \
360       t = (low1+high1)/2; \
361       if (rp1[t] > col) high1 = t; \
362       else             low1  = t; \
363     } \
364       for (_i=low1; _i<high1; _i++) { \
365         if (rp1[_i] > col) break; \
366         if (rp1[_i] == col) { \
367           if (addv == ADD_VALUES) ap1[_i] += value;   \
368           else                    ap1[_i] = value; \
369           goto a_noinsert; \
370         } \
371       }  \
372       if (value == 0.0 && ignorezeroentries) {low1 = 0; high1 = nrow1;goto a_noinsert;} \
373       if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;}		\
374       if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
375       MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \
376       N = nrow1++ - 1; a->nz++; high1++; \
377       /* shift up all the later entries in this row */ \
378       for (ii=N; ii>=_i; ii--) { \
379         rp1[ii+1] = rp1[ii]; \
380         ap1[ii+1] = ap1[ii]; \
381       } \
382       rp1[_i] = col;  \
383       ap1[_i] = value;  \
384       a_noinsert: ; \
385       ailen[row] = nrow1; \
386 }
387 
388 
389 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \
390 { \
391     if (col <= lastcol2) low2 = 0; else high2 = nrow2; \
392     lastcol2 = col;\
393     while (high2-low2 > 5) { \
394       t = (low2+high2)/2; \
395       if (rp2[t] > col) high2 = t; \
396       else             low2  = t; \
397     } \
398     for (_i=low2; _i<high2; _i++) {		\
399       if (rp2[_i] > col) break;			\
400       if (rp2[_i] == col) {			      \
401 	if (addv == ADD_VALUES) ap2[_i] += value;     \
402 	else                    ap2[_i] = value;      \
403 	goto b_noinsert;			      \
404       }						      \
405     }							      \
406     if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
407     if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;}		\
408     if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
409     MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \
410     N = nrow2++ - 1; b->nz++; high2++;					\
411     /* shift up all the later entries in this row */			\
412     for (ii=N; ii>=_i; ii--) {						\
413       rp2[ii+1] = rp2[ii];						\
414       ap2[ii+1] = ap2[ii];						\
415     }									\
416     rp2[_i] = col;							\
417     ap2[_i] = value;							\
418     b_noinsert: ;								\
419     bilen[row] = nrow2;							\
420 }
421 
422 #undef __FUNCT__
423 #define __FUNCT__ "MatSetValuesRow_MPIAIJ"
424 PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[])
425 {
426   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)A->data;
427   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data;
428   PetscErrorCode ierr;
429   PetscInt       l,*garray = mat->garray,diag;
430 
431   PetscFunctionBegin;
432   /* code only works for square matrices A */
433 
434   /* find size of row to the left of the diagonal part */
435   ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr);
436   row  = row - diag;
437   for (l=0; l<b->i[row+1]-b->i[row]; l++) {
438     if (garray[b->j[b->i[row]+l]] > diag) break;
439   }
440   ierr = PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));CHKERRQ(ierr);
441 
442   /* diagonal part */
443   ierr = PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));CHKERRQ(ierr);
444 
445   /* right of diagonal part */
446   ierr = PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));CHKERRQ(ierr);
447   PetscFunctionReturn(0);
448 }
449 
450 #undef __FUNCT__
451 #define __FUNCT__ "MatSetValues_MPIAIJ"
452 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
453 {
454   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
455   PetscScalar    value;
456   PetscErrorCode ierr;
457   PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
458   PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
459   PetscBool      roworiented = aij->roworiented;
460 
461   /* Some Variables required in the macro */
462   Mat            A = aij->A;
463   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data;
464   PetscInt       *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
465   MatScalar      *aa = a->a;
466   PetscBool      ignorezeroentries = a->ignorezeroentries;
467   Mat            B = aij->B;
468   Mat_SeqAIJ     *b = (Mat_SeqAIJ*)B->data;
469   PetscInt       *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
470   MatScalar      *ba = b->a;
471 
472   PetscInt       *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
473   PetscInt       nonew = a->nonew;
474   MatScalar      *ap1,*ap2;
475 
476   PetscFunctionBegin;
477   if (v) PetscValidScalarPointer(v,6);
478   for (i=0; i<m; i++) {
479     if (im[i] < 0) continue;
480 #if defined(PETSC_USE_DEBUG)
481     if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
482 #endif
483     if (im[i] >= rstart && im[i] < rend) {
484       row      = im[i] - rstart;
485       lastcol1 = -1;
486       rp1      = aj + ai[row];
487       ap1      = aa + ai[row];
488       rmax1    = aimax[row];
489       nrow1    = ailen[row];
490       low1     = 0;
491       high1    = nrow1;
492       lastcol2 = -1;
493       rp2      = bj + bi[row];
494       ap2      = ba + bi[row];
495       rmax2    = bimax[row];
496       nrow2    = bilen[row];
497       low2     = 0;
498       high2    = nrow2;
499 
500       for (j=0; j<n; j++) {
501         if (v) {if (roworiented) value = v[i*n+j]; else value = v[i+j*m];} else value = 0.0;
502         if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
503         if (in[j] >= cstart && in[j] < cend){
504           col = in[j] - cstart;
505           MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
506         } else if (in[j] < 0) continue;
507 #if defined(PETSC_USE_DEBUG)
508         else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
509 #endif
510         else {
511           if (mat->was_assembled) {
512             if (!aij->colmap) {
513               ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
514             }
515 #if defined (PETSC_USE_CTABLE)
516             ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
517 	    col--;
518 #else
519             col = aij->colmap[in[j]] - 1;
520 #endif
521             if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
522               ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
523               col =  in[j];
524               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
525               B = aij->B;
526               b = (Mat_SeqAIJ*)B->data;
527               bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a;
528               rp2      = bj + bi[row];
529               ap2      = ba + bi[row];
530               rmax2    = bimax[row];
531               nrow2    = bilen[row];
532               low2     = 0;
533               high2    = nrow2;
534               bm       = aij->B->rmap->n;
535               ba = b->a;
536             }
537           } else col = in[j];
538           MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
539         }
540       }
541     } else {
542       if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
543       if (!aij->donotstash) {
544         if (roworiented) {
545           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
546         } else {
547           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
548         }
549       }
550     }
551   }
552   PetscFunctionReturn(0);
553 }
554 
555 #undef __FUNCT__
556 #define __FUNCT__ "MatGetValues_MPIAIJ"
557 PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
558 {
559   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
560   PetscErrorCode ierr;
561   PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
562   PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
563 
564   PetscFunctionBegin;
565   for (i=0; i<m; i++) {
566     if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/
567     if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
568     if (idxm[i] >= rstart && idxm[i] < rend) {
569       row = idxm[i] - rstart;
570       for (j=0; j<n; j++) {
571         if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */
572         if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
573         if (idxn[j] >= cstart && idxn[j] < cend){
574           col = idxn[j] - cstart;
575           ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
576         } else {
577           if (!aij->colmap) {
578             ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
579           }
580 #if defined (PETSC_USE_CTABLE)
581           ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
582           col --;
583 #else
584           col = aij->colmap[idxn[j]] - 1;
585 #endif
586           if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
587           else {
588             ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
589           }
590         }
591       }
592     } else {
593       SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
594     }
595   }
596   PetscFunctionReturn(0);
597 }
598 
599 extern PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat,Vec,Vec);
600 
601 #undef __FUNCT__
602 #define __FUNCT__ "MatAssemblyBegin_MPIAIJ"
603 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
604 {
605   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
606   PetscErrorCode ierr;
607   PetscInt       nstash,reallocs;
608   InsertMode     addv;
609 
610   PetscFunctionBegin;
611   if (aij->donotstash || mat->nooffprocentries) {
612     PetscFunctionReturn(0);
613   }
614 
615   /* make sure all processors are using either INSERT_VALUES or ADD_VALUES */
616   ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,((PetscObject)mat)->comm);CHKERRQ(ierr);
617   if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
618   mat->insertmode = addv; /* in case this processor had no cache */
619 
620   ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
621   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
622   ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
623   PetscFunctionReturn(0);
624 }
625 
626 #undef __FUNCT__
627 #define __FUNCT__ "MatAssemblyEnd_MPIAIJ"
628 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
629 {
630   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
631   Mat_SeqAIJ     *a=(Mat_SeqAIJ *)aij->A->data;
632   PetscErrorCode ierr;
633   PetscMPIInt    n;
634   PetscInt       i,j,rstart,ncols,flg;
635   PetscInt       *row,*col;
636   PetscBool      other_disassembled;
637   PetscScalar    *val;
638   InsertMode     addv = mat->insertmode;
639 
640   /* do not use 'b = (Mat_SeqAIJ *)aij->B->data' as B can be reset in disassembly */
641   PetscFunctionBegin;
642   if (!aij->donotstash && !mat->nooffprocentries) {
643     while (1) {
644       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
645       if (!flg) break;
646 
647       for (i=0; i<n;) {
648         /* Now identify the consecutive vals belonging to the same row */
649         for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
650         if (j < n) ncols = j-i;
651         else       ncols = n-i;
652         /* Now assemble all these values with a single function call */
653         ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
654         i = j;
655       }
656     }
657     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
658   }
659   ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
660   ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);
661 
662   /* determine if any processor has disassembled, if so we must
663      also disassemble ourselves, in order that we may reassemble. */
664   /*
665      if nonzero structure of submatrix B cannot change then we know that
666      no processor disassembled thus we can skip this stuff
667   */
668   if (!((Mat_SeqAIJ*)aij->B->data)->nonew)  {
669     ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,((PetscObject)mat)->comm);CHKERRQ(ierr);
670     if (mat->was_assembled && !other_disassembled) {
671       ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
672     }
673   }
674   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
675     ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
676   }
677   ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr);
678   ierr = MatSetOption(aij->B,MAT_CHECK_COMPRESSED_ROW,PETSC_FALSE);CHKERRQ(ierr);
679   ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
680   ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);
681 
682   ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
683   aij->rowvalues = 0;
684 
685   /* used by MatAXPY() */
686   a->xtoy = 0; ((Mat_SeqAIJ *)aij->B->data)->xtoy = 0;  /* b->xtoy = 0 */
687   a->XtoY = 0; ((Mat_SeqAIJ *)aij->B->data)->XtoY = 0;  /* b->XtoY = 0 */
688 
689   ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
690   if (a->inode.size) mat->ops->multdiagonalblock = MatMultDiagonalBlock_MPIAIJ;
691   PetscFunctionReturn(0);
692 }
693 
694 #undef __FUNCT__
695 #define __FUNCT__ "MatZeroEntries_MPIAIJ"
696 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
697 {
698   Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
699   PetscErrorCode ierr;
700 
701   PetscFunctionBegin;
702   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
703   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
704   PetscFunctionReturn(0);
705 }
706 
707 #undef __FUNCT__
708 #define __FUNCT__ "MatZeroRows_MPIAIJ"
709 PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
710 {
711   Mat_MPIAIJ        *l = (Mat_MPIAIJ*)A->data;
712   PetscErrorCode    ierr;
713   PetscMPIInt       size = l->size,imdex,n,rank = l->rank,tag = ((PetscObject)A)->tag,lastidx = -1;
714   PetscInt          i,*owners = A->rmap->range;
715   PetscInt          *nprocs,j,idx,nsends,row;
716   PetscInt          nmax,*svalues,*starts,*owner,nrecvs;
717   PetscInt          *rvalues,count,base,slen,*source;
718   PetscInt          *lens,*lrows,*values,rstart=A->rmap->rstart;
719   MPI_Comm          comm = ((PetscObject)A)->comm;
720   MPI_Request       *send_waits,*recv_waits;
721   MPI_Status        recv_status,*send_status;
722   const PetscScalar *xx;
723   PetscScalar       *bb;
724 #if defined(PETSC_DEBUG)
725   PetscBool      found = PETSC_FALSE;
726 #endif
727 
728   PetscFunctionBegin;
729   /*  first count number of contributors to each processor */
730   ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
731   ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
732   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/
733   j = 0;
734   for (i=0; i<N; i++) {
735     if (lastidx > (idx = rows[i])) j = 0;
736     lastidx = idx;
737     for (; j<size; j++) {
738       if (idx >= owners[j] && idx < owners[j+1]) {
739         nprocs[2*j]++;
740         nprocs[2*j+1] = 1;
741         owner[i] = j;
742 #if defined(PETSC_DEBUG)
743         found = PETSC_TRUE;
744 #endif
745         break;
746       }
747     }
748 #if defined(PETSC_DEBUG)
749     if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
750     found = PETSC_FALSE;
751 #endif
752   }
753   nsends = 0;  for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}
754 
755   if (A->nooffproczerorows) {
756     if (nsends > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"You called MatSetOption(,MAT_NO_OFF_PROC_ZERO_ROWS,PETSC_TRUE) but set an off process zero row");
757     nrecvs = nsends;
758     nmax   = N;
759   } else {
760     /* inform other processors of number of messages and max length*/
761     ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);
762   }
763 
764   /* post receives:   */
765   ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr);
766   ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
767   for (i=0; i<nrecvs; i++) {
768     ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
769   }
770 
771   /* do sends:
772       1) starts[i] gives the starting index in svalues for stuff going to
773          the ith processor
774   */
775   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr);
776   ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
777   ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr);
778   starts[0] = 0;
779   for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
780   for (i=0; i<N; i++) {
781     svalues[starts[owner[i]]++] = rows[i];
782   }
783 
784   starts[0] = 0;
785   for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
786   count = 0;
787   for (i=0; i<size; i++) {
788     if (nprocs[2*i+1]) {
789       ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
790     }
791   }
792   ierr = PetscFree(starts);CHKERRQ(ierr);
793 
794   base = owners[rank];
795 
796   /*  wait on receives */
797   ierr   = PetscMalloc2(nrecvs,PetscInt,&lens,nrecvs,PetscInt,&source);CHKERRQ(ierr);
798   count  = nrecvs; slen = 0;
799   while (count) {
800     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
801     /* unpack receives into our local space */
802     ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);
803     source[imdex]  = recv_status.MPI_SOURCE;
804     lens[imdex]    = n;
805     slen          += n;
806     count--;
807   }
808   ierr = PetscFree(recv_waits);CHKERRQ(ierr);
809 
810   /* move the data into the send scatter */
811   ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr);
812   count = 0;
813   for (i=0; i<nrecvs; i++) {
814     values = rvalues + i*nmax;
815     for (j=0; j<lens[i]; j++) {
816       lrows[count++] = values[j] - base;
817     }
818   }
819   ierr = PetscFree(rvalues);CHKERRQ(ierr);
820   ierr = PetscFree2(lens,source);CHKERRQ(ierr);
821   ierr = PetscFree(owner);CHKERRQ(ierr);
822   ierr = PetscFree(nprocs);CHKERRQ(ierr);
823 
824   /* fix right hand side if needed */
825   if (x && b) {
826     ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr);
827     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
828     for (i=0; i<slen; i++) {
829       bb[lrows[i]] = diag*xx[lrows[i]];
830     }
831     ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr);
832     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
833   }
834   /*
835         Zero the required rows. If the "diagonal block" of the matrix
836      is square and the user wishes to set the diagonal we use separate
837      code so that MatSetValues() is not called for each diagonal entry, which would
838      allocate new memory repeatedly and slow things down with many mallocs.
839 
840   */
841   /* must zero l->B before l->A because the (diag) case below may put values into l->B*/
842   ierr = MatZeroRows(l->B,slen,lrows,0.0,0,0);CHKERRQ(ierr);
843   if ((diag != 0.0) && (l->A->rmap->N == l->A->cmap->N)) {
844     ierr = MatZeroRows(l->A,slen,lrows,diag,0,0);CHKERRQ(ierr);
845   } else if (diag != 0.0) {
846     ierr = MatZeroRows(l->A,slen,lrows,0.0,0,0);CHKERRQ(ierr);
847     if (((Mat_SeqAIJ*)l->A->data)->nonew) {
848       SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\n\
849 MAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
850     }
851     for (i = 0; i < slen; i++) {
852       row  = lrows[i] + rstart;
853       ierr = MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr);
854     }
855     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
856     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
857   } else {
858     ierr = MatZeroRows(l->A,slen,lrows,0.0,0,0);CHKERRQ(ierr);
859   }
860   ierr = PetscFree(lrows);CHKERRQ(ierr);
861 
862   /* wait on sends */
863   if (nsends) {
864     ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
865     ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
866     ierr = PetscFree(send_status);CHKERRQ(ierr);
867   }
868   ierr = PetscFree(send_waits);CHKERRQ(ierr);
869   ierr = PetscFree(svalues);CHKERRQ(ierr);
870 
871   PetscFunctionReturn(0);
872 }
873 
874 #undef __FUNCT__
875 #define __FUNCT__ "MatZeroRowsColumns_MPIAIJ"
876 PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
877 {
878   Mat_MPIAIJ        *l = (Mat_MPIAIJ*)A->data;
879   PetscErrorCode    ierr;
880   PetscMPIInt       size = l->size,imdex,n,rank = l->rank,tag = ((PetscObject)A)->tag,lastidx = -1;
881   PetscInt          i,*owners = A->rmap->range;
882   PetscInt          *nprocs,j,idx,nsends;
883   PetscInt          nmax,*svalues,*starts,*owner,nrecvs;
884   PetscInt          *rvalues,count,base,slen,*source;
885   PetscInt          *lens,*lrows,*values,m;
886   MPI_Comm          comm = ((PetscObject)A)->comm;
887   MPI_Request       *send_waits,*recv_waits;
888   MPI_Status        recv_status,*send_status;
889   const PetscScalar *xx;
890   PetscScalar       *bb,*mask;
891   Vec               xmask,lmask;
892   Mat_SeqAIJ        *aij = (Mat_SeqAIJ*)l->B->data;
893   const PetscInt    *aj, *ii,*ridx;
894   PetscScalar       *aa;
895 #if defined(PETSC_DEBUG)
896   PetscBool         found = PETSC_FALSE;
897 #endif
898 
899   PetscFunctionBegin;
900   /*  first count number of contributors to each processor */
901   ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
902   ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
903   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/
904   j = 0;
905   for (i=0; i<N; i++) {
906     if (lastidx > (idx = rows[i])) j = 0;
907     lastidx = idx;
908     for (; j<size; j++) {
909       if (idx >= owners[j] && idx < owners[j+1]) {
910         nprocs[2*j]++;
911         nprocs[2*j+1] = 1;
912         owner[i] = j;
913 #if defined(PETSC_DEBUG)
914         found = PETSC_TRUE;
915 #endif
916         break;
917       }
918     }
919 #if defined(PETSC_DEBUG)
920     if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
921     found = PETSC_FALSE;
922 #endif
923   }
924   nsends = 0;  for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}
925 
926   /* inform other processors of number of messages and max length*/
927   ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);
928 
929   /* post receives:   */
930   ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr);
931   ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
932   for (i=0; i<nrecvs; i++) {
933     ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
934   }
935 
936   /* do sends:
937       1) starts[i] gives the starting index in svalues for stuff going to
938          the ith processor
939   */
940   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr);
941   ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
942   ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr);
943   starts[0] = 0;
944   for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
945   for (i=0; i<N; i++) {
946     svalues[starts[owner[i]]++] = rows[i];
947   }
948 
949   starts[0] = 0;
950   for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
951   count = 0;
952   for (i=0; i<size; i++) {
953     if (nprocs[2*i+1]) {
954       ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
955     }
956   }
957   ierr = PetscFree(starts);CHKERRQ(ierr);
958 
959   base = owners[rank];
960 
961   /*  wait on receives */
962   ierr   = PetscMalloc2(nrecvs,PetscInt,&lens,nrecvs,PetscInt,&source);CHKERRQ(ierr);
963   count  = nrecvs; slen = 0;
964   while (count) {
965     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
966     /* unpack receives into our local space */
967     ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);
968     source[imdex]  = recv_status.MPI_SOURCE;
969     lens[imdex]    = n;
970     slen          += n;
971     count--;
972   }
973   ierr = PetscFree(recv_waits);CHKERRQ(ierr);
974 
975   /* move the data into the send scatter */
976   ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr);
977   count = 0;
978   for (i=0; i<nrecvs; i++) {
979     values = rvalues + i*nmax;
980     for (j=0; j<lens[i]; j++) {
981       lrows[count++] = values[j] - base;
982     }
983   }
984   ierr = PetscFree(rvalues);CHKERRQ(ierr);
985   ierr = PetscFree2(lens,source);CHKERRQ(ierr);
986   ierr = PetscFree(owner);CHKERRQ(ierr);
987   ierr = PetscFree(nprocs);CHKERRQ(ierr);
988   /* lrows are the local rows to be zeroed, slen is the number of local rows */
989 
990   /* zero diagonal part of matrix */
991   ierr = MatZeroRowsColumns(l->A,slen,lrows,diag,x,b);CHKERRQ(ierr);
992 
993   /* handle off diagonal part of matrix */
994   ierr = MatGetVecs(A,&xmask,PETSC_NULL);CHKERRQ(ierr);
995   ierr = VecDuplicate(l->lvec,&lmask);CHKERRQ(ierr);
996   ierr = VecGetArray(xmask,&bb);CHKERRQ(ierr);
997   for (i=0; i<slen; i++) {
998     bb[lrows[i]] = 1;
999   }
1000   ierr = VecRestoreArray(xmask,&bb);CHKERRQ(ierr);
1001   ierr = VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1002   ierr = VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1003   ierr = VecDestroy(&xmask);CHKERRQ(ierr);
1004   ierr = VecScatterBegin(l->Mvctx,x,l->lvec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1005   ierr = VecScatterEnd(l->Mvctx,x,l->lvec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1006   ierr = VecGetArrayRead(l->lvec,&xx);CHKERRQ(ierr);
1007   ierr = VecGetArray(lmask,&mask);CHKERRQ(ierr);
1008   ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
1009 
1010   /* remove zeroed rows of off diagonal matrix */
1011   ii = aij->i;
1012   for (i=0; i<slen; i++) {
1013     ierr = PetscMemzero(aij->a + ii[lrows[i]],(ii[lrows[i]+1] - ii[lrows[i]])*sizeof(PetscScalar));CHKERRQ(ierr);
1014   }
1015 
1016   /* loop over all elements of off process part of matrix zeroing removed columns*/
1017   if (aij->compressedrow.use){
1018     m    = aij->compressedrow.nrows;
1019     ii   = aij->compressedrow.i;
1020     ridx = aij->compressedrow.rindex;
1021     for (i=0; i<m; i++){
1022       n   = ii[i+1] - ii[i];
1023       aj  = aij->j + ii[i];
1024       aa  = aij->a + ii[i];
1025 
1026       for (j=0; j<n; j++) {
1027         if (PetscAbsScalar(mask[*aj])) {
1028           bb[*ridx] -= *aa*xx[*aj];
1029           *aa        = 0.0;
1030         }
1031         aa++;
1032         aj++;
1033       }
1034       ridx++;
1035     }
1036   } else { /* do not use compressed row format */
1037     m = l->B->rmap->n;
1038     for (i=0; i<m; i++) {
1039       n   = ii[i+1] - ii[i];
1040       aj  = aij->j + ii[i];
1041       aa  = aij->a + ii[i];
1042       for (j=0; j<n; j++) {
1043         if (PetscAbsScalar(mask[*aj])) {
1044           bb[i] -= *aa*xx[*aj];
1045           *aa    = 0.0;
1046         }
1047         aa++;
1048         aj++;
1049       }
1050     }
1051   }
1052 
1053   ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
1054   ierr = VecRestoreArray(lmask,&mask);CHKERRQ(ierr);
1055   ierr = VecRestoreArrayRead(l->lvec,&xx);CHKERRQ(ierr);
1056   ierr = VecDestroy(&lmask);CHKERRQ(ierr);
1057   ierr = PetscFree(lrows);CHKERRQ(ierr);
1058 
1059   /* wait on sends */
1060   if (nsends) {
1061     ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
1062     ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
1063     ierr = PetscFree(send_status);CHKERRQ(ierr);
1064   }
1065   ierr = PetscFree(send_waits);CHKERRQ(ierr);
1066   ierr = PetscFree(svalues);CHKERRQ(ierr);
1067 
1068   PetscFunctionReturn(0);
1069 }
1070 
1071 #undef __FUNCT__
1072 #define __FUNCT__ "MatMult_MPIAIJ"
1073 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
1074 {
1075   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1076   PetscErrorCode ierr;
1077   PetscInt       nt;
1078 
1079   PetscFunctionBegin;
1080   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
1081   if (nt != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt);
1082   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1083   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
1084   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1085   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
1086   PetscFunctionReturn(0);
1087 }
1088 
1089 #undef __FUNCT__
1090 #define __FUNCT__ "MatMultDiagonalBlock_MPIAIJ"
1091 PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx)
1092 {
1093   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1094   PetscErrorCode ierr;
1095 
1096   PetscFunctionBegin;
1097   ierr = MatMultDiagonalBlock(a->A,bb,xx);CHKERRQ(ierr);
1098   PetscFunctionReturn(0);
1099 }
1100 
1101 #undef __FUNCT__
1102 #define __FUNCT__ "MatMultAdd_MPIAIJ"
1103 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1104 {
1105   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1106   PetscErrorCode ierr;
1107 
1108   PetscFunctionBegin;
1109   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1110   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1111   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1112   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
1113   PetscFunctionReturn(0);
1114 }
1115 
1116 #undef __FUNCT__
1117 #define __FUNCT__ "MatMultTranspose_MPIAIJ"
1118 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
1119 {
1120   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1121   PetscErrorCode ierr;
1122   PetscBool      merged;
1123 
1124   PetscFunctionBegin;
1125   ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr);
1126   /* do nondiagonal part */
1127   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1128   if (!merged) {
1129     /* send it on its way */
1130     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1131     /* do local part */
1132     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
1133     /* receive remote parts: note this assumes the values are not actually */
1134     /* added in yy until the next line */
1135     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1136   } else {
1137     /* do local part */
1138     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
1139     /* send it on its way */
1140     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1141     /* values actually were received in the Begin() but we need to call this nop */
1142     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1143   }
1144   PetscFunctionReturn(0);
1145 }
1146 
1147 EXTERN_C_BEGIN
1148 #undef __FUNCT__
1149 #define __FUNCT__ "MatIsTranspose_MPIAIJ"
1150 PetscErrorCode  MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscBool  *f)
1151 {
1152   MPI_Comm       comm;
1153   Mat_MPIAIJ     *Aij = (Mat_MPIAIJ *) Amat->data, *Bij;
1154   Mat            Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs;
1155   IS             Me,Notme;
1156   PetscErrorCode ierr;
1157   PetscInt       M,N,first,last,*notme,i;
1158   PetscMPIInt    size;
1159 
1160   PetscFunctionBegin;
1161 
1162   /* Easy test: symmetric diagonal block */
1163   Bij = (Mat_MPIAIJ *) Bmat->data; Bdia = Bij->A;
1164   ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr);
1165   if (!*f) PetscFunctionReturn(0);
1166   ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr);
1167   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1168   if (size == 1) PetscFunctionReturn(0);
1169 
1170   /* Hard test: off-diagonal block. This takes a MatGetSubMatrix. */
1171   ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr);
1172   ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr);
1173   ierr = PetscMalloc((N-last+first)*sizeof(PetscInt),&notme);CHKERRQ(ierr);
1174   for (i=0; i<first; i++) notme[i] = i;
1175   for (i=last; i<M; i++) notme[i-last+first] = i;
1176   ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,PETSC_COPY_VALUES,&Notme);CHKERRQ(ierr);
1177   ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr);
1178   ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr);
1179   Aoff = Aoffs[0];
1180   ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr);
1181   Boff = Boffs[0];
1182   ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr);
1183   ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr);
1184   ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr);
1185   ierr = ISDestroy(&Me);CHKERRQ(ierr);
1186   ierr = ISDestroy(&Notme);CHKERRQ(ierr);
1187   ierr = PetscFree(notme);CHKERRQ(ierr);
1188   PetscFunctionReturn(0);
1189 }
1190 EXTERN_C_END
1191 
1192 #undef __FUNCT__
1193 #define __FUNCT__ "MatMultTransposeAdd_MPIAIJ"
1194 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1195 {
1196   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1197   PetscErrorCode ierr;
1198 
1199   PetscFunctionBegin;
1200   /* do nondiagonal part */
1201   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1202   /* send it on its way */
1203   ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1204   /* do local part */
1205   ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1206   /* receive remote parts */
1207   ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1208   PetscFunctionReturn(0);
1209 }
1210 
1211 /*
1212   This only works correctly for square matrices where the subblock A->A is the
1213    diagonal block
1214 */
1215 #undef __FUNCT__
1216 #define __FUNCT__ "MatGetDiagonal_MPIAIJ"
1217 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v)
1218 {
1219   PetscErrorCode ierr;
1220   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1221 
1222   PetscFunctionBegin;
1223   if (A->rmap->N != A->cmap->N) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
1224   if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
1225   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
1226   PetscFunctionReturn(0);
1227 }
1228 
1229 #undef __FUNCT__
1230 #define __FUNCT__ "MatScale_MPIAIJ"
1231 PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa)
1232 {
1233   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1234   PetscErrorCode ierr;
1235 
1236   PetscFunctionBegin;
1237   ierr = MatScale(a->A,aa);CHKERRQ(ierr);
1238   ierr = MatScale(a->B,aa);CHKERRQ(ierr);
1239   PetscFunctionReturn(0);
1240 }
1241 
1242 #undef __FUNCT__
1243 #define __FUNCT__ "MatDestroy_MPIAIJ"
1244 PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
1245 {
1246   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1247   PetscErrorCode ierr;
1248 
1249   PetscFunctionBegin;
1250 #if defined(PETSC_USE_LOG)
1251   PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N);
1252 #endif
1253   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
1254   ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
1255   ierr = MatDestroy(&aij->A);CHKERRQ(ierr);
1256   ierr = MatDestroy(&aij->B);CHKERRQ(ierr);
1257 #if defined (PETSC_USE_CTABLE)
1258   ierr = PetscTableDestroy(&aij->colmap);CHKERRQ(ierr);
1259 #else
1260   ierr = PetscFree(aij->colmap);CHKERRQ(ierr);
1261 #endif
1262   ierr = PetscFree(aij->garray);CHKERRQ(ierr);
1263   ierr = VecDestroy(&aij->lvec);CHKERRQ(ierr);
1264   ierr = VecScatterDestroy(&aij->Mvctx);CHKERRQ(ierr);
1265   ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
1266   ierr = PetscFree(aij->ld);CHKERRQ(ierr);
1267   ierr = PetscFree(mat->data);CHKERRQ(ierr);
1268 
1269   ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
1270   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr);
1271   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr);
1272   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr);
1273   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C","",PETSC_NULL);CHKERRQ(ierr);
1274   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
1275   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr);
1276   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C","",PETSC_NULL);CHKERRQ(ierr);
1277   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C","",PETSC_NULL);CHKERRQ(ierr);
1278   PetscFunctionReturn(0);
1279 }
1280 
1281 #undef __FUNCT__
1282 #define __FUNCT__ "MatView_MPIAIJ_Binary"
1283 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
1284 {
1285   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
1286   Mat_SeqAIJ*       A = (Mat_SeqAIJ*)aij->A->data;
1287   Mat_SeqAIJ*       B = (Mat_SeqAIJ*)aij->B->data;
1288   PetscErrorCode    ierr;
1289   PetscMPIInt       rank,size,tag = ((PetscObject)viewer)->tag;
1290   int               fd;
1291   PetscInt          nz,header[4],*row_lengths,*range=0,rlen,i;
1292   PetscInt          nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap->rstart,rnz;
1293   PetscScalar       *column_values;
1294   PetscInt          message_count,flowcontrolcount;
1295 
1296   PetscFunctionBegin;
1297   ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr);
1298   ierr = MPI_Comm_size(((PetscObject)mat)->comm,&size);CHKERRQ(ierr);
1299   nz   = A->nz + B->nz;
1300   if (!rank) {
1301     header[0] = MAT_FILE_CLASSID;
1302     header[1] = mat->rmap->N;
1303     header[2] = mat->cmap->N;
1304     ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
1305     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1306     ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1307     /* get largest number of rows any processor has */
1308     rlen = mat->rmap->n;
1309     range = mat->rmap->range;
1310     for (i=1; i<size; i++) {
1311       rlen = PetscMax(rlen,range[i+1] - range[i]);
1312     }
1313   } else {
1314     ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
1315     rlen = mat->rmap->n;
1316   }
1317 
1318   /* load up the local row counts */
1319   ierr = PetscMalloc((rlen+1)*sizeof(PetscInt),&row_lengths);CHKERRQ(ierr);
1320   for (i=0; i<mat->rmap->n; i++) {
1321     row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
1322   }
1323 
1324   /* store the row lengths to the file */
1325   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1326   if (!rank) {
1327     MPI_Status status;
1328     ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap->n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1329     for (i=1; i<size; i++) {
1330       ierr = PetscViewerFlowControlStepMaster(viewer,i,message_count,flowcontrolcount);CHKERRQ(ierr);
1331       rlen = range[i+1] - range[i];
1332       ierr = MPI_Recv(row_lengths,rlen,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
1333       ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1334     }
1335     ierr = PetscViewerFlowControlEndMaster(viewer,message_count);CHKERRQ(ierr);
1336   } else {
1337     ierr = PetscViewerFlowControlStepWorker(viewer,rank,message_count);CHKERRQ(ierr);
1338     ierr = MPI_Send(row_lengths,mat->rmap->n,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1339     ierr = PetscViewerFlowControlEndWorker(viewer,message_count);CHKERRQ(ierr);
1340   }
1341   ierr = PetscFree(row_lengths);CHKERRQ(ierr);
1342 
1343   /* load up the local column indices */
1343   nzmax = nz; /* 0th processor needs as much space as the largest processor needs */
1345   ierr = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
1346   ierr = PetscMalloc((nzmax+1)*sizeof(PetscInt),&column_indices);CHKERRQ(ierr);
1347   cnt  = 0;
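  /* For each local row: emit the off-diagonal (B) entries whose global column lies left of the
     diagonal block, then the diagonal (A) entries shifted to global numbering, then the remaining
     B entries, so that (with the usual sorted SeqAIJ storage) the column indices are written in
     increasing global order. */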
1348   for (i=0; i<mat->rmap->n; i++) {
1349     for (j=B->i[i]; j<B->i[i+1]; j++) {
1350       if ( (col = garray[B->j[j]]) > cstart) break;
1351       column_indices[cnt++] = col;
1352     }
1353     for (k=A->i[i]; k<A->i[i+1]; k++) {
1354       column_indices[cnt++] = A->j[k] + cstart;
1355     }
1356     for (; j<B->i[i+1]; j++) {
1357       column_indices[cnt++] = garray[B->j[j]];
1358     }
1359   }
1360   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
1361 
1362   /* store the column indices to the file */
1363   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1364   if (!rank) {
1365     MPI_Status status;
1366     ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1367     for (i=1; i<size; i++) {
1368       ierr = PetscViewerFlowControlStepMaster(viewer,i,message_count,flowcontrolcount);CHKERRQ(ierr);
1369       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
1370       if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: rnz = %D nzmax = %D",rnz,nzmax);
1371       ierr = MPI_Recv(column_indices,rnz,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
1372       ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1373     }
1374     ierr = PetscViewerFlowControlEndMaster(viewer,message_count);CHKERRQ(ierr);
1375   } else {
1376     ierr = PetscViewerFlowControlStepWorker(viewer,rank,message_count);CHKERRQ(ierr);
1377     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1378     ierr = MPI_Send(column_indices,nz,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1379     ierr = PetscViewerFlowControlEndWorker(viewer,message_count);CHKERRQ(ierr);
1380   }
1381   ierr = PetscFree(column_indices);CHKERRQ(ierr);
1382 
1383   /* load up the local column values */
1384   ierr = PetscMalloc((nzmax+1)*sizeof(PetscScalar),&column_values);CHKERRQ(ierr);
1385   cnt  = 0;
1386   for (i=0; i<mat->rmap->n; i++) {
1387     for (j=B->i[i]; j<B->i[i+1]; j++) {
1388       if ( garray[B->j[j]] > cstart) break;
1389       column_values[cnt++] = B->a[j];
1390     }
1391     for (k=A->i[i]; k<A->i[i+1]; k++) {
1392       column_values[cnt++] = A->a[k];
1393     }
1394     for (; j<B->i[i+1]; j++) {
1395       column_values[cnt++] = B->a[j];
1396     }
1397   }
1398   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
1399 
1400   /* store the column values to the file */
1401   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1402   if (!rank) {
1403     MPI_Status status;
1404     ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1405     for (i=1; i<size; i++) {
1406       ierr = PetscViewerFlowControlStepMaster(viewer,i,message_count,flowcontrolcount);CHKERRQ(ierr);
1407       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
1408       if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: rnz = %D nzmax = %D",rnz,nzmax);
1409       ierr = MPI_Recv(column_values,rnz,MPIU_SCALAR,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
1410       ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1411     }
1412     ierr = PetscViewerFlowControlEndMaster(viewer,message_count);CHKERRQ(ierr);
1413   } else {
1414     ierr = PetscViewerFlowControlStepWorker(viewer,rank,message_count);CHKERRQ(ierr);
1415     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1416     ierr = MPI_Send(column_values,nz,MPIU_SCALAR,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1417     ierr = PetscViewerFlowControlEndWorker(viewer,message_count);CHKERRQ(ierr);
1418   }
1419   ierr = PetscFree(column_values);CHKERRQ(ierr);
1420   PetscFunctionReturn(0);
1421 }
1422 
1423 #undef __FUNCT__
1424 #define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket"
1425 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
1426 {
1427   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
1428   PetscErrorCode    ierr;
1429   PetscMPIInt       rank = aij->rank,size = aij->size;
1430   PetscBool         isdraw,iascii,isbinary;
1431   PetscViewer       sviewer;
1432   PetscViewerFormat format;
1433 
1434   PetscFunctionBegin;
1435   ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1436   ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1437   ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1438   if (iascii) {
1439     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1440     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1441       MatInfo    info;
1442       PetscBool  inodes;
1443 
1444       ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr);
1445       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
1446       ierr = MatInodeGetInodeSizes(aij->A,PETSC_NULL,(PetscInt **)&inodes,PETSC_NULL);CHKERRQ(ierr);
1447       ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_TRUE);CHKERRQ(ierr);
1448       if (!inodes) {
1449         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, not using I-node routines\n",
1450 					      rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
1451       } else {
1452         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, using I-node routines\n",
1453 		    rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
1454       }
1455       ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
1456       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1457       ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
1458       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1459       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1460       ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_FALSE);CHKERRQ(ierr);
1461       ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
1462       ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr);
1463       PetscFunctionReturn(0);
1464     } else if (format == PETSC_VIEWER_ASCII_INFO) {
1465       PetscInt   inodecount,inodelimit,*inodes;
1466       ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr);
1467       if (inodes) {
1468         ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr);
1469       } else {
1470         ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr);
1471       }
1472       PetscFunctionReturn(0);
1473     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1474       PetscFunctionReturn(0);
1475     }
1476   } else if (isbinary) {
1477     if (size == 1) {
1478       ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1479       ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
1480     } else {
1481       ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr);
1482     }
1483     PetscFunctionReturn(0);
1484   } else if (isdraw) {
1485     PetscDraw  draw;
1486     PetscBool  isnull;
1487     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
1488     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
1489   }
1490 
1491   if (size == 1) {
1492     ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1493     ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
1494   } else {
1495     /* assemble the entire matrix onto first processor. */
1496     Mat         A;
1497     Mat_SeqAIJ  *Aloc;
1498     PetscInt    M = mat->rmap->N,N = mat->cmap->N,m,*ai,*aj,row,*cols,i,*ct;
1499     MatScalar   *a;
1500 
1501     if (mat->rmap->N > 1024) {
1502       PetscBool  flg = PETSC_FALSE;
1503 
1504       ierr = PetscOptionsGetBool(((PetscObject) mat)->prefix, "-mat_ascii_output_large", &flg,PETSC_NULL);CHKERRQ(ierr);
1505       if (!flg) {
1506         SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_OUTOFRANGE,"ASCII matrix output not allowed for matrices with more than 1024 rows, use binary format instead.\nYou can override this restriction using -mat_ascii_output_large.");
1507       }
1508     }
1509 
1510     ierr = MatCreate(((PetscObject)mat)->comm,&A);CHKERRQ(ierr);
1511     if (!rank) {
1512       ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
1513     } else {
1514       ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
1515     }
1516     /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */
1517     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
1518     ierr = MatMPIAIJSetPreallocation(A,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
1519     ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr);
1520 
1521     /* copy over the A part */
1522     Aloc = (Mat_SeqAIJ*)aij->A->data;
1523     m = aij->A->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1524     row = mat->rmap->rstart;
1525     for (i=0; i<ai[m]; i++) {aj[i] += mat->cmap->rstart;}
1526     for (i=0; i<m; i++) {
1527       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
1528       row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
1529     }
1530     aj = Aloc->j;
1531     for (i=0; i<ai[m]; i++) {aj[i] -= mat->cmap->rstart;}
1532 
1533     /* copy over the B part */
1534     Aloc = (Mat_SeqAIJ*)aij->B->data;
1535     m    = aij->B->rmap->n;  ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1536     row  = mat->rmap->rstart;
1537     ierr = PetscMalloc((ai[m]+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr);
1538     ct   = cols;
1539     for (i=0; i<ai[m]; i++) {cols[i] = aij->garray[aj[i]];}
1540     for (i=0; i<m; i++) {
1541       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
1542       row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
1543     }
1544     ierr = PetscFree(ct);CHKERRQ(ierr);
1545     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1546     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1547     /*
1548        Everyone has to call to draw the matrix since the graphics waits are
1549        synchronized across all processors that share the PetscDraw object
1550     */
1551     ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
1552     if (!rank) {
1553       ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1554       /* Set the type name to MATMPIAIJ so that the correct type can be printed out by PetscObjectPrintClassNamePrefixType() in MatView_SeqAIJ_ASCII()*/
1555       ierr = PetscStrcpy(((PetscObject)((Mat_MPIAIJ*)(A->data))->A)->type_name,MATMPIAIJ);CHKERRQ(ierr);
1556       ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
1557     }
1558     ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
1559     ierr = MatDestroy(&A);CHKERRQ(ierr);
1560   }
1561   PetscFunctionReturn(0);
1562 }
1563 
1564 #undef __FUNCT__
1565 #define __FUNCT__ "MatView_MPIAIJ"
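/*
   Viewer dispatch for MPIAIJ matrices.  A typical way to reach the binary path above is
   (sketch only; A is the user's matrix, the file name is arbitrary, and error handling is
   abbreviated):

      PetscViewer viewer;
      ierr = PetscViewerBinaryOpen(PETSC_COMM_WORLD,"A.dat",FILE_MODE_WRITE,&viewer);CHKERRQ(ierr);
      ierr = MatView(A,viewer);CHKERRQ(ierr);
      ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr);
*/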
1566 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer)
1567 {
1568   PetscErrorCode ierr;
1569   PetscBool      iascii,isdraw,issocket,isbinary;
1570 
1571   PetscFunctionBegin;
1572   ierr  = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1573   ierr  = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1574   ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1575   ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
1576   if (iascii || isdraw || isbinary || issocket) {
1577     ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
1578   } else {
1579     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Viewer type %s not supported by MPIAIJ matrices",((PetscObject)viewer)->type_name);
1580   }
1581   PetscFunctionReturn(0);
1582 }
1583 
1584 #undef __FUNCT__
1585 #define __FUNCT__ "MatSOR_MPIAIJ"
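/*
   SOR/relaxation for MPIAIJ; only the "local" variants are supported.  Each iteration
   scatters the current solution into the ghost vector lvec, forms the updated right-hand
   side bb1 = bb - B*x using the off-diagonal block, and applies the requested sequential
   SOR sweep to the local diagonal block A.  The Eisenstat variant is assembled from a
   backward and a forward local sweep plus a diagonal correction.
*/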
1586 PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
1587 {
1588   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1589   PetscErrorCode ierr;
1590   Vec            bb1 = 0;
1591   PetscBool      hasop;
1592 
1593   PetscFunctionBegin;
1594   if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) {
1595     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
1596   }
1597 
1598   if (flag == SOR_APPLY_UPPER) {
1599     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1600     PetscFunctionReturn(0);
1601   }
1602 
1603   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
1604     if (flag & SOR_ZERO_INITIAL_GUESS) {
1605       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1606       its--;
1607     }
1608 
1609     while (its--) {
1610       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1611       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1612 
1613       /* update rhs: bb1 = bb - B*x */
1614       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1615       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1616 
1617       /* local sweep */
1618       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1619     }
1620   } else if (flag & SOR_LOCAL_FORWARD_SWEEP){
1621     if (flag & SOR_ZERO_INITIAL_GUESS) {
1622       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1623       its--;
1624     }
1625     while (its--) {
1626       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1627       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1628 
1629       /* update rhs: bb1 = bb - B*x */
1630       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1631       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1632 
1633       /* local sweep */
1634       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1635     }
1636   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){
1637     if (flag & SOR_ZERO_INITIAL_GUESS) {
1638       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1639       its--;
1640     }
1641     while (its--) {
1642       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1643       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1644 
1645       /* update rhs: bb1 = bb - B*x */
1646       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1647       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1648 
1649       /* local sweep */
1650       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1651     }
1652   }  else if (flag & SOR_EISENSTAT) {
1653     Vec         xx1;
1654 
1655     ierr = VecDuplicate(bb,&xx1);CHKERRQ(ierr);
1656     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr);
1657 
1658     ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1659     ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1660     if (!mat->diag) {
1661       ierr = MatGetVecs(matin,&mat->diag,PETSC_NULL);CHKERRQ(ierr);
1662       ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr);
1663     }
1664     ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr);
1665     if (hasop) {
1666       ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr);
1667     } else {
1668       ierr = VecPointwiseMult(bb1,mat->diag,xx);CHKERRQ(ierr);
1669     }
1670     ierr = VecAYPX(bb1,(omega-2.0)/omega,bb);CHKERRQ(ierr);
1671 
1672     ierr = MatMultAdd(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr);
1673 
1674     /* local sweep */
1675     ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr);
1676     ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr);
1677     ierr = VecDestroy(&xx1);CHKERRQ(ierr);
1678   } else SETERRQ(((PetscObject)matin)->comm,PETSC_ERR_SUP,"Parallel SOR not supported");
1679 
1680   ierr = VecDestroy(&bb1);CHKERRQ(ierr);
1681   PetscFunctionReturn(0);
1682 }
1683 
1684 #undef __FUNCT__
1685 #define __FUNCT__ "MatPermute_MPIAIJ"
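/*
   Permutes rows and columns by gathering the (possibly sequential) row permutation onto the
   matrix communicator, inverting it, and taking the local slice of the inverse; the column
   permutation is localized and inverted directly.  The permuted matrix is then produced as a
   submatrix extraction with the resulting index sets.
*/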
1686 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B)
1687 {
1688   MPI_Comm       comm,pcomm;
1689   PetscInt       first,local_size,nrows;
1690   const PetscInt *rows;
1691   PetscMPIInt    size;
1692   IS             crowp,growp,irowp,lrowp,lcolp,icolp;
1693   PetscErrorCode ierr;
1694 
1695   PetscFunctionBegin;
1696   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
1697   /* make a collective version of 'rowp' */
1698   ierr = PetscObjectGetComm((PetscObject)rowp,&pcomm);CHKERRQ(ierr);
1699   if (pcomm==comm) {
1700     crowp = rowp;
1701   } else {
1702     ierr = ISGetSize(rowp,&nrows);CHKERRQ(ierr);
1703     ierr = ISGetIndices(rowp,&rows);CHKERRQ(ierr);
1704     ierr = ISCreateGeneral(comm,nrows,rows,PETSC_COPY_VALUES,&crowp);CHKERRQ(ierr);
1705     ierr = ISRestoreIndices(rowp,&rows);CHKERRQ(ierr);
1706   }
1707   /* collect the global row permutation and invert it */
1708   ierr = ISAllGather(crowp,&growp);CHKERRQ(ierr);
1709   ierr = ISSetPermutation(growp);CHKERRQ(ierr);
1710   if (pcomm!=comm) {
1711     ierr = ISDestroy(&crowp);CHKERRQ(ierr);
1712   }
1713   ierr = ISInvertPermutation(growp,PETSC_DECIDE,&irowp);CHKERRQ(ierr);
1714   /* get the local target indices */
1715   ierr = MatGetOwnershipRange(A,&first,PETSC_NULL);CHKERRQ(ierr);
1716   ierr = MatGetLocalSize(A,&local_size,PETSC_NULL);CHKERRQ(ierr);
1717   ierr = ISGetIndices(irowp,&rows);CHKERRQ(ierr);
1718   ierr = ISCreateGeneral(MPI_COMM_SELF,local_size,rows+first,PETSC_COPY_VALUES,&lrowp);CHKERRQ(ierr);
1719   ierr = ISRestoreIndices(irowp,&rows);CHKERRQ(ierr);
1720   ierr = ISDestroy(&irowp);CHKERRQ(ierr);
1721   /* the column permutation is so much easier;
1722      make a local version of 'colp' and invert it */
1723   ierr = PetscObjectGetComm((PetscObject)colp,&pcomm);CHKERRQ(ierr);
1724   ierr = MPI_Comm_size(pcomm,&size);CHKERRQ(ierr);
1725   if (size==1) {
1726     lcolp = colp;
1727   } else {
1728     ierr = ISGetSize(colp,&nrows);CHKERRQ(ierr);
1729     ierr = ISGetIndices(colp,&rows);CHKERRQ(ierr);
1730     ierr = ISCreateGeneral(MPI_COMM_SELF,nrows,rows,PETSC_COPY_VALUES,&lcolp);CHKERRQ(ierr);
1731   }
1732   ierr = ISSetPermutation(lcolp);CHKERRQ(ierr);
1733   ierr = ISInvertPermutation(lcolp,PETSC_DECIDE,&icolp);CHKERRQ(ierr);
1734   ierr = ISSetPermutation(icolp);CHKERRQ(ierr);
1735   if (size>1) {
1736     ierr = ISRestoreIndices(colp,&rows);CHKERRQ(ierr);
1737     ierr = ISDestroy(&lcolp);CHKERRQ(ierr);
1738   }
1739   /* now we just get the submatrix */
1740   ierr = MatGetSubMatrix_MPIAIJ_Private(A,lrowp,icolp,local_size,MAT_INITIAL_MATRIX,B);CHKERRQ(ierr);
1741   /* clean up */
1742   ierr = ISDestroy(&lrowp);CHKERRQ(ierr);
1743   ierr = ISDestroy(&icolp);CHKERRQ(ierr);
1744   PetscFunctionReturn(0);
1745 }
1746 
1747 #undef __FUNCT__
1748 #define __FUNCT__ "MatGetInfo_MPIAIJ"
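/*
   Accumulates the MatInfo of the diagonal (A) and off-diagonal (B) blocks; for the
   MAT_GLOBAL_MAX and MAT_GLOBAL_SUM flavors the five local counters are reduced across
   the matrix communicator with MPI_Allreduce().
*/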
1749 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1750 {
1751   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1752   Mat            A = mat->A,B = mat->B;
1753   PetscErrorCode ierr;
1754   PetscReal      isend[5],irecv[5];
1755 
1756   PetscFunctionBegin;
1757   info->block_size     = 1.0;
1758   ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1759   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1760   isend[3] = info->memory;  isend[4] = info->mallocs;
1761   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1762   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1763   isend[3] += info->memory;  isend[4] += info->mallocs;
1764   if (flag == MAT_LOCAL) {
1765     info->nz_used      = isend[0];
1766     info->nz_allocated = isend[1];
1767     info->nz_unneeded  = isend[2];
1768     info->memory       = isend[3];
1769     info->mallocs      = isend[4];
1770   } else if (flag == MAT_GLOBAL_MAX) {
1771     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,((PetscObject)matin)->comm);CHKERRQ(ierr);
1772     info->nz_used      = irecv[0];
1773     info->nz_allocated = irecv[1];
1774     info->nz_unneeded  = irecv[2];
1775     info->memory       = irecv[3];
1776     info->mallocs      = irecv[4];
1777   } else if (flag == MAT_GLOBAL_SUM) {
1778     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,((PetscObject)matin)->comm);CHKERRQ(ierr);
1779     info->nz_used      = irecv[0];
1780     info->nz_allocated = irecv[1];
1781     info->nz_unneeded  = irecv[2];
1782     info->memory       = irecv[3];
1783     info->mallocs      = irecv[4];
1784   }
1785   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1786   info->fill_ratio_needed = 0;
1787   info->factor_mallocs    = 0;
1788 
1789   PetscFunctionReturn(0);
1790 }
1791 
1792 #undef __FUNCT__
1793 #define __FUNCT__ "MatSetOption_MPIAIJ"
1794 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscBool  flg)
1795 {
1796   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1797   PetscErrorCode ierr;
1798 
1799   PetscFunctionBegin;
1800   switch (op) {
1801   case MAT_NEW_NONZERO_LOCATIONS:
1802   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1803   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1804   case MAT_KEEP_NONZERO_PATTERN:
1805   case MAT_NEW_NONZERO_LOCATION_ERR:
1806   case MAT_USE_INODES:
1807   case MAT_IGNORE_ZERO_ENTRIES:
1808     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1809     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1810     break;
1811   case MAT_ROW_ORIENTED:
1812     a->roworiented = flg;
1813     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1814     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1815     break;
1816   case MAT_NEW_DIAGONALS:
1817     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1818     break;
1819   case MAT_IGNORE_OFF_PROC_ENTRIES:
1820     a->donotstash = PETSC_TRUE;
1821     break;
1822   case MAT_SPD:
1823     A->spd_set                         = PETSC_TRUE;
1824     A->spd                             = flg;
1825     if (flg) {
1826       A->symmetric                     = PETSC_TRUE;
1827       A->structurally_symmetric        = PETSC_TRUE;
1828       A->symmetric_set                 = PETSC_TRUE;
1829       A->structurally_symmetric_set    = PETSC_TRUE;
1830     }
1831     break;
1832   case MAT_SYMMETRIC:
1833     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1834     break;
1835   case MAT_STRUCTURALLY_SYMMETRIC:
1836     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1837     break;
1838   case MAT_HERMITIAN:
1839     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1840     break;
1841   case MAT_SYMMETRY_ETERNAL:
1842     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1843     break;
1844   default:
1845     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op);
1846   }
1847   PetscFunctionReturn(0);
1848 }
1849 
1850 #undef __FUNCT__
1851 #define __FUNCT__ "MatGetRow_MPIAIJ"
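/*
   Returns a locally owned row by merging the corresponding rows of the diagonal (A) and
   off-diagonal (B) blocks into the work arrays rowvalues/rowindices, translating the B
   column indices through garray so the result is in increasing global column order.
   Only local rows may be requested, and MatRestoreRow() must be called before the next
   MatGetRow().
*/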
1852 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1853 {
1854   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1855   PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
1856   PetscErrorCode ierr;
1857   PetscInt       i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart;
1858   PetscInt       nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend;
1859   PetscInt       *cmap,*idx_p;
1860 
1861   PetscFunctionBegin;
1862   if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
1863   mat->getrowactive = PETSC_TRUE;
1864 
1865   if (!mat->rowvalues && (idx || v)) {
1866     /*
1867         allocate enough space to hold information from the longest row.
1868     */
1869     Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data;
1870     PetscInt   max = 1,tmp;
1871     for (i=0; i<matin->rmap->n; i++) {
1872       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1873       if (max < tmp) { max = tmp; }
1874     }
1875     ierr = PetscMalloc2(max,PetscScalar,&mat->rowvalues,max,PetscInt,&mat->rowindices);CHKERRQ(ierr);
1876   }
1877 
1878   if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows");
1879   lrow = row - rstart;
1880 
1881   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1882   if (!v)   {pvA = 0; pvB = 0;}
1883   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1884   ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1885   ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1886   nztot = nzA + nzB;
1887 
1888   cmap  = mat->garray;
1889   if (v  || idx) {
1890     if (nztot) {
1891       /* Sort by increasing column numbers, assuming A and B already sorted */
1892       PetscInt imark = -1;
1893       if (v) {
1894         *v = v_p = mat->rowvalues;
1895         for (i=0; i<nzB; i++) {
1896           if (cmap[cworkB[i]] < cstart)   v_p[i] = vworkB[i];
1897           else break;
1898         }
1899         imark = i;
1900         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1901         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1902       }
1903       if (idx) {
1904         *idx = idx_p = mat->rowindices;
1905         if (imark > -1) {
1906           for (i=0; i<imark; i++) {
1907             idx_p[i] = cmap[cworkB[i]];
1908           }
1909         } else {
1910           for (i=0; i<nzB; i++) {
1911             if (cmap[cworkB[i]] < cstart)   idx_p[i] = cmap[cworkB[i]];
1912             else break;
1913           }
1914           imark = i;
1915         }
1916         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart + cworkA[i];
1917         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]];
1918       }
1919     } else {
1920       if (idx) *idx = 0;
1921       if (v)   *v   = 0;
1922     }
1923   }
1924   *nz = nztot;
1925   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1926   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1927   PetscFunctionReturn(0);
1928 }
1929 
1930 #undef __FUNCT__
1931 #define __FUNCT__ "MatRestoreRow_MPIAIJ"
1932 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1933 {
1934   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1935 
1936   PetscFunctionBegin;
1937   if (!aij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
1938   aij->getrowactive = PETSC_FALSE;
1939   PetscFunctionReturn(0);
1940 }
1941 
1942 #undef __FUNCT__
1943 #define __FUNCT__ "MatNorm_MPIAIJ"
1944 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm)
1945 {
1946   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1947   Mat_SeqAIJ     *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data;
1948   PetscErrorCode ierr;
1949   PetscInt       i,j,cstart = mat->cmap->rstart;
1950   PetscReal      sum = 0.0;
1951   MatScalar      *v;
1952 
1953   PetscFunctionBegin;
1954   if (aij->size == 1) {
1955     ierr =  MatNorm(aij->A,type,norm);CHKERRQ(ierr);
1956   } else {
1957     if (type == NORM_FROBENIUS) {
1958       v = amat->a;
1959       for (i=0; i<amat->nz; i++) {
1960 #if defined(PETSC_USE_COMPLEX)
1961         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1962 #else
1963         sum += (*v)*(*v); v++;
1964 #endif
1965       }
1966       v = bmat->a;
1967       for (i=0; i<bmat->nz; i++) {
1968 #if defined(PETSC_USE_COMPLEX)
1969         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1970 #else
1971         sum += (*v)*(*v); v++;
1972 #endif
1973       }
1974       ierr = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPIU_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr);
1975       *norm = sqrt(*norm);
1976     } else if (type == NORM_1) { /* max column norm */
1977       PetscReal *tmp,*tmp2;
1978       PetscInt  *jj,*garray = aij->garray;
1979       ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr);
1980       ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscReal),&tmp2);CHKERRQ(ierr);
1981       ierr = PetscMemzero(tmp,mat->cmap->N*sizeof(PetscReal));CHKERRQ(ierr);
1982       *norm = 0.0;
1983       v = amat->a; jj = amat->j;
1984       for (j=0; j<amat->nz; j++) {
1985         tmp[cstart + *jj++ ] += PetscAbsScalar(*v);  v++;
1986       }
1987       v = bmat->a; jj = bmat->j;
1988       for (j=0; j<bmat->nz; j++) {
1989         tmp[garray[*jj++]] += PetscAbsScalar(*v); v++;
1990       }
1991       ierr = MPI_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr);
1992       for (j=0; j<mat->cmap->N; j++) {
1993         if (tmp2[j] > *norm) *norm = tmp2[j];
1994       }
1995       ierr = PetscFree(tmp);CHKERRQ(ierr);
1996       ierr = PetscFree(tmp2);CHKERRQ(ierr);
1997     } else if (type == NORM_INFINITY) { /* max row norm */
1998       PetscReal ntemp = 0.0;
1999       for (j=0; j<aij->A->rmap->n; j++) {
2000         v = amat->a + amat->i[j];
2001         sum = 0.0;
2002         for (i=0; i<amat->i[j+1]-amat->i[j]; i++) {
2003           sum += PetscAbsScalar(*v); v++;
2004         }
2005         v = bmat->a + bmat->i[j];
2006         for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) {
2007           sum += PetscAbsScalar(*v); v++;
2008         }
2009         if (sum > ntemp) ntemp = sum;
2010       }
2011       ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPIU_MAX,((PetscObject)mat)->comm);CHKERRQ(ierr);
2012     } else {
2013       SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_SUP,"No support for two norm");
2014     }
2015   }
2016   PetscFunctionReturn(0);
2017 }
2018 
2019 #undef __FUNCT__
2020 #define __FUNCT__ "MatTranspose_MPIAIJ"
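/*
   Forms the explicit transpose.  For MAT_INITIAL_MATRIX (or an in-place request) the result
   is preallocated from the column counts of the local diagonal block, with the off-diagonal
   counts approximated by the same numbers to avoid communication; each local row of A and B
   is then inserted as a column of the result with MatSetValues().  An in-place transpose
   (MAT_REUSE_MATRIX with *matout == A) is finished with MatHeaderMerge().
*/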
2021 PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout)
2022 {
2023   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2024   Mat_SeqAIJ     *Aloc=(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data;
2025   PetscErrorCode ierr;
2026   PetscInt       M = A->rmap->N,N = A->cmap->N,ma,na,mb,*ai,*aj,*bi,*bj,row,*cols,*cols_tmp,i,*d_nnz;
2027   PetscInt       cstart=A->cmap->rstart,ncol;
2028   Mat            B;
2029   MatScalar      *array;
2030 
2031   PetscFunctionBegin;
2032   if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
2033 
2034   ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n;
2035   ai = Aloc->i; aj = Aloc->j;
2036   bi = Bloc->i; bj = Bloc->j;
2037   if (reuse == MAT_INITIAL_MATRIX || *matout == A) {
2038     /* compute d_nnz for preallocation; o_nnz is approximated by d_nnz to avoid communication */
2039     ierr = PetscMalloc((1+na)*sizeof(PetscInt),&d_nnz);CHKERRQ(ierr);
2040     ierr = PetscMemzero(d_nnz,(1+na)*sizeof(PetscInt));CHKERRQ(ierr);
2041     for (i=0; i<ai[ma]; i++){
2042       d_nnz[aj[i]] ++;
2043       aj[i] += cstart; /* global col index to be used by MatSetValues() */
2044     }
2045 
2046     ierr = MatCreate(((PetscObject)A)->comm,&B);CHKERRQ(ierr);
2047     ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
2048     ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
2049     ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,d_nnz);CHKERRQ(ierr);
2050     ierr = PetscFree(d_nnz);CHKERRQ(ierr);
2051   } else {
2052     B = *matout;
2053   }
2054 
2055   /* copy over the A part */
2056   array = Aloc->a;
2057   row = A->rmap->rstart;
2058   for (i=0; i<ma; i++) {
2059     ncol = ai[i+1]-ai[i];
2060     ierr = MatSetValues(B,ncol,aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
2061     row++; array += ncol; aj += ncol;
2062   }
2063   aj = Aloc->j;
2064   for (i=0; i<ai[ma]; i++) aj[i] -= cstart; /* restore local col index */
2065 
2066   /* copy over the B part */
2067   ierr = PetscMalloc(bi[mb]*sizeof(PetscInt),&cols);CHKERRQ(ierr);
2068   ierr = PetscMemzero(cols,bi[mb]*sizeof(PetscInt));CHKERRQ(ierr);
2069   array = Bloc->a;
2070   row = A->rmap->rstart;
2071   for (i=0; i<bi[mb]; i++) {cols[i] = a->garray[bj[i]];}
2072   cols_tmp = cols;
2073   for (i=0; i<mb; i++) {
2074     ncol = bi[i+1]-bi[i];
2075     ierr = MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
2076     row++; array += ncol; cols_tmp += ncol;
2077   }
2078   ierr = PetscFree(cols);CHKERRQ(ierr);
2079 
2080   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2081   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2082   if (reuse == MAT_INITIAL_MATRIX || *matout != A) {
2083     *matout = B;
2084   } else {
2085     ierr = MatHeaderMerge(A,B);CHKERRQ(ierr);
2086   }
2087   PetscFunctionReturn(0);
2088 }
2089 
2090 #undef __FUNCT__
2091 #define __FUNCT__ "MatDiagonalScale_MPIAIJ"
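/*
   Scales the matrix as diag(ll)*mat*diag(rr).  Left scaling is purely local and is applied to
   both blocks; right scaling of the off-diagonal block needs the ghost entries of rr, so the
   scatter of rr into lvec is started first and completed only after the diagonal block has
   been scaled, overlapping communication with computation.
*/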
2092 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
2093 {
2094   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2095   Mat            a = aij->A,b = aij->B;
2096   PetscErrorCode ierr;
2097   PetscInt       s1,s2,s3;
2098 
2099   PetscFunctionBegin;
2100   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
2101   if (rr) {
2102     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
2103     if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
2104     /* Overlap communication with computation. */
2105     ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2106   }
2107   if (ll) {
2108     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
2109     if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
2110     ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr);
2111   }
2112   /* scale the diagonal block */
2113   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
2114 
2115   if (rr) {
2116     /* Do a scatter end and then right scale the off-diagonal block */
2117     ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2118     ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr);
2119   }
2120 
2121   PetscFunctionReturn(0);
2122 }
2123 
2124 #undef __FUNCT__
2125 #define __FUNCT__ "MatSetBlockSize_MPIAIJ"
2126 PetscErrorCode MatSetBlockSize_MPIAIJ(Mat A,PetscInt bs)
2127 {
2128   Mat_MPIAIJ     *a   = (Mat_MPIAIJ*)A->data;
2129   PetscErrorCode ierr;
2130 
2131   PetscFunctionBegin;
2132   ierr = MatSetBlockSize(a->A,bs);CHKERRQ(ierr);
2133   ierr = MatSetBlockSize(a->B,bs);CHKERRQ(ierr);
2134   ierr = PetscLayoutSetBlockSize(A->rmap,bs);CHKERRQ(ierr);
2135   ierr = PetscLayoutSetBlockSize(A->cmap,bs);CHKERRQ(ierr);
2136   PetscFunctionReturn(0);
2137 }
2138 #undef __FUNCT__
2139 #define __FUNCT__ "MatSetUnfactored_MPIAIJ"
2140 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A)
2141 {
2142   Mat_MPIAIJ     *a   = (Mat_MPIAIJ*)A->data;
2143   PetscErrorCode ierr;
2144 
2145   PetscFunctionBegin;
2146   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
2147   PetscFunctionReturn(0);
2148 }
2149 
2150 #undef __FUNCT__
2151 #define __FUNCT__ "MatEqual_MPIAIJ"
2152 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscBool  *flag)
2153 {
2154   Mat_MPIAIJ     *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data;
2155   Mat            a,b,c,d;
2156   PetscBool      flg;
2157   PetscErrorCode ierr;
2158 
2159   PetscFunctionBegin;
2160   a = matA->A; b = matA->B;
2161   c = matB->A; d = matB->B;
2162 
2163   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
2164   if (flg) {
2165     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
2166   }
2167   ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,((PetscObject)A)->comm);CHKERRQ(ierr);
2168   PetscFunctionReturn(0);
2169 }
2170 
2171 #undef __FUNCT__
2172 #define __FUNCT__ "MatCopy_MPIAIJ"
2173 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
2174 {
2175   PetscErrorCode ierr;
2176   Mat_MPIAIJ     *a = (Mat_MPIAIJ *)A->data;
2177   Mat_MPIAIJ     *b = (Mat_MPIAIJ *)B->data;
2178 
2179   PetscFunctionBegin;
2180   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
2181   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
2182     /* Because of the column compression in the off-process part of the matrix a->B,
2183        the number of columns in a->B and b->B may differ, so MatCopy() cannot be called
2184        directly on the two parts. If need be, a copy more efficient than MatCopy_Basic()
2185        could be provided by first uncompressing the a->B matrices and then copying the
2186        submatrices */
2187     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
2188   } else {
2189     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
2190     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
2191   }
2192   PetscFunctionReturn(0);
2193 }
2194 
2195 #undef __FUNCT__
2196 #define __FUNCT__ "MatSetUpPreallocation_MPIAIJ"
2197 PetscErrorCode MatSetUpPreallocation_MPIAIJ(Mat A)
2198 {
2199   PetscErrorCode ierr;
2200 
2201   PetscFunctionBegin;
2202   ierr =  MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
2203   PetscFunctionReturn(0);
2204 }
2205 
2206 #undef __FUNCT__
2207 #define __FUNCT__ "MatAXPY_MPIAIJ"
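/*
   Computes Y = a*X + Y.  With SAME_NONZERO_PATTERN the value arrays of the matching blocks are
   combined directly with BLAS axpy; with SUBSET_NONZERO_PATTERN an xtoy index map from X's
   nonzeros into Y's is built (and cached on Y's off-diagonal block) and used for the
   scatter-add; otherwise a new matrix with the merged nonzero pattern is preallocated, filled
   by MatAXPY_BasicWithPreallocation(), and swapped into Y with MatHeaderReplace().
*/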
2208 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
2209 {
2210   PetscErrorCode ierr;
2211   PetscInt       i;
2212   Mat_MPIAIJ     *xx = (Mat_MPIAIJ *)X->data,*yy = (Mat_MPIAIJ *)Y->data;
2213   PetscBLASInt   bnz,one=1;
2214   Mat_SeqAIJ     *x,*y;
2215 
2216   PetscFunctionBegin;
2217   if (str == SAME_NONZERO_PATTERN) {
2218     PetscScalar alpha = a;
2219     x = (Mat_SeqAIJ *)xx->A->data;
2220     y = (Mat_SeqAIJ *)yy->A->data;
2221     bnz = PetscBLASIntCast(x->nz);
2222     BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
2223     x = (Mat_SeqAIJ *)xx->B->data;
2224     y = (Mat_SeqAIJ *)yy->B->data;
2225     bnz = PetscBLASIntCast(x->nz);
2226     BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
2227   } else if (str == SUBSET_NONZERO_PATTERN) {
2228     ierr = MatAXPY_SeqAIJ(yy->A,a,xx->A,str);CHKERRQ(ierr);
2229 
2230     x = (Mat_SeqAIJ *)xx->B->data;
2231     y = (Mat_SeqAIJ *)yy->B->data;
2232     if (y->xtoy && y->XtoY != xx->B) {
2233       ierr = PetscFree(y->xtoy);CHKERRQ(ierr);
2234       ierr = MatDestroy(&y->XtoY);CHKERRQ(ierr);
2235     }
2236     if (!y->xtoy) { /* get xtoy */
2237       ierr = MatAXPYGetxtoy_Private(xx->B->rmap->n,x->i,x->j,xx->garray,y->i,y->j,yy->garray,&y->xtoy);CHKERRQ(ierr);
2238       y->XtoY = xx->B;
2239       ierr = PetscObjectReference((PetscObject)xx->B);CHKERRQ(ierr);
2240     }
2241     for (i=0; i<x->nz; i++) y->a[y->xtoy[i]] += a*(x->a[i]);
2242   } else {
2243     Mat B;
2244     PetscInt *nnz_d,*nnz_o;
2245     ierr = PetscMalloc(yy->A->rmap->N*sizeof(PetscInt),&nnz_d);CHKERRQ(ierr);
2246     ierr = PetscMalloc(yy->B->rmap->N*sizeof(PetscInt),&nnz_o);CHKERRQ(ierr);
2247     ierr = MatCreate(((PetscObject)Y)->comm,&B);CHKERRQ(ierr);
2248     ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr);
2249     ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr);
2250     ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr);
2251     ierr = MatAXPYGetPreallocation_SeqAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr);
2252     ierr = MatAXPYGetPreallocation_SeqAIJ(yy->B,xx->B,nnz_o);CHKERRQ(ierr);
2253     ierr = MatMPIAIJSetPreallocation(B,PETSC_NULL,nnz_d,PETSC_NULL,nnz_o);CHKERRQ(ierr);
2254     ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr);
2255     ierr = MatHeaderReplace(Y,B);CHKERRQ(ierr);
2256     ierr = PetscFree(nnz_d);CHKERRQ(ierr);
2257     ierr = PetscFree(nnz_o);CHKERRQ(ierr);
2258   }
2259   PetscFunctionReturn(0);
2260 }
2261 
2262 extern PetscErrorCode  MatConjugate_SeqAIJ(Mat);
2263 
2264 #undef __FUNCT__
2265 #define __FUNCT__ "MatConjugate_MPIAIJ"
2266 PetscErrorCode  MatConjugate_MPIAIJ(Mat mat)
2267 {
2268 #if defined(PETSC_USE_COMPLEX)
2269   PetscErrorCode ierr;
2270   Mat_MPIAIJ     *aij = (Mat_MPIAIJ *)mat->data;
2271 
2272   PetscFunctionBegin;
2273   ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr);
2274   ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr);
2275 #else
2276   PetscFunctionBegin;
2277 #endif
2278   PetscFunctionReturn(0);
2279 }
2280 
2281 #undef __FUNCT__
2282 #define __FUNCT__ "MatRealPart_MPIAIJ"
2283 PetscErrorCode MatRealPart_MPIAIJ(Mat A)
2284 {
2285   Mat_MPIAIJ   *a = (Mat_MPIAIJ*)A->data;
2286   PetscErrorCode ierr;
2287 
2288   PetscFunctionBegin;
2289   ierr = MatRealPart(a->A);CHKERRQ(ierr);
2290   ierr = MatRealPart(a->B);CHKERRQ(ierr);
2291   PetscFunctionReturn(0);
2292 }
2293 
2294 #undef __FUNCT__
2295 #define __FUNCT__ "MatImaginaryPart_MPIAIJ"
2296 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A)
2297 {
2298   Mat_MPIAIJ   *a = (Mat_MPIAIJ*)A->data;
2299   PetscErrorCode ierr;
2300 
2301   PetscFunctionBegin;
2302   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
2303   ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
2304   PetscFunctionReturn(0);
2305 }
2306 
2307 #if defined(PETSC_HAVE_PBGL)
2308 
2309 #include <boost/parallel/mpi/bsp_process_group.hpp>
2310 #include <boost/graph/distributed/ilu_default_graph.hpp>
2311 #include <boost/graph/distributed/ilu_0_block.hpp>
2312 #include <boost/graph/distributed/ilu_preconditioner.hpp>
2313 #include <boost/graph/distributed/petsc/interface.hpp>
2314 #include <boost/multi_array.hpp>
2315 #include <boost/parallel/distributed_property_map.hpp>
2316 
2317 #undef __FUNCT__
2318 #define __FUNCT__ "MatILUFactorSymbolic_MPIAIJ"
2319 /*
2320   This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu>
2321 */
2322 PetscErrorCode MatILUFactorSymbolic_MPIAIJ(Mat fact,Mat A, IS isrow, IS iscol, const MatFactorInfo *info)
2323 {
2324   namespace petsc = boost::distributed::petsc;
2325 
2326   namespace graph_dist = boost::graph::distributed;
2327   using boost::graph::distributed::ilu_default::process_group_type;
2328   using boost::graph::ilu_permuted;
2329 
2330   PetscBool       row_identity, col_identity;
2331   PetscContainer  c;
2332   PetscInt        m, n, M, N;
2333   PetscErrorCode  ierr;
2334 
2335   PetscFunctionBegin;
2336   if (info->levels != 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only levels = 0 supported for parallel ilu");
2337   ierr = ISIdentity(isrow, &row_identity);CHKERRQ(ierr);
2338   ierr = ISIdentity(iscol, &col_identity);CHKERRQ(ierr);
2339   if (!row_identity || !col_identity) {
2340     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for parallel ILU");
2341   }
2342 
2343   process_group_type pg;
2344   typedef graph_dist::ilu_default::ilu_level_graph_type  lgraph_type;
2345   lgraph_type*   lgraph_p = new lgraph_type(petsc::num_global_vertices(A), pg, petsc::matrix_distribution(A, pg));
2346   lgraph_type&   level_graph = *lgraph_p;
2347   graph_dist::ilu_default::graph_type&            graph(level_graph.graph);
2348 
2349   petsc::read_matrix(A, graph, get(boost::edge_weight, graph));
2350   ilu_permuted(level_graph);
2351 
2352   /* put together the new matrix */
2353   ierr = MatCreate(((PetscObject)A)->comm, fact);CHKERRQ(ierr);
2354   ierr = MatGetLocalSize(A, &m, &n);CHKERRQ(ierr);
2355   ierr = MatGetSize(A, &M, &N);CHKERRQ(ierr);
2356   ierr = MatSetSizes(fact, m, n, M, N);CHKERRQ(ierr);
2357   ierr = MatSetType(fact, ((PetscObject)A)->type_name);CHKERRQ(ierr);
2358   ierr = MatAssemblyBegin(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2359   ierr = MatAssemblyEnd(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2360 
2361   ierr = PetscContainerCreate(((PetscObject)A)->comm, &c);CHKERRQ(ierr);
2362   ierr = PetscContainerSetPointer(c, lgraph_p);CHKERRQ(ierr);
2363   ierr = PetscObjectCompose((PetscObject) (fact), "graph", (PetscObject) c);CHKERRQ(ierr);
2364   ierr = PetscContainerDestroy(&c);CHKERRQ(ierr);
2365   PetscFunctionReturn(0);
2366 }
2367 
2368 #undef __FUNCT__
2369 #define __FUNCT__ "MatLUFactorNumeric_MPIAIJ"
2370 PetscErrorCode MatLUFactorNumeric_MPIAIJ(Mat B,Mat A, const MatFactorInfo *info)
2371 {
2372   PetscFunctionBegin;
2373   PetscFunctionReturn(0);
2374 }
2375 
2376 #undef __FUNCT__
2377 #define __FUNCT__ "MatSolve_MPIAIJ"
2378 /*
2379   This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu>
2380 */
2381 PetscErrorCode MatSolve_MPIAIJ(Mat A, Vec b, Vec x)
2382 {
2383   namespace graph_dist = boost::graph::distributed;
2384 
2385   typedef graph_dist::ilu_default::ilu_level_graph_type  lgraph_type;
2386   lgraph_type*   lgraph_p;
2387   PetscContainer c;
2388   PetscErrorCode ierr;
2389 
2390   PetscFunctionBegin;
2391   ierr = PetscObjectQuery((PetscObject) A, "graph", (PetscObject *) &c);CHKERRQ(ierr);
2392   ierr = PetscContainerGetPointer(c, (void **) &lgraph_p);CHKERRQ(ierr);
2393   ierr = VecCopy(b, x);CHKERRQ(ierr);
2394 
2395   PetscScalar* array_x;
2396   ierr = VecGetArray(x, &array_x);CHKERRQ(ierr);
2397   PetscInt sx;
2398   ierr = VecGetSize(x, &sx);CHKERRQ(ierr);
2399 
2400   PetscScalar* array_b;
2401   ierr = VecGetArray(b, &array_b);CHKERRQ(ierr);
2402   PetscInt sb;
2403   ierr = VecGetSize(b, &sb);CHKERRQ(ierr);
2404 
2405   lgraph_type&   level_graph = *lgraph_p;
2406   graph_dist::ilu_default::graph_type&            graph(level_graph.graph);
2407 
2408   typedef boost::multi_array_ref<PetscScalar, 1> array_ref_type;
2409   array_ref_type                                 ref_b(array_b, boost::extents[num_vertices(graph)]),
2410                                                  ref_x(array_x, boost::extents[num_vertices(graph)]);
2411 
2412   typedef boost::iterator_property_map<array_ref_type::iterator,
2413                                 boost::property_map<graph_dist::ilu_default::graph_type, boost::vertex_index_t>::type>  gvector_type;
2414   gvector_type                                   vector_b(ref_b.begin(), get(boost::vertex_index, graph)),
2415                                                  vector_x(ref_x.begin(), get(boost::vertex_index, graph));
2416 
2417   ilu_set_solve(*lgraph_p, vector_b, vector_x);
2418 
2419   PetscFunctionReturn(0);
2420 }
2421 #endif
2422 
2423 typedef struct { /* used by MatGetRedundantMatrix() for reusing matredundant */
2424   PetscInt       nzlocal,nsends,nrecvs;
2425   PetscMPIInt    *send_rank,*recv_rank;
2426   PetscInt       *sbuf_nz,*rbuf_nz,*sbuf_j,**rbuf_j;
2427   PetscScalar    *sbuf_a,**rbuf_a;
2428   PetscErrorCode (*Destroy)(Mat);
2429 } Mat_Redundant;
2430 
2431 #undef __FUNCT__
2432 #define __FUNCT__ "PetscContainerDestroy_MatRedundant"
2433 PetscErrorCode PetscContainerDestroy_MatRedundant(void *ptr)
2434 {
2435   PetscErrorCode       ierr;
2436   Mat_Redundant        *redund=(Mat_Redundant*)ptr;
2437   PetscInt             i;
2438 
2439   PetscFunctionBegin;
2440   ierr = PetscFree2(redund->send_rank,redund->recv_rank);CHKERRQ(ierr);
2441   ierr = PetscFree(redund->sbuf_j);CHKERRQ(ierr);
2442   ierr = PetscFree(redund->sbuf_a);CHKERRQ(ierr);
2443   for (i=0; i<redund->nrecvs; i++){
2444     ierr = PetscFree(redund->rbuf_j[i]);CHKERRQ(ierr);
2445     ierr = PetscFree(redund->rbuf_a[i]);CHKERRQ(ierr);
2446   }
2447   ierr = PetscFree4(redund->sbuf_nz,redund->rbuf_nz,redund->rbuf_j,redund->rbuf_a);CHKERRQ(ierr);
2448   ierr = PetscFree(redund);CHKERRQ(ierr);
2449   PetscFunctionReturn(0);
2450 }
2451 
2452 #undef __FUNCT__
2453 #define __FUNCT__ "MatDestroy_MatRedundant"
2454 PetscErrorCode MatDestroy_MatRedundant(Mat A)
2455 {
2456   PetscErrorCode  ierr;
2457   PetscContainer  container;
2458   Mat_Redundant   *redund=PETSC_NULL;
2459 
2460   PetscFunctionBegin;
2461   ierr = PetscObjectQuery((PetscObject)A,"Mat_Redundant",(PetscObject *)&container);CHKERRQ(ierr);
2462   if (!container) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Container does not exist");
2463   ierr = PetscContainerGetPointer(container,(void **)&redund);CHKERRQ(ierr);
2464   A->ops->destroy = redund->Destroy;
2465   ierr = PetscObjectCompose((PetscObject)A,"Mat_Redundant",0);CHKERRQ(ierr);
2466   if (A->ops->destroy) {
2467     ierr = (*A->ops->destroy)(A);CHKERRQ(ierr);
2468   }
2469   PetscFunctionReturn(0);
2470 }
2471 
2472 #undef __FUNCT__
2473 #define __FUNCT__ "MatGetRedundantMatrix_MPIAIJ"
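/*
   Builds, on each subcommunicator, a redundant (complete) copy of the parallel matrix.  Every
   process packs its local rows (row pointers, global column indices, values) into send buffers
   and exchanges them with the matching processes of the other subcommunicators; with
   MAT_REUSE_MATRIX the communication pattern and index buffers cached in the attached
   Mat_Redundant container are reused and only the numerical values are re-sent.
*/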
2474 PetscErrorCode MatGetRedundantMatrix_MPIAIJ(Mat mat,PetscInt nsubcomm,MPI_Comm subcomm,PetscInt mlocal_sub,MatReuse reuse,Mat *matredundant)
2475 {
2476   PetscMPIInt    rank,size;
2477   MPI_Comm       comm=((PetscObject)mat)->comm;
2478   PetscErrorCode ierr;
2479   PetscInt       nsends=0,nrecvs=0,i,rownz_max=0;
2480   PetscMPIInt    *send_rank=PETSC_NULL,*recv_rank=PETSC_NULL;
2481   PetscInt       *rowrange=mat->rmap->range;
2482   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2483   Mat            A=aij->A,B=aij->B,C=*matredundant;
2484   Mat_SeqAIJ     *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data;
2485   PetscScalar    *sbuf_a;
2486   PetscInt       nzlocal=a->nz+b->nz;
2487   PetscInt       j,cstart=mat->cmap->rstart,cend=mat->cmap->rend,row,nzA,nzB,ncols,*cworkA,*cworkB;
2488   PetscInt       rstart=mat->rmap->rstart,rend=mat->rmap->rend,*bmap=aij->garray,M,N;
2489   PetscInt       *cols,ctmp,lwrite,*rptr,l,*sbuf_j;
2490   MatScalar      *aworkA,*aworkB;
2491   PetscScalar    *vals;
2492   PetscMPIInt    tag1,tag2,tag3,imdex;
2493   MPI_Request    *s_waits1=PETSC_NULL,*s_waits2=PETSC_NULL,*s_waits3=PETSC_NULL,
2494                  *r_waits1=PETSC_NULL,*r_waits2=PETSC_NULL,*r_waits3=PETSC_NULL;
2495   MPI_Status     recv_status,*send_status;
2496   PetscInt       *sbuf_nz=PETSC_NULL,*rbuf_nz=PETSC_NULL,count;
2497   PetscInt       **rbuf_j=PETSC_NULL;
2498   PetscScalar    **rbuf_a=PETSC_NULL;
2499   Mat_Redundant  *redund=PETSC_NULL;
2500   PetscContainer container;
2501 
2502   PetscFunctionBegin;
2503   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2504   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2505 
2506   if (reuse == MAT_REUSE_MATRIX) {
2507     ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr);
2508     if (M != N || M != mat->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong global size");
2509     ierr = MatGetLocalSize(C,&M,&N);CHKERRQ(ierr);
2510     if (M != N || M != mlocal_sub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong local size");
2511     ierr = PetscObjectQuery((PetscObject)C,"Mat_Redundant",(PetscObject *)&container);CHKERRQ(ierr);
2512     if (!container) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Container does not exist");
2513     ierr = PetscContainerGetPointer(container,(void **)&redund);CHKERRQ(ierr);
2514     if (nzlocal != redund->nzlocal) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong nzlocal");
2515 
2516     nsends    = redund->nsends;
2517     nrecvs    = redund->nrecvs;
2518     send_rank = redund->send_rank;
2519     recv_rank = redund->recv_rank;
2520     sbuf_nz   = redund->sbuf_nz;
2521     rbuf_nz   = redund->rbuf_nz;
2522     sbuf_j    = redund->sbuf_j;
2523     sbuf_a    = redund->sbuf_a;
2524     rbuf_j    = redund->rbuf_j;
2525     rbuf_a    = redund->rbuf_a;
2526   }
2527 
2528   if (reuse == MAT_INITIAL_MATRIX){
2529     PetscMPIInt  subrank,subsize;
2530     PetscInt     nleftover,np_subcomm;
2531     /* get the destination processors' id send_rank, nsends and nrecvs */
2532     ierr = MPI_Comm_rank(subcomm,&subrank);CHKERRQ(ierr);
2533     ierr = MPI_Comm_size(subcomm,&subsize);CHKERRQ(ierr);
2534     ierr = PetscMalloc2(size,PetscMPIInt,&send_rank,size,PetscMPIInt,&recv_rank);CHKERRQ(ierr);
2535     np_subcomm = size/nsubcomm;
2536     nleftover  = size - nsubcomm*np_subcomm;
2537     nsends = 0; nrecvs = 0;
2538     for (i=0; i<size; i++){ /* i=rank*/
2539       if (subrank == i/nsubcomm && rank != i){ /* my_subrank == other's subrank */
2540         send_rank[nsends] = i; nsends++;
2541         recv_rank[nrecvs++] = i;
2542       }
2543     }
2544     if (rank >= size - nleftover){/* this proc is a leftover processor */
2545       i = size-nleftover-1;
2546       j = 0;
2547       while (j < nsubcomm - nleftover){
2548         send_rank[nsends++] = i;
2549         i--; j++;
2550       }
2551     }
2552 
2553     if (nleftover && subsize == size/nsubcomm && subrank==subsize-1){ /* this proc recvs from leftover processors */
2554       for (i=0; i<nleftover; i++){
2555         recv_rank[nrecvs++] = size-nleftover+i;
2556       }
2557     }
2558 
2559     /* allocate sbuf_j, sbuf_a */
2560     i = nzlocal + rowrange[rank+1] - rowrange[rank] + 2;
2561     ierr = PetscMalloc(i*sizeof(PetscInt),&sbuf_j);CHKERRQ(ierr);
2562     ierr = PetscMalloc((nzlocal+1)*sizeof(PetscScalar),&sbuf_a);CHKERRQ(ierr);
2563   } /* end of if (reuse == MAT_INITIAL_MATRIX) */
2564 
2565   /* copy mat's local entries into the buffers */
2566   if (reuse == MAT_INITIAL_MATRIX){
2567     rownz_max = 0;
2568     rptr = sbuf_j;
2569     cols = sbuf_j + rend-rstart + 1;
2570     vals = sbuf_a;
2571     rptr[0] = 0;
2572     for (i=0; i<rend-rstart; i++){
2573       row = i + rstart;
2574       nzA    = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i];
2575       ncols  = nzA + nzB;
2576       cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i];
2577       aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i];
2578       /* load the column indices for this row into cols */
2579       lwrite = 0;
2580       for (l=0; l<nzB; l++) {
2581         if ((ctmp = bmap[cworkB[l]]) < cstart){
2582           vals[lwrite]   = aworkB[l];
2583           cols[lwrite++] = ctmp;
2584         }
2585       }
2586       for (l=0; l<nzA; l++){
2587         vals[lwrite]   = aworkA[l];
2588         cols[lwrite++] = cstart + cworkA[l];
2589       }
2590       for (l=0; l<nzB; l++) {
2591         if ((ctmp = bmap[cworkB[l]]) >= cend){
2592           vals[lwrite]   = aworkB[l];
2593           cols[lwrite++] = ctmp;
2594         }
2595       }
2596       vals += ncols;
2597       cols += ncols;
2598       rptr[i+1] = rptr[i] + ncols;
2599       if (rownz_max < ncols) rownz_max = ncols;
2600     }
2601     if (rptr[rend-rstart] != a->nz + b->nz) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB, "rptr[%D] %D != %D + %D",rend-rstart,rptr[rend-rstart],a->nz,b->nz);
2602   } else { /* only copy matrix values into sbuf_a */
2603     rptr = sbuf_j;
2604     vals = sbuf_a;
2605     rptr[0] = 0;
2606     for (i=0; i<rend-rstart; i++){
2607       row = i + rstart;
2608       nzA    = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i];
2609       ncols  = nzA + nzB;
2610       cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i];
2611       aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i];
2612       lwrite = 0;
2613       for (l=0; l<nzB; l++) {
2614         if ((ctmp = bmap[cworkB[l]]) < cstart) vals[lwrite++] = aworkB[l];
2615       }
2616       for (l=0; l<nzA; l++) vals[lwrite++] = aworkA[l];
2617       for (l=0; l<nzB; l++) {
2618         if ((ctmp = bmap[cworkB[l]]) >= cend) vals[lwrite++] = aworkB[l];
2619       }
2620       vals += ncols;
2621       rptr[i+1] = rptr[i] + ncols;
2622     }
2623   } /* end of if (reuse == MAT_INITIAL_MATRIX) */
2624 
2625   /* send nzlocal to others, and recv other's nzlocal */
2626   /*--------------------------------------------------*/
2627   if (reuse == MAT_INITIAL_MATRIX){
2628     ierr = PetscMalloc2(3*(nsends + nrecvs)+1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr);
2629     s_waits2 = s_waits3 + nsends;
2630     s_waits1 = s_waits2 + nsends;
2631     r_waits1 = s_waits1 + nsends;
2632     r_waits2 = r_waits1 + nrecvs;
2633     r_waits3 = r_waits2 + nrecvs;
2634   } else {
2635     ierr = PetscMalloc2(nsends + nrecvs +1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr);
2636     r_waits3 = s_waits3 + nsends;
2637   }
2638 
2639   ierr = PetscObjectGetNewTag((PetscObject)mat,&tag3);CHKERRQ(ierr);
2640   if (reuse == MAT_INITIAL_MATRIX){
2641     /* get new tags to keep the communication clean */
2642     ierr = PetscObjectGetNewTag((PetscObject)mat,&tag1);CHKERRQ(ierr);
2643     ierr = PetscObjectGetNewTag((PetscObject)mat,&tag2);CHKERRQ(ierr);
2644     ierr = PetscMalloc4(nsends,PetscInt,&sbuf_nz,nrecvs,PetscInt,&rbuf_nz,nrecvs,PetscInt*,&rbuf_j,nrecvs,PetscScalar*,&rbuf_a);CHKERRQ(ierr);
2645 
2646     /* post receives of other's nzlocal */
2647     for (i=0; i<nrecvs; i++){
2648       ierr = MPI_Irecv(rbuf_nz+i,1,MPIU_INT,MPI_ANY_SOURCE,tag1,comm,r_waits1+i);CHKERRQ(ierr);
2649     }
2650     /* send nzlocal to others */
2651     for (i=0; i<nsends; i++){
2652       sbuf_nz[i] = nzlocal;
2653       ierr = MPI_Isend(sbuf_nz+i,1,MPIU_INT,send_rank[i],tag1,comm,s_waits1+i);CHKERRQ(ierr);
2654     }
2655     /* wait on receives of nzlocal; allocate space for rbuf_j, rbuf_a */
2656     count = nrecvs;
2657     while (count) {
2658       ierr = MPI_Waitany(nrecvs,r_waits1,&imdex,&recv_status);CHKERRQ(ierr);
2659       recv_rank[imdex] = recv_status.MPI_SOURCE;
2660       /* allocate rbuf_a and rbuf_j; then post receives of rbuf_j */
2661       ierr = PetscMalloc((rbuf_nz[imdex]+1)*sizeof(PetscScalar),&rbuf_a[imdex]);CHKERRQ(ierr);
2662 
2663       i = rowrange[recv_status.MPI_SOURCE+1] - rowrange[recv_status.MPI_SOURCE]; /* number of expected mat->i */
2664       rbuf_nz[imdex] += i + 2;
2665       ierr = PetscMalloc(rbuf_nz[imdex]*sizeof(PetscInt),&rbuf_j[imdex]);CHKERRQ(ierr);
2666       ierr = MPI_Irecv(rbuf_j[imdex],rbuf_nz[imdex],MPIU_INT,recv_status.MPI_SOURCE,tag2,comm,r_waits2+imdex);CHKERRQ(ierr);
2667       count--;
2668     }
2669     /* wait on sends of nzlocal */
2670     if (nsends) {ierr = MPI_Waitall(nsends,s_waits1,send_status);CHKERRQ(ierr);}
2671     /* send mat->i,j to others, and recv from others */
2672     /*------------------------------------------------*/
2673     for (i=0; i<nsends; i++){
2674       j = nzlocal + rowrange[rank+1] - rowrange[rank] + 1;
2675       ierr = MPI_Isend(sbuf_j,j,MPIU_INT,send_rank[i],tag2,comm,s_waits2+i);CHKERRQ(ierr);
2676     }
2677     /* wait on receives of mat->i,j */
2678     /*------------------------------*/
2679     count = nrecvs;
2680     while (count) {
2681       ierr = MPI_Waitany(nrecvs,r_waits2,&imdex,&recv_status);CHKERRQ(ierr);
2682       if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE);
2683       count--;
2684     }
2685     /* wait on sends of mat->i,j */
2686     /*---------------------------*/
2687     if (nsends) {
2688       ierr = MPI_Waitall(nsends,s_waits2,send_status);CHKERRQ(ierr);
2689     }
2690   } /* end of if (reuse == MAT_INITIAL_MATRIX) */
2691 
2692   /* post receives, send and receive mat->a */
2693   /*----------------------------------------*/
2694   for (imdex=0; imdex<nrecvs; imdex++) {
2695     ierr = MPI_Irecv(rbuf_a[imdex],rbuf_nz[imdex],MPIU_SCALAR,recv_rank[imdex],tag3,comm,r_waits3+imdex);CHKERRQ(ierr);
2696   }
2697   for (i=0; i<nsends; i++){
2698     ierr = MPI_Isend(sbuf_a,nzlocal,MPIU_SCALAR,send_rank[i],tag3,comm,s_waits3+i);CHKERRQ(ierr);
2699   }
2700   count = nrecvs;
2701   while (count) {
2702     ierr = MPI_Waitany(nrecvs,r_waits3,&imdex,&recv_status);CHKERRQ(ierr);
2703     if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE);
2704     count--;
2705   }
2706   if (nsends) {
2707     ierr = MPI_Waitall(nsends,s_waits3,send_status);CHKERRQ(ierr);
2708   }
2709 
2710   ierr = PetscFree2(s_waits3,send_status);CHKERRQ(ierr);
2711 
2712   /* create redundant matrix */
2713   /*-------------------------*/
2714   if (reuse == MAT_INITIAL_MATRIX){
2715     /* compute rownz_max for preallocation */
2716     for (imdex=0; imdex<nrecvs; imdex++){
2717       j = rowrange[recv_rank[imdex]+1] - rowrange[recv_rank[imdex]];
2718       rptr = rbuf_j[imdex];
2719       for (i=0; i<j; i++){
2720         ncols = rptr[i+1] - rptr[i];
2721         if (rownz_max < ncols) rownz_max = ncols;
2722       }
2723     }
2724 
2725     ierr = MatCreate(subcomm,&C);CHKERRQ(ierr);
2726     ierr = MatSetSizes(C,mlocal_sub,mlocal_sub,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr);
2727     ierr = MatSetFromOptions(C);CHKERRQ(ierr);
2728     ierr = MatSeqAIJSetPreallocation(C,rownz_max,PETSC_NULL);CHKERRQ(ierr);
2729     ierr = MatMPIAIJSetPreallocation(C,rownz_max,PETSC_NULL,rownz_max,PETSC_NULL);CHKERRQ(ierr);
2730   } else {
2731     C = *matredundant;
2732   }
2733 
2734   /* insert local matrix entries */
2735   rptr = sbuf_j;
2736   cols = sbuf_j + rend-rstart + 1;
2737   vals = sbuf_a;
2738   for (i=0; i<rend-rstart; i++){
2739     row   = i + rstart;
2740     ncols = rptr[i+1] - rptr[i];
2741     ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
2742     vals += ncols;
2743     cols += ncols;
2744   }
2745   /* insert received matrix entries */
2746   for (imdex=0; imdex<nrecvs; imdex++){
2747     rstart = rowrange[recv_rank[imdex]];
2748     rend   = rowrange[recv_rank[imdex]+1];
2749     rptr = rbuf_j[imdex];
2750     cols = rbuf_j[imdex] + rend-rstart + 1;
2751     vals = rbuf_a[imdex];
2752     for (i=0; i<rend-rstart; i++){
2753       row   = i + rstart;
2754       ncols = rptr[i+1] - rptr[i];
2755       ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
2756       vals += ncols;
2757       cols += ncols;
2758     }
2759   }
2760   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2761   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2762   ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr);
2763   if (M != mat->rmap->N || N != mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"redundant mat size %d != input mat size %d",M,mat->rmap->N);
2764   if (reuse == MAT_INITIAL_MATRIX) {
2765     PetscContainer container;
2766     *matredundant = C;
2767     /* create a supporting struct and attach it to C for reuse */
2768     ierr = PetscNewLog(C,Mat_Redundant,&redund);CHKERRQ(ierr);
2769     ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
2770     ierr = PetscContainerSetPointer(container,redund);CHKERRQ(ierr);
2771     ierr = PetscContainerSetUserDestroy(container,PetscContainerDestroy_MatRedundant);CHKERRQ(ierr);
2772     ierr = PetscObjectCompose((PetscObject)C,"Mat_Redundant",(PetscObject)container);CHKERRQ(ierr);
2773     ierr = PetscContainerDestroy(&container);CHKERRQ(ierr);
2774 
2775     redund->nzlocal = nzlocal;
2776     redund->nsends  = nsends;
2777     redund->nrecvs  = nrecvs;
2778     redund->send_rank = send_rank;
2779     redund->recv_rank = recv_rank;
2780     redund->sbuf_nz = sbuf_nz;
2781     redund->rbuf_nz = rbuf_nz;
2782     redund->sbuf_j  = sbuf_j;
2783     redund->sbuf_a  = sbuf_a;
2784     redund->rbuf_j  = rbuf_j;
2785     redund->rbuf_a  = rbuf_a;
2786 
2787     redund->Destroy = C->ops->destroy;
2788     C->ops->destroy = MatDestroy_MatRedundant;
2789   }
2790   PetscFunctionReturn(0);
2791 }
2792 
2793 #undef __FUNCT__
2794 #define __FUNCT__ "MatGetRowMaxAbs_MPIAIJ"
2795 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2796 {
2797   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2798   PetscErrorCode ierr;
2799   PetscInt       i,*idxb = 0;
2800   PetscScalar    *va,*vb;
2801   Vec            vtmp;
2802 
2803   PetscFunctionBegin;
2804   ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr);
2805   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2806   if (idx) {
2807     for (i=0; i<A->rmap->n; i++) {
2808       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2809     }
2810   }
2811 
2812   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
2813   if (idx) {
2814     ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr);
2815   }
2816   ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
2817   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
2818 
2819   for (i=0; i<A->rmap->n; i++){
2820     if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
2821       va[i] = vb[i];
2822       if (idx) idx[i] = a->garray[idxb[i]];
2823     }
2824   }
2825 
2826   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2827   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
2828   ierr = PetscFree(idxb);CHKERRQ(ierr);
2829   ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
2830   PetscFunctionReturn(0);
2831 }
2832 
2833 #undef __FUNCT__
2834 #define __FUNCT__ "MatGetRowMinAbs_MPIAIJ"
2835 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2836 {
2837   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2838   PetscErrorCode ierr;
2839   PetscInt       i,*idxb = 0;
2840   PetscScalar    *va,*vb;
2841   Vec            vtmp;
2842 
2843   PetscFunctionBegin;
2844   ierr = MatGetRowMinAbs(a->A,v,idx);CHKERRQ(ierr);
2845   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2846   if (idx) {
2847     for (i=0; i<A->rmap->n; i++) {
2848       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2849     }
2850   }
2851 
2852   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
2853   if (idx) {
2854     ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr);
2855   }
2856   ierr = MatGetRowMinAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
2857   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
2858 
2859   for (i=0; i<A->rmap->n; i++){
2860     if (PetscAbsScalar(va[i]) > PetscAbsScalar(vb[i])) {
2861       va[i] = vb[i];
2862       if (idx) idx[i] = a->garray[idxb[i]];
2863     }
2864   }
2865 
2866   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2867   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
2868   ierr = PetscFree(idxb);CHKERRQ(ierr);
2869   ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
2870   PetscFunctionReturn(0);
2871 }
2872 
2873 #undef __FUNCT__
2874 #define __FUNCT__ "MatGetRowMin_MPIAIJ"
2875 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2876 {
2877   Mat_MPIAIJ    *mat    = (Mat_MPIAIJ *) A->data;
2878   PetscInt       n      = A->rmap->n;
2879   PetscInt       cstart = A->cmap->rstart;
2880   PetscInt      *cmap   = mat->garray;
2881   PetscInt      *diagIdx, *offdiagIdx;
2882   Vec            diagV, offdiagV;
2883   PetscScalar   *a, *diagA, *offdiagA;
2884   PetscInt       r;
2885   PetscErrorCode ierr;
2886 
2887   PetscFunctionBegin;
2888   ierr = PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);CHKERRQ(ierr);
2889   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr);
2890   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr);
2891   ierr = MatGetRowMin(mat->A, diagV,    diagIdx);CHKERRQ(ierr);
2892   ierr = MatGetRowMin(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
2893   ierr = VecGetArray(v,        &a);CHKERRQ(ierr);
2894   ierr = VecGetArray(diagV,    &diagA);CHKERRQ(ierr);
2895   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
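  /* Merge the per-row results: keep whichever of the diagonal-block or off-diagonal-block
     candidate has the smaller magnitude, and translate its local column index to the global
     numbering (cstart offset for the diagonal block, the garray map for the off-diagonal block) */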
2896   for(r = 0; r < n; ++r) {
2897     if (PetscAbsScalar(diagA[r]) <= PetscAbsScalar(offdiagA[r])) {
2898       a[r]   = diagA[r];
2899       idx[r] = cstart + diagIdx[r];
2900     } else {
2901       a[r]   = offdiagA[r];
2902       idx[r] = cmap[offdiagIdx[r]];
2903     }
2904   }
2905   ierr = VecRestoreArray(v,        &a);CHKERRQ(ierr);
2906   ierr = VecRestoreArray(diagV,    &diagA);CHKERRQ(ierr);
2907   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2908   ierr = VecDestroy(&diagV);CHKERRQ(ierr);
2909   ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
2910   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
2911   PetscFunctionReturn(0);
2912 }
2913 
2914 #undef __FUNCT__
2915 #define __FUNCT__ "MatGetRowMax_MPIAIJ"
2916 PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2917 {
2918   Mat_MPIAIJ    *mat    = (Mat_MPIAIJ *) A->data;
2919   PetscInt       n      = A->rmap->n;
2920   PetscInt       cstart = A->cmap->rstart;
2921   PetscInt      *cmap   = mat->garray;
2922   PetscInt      *diagIdx, *offdiagIdx;
2923   Vec            diagV, offdiagV;
2924   PetscScalar   *a, *diagA, *offdiagA;
2925   PetscInt       r;
2926   PetscErrorCode ierr;
2927 
2928   PetscFunctionBegin;
2929   ierr = PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);CHKERRQ(ierr);
2930   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr);
2931   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr);
2932   ierr = MatGetRowMax(mat->A, diagV,    diagIdx);CHKERRQ(ierr);
2933   ierr = MatGetRowMax(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
2934   ierr = VecGetArray(v,        &a);CHKERRQ(ierr);
2935   ierr = VecGetArray(diagV,    &diagA);CHKERRQ(ierr);
2936   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2937   for(r = 0; r < n; ++r) {
2938     if (PetscAbsScalar(diagA[r]) >= PetscAbsScalar(offdiagA[r])) {
2939       a[r]   = diagA[r];
2940       idx[r] = cstart + diagIdx[r];
2941     } else {
2942       a[r]   = offdiagA[r];
2943       idx[r] = cmap[offdiagIdx[r]];
2944     }
2945   }
2946   ierr = VecRestoreArray(v,        &a);CHKERRQ(ierr);
2947   ierr = VecRestoreArray(diagV,    &diagA);CHKERRQ(ierr);
2948   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2949   ierr = VecDestroy(&diagV);CHKERRQ(ierr);
2950   ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
2951   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
2952   PetscFunctionReturn(0);
2953 }
2954 
2955 #undef __FUNCT__
2956 #define __FUNCT__ "MatGetSeqNonzerostructure_MPIAIJ"
2957 PetscErrorCode MatGetSeqNonzerostructure_MPIAIJ(Mat mat,Mat *newmat)
2958 {
2959   PetscErrorCode ierr;
2960   Mat            *dummy;
2961 
2962   PetscFunctionBegin;
2963   ierr = MatGetSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy);CHKERRQ(ierr);
2964   *newmat = *dummy;
2965   ierr = PetscFree(dummy);CHKERRQ(ierr);
2966   PetscFunctionReturn(0);
2967 }
2968 
2969 extern PetscErrorCode  MatFDColoringApply_AIJ(Mat,MatFDColoring,Vec,MatStructure*,void*);
2970 /* -------------------------------------------------------------------*/
2971 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
2972        MatGetRow_MPIAIJ,
2973        MatRestoreRow_MPIAIJ,
2974        MatMult_MPIAIJ,
2975 /* 4*/ MatMultAdd_MPIAIJ,
2976        MatMultTranspose_MPIAIJ,
2977        MatMultTransposeAdd_MPIAIJ,
2978 #ifdef PETSC_HAVE_PBGL
2979        MatSolve_MPIAIJ,
2980 #else
2981        0,
2982 #endif
2983        0,
2984        0,
2985 /*10*/ 0,
2986        0,
2987        0,
2988        MatSOR_MPIAIJ,
2989        MatTranspose_MPIAIJ,
2990 /*15*/ MatGetInfo_MPIAIJ,
2991        MatEqual_MPIAIJ,
2992        MatGetDiagonal_MPIAIJ,
2993        MatDiagonalScale_MPIAIJ,
2994        MatNorm_MPIAIJ,
2995 /*20*/ MatAssemblyBegin_MPIAIJ,
2996        MatAssemblyEnd_MPIAIJ,
2997        MatSetOption_MPIAIJ,
2998        MatZeroEntries_MPIAIJ,
2999 /*24*/ MatZeroRows_MPIAIJ,
3000        0,
3001 #ifdef PETSC_HAVE_PBGL
3002        0,
3003 #else
3004        0,
3005 #endif
3006        0,
3007        0,
3008 /*29*/ MatSetUpPreallocation_MPIAIJ,
3009 #ifdef PETSC_HAVE_PBGL
3010        0,
3011 #else
3012        0,
3013 #endif
3014        0,
3015        0,
3016        0,
3017 /*34*/ MatDuplicate_MPIAIJ,
3018        0,
3019        0,
3020        0,
3021        0,
3022 /*39*/ MatAXPY_MPIAIJ,
3023        MatGetSubMatrices_MPIAIJ,
3024        MatIncreaseOverlap_MPIAIJ,
3025        MatGetValues_MPIAIJ,
3026        MatCopy_MPIAIJ,
3027 /*44*/ MatGetRowMax_MPIAIJ,
3028        MatScale_MPIAIJ,
3029        0,
3030        0,
3031        MatZeroRowsColumns_MPIAIJ,
3032 /*49*/ MatSetBlockSize_MPIAIJ,
3033        0,
3034        0,
3035        0,
3036        0,
3037 /*54*/ MatFDColoringCreate_MPIAIJ,
3038        0,
3039        MatSetUnfactored_MPIAIJ,
3040        MatPermute_MPIAIJ,
3041        0,
3042 /*59*/ MatGetSubMatrix_MPIAIJ,
3043        MatDestroy_MPIAIJ,
3044        MatView_MPIAIJ,
3045        0,
3046        0,
3047 /*64*/ 0,
3048        0,
3049        0,
3050        0,
3051        0,
3052 /*69*/ MatGetRowMaxAbs_MPIAIJ,
3053        MatGetRowMinAbs_MPIAIJ,
3054        0,
3055        MatSetColoring_MPIAIJ,
3056 #if defined(PETSC_HAVE_ADIC)
3057        MatSetValuesAdic_MPIAIJ,
3058 #else
3059        0,
3060 #endif
3061        MatSetValuesAdifor_MPIAIJ,
3062 /*75*/ MatFDColoringApply_AIJ,
3063        0,
3064        0,
3065        0,
3066        0,
3067 /*80*/ 0,
3068        0,
3069        0,
3070 /*83*/ MatLoad_MPIAIJ,
3071        0,
3072        0,
3073        0,
3074        0,
3075        0,
3076 /*89*/ MatMatMult_MPIAIJ_MPIAIJ,
3077        MatMatMultSymbolic_MPIAIJ_MPIAIJ,
3078        MatMatMultNumeric_MPIAIJ_MPIAIJ,
3079        MatPtAP_Basic,
3080        MatPtAPSymbolic_MPIAIJ,
3081 /*94*/ MatPtAPNumeric_MPIAIJ,
3082        0,
3083        0,
3084        0,
3085        0,
3086 /*99*/ 0,
3087        MatPtAPSymbolic_MPIAIJ_MPIAIJ,
3088        MatPtAPNumeric_MPIAIJ_MPIAIJ,
3089        MatConjugate_MPIAIJ,
3090        0,
3091 /*104*/MatSetValuesRow_MPIAIJ,
3092        MatRealPart_MPIAIJ,
3093        MatImaginaryPart_MPIAIJ,
3094        0,
3095        0,
3096 /*109*/0,
3097        MatGetRedundantMatrix_MPIAIJ,
3098        MatGetRowMin_MPIAIJ,
3099        0,
3100        0,
3101 /*114*/MatGetSeqNonzerostructure_MPIAIJ,
3102        0,
3103        0,
3104        0,
3105        0,
3106 /*119*/0,
3107        0,
3108        0,
3109        0,
3110        MatGetMultiProcBlock_MPIAIJ,
3111 /*124*/MatFindNonZeroRows_MPIAIJ,
3112        MatGetColumnNorms_MPIAIJ,
3113        0,
3114        0,
3115        MatGetSubMatricesParallel_MPIAIJ
3116 };
3117 
3118 /* ----------------------------------------------------------------------------------------*/
3119 
3120 EXTERN_C_BEGIN
3121 #undef __FUNCT__
3122 #define __FUNCT__ "MatStoreValues_MPIAIJ"
3123 PetscErrorCode  MatStoreValues_MPIAIJ(Mat mat)
3124 {
3125   Mat_MPIAIJ     *aij = (Mat_MPIAIJ *)mat->data;
3126   PetscErrorCode ierr;
3127 
3128   PetscFunctionBegin;
3129   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
3130   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
3131   PetscFunctionReturn(0);
3132 }
3133 EXTERN_C_END
3134 
3135 EXTERN_C_BEGIN
3136 #undef __FUNCT__
3137 #define __FUNCT__ "MatRetrieveValues_MPIAIJ"
3138 PetscErrorCode  MatRetrieveValues_MPIAIJ(Mat mat)
3139 {
3140   Mat_MPIAIJ     *aij = (Mat_MPIAIJ *)mat->data;
3141   PetscErrorCode ierr;
3142 
3143   PetscFunctionBegin;
3144   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
3145   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
3146   PetscFunctionReturn(0);
3147 }
3148 EXTERN_C_END
3149 
3150 EXTERN_C_BEGIN
3151 #undef __FUNCT__
3152 #define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ"
3153 PetscErrorCode  MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
3154 {
3155   Mat_MPIAIJ     *b;
3156   PetscErrorCode ierr;
3157   PetscInt       i;
3158 
3159   PetscFunctionBegin;
3160   if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5;
3161   if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2;
3162   if (d_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz);
3163   if (o_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz);
3164 
3165   ierr = PetscLayoutSetBlockSize(B->rmap,1);CHKERRQ(ierr);
3166   ierr = PetscLayoutSetBlockSize(B->cmap,1);CHKERRQ(ierr);
3167   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3168   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3169   if (d_nnz) {
3170     for (i=0; i<B->rmap->n; i++) {
3171       if (d_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %D value %D",i,d_nnz[i]);
3172     }
3173   }
3174   if (o_nnz) {
3175     for (i=0; i<B->rmap->n; i++) {
3176       if (o_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %D value %D",i,o_nnz[i]);
3177     }
3178   }
3179   b = (Mat_MPIAIJ*)B->data;
3180 
3181   if (!B->preallocated) {
3182     /* Explicitly create 2 MATSEQAIJ matrices. */
3183     ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
3184     ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr);
3185     ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr);
3186     ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr);
3187     ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
3188     ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr);
3189     ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr);
3190     ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr);
3191   }
3192 
3193   ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr);
3194   ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr);
3195   B->preallocated = PETSC_TRUE;
3196   PetscFunctionReturn(0);
3197 }
3198 EXTERN_C_END
3199 
3200 #undef __FUNCT__
3201 #define __FUNCT__ "MatDuplicate_MPIAIJ"
3202 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
3203 {
3204   Mat            mat;
3205   Mat_MPIAIJ     *a,*oldmat = (Mat_MPIAIJ*)matin->data;
3206   PetscErrorCode ierr;
3207 
3208   PetscFunctionBegin;
3209   *newmat       = 0;
3210   ierr = MatCreate(((PetscObject)matin)->comm,&mat);CHKERRQ(ierr);
3211   ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr);
3212   ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr);
3213   ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
3214   a    = (Mat_MPIAIJ*)mat->data;
3215 
3216   mat->factortype    = matin->factortype;
3217   mat->rmap->bs      = matin->rmap->bs;
3218   mat->assembled    = PETSC_TRUE;
3219   mat->insertmode   = NOT_SET_VALUES;
3220   mat->preallocated = PETSC_TRUE;
3221 
3222   a->size           = oldmat->size;
3223   a->rank           = oldmat->rank;
3224   a->donotstash     = oldmat->donotstash;
3225   a->roworiented    = oldmat->roworiented;
3226   a->rowindices     = 0;
3227   a->rowvalues      = 0;
3228   a->getrowactive   = PETSC_FALSE;
3229 
3230   ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr);
3231   ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr);
3232 
3233   if (oldmat->colmap) {
3234 #if defined (PETSC_USE_CTABLE)
3235     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
3236 #else
3237     ierr = PetscMalloc((mat->cmap->N)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr);
3238     ierr = PetscLogObjectMemory(mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
3239     ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
3240 #endif
3241   } else a->colmap = 0;
3242   if (oldmat->garray) {
3243     PetscInt len;
3244     len  = oldmat->B->cmap->n;
3245     ierr = PetscMalloc((len+1)*sizeof(PetscInt),&a->garray);CHKERRQ(ierr);
3246     ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr);
3247     if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); }
3248   } else a->garray = 0;
3249 
3250   ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
3251   ierr = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr);
3252   ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
3253   ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr);
3254   ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
3255   ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr);
3256   ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
3257   ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr);
3258   ierr = PetscFListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr);
3259   *newmat = mat;
3260   PetscFunctionReturn(0);
3261 }
3262 
3263 /*
3264     Allows sending/receiving larger messages than 2 gigabytes in a single call
3265 */
3266 static int MPILong_Send(void *mess,PetscInt cnt, MPI_Datatype type,int to, int tag, MPI_Comm comm)
3267 {
3268   int             ierr;
3269   static PetscInt CHUNKSIZE = 250000000; /* 250,000,000 */
3270   PetscInt        i,numchunks;
3271   PetscMPIInt     icnt;
3272 
3273   numchunks = cnt/CHUNKSIZE + 1;
3274   for (i=0; i<numchunks; i++) {
3275     icnt = PetscMPIIntCast((i < numchunks-1) ? CHUNKSIZE : cnt - (numchunks-1)*CHUNKSIZE);
3276     ierr = MPI_Send(mess,icnt,type,to,tag,comm);if (ierr) return ierr;
3277     if (type == MPIU_INT) {
3278       mess = (void*) (((PetscInt*)mess) + CHUNKSIZE);
3279     } else if (type == MPIU_SCALAR) {
3280       mess = (void*) (((PetscScalar*)mess) + CHUNKSIZE);
3281     } else SETERRQ(comm,PETSC_ERR_SUP,"No support for this datatype");
3282   }
3283   return 0;
3284 }
3285 static int MPILong_Recv(void *mess,PetscInt cnt, MPI_Datatype type,int from, int tag, MPI_Comm comm)
3286 {
3287   int             ierr;
3288   static PetscInt CHUNKSIZE = 250000000; /* 250,000,000 */
3289   MPI_Status      status;
3290   PetscInt        i,numchunks;
3291   PetscMPIInt     icnt;
3292 
3293   numchunks = cnt/CHUNKSIZE + 1;
3294   for (i=0; i<numchunks; i++) {
3295     icnt = PetscMPIIntCast((i < numchunks-1) ? CHUNKSIZE : cnt - (numchunks-1)*CHUNKSIZE);
3296     ierr = MPI_Recv(mess,icnt,type,from,tag,comm,&status);if (ierr) return ierr;
3297     if (type == MPIU_INT) {
3298       mess = (void*) (((PetscInt*)mess) + CHUNKSIZE);
3299     } else if (type == MPIU_SCALAR) {
3300       mess = (void*) (((PetscScalar*)mess) + CHUNKSIZE);
3301     } else SETERRQ(comm,PETSC_ERR_SUP,"No support for this datatype");
3302   }
3303   return 0;
3304 }
3305 
3306 #undef __FUNCT__
3307 #define __FUNCT__ "MatLoad_MPIAIJ"
3308 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer)
3309 {
3310   PetscScalar    *vals,*svals;
3311   MPI_Comm       comm = ((PetscObject)viewer)->comm;
3312   PetscErrorCode ierr;
3313   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
3314   PetscInt       i,nz,j,rstart,rend,mmax,maxnz = 0,grows,gcols;
3315   PetscInt       header[4],*rowlengths = 0,M,N,m,*cols;
3316   PetscInt       *ourlens = PETSC_NULL,*procsnz = PETSC_NULL,*offlens = PETSC_NULL,jj,*mycols,*smycols;
3317   PetscInt       cend,cstart,n,*rowners,sizesset=1;
3318   int            fd;
3319 
3320   PetscFunctionBegin;
3321   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3322   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3323   if (!rank) {
3324     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
3325     ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
3326     if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
3327   }
3328 
3329   if (newMat->rmap->n < 0 && newMat->rmap->N < 0 && newMat->cmap->n < 0 && newMat->cmap->N < 0) sizesset = 0;
3330 
3331   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
3332   M = header[1]; N = header[2];
3333   /* If global rows/cols are set to PETSC_DECIDE, set it to the sizes given in the file */
3334   if (sizesset && newMat->rmap->N < 0) newMat->rmap->N = M;
3335   if (sizesset && newMat->cmap->N < 0) newMat->cmap->N = N;
3336 
3337   /* If global sizes are set, check if they are consistent with that given in the file */
3338   if (sizesset) {
3339     ierr = MatGetSize(newMat,&grows,&gcols);CHKERRQ(ierr);
3340   }
3341   if (sizesset && newMat->rmap->N != grows) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows:Matrix in file has (%d) and input matrix has (%d)",M,grows);
3342   if (sizesset && newMat->cmap->N != gcols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols:Matrix in file has (%d) and input matrix has (%d)",N,gcols);
3343 
3344   /* determine ownership of all rows */
3345   if (newMat->rmap->n < 0 ) m    = M/size + ((M % size) > rank); /* PETSC_DECIDE */
3346   else m = newMat->rmap->n; /* Set by user */
3347 
3348   ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr);
3349   ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
3350 
3351   /* First process needs enough room for process with most rows */
3352   if (!rank) {
3353     mmax       = rowners[1];
3354     for (i=2; i<=size; i++) {
3355       mmax = PetscMax(mmax,rowners[i]);
3356     }
3357   } else mmax = m;
3358 
3359   rowners[0] = 0;
3360   for (i=2; i<=size; i++) {
3361     rowners[i] += rowners[i-1];
3362   }
3363   rstart = rowners[rank];
3364   rend   = rowners[rank+1];
3365 
3366   /* distribute row lengths to all processors */
3367   ierr    = PetscMalloc2(mmax,PetscInt,&ourlens,mmax,PetscInt,&offlens);CHKERRQ(ierr);
3368   if (!rank) {
3369     ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr);
3370     ierr = PetscMalloc(mmax*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
3371     ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr);
3372     ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr);
3373     for (j=0; j<m; j++) {
3374       procsnz[0] += ourlens[j];
3375     }
3376     for (i=1; i<size; i++) {
3377       ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr);
3378       /* calculate the number of nonzeros on each processor */
3379       for (j=0; j<rowners[i+1]-rowners[i]; j++) {
3380         procsnz[i] += rowlengths[j];
3381       }
3382       ierr = MPILong_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
3383     }
3384     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
3385   } else {
3386     ierr = MPILong_Recv(ourlens,m,MPIU_INT,0,tag,comm);CHKERRQ(ierr);
3387   }
3388 
3389   if (!rank) {
3390     /* determine max buffer needed and allocate it */
3391     maxnz = 0;
3392     for (i=0; i<size; i++) {
3393       maxnz = PetscMax(maxnz,procsnz[i]);
3394     }
3395     ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr);
3396 
3397     /* read in my part of the matrix column indices  */
3398     nz   = procsnz[0];
3399     ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr);
3400     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
3401 
3402     /* read in everyone else's and ship off */
3403     for (i=1; i<size; i++) {
3404       nz     = procsnz[i];
3405       ierr   = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
3406       ierr   = MPILong_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
3407     }
3408     ierr = PetscFree(cols);CHKERRQ(ierr);
3409   } else {
3410     /* determine buffer space needed for message */
3411     nz = 0;
3412     for (i=0; i<m; i++) {
3413       nz += ourlens[i];
3414     }
3415     ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr);
3416 
3417     /* receive message of column indices*/
3418     ierr = MPILong_Recv(mycols,nz,MPIU_INT,0,tag,comm);CHKERRQ(ierr);
3419   }
3420 
3421   /* determine column ownership if matrix is not square */
3422   if (N != M) {
3423     if (newMat->cmap->n < 0) n      = N/size + ((N % size) > rank);
3424     else n = newMat->cmap->n;
3425     ierr   = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3426     cstart = cend - n;
3427   } else {
3428     cstart = rstart;
3429     cend   = rend;
3430     n      = cend - cstart;
3431   }
3432 
3433   /* loop over local rows, determining number of off diagonal entries */
3434   ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr);
3435   jj = 0;
3436   for (i=0; i<m; i++) {
3437     for (j=0; j<ourlens[i]; j++) {
3438       if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++;
3439       jj++;
3440     }
3441   }
3442 
3443   for (i=0; i<m; i++) {
3444     ourlens[i] -= offlens[i];
3445   }
3446   if (!sizesset) {
3447     ierr = MatSetSizes(newMat,m,n,M,N);CHKERRQ(ierr);
3448   }
3449   ierr = MatMPIAIJSetPreallocation(newMat,0,ourlens,0,offlens);CHKERRQ(ierr);
3450 
3451   for (i=0; i<m; i++) {
3452     ourlens[i] += offlens[i];
3453   }
3454 
3455   if (!rank) {
3456     ierr = PetscMalloc((maxnz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
3457 
3458     /* read in my part of the matrix numerical values  */
3459     nz   = procsnz[0];
3460     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3461 
3462     /* insert into matrix */
3463     jj      = rstart;
3464     smycols = mycols;
3465     svals   = vals;
3466     for (i=0; i<m; i++) {
3467       ierr = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
3468       smycols += ourlens[i];
3469       svals   += ourlens[i];
3470       jj++;
3471     }
3472 
3473     /* read in other processors and ship out */
3474     for (i=1; i<size; i++) {
3475       nz     = procsnz[i];
3476       ierr   = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3477       ierr   = MPILong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr);
3478     }
3479     ierr = PetscFree(procsnz);CHKERRQ(ierr);
3480   } else {
3481     /* receive numeric values */
3482     ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
3483 
3484     /* receive message of values*/
3485     ierr   = MPILong_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr);
3486 
3487     /* insert into matrix */
3488     jj      = rstart;
3489     smycols = mycols;
3490     svals   = vals;
3491     for (i=0; i<m; i++) {
3492       ierr     = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
3493       smycols += ourlens[i];
3494       svals   += ourlens[i];
3495       jj++;
3496     }
3497   }
3498   ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr);
3499   ierr = PetscFree(vals);CHKERRQ(ierr);
3500   ierr = PetscFree(mycols);CHKERRQ(ierr);
3501   ierr = PetscFree(rowners);CHKERRQ(ierr);
3502 
3503   ierr = MatAssemblyBegin(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3504   ierr = MatAssemblyEnd(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3505   PetscFunctionReturn(0);
3506 }
3507 
3508 #undef __FUNCT__
3509 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ"
3510 PetscErrorCode MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat)
3511 {
3512   PetscErrorCode ierr;
3513   IS             iscol_local;
3514   PetscInt       csize;
3515 
3516   PetscFunctionBegin;
3517   ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
3518   if (call == MAT_REUSE_MATRIX) {
3519     ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr);
3520     if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3521   } else {
3522     ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
3523   }
3524   ierr = MatGetSubMatrix_MPIAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr);
3525   if (call == MAT_INITIAL_MATRIX) {
3526     ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
3527     ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
3528   }
3529   PetscFunctionReturn(0);
3530 }
3531 
3532 #undef __FUNCT__
3533 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ_Private"
3534 /*
3535     Not great since it makes two copies of the submatrix: first a SeqAIJ on each process,
3536   and then the final result by concatenating the local matrices.  Writing it directly
3537   would be much like MatGetSubMatrices_MPIAIJ().
3538 
3539   Note: This requires a sequential iscol with all indices.
3540 */
3541 PetscErrorCode MatGetSubMatrix_MPIAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat)
3542 {
3543   PetscErrorCode ierr;
3544   PetscMPIInt    rank,size;
3545   PetscInt       i,m,n,rstart,row,rend,nz,*cwork,j;
3546   PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal;
3547   Mat            *local,M,Mreuse;
3548   MatScalar      *vwork,*aa;
3549   MPI_Comm       comm = ((PetscObject)mat)->comm;
3550   Mat_SeqAIJ     *aij;
3551 
3552 
3553   PetscFunctionBegin;
3554   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3555   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3556 
3557   if (call ==  MAT_REUSE_MATRIX) {
3558     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr);
3559     if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3560     local = &Mreuse;
3561     ierr  = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);CHKERRQ(ierr);
3562   } else {
3563     ierr   = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr);
3564     Mreuse = *local;
3565     ierr   = PetscFree(local);CHKERRQ(ierr);
3566   }
3567 
3568   /*
3569       m - number of local rows
3570       n - number of columns (same on all processors)
3571       rstart - first row in new global matrix generated
3572   */
3573   ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
3574   if (call == MAT_INITIAL_MATRIX) {
3575     aij = (Mat_SeqAIJ*)(Mreuse)->data;
3576     ii  = aij->i;
3577     jj  = aij->j;
3578 
3579     /*
3580         Determine the number of non-zeros in the diagonal and off-diagonal
3581         portions of the matrix in order to do correct preallocation
3582     */
3583 
3584     /* first get start and end of "diagonal" columns */
3585     if (csize == PETSC_DECIDE) {
3586       ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
3587       if (mglobal == n) { /* square matrix */
3588         nlocal = m;
3589       } else {
3590         nlocal = n/size + ((n % size) > rank);
3591       }
3592     } else {
3593       nlocal = csize;
3594     }
3595     ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3596     rstart = rend - nlocal;
3597     if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n);
3598 
3599     /* next, compute all the lengths */
3600     ierr  = PetscMalloc((2*m+1)*sizeof(PetscInt),&dlens);CHKERRQ(ierr);
3601     olens = dlens + m;
3602     for (i=0; i<m; i++) {
3603       jend = ii[i+1] - ii[i];
3604       olen = 0;
3605       dlen = 0;
3606       for (j=0; j<jend; j++) {
3607         if (*jj < rstart || *jj >= rend) olen++;
3608         else dlen++;
3609         jj++;
3610       }
3611       olens[i] = olen;
3612       dlens[i] = dlen;
3613     }
3614     ierr = MatCreate(comm,&M);CHKERRQ(ierr);
3615     ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr);
3616     ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr);
3617     ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr);
3618     ierr = PetscFree(dlens);CHKERRQ(ierr);
3619   } else {
3620     PetscInt ml,nl;
3621 
3622     M = *newmat;
3623     ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
3624     if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
3625     ierr = MatZeroEntries(M);CHKERRQ(ierr);
3626     /*
3627          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
3628        rather than the slower MatSetValues().
3629     */
3630     M->was_assembled = PETSC_TRUE;
3631     M->assembled     = PETSC_FALSE;
3632   }
3633   ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
3634   aij = (Mat_SeqAIJ*)(Mreuse)->data;
3635   ii  = aij->i;
3636   jj  = aij->j;
3637   aa  = aij->a;
3638   for (i=0; i<m; i++) {
3639     row   = rstart + i;
3640     nz    = ii[i+1] - ii[i];
3641     cwork = jj;     jj += nz;
3642     vwork = aa;     aa += nz;
3643     ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
3644   }
3645 
3646   ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3647   ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3648   *newmat = M;
3649 
3650   /* save submatrix used in processor for next request */
3651   if (call ==  MAT_INITIAL_MATRIX) {
3652     ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
3653     ierr = MatDestroy(&Mreuse);CHKERRQ(ierr);
3654   }
3655 
3656   PetscFunctionReturn(0);
3657 }
3658 
3659 EXTERN_C_BEGIN
3660 #undef __FUNCT__
3661 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR_MPIAIJ"
3662 PetscErrorCode  MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[])
3663 {
3664   PetscInt       m,cstart, cend,j,nnz,i,d;
3665   PetscInt       *d_nnz,*o_nnz,nnz_max = 0,rstart,ii;
3666   const PetscInt *JJ;
3667   PetscScalar    *values;
3668   PetscErrorCode ierr;
3669 
3670   PetscFunctionBegin;
3671   if (Ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0 it is %D",Ii[0]);
3672 
3673   ierr = PetscLayoutSetBlockSize(B->rmap,1);CHKERRQ(ierr);
3674   ierr = PetscLayoutSetBlockSize(B->cmap,1);CHKERRQ(ierr);
3675   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3676   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3677   m      = B->rmap->n;
3678   cstart = B->cmap->rstart;
3679   cend   = B->cmap->rend;
3680   rstart = B->rmap->rstart;
3681 
3682   ierr  = PetscMalloc2(m,PetscInt,&d_nnz,m,PetscInt,&o_nnz);CHKERRQ(ierr);
3683 
3684 #if defined(PETSC_USE_DEBUGGING)
3685   for (i=0; i<m; i++) {
3686     nnz     = Ii[i+1]- Ii[i];
3687     JJ      = J + Ii[i];
3688     if (nnz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative %D number of columns",i,nnz);
3689     if (nnz && (JJ[0] < 0)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index %D",i,JJ[0]);
3690     if (nnz && (JJ[nnz-1] >= B->cmap->N)) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N);
3691   }
3692 #endif
3693 
3694   for (i=0; i<m; i++) {
3695     nnz     = Ii[i+1]- Ii[i];
3696     JJ      = J + Ii[i];
3697     nnz_max = PetscMax(nnz_max,nnz);
3698     d       = 0;
3699     for (j=0; j<nnz; j++) {
3700       if (cstart <= JJ[j] && JJ[j] < cend) d++;
3701     }
3702     d_nnz[i] = d;
3703     o_nnz[i] = nnz - d;
3704   }
3705   ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
3706   ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);
3707 
3708   if (v) values = (PetscScalar*)v;
3709   else {
3710     ierr = PetscMalloc((nnz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr);
3711     ierr = PetscMemzero(values,nnz_max*sizeof(PetscScalar));CHKERRQ(ierr);
3712   }
3713 
3714   for (i=0; i<m; i++) {
3715     ii   = i + rstart;
3716     nnz  = Ii[i+1]- Ii[i];
3717     ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ? Ii[i] : 0),INSERT_VALUES);CHKERRQ(ierr);
3718   }
3719   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3720   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3721 
3722   if (!v) {
3723     ierr = PetscFree(values);CHKERRQ(ierr);
3724   }
3725   PetscFunctionReturn(0);
3726 }
3727 EXTERN_C_END
3728 
3729 #undef __FUNCT__
3730 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR"
3731 /*@
3732    MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format
3733    (the default parallel PETSc format).
3734 
3735    Collective on MPI_Comm
3736 
3737    Input Parameters:
3738 +  B - the matrix
3739 .  i - the indices into j for the start of each local row (starts with zero)
3740 .  j - the column indices for each local row (starts with zero)
3741 -  v - optional values in the matrix
3742 
3743    Level: developer
3744 
3745    Notes:
3746        The i, j, and v arrays ARE copied by this routine into the internal format used by PETSc;
3747      thus you CANNOT change the matrix entries by changing the values of v[] after you have
3748      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3749 
3750        The i and j indices are 0 based, and the i indices are offsets into the local j array.
3751 
3752        The format which is used for the sparse matrix input is equivalent to a
3753     row-major ordering, i.e., for the following matrix the input data expected is
3754     as shown:
3755 
3756         1 0 0
3757         2 0 3     P0
3758        -------
3759         4 5 6     P1
3760 
3761      Process0 [P0]: rows_owned=[0,1]
3762         i =  {0,1,3}  [size = nrow+1  = 2+1]
3763         j =  {0,0,2}  [size = nz = 3]
3764         v =  {1,2,3}  [size = nz = 3]
3765 
3766      Process1 [P1]: rows_owned=[2]
3767         i =  {0,3}    [size = nrow+1  = 1+1]
3768         j =  {0,1,2}  [size = nz = 3]
3769         v =  {4,5,6}  [size = nz = 3]
3770 
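     A minimal calling sketch for the example above (a sketch only; comm, nrow, i, j, and v are
     assumed local variables, with nrow the number of locally owned rows and i, j, v the arrays
     listed for this process):

.vb
      Mat B;
      ierr = MatCreate(comm,&B);CHKERRQ(ierr);
      ierr = MatSetSizes(B,nrow,PETSC_DECIDE,3,3);CHKERRQ(ierr);
      ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr);
      ierr = MatMPIAIJSetPreallocationCSR(B,i,j,v);CHKERRQ(ierr);
.ve
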
3771 .keywords: matrix, aij, compressed row, sparse, parallel
3772 
3773 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateMPIAIJ(), MPIAIJ,
3774           MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays()
3775 @*/
3776 PetscErrorCode  MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
3777 {
3778   PetscErrorCode ierr;
3779 
3780   PetscFunctionBegin;
3781   ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocationCSR_C",(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,i,j,v));CHKERRQ(ierr);
3782   PetscFunctionReturn(0);
3783 }
3784 
3785 #undef __FUNCT__
3786 #define __FUNCT__ "MatMPIAIJSetPreallocation"
3787 /*@C
3788    MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format
3789    (the default parallel PETSc format).  For good matrix assembly performance
3790    the user should preallocate the matrix storage by setting the parameters
3791    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3792    performance can be increased by more than a factor of 50.
3793 
3794    Collective on MPI_Comm
3795 
3796    Input Parameters:
3797 +  A - the matrix
3798 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
3799            (same value is used for all local rows)
3800 .  d_nnz - array containing the number of nonzeros in the various rows of the
3801            DIAGONAL portion of the local submatrix (possibly different for each row)
3802            or PETSC_NULL, if d_nz is used to specify the nonzero structure.
3803            The size of this array is equal to the number of local rows, i.e 'm'.
3804            You must leave room for the diagonal entry even if it is zero.
3805 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
3806            submatrix (same value is used for all local rows).
3807 -  o_nnz - array containing the number of nonzeros in the various rows of the
3808            OFF-DIAGONAL portion of the local submatrix (possibly different for
3809            each row) or PETSC_NULL, if o_nz is used to specify the nonzero
3810            structure. The size of this array is equal to the number
3811            of local rows, i.e 'm'.
3812 
3813    If the *_nnz parameter is given then the *_nz parameter is ignored
3814 
3815    The AIJ format (also called the Yale sparse matrix format or
3816    compressed row storage (CSR)), is fully compatible with standard Fortran 77
3817    storage.  The stored row and column indices begin with zero.
3818    See the <A href="../../docs/manual.pdf#nameddest=ch_mat">Mat chapter of the users manual</A> for details.
3819 
3820    The parallel matrix is partitioned such that the first m0 rows belong to
3821    process 0, the next m1 rows belong to process 1, the next m2 rows belong
3822    to process 2, etc., where m0,m1,m2,... are the input parameter 'm'.
3823 
3824    The DIAGONAL portion of the local submatrix of a processor can be defined
3825    as the submatrix which is obtained by extracting the part corresponding to
3826    the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the
3827    first row that belongs to the processor, r2 is the last row belonging to
3828    this processor, and c1-c2 is the range of indices of the local part of a
3829    vector suitable for applying the matrix to.  This is an mxn matrix.  In the
3830    common case of a square matrix, the row and column ranges are the same and
3831    the DIAGONAL part is also square. The remaining portion of the local
3832    submatrix (mxN) constitutes the OFF-DIAGONAL portion.
3833 
3834    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
3835 
3836    You can call MatGetInfo() to get information on how effective the preallocation was;
3837    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
3838    You can also run with the option -info and look for messages with the string
3839    malloc in them to see if additional memory allocation was needed.
3840 
3841    Example usage:
3842 
3843    Consider the following 8x8 matrix with 34 non-zero values, that is
3844    assembled across 3 processors. Lets assume that proc0 owns 3 rows,
3845    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
3846    as follows:
3847 
3848 .vb
3849             1  2  0  |  0  3  0  |  0  4
3850     Proc0   0  5  6  |  7  0  0  |  8  0
3851             9  0 10  | 11  0  0  | 12  0
3852     -------------------------------------
3853            13  0 14  | 15 16 17  |  0  0
3854     Proc1   0 18  0  | 19 20 21  |  0  0
3855             0  0  0  | 22 23  0  | 24  0
3856     -------------------------------------
3857     Proc2  25 26 27  |  0  0 28  | 29  0
3858            30  0  0  | 31 32 33  |  0 34
3859 .ve
3860 
3861    This can be represented as a collection of submatrices as:
3862 
3863 .vb
3864       A B C
3865       D E F
3866       G H I
3867 .ve
3868 
3869    Where the submatrices A,B,C are owned by proc0, D,E,F are
3870    owned by proc1, G,H,I are owned by proc2.
3871 
3872    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3873    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3874    The 'M','N' parameters are 8,8, and have the same values on all procs.
3875 
3876    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
3877    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
3878    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
3879    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
3880    part as SeqAIJ matrices, e.g., proc1 will store [E] as a SeqAIJ
3881    matrix, and [DF] as another SeqAIJ matrix.
3882 
3883    When d_nz, o_nz parameters are specified, d_nz storage elements are
3884    allocated for every row of the local diagonal submatrix, and o_nz
3885    storage locations are allocated for every row of the OFF-DIAGONAL submat.
3886    One way to choose d_nz and o_nz is to use the max nonzeros per local
3887    row for each of the local DIAGONAL and OFF-DIAGONAL submatrices.
3888    In this case, the values of d_nz,o_nz are:
3889 .vb
3890      proc0 : dnz = 2, o_nz = 2
3891      proc1 : dnz = 3, o_nz = 2
3892      proc2 : dnz = 1, o_nz = 4
3893 .ve
3894    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
3895    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
3896    for proc2, i.e., we are using 12+15+10=37 storage locations to store
3897    34 values.
3898 
3899    When d_nnz, o_nnz parameters are specified, the storage is specified
3900    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
3901    In the above case the values for d_nnz,o_nnz are:
3902 .vb
3903      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
3904      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
3905      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
3906 .ve
3907    Here the space allocated is the sum of all the above values, i.e., 34, and
3908    hence the preallocation is perfect.
3909 
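   A sketch of how the 8x8 example above might be preallocated on each process (a sketch only;
   comm, A, m, and n are assumed local variables, and d_nnz, o_nnz hold the per-row values
   listed above for this process):

.vb
      ierr = MatCreate(comm,&A);CHKERRQ(ierr);
      ierr = MatSetSizes(A,m,n,8,8);CHKERRQ(ierr);
      ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
      ierr = MatMPIAIJSetPreallocation(A,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
.ve
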
3910    Level: intermediate
3911 
3912 .keywords: matrix, aij, compressed row, sparse, parallel
3913 
3914 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIAIJ(), MatMPIAIJSetPreallocationCSR(),
3915           MPIAIJ, MatGetInfo()
3916 @*/
3917 PetscErrorCode  MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
3918 {
3919   PetscErrorCode ierr;
3920 
3921   PetscFunctionBegin;
3922   ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocation_C",(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr);
3923   PetscFunctionReturn(0);
3924 }
3925 
3926 #undef __FUNCT__
3927 #define __FUNCT__ "MatCreateMPIAIJWithArrays"
3928 /*@
3929      MatCreateMPIAIJWithArrays - creates a MPI AIJ matrix using arrays that contain in standard
3930          CSR format the local rows.
3931 
3932    Collective on MPI_Comm
3933 
3934    Input Parameters:
3935 +  comm - MPI communicator
3936 .  m - number of local rows (Cannot be PETSC_DECIDE)
3937 .  n - This value should be the same as the local size used in creating the
3938        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
3939        calculated if N is given) For square matrices n is almost always m.
3940 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
3941 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
3942 .   i - row indices
3943 .   j - column indices
3944 -   a - matrix values
3945 
3946    Output Parameter:
3947 .   mat - the matrix
3948 
3949    Level: intermediate
3950 
3951    Notes:
3952        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
3953      thus you CANNOT change the matrix entries by changing the values of a[] after you have
3954      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3955 
3956        The i and j indices are 0 based, and the i indices are offsets into the local j array.
3957 
3958        The format which is used for the sparse matrix input is equivalent to a
3959     row-major ordering, i.e., for the following matrix the input data expected is
3960     as shown:
3961 
3962         1 0 0
3963         2 0 3     P0
3964        -------
3965         4 5 6     P1
3966 
3967      Process0 [P0]: rows_owned=[0,1]
3968         i =  {0,1,3}  [size = nrow+1  = 2+1]
3969         j =  {0,0,2}  [size = nz = 3]
3970         v =  {1,2,3}  [size = nz = 3]
3971 
3972      Process1 [P1]: rows_owned=[2]
3973         i =  {0,3}    [size = nrow+1  = 1+1]
3974         j =  {0,1,2}  [size = nz = 3]
3975         v =  {4,5,6}  [size = nz = 3]
3976 
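     A minimal calling sketch for the example above (a sketch only; comm and nrow are assumed
     local variables, with nrow the number of locally owned rows, and i, j, v the arrays listed
     for this process):

.vb
      Mat A;
      ierr = MatCreateMPIAIJWithArrays(comm,nrow,PETSC_DECIDE,3,3,i,j,v,&A);CHKERRQ(ierr);
.ve
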
3977 .keywords: matrix, aij, compressed row, sparse, parallel
3978 
3979 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
3980           MPIAIJ, MatCreateMPIAIJ(), MatCreateMPIAIJWithSplitArrays()
3981 @*/
3982 PetscErrorCode  MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
3983 {
3984   PetscErrorCode ierr;
3985 
3986   PetscFunctionBegin;
3987   if (i[0]) {
3988     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
3989   }
3990   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
3991   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
3992   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
3993   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
3994   ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr);
3995   PetscFunctionReturn(0);
3996 }
3997 
3998 #undef __FUNCT__
3999 #define __FUNCT__ "MatCreateMPIAIJ"
4000 /*@C
4001    MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format
4002    (the default parallel PETSc format).  For good matrix assembly performance
4003    the user should preallocate the matrix storage by setting the parameters
4004    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
4005    performance can be increased by more than a factor of 50.
4006 
4007    Collective on MPI_Comm
4008 
4009    Input Parameters:
4010 +  comm - MPI communicator
4011 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
4012            This value should be the same as the local size used in creating the
4013            y vector for the matrix-vector product y = Ax.
4014 .  n - This value should be the same as the local size used in creating the
4015        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
4016        calculated if N is given) For square matrices n is almost always m.
4017 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
4018 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
4019 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
4020            (same value is used for all local rows)
4021 .  d_nnz - array containing the number of nonzeros in the various rows of the
4022            DIAGONAL portion of the local submatrix (possibly different for each row)
4023            or PETSC_NULL, if d_nz is used to specify the nonzero structure.
4024            The size of this array is equal to the number of local rows, i.e 'm'.
4025            You must leave room for the diagonal entry even if it is zero.
4026 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
4027            submatrix (same value is used for all local rows).
4028 -  o_nnz - array containing the number of nonzeros in the various rows of the
4029            OFF-DIAGONAL portion of the local submatrix (possibly different for
4030            each row) or PETSC_NULL, if o_nz is used to specify the nonzero
4031            structure. The size of this array is equal to the number
4032            of local rows, i.e 'm'.
4033 
4034    Output Parameter:
4035 .  A - the matrix
4036 
4037    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
4038    MatXXXXSetPreallocation() paradigm instead of this routine directly.
4039    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation()]
4040 
4041    Notes:
4042    If the *_nnz parameter is given then the *_nz parameter is ignored
4043 
4044    m,n,M,N parameters specify the size of the matrix, and its partitioning across
4045    processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
4046    storage requirements for this matrix.
4047 
4048    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one
4049    processor then it must be used on all processors that share the object for
4050    that argument.
4051 
4052    The user MUST specify either the local or global matrix dimensions
4053    (possibly both).
4054 
4055    The parallel matrix is partitioned across processors such that the
4056    first m0 rows belong to process 0, the next m1 rows belong to
4057    process 1, the next m2 rows belong to process 2, etc., where
4058    m0,m1,m2,... are the values of the input parameter 'm' on each process; i.e., each
4059    processor stores values corresponding to an [m x N] submatrix.
4060 
4061    The columns are logically partitioned with the n0 columns belonging
4062    to the 0th partition, the next n1 columns belonging to the next
4063    partition, etc., where n0,n1,n2,... are the values of the input parameter 'n'.
4064 
4065    The DIAGONAL portion of the local submatrix on any given processor
4066    is the submatrix corresponding to the rows and columns m,n owned by
4067    that processor; i.e., the diagonal submatrix on
4068    process 0 is [m0 x n0], the diagonal submatrix on process 1 is [m1 x n1],
4069    etc. The remaining portion of the local submatrix, of size [m x (N-n)],
4070    constitutes the OFF-DIAGONAL portion. The example below
4071    illustrates this concept.
4072 
4073    For a square global matrix we define each processor's diagonal portion
4074    to be its local rows and the corresponding columns (a square submatrix);
4075    each processor's off-diagonal portion encompasses the remainder of the
4076    local matrix (a rectangular submatrix).
4077 
4078    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
4079 
4080    When calling this routine with a single process communicator, a matrix of
4081    type SEQAIJ is returned.  If a matrix of type MPIAIJ is desired for this
4082    type of communicator, use the construction mechanism:
4083      MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatSetSizes(A, m,n,M,N); MatMPIAIJSetPreallocation(A,...);
4084 
4085    By default, this format uses inodes (identical nodes) when possible.
4086    We search for consecutive rows with the same nonzero structure, thereby
4087    reusing matrix information to achieve increased efficiency.
4088 
4089    Options Database Keys:
4090 +  -mat_no_inode  - Do not use inodes
4091 .  -mat_inode_limit <limit> - Sets inode limit (max limit=5)
4092 -  -mat_aij_oneindex - Internally use indexing starting at 1
4093         rather than 0.  Note that when calling MatSetValues(),
4094         the user still MUST index entries starting at 0!
4095 
4096 
4097    Example usage:
4098 
4099    Consider the following 8x8 matrix with 34 non-zero values, which is
4100    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
4101    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
4102    as follows:
4103 
4104 .vb
4105             1  2  0  |  0  3  0  |  0  4
4106     Proc0   0  5  6  |  7  0  0  |  8  0
4107             9  0 10  | 11  0  0  | 12  0
4108     -------------------------------------
4109            13  0 14  | 15 16 17  |  0  0
4110     Proc1   0 18  0  | 19 20 21  |  0  0
4111             0  0  0  | 22 23  0  | 24  0
4112     -------------------------------------
4113     Proc2  25 26 27  |  0  0 28  | 29  0
4114            30  0  0  | 31 32 33  |  0 34
4115 .ve
4116 
4117    This can be represented as a collection of submatrices as:
4118 
4119 .vb
4120       A B C
4121       D E F
4122       G H I
4123 .ve
4124 
4125    Here the submatrices A,B,C are owned by proc0, D,E,F are
4126    owned by proc1, G,H,I are owned by proc2.
4127 
4128    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4129    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4130    The 'M','N' parameters are 8,8, and have the same values on all procs.
4131 
4132    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
4133    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
4134    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
4135    Internally, each processor stores the DIAGONAL part and the OFF-DIAGONAL
4136    part as separate SeqAIJ matrices. For example, proc1 will store [E] as one SeqAIJ
4137    matrix and [DF] as another SeqAIJ matrix.
4138 
4139    When d_nz, o_nz parameters are specified, d_nz storage elements are
4140    allocated for every row of the local diagonal submatrix, and o_nz
4141    storage locations are allocated for every row of the OFF-DIAGONAL submat.
4142    One way to choose d_nz and o_nz is to use the maximum number of nonzeros over
4143    the local rows of the DIAGONAL and the OFF-DIAGONAL submatrices, respectively.
4144    In this case, the values of d_nz,o_nz are:
4145 .vb
4146      proc0 : d_nz = 2, o_nz = 2
4147      proc1 : d_nz = 3, o_nz = 2
4148      proc2 : d_nz = 1, o_nz = 4
4149 .ve
4150    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
4151    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
4152    for proc2; i.e., we are using 12+15+10=37 storage locations to store
4153    34 values.
4154 
4155    When d_nnz, o_nnz parameters are specified, the storage is specified
4156    for every row, corresponding to both the DIAGONAL and OFF-DIAGONAL submatrices.
4157    In the above case the values for d_nnz,o_nnz are:
4158 .vb
4159      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
4160      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
4161      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
4162 .ve
4163    Here the space allocated is the sum of all the above values, i.e., 34, and
4164    hence the preallocation is exact.
4165 
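   As a hedged sketch (variable names are illustrative), the recommended
   MatCreate()/MatSetType()/MatMPIAIJSetPreallocation() paradigm for the 8x8 example,
   written as it might appear on proc1, is:

.vb
     Mat      A;
     PetscInt d_nnz[] = {3,3,2}, o_nnz[] = {2,1,1};  /* proc1's local rows */

     ierr = MatCreate(comm,&A);CHKERRQ(ierr);
     ierr = MatSetSizes(A,3,3,8,8);CHKERRQ(ierr);
     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
     ierr = MatMPIAIJSetPreallocation(A,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
.ve
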
4166    Level: intermediate
4167 
4168 .keywords: matrix, aij, compressed row, sparse, parallel
4169 
4170 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
4171           MPIAIJ, MatCreateMPIAIJWithArrays()
4172 @*/
4173 PetscErrorCode  MatCreateMPIAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
4174 {
4175   PetscErrorCode ierr;
4176   PetscMPIInt    size;
4177 
4178   PetscFunctionBegin;
4179   ierr = MatCreate(comm,A);CHKERRQ(ierr);
4180   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
4181   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4182   if (size > 1) {
4183     ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr);
4184     ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
4185   } else {
4186     ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr);
4187     ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr);
4188   }
4189   PetscFunctionReturn(0);
4190 }
4191 
4192 #undef __FUNCT__
4193 #define __FUNCT__ "MatMPIAIJGetSeqAIJ"
4194 PetscErrorCode  MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,PetscInt *colmap[])
4195 {
4196   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
4197 
4198   PetscFunctionBegin;
4199   *Ad     = a->A;
4200   *Ao     = a->B;
4201   *colmap = a->garray;
4202   PetscFunctionReturn(0);
4203 }
4204 
4205 #undef __FUNCT__
4206 #define __FUNCT__ "MatSetColoring_MPIAIJ"
4207 PetscErrorCode MatSetColoring_MPIAIJ(Mat A,ISColoring coloring)
4208 {
4209   PetscErrorCode ierr;
4210   PetscInt       i;
4211   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
4212 
4213   PetscFunctionBegin;
4214   if (coloring->ctype == IS_COLORING_GLOBAL) {
4215     ISColoringValue *allcolors,*colors;
4216     ISColoring      ocoloring;
4217 
4218     /* set coloring for diagonal portion */
4219     ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr);
4220 
4221     /* set coloring for off-diagonal portion */
4222     ierr = ISAllGatherColors(((PetscObject)A)->comm,coloring->n,coloring->colors,PETSC_NULL,&allcolors);CHKERRQ(ierr);
4223     ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
4224     for (i=0; i<a->B->cmap->n; i++) {
4225       colors[i] = allcolors[a->garray[i]];
4226     }
4227     ierr = PetscFree(allcolors);CHKERRQ(ierr);
4228     ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
4229     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
4230     ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr);
4231   } else if (coloring->ctype == IS_COLORING_GHOSTED) {
4232     ISColoringValue *colors;
4233     PetscInt        *larray;
4234     ISColoring      ocoloring;
4235 
4236     /* set coloring for diagonal portion */
4237     ierr = PetscMalloc((a->A->cmap->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr);
4238     for (i=0; i<a->A->cmap->n; i++) {
4239       larray[i] = i + A->cmap->rstart;
4240     }
4241     ierr = ISGlobalToLocalMappingApply(A->cmap->mapping,IS_GTOLM_MASK,a->A->cmap->n,larray,PETSC_NULL,larray);CHKERRQ(ierr);
4242     ierr = PetscMalloc((a->A->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
4243     for (i=0; i<a->A->cmap->n; i++) {
4244       colors[i] = coloring->colors[larray[i]];
4245     }
4246     ierr = PetscFree(larray);CHKERRQ(ierr);
4247     ierr = ISColoringCreate(PETSC_COMM_SELF,coloring->n,a->A->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
4248     ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr);
4249     ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr);
4250 
4251     /* set coloring for off-diagonal portion */
4252     ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr);
4253     ierr = ISGlobalToLocalMappingApply(A->cmap->mapping,IS_GTOLM_MASK,a->B->cmap->n,a->garray,PETSC_NULL,larray);CHKERRQ(ierr);
4254     ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
4255     for (i=0; i<a->B->cmap->n; i++) {
4256       colors[i] = coloring->colors[larray[i]];
4257     }
4258     ierr = PetscFree(larray);CHKERRQ(ierr);
4259     ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
4260     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
4261     ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr);
4262   } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support ISColoringType %d",(int)coloring->ctype);
4263 
4264   PetscFunctionReturn(0);
4265 }
4266 
4267 #if defined(PETSC_HAVE_ADIC)
4268 #undef __FUNCT__
4269 #define __FUNCT__ "MatSetValuesAdic_MPIAIJ"
4270 PetscErrorCode MatSetValuesAdic_MPIAIJ(Mat A,void *advalues)
4271 {
4272   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
4273   PetscErrorCode ierr;
4274 
4275   PetscFunctionBegin;
4276   ierr = MatSetValuesAdic_SeqAIJ(a->A,advalues);CHKERRQ(ierr);
4277   ierr = MatSetValuesAdic_SeqAIJ(a->B,advalues);CHKERRQ(ierr);
4278   PetscFunctionReturn(0);
4279 }
4280 #endif
4281 
4282 #undef __FUNCT__
4283 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ"
4284 PetscErrorCode MatSetValuesAdifor_MPIAIJ(Mat A,PetscInt nl,void *advalues)
4285 {
4286   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
4287   PetscErrorCode ierr;
4288 
4289   PetscFunctionBegin;
4290   ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr);
4291   ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr);
4292   PetscFunctionReturn(0);
4293 }
4294 
4295 #undef __FUNCT__
4296 #define __FUNCT__ "MatMerge"
4297 /*@
4298       MatMerge - Creates a single large PETSc matrix by concatenating the sequential
4299                  matrices from each processor
4300 
4301     Collective on MPI_Comm
4302 
4303    Input Parameters:
4304 +    comm - the communicator the parallel matrix will live on
4305 .    inmat - the input sequential matrix (one per processor)
4306 .    n - number of local columns (or PETSC_DECIDE)
4307 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4308 
4309    Output Parameter:
4310 .    outmat - the parallel matrix generated
4311 
4312     Level: advanced
4313 
4314    Notes: The number of columns of the matrix in EACH processor MUST be the same.
4315 
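   A minimal usage sketch (note that this routine destroys inmat, the per-process
   sequential matrix):

.vb
     Mat C;
     ierr = MatMerge(comm,inmat,PETSC_DECIDE,MAT_INITIAL_MATRIX,&C);CHKERRQ(ierr);
.ve
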
4316 @*/
4317 PetscErrorCode  MatMerge(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
4318 {
4319   PetscErrorCode ierr;
4320   PetscInt       m,N,i,rstart,nnz,Ii,*dnz,*onz;
4321   PetscInt       *indx;
4322   PetscScalar    *values;
4323 
4324   PetscFunctionBegin;
4325   ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
4326   if (scall == MAT_INITIAL_MATRIX){
4327     /* count nonzeros in each row, for diagonal and off diagonal portion of matrix */
4328     if (n == PETSC_DECIDE){
4329       ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr);
4330     }
4331     ierr = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
4332     rstart -= m;
4333 
4334     ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
4335     for (i=0;i<m;i++) {
4336       ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr);
4337       ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr);
4338       ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr);
4339     }
4340     /* This routine will ONLY return MPIAIJ type matrix */
4341     ierr = MatCreate(comm,outmat);CHKERRQ(ierr);
4342     ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
4343     ierr = MatSetType(*outmat,MATMPIAIJ);CHKERRQ(ierr);
4344     ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr);
4345     ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
4346 
4347   } else if (scall == MAT_REUSE_MATRIX){
4348     ierr = MatGetOwnershipRange(*outmat,&rstart,PETSC_NULL);CHKERRQ(ierr);
4349   } else {
4350     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
4351   }
4352 
4353   for (i=0;i<m;i++) {
4354     ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
4355     Ii    = i + rstart;
4356     ierr = MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
4357     ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
4358   }
4359   ierr = MatDestroy(&inmat);CHKERRQ(ierr);
4360   ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4361   ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4362 
4363   PetscFunctionReturn(0);
4364 }
4365 
4366 #undef __FUNCT__
4367 #define __FUNCT__ "MatFileSplit"
4368 PetscErrorCode MatFileSplit(Mat A,char *outfile)
4369 {
4370   PetscErrorCode    ierr;
4371   PetscMPIInt       rank;
4372   PetscInt          m,N,i,rstart,nnz;
4373   size_t            len;
4374   const PetscInt    *indx;
4375   PetscViewer       out;
4376   char              *name;
4377   Mat               B;
4378   const PetscScalar *values;
4379 
4380   PetscFunctionBegin;
4381   ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr);
4382   ierr = MatGetSize(A,0,&N);CHKERRQ(ierr);
4383   /* Should this be the type of the diagonal block of A? */
4384   ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr);
4385   ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr);
4386   ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr);
4387   ierr = MatSeqAIJSetPreallocation(B,0,PETSC_NULL);CHKERRQ(ierr);
4388   ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr);
4389   for (i=0;i<m;i++) {
4390     ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
4391     ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
4392     ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
4393   }
4394   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4395   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4396 
4397   ierr = MPI_Comm_rank(((PetscObject)A)->comm,&rank);CHKERRQ(ierr);
4398   ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr);
4399   ierr = PetscMalloc((len+5)*sizeof(char),&name);CHKERRQ(ierr);
4400   sprintf(name,"%s.%d",outfile,rank);
4401   ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr);
4402   ierr = PetscFree(name);CHKERRQ(ierr);
4403   ierr = MatView(B,out);CHKERRQ(ierr);
4404   ierr = PetscViewerDestroy(&out);CHKERRQ(ierr);
4405   ierr = MatDestroy(&B);CHKERRQ(ierr);
4406   PetscFunctionReturn(0);
4407 }
4408 
4409 extern PetscErrorCode MatDestroy_MPIAIJ(Mat);
4410 #undef __FUNCT__
4411 #define __FUNCT__ "MatDestroy_MPIAIJ_SeqsToMPI"
4412 PetscErrorCode  MatDestroy_MPIAIJ_SeqsToMPI(Mat A)
4413 {
4414   PetscErrorCode       ierr;
4415   Mat_Merge_SeqsToMPI  *merge;
4416   PetscContainer       container;
4417 
4418   PetscFunctionBegin;
4419   ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr);
4420   if (container) {
4421     ierr = PetscContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr);
4422     ierr = PetscFree(merge->id_r);CHKERRQ(ierr);
4423     ierr = PetscFree(merge->len_s);CHKERRQ(ierr);
4424     ierr = PetscFree(merge->len_r);CHKERRQ(ierr);
4425     ierr = PetscFree(merge->bi);CHKERRQ(ierr);
4426     ierr = PetscFree(merge->bj);CHKERRQ(ierr);
4427     ierr = PetscFree(merge->buf_ri[0]);CHKERRQ(ierr);
4428     ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr);
4429     ierr = PetscFree(merge->buf_rj[0]);CHKERRQ(ierr);
4430     ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr);
4431     ierr = PetscFree(merge->coi);CHKERRQ(ierr);
4432     ierr = PetscFree(merge->coj);CHKERRQ(ierr);
4433     ierr = PetscFree(merge->owners_co);CHKERRQ(ierr);
4434     ierr = PetscLayoutDestroy(&merge->rowmap);CHKERRQ(ierr);
4435     ierr = PetscFree(merge);CHKERRQ(ierr);
4436     ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr);
4437   }
4438   ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr);
4439   PetscFunctionReturn(0);
4440 }
4441 
4442 #include <../src/mat/utils/freespace.h>
4443 #include <petscbt.h>
4444 
4445 #undef __FUNCT__
4446 #define __FUNCT__ "MatMerge_SeqsToMPINumeric"
4447 /*@C
4448       MatMerge_SeqsToMPI - Creates an MPIAIJ matrix by adding together the sequential
4449                  matrices from each processor
4450 
4451     Collective on MPI_Comm
4452 
4453    Input Parameters:
4454 +    comm - the communicator the parallel matrix will live on
4455 .    seqmat - the input sequential matrix (one per processor)
4456 .    m - number of local rows (or PETSC_DECIDE)
4457 .    n - number of local columns (or PETSC_DECIDE)
4458 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4459 
4460    Output Parameter:
4461 .    mpimat - the parallel matrix generated
4462 
4463     Level: advanced
4464 
4465    Notes:
4466      The dimensions of the sequential matrix in each processor MUST be the same.
4467      The input seqmat is included into the container "Mat_Merge_SeqsToMPI", and will be
4468      destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat.
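
     A minimal usage sketch of the driver routine MatMerge_SeqsToMPI(), which invokes
     the symbolic and numeric phases with the parameters documented above:

.vb
       Mat C;
       ierr = MatMerge_SeqsToMPI(comm,seqmat,m,n,MAT_INITIAL_MATRIX,&C);CHKERRQ(ierr);
.ve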
4469 @*/
4470 PetscErrorCode  MatMerge_SeqsToMPINumeric(Mat seqmat,Mat mpimat)
4471 {
4472   PetscErrorCode       ierr;
4473   MPI_Comm             comm=((PetscObject)mpimat)->comm;
4474   Mat_SeqAIJ           *a=(Mat_SeqAIJ*)seqmat->data;
4475   PetscMPIInt          size,rank,taga,*len_s;
4476   PetscInt             N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj=a->j;
4477   PetscInt             proc,m;
4478   PetscInt             **buf_ri,**buf_rj;
4479   PetscInt             k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj;
4480   PetscInt             nrows,**buf_ri_k,**nextrow,**nextai;
4481   MPI_Request          *s_waits,*r_waits;
4482   MPI_Status           *status;
4483   MatScalar            *aa=a->a;
4484   MatScalar            **abuf_r,*ba_i;
4485   Mat_Merge_SeqsToMPI  *merge;
4486   PetscContainer       container;
4487 
4488   PetscFunctionBegin;
4489   ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
4490 
4491   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4492   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4493 
4494   ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr);
4495   ierr  = PetscContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr);
4496 
4497   bi     = merge->bi;
4498   bj     = merge->bj;
4499   buf_ri = merge->buf_ri;
4500   buf_rj = merge->buf_rj;
4501 
4502   ierr   = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr);
4503   owners = merge->rowmap->range;
4504   len_s  = merge->len_s;
4505 
4506   /* send and recv matrix values */
4507   /*-----------------------------*/
4508   ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr);
4509   ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr);
4510 
4511   ierr = PetscMalloc((merge->nsend+1)*sizeof(MPI_Request),&s_waits);CHKERRQ(ierr);
4512   for (proc=0,k=0; proc<size; proc++){
4513     if (!len_s[proc]) continue;
4514     i = owners[proc];
4515     ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr);
4516     k++;
4517   }
4518 
4519   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);}
4520   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);}
4521   ierr = PetscFree(status);CHKERRQ(ierr);
4522 
4523   ierr = PetscFree(s_waits);CHKERRQ(ierr);
4524   ierr = PetscFree(r_waits);CHKERRQ(ierr);
4525 
4526   /* insert mat values of mpimat */
4527   /*----------------------------*/
4528   ierr = PetscMalloc(N*sizeof(PetscScalar),&ba_i);CHKERRQ(ierr);
4529   ierr = PetscMalloc3(merge->nrecv,PetscInt*,&buf_ri_k,merge->nrecv,PetscInt*,&nextrow,merge->nrecv,PetscInt*,&nextai);CHKERRQ(ierr);
4530 
4531   for (k=0; k<merge->nrecv; k++){
4532     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
4533     nrows = *(buf_ri_k[k]);
4534     nextrow[k]  = buf_ri_k[k]+1;  /* next row number of k-th recved i-structure */
4535     nextai[k]   = buf_ri_k[k] + (nrows + 1);/* points to the next i-structure of k-th recved i-structure  */
4536   }
4537 
4538   /* set values of ba */
4539   m = merge->rowmap->n;
4540   for (i=0; i<m; i++) {
4541     arow = owners[rank] + i;
4542     bj_i = bj+bi[i];  /* col indices of the i-th row of mpimat */
4543     bnzi = bi[i+1] - bi[i];
4544     ierr = PetscMemzero(ba_i,bnzi*sizeof(PetscScalar));CHKERRQ(ierr);
4545 
4546     /* add local non-zero vals of this proc's seqmat into ba */
4547     anzi = ai[arow+1] - ai[arow];
4548     aj   = a->j + ai[arow];
4549     aa   = a->a + ai[arow];
4550     nextaj = 0;
4551     for (j=0; nextaj<anzi; j++){
4552       if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */
4553         ba_i[j] += aa[nextaj++];
4554       }
4555     }
4556 
4557     /* add received vals into ba */
4558     for (k=0; k<merge->nrecv; k++){ /* k-th received message */
4559       /* i-th row */
4560       if (i == *nextrow[k]) {
4561         anzi = *(nextai[k]+1) - *nextai[k];
4562         aj   = buf_rj[k] + *(nextai[k]);
4563         aa   = abuf_r[k] + *(nextai[k]);
4564         nextaj = 0;
4565         for (j=0; nextaj<anzi; j++){
4566           if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */
4567             ba_i[j] += aa[nextaj++];
4568           }
4569         }
4570         nextrow[k]++; nextai[k]++;
4571       }
4572     }
4573     ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr);
4574   }
4575   ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4576   ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4577 
4578   ierr = PetscFree(abuf_r[0]);CHKERRQ(ierr);
4579   ierr = PetscFree(abuf_r);CHKERRQ(ierr);
4580   ierr = PetscFree(ba_i);CHKERRQ(ierr);
4581   ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
4582   ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
4583   PetscFunctionReturn(0);
4584 }
4585 
4586 extern PetscErrorCode  MatDestroy_MPIAIJ_SeqsToMPI(Mat);
4587 
4588 #undef __FUNCT__
4589 #define __FUNCT__ "MatMerge_SeqsToMPISymbolic"
4590 PetscErrorCode  MatMerge_SeqsToMPISymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat)
4591 {
4592   PetscErrorCode       ierr;
4593   Mat                  B_mpi;
4594   Mat_SeqAIJ           *a=(Mat_SeqAIJ*)seqmat->data;
4595   PetscMPIInt          size,rank,tagi,tagj,*len_s,*len_si,*len_ri;
4596   PetscInt             **buf_rj,**buf_ri,**buf_ri_k;
4597   PetscInt             M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j;
4598   PetscInt             len,proc,*dnz,*onz;
4599   PetscInt             k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0;
4600   PetscInt             nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai;
4601   MPI_Request          *si_waits,*sj_waits,*ri_waits,*rj_waits;
4602   MPI_Status           *status;
4603   PetscFreeSpaceList   free_space=PETSC_NULL,current_space=PETSC_NULL;
4604   PetscBT              lnkbt;
4605   Mat_Merge_SeqsToMPI  *merge;
4606   PetscContainer       container;
4607 
4608   PetscFunctionBegin;
4609   ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4610 
4611   /* make sure it is a PETSc comm */
4612   ierr = PetscCommDuplicate(comm,&comm,PETSC_NULL);CHKERRQ(ierr);
4613   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4614   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4615 
4616   ierr = PetscNew(Mat_Merge_SeqsToMPI,&merge);CHKERRQ(ierr);
4617   ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr);
4618 
4619   /* determine row ownership */
4620   /*---------------------------------------------------------*/
4621   ierr = PetscLayoutCreate(comm,&merge->rowmap);CHKERRQ(ierr);
4622   ierr = PetscLayoutSetLocalSize(merge->rowmap,m);CHKERRQ(ierr);
4623   ierr = PetscLayoutSetSize(merge->rowmap,M);CHKERRQ(ierr);
4624   ierr = PetscLayoutSetBlockSize(merge->rowmap,1);CHKERRQ(ierr);
4625   ierr = PetscLayoutSetUp(merge->rowmap);CHKERRQ(ierr);
4626   ierr = PetscMalloc(size*sizeof(PetscMPIInt),&len_si);CHKERRQ(ierr);
4627   ierr = PetscMalloc(size*sizeof(PetscMPIInt),&merge->len_s);CHKERRQ(ierr);
4628 
4629   m      = merge->rowmap->n;
4630   M      = merge->rowmap->N;
4631   owners = merge->rowmap->range;
4632 
4633   /* determine the number of messages to send, their lengths */
4634   /*---------------------------------------------------------*/
4635   len_s  = merge->len_s;
4636 
4637   len = 0;  /* length of buf_si[] */
4638   merge->nsend = 0;
4639   for (proc=0; proc<size; proc++){
4640     len_si[proc] = 0;
4641     if (proc == rank){
4642       len_s[proc] = 0;
4643     } else {
4644       len_si[proc] = owners[proc+1] - owners[proc] + 1;
4645       len_s[proc] = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */
4646     }
4647     if (len_s[proc]) {
4648       merge->nsend++;
4649       nrows = 0;
4650       for (i=owners[proc]; i<owners[proc+1]; i++){
4651         if (ai[i+1] > ai[i]) nrows++;
4652       }
4653       len_si[proc] = 2*(nrows+1);
4654       len += len_si[proc];
4655     }
4656   }
4657 
4658   /* determine the number and length of messages to receive for ij-structure */
4659   /*-------------------------------------------------------------------------*/
4660   ierr = PetscGatherNumberOfMessages(comm,PETSC_NULL,len_s,&merge->nrecv);CHKERRQ(ierr);
4661   ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr);
4662 
4663   /* post the Irecv of j-structure */
4664   /*-------------------------------*/
4665   ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr);
4666   ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr);
4667 
4668   /* post the Isend of j-structure */
4669   /*--------------------------------*/
4670   ierr = PetscMalloc2(merge->nsend,MPI_Request,&si_waits,merge->nsend,MPI_Request,&sj_waits);CHKERRQ(ierr);
4671 
4672   for (proc=0, k=0; proc<size; proc++){
4673     if (!len_s[proc]) continue;
4674     i = owners[proc];
4675     ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr);
4676     k++;
4677   }
4678 
4679   /* receives and sends of j-structure are complete */
4680   /*------------------------------------------------*/
4681   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);}
4682   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);}
4683 
4684   /* send and recv i-structure */
4685   /*---------------------------*/
4686   ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr);
4687   ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr);
4688 
4689   ierr = PetscMalloc((len+1)*sizeof(PetscInt),&buf_s);CHKERRQ(ierr);
4690   buf_si = buf_s;  /* points to the beginning of k-th msg to be sent */
4691   for (proc=0,k=0; proc<size; proc++){
4692     if (!len_s[proc]) continue;
4693     /* form outgoing message for i-structure:
4694          buf_si[0]:                 nrows to be sent
4695                [1:nrows]:           row index (global)
4696                [nrows+1:2*nrows+1]: i-structure index
4697     */
4698     /*-------------------------------------------*/
4699     nrows = len_si[proc]/2 - 1;
4700     buf_si_i    = buf_si + nrows+1;
4701     buf_si[0]   = nrows;
4702     buf_si_i[0] = 0;
4703     nrows = 0;
4704     for (i=owners[proc]; i<owners[proc+1]; i++){
4705       anzi = ai[i+1] - ai[i];
4706       if (anzi) {
4707         buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */
4708         buf_si[nrows+1] = i-owners[proc]; /* local row index */
4709         nrows++;
4710       }
4711     }
4712     ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr);
4713     k++;
4714     buf_si += len_si[proc];
4715   }
4716 
4717   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);}
4718   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);}
4719 
4720   ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr);
4721   for (i=0; i<merge->nrecv; i++){
4722     ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr);
4723   }
4724 
4725   ierr = PetscFree(len_si);CHKERRQ(ierr);
4726   ierr = PetscFree(len_ri);CHKERRQ(ierr);
4727   ierr = PetscFree(rj_waits);CHKERRQ(ierr);
4728   ierr = PetscFree2(si_waits,sj_waits);CHKERRQ(ierr);
4729   ierr = PetscFree(ri_waits);CHKERRQ(ierr);
4730   ierr = PetscFree(buf_s);CHKERRQ(ierr);
4731   ierr = PetscFree(status);CHKERRQ(ierr);
4732 
4733   /* compute a local seq matrix in each processor */
4734   /*----------------------------------------------*/
4735   /* allocate bi array and free space for accumulating nonzero column info */
4736   ierr = PetscMalloc((m+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr);
4737   bi[0] = 0;
4738 
4739   /* create and initialize a linked list */
4740   nlnk = N+1;
4741   ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4742 
4743   /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */
4744   len = 0;
4745   len  = ai[owners[rank+1]] - ai[owners[rank]];
4746   ierr = PetscFreeSpaceGet((PetscInt)(2*len+1),&free_space);CHKERRQ(ierr);
4747   current_space = free_space;
4748 
4749   /* determine symbolic info for each local row */
4750   ierr = PetscMalloc3(merge->nrecv,PetscInt*,&buf_ri_k,merge->nrecv,PetscInt*,&nextrow,merge->nrecv,PetscInt*,&nextai);CHKERRQ(ierr);
4751 
4752   for (k=0; k<merge->nrecv; k++){
4753     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
4754     nrows = *buf_ri_k[k];
4755     nextrow[k]  = buf_ri_k[k] + 1;  /* next row number of k-th recved i-structure */
4756     nextai[k]   = buf_ri_k[k] + (nrows + 1);/* points to the next i-structure of k-th recved i-structure  */
4757   }
4758 
4759   ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
4760   len = 0;
4761   for (i=0;i<m;i++) {
4762     bnzi   = 0;
4763     /* add local non-zero cols of this proc's seqmat into lnk */
4764     arow   = owners[rank] + i;
4765     anzi   = ai[arow+1] - ai[arow];
4766     aj     = a->j + ai[arow];
4767     ierr = PetscLLAdd(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4768     bnzi += nlnk;
4769     /* add received col data into lnk */
4770     for (k=0; k<merge->nrecv; k++){ /* k-th received message */
4771       if (i == *nextrow[k]) { /* i-th row */
4772         anzi = *(nextai[k]+1) - *nextai[k];
4773         aj   = buf_rj[k] + *nextai[k];
4774         ierr = PetscLLAdd(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4775         bnzi += nlnk;
4776         nextrow[k]++; nextai[k]++;
4777       }
4778     }
4779     if (len < bnzi) len = bnzi;  /* =max(bnzi) */
4780 
4781     /* if free space is not available, make more free space */
4782     if (current_space->local_remaining<bnzi) {
4783       ierr = PetscFreeSpaceGet(bnzi+current_space->total_array_size,&current_space);CHKERRQ(ierr);
4784       nspacedouble++;
4785     }
4786     /* copy data into free space, then initialize lnk */
4787     ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
4788     ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr);
4789 
4790     current_space->array           += bnzi;
4791     current_space->local_used      += bnzi;
4792     current_space->local_remaining -= bnzi;
4793 
4794     bi[i+1] = bi[i] + bnzi;
4795   }
4796 
4797   ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
4798 
4799   ierr = PetscMalloc((bi[m]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr);
4800   ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr);
4801   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
4802 
4803   /* create symbolic parallel matrix B_mpi */
4804   /*---------------------------------------*/
4805   ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr);
4806   if (n==PETSC_DECIDE) {
4807     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr);
4808   } else {
4809     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
4810   }
4811   ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr);
4812   ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr);
4813   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
4814 
4815   /* B_mpi is not ready for use - assembly will be done by MatMerge_SeqsToMPINumeric() */
4816   B_mpi->assembled     = PETSC_FALSE;
4817   B_mpi->ops->destroy  = MatDestroy_MPIAIJ_SeqsToMPI;
4818   merge->bi            = bi;
4819   merge->bj            = bj;
4820   merge->buf_ri        = buf_ri;
4821   merge->buf_rj        = buf_rj;
4822   merge->coi           = PETSC_NULL;
4823   merge->coj           = PETSC_NULL;
4824   merge->owners_co     = PETSC_NULL;
4825 
4826   ierr = PetscCommDestroy(&comm);CHKERRQ(ierr);
4827 
4828   /* attach the supporting struct to B_mpi for reuse */
4829   ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
4830   ierr = PetscContainerSetPointer(container,merge);CHKERRQ(ierr);
4831   ierr = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr);
4832   ierr = PetscContainerDestroy(&container);CHKERRQ(ierr);
4833   *mpimat = B_mpi;
4834 
4835   ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4836   PetscFunctionReturn(0);
4837 }
4838 
4839 #undef __FUNCT__
4840 #define __FUNCT__ "MatMerge_SeqsToMPI"
4841 PetscErrorCode  MatMerge_SeqsToMPI(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat)
4842 {
4843   PetscErrorCode   ierr;
4844 
4845   PetscFunctionBegin;
4846   ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4847   if (scall == MAT_INITIAL_MATRIX){
4848     ierr = MatMerge_SeqsToMPISymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr);
4849   }
4850   ierr = MatMerge_SeqsToMPINumeric(seqmat,*mpimat);CHKERRQ(ierr);
4851   ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4852   PetscFunctionReturn(0);
4853 }
4854 
4855 #undef __FUNCT__
4856 #define __FUNCT__ "MatMPIAIJGetLocalMat"
4857 /*@
4858      MatMPIAIJGetLocalMat - Creates a SeqAIJ matrix from an MPIAIJ matrix by taking all of its local rows and putting them into a sequential matrix with
4859           mlocal rows and n columns, where mlocal is the local row count obtained with MatGetLocalSize() and n is the global column count obtained
4860           with MatGetSize()
4861 
4862     Not Collective
4863 
4864    Input Parameters:
4865 +    A - the matrix
4866 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4867 
4868    Output Parameter:
4869 .    A_loc - the local sequential matrix generated
4870 
4871     Level: developer
4872 
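   A minimal usage sketch (names are illustrative; the A_loc created with
   MAT_INITIAL_MATRIX is destroyed by the caller when no longer needed):

.vb
     Mat A_loc;
     ierr = MatMPIAIJGetLocalMat(A,MAT_INITIAL_MATRIX,&A_loc);CHKERRQ(ierr);
     /* ... use A_loc ... */
     ierr = MatDestroy(&A_loc);CHKERRQ(ierr);
.ve
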
4873 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMatCondensed()
4874 
4875 @*/
4876 PetscErrorCode  MatMPIAIJGetLocalMat(Mat A,MatReuse scall,Mat *A_loc)
4877 {
4878   PetscErrorCode  ierr;
4879   Mat_MPIAIJ      *mpimat=(Mat_MPIAIJ*)A->data;
4880   Mat_SeqAIJ      *mat,*a=(Mat_SeqAIJ*)(mpimat->A)->data,*b=(Mat_SeqAIJ*)(mpimat->B)->data;
4881   PetscInt        *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*cmap=mpimat->garray;
4882   MatScalar       *aa=a->a,*ba=b->a,*cam;
4883   PetscScalar     *ca;
4884   PetscInt        am=A->rmap->n,i,j,k,cstart=A->cmap->rstart;
4885   PetscInt        *ci,*cj,col,ncols_d,ncols_o,jo;
4886   PetscBool       match;
4887 
4888   PetscFunctionBegin;
4889   ierr = PetscTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr);
4890   if (!match) SETERRQ(((PetscObject)A)->comm, PETSC_ERR_SUP,"Requires MPIAIJ matrix as input");
4891   ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
4892   if (scall == MAT_INITIAL_MATRIX){
4893     ierr = PetscMalloc((1+am)*sizeof(PetscInt),&ci);CHKERRQ(ierr);
4894     ci[0] = 0;
4895     for (i=0; i<am; i++){
4896       ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]);
4897     }
4898     ierr = PetscMalloc((1+ci[am])*sizeof(PetscInt),&cj);CHKERRQ(ierr);
4899     ierr = PetscMalloc((1+ci[am])*sizeof(PetscScalar),&ca);CHKERRQ(ierr);
4900     k = 0;
4901     for (i=0; i<am; i++) {
4902       ncols_o = bi[i+1] - bi[i];
4903       ncols_d = ai[i+1] - ai[i];
4904       /* off-diagonal portion of A */
4905       for (jo=0; jo<ncols_o; jo++) {
4906         col = cmap[*bj];
4907         if (col >= cstart) break;
4908         cj[k]   = col; bj++;
4909         ca[k++] = *ba++;
4910       }
4911       /* diagonal portion of A */
4912       for (j=0; j<ncols_d; j++) {
4913         cj[k]   = cstart + *aj++;
4914         ca[k++] = *aa++;
4915       }
4916       /* off-diagonal portion of A */
4917       for (j=jo; j<ncols_o; j++) {
4918         cj[k]   = cmap[*bj++];
4919         ca[k++] = *ba++;
4920       }
4921     }
4922     /* put together the new matrix */
4923     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr);
4924     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
4925     /* Since these are PETSc arrays, change flags to free them as necessary. */
4926     mat          = (Mat_SeqAIJ*)(*A_loc)->data;
4927     mat->free_a  = PETSC_TRUE;
4928     mat->free_ij = PETSC_TRUE;
4929     mat->nonew   = 0;
4930   } else if (scall == MAT_REUSE_MATRIX){
4931     mat=(Mat_SeqAIJ*)(*A_loc)->data;
4932     ci = mat->i; cj = mat->j; cam = mat->a;
4933     for (i=0; i<am; i++) {
4934       /* off-diagonal portion of A */
4935       ncols_o = bi[i+1] - bi[i];
4936       for (jo=0; jo<ncols_o; jo++) {
4937         col = cmap[*bj];
4938         if (col >= cstart) break;
4939         *cam++ = *ba++; bj++;
4940       }
4941       /* diagonal portion of A */
4942       ncols_d = ai[i+1] - ai[i];
4943       for (j=0; j<ncols_d; j++) *cam++ = *aa++;
4944       /* off-diagonal portion of A */
4945       for (j=jo; j<ncols_o; j++) {
4946         *cam++ = *ba++; bj++;
4947       }
4948     }
4949   } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
4950   ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
4951   PetscFunctionReturn(0);
4952 }
4953 
4954 #undef __FUNCT__
4955 #define __FUNCT__ "MatMPIAIJGetLocalMatCondensed"
4956 /*@C
4957      MatMPIAIJGetLocalMatCondensed - Creates a SeqAIJ matrix from an MPIAIJ matrix by taking all its local rows and NON-ZERO columns
4958 
4959     Not Collective
4960 
4961    Input Parameters:
4962 +    A - the matrix
4963 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4964 -    row, col - index sets of rows and columns to extract (or PETSC_NULL)
4965 
4966    Output Parameter:
4967 .    A_loc - the local sequential matrix generated
4968 
4969     Level: developer
4970 
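   A minimal usage sketch, passing PETSC_NULL for row and col so that all local rows
   and all locally nonzero columns are taken:

.vb
     Mat A_loc;
     ierr = MatMPIAIJGetLocalMatCondensed(A,MAT_INITIAL_MATRIX,PETSC_NULL,PETSC_NULL,&A_loc);CHKERRQ(ierr);
.ve
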
4971 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMat()
4972 
4973 @*/
4974 PetscErrorCode  MatMPIAIJGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc)
4975 {
4976   Mat_MPIAIJ        *a=(Mat_MPIAIJ*)A->data;
4977   PetscErrorCode    ierr;
4978   PetscInt          i,start,end,ncols,nzA,nzB,*cmap,imark,*idx;
4979   IS                isrowa,iscola;
4980   Mat               *aloc;
4981   PetscBool       match;
4982 
4983   PetscFunctionBegin;
4984   ierr = PetscTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr);
4985   if (!match) SETERRQ(((PetscObject)A)->comm, PETSC_ERR_SUP,"Requires MPIAIJ matrix as input");
4986   ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
4987   if (!row){
4988     start = A->rmap->rstart; end = A->rmap->rend;
4989     ierr = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr);
4990   } else {
4991     isrowa = *row;
4992   }
4993   if (!col){
4994     start = A->cmap->rstart;
4995     cmap  = a->garray;
4996     nzA   = a->A->cmap->n;
4997     nzB   = a->B->cmap->n;
4998     ierr  = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr);
4999     ncols = 0;
5000     for (i=0; i<nzB; i++) {
5001       if (cmap[i] < start) idx[ncols++] = cmap[i];
5002       else break;
5003     }
5004     imark = i;
5005     for (i=0; i<nzA; i++) idx[ncols++] = start + i;
5006     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i];
5007     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&iscola);CHKERRQ(ierr);
5008   } else {
5009     iscola = *col;
5010   }
5011   if (scall != MAT_INITIAL_MATRIX){
5012     ierr = PetscMalloc(sizeof(Mat),&aloc);CHKERRQ(ierr);
5013     aloc[0] = *A_loc;
5014   }
5015   ierr = MatGetSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr);
5016   *A_loc = aloc[0];
5017   ierr = PetscFree(aloc);CHKERRQ(ierr);
5018   if (!row){
5019     ierr = ISDestroy(&isrowa);CHKERRQ(ierr);
5020   }
5021   if (!col){
5022     ierr = ISDestroy(&iscola);CHKERRQ(ierr);
5023   }
5024   ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
5025   PetscFunctionReturn(0);
5026 }
5027 
5028 #undef __FUNCT__
5029 #define __FUNCT__ "MatGetBrowsOfAcols"
5030 /*@C
5031     MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns of local A
5032 
5033     Collective on Mat
5034 
5035    Input Parameters:
5036 +    A,B - the matrices in mpiaij format
5037 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5038 -    rowb, colb - index sets of rows and columns of B to extract (or PETSC_NULL)
5039 
5040    Output Parameter:
5041 +    rowb, colb - index sets of rows and columns of B to extract
5042 .    brstart - row index of B_seq from which the next B->rmap->n rows are taken from B's own local rows
5043 -    B_seq - the sequential matrix generated
5044 
5045     Level: developer
5046 
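   A minimal usage sketch (names are illustrative); with MAT_INITIAL_MATRIX the index
   sets and brstart are created here and may be passed back in for MAT_REUSE_MATRIX:

.vb
     IS       rowb = PETSC_NULL,colb = PETSC_NULL;
     PetscInt brstart;
     Mat      B_seq;

     ierr = MatGetBrowsOfAcols(A,B,MAT_INITIAL_MATRIX,&rowb,&colb,&brstart,&B_seq);CHKERRQ(ierr);
.ve
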
5047 @*/
5048 PetscErrorCode  MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,PetscInt *brstart,Mat *B_seq)
5049 {
5050   Mat_MPIAIJ        *a=(Mat_MPIAIJ*)A->data;
5051   PetscErrorCode    ierr;
5052   PetscInt          *idx,i,start,ncols,nzA,nzB,*cmap,imark;
5053   IS                isrowb,iscolb;
5054   Mat               *bseq;
5055 
5056   PetscFunctionBegin;
5057   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend){
5058     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
5059   }
5060   ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
5061 
5062   if (scall == MAT_INITIAL_MATRIX){
5063     start = A->cmap->rstart;
5064     cmap  = a->garray;
5065     nzA   = a->A->cmap->n;
5066     nzB   = a->B->cmap->n;
5067     ierr  = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr);
5068     ncols = 0;
5069     for (i=0; i<nzB; i++) {  /* row < local row index */
5070       if (cmap[i] < start) idx[ncols++] = cmap[i];
5071       else break;
5072     }
5073     imark = i;
5074     for (i=0; i<nzA; i++) idx[ncols++] = start + i;  /* local rows */
5075     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */
5076     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&isrowb);CHKERRQ(ierr);
5077     *brstart = imark;
5078     ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr);
5079   } else {
5080     if (!rowb || !colb) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX");
5081     isrowb = *rowb; iscolb = *colb;
5082     ierr = PetscMalloc(sizeof(Mat),&bseq);CHKERRQ(ierr);
5083     bseq[0] = *B_seq;
5084   }
5085   ierr = MatGetSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr);
5086   *B_seq = bseq[0];
5087   ierr = PetscFree(bseq);CHKERRQ(ierr);
5088   if (!rowb){
5089     ierr = ISDestroy(&isrowb);CHKERRQ(ierr);
5090   } else {
5091     *rowb = isrowb;
5092   }
5093   if (!colb){
5094     ierr = ISDestroy(&iscolb);CHKERRQ(ierr);
5095   } else {
5096     *colb = iscolb;
5097   }
5098   ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
5099   PetscFunctionReturn(0);
5100 }
5101 
5102 #undef __FUNCT__
5103 #define __FUNCT__ "MatGetBrowsOfAoCols"
5104 /*@C
5105     MatGetBrowsOfAoCols - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns
5106     of the OFF-DIAGONAL portion of local A
5107 
5108     Collective on Mat
5109 
5110    Input Parameters:
5111 +    A,B - the matrices in mpiaij format
5112 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5113 .    startsj - starting point in B's sending and receiving j-arrays, saved for MAT_REUSE (or PETSC_NULL)
5114 .    startsj_r - similar to startsj for receives
5115 -    bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or PETSC_NULL)
5116 
5117    Output Parameter:
5118 .    B_oth - the sequential matrix generated
5119 
5120     Level: developer
5121 
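   A minimal usage sketch (names are illustrative); the startsj, startsj_r and bufa
   buffers filled for MAT_INITIAL_MATRIX may be passed back in for MAT_REUSE_MATRIX:

.vb
     PetscInt  *startsj = PETSC_NULL,*startsj_r = PETSC_NULL;
     MatScalar *bufa = PETSC_NULL;
     Mat       B_oth;

     ierr = MatGetBrowsOfAoCols(A,B,MAT_INITIAL_MATRIX,&startsj,&startsj_r,&bufa,&B_oth);CHKERRQ(ierr);
.ve
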
5122 @*/
5123 PetscErrorCode  MatGetBrowsOfAoCols(Mat A,Mat B,MatReuse scall,PetscInt **startsj,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth)
5124 {
5125   VecScatter_MPI_General *gen_to,*gen_from;
5126   PetscErrorCode         ierr;
5127   Mat_MPIAIJ             *a=(Mat_MPIAIJ*)A->data;
5128   Mat_SeqAIJ             *b_oth;
5129   VecScatter             ctx=a->Mvctx;
5130   MPI_Comm               comm=((PetscObject)ctx)->comm;
5131   PetscMPIInt            *rprocs,*sprocs,tag=((PetscObject)ctx)->tag,rank;
5132   PetscInt               *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap->n,row,*b_othi,*b_othj;
5133   PetscScalar            *rvalues,*svalues;
5134   MatScalar              *b_otha,*bufa,*bufA;
5135   PetscInt               i,j,k,l,ll,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len;
5136   MPI_Request            *rwaits = PETSC_NULL,*swaits = PETSC_NULL;
5137   MPI_Status             *sstatus,rstatus;
5138   PetscMPIInt            jj;
5139   PetscInt               *cols,sbs,rbs;
5140   PetscScalar            *vals;
5141 
5142   PetscFunctionBegin;
5143   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend){
5144     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
5145   }
5146   ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
5147   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
5148 
5149   gen_to   = (VecScatter_MPI_General*)ctx->todata;
5150   gen_from = (VecScatter_MPI_General*)ctx->fromdata;
5151   rvalues  = gen_from->values; /* holds the length of receiving row */
5152   svalues  = gen_to->values;   /* holds the length of sending row */
5153   nrecvs   = gen_from->n;
5154   nsends   = gen_to->n;
5155 
5156   ierr = PetscMalloc2(nrecvs,MPI_Request,&rwaits,nsends,MPI_Request,&swaits);CHKERRQ(ierr);
5157   srow     = gen_to->indices;   /* local row index to be sent */
5158   sstarts  = gen_to->starts;
5159   sprocs   = gen_to->procs;
5160   sstatus  = gen_to->sstatus;
5161   sbs      = gen_to->bs;
5162   rstarts  = gen_from->starts;
5163   rprocs   = gen_from->procs;
5164   rbs      = gen_from->bs;
5165 
5166   if (!startsj || !bufa_ptr) scall = MAT_INITIAL_MATRIX;
5167   if (scall == MAT_INITIAL_MATRIX){
5168     /* i-array */
5169     /*---------*/
5170     /*  post receives */
5171     for (i=0; i<nrecvs; i++){
5172       rowlen = (PetscInt*)rvalues + rstarts[i]*rbs;
5173       nrows = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */
5174       ierr = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5175     }
5176 
5177     /* pack the outgoing message */
5178     ierr = PetscMalloc2(nsends+1,PetscInt,&sstartsj,nrecvs+1,PetscInt,&rstartsj);CHKERRQ(ierr);
5179     sstartsj[0] = 0;  rstartsj[0] = 0;
5180     len = 0; /* total length of j or a array to be sent */
5181     k = 0;
5182     for (i=0; i<nsends; i++){
5183       rowlen = (PetscInt*)svalues + sstarts[i]*sbs;
5184       nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5185       for (j=0; j<nrows; j++) {
5186         row = srow[k] + B->rmap->range[rank]; /* global row idx */
5187         for (l=0; l<sbs; l++){
5188           ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); /* rowlength */
5189           rowlen[j*sbs+l] = ncols;
5190           len += ncols;
5191           ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr);
5192         }
5193         k++;
5194       }
5195       ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5196       sstartsj[i+1] = len;  /* starting point of (i+1)-th outgoing msg in bufj and bufa */
5197     }
5198     /* recvs and sends of i-array are completed */
5199     i = nrecvs;
5200     while (i--) {
5201       ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5202     }
5203     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
5204 
5205     /* allocate buffers for sending j and a arrays */
5206     ierr = PetscMalloc((len+1)*sizeof(PetscInt),&bufj);CHKERRQ(ierr);
5207     ierr = PetscMalloc((len+1)*sizeof(PetscScalar),&bufa);CHKERRQ(ierr);
5208 
5209     /* create i-array of B_oth */
5210     ierr = PetscMalloc((aBn+2)*sizeof(PetscInt),&b_othi);CHKERRQ(ierr);
5211     b_othi[0] = 0;
5212     len = 0; /* total length of j or a array to be received */
5213     k = 0;
5214     for (i=0; i<nrecvs; i++){
5215       rowlen = (PetscInt*)rvalues + rstarts[i]*rbs;
5216       nrows = rbs*(rstarts[i+1]-rstarts[i]); /* num of rows to be received */
5217       for (j=0; j<nrows; j++) {
5218         b_othi[k+1] = b_othi[k] + rowlen[j];
5219         len += rowlen[j]; k++;
5220       }
5221       rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */
5222     }
5223 
5224     /* allocate space for j and a arrays of B_oth */
5225     ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(PetscInt),&b_othj);CHKERRQ(ierr);
5226     ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(MatScalar),&b_otha);CHKERRQ(ierr);
5227 
5228     /* j-array */
5229     /*---------*/
5230     /*  post receives of j-array */
5231     for (i=0; i<nrecvs; i++){
5232       nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
5233       ierr = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5234     }
5235 
5236     /* pack the outgoing message j-array */
5237     k = 0;
5238     for (i=0; i<nsends; i++){
5239       nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5240       bufJ = bufj+sstartsj[i];
5241       for (j=0; j<nrows; j++) {
5242         row  = srow[k++] + B->rmap->range[rank]; /* global row idx */
5243         for (ll=0; ll<sbs; ll++){
5244           ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr);
5245           for (l=0; l<ncols; l++){
5246             *bufJ++ = cols[l];
5247           }
5248           ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr);
5249         }
5250       }
5251       ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5252     }
5253 
5254     /* recvs and sends of j-array are completed */
5255     i = nrecvs;
5256     while (i--) {
5257       ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5258     }
5259     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
5260   } else if (scall == MAT_REUSE_MATRIX){
5261     sstartsj = *startsj;
5262     rstartsj = *startsj_r;
5263     bufa     = *bufa_ptr;
5264     b_oth    = (Mat_SeqAIJ*)(*B_oth)->data;
5265     b_otha   = b_oth->a;
5266   } else {
5267     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE, "Invalid MatReuse: must be MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX");
5268   }
5269 
5270   /* a-array */
5271   /*---------*/
5272   /*  post receives of a-array */
5273   for (i=0; i<nrecvs; i++){
5274     nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
5275     ierr = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5276   }
5277 
5278   /* pack the outgoing message a-array */
5279   k = 0;
5280   for (i=0; i<nsends; i++){
5281     nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5282     bufA = bufa+sstartsj[i];
5283     for (j=0; j<nrows; j++) {
5284       row  = srow[k++] + B->rmap->range[rank]; /* global row idx */
5285       for (ll=0; ll<sbs; ll++){
5286         ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr);
5287         for (l=0; l<ncols; l++){
5288           *bufA++ = vals[l];
5289         }
5290         ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr);
5291       }
5292     }
5293     ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5294   }
5295   /* recvs and sends of a-array are completed */
5296   i = nrecvs;
5297   while (i--) {
5298     ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5299   }
5300   if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
5301   ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr);
5302 
5303   if (scall == MAT_INITIAL_MATRIX){
5304     /* put together the new matrix */
5305     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr);
5306 
5307     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
5308     /* Since these are PETSc arrays, change flags to free them as necessary. */
5309     b_oth          = (Mat_SeqAIJ *)(*B_oth)->data;
5310     b_oth->free_a  = PETSC_TRUE;
5311     b_oth->free_ij = PETSC_TRUE;
5312     b_oth->nonew   = 0;
5313 
5314     ierr = PetscFree(bufj);CHKERRQ(ierr);
5315     if (!startsj || !bufa_ptr){
5316       ierr = PetscFree2(sstartsj,rstartsj);CHKERRQ(ierr);
5317       ierr = PetscFree(bufa_ptr);CHKERRQ(ierr);
5318     } else {
5319       *startsj   = sstartsj;
5320       *startsj_r = rstartsj;
5321       *bufa_ptr  = bufa;
5322     }
5323   }
5324   ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
5325   PetscFunctionReturn(0);
5326 }
5327 
5328 #undef __FUNCT__
5329 #define __FUNCT__ "MatGetCommunicationStructs"
5330 /*@C
5331   MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication.
5332 
5333   Not Collective
5334 
5335   Input Parameter:
5336 . A - The matrix in mpiaij format
5337 
5338   Output Parameters:
5339 + lvec - The local vector holding off-process values from the argument to a matrix-vector product
5340 . colmap - A map from global column index to local index into lvec
5341 - multScatter - A scatter from the argument of a matrix-vector product to lvec
5342 
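  Example usage (a minimal sketch; it assumes a Mat A of type MATMPIAIJ and a PetscErrorCode ierr are in scope,
  the colmap argument is a PetscTable* when PETSC_USE_CTABLE is defined and a PetscInt** otherwise, and the
  returned objects are internal to the matrix and should not be destroyed by the caller):
.vb
     Vec        lvec;
     PetscInt   *colmap;
     VecScatter Mvctx;
     ierr = MatGetCommunicationStructs(A,&lvec,&colmap,&Mvctx);CHKERRQ(ierr);
.ve
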
5343   Level: developer
5344 
5345 @*/
5346 #if defined (PETSC_USE_CTABLE)
5347 PetscErrorCode  MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter)
5348 #else
5349 PetscErrorCode  MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter)
5350 #endif
5351 {
5352   Mat_MPIAIJ *a;
5353 
5354   PetscFunctionBegin;
5355   PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
5356   PetscValidPointer(lvec, 2);
5357   PetscValidPointer(colmap, 3);
5358   PetscValidPointer(multScatter, 4);
5359   a = (Mat_MPIAIJ *) A->data;
5360   if (lvec) *lvec = a->lvec;
5361   if (colmap) *colmap = a->colmap;
5362   if (multScatter) *multScatter = a->Mvctx;
5363   PetscFunctionReturn(0);
5364 }
5365 
5366 EXTERN_C_BEGIN
5367 extern PetscErrorCode  MatConvert_MPIAIJ_MPIAIJCRL(Mat,const MatType,MatReuse,Mat*);
5368 extern PetscErrorCode  MatConvert_MPIAIJ_MPIAIJPERM(Mat,const MatType,MatReuse,Mat*);
5369 extern PetscErrorCode  MatConvert_MPIAIJ_MPISBAIJ(Mat,const MatType,MatReuse,Mat*);
5370 EXTERN_C_END
5371 
5372 #undef __FUNCT__
5373 #define __FUNCT__ "MatMatMultNumeric_MPIDense_MPIAIJ"
5374 /*
5375     Computes (B'*A')' since computing A*B directly is untenable
5376 
5377                n                       p                          p
5378         (              )       (              )         (                  )
5379       m (      A       )  *  n (       B      )   =   m (         C        )
5380         (              )       (              )         (                  )
5381 
5382 */
5383 PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C)
5384 {
5385   PetscErrorCode     ierr;
5386   Mat                At,Bt,Ct;
5387 
5388   PetscFunctionBegin;
5389   ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr);
5390   ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr);
5391   ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,1.0,&Ct);CHKERRQ(ierr);
5392   ierr = MatDestroy(&At);CHKERRQ(ierr);
5393   ierr = MatDestroy(&Bt);CHKERRQ(ierr);
5394   ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr);
5395   ierr = MatDestroy(&Ct);CHKERRQ(ierr);
5396   PetscFunctionReturn(0);
5397 }
5398 
5399 #undef __FUNCT__
5400 #define __FUNCT__ "MatMatMultSymbolic_MPIDense_MPIAIJ"
5401 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat *C)
5402 {
5403   PetscErrorCode ierr;
5404   PetscInt       m=A->rmap->n,n=B->cmap->n;
5405   Mat            Cmat;
5406 
5407   PetscFunctionBegin;
5408   if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %D != B->rmap->n %D",A->cmap->n,B->rmap->n);
5409   ierr = MatCreate(((PetscObject)A)->comm,&Cmat);CHKERRQ(ierr);
5410   ierr = MatSetSizes(Cmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
5411   ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr);
5412   ierr = MatMPIDenseSetPreallocation(Cmat,PETSC_NULL);CHKERRQ(ierr);
5413   ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5414   ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5415   *C   = Cmat;
5416   PetscFunctionReturn(0);
5417 }
5418 
5419 /* ----------------------------------------------------------------*/
5420 #undef __FUNCT__
5421 #define __FUNCT__ "MatMatMult_MPIDense_MPIAIJ"
5422 PetscErrorCode MatMatMult_MPIDense_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C)
5423 {
5424   PetscErrorCode ierr;
5425 
5426   PetscFunctionBegin;
5427   if (scall == MAT_INITIAL_MATRIX){
5428     ierr = MatMatMultSymbolic_MPIDense_MPIAIJ(A,B,fill,C);CHKERRQ(ierr);
5429   }
5430   ierr = MatMatMultNumeric_MPIDense_MPIAIJ(A,B,*C);CHKERRQ(ierr);
5431   PetscFunctionReturn(0);
5432 }
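
/*
   Usage sketch (illustrative only, assuming a Mat A of type MATMPIDENSE, a Mat B of type MATMPIAIJ,
   and a PetscErrorCode ierr are in scope): a user-level MatMatMult() call is expected to reach the
   routines above through the "MatMatMult_mpidense_mpiaij_C" function composed in MatCreate_MPIAIJ()
   below, producing a MATMPIDENSE result.

     Mat C;
     ierr = MatMatMult(A,B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&C);CHKERRQ(ierr);
     ....
     ierr = MatMatMult(A,B,MAT_REUSE_MATRIX,PETSC_DEFAULT,&C);CHKERRQ(ierr);
*/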
5433 
5434 EXTERN_C_BEGIN
5435 #if defined(PETSC_HAVE_MUMPS)
5436 extern PetscErrorCode MatGetFactor_aij_mumps(Mat,MatFactorType,Mat*);
5437 #endif
5438 #if defined(PETSC_HAVE_PASTIX)
5439 extern PetscErrorCode MatGetFactor_mpiaij_pastix(Mat,MatFactorType,Mat*);
5440 #endif
5441 #if defined(PETSC_HAVE_SUPERLU_DIST)
5442 extern PetscErrorCode MatGetFactor_mpiaij_superlu_dist(Mat,MatFactorType,Mat*);
5443 #endif
5444 #if defined(PETSC_HAVE_SPOOLES)
5445 extern PetscErrorCode MatGetFactor_mpiaij_spooles(Mat,MatFactorType,Mat*);
5446 #endif
5447 EXTERN_C_END
5448 
5449 /*MC
5450    MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.
5451 
5452    Options Database Keys:
5453 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions()
5454 
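   Example (a brief sketch of selecting this type from the options database; the Mat A and the
   global sizes M and N are placeholders, not defined here):
.vb
     MatCreate(PETSC_COMM_WORLD,&A);
     MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,M,N);
     MatSetFromOptions(A);                                   /* -mat_type mpiaij selects this type */
     MatMPIAIJSetPreallocation(A,5,PETSC_NULL,2,PETSC_NULL);
.ve
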
5455   Level: beginner
5456 
5457 .seealso: MatCreateMPIAIJ()
5458 M*/
5459 
5460 EXTERN_C_BEGIN
5461 #undef __FUNCT__
5462 #define __FUNCT__ "MatCreate_MPIAIJ"
5463 PetscErrorCode  MatCreate_MPIAIJ(Mat B)
5464 {
5465   Mat_MPIAIJ     *b;
5466   PetscErrorCode ierr;
5467   PetscMPIInt    size;
5468 
5469   PetscFunctionBegin;
5470   ierr = MPI_Comm_size(((PetscObject)B)->comm,&size);CHKERRQ(ierr);
5471 
5472   ierr            = PetscNewLog(B,Mat_MPIAIJ,&b);CHKERRQ(ierr);
5473   B->data         = (void*)b;
5474   ierr            = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
5475   B->rmap->bs     = 1;
5476   B->assembled    = PETSC_FALSE;
5477 
5478   B->insertmode   = NOT_SET_VALUES;
5479   b->size         = size;
5480   ierr = MPI_Comm_rank(((PetscObject)B)->comm,&b->rank);CHKERRQ(ierr);
5481 
5482   /* build stash for caching entries destined for rows owned by other processes */
5483   ierr = MatStashCreate_Private(((PetscObject)B)->comm,1,&B->stash);CHKERRQ(ierr);
5484   b->donotstash  = PETSC_FALSE;
5485   b->colmap      = 0;
5486   b->garray      = 0;
5487   b->roworiented = PETSC_TRUE;
5488 
5489   /* stuff used for matrix vector multiply */
5490   b->lvec      = PETSC_NULL;
5491   b->Mvctx     = PETSC_NULL;
5492 
5493   /* stuff for MatGetRow() */
5494   b->rowindices   = 0;
5495   b->rowvalues    = 0;
5496   b->getrowactive = PETSC_FALSE;
5497 
5498 #if defined(PETSC_HAVE_SPOOLES)
5499   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_spooles_C",
5500                                      "MatGetFactor_mpiaij_spooles",
5501                                      MatGetFactor_mpiaij_spooles);CHKERRQ(ierr);
5502 #endif
5503 #if defined(PETSC_HAVE_MUMPS)
5504   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_mumps_C",
5505                                      "MatGetFactor_aij_mumps",
5506                                      MatGetFactor_aij_mumps);CHKERRQ(ierr);
5507 #endif
5508 #if defined(PETSC_HAVE_PASTIX)
5509   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_pastix_C",
5510 					   "MatGetFactor_mpiaij_pastix",
5511 					   MatGetFactor_mpiaij_pastix);CHKERRQ(ierr);
5512 #endif
5513 #if defined(PETSC_HAVE_SUPERLU_DIST)
5514   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_superlu_dist_C",
5515                                      "MatGetFactor_mpiaij_superlu_dist",
5516                                      MatGetFactor_mpiaij_superlu_dist);CHKERRQ(ierr);
5517 #endif
5518   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
5519                                      "MatStoreValues_MPIAIJ",
5520                                      MatStoreValues_MPIAIJ);CHKERRQ(ierr);
5521   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
5522                                      "MatRetrieveValues_MPIAIJ",
5523                                      MatRetrieveValues_MPIAIJ);CHKERRQ(ierr);
5524   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
5525 				     "MatGetDiagonalBlock_MPIAIJ",
5526                                      MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr);
5527   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatIsTranspose_C",
5528 				     "MatIsTranspose_MPIAIJ",
5529 				     MatIsTranspose_MPIAIJ);CHKERRQ(ierr);
5530   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocation_C",
5531 				     "MatMPIAIJSetPreallocation_MPIAIJ",
5532 				     MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr);
5533   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",
5534 				     "MatMPIAIJSetPreallocationCSR_MPIAIJ",
5535 				     MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr);
5536   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C",
5537 				     "MatDiagonalScaleLocal_MPIAIJ",
5538 				     MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr);
5539   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpiaijperm_C",
5540                                      "MatConvert_MPIAIJ_MPIAIJPERM",
5541                                       MatConvert_MPIAIJ_MPIAIJPERM);CHKERRQ(ierr);
5542   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpiaijcrl_C",
5543                                      "MatConvert_MPIAIJ_MPIAIJCRL",
5544                                       MatConvert_MPIAIJ_MPIAIJCRL);CHKERRQ(ierr);
5545   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C",
5546                                      "MatConvert_MPIAIJ_MPISBAIJ",
5547                                       MatConvert_MPIAIJ_MPISBAIJ);CHKERRQ(ierr);
5548   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMult_mpidense_mpiaij_C",
5549                                      "MatMatMult_MPIDense_MPIAIJ",
5550                                       MatMatMult_MPIDense_MPIAIJ);CHKERRQ(ierr);
5551   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMultSymbolic_mpidense_mpiaij_C",
5552                                      "MatMatMultSymbolic_MPIDense_MPIAIJ",
5553                                      MatMatMultSymbolic_MPIDense_MPIAIJ);CHKERRQ(ierr);
5554   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMultNumeric_mpidense_mpiaij_C",
5555                                      "MatMatMultNumeric_MPIDense_MPIAIJ",
5556                                       MatMatMultNumeric_MPIDense_MPIAIJ);CHKERRQ(ierr);
5557   ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr);
5558   PetscFunctionReturn(0);
5559 }
5560 EXTERN_C_END
5561 
5562 #undef __FUNCT__
5563 #define __FUNCT__ "MatCreateMPIAIJWithSplitArrays"
5564 /*@
5565      MatCreateMPIAIJWithSplitArrays - creates an MPI AIJ matrix using arrays that contain the "diagonal"
5566          and "off-diagonal" parts of the matrix in CSR format.
5567 
5568    Collective on MPI_Comm
5569 
5570    Input Parameters:
5571 +  comm - MPI communicator
5572 .  m - number of local rows (Cannot be PETSC_DECIDE)
5573 .  n - This value should be the same as the local size used in creating the
5574        x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have it
5575        calculated if N is given). For square matrices n is almost always m.
5576 .  M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given)
5577 .  N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given)
5578 .   i - row indices for "diagonal" portion of matrix
5579 .   j - column indices
5580 .   a - matrix values
5581 .   oi - row indices for "off-diagonal" portion of matrix
5582 .   oj - column indices
5583 -   oa - matrix values
5584 
5585    Output Parameter:
5586 .   mat - the matrix
5587 
5588    Level: advanced
5589 
5590    Notes:
5591        The i, j, a, oi, oj, and oa arrays ARE NOT copied by this routine into the internal format used by PETSc. The user
5592        must free the arrays once the matrix has been destroyed, and not before.
5593 
5594        The i and j indices are 0 based
5595 
5596        See MatCreateMPIAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix
5597 
5598        This sets local rows and cannot be used to set off-processor values.
5599 
5600        You cannot later use MatSetValues() to change values in this matrix.
5601 
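       Example (a minimal sketch, assuming two processes and the 2x2 matrix {{1,2},{3,4}} with one row
       per process; only the rank 0 arrays are shown, the Mat A and PetscErrorCode ierr are assumed to
       be in scope, and the off-diagonal column index in oj is a global column number):
.vb
     PetscInt    i[]  = {0,1}, j[]  = {0};  PetscScalar a[]  = {1.0};   /* diagonal block:     entry (0,0) = 1 */
     PetscInt    oi[] = {0,1}, oj[] = {1};  PetscScalar oa[] = {2.0};   /* off-diagonal block: entry (0,1) = 2 */
     ierr = MatCreateMPIAIJWithSplitArrays(PETSC_COMM_WORLD,1,1,2,2,i,j,a,oi,oj,oa,&A);CHKERRQ(ierr);
.ve
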
5602 .keywords: matrix, aij, compressed row, sparse, parallel
5603 
5604 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
5605           MPIAIJ, MatCreateMPIAIJ(), MatCreateMPIAIJWithArrays()
5606 @*/
5607 PetscErrorCode  MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],
5608 								PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat)
5609 {
5610   PetscErrorCode ierr;
5611   Mat_MPIAIJ     *maij;
5612 
5613   PetscFunctionBegin;
5614   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
5615   if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
5616   if (oi[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0");
5617   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
5618   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
5619   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
5620   maij = (Mat_MPIAIJ*) (*mat)->data;
5621   maij->donotstash     = PETSC_TRUE;
5622   (*mat)->preallocated = PETSC_TRUE;
5623 
5624   ierr = PetscLayoutSetBlockSize((*mat)->rmap,1);CHKERRQ(ierr);
5625   ierr = PetscLayoutSetBlockSize((*mat)->cmap,1);CHKERRQ(ierr);
5626   ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr);
5627   ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr);
5628 
5629   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr);
5630   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr);
5631 
5632   ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5633   ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5634   ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5635   ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5636 
5637   ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5638   ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5639   PetscFunctionReturn(0);
5640 }
5641 
5642 /*
5643     Special version for direct calls from Fortran
5644 */
5645 #include <private/fortranimpl.h>
5646 
5647 #if defined(PETSC_HAVE_FORTRAN_CAPS)
5648 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ
5649 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
5650 #define matsetvaluesmpiaij_ matsetvaluesmpiaij
5651 #endif
5652 
5653 /* Change these macros so they can be used in a void function */
5654 #undef CHKERRQ
5655 #define CHKERRQ(ierr) CHKERRABORT(PETSC_COMM_WORLD,ierr)
5656 #undef SETERRQ2
5657 #define SETERRQ2(comm,ierr,b,c,d) CHKERRABORT(comm,ierr)
5658 #undef SETERRQ
5659 #define SETERRQ(c,ierr,b) CHKERRABORT(c,ierr)
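
/*
   Note (illustrative only): with the redefinitions above, an error check such as

     ierr = MatPreallocated(mat);CHKERRQ(ierr);

   inside the void Fortran stub below aborts through CHKERRABORT() on error instead of
   attempting to return a PetscErrorCode from a void function.
*/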
5660 
5661 EXTERN_C_BEGIN
5662 #undef __FUNCT__
5663 #define __FUNCT__ "matsetvaluesmpiaij_"
5664 void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr)
5665 {
5666   Mat             mat = *mmat;
5667   PetscInt        m = *mm, n = *mn;
5668   InsertMode      addv = *maddv;
5669   Mat_MPIAIJ      *aij = (Mat_MPIAIJ*)mat->data;
5670   PetscScalar     value;
5671   PetscErrorCode  ierr;
5672 
5673   ierr = MatPreallocated(mat);CHKERRQ(ierr);
5674   if (mat->insertmode == NOT_SET_VALUES) {
5675     mat->insertmode = addv;
5676   }
5677 #if defined(PETSC_USE_DEBUG)
5678   else if (mat->insertmode != addv) {
5679     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
5680   }
5681 #endif
5682   {
5683   PetscInt        i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
5684   PetscInt        cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
5685   PetscBool       roworiented = aij->roworiented;
5686 
5687   /* Some variables required by the MatSetValues_SeqAIJ_{A,B}_Private() macros */
5688   Mat             A = aij->A;
5689   Mat_SeqAIJ      *a = (Mat_SeqAIJ*)A->data;
5690   PetscInt        *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
5691   MatScalar       *aa = a->a;
5692   PetscBool       ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES))?PETSC_TRUE:PETSC_FALSE);
5693   Mat             B = aij->B;
5694   Mat_SeqAIJ      *b = (Mat_SeqAIJ*)B->data;
5695   PetscInt        *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
5696   MatScalar       *ba = b->a;
5697 
5698   PetscInt        *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
5699   PetscInt        nonew = a->nonew;
5700   MatScalar       *ap1,*ap2;
5701 
5702   PetscFunctionBegin;
5703   for (i=0; i<m; i++) {
5704     if (im[i] < 0) continue;
5705 #if defined(PETSC_USE_DEBUG)
5706     if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
5707 #endif
5708     if (im[i] >= rstart && im[i] < rend) {
5709       row      = im[i] - rstart;
5710       lastcol1 = -1;
5711       rp1      = aj + ai[row];
5712       ap1      = aa + ai[row];
5713       rmax1    = aimax[row];
5714       nrow1    = ailen[row];
5715       low1     = 0;
5716       high1    = nrow1;
5717       lastcol2 = -1;
5718       rp2      = bj + bi[row];
5719       ap2      = ba + bi[row];
5720       rmax2    = bimax[row];
5721       nrow2    = bilen[row];
5722       low2     = 0;
5723       high2    = nrow2;
5724 
5725       for (j=0; j<n; j++) {
5726         if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
5727         if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
5728         if (in[j] >= cstart && in[j] < cend){
5729           col = in[j] - cstart;
5730           MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
5731         } else if (in[j] < 0) continue;
5732 #if defined(PETSC_USE_DEBUG)
5733         else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
5734 #endif
5735         else {
5736           if (mat->was_assembled) {
5737             if (!aij->colmap) {
5738               ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
5739             }
5740 #if defined (PETSC_USE_CTABLE)
5741             ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
5742 	    col--;
5743 #else
5744             col = aij->colmap[in[j]] - 1;
5745 #endif
5746             if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
5747               ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
5748               col =  in[j];
5749               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
5750               B = aij->B;
5751               b = (Mat_SeqAIJ*)B->data;
5752               bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
5753               rp2      = bj + bi[row];
5754               ap2      = ba + bi[row];
5755               rmax2    = bimax[row];
5756               nrow2    = bilen[row];
5757               low2     = 0;
5758               high2    = nrow2;
5759               bm       = aij->B->rmap->n;
5760               ba = b->a;
5761             }
5762           } else col = in[j];
5763           MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
5764         }
5765       }
5766     } else {
5767       if (!aij->donotstash) {
5768         if (roworiented) {
5769           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
5770         } else {
5771           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
5772         }
5773       }
5774     }
5775   }}
5776   PetscFunctionReturnVoid();
5777 }
5778 EXTERN_C_END
5779 
5780