xref: /petsc/src/mat/impls/aij/mpi/mpiaij.c (revision 8ddb5d8b44839bc6385c771bd2d6d5c8cd353003)
1 
2 #include <../src/mat/impls/aij/mpi/mpiaij.h>   /*I "petscmat.h" I*/
3 #include <petscblaslapack.h>
4 
5 /*MC
6    MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices.
7 
8    This matrix type is identical to MATSEQAIJ when constructed with a single process communicator,
9    and MATMPIAIJ otherwise.  As a result, for single process communicators,
10   MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
11   for communicators controlling multiple processes.  It is recommended that you call both of
12   the above preallocation routines for simplicity.
13 
14    Options Database Keys:
15 . -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions()
16 
17   Developer Notes: Subclasses include MATAIJCUSP, MATAIJPERM, and MATAIJCRL; the type also automatically switches over to using inodes when
18    enough of them exist.
19 
20   Level: beginner
21 
22 .seealso: MatCreateMPIAIJ(), MatCreateSeqAIJ(), MATSEQAIJ, MATMPIAIJ
23 M*/
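/*
   Example (editor's sketch): typical creation of a MATAIJ matrix that runs
   unchanged on one or many processes. Calling both preallocation routines is
   harmless because only the one matching the actual (Seq or MPI) type takes
   effect; the sizes and per-row nonzero counts below are illustrative
   placeholders only.

     Mat A;
     ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
     ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,100,100);CHKERRQ(ierr);
     ierr = MatSetType(A,MATAIJ);CHKERRQ(ierr);
     ierr = MatSeqAIJSetPreallocation(A,5,PETSC_NULL);CHKERRQ(ierr);
     ierr = MatMPIAIJSetPreallocation(A,5,PETSC_NULL,2,PETSC_NULL);CHKERRQ(ierr);
*/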
24 
25 /*MC
26    MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices.
27 
28    This matrix type is identical to MATSEQAIJCRL when constructed with a single process communicator,
29    and MATMPIAIJCRL otherwise.  As a result, for single process communicators,
30    MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
31   for communicators controlling multiple processes.  It is recommended that you call both of
32   the above preallocation routines for simplicity.
33 
34    Options Database Keys:
35 . -mat_type aijcrl - sets the matrix type to "aijcrl" during a call to MatSetFromOptions()
36 
37   Level: beginner
38 
39 .seealso: MatCreateMPIAIJCRL(), MATSEQAIJCRL, MATMPIAIJCRL
40 M*/
41 
42 #undef __FUNCT__
43 #define __FUNCT__ "MatFindNonZeroRows_MPIAIJ"
44 PetscErrorCode MatFindNonZeroRows_MPIAIJ(Mat M,IS *keptrows)
45 {
46   PetscErrorCode  ierr;
47   Mat_MPIAIJ      *mat = (Mat_MPIAIJ*)M->data;
48   Mat_SeqAIJ      *a = (Mat_SeqAIJ*)mat->A->data;
49   Mat_SeqAIJ      *b = (Mat_SeqAIJ*)mat->B->data;
50   const PetscInt  *ia,*ib;
51   const MatScalar *aa,*bb;
52   PetscInt        na,nb,i,j,*rows,cnt=0,n0rows;
53   PetscInt        m = M->rmap->n,rstart = M->rmap->rstart;
54 
55   PetscFunctionBegin;
56   *keptrows = 0;
57   ia = a->i;
58   ib = b->i;
59   for (i=0; i<m; i++) {
60     na = ia[i+1] - ia[i];
61     nb = ib[i+1] - ib[i];
62     if (!na && !nb) {
63       cnt++;
64       goto ok1;
65     }
66     aa = a->a + ia[i];
67     for (j=0; j<na; j++) {
68       if (aa[j] != 0.0) goto ok1;
69     }
70     bb = b->a + ib[i];
71     for (j=0; j<nb; j++) {
72       if (bb[j] != 0.0) goto ok1;
73     }
74     cnt++;
75     ok1:;
76   }
77   ierr = MPI_Allreduce(&cnt,&n0rows,1,MPIU_INT,MPI_SUM,((PetscObject)M)->comm);CHKERRQ(ierr);
78   if (!n0rows) PetscFunctionReturn(0);
79   ierr = PetscMalloc((M->rmap->n-cnt)*sizeof(PetscInt),&rows);CHKERRQ(ierr);
80   cnt = 0;
81   for (i=0; i<m; i++) {
82     na = ia[i+1] - ia[i];
83     nb = ib[i+1] - ib[i];
84     if (!na && !nb) continue;
85     aa = a->a + ia[i];
86     for (j=0; j<na; j++) {
87       if (aa[j] != 0.0) {
88         rows[cnt++] = rstart + i;
89         goto ok2;
90       }
91     }
92     bb = b->a + ib[i];
93     for (j=0; j<nb; j++) {
94       if (bb[j] != 0.0) {
95         rows[cnt++] = rstart + i;
96         goto ok2;
97       }
98     }
99     ok2:;
100   }
101   ierr = ISCreateGeneral(((PetscObject)M)->comm,cnt,rows,PETSC_OWN_POINTER,keptrows);CHKERRQ(ierr);
102   PetscFunctionReturn(0);
103 }
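/*
   Example (editor's sketch): the routine above backs the public
   MatFindNonzeroRows() interface; keptrows comes back 0 (NULL) when no
   process owns an all-zero row.

     IS keptrows;
     ierr = MatFindNonzeroRows(A,&keptrows);CHKERRQ(ierr);
     if (keptrows) {
       (restrict the problem to these rows)
       ierr = ISDestroy(&keptrows);CHKERRQ(ierr);
     }
*/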
104 
105 #undef __FUNCT__
106 #define __FUNCT__ "MatGetColumnNorms_MPIAIJ"
107 PetscErrorCode MatGetColumnNorms_MPIAIJ(Mat A,NormType type,PetscReal *norms)
108 {
109   PetscErrorCode ierr;
110   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)A->data;
111   PetscInt       i,n,*garray = aij->garray;
112   Mat_SeqAIJ     *a_aij = (Mat_SeqAIJ*) aij->A->data;
113   Mat_SeqAIJ     *b_aij = (Mat_SeqAIJ*) aij->B->data;
114   PetscReal      *work;
115 
116   PetscFunctionBegin;
117   ierr = MatGetSize(A,PETSC_NULL,&n);CHKERRQ(ierr);
118   ierr = PetscMalloc(n*sizeof(PetscReal),&work);CHKERRQ(ierr);
119   ierr = PetscMemzero(work,n*sizeof(PetscReal));CHKERRQ(ierr);
120   if (type == NORM_2) {
121     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
122       work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]*a_aij->a[i]);
123     }
124     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
125       work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]*b_aij->a[i]);
126     }
127   } else if (type == NORM_1) {
128     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
129       work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]);
130     }
131     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
132       work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]);
133     }
134   } else if (type == NORM_INFINITY) {
135     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
136       work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]);
137     }
138     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
139       work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]),work[garray[b_aij->j[i]]]);
140     }
141 
142   } else SETERRQ(((PetscObject)A)->comm,PETSC_ERR_ARG_WRONG,"Unknown NormType");
143   if (type == NORM_INFINITY) {
144     ierr = MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,((PetscObject)A)->comm);CHKERRQ(ierr);
145   } else {
146     ierr = MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,((PetscObject)A)->comm);CHKERRQ(ierr);
147   }
148   ierr = PetscFree(work);CHKERRQ(ierr);
149   if (type == NORM_2) {
150     for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]);
151   }
152   PetscFunctionReturn(0);
153 }
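/*
   Example (editor's sketch): the routine above is normally reached through
   the public MatGetColumnNorms() interface. Every process must pass a norms
   array with room for all N global columns, since the full result is
   replicated by the Allreduce above:

     PetscReal *norms;
     PetscInt  N;
     ierr = MatGetSize(A,PETSC_NULL,&N);CHKERRQ(ierr);
     ierr = PetscMalloc(N*sizeof(PetscReal),&norms);CHKERRQ(ierr);
     ierr = MatGetColumnNorms(A,NORM_2,norms);CHKERRQ(ierr);
     (use norms[0..N-1])
     ierr = PetscFree(norms);CHKERRQ(ierr);
*/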
154 
155 #undef __FUNCT__
156 #define __FUNCT__ "MatDistribute_MPIAIJ"
157 /*
158     Distributes a SeqAIJ matrix across a set of processes. Code stolen from
159     MatLoad_MPIAIJ(). Horrible lack of reuse. Should be a routine for each matrix type.
160 
161     Works only for square matrices.
162 */
163 PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat)
164 {
165   PetscMPIInt    rank,size;
166   PetscInt       *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz,*gmataj,cnt,row,*ld;
167   PetscErrorCode ierr;
168   Mat            mat;
169   Mat_SeqAIJ     *gmata;
170   PetscMPIInt    tag;
171   MPI_Status     status;
172   PetscBool      aij;
173   MatScalar      *gmataa,*ao,*ad,*gmataarestore=0;
174 
175   PetscFunctionBegin;
176   CHKMEMQ;
177   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
178   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
179   if (!rank) {
180     ierr = PetscTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);CHKERRQ(ierr);
181     if (!aij) SETERRQ1(((PetscObject)gmat)->comm,PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name);
182   }
183   if (reuse == MAT_INITIAL_MATRIX) {
184     ierr = MatCreate(comm,&mat);CHKERRQ(ierr);
185     ierr = MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
186     ierr = MatSetType(mat,MATAIJ);CHKERRQ(ierr);
187     ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr);
188     ierr = PetscMalloc2(m,PetscInt,&dlens,m,PetscInt,&olens);CHKERRQ(ierr);
189     ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
190     rowners[0] = 0;
191     for (i=2; i<=size; i++) {
192       rowners[i] += rowners[i-1];
193     }
194     rstart = rowners[rank];
195     rend   = rowners[rank+1];
196     ierr   = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
197     if (!rank) {
198       gmata = (Mat_SeqAIJ*) gmat->data;
199       /* send row lengths to all processors */
200       for (i=0; i<m; i++) dlens[i] = gmata->ilen[i];
201       for (i=1; i<size; i++) {
202 	ierr = MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
203       }
204       /* determine the diagonal and off-diagonal nonzero counts */
205       ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
206       ierr = PetscMalloc(m*sizeof(PetscInt),&ld);CHKERRQ(ierr);
207       ierr = PetscMemzero(ld,m*sizeof(PetscInt));CHKERRQ(ierr);
208       jj = 0;
209       for (i=0; i<m; i++) {
210 	for (j=0; j<dlens[i]; j++) {
211           if (gmata->j[jj] < rstart) ld[i]++;
212 	  if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++;
213 	  jj++;
214 	}
215       }
216       /* send column indices to other processes */
217       for (i=1; i<size; i++) {
218 	nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
219 	ierr = MPI_Send(&nz,1,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
220 	ierr = MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
221       }
222 
223       /* send numerical values to other processes */
224       for (i=1; i<size; i++) {
225         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
226         ierr = MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
227       }
228       gmataa = gmata->a;
229       gmataj = gmata->j;
230 
231     } else {
232       /* receive row lengths */
233       ierr = MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
234       /* receive column indices */
235       ierr = MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
236       ierr = PetscMalloc2(nz,PetscScalar,&gmataa,nz,PetscInt,&gmataj);CHKERRQ(ierr);
237       ierr = MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
238       /* determine the diagonal and off-diagonal nonzero counts */
239       ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
240       ierr = PetscMalloc(m*sizeof(PetscInt),&ld);CHKERRQ(ierr);
241       ierr = PetscMemzero(ld,m*sizeof(PetscInt));CHKERRQ(ierr);
242       jj = 0;
243       for (i=0; i<m; i++) {
244 	for (j=0; j<dlens[i]; j++) {
245           if (gmataj[jj] < rstart) ld[i]++;
246 	  if (gmataj[jj] < rstart || gmataj[jj] >= rend) olens[i]++;
247 	  jj++;
248 	}
249       }
250       /* receive numerical values */
251       ierr = PetscMemzero(gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr);
252       ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
253     }
254     /* set preallocation */
255     for (i=0; i<m; i++) {
256       dlens[i] -= olens[i];
257     }
258     ierr = MatSeqAIJSetPreallocation(mat,0,dlens);CHKERRQ(ierr);
259     ierr = MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);CHKERRQ(ierr);
260 
261     for (i=0; i<m; i++) {
262       dlens[i] += olens[i];
263     }
264     cnt  = 0;
265     for (i=0; i<m; i++) {
266       row  = rstart + i;
267       ierr = MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);CHKERRQ(ierr);
268       cnt += dlens[i];
269     }
270     if (rank) {
271       ierr = PetscFree2(gmataa,gmataj);CHKERRQ(ierr);
272     }
273     ierr = PetscFree2(dlens,olens);CHKERRQ(ierr);
274     ierr = PetscFree(rowners);CHKERRQ(ierr);
275     ((Mat_MPIAIJ*)(mat->data))->ld = ld;
276     *inmat = mat;
277   } else {   /* column indices are already set; only need to move over numerical values from process 0 */
278     Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data;
279     Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data;
280     mat   = *inmat;
281     ierr  = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
282     if (!rank) {
283       /* send numerical values to other processes */
284       gmata = (Mat_SeqAIJ*) gmat->data;
285       ierr   = MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);CHKERRQ(ierr);
286       gmataa = gmata->a;
287       for (i=1; i<size; i++) {
288         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
289         ierr = MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
290       }
291       nz   = gmata->i[rowners[1]]-gmata->i[rowners[0]];
292     } else {
293       /* receive numerical values from process 0*/
294       nz   = Ad->nz + Ao->nz;
295       ierr = PetscMalloc(nz*sizeof(PetscScalar),&gmataa);CHKERRQ(ierr); gmataarestore = gmataa;
296       ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
297     }
298     /* transfer numerical values into the diagonal A and off diagonal B parts of mat */
299     ld = ((Mat_MPIAIJ*)(mat->data))->ld;
300     ad = Ad->a;
301     ao = Ao->a;
302     if (mat->rmap->n) {
303       i  = 0;
304       nz = ld[i];                                   ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
305       nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
306     }
307     for (i=1; i<mat->rmap->n; i++) {
308       nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
309       nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
310     }
311     i--;
312     if (mat->rmap->n) {
313       nz = Ao->i[i+1] - Ao->i[i] - ld[i];           ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
314     }
315     if (rank) {
316       ierr = PetscFree(gmataarestore);CHKERRQ(ierr);
317     }
318   }
319   ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
320   ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
321   CHKMEMQ;
322   PetscFunctionReturn(0);
323 }
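/*
   Example (editor's sketch): intended use of MatDistribute_MPIAIJ(), a
   private routine. Process 0 holds a square MATSEQAIJ matrix gmat (gmat is
   not accessed on the other ranks); m is the number of local rows each
   process asks for, and the m's must sum to gmat's global size. A second
   call with MAT_REUSE_MATRIX ships only the new numerical values from
   process 0.

     Mat dist;
     ierr = MatDistribute_MPIAIJ(PETSC_COMM_WORLD,gmat,m,MAT_INITIAL_MATRIX,&dist);CHKERRQ(ierr);
     (change numerical values of gmat on process 0)
     ierr = MatDistribute_MPIAIJ(PETSC_COMM_WORLD,gmat,m,MAT_REUSE_MATRIX,&dist);CHKERRQ(ierr);
*/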
324 
325 /*
326   Local utility routine that creates a mapping from the global column
327 number to the local number in the off-diagonal part of the local
328 storage of the matrix.  When PETSC_USE_CTABLE is used this is scalable at
329 a slightly higher hash-table cost; without it it is not scalable (each process
330 stores an order-N integer array) but access is fast.
331 */
332 #undef __FUNCT__
333 #define __FUNCT__ "CreateColmap_MPIAIJ_Private"
334 PetscErrorCode CreateColmap_MPIAIJ_Private(Mat mat)
335 {
336   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
337   PetscErrorCode ierr;
338   PetscInt       n = aij->B->cmap->n,i;
339 
340   PetscFunctionBegin;
341 #if defined (PETSC_USE_CTABLE)
342   ierr = PetscTableCreate(n,mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr);
343   for (i=0; i<n; i++){
344     ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1,INSERT_VALUES);CHKERRQ(ierr);
345   }
346 #else
347   ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscInt),&aij->colmap);CHKERRQ(ierr);
348   ierr = PetscLogObjectMemory(mat,mat->cmap->N*sizeof(PetscInt));CHKERRQ(ierr);
349   ierr = PetscMemzero(aij->colmap,mat->cmap->N*sizeof(PetscInt));CHKERRQ(ierr);
350   for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
351 #endif
352   PetscFunctionReturn(0);
353 }
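/*
   Worked example (editor's note): suppose the off-diagonal part B has
   garray = {3,8}, i.e. local columns 0 and 1 of B hold global columns 3 and 8.
   The colmap built above stores local+1, so that a result of 0 (a table miss,
   or a zeroed array entry) means "global column not present in B":

     with PETSC_USE_CTABLE:  PetscTableFind(aij->colmap,3+1,&col); col--;  gives col = 0
     without:                col = aij->colmap[8] - 1;                     gives col = 1
     any other global column yields col = -1 after the same shift.
*/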
354 
355 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \
356 { \
357     if (col <= lastcol1) low1 = 0; else high1 = nrow1; \
358     lastcol1 = col;\
359     while (high1-low1 > 5) { \
360       t = (low1+high1)/2; \
361       if (rp1[t] > col) high1 = t; \
362       else             low1  = t; \
363     } \
364       for (_i=low1; _i<high1; _i++) { \
365         if (rp1[_i] > col) break; \
366         if (rp1[_i] == col) { \
367           if (addv == ADD_VALUES) ap1[_i] += value;   \
368           else                    ap1[_i] = value; \
369           goto a_noinsert; \
370         } \
371       }  \
372       if (value == 0.0 && ignorezeroentries) {low1 = 0; high1 = nrow1;goto a_noinsert;} \
373       if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;}		\
374       if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
375       MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \
376       N = nrow1++ - 1; a->nz++; high1++; \
377       /* shift up all the later entries in this row */ \
378       for (ii=N; ii>=_i; ii--) { \
379         rp1[ii+1] = rp1[ii]; \
380         ap1[ii+1] = ap1[ii]; \
381       } \
382       rp1[_i] = col;  \
383       ap1[_i] = value;  \
384       a_noinsert: ; \
385       ailen[row] = nrow1; \
386 }
387 
388 
389 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \
390 { \
391     if (col <= lastcol2) low2 = 0; else high2 = nrow2; \
392     lastcol2 = col;\
393     while (high2-low2 > 5) { \
394       t = (low2+high2)/2; \
395       if (rp2[t] > col) high2 = t; \
396       else             low2  = t; \
397     } \
398     for (_i=low2; _i<high2; _i++) {		\
399       if (rp2[_i] > col) break;			\
400       if (rp2[_i] == col) {			      \
401 	if (addv == ADD_VALUES) ap2[_i] += value;     \
402 	else                    ap2[_i] = value;      \
403 	goto b_noinsert;			      \
404       }						      \
405     }							      \
406     if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
407     if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;}		\
408     if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
409     MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \
410     N = nrow2++ - 1; b->nz++; high2++;					\
411     /* shift up all the later entries in this row */			\
412     for (ii=N; ii>=_i; ii--) {						\
413       rp2[ii+1] = rp2[ii];						\
414       ap2[ii+1] = ap2[ii];						\
415     }									\
416     rp2[_i] = col;							\
417     ap2[_i] = value;							\
418     b_noinsert: ;								\
419     bilen[row] = nrow2;							\
420 }
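/*
   Editor's sketch of the logic the two macros above implement, written as a
   plain (hypothetical) helper for readability; the reallocation step
   (MatSeqXAIJReallocateAIJ) and the nonew/ignorezeroentries shortcuts are
   omitted. Each row is kept sorted by column index: a bounded binary search
   narrows the window, a short linear scan finds the slot, and later entries
   are shifted up when a new nonzero is inserted.

   static void RowInsert(PetscInt *cols,MatScalar *vals,PetscInt *nrow,PetscInt col,MatScalar value,InsertMode addv)
   {
     PetscInt lo = 0,hi = *nrow,i,k;
     while (hi-lo > 5) {
       PetscInt t = (lo+hi)/2;
       if (cols[t] > col) hi = t; else lo = t;
     }
     for (i=lo; i<hi; i++) {
       if (cols[i] > col) break;
       if (cols[i] == col) {
         if (addv == ADD_VALUES) vals[i] += value; else vals[i] = value;
         return;
       }
     }
     for (k=*nrow; k>i; k--) { cols[k] = cols[k-1]; vals[k] = vals[k-1]; }
     cols[i] = col; vals[i] = value; (*nrow)++;
   }
*/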
421 
422 #undef __FUNCT__
423 #define __FUNCT__ "MatSetValuesRow_MPIAIJ"
424 PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[])
425 {
426   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)A->data;
427   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data;
428   PetscErrorCode ierr;
429   PetscInt       l,*garray = mat->garray,diag;
430 
431   PetscFunctionBegin;
432   /* code only works for square matrices A */
433 
434   /* find size of row to the left of the diagonal part */
435   ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr);
436   row  = row - diag;
437   for (l=0; l<b->i[row+1]-b->i[row]; l++) {
438     if (garray[b->j[b->i[row]+l]] > diag) break;
439   }
440   ierr = PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));CHKERRQ(ierr);
441 
442   /* diagonal part */
443   ierr = PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));CHKERRQ(ierr);
444 
445   /* right of diagonal part */
446   ierr = PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));CHKERRQ(ierr);
447   PetscFunctionReturn(0);
448 }
449 
450 #undef __FUNCT__
451 #define __FUNCT__ "MatSetValues_MPIAIJ"
452 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
453 {
454   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
455   PetscScalar    value;
456   PetscErrorCode ierr;
457   PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
458   PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
459   PetscBool      roworiented = aij->roworiented;
460 
461   /* Some Variables required in the macro */
462   Mat            A = aij->A;
463   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data;
464   PetscInt       *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
465   MatScalar      *aa = a->a;
466   PetscBool      ignorezeroentries = a->ignorezeroentries;
467   Mat            B = aij->B;
468   Mat_SeqAIJ     *b = (Mat_SeqAIJ*)B->data;
469   PetscInt       *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
470   MatScalar      *ba = b->a;
471 
472   PetscInt       *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
473   PetscInt       nonew = a->nonew;
474   MatScalar      *ap1,*ap2;
475 
476   PetscFunctionBegin;
477   if (v) PetscValidScalarPointer(v,6);
478   for (i=0; i<m; i++) {
479     if (im[i] < 0) continue;
480 #if defined(PETSC_USE_DEBUG)
481     if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
482 #endif
483     if (im[i] >= rstart && im[i] < rend) {
484       row      = im[i] - rstart;
485       lastcol1 = -1;
486       rp1      = aj + ai[row];
487       ap1      = aa + ai[row];
488       rmax1    = aimax[row];
489       nrow1    = ailen[row];
490       low1     = 0;
491       high1    = nrow1;
492       lastcol2 = -1;
493       rp2      = bj + bi[row];
494       ap2      = ba + bi[row];
495       rmax2    = bimax[row];
496       nrow2    = bilen[row];
497       low2     = 0;
498       high2    = nrow2;
499 
500       for (j=0; j<n; j++) {
501         if (v) {if (roworiented) value = v[i*n+j]; else value = v[i+j*m];} else value = 0.0;
502         if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
503         if (in[j] >= cstart && in[j] < cend){
504           col = in[j] - cstart;
505           MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
506         } else if (in[j] < 0) continue;
507 #if defined(PETSC_USE_DEBUG)
508         else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
509 #endif
510         else {
511           if (mat->was_assembled) {
512             if (!aij->colmap) {
513               ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
514             }
515 #if defined (PETSC_USE_CTABLE)
516             ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
517 	    col--;
518 #else
519             col = aij->colmap[in[j]] - 1;
520 #endif
521             if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
522               ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
523               col =  in[j];
524               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
525               B = aij->B;
526               b = (Mat_SeqAIJ*)B->data;
527               bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a;
528               rp2      = bj + bi[row];
529               ap2      = ba + bi[row];
530               rmax2    = bimax[row];
531               nrow2    = bilen[row];
532               low2     = 0;
533               high2    = nrow2;
534               bm       = aij->B->rmap->n;
535               ba = b->a;
536             }
537           } else col = in[j];
538           MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
539         }
540       }
541     } else {
542       if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
543       if (!aij->donotstash) {
544         if (roworiented) {
545           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
546         } else {
547           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
548         }
549       }
550     }
551   }
552   PetscFunctionReturn(0);
553 }
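/*
   Example (editor's sketch): entries whose row is owned by another process
   are not inserted above but placed in the stash, so a MatSetValues()
   sequence must always be closed with an assembly that ships the stashed
   values (the indices and value here are placeholders):

     PetscInt    row = 0,col = 0;
     PetscScalar v   = 1.0;
     ierr = MatSetValues(A,1,&row,1,&col,&v,ADD_VALUES);CHKERRQ(ierr);
     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
*/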
554 
555 #undef __FUNCT__
556 #define __FUNCT__ "MatGetValues_MPIAIJ"
557 PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
558 {
559   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
560   PetscErrorCode ierr;
561   PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
562   PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
563 
564   PetscFunctionBegin;
565   for (i=0; i<m; i++) {
566     if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/
567     if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
568     if (idxm[i] >= rstart && idxm[i] < rend) {
569       row = idxm[i] - rstart;
570       for (j=0; j<n; j++) {
571         if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */
572         if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
573         if (idxn[j] >= cstart && idxn[j] < cend){
574           col = idxn[j] - cstart;
575           ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
576         } else {
577           if (!aij->colmap) {
578             ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
579           }
580 #if defined (PETSC_USE_CTABLE)
581           ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
582           col--;
583 #else
584           col = aij->colmap[idxn[j]] - 1;
585 #endif
586           if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
587           else {
588             ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
589           }
590         }
591       }
592     } else {
593       SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
594     }
595   }
596   PetscFunctionReturn(0);
597 }
598 
599 extern PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat,Vec,Vec);
600 
601 #undef __FUNCT__
602 #define __FUNCT__ "MatAssemblyBegin_MPIAIJ"
603 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
604 {
605   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
606   PetscErrorCode ierr;
607   PetscInt       nstash,reallocs;
608   InsertMode     addv;
609 
610   PetscFunctionBegin;
611   if (aij->donotstash || mat->nooffprocentries) {
612     PetscFunctionReturn(0);
613   }
614 
615   /* make sure all processes are in the same mode, either INSERT_VALUES or ADD_VALUES */
616   ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,((PetscObject)mat)->comm);CHKERRQ(ierr);
617   if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
618   mat->insertmode = addv; /* in case this processor had no cache */
619 
620   ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
621   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
622   ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
623   PetscFunctionReturn(0);
624 }
625 
626 #undef __FUNCT__
627 #define __FUNCT__ "MatAssemblyEnd_MPIAIJ"
628 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
629 {
630   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
631   Mat_SeqAIJ     *a=(Mat_SeqAIJ *)aij->A->data;
632   PetscErrorCode ierr;
633   PetscMPIInt    n;
634   PetscInt       i,j,rstart,ncols,flg;
635   PetscInt       *row,*col;
636   PetscBool      other_disassembled;
637   PetscScalar    *val;
638   InsertMode     addv = mat->insertmode;
639 
640   /* do not use 'b = (Mat_SeqAIJ *)aij->B->data' as B can be reset in disassembly */
641   PetscFunctionBegin;
642   if (!aij->donotstash && !mat->nooffprocentries) {
643     while (1) {
644       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
645       if (!flg) break;
646 
647       for (i=0; i<n;) {
648         /* Now identify the consecutive vals belonging to the same row */
649         for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
650         if (j < n) ncols = j-i;
651         else       ncols = n-i;
652         /* Now assemble all these values with a single function call */
653         ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
654         i = j;
655       }
656     }
657     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
658   }
659   ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
660   ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);
661 
662   /* determine if any processor has disassembled, if so we must
663      also disassemble ourselves, in order that we may reassemble. */
664   /*
665      if the nonzero structure of submatrix B cannot change then we know that
666      no process disassembled, thus we can skip this step
667   */
668   if (!((Mat_SeqAIJ*)aij->B->data)->nonew)  {
669     ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,((PetscObject)mat)->comm);CHKERRQ(ierr);
670     if (mat->was_assembled && !other_disassembled) {
671       ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
672     }
673   }
674   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
675     ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
676   }
677   ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr);
678   ierr = MatSetOption(aij->B,MAT_CHECK_COMPRESSED_ROW,PETSC_FALSE);CHKERRQ(ierr);
679   ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
680   ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);
681 
682   ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
683   aij->rowvalues = 0;
684 
685   /* used by MatAXPY() */
686   a->xtoy = 0; ((Mat_SeqAIJ *)aij->B->data)->xtoy = 0;  /* b->xtoy = 0 */
687   a->XtoY = 0; ((Mat_SeqAIJ *)aij->B->data)->XtoY = 0;  /* b->XtoY = 0 */
688 
689   ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
690   if (a->inode.size) mat->ops->multdiagonalblock = MatMultDiagonalBlock_MPIAIJ;
691   PetscFunctionReturn(0);
692 }
693 
694 #undef __FUNCT__
695 #define __FUNCT__ "MatZeroEntries_MPIAIJ"
696 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
697 {
698   Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
699   PetscErrorCode ierr;
700 
701   PetscFunctionBegin;
702   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
703   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
704   PetscFunctionReturn(0);
705 }
706 
707 #undef __FUNCT__
708 #define __FUNCT__ "MatZeroRows_MPIAIJ"
709 PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
710 {
711   Mat_MPIAIJ        *l = (Mat_MPIAIJ*)A->data;
712   PetscErrorCode    ierr;
713   PetscMPIInt       size = l->size,imdex,n,rank = l->rank,tag = ((PetscObject)A)->tag,lastidx = -1;
714   PetscInt          i,*owners = A->rmap->range;
715   PetscInt          *nprocs,j,idx,nsends,row;
716   PetscInt          nmax,*svalues,*starts,*owner,nrecvs;
717   PetscInt          *rvalues,count,base,slen,*source;
718   PetscInt          *lens,*lrows,*values,rstart=A->rmap->rstart;
719   MPI_Comm          comm = ((PetscObject)A)->comm;
720   MPI_Request       *send_waits,*recv_waits;
721   MPI_Status        recv_status,*send_status;
722   const PetscScalar *xx;
723   PetscScalar       *bb;
724 #if defined(PETSC_USE_DEBUG)
725   PetscBool      found = PETSC_FALSE;
726 #endif
727 
728   PetscFunctionBegin;
729   /* count contributors per process: nprocs[2*j] = number of rows for process j, nprocs[2*j+1] = 1 if a message to j is needed */
730   ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
731   ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
732   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/
733   j = 0;
734   for (i=0; i<N; i++) {
735     if (lastidx > (idx = rows[i])) j = 0;
736     lastidx = idx;
737     for (; j<size; j++) {
738       if (idx >= owners[j] && idx < owners[j+1]) {
739         nprocs[2*j]++;
740         nprocs[2*j+1] = 1;
741         owner[i] = j;
742 #if defined(PETSC_USE_DEBUG)
743         found = PETSC_TRUE;
744 #endif
745         break;
746       }
747     }
748 #if defined(PETSC_USE_DEBUG)
749     if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
750     found = PETSC_FALSE;
751 #endif
752   }
753   nsends = 0;  for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}
754 
755   if (A->nooffproczerorows) {
756     if (nsends > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"You called MatSetOption(,MAT_NO_OFF_PROC_ZERO_ROWS,PETSC_TRUE) but set an off process zero row");
757     nrecvs = nsends;
758     nmax   = N;
759   } else {
760     /* inform other processors of number of messages and max length*/
761     ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);
762   }
763 
764   /* post receives:   */
765   ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr);
766   ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
767   for (i=0; i<nrecvs; i++) {
768     ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
769   }
770 
771   /* do sends:
772       1) starts[i] gives the starting index in svalues for stuff going to
773          the ith processor
774   */
775   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr);
776   ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
777   ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr);
778   starts[0] = 0;
779   for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
780   for (i=0; i<N; i++) {
781     svalues[starts[owner[i]]++] = rows[i];
782   }
783 
784   starts[0] = 0;
785   for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
786   count = 0;
787   for (i=0; i<size; i++) {
788     if (nprocs[2*i+1]) {
789       ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
790     }
791   }
792   ierr = PetscFree(starts);CHKERRQ(ierr);
793 
794   base = owners[rank];
795 
796   /*  wait on receives */
797   ierr   = PetscMalloc2(nrecvs,PetscInt,&lens,nrecvs,PetscInt,&source);CHKERRQ(ierr);
798   count  = nrecvs; slen = 0;
799   while (count) {
800     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
801     /* unpack receives into our local space */
802     ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);
803     source[imdex]  = recv_status.MPI_SOURCE;
804     lens[imdex]    = n;
805     slen          += n;
806     count--;
807   }
808   ierr = PetscFree(recv_waits);CHKERRQ(ierr);
809 
810   /* move the data into the send scatter */
811   ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr);
812   count = 0;
813   for (i=0; i<nrecvs; i++) {
814     values = rvalues + i*nmax;
815     for (j=0; j<lens[i]; j++) {
816       lrows[count++] = values[j] - base;
817     }
818   }
819   ierr = PetscFree(rvalues);CHKERRQ(ierr);
820   ierr = PetscFree2(lens,source);CHKERRQ(ierr);
821   ierr = PetscFree(owner);CHKERRQ(ierr);
822   ierr = PetscFree(nprocs);CHKERRQ(ierr);
823 
824   /* fix right hand side if needed */
825   if (x && b) {
826     ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr);
827     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
828     for (i=0; i<slen; i++) {
829       bb[lrows[i]] = diag*xx[lrows[i]];
830     }
831     ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr);
832     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
833   }
834   /*
835         Zero the required rows. If the "diagonal block" of the matrix
836      is square and the user wishes to set the diagonal we use separate
837      code so that MatSetValues() is not called for each diagonal entry,
838      which would allocate new memory with many mallocs and slow things down.
839 
840   */
841   /* must zero l->B before l->A because the (diag) case below may put values into l->B */
842   ierr = MatZeroRows(l->B,slen,lrows,0.0,0,0);CHKERRQ(ierr);
843   if ((diag != 0.0) && (l->A->rmap->N == l->A->cmap->N)) {
844     ierr = MatZeroRows(l->A,slen,lrows,diag,0,0);CHKERRQ(ierr);
845   } else if (diag != 0.0) {
846     ierr = MatZeroRows(l->A,slen,lrows,0.0,0,0);CHKERRQ(ierr);
847     if (((Mat_SeqAIJ*)l->A->data)->nonew) {
848       SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\n\
849 MAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
850     }
851     for (i = 0; i < slen; i++) {
852       row  = lrows[i] + rstart;
853       ierr = MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr);
854     }
855     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
856     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
857   } else {
858     ierr = MatZeroRows(l->A,slen,lrows,0.0,0,0);CHKERRQ(ierr);
859   }
860   ierr = PetscFree(lrows);CHKERRQ(ierr);
861 
862   /* wait on sends */
863   if (nsends) {
864     ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
865     ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
866     ierr = PetscFree(send_status);CHKERRQ(ierr);
867   }
868   ierr = PetscFree(send_waits);CHKERRQ(ierr);
869   ierr = PetscFree(svalues);CHKERRQ(ierr);
870   PetscFunctionReturn(0);
871 }
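/*
   Example (editor's sketch): typical call reaching the routine above through
   the public MatZeroRows() interface; the global row numbers may belong to
   any process. Passing x and b fixes the right hand side so the solution
   retains x's values in the zeroed rows (the row list is a placeholder):

     PetscInt zrows[2] = {0,1};
     ierr = MatZeroRows(A,2,zrows,1.0,x,b);CHKERRQ(ierr);
*/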
872 
873 #undef __FUNCT__
874 #define __FUNCT__ "MatZeroRowsColumns_MPIAIJ"
875 PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
876 {
877   Mat_MPIAIJ        *l = (Mat_MPIAIJ*)A->data;
878   PetscErrorCode    ierr;
879   PetscMPIInt       size = l->size,imdex,n,rank = l->rank,tag = ((PetscObject)A)->tag,lastidx = -1;
880   PetscInt          i,*owners = A->rmap->range;
881   PetscInt          *nprocs,j,idx,nsends;
882   PetscInt          nmax,*svalues,*starts,*owner,nrecvs;
883   PetscInt          *rvalues,count,base,slen,*source;
884   PetscInt          *lens,*lrows,*values,m;
885   MPI_Comm          comm = ((PetscObject)A)->comm;
886   MPI_Request       *send_waits,*recv_waits;
887   MPI_Status        recv_status,*send_status;
888   const PetscScalar *xx;
889   PetscScalar       *bb,*mask;
890   Vec               xmask,lmask;
891   Mat_SeqAIJ        *aij = (Mat_SeqAIJ*)l->B->data;
892   const PetscInt    *aj, *ii,*ridx;
893   PetscScalar       *aa;
894 #if defined(PETSC_USE_DEBUG)
895   PetscBool         found = PETSC_FALSE;
896 #endif
897 
898   PetscFunctionBegin;
899   /* count contributors per process: nprocs[2*j] = number of rows for process j, nprocs[2*j+1] = 1 if a message to j is needed */
900   ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
901   ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
902   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/
903   j = 0;
904   for (i=0; i<N; i++) {
905     if (lastidx > (idx = rows[i])) j = 0;
906     lastidx = idx;
907     for (; j<size; j++) {
908       if (idx >= owners[j] && idx < owners[j+1]) {
909         nprocs[2*j]++;
910         nprocs[2*j+1] = 1;
911         owner[i] = j;
912 #if defined(PETSC_USE_DEBUG)
913         found = PETSC_TRUE;
914 #endif
915         break;
916       }
917     }
918 #if defined(PETSC_USE_DEBUG)
919     if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
920     found = PETSC_FALSE;
921 #endif
922   }
923   nsends = 0;  for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}
924 
925   /* inform other processors of number of messages and max length*/
926   ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);
927 
928   /* post receives:   */
929   ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr);
930   ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
931   for (i=0; i<nrecvs; i++) {
932     ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
933   }
934 
935   /* do sends:
936       1) starts[i] gives the starting index in svalues for stuff going to
937          the ith processor
938   */
939   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr);
940   ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
941   ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr);
942   starts[0] = 0;
943   for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
944   for (i=0; i<N; i++) {
945     svalues[starts[owner[i]]++] = rows[i];
946   }
947 
948   starts[0] = 0;
949   for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
950   count = 0;
951   for (i=0; i<size; i++) {
952     if (nprocs[2*i+1]) {
953       ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
954     }
955   }
956   ierr = PetscFree(starts);CHKERRQ(ierr);
957 
958   base = owners[rank];
959 
960   /*  wait on receives */
961   ierr   = PetscMalloc2(nrecvs,PetscInt,&lens,nrecvs,PetscInt,&source);CHKERRQ(ierr);
962   count  = nrecvs; slen = 0;
963   while (count) {
964     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
965     /* unpack receives into our local space */
966     ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);
967     source[imdex]  = recv_status.MPI_SOURCE;
968     lens[imdex]    = n;
969     slen          += n;
970     count--;
971   }
972   ierr = PetscFree(recv_waits);CHKERRQ(ierr);
973 
974   /* move the data into the send scatter */
975   ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr);
976   count = 0;
977   for (i=0; i<nrecvs; i++) {
978     values = rvalues + i*nmax;
979     for (j=0; j<lens[i]; j++) {
980       lrows[count++] = values[j] - base;
981     }
982   }
983   ierr = PetscFree(rvalues);CHKERRQ(ierr);
984   ierr = PetscFree2(lens,source);CHKERRQ(ierr);
985   ierr = PetscFree(owner);CHKERRQ(ierr);
986   ierr = PetscFree(nprocs);CHKERRQ(ierr);
987   /* lrows are the local rows to be zeroed, slen is the number of local rows */
988 
989   /* zero diagonal part of matrix */
990   ierr = MatZeroRowsColumns(l->A,slen,lrows,diag,x,b);CHKERRQ(ierr);
991 
992   /* handle off diagonal part of matrix */
993   ierr = MatGetVecs(A,&xmask,PETSC_NULL);CHKERRQ(ierr);
994   ierr = VecDuplicate(l->lvec,&lmask);CHKERRQ(ierr);
995   ierr = VecGetArray(xmask,&bb);CHKERRQ(ierr);
996   for (i=0; i<slen; i++) {
997     bb[lrows[i]] = 1;
998   }
999   ierr = VecRestoreArray(xmask,&bb);CHKERRQ(ierr);
1000   ierr = VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1001   ierr = VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1002   ierr = VecDestroy(&xmask);CHKERRQ(ierr);
1003   if (x) {
1004     ierr = VecScatterBegin(l->Mvctx,x,l->lvec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1005     ierr = VecScatterEnd(l->Mvctx,x,l->lvec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1006     ierr = VecGetArrayRead(l->lvec,&xx);CHKERRQ(ierr);
1007     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
1008   }
1009   ierr = VecGetArray(lmask,&mask);CHKERRQ(ierr);
1010 
1011   /* remove zeroed rows of off diagonal matrix */
1012   ii = aij->i;
1013   for (i=0; i<slen; i++) {
1014     ierr = PetscMemzero(aij->a + ii[lrows[i]],(ii[lrows[i]+1] - ii[lrows[i]])*sizeof(PetscScalar));CHKERRQ(ierr);
1015   }
1016 
1017   /* loop over all elements of off process part of matrix zeroing removed columns*/
1018   if (aij->compressedrow.use){
1019     m    = aij->compressedrow.nrows;
1020     ii   = aij->compressedrow.i;
1021     ridx = aij->compressedrow.rindex;
1022     for (i=0; i<m; i++){
1023       n   = ii[i+1] - ii[i];
1024       aj  = aij->j + ii[i];
1025       aa  = aij->a + ii[i];
1026 
1027       for (j=0; j<n; j++) {
1028         if (PetscAbsScalar(mask[*aj])) {
1029           if (b) bb[*ridx] -= *aa*xx[*aj];
1030           *aa        = 0.0;
1031         }
1032         aa++;
1033         aj++;
1034       }
1035       ridx++;
1036     }
1037   } else { /* do not use compressed row format */
1038     m = l->B->rmap->n;
1039     for (i=0; i<m; i++) {
1040       n   = ii[i+1] - ii[i];
1041       aj  = aij->j + ii[i];
1042       aa  = aij->a + ii[i];
1043       for (j=0; j<n; j++) {
1044         if (PetscAbsScalar(mask[*aj])) {
1045           if (b) bb[i] -= *aa*xx[*aj];
1046           *aa    = 0.0;
1047         }
1048         aa++;
1049         aj++;
1050       }
1051     }
1052   }
1053   if (x) {
1054     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
1055     ierr = VecRestoreArrayRead(l->lvec,&xx);CHKERRQ(ierr);
1056   }
1057   ierr = VecRestoreArray(lmask,&mask);CHKERRQ(ierr);
1058   ierr = VecDestroy(&lmask);CHKERRQ(ierr);
1059   ierr = PetscFree(lrows);CHKERRQ(ierr);
1060 
1061   /* wait on sends */
1062   if (nsends) {
1063     ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
1064     ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
1065     ierr = PetscFree(send_status);CHKERRQ(ierr);
1066   }
1067   ierr = PetscFree(send_waits);CHKERRQ(ierr);
1068   ierr = PetscFree(svalues);CHKERRQ(ierr);
1069 
1070   PetscFunctionReturn(0);
1071 }
1072 
1073 #undef __FUNCT__
1074 #define __FUNCT__ "MatMult_MPIAIJ"
1075 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
1076 {
1077   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1078   PetscErrorCode ierr;
1079   PetscInt       nt;
1080 
1081   PetscFunctionBegin;
1082   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
1083   if (nt != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt);
1084   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1085   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
1086   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1087   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
1088   PetscFunctionReturn(0);
1089 }
1090 
1091 #undef __FUNCT__
1092 #define __FUNCT__ "MatMultDiagonalBlock_MPIAIJ"
1093 PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx)
1094 {
1095   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1096   PetscErrorCode ierr;
1097 
1098   PetscFunctionBegin;
1099   ierr = MatMultDiagonalBlock(a->A,bb,xx);CHKERRQ(ierr);
1100   PetscFunctionReturn(0);
1101 }
1102 
1103 #undef __FUNCT__
1104 #define __FUNCT__ "MatMultAdd_MPIAIJ"
1105 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1106 {
1107   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1108   PetscErrorCode ierr;
1109 
1110   PetscFunctionBegin;
1111   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1112   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1113   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1114   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
1115   PetscFunctionReturn(0);
1116 }
1117 
1118 #undef __FUNCT__
1119 #define __FUNCT__ "MatMultTranspose_MPIAIJ"
1120 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
1121 {
1122   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1123   PetscErrorCode ierr;
1124   PetscBool      merged;
1125 
1126   PetscFunctionBegin;
1127   ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr);
1128   /* do nondiagonal part */
1129   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1130   if (!merged) {
1131     /* send it on its way */
1132     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1133     /* do local part */
1134     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
1135     /* receive remote parts: note this assumes the values are not actually */
1136     /* added into yy until the next line */
1137     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1138   } else {
1139     /* do local part */
1140     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
1141     /* send it on its way */
1142     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1143     /* the values were actually received in the Begin() but we still need to call this no-op */
1144     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1145   }
1146   PetscFunctionReturn(0);
1147 }
1148 
1149 EXTERN_C_BEGIN
1150 #undef __FUNCT__
1151 #define __FUNCT__ "MatIsTranspose_MPIAIJ"
1152 PetscErrorCode  MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscBool  *f)
1153 {
1154   MPI_Comm       comm;
1155   Mat_MPIAIJ     *Aij = (Mat_MPIAIJ *) Amat->data, *Bij;
1156   Mat            Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs;
1157   IS             Me,Notme;
1158   PetscErrorCode ierr;
1159   PetscInt       M,N,first,last,*notme,i;
1160   PetscMPIInt    size;
1161 
1162   PetscFunctionBegin;
1163 
1164   /* Easy test: symmetric diagonal block */
1165   Bij = (Mat_MPIAIJ *) Bmat->data; Bdia = Bij->A;
1166   ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr);
1167   if (!*f) PetscFunctionReturn(0);
1168   ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr);
1169   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1170   if (size == 1) PetscFunctionReturn(0);
1171 
1172   /* Hard test: off-diagonal block. This takes a parallel MatGetSubMatrices() call. */
1173   ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr);
1174   ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr);
1175   ierr = PetscMalloc((N-last+first)*sizeof(PetscInt),&notme);CHKERRQ(ierr);
1176   for (i=0; i<first; i++) notme[i] = i;
1177   for (i=last; i<M; i++) notme[i-last+first] = i;
1178   ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,PETSC_COPY_VALUES,&Notme);CHKERRQ(ierr);
1179   ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr);
1180   ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr);
1181   Aoff = Aoffs[0];
1182   ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr);
1183   Boff = Boffs[0];
1184   ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr);
1185   ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr);
1186   ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr);
1187   ierr = ISDestroy(&Me);CHKERRQ(ierr);
1188   ierr = ISDestroy(&Notme);CHKERRQ(ierr);
1189   ierr = PetscFree(notme);CHKERRQ(ierr);
1190   PetscFunctionReturn(0);
1191 }
1192 EXTERN_C_END
1193 
1194 #undef __FUNCT__
1195 #define __FUNCT__ "MatMultTransposeAdd_MPIAIJ"
1196 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1197 {
1198   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1199   PetscErrorCode ierr;
1200 
1201   PetscFunctionBegin;
1202   /* do nondiagonal part */
1203   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1204   /* send it on its way */
1205   ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1206   /* do local part */
1207   ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1208   /* receive remote parts */
1209   ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1210   PetscFunctionReturn(0);
1211 }
1212 
1213 /*
1214   This only works correctly for square matrices where the subblock A->A is the
1215    diagonal block
1216 */
1217 #undef __FUNCT__
1218 #define __FUNCT__ "MatGetDiagonal_MPIAIJ"
1219 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v)
1220 {
1221   PetscErrorCode ierr;
1222   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1223 
1224   PetscFunctionBegin;
1225   if (A->rmap->N != A->cmap->N) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
1226   if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
1227   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
1228   PetscFunctionReturn(0);
1229 }
1230 
1231 #undef __FUNCT__
1232 #define __FUNCT__ "MatScale_MPIAIJ"
1233 PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa)
1234 {
1235   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1236   PetscErrorCode ierr;
1237 
1238   PetscFunctionBegin;
1239   ierr = MatScale(a->A,aa);CHKERRQ(ierr);
1240   ierr = MatScale(a->B,aa);CHKERRQ(ierr);
1241   PetscFunctionReturn(0);
1242 }
1243 
1244 #undef __FUNCT__
1245 #define __FUNCT__ "MatDestroy_MPIAIJ"
1246 PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
1247 {
1248   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1249   PetscErrorCode ierr;
1250 
1251   PetscFunctionBegin;
1252 #if defined(PETSC_USE_LOG)
1253   PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N);
1254 #endif
1255   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
1256   ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
1257   ierr = MatDestroy(&aij->A);CHKERRQ(ierr);
1258   ierr = MatDestroy(&aij->B);CHKERRQ(ierr);
1259 #if defined (PETSC_USE_CTABLE)
1260   ierr = PetscTableDestroy(&aij->colmap);CHKERRQ(ierr);
1261 #else
1262   ierr = PetscFree(aij->colmap);CHKERRQ(ierr);
1263 #endif
1264   ierr = PetscFree(aij->garray);CHKERRQ(ierr);
1265   ierr = VecDestroy(&aij->lvec);CHKERRQ(ierr);
1266   ierr = VecScatterDestroy(&aij->Mvctx);CHKERRQ(ierr);
1267   ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
1268   ierr = PetscFree(aij->ld);CHKERRQ(ierr);
1269   ierr = PetscFree(mat->data);CHKERRQ(ierr);
1270 
1271   ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
1272   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr);
1273   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr);
1274   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr);
1275   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C","",PETSC_NULL);CHKERRQ(ierr);
1276   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
1277   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr);
1278   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C","",PETSC_NULL);CHKERRQ(ierr);
1279   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C","",PETSC_NULL);CHKERRQ(ierr);
1280   PetscFunctionReturn(0);
1281 }
1282 
1283 #undef __FUNCT__
1284 #define __FUNCT__ "MatView_MPIAIJ_Binary"
1285 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
1286 {
1287   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
1288   Mat_SeqAIJ*       A = (Mat_SeqAIJ*)aij->A->data;
1289   Mat_SeqAIJ*       B = (Mat_SeqAIJ*)aij->B->data;
1290   PetscErrorCode    ierr;
1291   PetscMPIInt       rank,size,tag = ((PetscObject)viewer)->tag;
1292   int               fd;
1293   PetscInt          nz,header[4],*row_lengths,*range=0,rlen,i;
1294   PetscInt          nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap->rstart,rnz;
1295   PetscScalar       *column_values;
1296   PetscInt          message_count,flowcontrolcount;
1297 
1298   PetscFunctionBegin;
1299   ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr);
1300   ierr = MPI_Comm_size(((PetscObject)mat)->comm,&size);CHKERRQ(ierr);
1301   nz   = A->nz + B->nz;
1302   if (!rank) {
1303     header[0] = MAT_FILE_CLASSID;
1304     header[1] = mat->rmap->N;
1305     header[2] = mat->cmap->N;
1306     ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
1307     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1308     ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1309     /* get largest number of rows any processor has */
1310     rlen = mat->rmap->n;
1311     range = mat->rmap->range;
1312     for (i=1; i<size; i++) {
1313       rlen = PetscMax(rlen,range[i+1] - range[i]);
1314     }
1315   } else {
1316     ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
1317     rlen = mat->rmap->n;
1318   }
1319 
1320   /* load up the local row counts */
1321   ierr = PetscMalloc((rlen+1)*sizeof(PetscInt),&row_lengths);CHKERRQ(ierr);
1322   for (i=0; i<mat->rmap->n; i++) {
1323     row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
1324   }
1325 
1326   /* store the row lengths to the file */
1327   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1328   if (!rank) {
1329     ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap->n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1330     for (i=1; i<size; i++) {
1331       ierr = PetscViewerFlowControlStepMaster(viewer,i,message_count,flowcontrolcount);CHKERRQ(ierr);
1332       rlen = range[i+1] - range[i];
1333       ierr = MPILong_Recv(row_lengths,rlen,MPIU_INT,i,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1334       ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1335     }
1336     ierr = PetscViewerFlowControlEndMaster(viewer,message_count);CHKERRQ(ierr);
1337   } else {
1338     ierr = PetscViewerFlowControlStepWorker(viewer,rank,message_count);CHKERRQ(ierr);
1339     ierr = MPILong_Send(row_lengths,mat->rmap->n,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1340     ierr = PetscViewerFlowControlEndWorker(viewer,message_count);CHKERRQ(ierr);
1341   }
1342   ierr = PetscFree(row_lengths);CHKERRQ(ierr);
1343 
1344   /* load up the local column indices */
1345   nzmax = nz; /* 0th processor needs space for what the largest processor needs */
1346   ierr = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
1347   ierr = PetscMalloc((nzmax+1)*sizeof(PetscInt),&column_indices);CHKERRQ(ierr);
1348   cnt  = 0;
1349   for (i=0; i<mat->rmap->n; i++) {
1350     for (j=B->i[i]; j<B->i[i+1]; j++) {
1351       if ((col = garray[B->j[j]]) > cstart) break;
1352       column_indices[cnt++] = col;
1353     }
1354     for (k=A->i[i]; k<A->i[i+1]; k++) {
1355       column_indices[cnt++] = A->j[k] + cstart;
1356     }
1357     for (; j<B->i[i+1]; j++) {
1358       column_indices[cnt++] = garray[B->j[j]];
1359     }
1360   }
1361   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
1362 
1363   /* store the column indices to the file */
1364   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1365   if (!rank) {
1366     MPI_Status status;
1367     ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1368     for (i=1; i<size; i++) {
1369       ierr = PetscViewerFlowControlStepMaster(viewer,i,message_count,flowcontrolcount);CHKERRQ(ierr);
1370       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
1371       if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: rnz = %D nzmax = %D",rnz,nzmax);
1372       ierr = MPILong_Recv(column_indices,rnz,MPIU_INT,i,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1373       ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1374     }
1375     ierr = PetscViewerFlowControlEndMaster(viewer,message_count);CHKERRQ(ierr);
1376   } else {
1377     ierr = PetscViewerFlowControlStepWorker(viewer,rank,message_count);CHKERRQ(ierr);
1378     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1379     ierr = MPILong_Send(column_indices,nz,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1380     ierr = PetscViewerFlowControlEndWorker(viewer,message_count);CHKERRQ(ierr);
1381   }
1382   ierr = PetscFree(column_indices);CHKERRQ(ierr);
1383 
1384   /* load up the local column values */
1385   ierr = PetscMalloc((nzmax+1)*sizeof(PetscScalar),&column_values);CHKERRQ(ierr);
1386   cnt  = 0;
1387   for (i=0; i<mat->rmap->n; i++) {
1388     for (j=B->i[i]; j<B->i[i+1]; j++) {
1389       if (garray[B->j[j]] > cstart) break;
1390       column_values[cnt++] = B->a[j];
1391     }
1392     for (k=A->i[i]; k<A->i[i+1]; k++) {
1393       column_values[cnt++] = A->a[k];
1394     }
1395     for (; j<B->i[i+1]; j++) {
1396       column_values[cnt++] = B->a[j];
1397     }
1398   }
1399   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
1400 
1401   /* store the column values to the file */
1402   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1403   if (!rank) {
1404     MPI_Status status;
1405     ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1406     for (i=1; i<size; i++) {
1407       ierr = PetscViewerFlowControlStepMaster(viewer,i,message_count,flowcontrolcount);CHKERRQ(ierr);
1408       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
1409       if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: rnz = %D nzmax = %D",rnz,nzmax);
1410       ierr = MPILong_Recv(column_values,rnz,MPIU_SCALAR,i,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1411       ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1412     }
1413     ierr = PetscViewerFlowControlEndMaster(viewer,message_count);CHKERRQ(ierr);
1414   } else {
1415     ierr = PetscViewerFlowControlStepWorker(viewer,rank,message_count);CHKERRQ(ierr);
1416     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1417     ierr = MPILong_Send(column_values,nz,MPIU_SCALAR,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1418     ierr = PetscViewerFlowControlEndWorker(viewer,message_count);CHKERRQ(ierr);
1419   }
1420   ierr = PetscFree(column_values);CHKERRQ(ierr);
1421   PetscFunctionReturn(0);
1422 }
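
/*
   Usage sketch (illustrative only, not invoked in this file): the binary
   path above is normally reached through MatView() with a binary viewer.
   Assuming an assembled MPIAIJ matrix A and the hypothetical file name
   "matrix.dat",

       PetscViewer viewer;
       ierr = PetscViewerBinaryOpen(PETSC_COMM_WORLD,"matrix.dat",FILE_MODE_WRITE,&viewer);CHKERRQ(ierr);
       ierr = MatView(A,viewer);CHKERRQ(ierr);
       ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr);

   writes the header, row lengths, column indices, and values in the order
   produced above; MatLoad() can read the file back into a MATAIJ matrix.
*/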
1423 
1424 #undef __FUNCT__
1425 #define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket"
1426 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
1427 {
1428   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
1429   PetscErrorCode    ierr;
1430   PetscMPIInt       rank = aij->rank,size = aij->size;
1431   PetscBool         isdraw,iascii,isbinary;
1432   PetscViewer       sviewer;
1433   PetscViewerFormat format;
1434 
1435   PetscFunctionBegin;
1436   ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1437   ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1438   ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1439   if (iascii) {
1440     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1441     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1442       MatInfo    info;
1443       PetscBool  inodes;
1444 
1445       ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr);
1446       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
1447       ierr = MatInodeGetInodeSizes(aij->A,PETSC_NULL,(PetscInt **)&inodes,PETSC_NULL);CHKERRQ(ierr);
1448       ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_TRUE);CHKERRQ(ierr);
1449       if (!inodes) {
1450         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, not using I-node routines\n",
1451                                                   rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
1452       } else {
1453         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, using I-node routines\n",
1454                                                   rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
1455       }
1456       ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
1457       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1458       ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
1459       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1460       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1461       ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_FALSE);CHKERRQ(ierr);
1462       ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
1463       ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr);
1464       PetscFunctionReturn(0);
1465     } else if (format == PETSC_VIEWER_ASCII_INFO) {
1466       PetscInt   inodecount,inodelimit,*inodes;
1467       ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr);
1468       if (inodes) {
1469         ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr);
1470       } else {
1471         ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr);
1472       }
1473       PetscFunctionReturn(0);
1474     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1475       PetscFunctionReturn(0);
1476     }
1477   } else if (isbinary) {
1478     if (size == 1) {
1479       ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1480       ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
1481     } else {
1482       ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr);
1483     }
1484     PetscFunctionReturn(0);
1485   } else if (isdraw) {
1486     PetscDraw  draw;
1487     PetscBool  isnull;
1488     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
1489     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
1490   }
1491 
1492   if (size == 1) {
1493     ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1494     ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
1495   } else {
1496     /* assemble the entire matrix onto first processor. */
1497     Mat         A;
1498     Mat_SeqAIJ  *Aloc;
1499     PetscInt    M = mat->rmap->N,N = mat->cmap->N,m,*ai,*aj,row,*cols,i,*ct;
1500     MatScalar   *a;
1501 
1502     if (mat->rmap->N > 1024) {
1503       PetscBool  flg = PETSC_FALSE;
1504 
1505       ierr = PetscOptionsGetBool(((PetscObject) mat)->prefix, "-mat_ascii_output_large", &flg,PETSC_NULL);CHKERRQ(ierr);
1506       if (!flg) {
1507         SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_OUTOFRANGE,"ASCII matrix output not allowed for matrices with more than 1024 rows, use binary format instead.\nYou can override this restriction using -mat_ascii_output_large.");
1508       }
1509     }
1510 
1511     ierr = MatCreate(((PetscObject)mat)->comm,&A);CHKERRQ(ierr);
1512     if (!rank) {
1513       ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
1514     } else {
1515       ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
1516     }
1517     /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */
1518     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
1519     ierr = MatMPIAIJSetPreallocation(A,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
1520     ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
1521     ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr);
1522 
1523     /* copy over the A part */
1524     Aloc = (Mat_SeqAIJ*)aij->A->data;
1525     m = aij->A->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1526     row = mat->rmap->rstart;
1527     for (i=0; i<ai[m]; i++) {aj[i] += mat->cmap->rstart;}
1528     for (i=0; i<m; i++) {
1529       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
1530       row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
1531     }
1532     aj = Aloc->j;
1533     for (i=0; i<ai[m]; i++) {aj[i] -= mat->cmap->rstart;}
1534 
1535     /* copy over the B part */
1536     Aloc = (Mat_SeqAIJ*)aij->B->data;
1537     m    = aij->B->rmap->n;  ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1538     row  = mat->rmap->rstart;
1539     ierr = PetscMalloc((ai[m]+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr);
1540     ct   = cols;
1541     for (i=0; i<ai[m]; i++) {cols[i] = aij->garray[aj[i]];}
1542     for (i=0; i<m; i++) {
1543       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
1544       row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
1545     }
1546     ierr = PetscFree(ct);CHKERRQ(ierr);
1547     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1548     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1549     /*
1550        Everyone has to call to draw the matrix since the graphics waits are
1551        synchronized across all processors that share the PetscDraw object
1552     */
1553     ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
1554     if (!rank) {
1555       ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1556       /* Set the type name to MATMPIAIJ so that the correct type can be printed out by PetscObjectPrintClassNamePrefixType() in MatView_SeqAIJ_ASCII()*/
1557       ierr = PetscStrcpy(((PetscObject)((Mat_MPIAIJ*)(A->data))->A)->type_name,MATMPIAIJ);CHKERRQ(ierr);
1558       ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
1559     }
1560     ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
1561     ierr = MatDestroy(&A);CHKERRQ(ierr);
1562   }
1563   PetscFunctionReturn(0);
1564 }
1565 
1566 #undef __FUNCT__
1567 #define __FUNCT__ "MatView_MPIAIJ"
1568 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer)
1569 {
1570   PetscErrorCode ierr;
1571   PetscBool      iascii,isdraw,issocket,isbinary;
1572 
1573   PetscFunctionBegin;
1574   ierr  = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1575   ierr  = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1576   ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1577   ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
1578   if (iascii || isdraw || isbinary || issocket) {
1579     ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
1580   } else {
1581     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Viewer type %s not supported by MPIAIJ matrices",((PetscObject)viewer)->type_name);
1582   }
1583   PetscFunctionReturn(0);
1584 }
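
/*
   Usage sketch (illustrative): the viewer paths dispatched above are most
   often selected at runtime rather than called directly, e.g. with the
   standard options of this release

       -mat_view_info    prints the PETSC_VIEWER_ASCII_INFO summary
       -mat_view_draw    draws the nonzero structure in an X window

   or programmatically, assuming an assembled matrix A:

       ierr = MatView(A,PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr);
*/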
1585 
1586 #undef __FUNCT__
1587 #define __FUNCT__ "MatSOR_MPIAIJ"
1588 PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
1589 {
1590   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1591   PetscErrorCode ierr;
1592   Vec            bb1 = 0;
1593   PetscBool      hasop;
1594 
1595   PetscFunctionBegin;
1596   if (its > 1 || !(flag & SOR_ZERO_INITIAL_GUESS) || flag & SOR_EISENSTAT) {
1597     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
1598   }
1599 
1600   if (flag == SOR_APPLY_UPPER) {
1601     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1602     PetscFunctionReturn(0);
1603   }
1604 
1605   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
1606     if (flag & SOR_ZERO_INITIAL_GUESS) {
1607       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1608       its--;
1609     }
1610 
1611     while (its--) {
1612       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1613       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1614 
1615       /* update rhs: bb1 = bb - B*x */
1616       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1617       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1618 
1619       /* local sweep */
1620       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1621     }
1622   } else if (flag & SOR_LOCAL_FORWARD_SWEEP){
1623     if (flag & SOR_ZERO_INITIAL_GUESS) {
1624       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1625       its--;
1626     }
1627     while (its--) {
1628       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1629       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1630 
1631       /* update rhs: bb1 = bb - B*x */
1632       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1633       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1634 
1635       /* local sweep */
1636       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1637     }
1638   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){
1639     if (flag & SOR_ZERO_INITIAL_GUESS) {
1640       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1641       its--;
1642     }
1643     while (its--) {
1644       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1645       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1646 
1647       /* update rhs: bb1 = bb - B*x */
1648       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1649       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1650 
1651       /* local sweep */
1652       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1653     }
1654   }  else if (flag & SOR_EISENSTAT) {
1655     Vec         xx1;
1656 
1657     ierr = VecDuplicate(bb,&xx1);CHKERRQ(ierr);
1658     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr);
1659 
1660     ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1661     ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1662     if (!mat->diag) {
1663       ierr = MatGetVecs(matin,&mat->diag,PETSC_NULL);CHKERRQ(ierr);
1664       ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr);
1665     }
1666     ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr);
1667     if (hasop) {
1668       ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr);
1669     } else {
1670       ierr = VecPointwiseMult(bb1,mat->diag,xx);CHKERRQ(ierr);
1671     }
1672     ierr = VecAYPX(bb1,(omega-2.0)/omega,bb);CHKERRQ(ierr);
1673 
1674     ierr = MatMultAdd(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr);
1675 
1676     /* local sweep */
1677     ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr);
1678     ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr);
1679     ierr = VecDestroy(&xx1);CHKERRQ(ierr);
1680   } else SETERRQ(((PetscObject)matin)->comm,PETSC_ERR_SUP,"Parallel SOR not supported");
1681 
1682   ierr = VecDestroy(&bb1);CHKERRQ(ierr);
1683   PetscFunctionReturn(0);
1684 }
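
/*
   Note on the update used above (a restatement of the code, not new
   behavior): each local sweep applies SOR to the diagonal block A only,
   after folding the off-process coupling into the right-hand side,

       bb1 = bb - B*lvec,   where lvec holds the ghosted off-process part of xx,

   which is why only the SOR_LOCAL_* variants (processor-local block Jacobi
   with SOR on each block) are supported in parallel. A typical illustrative
   way to reach this routine is through the SOR preconditioner, e.g.

       -ksp_type richardson -pc_type sor -pc_sor_local_symmetric
*/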
1685 
1686 #undef __FUNCT__
1687 #define __FUNCT__ "MatPermute_MPIAIJ"
1688 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B)
1689 {
1690   MPI_Comm       comm;
1691   PetscInt       first,local_rowsize,local_colsize;
1692   const PetscInt *rows;
1693   IS             crowp,growp,irowp,lrowp,lcolp,icolp;
1694   PetscErrorCode ierr;
1695 
1696   PetscFunctionBegin;
1697   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
1698   /* make a collective version of 'rowp', this is to be tolerant of users who pass serial index sets */
1699   ierr = ISOnComm(rowp,comm,PETSC_USE_POINTER,&crowp);CHKERRQ(ierr);
1700   /* collect the global row permutation and invert it */
1701   ierr = ISAllGather(crowp,&growp);CHKERRQ(ierr);
1702   ierr = ISSetPermutation(growp);CHKERRQ(ierr);
1703   ierr = ISDestroy(&crowp);CHKERRQ(ierr);
1704   ierr = ISInvertPermutation(growp,PETSC_DECIDE,&irowp);CHKERRQ(ierr);
1705   ierr = ISDestroy(&growp);CHKERRQ(ierr);
1706   /* get the local target indices */
1707   ierr = MatGetOwnershipRange(A,&first,PETSC_NULL);CHKERRQ(ierr);
1708   ierr = MatGetLocalSize(A,&local_rowsize,&local_colsize);CHKERRQ(ierr);
1709   ierr = ISGetIndices(irowp,&rows);CHKERRQ(ierr);
1710   ierr = ISCreateGeneral(PETSC_COMM_SELF,local_rowsize,rows+first,PETSC_COPY_VALUES,&lrowp);CHKERRQ(ierr);
1711   ierr = ISRestoreIndices(irowp,&rows);CHKERRQ(ierr);
1712   ierr = ISDestroy(&irowp);CHKERRQ(ierr);
1713   /* the column permutation is so much easier;
1714      make a local version of 'colp' and invert it */
1715   ierr = ISOnComm(colp,PETSC_COMM_SELF,PETSC_USE_POINTER,&lcolp);CHKERRQ(ierr);
1716   ierr = ISSetPermutation(lcolp);CHKERRQ(ierr);
1717   ierr = ISInvertPermutation(lcolp,PETSC_DECIDE,&icolp);CHKERRQ(ierr);
1718   ierr = ISDestroy(&lcolp);CHKERRQ(ierr);
1719   /* now we just get the submatrix */
1720   ierr = MatGetSubMatrix_MPIAIJ_Private(A,lrowp,icolp,local_colsize,MAT_INITIAL_MATRIX,B);CHKERRQ(ierr);
1721   /* clean up */
1722   ierr = ISDestroy(&lrowp);CHKERRQ(ierr);
1723   ierr = ISDestroy(&icolp);CHKERRQ(ierr);
1724   PetscFunctionReturn(0);
1725 }
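
/*
   Usage sketch (illustrative): callers reach this routine through
   MatPermute(A,rowp,colp,&B); as the code above shows, a serial rowp is
   tolerated and gathered into a global permutation. Assuming hypothetical
   index sets built with ISCreateGeneral() and marked with
   ISSetPermutation(), a call looks like

       Mat B;
       ierr = MatPermute(A,rowp,colp,&B);CHKERRQ(ierr);
*/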
1726 
1727 #undef __FUNCT__
1728 #define __FUNCT__ "MatGetInfo_MPIAIJ"
1729 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1730 {
1731   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1732   Mat            A = mat->A,B = mat->B;
1733   PetscErrorCode ierr;
1734   PetscReal      isend[5],irecv[5];
1735 
1736   PetscFunctionBegin;
1737   info->block_size     = 1.0;
1738   ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1739   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1740   isend[3] = info->memory;  isend[4] = info->mallocs;
1741   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1742   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1743   isend[3] += info->memory;  isend[4] += info->mallocs;
1744   if (flag == MAT_LOCAL) {
1745     info->nz_used      = isend[0];
1746     info->nz_allocated = isend[1];
1747     info->nz_unneeded  = isend[2];
1748     info->memory       = isend[3];
1749     info->mallocs      = isend[4];
1750   } else if (flag == MAT_GLOBAL_MAX) {
1751     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,((PetscObject)matin)->comm);CHKERRQ(ierr);
1752     info->nz_used      = irecv[0];
1753     info->nz_allocated = irecv[1];
1754     info->nz_unneeded  = irecv[2];
1755     info->memory       = irecv[3];
1756     info->mallocs      = irecv[4];
1757   } else if (flag == MAT_GLOBAL_SUM) {
1758     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,((PetscObject)matin)->comm);CHKERRQ(ierr);
1759     info->nz_used      = irecv[0];
1760     info->nz_allocated = irecv[1];
1761     info->nz_unneeded  = irecv[2];
1762     info->memory       = irecv[3];
1763     info->mallocs      = irecv[4];
1764   }
1765   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1766   info->fill_ratio_needed = 0;
1767   info->factor_mallocs    = 0;
1768 
1769   PetscFunctionReturn(0);
1770 }
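
/*
   Usage sketch (illustrative): the flag selects how the five local counters
   gathered above are combined. For example, to report the total number of
   nonzeros stored across all processes:

       MatInfo info;
       ierr = MatGetInfo(A,MAT_GLOBAL_SUM,&info);CHKERRQ(ierr);
       ierr = PetscPrintf(PETSC_COMM_WORLD,"nz_used %G\n",info.nz_used);CHKERRQ(ierr);
*/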
1771 
1772 #undef __FUNCT__
1773 #define __FUNCT__ "MatSetOption_MPIAIJ"
1774 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscBool  flg)
1775 {
1776   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1777   PetscErrorCode ierr;
1778 
1779   PetscFunctionBegin;
1780   switch (op) {
1781   case MAT_NEW_NONZERO_LOCATIONS:
1782   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1783   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1784   case MAT_KEEP_NONZERO_PATTERN:
1785   case MAT_NEW_NONZERO_LOCATION_ERR:
1786   case MAT_USE_INODES:
1787   case MAT_IGNORE_ZERO_ENTRIES:
1788     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1789     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1790     break;
1791   case MAT_ROW_ORIENTED:
1792     a->roworiented = flg;
1793     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1794     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1795     break;
1796   case MAT_NEW_DIAGONALS:
1797     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1798     break;
1799   case MAT_IGNORE_OFF_PROC_ENTRIES:
1800     a->donotstash = flg;
1801     break;
1802   case MAT_SPD:
1803     A->spd_set                         = PETSC_TRUE;
1804     A->spd                             = flg;
1805     if (flg) {
1806       A->symmetric                     = PETSC_TRUE;
1807       A->structurally_symmetric        = PETSC_TRUE;
1808       A->symmetric_set                 = PETSC_TRUE;
1809       A->structurally_symmetric_set    = PETSC_TRUE;
1810     }
1811     break;
1812   case MAT_SYMMETRIC:
1813     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1814     break;
1815   case MAT_STRUCTURALLY_SYMMETRIC:
1816     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1817     break;
1818   case MAT_HERMITIAN:
1819     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1820     break;
1821   case MAT_SYMMETRY_ETERNAL:
1822     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1823     break;
1824   default:
1825     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op);
1826   }
1827   PetscFunctionReturn(0);
1828 }
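
/*
   Usage sketch (illustrative): options are forwarded to the sequential
   diagonal (a->A) and off-diagonal (a->B) blocks where that makes sense.
   Two common calls, assuming an assembled matrix A: keep the nonzero
   structure when rows are later zeroed, and skip stash communication when
   every process sets only its own rows:

       ierr = MatSetOption(A,MAT_KEEP_NONZERO_PATTERN,PETSC_TRUE);CHKERRQ(ierr);
       ierr = MatSetOption(A,MAT_IGNORE_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr);
*/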
1829 
1830 #undef __FUNCT__
1831 #define __FUNCT__ "MatGetRow_MPIAIJ"
1832 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1833 {
1834   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1835   PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
1836   PetscErrorCode ierr;
1837   PetscInt       i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart;
1838   PetscInt       nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend;
1839   PetscInt       *cmap,*idx_p;
1840 
1841   PetscFunctionBegin;
1842   if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
1843   mat->getrowactive = PETSC_TRUE;
1844 
1845   if (!mat->rowvalues && (idx || v)) {
1846     /*
1847         allocate enough space to hold information from the longest row.
1848     */
1849     Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data;
1850     PetscInt   max = 1,tmp;
1851     for (i=0; i<matin->rmap->n; i++) {
1852       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1853       if (max < tmp) { max = tmp; }
1854     }
1855     ierr = PetscMalloc2(max,PetscScalar,&mat->rowvalues,max,PetscInt,&mat->rowindices);CHKERRQ(ierr);
1856   }
1857 
1858   if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows");
1859   lrow = row - rstart;
1860 
1861   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1862   if (!v)   {pvA = 0; pvB = 0;}
1863   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1864   ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1865   ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1866   nztot = nzA + nzB;
1867 
1868   cmap  = mat->garray;
1869   if (v  || idx) {
1870     if (nztot) {
1871       /* Sort by increasing column numbers, assuming A and B already sorted */
1872       PetscInt imark = -1;
1873       if (v) {
1874         *v = v_p = mat->rowvalues;
1875         for (i=0; i<nzB; i++) {
1876           if (cmap[cworkB[i]] < cstart)   v_p[i] = vworkB[i];
1877           else break;
1878         }
1879         imark = i;
1880         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1881         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1882       }
1883       if (idx) {
1884         *idx = idx_p = mat->rowindices;
1885         if (imark > -1) {
1886           for (i=0; i<imark; i++) {
1887             idx_p[i] = cmap[cworkB[i]];
1888           }
1889         } else {
1890           for (i=0; i<nzB; i++) {
1891             if (cmap[cworkB[i]] < cstart)   idx_p[i] = cmap[cworkB[i]];
1892             else break;
1893           }
1894           imark = i;
1895         }
1896         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart + cworkA[i];
1897         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]];
1898       }
1899     } else {
1900       if (idx) *idx = 0;
1901       if (v)   *v   = 0;
1902     }
1903   }
1904   *nz = nztot;
1905   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1906   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1907   PetscFunctionReturn(0);
1908 }
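
/*
   Usage sketch (illustrative): only locally owned rows may be requested, and
   each MatGetRow() must be paired with a MatRestoreRow() before the next
   call. Assuming an assembled MPIAIJ matrix A:

       PetscInt          row,rstart,rend,ncols;
       const PetscInt    *cols;
       const PetscScalar *vals;
       ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
       for (row=rstart; row<rend; row++) {
         ierr = MatGetRow(A,row,&ncols,&cols,&vals);CHKERRQ(ierr);
         ierr = MatRestoreRow(A,row,&ncols,&cols,&vals);CHKERRQ(ierr);
       }
*/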
1909 
1910 #undef __FUNCT__
1911 #define __FUNCT__ "MatRestoreRow_MPIAIJ"
1912 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1913 {
1914   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1915 
1916   PetscFunctionBegin;
1917   if (!aij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
1918   aij->getrowactive = PETSC_FALSE;
1919   PetscFunctionReturn(0);
1920 }
1921 
1922 #undef __FUNCT__
1923 #define __FUNCT__ "MatNorm_MPIAIJ"
1924 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm)
1925 {
1926   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1927   Mat_SeqAIJ     *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data;
1928   PetscErrorCode ierr;
1929   PetscInt       i,j,cstart = mat->cmap->rstart;
1930   PetscReal      sum = 0.0;
1931   MatScalar      *v;
1932 
1933   PetscFunctionBegin;
1934   if (aij->size == 1) {
1935     ierr =  MatNorm(aij->A,type,norm);CHKERRQ(ierr);
1936   } else {
1937     if (type == NORM_FROBENIUS) {
1938       v = amat->a;
1939       for (i=0; i<amat->nz; i++) {
1940 #if defined(PETSC_USE_COMPLEX)
1941         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1942 #else
1943         sum += (*v)*(*v); v++;
1944 #endif
1945       }
1946       v = bmat->a;
1947       for (i=0; i<bmat->nz; i++) {
1948 #if defined(PETSC_USE_COMPLEX)
1949         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1950 #else
1951         sum += (*v)*(*v); v++;
1952 #endif
1953       }
1954       ierr = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPIU_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr);
1955       *norm = PetscSqrtReal(*norm);
1956     } else if (type == NORM_1) { /* max column norm */
1957       PetscReal *tmp,*tmp2;
1958       PetscInt  *jj,*garray = aij->garray;
1959       ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr);
1960       ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscReal),&tmp2);CHKERRQ(ierr);
1961       ierr = PetscMemzero(tmp,mat->cmap->N*sizeof(PetscReal));CHKERRQ(ierr);
1962       *norm = 0.0;
1963       v = amat->a; jj = amat->j;
1964       for (j=0; j<amat->nz; j++) {
1965         tmp[cstart + *jj++] += PetscAbsScalar(*v); v++;
1966       }
1967       v = bmat->a; jj = bmat->j;
1968       for (j=0; j<bmat->nz; j++) {
1969         tmp[garray[*jj++]] += PetscAbsScalar(*v); v++;
1970       }
1971       ierr = MPI_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr);
1972       for (j=0; j<mat->cmap->N; j++) {
1973         if (tmp2[j] > *norm) *norm = tmp2[j];
1974       }
1975       ierr = PetscFree(tmp);CHKERRQ(ierr);
1976       ierr = PetscFree(tmp2);CHKERRQ(ierr);
1977     } else if (type == NORM_INFINITY) { /* max row norm */
1978       PetscReal ntemp = 0.0;
1979       for (j=0; j<aij->A->rmap->n; j++) {
1980         v = amat->a + amat->i[j];
1981         sum = 0.0;
1982         for (i=0; i<amat->i[j+1]-amat->i[j]; i++) {
1983           sum += PetscAbsScalar(*v); v++;
1984         }
1985         v = bmat->a + bmat->i[j];
1986         for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) {
1987           sum += PetscAbsScalar(*v); v++;
1988         }
1989         if (sum > ntemp) ntemp = sum;
1990       }
1991       ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPIU_MAX,((PetscObject)mat)->comm);CHKERRQ(ierr);
1992     } else {
1993       SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_SUP,"No support for two norm");
1994     }
1995   }
1996   PetscFunctionReturn(0);
1997 }
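
/*
   The three supported norms, for the global matrix with entries a_ij:

       NORM_FROBENIUS   sqrt( sum_ij |a_ij|^2 )   (sum of local sums, then sqrt)
       NORM_1           max_j sum_i |a_ij|        (max column sum)
       NORM_INFINITY    max_i sum_j |a_ij|        (max row sum)

   Usage sketch (illustrative):

       PetscReal nrm;
       ierr = MatNorm(A,NORM_INFINITY,&nrm);CHKERRQ(ierr);
*/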
1998 
1999 #undef __FUNCT__
2000 #define __FUNCT__ "MatTranspose_MPIAIJ"
2001 PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout)
2002 {
2003   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2004   Mat_SeqAIJ     *Aloc=(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data;
2005   PetscErrorCode ierr;
2006   PetscInt       M = A->rmap->N,N = A->cmap->N,ma,na,mb,*ai,*aj,*bi,*bj,row,*cols,*cols_tmp,i,*d_nnz;
2007   PetscInt       cstart=A->cmap->rstart,ncol;
2008   Mat            B;
2009   MatScalar      *array;
2010 
2011   PetscFunctionBegin;
2012   if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_ARG_SIZ,"In-place transpose is only supported for square matrices");
2013 
2014   ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n;
2015   ai = Aloc->i; aj = Aloc->j;
2016   bi = Bloc->i; bj = Bloc->j;
2017   if (reuse == MAT_INITIAL_MATRIX || *matout == A) {
2018     /* compute d_nnz for preallocation; o_nnz is approximated by d_nnz to avoid communication */
2019     ierr = PetscMalloc((1+na)*sizeof(PetscInt),&d_nnz);CHKERRQ(ierr);
2020     ierr = PetscMemzero(d_nnz,(1+na)*sizeof(PetscInt));CHKERRQ(ierr);
2021     for (i=0; i<ai[ma]; i++) {
2022       d_nnz[aj[i]]++;
2023       aj[i] += cstart; /* global col index to be used by MatSetValues() */
2024     }
2025 
2026     ierr = MatCreate(((PetscObject)A)->comm,&B);CHKERRQ(ierr);
2027     ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
2028     ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
2029     ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,d_nnz);CHKERRQ(ierr);
2030     ierr = PetscFree(d_nnz);CHKERRQ(ierr);
2031   } else {
2032     B = *matout;
2033   }
2034 
2035   /* copy over the A part */
2036   array = Aloc->a;
2037   row = A->rmap->rstart;
2038   for (i=0; i<ma; i++) {
2039     ncol = ai[i+1]-ai[i];
2040     ierr = MatSetValues(B,ncol,aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
2041     row++; array += ncol; aj += ncol;
2042   }
2043   aj = Aloc->j;
2044   for (i=0; i<ai[ma]; i++) aj[i] -= cstart; /* restore local col index */
2045 
2046   /* copy over the B part */
2047   ierr = PetscMalloc(bi[mb]*sizeof(PetscInt),&cols);CHKERRQ(ierr);
2048   ierr = PetscMemzero(cols,bi[mb]*sizeof(PetscInt));CHKERRQ(ierr);
2049   array = Bloc->a;
2050   row = A->rmap->rstart;
2051   for (i=0; i<bi[mb]; i++) {cols[i] = a->garray[bj[i]];}
2052   cols_tmp = cols;
2053   for (i=0; i<mb; i++) {
2054     ncol = bi[i+1]-bi[i];
2055     ierr = MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
2056     row++; array += ncol; cols_tmp += ncol;
2057   }
2058   ierr = PetscFree(cols);CHKERRQ(ierr);
2059 
2060   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2061   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2062   if (reuse == MAT_INITIAL_MATRIX || *matout != A) {
2063     *matout = B;
2064   } else {
2065     ierr = MatHeaderMerge(A,B);CHKERRQ(ierr);
2066   }
2067   PetscFunctionReturn(0);
2068 }
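
/*
   Usage sketch (illustrative): the out-of-place and in-place transposes are
   reached as

       Mat At;
       ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr);

   or, for a square matrix only (see the size check above),

       ierr = MatTranspose(A,MAT_REUSE_MATRIX,&A);CHKERRQ(ierr);
*/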
2069 
2070 #undef __FUNCT__
2071 #define __FUNCT__ "MatDiagonalScale_MPIAIJ"
2072 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
2073 {
2074   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2075   Mat            a = aij->A,b = aij->B;
2076   PetscErrorCode ierr;
2077   PetscInt       s1,s2,s3;
2078 
2079   PetscFunctionBegin;
2080   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
2081   if (rr) {
2082     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
2083     if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
2084     /* Overlap communication with computation. */
2085     ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2086   }
2087   if (ll) {
2088     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
2089     if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
2090     ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr);
2091   }
2092   /* scale  the diagonal block */
2093   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
2094 
2095   if (rr) {
2096     /* Do a scatter end and then right scale the off-diagonal block */
2097     ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2098     ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr);
2099   }
2100 
2101   PetscFunctionReturn(0);
2102 }
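
/*
   Usage sketch (illustrative): this computes A = diag(ll)*A*diag(rr); either
   vector may be PETSC_NULL to scale on one side only. Per the size checks
   above, ll must be distributed like the rows of A and rr like its columns:

       ierr = MatDiagonalScale(A,ll,rr);CHKERRQ(ierr);
*/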
2103 
2104 #undef __FUNCT__
2105 #define __FUNCT__ "MatSetBlockSize_MPIAIJ"
2106 PetscErrorCode MatSetBlockSize_MPIAIJ(Mat A,PetscInt bs)
2107 {
2108   Mat_MPIAIJ     *a   = (Mat_MPIAIJ*)A->data;
2109   PetscErrorCode ierr;
2110 
2111   PetscFunctionBegin;
2112   ierr = MatSetBlockSize(a->A,bs);CHKERRQ(ierr);
2113   ierr = MatSetBlockSize(a->B,bs);CHKERRQ(ierr);
2114   ierr = PetscLayoutSetBlockSize(A->rmap,bs);CHKERRQ(ierr);
2115   ierr = PetscLayoutSetBlockSize(A->cmap,bs);CHKERRQ(ierr);
2116   PetscFunctionReturn(0);
2117 }
2118 #undef __FUNCT__
2119 #define __FUNCT__ "MatSetUnfactored_MPIAIJ"
2120 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A)
2121 {
2122   Mat_MPIAIJ     *a   = (Mat_MPIAIJ*)A->data;
2123   PetscErrorCode ierr;
2124 
2125   PetscFunctionBegin;
2126   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
2127   PetscFunctionReturn(0);
2128 }
2129 
2130 #undef __FUNCT__
2131 #define __FUNCT__ "MatEqual_MPIAIJ"
2132 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscBool  *flag)
2133 {
2134   Mat_MPIAIJ     *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data;
2135   Mat            a,b,c,d;
2136   PetscBool      flg;
2137   PetscErrorCode ierr;
2138 
2139   PetscFunctionBegin;
2140   a = matA->A; b = matA->B;
2141   c = matB->A; d = matB->B;
2142 
2143   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
2144   if (flg) {
2145     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
2146   }
2147   ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,((PetscObject)A)->comm);CHKERRQ(ierr);
2148   PetscFunctionReturn(0);
2149 }
2150 
2151 #undef __FUNCT__
2152 #define __FUNCT__ "MatCopy_MPIAIJ"
2153 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
2154 {
2155   PetscErrorCode ierr;
2156   Mat_MPIAIJ     *a = (Mat_MPIAIJ *)A->data;
2157   Mat_MPIAIJ     *b = (Mat_MPIAIJ *)B->data;
2158 
2159   PetscFunctionBegin;
2160   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
2161   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
2162     /* because of the column compression in the off-processor part of the matrix a->B,
2163        the number of columns in a->B and b->B may be different, hence we cannot call
2164        the MatCopy() directly on the two parts. If need be, we can provide a more
2165        efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices
2166        then copying the submatrices */
2167     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
2168   } else {
2169     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
2170     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
2171   }
2172   PetscFunctionReturn(0);
2173 }
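
/*
   Usage sketch (illustrative): the fast path above requires that B share A's
   nonzero pattern, e.g. when B was created with MatDuplicate():

       Mat B;
       ierr = MatDuplicate(A,MAT_DO_NOT_COPY_VALUES,&B);CHKERRQ(ierr);
       ierr = MatCopy(A,B,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
*/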
2174 
2175 #undef __FUNCT__
2176 #define __FUNCT__ "MatSetUpPreallocation_MPIAIJ"
2177 PetscErrorCode MatSetUpPreallocation_MPIAIJ(Mat A)
2178 {
2179   PetscErrorCode ierr;
2180 
2181   PetscFunctionBegin;
2182   ierr =  MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
2183   PetscFunctionReturn(0);
2184 }
2185 
2186 #undef __FUNCT__
2187 #define __FUNCT__ "MatAXPY_MPIAIJ"
2188 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
2189 {
2190   PetscErrorCode ierr;
2191   PetscInt       i;
2192   Mat_MPIAIJ     *xx = (Mat_MPIAIJ *)X->data,*yy = (Mat_MPIAIJ *)Y->data;
2193   PetscBLASInt   bnz,one=1;
2194   Mat_SeqAIJ     *x,*y;
2195 
2196   PetscFunctionBegin;
2197   if (str == SAME_NONZERO_PATTERN) {
2198     PetscScalar alpha = a;
2199     x = (Mat_SeqAIJ *)xx->A->data;
2200     y = (Mat_SeqAIJ *)yy->A->data;
2201     bnz = PetscBLASIntCast(x->nz);
2202     BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
2203     x = (Mat_SeqAIJ *)xx->B->data;
2204     y = (Mat_SeqAIJ *)yy->B->data;
2205     bnz = PetscBLASIntCast(x->nz);
2206     BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
2207   } else if (str == SUBSET_NONZERO_PATTERN) {
2208     ierr = MatAXPY_SeqAIJ(yy->A,a,xx->A,str);CHKERRQ(ierr);
2209 
2210     x = (Mat_SeqAIJ *)xx->B->data;
2211     y = (Mat_SeqAIJ *)yy->B->data;
2212     if (y->xtoy && y->XtoY != xx->B) {
2213       ierr = PetscFree(y->xtoy);CHKERRQ(ierr);
2214       ierr = MatDestroy(&y->XtoY);CHKERRQ(ierr);
2215     }
2216     if (!y->xtoy) { /* get xtoy */
2217       ierr = MatAXPYGetxtoy_Private(xx->B->rmap->n,x->i,x->j,xx->garray,y->i,y->j,yy->garray,&y->xtoy);CHKERRQ(ierr);
2218       y->XtoY = xx->B;
2219       ierr = PetscObjectReference((PetscObject)xx->B);CHKERRQ(ierr);
2220     }
2221     for (i=0; i<x->nz; i++) y->a[y->xtoy[i]] += a*(x->a[i]);
2222   } else {
2223     Mat B;
2224     PetscInt *nnz_d,*nnz_o;
2225     ierr = PetscMalloc(yy->A->rmap->N*sizeof(PetscInt),&nnz_d);CHKERRQ(ierr);
2226     ierr = PetscMalloc(yy->B->rmap->N*sizeof(PetscInt),&nnz_o);CHKERRQ(ierr);
2227     ierr = MatCreate(((PetscObject)Y)->comm,&B);CHKERRQ(ierr);
2228     ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr);
2229     ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr);
2230     ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr);
2231     ierr = MatAXPYGetPreallocation_SeqAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr);
2232     ierr = MatAXPYGetPreallocation_SeqAIJ(yy->B,xx->B,nnz_o);CHKERRQ(ierr);
2233     ierr = MatMPIAIJSetPreallocation(B,0,nnz_d,0,nnz_o);CHKERRQ(ierr);
2234     ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr);
2235     ierr = MatHeaderReplace(Y,B);CHKERRQ(ierr);
2236     ierr = PetscFree(nnz_d);CHKERRQ(ierr);
2237     ierr = PetscFree(nnz_o);CHKERRQ(ierr);
2238   }
2239   PetscFunctionReturn(0);
2240 }
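
/*
   Usage sketch (illustrative): MatAXPY(Y,a,X,str) computes Y = Y + a*X. The
   MatStructure flag selects the path above: SAME_NONZERO_PATTERN is the
   cheap BLAS axpy case, SUBSET_NONZERO_PATTERN maps X's entries into Y, and
   DIFFERENT_NONZERO_PATTERN rebuilds Y with merged preallocation:

       ierr = MatAXPY(Y,2.0,X,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);
*/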
2241 
2242 extern PetscErrorCode  MatConjugate_SeqAIJ(Mat);
2243 
2244 #undef __FUNCT__
2245 #define __FUNCT__ "MatConjugate_MPIAIJ"
2246 PetscErrorCode  MatConjugate_MPIAIJ(Mat mat)
2247 {
2248 #if defined(PETSC_USE_COMPLEX)
2249   PetscErrorCode ierr;
2250   Mat_MPIAIJ     *aij = (Mat_MPIAIJ *)mat->data;
2251 
2252   PetscFunctionBegin;
2253   ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr);
2254   ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr);
2255 #else
2256   PetscFunctionBegin;
2257 #endif
2258   PetscFunctionReturn(0);
2259 }
2260 
2261 #undef __FUNCT__
2262 #define __FUNCT__ "MatRealPart_MPIAIJ"
2263 PetscErrorCode MatRealPart_MPIAIJ(Mat A)
2264 {
2265   Mat_MPIAIJ   *a = (Mat_MPIAIJ*)A->data;
2266   PetscErrorCode ierr;
2267 
2268   PetscFunctionBegin;
2269   ierr = MatRealPart(a->A);CHKERRQ(ierr);
2270   ierr = MatRealPart(a->B);CHKERRQ(ierr);
2271   PetscFunctionReturn(0);
2272 }
2273 
2274 #undef __FUNCT__
2275 #define __FUNCT__ "MatImaginaryPart_MPIAIJ"
2276 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A)
2277 {
2278   Mat_MPIAIJ   *a = (Mat_MPIAIJ*)A->data;
2279   PetscErrorCode ierr;
2280 
2281   PetscFunctionBegin;
2282   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
2283   ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
2284   PetscFunctionReturn(0);
2285 }
2286 
2287 #if defined(PETSC_HAVE_PBGL)
2288 
2289 #include <boost/parallel/mpi/bsp_process_group.hpp>
2290 #include <boost/graph/distributed/ilu_default_graph.hpp>
2291 #include <boost/graph/distributed/ilu_0_block.hpp>
2292 #include <boost/graph/distributed/ilu_preconditioner.hpp>
2293 #include <boost/graph/distributed/petsc/interface.hpp>
2294 #include <boost/multi_array.hpp>
2295 #include <boost/parallel/distributed_property_map.hpp>
2296 
2297 #undef __FUNCT__
2298 #define __FUNCT__ "MatILUFactorSymbolic_MPIAIJ"
2299 /*
2300   This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu>
2301 */
2302 PetscErrorCode MatILUFactorSymbolic_MPIAIJ(Mat fact,Mat A, IS isrow, IS iscol, const MatFactorInfo *info)
2303 {
2304   namespace petsc = boost::distributed::petsc;
2305 
2306   namespace graph_dist = boost::graph::distributed;
2307   using boost::graph::distributed::ilu_default::process_group_type;
2308   using boost::graph::ilu_permuted;
2309 
2310   PetscBool       row_identity, col_identity;
2311   PetscContainer  c;
2312   PetscInt        m, n, M, N;
2313   PetscErrorCode  ierr;
2314 
2315   PetscFunctionBegin;
2316   if (info->levels != 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only levels = 0 supported for parallel ilu");
2317   ierr = ISIdentity(isrow, &row_identity);CHKERRQ(ierr);
2318   ierr = ISIdentity(iscol, &col_identity);CHKERRQ(ierr);
2319   if (!row_identity || !col_identity) {
2320     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for parallel ILU");
2321   }
2322 
2323   process_group_type pg;
2324   typedef graph_dist::ilu_default::ilu_level_graph_type  lgraph_type;
2325   lgraph_type*   lgraph_p = new lgraph_type(petsc::num_global_vertices(A), pg, petsc::matrix_distribution(A, pg));
2326   lgraph_type&   level_graph = *lgraph_p;
2327   graph_dist::ilu_default::graph_type&            graph(level_graph.graph);
2328 
2329   petsc::read_matrix(A, graph, get(boost::edge_weight, graph));
2330   ilu_permuted(level_graph);
2331 
2332   /* put together the new matrix */
2333   ierr = MatCreate(((PetscObject)A)->comm, fact);CHKERRQ(ierr);
2334   ierr = MatGetLocalSize(A, &m, &n);CHKERRQ(ierr);
2335   ierr = MatGetSize(A, &M, &N);CHKERRQ(ierr);
2336   ierr = MatSetSizes(fact, m, n, M, N);CHKERRQ(ierr);
2337   ierr = MatSetType(fact, ((PetscObject)A)->type_name);CHKERRQ(ierr);
2338   ierr = MatAssemblyBegin(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2339   ierr = MatAssemblyEnd(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2340 
2341   ierr = PetscContainerCreate(((PetscObject)A)->comm, &c);CHKERRQ(ierr);
2342   ierr = PetscContainerSetPointer(c, lgraph_p);CHKERRQ(ierr);
2343   ierr = PetscObjectCompose((PetscObject) (fact), "graph", (PetscObject) c);CHKERRQ(ierr);
2344   ierr = PetscContainerDestroy(&c);CHKERRQ(ierr);
2345   PetscFunctionReturn(0);
2346 }
2347 
2348 #undef __FUNCT__
2349 #define __FUNCT__ "MatLUFactorNumeric_MPIAIJ"
2350 PetscErrorCode MatLUFactorNumeric_MPIAIJ(Mat B,Mat A, const MatFactorInfo *info)
2351 {
2352   PetscFunctionBegin;
2353   PetscFunctionReturn(0);
2354 }
2355 
2356 #undef __FUNCT__
2357 #define __FUNCT__ "MatSolve_MPIAIJ"
2358 /*
2359   This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu>
2360 */
2361 PetscErrorCode MatSolve_MPIAIJ(Mat A, Vec b, Vec x)
2362 {
2363   namespace graph_dist = boost::graph::distributed;
2364 
2365   typedef graph_dist::ilu_default::ilu_level_graph_type  lgraph_type;
2366   lgraph_type*   lgraph_p;
2367   PetscContainer c;
2368   PetscErrorCode ierr;
2369 
2370   PetscFunctionBegin;
2371   ierr = PetscObjectQuery((PetscObject) A, "graph", (PetscObject *) &c);CHKERRQ(ierr);
2372   ierr = PetscContainerGetPointer(c, (void **) &lgraph_p);CHKERRQ(ierr);
2373   ierr = VecCopy(b, x);CHKERRQ(ierr);
2374 
2375   PetscScalar* array_x;
2376   ierr = VecGetArray(x, &array_x);CHKERRQ(ierr);
2377   PetscInt sx;
2378   ierr = VecGetSize(x, &sx);CHKERRQ(ierr);
2379 
2380   PetscScalar* array_b;
2381   ierr = VecGetArray(b, &array_b);CHKERRQ(ierr);
2382   PetscInt sb;
2383   ierr = VecGetSize(b, &sb);CHKERRQ(ierr);
2384 
2385   lgraph_type&   level_graph = *lgraph_p;
2386   graph_dist::ilu_default::graph_type&            graph(level_graph.graph);
2387 
2388   typedef boost::multi_array_ref<PetscScalar, 1> array_ref_type;
2389   array_ref_type                                 ref_b(array_b, boost::extents[num_vertices(graph)]),
2390                                                  ref_x(array_x, boost::extents[num_vertices(graph)]);
2391 
2392   typedef boost::iterator_property_map<array_ref_type::iterator,
2393                                 boost::property_map<graph_dist::ilu_default::graph_type, boost::vertex_index_t>::type>  gvector_type;
2394   gvector_type                                   vector_b(ref_b.begin(), get(boost::vertex_index, graph)),
2395                                                  vector_x(ref_x.begin(), get(boost::vertex_index, graph));
2396 
2397   ilu_set_solve(*lgraph_p, vector_b, vector_x);
2398 
2399   PetscFunctionReturn(0);
2400 }
2401 #endif
2402 
2403 typedef struct { /* used by MatGetRedundantMatrix() for reusing matredundant */
2404   PetscInt       nzlocal,nsends,nrecvs;
2405   PetscMPIInt    *send_rank,*recv_rank;
2406   PetscInt       *sbuf_nz,*rbuf_nz,*sbuf_j,**rbuf_j;
2407   PetscScalar    *sbuf_a,**rbuf_a;
2408   PetscErrorCode (*Destroy)(Mat);
2409 } Mat_Redundant;
2410 
2411 #undef __FUNCT__
2412 #define __FUNCT__ "PetscContainerDestroy_MatRedundant"
2413 PetscErrorCode PetscContainerDestroy_MatRedundant(void *ptr)
2414 {
2415   PetscErrorCode       ierr;
2416   Mat_Redundant        *redund=(Mat_Redundant*)ptr;
2417   PetscInt             i;
2418 
2419   PetscFunctionBegin;
2420   ierr = PetscFree2(redund->send_rank,redund->recv_rank);CHKERRQ(ierr);
2421   ierr = PetscFree(redund->sbuf_j);CHKERRQ(ierr);
2422   ierr = PetscFree(redund->sbuf_a);CHKERRQ(ierr);
2423   for (i=0; i<redund->nrecvs; i++){
2424     ierr = PetscFree(redund->rbuf_j[i]);CHKERRQ(ierr);
2425     ierr = PetscFree(redund->rbuf_a[i]);CHKERRQ(ierr);
2426   }
2427   ierr = PetscFree4(redund->sbuf_nz,redund->rbuf_nz,redund->rbuf_j,redund->rbuf_a);CHKERRQ(ierr);
2428   ierr = PetscFree(redund);CHKERRQ(ierr);
2429   PetscFunctionReturn(0);
2430 }
2431 
2432 #undef __FUNCT__
2433 #define __FUNCT__ "MatDestroy_MatRedundant"
2434 PetscErrorCode MatDestroy_MatRedundant(Mat A)
2435 {
2436   PetscErrorCode  ierr;
2437   PetscContainer  container;
2438   Mat_Redundant   *redund=PETSC_NULL;
2439 
2440   PetscFunctionBegin;
2441   ierr = PetscObjectQuery((PetscObject)A,"Mat_Redundant",(PetscObject *)&container);CHKERRQ(ierr);
2442   if (!container) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Container does not exist");
2443   ierr = PetscContainerGetPointer(container,(void **)&redund);CHKERRQ(ierr);
2444   A->ops->destroy = redund->Destroy;
2445   ierr = PetscObjectCompose((PetscObject)A,"Mat_Redundant",0);CHKERRQ(ierr);
2446   if (A->ops->destroy) {
2447     ierr = (*A->ops->destroy)(A);CHKERRQ(ierr);
2448   }
2449   PetscFunctionReturn(0);
2450 }
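
/*
   Usage sketch (illustrative): this machinery backs MatGetRedundantMatrix(),
   defined below, which gathers copies of a parallel matrix onto nsubcomm
   subcommunicators (as used, e.g., by PCREDUNDANT). Assuming a
   subcommunicator subcomm and sizes nsubcomm and mlocal_sub obtained
   elsewhere:

       Mat C;
       ierr = MatGetRedundantMatrix(A,nsubcomm,subcomm,mlocal_sub,MAT_INITIAL_MATRIX,&C);CHKERRQ(ierr);

   A later call with MAT_REUSE_MATRIX updates only the numerical values,
   reusing the communication pattern cached in the Mat_Redundant container.
*/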
2451 
2452 #undef __FUNCT__
2453 #define __FUNCT__ "MatGetRedundantMatrix_MPIAIJ"
2454 PetscErrorCode MatGetRedundantMatrix_MPIAIJ(Mat mat,PetscInt nsubcomm,MPI_Comm subcomm,PetscInt mlocal_sub,MatReuse reuse,Mat *matredundant)
2455 {
2456   PetscMPIInt    rank,size;
2457   MPI_Comm       comm=((PetscObject)mat)->comm;
2458   PetscErrorCode ierr;
2459   PetscInt       nsends=0,nrecvs=0,i,rownz_max=0;
2460   PetscMPIInt    *send_rank=PETSC_NULL,*recv_rank=PETSC_NULL;
2461   PetscInt       *rowrange=mat->rmap->range;
2462   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2463   Mat            A=aij->A,B=aij->B,C=*matredundant;
2464   Mat_SeqAIJ     *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data;
2465   PetscScalar    *sbuf_a;
2466   PetscInt       nzlocal=a->nz+b->nz;
2467   PetscInt       j,cstart=mat->cmap->rstart,cend=mat->cmap->rend,row,nzA,nzB,ncols,*cworkA,*cworkB;
2468   PetscInt       rstart=mat->rmap->rstart,rend=mat->rmap->rend,*bmap=aij->garray,M,N;
2469   PetscInt       *cols,ctmp,lwrite,*rptr,l,*sbuf_j;
2470   MatScalar      *aworkA,*aworkB;
2471   PetscScalar    *vals;
2472   PetscMPIInt    tag1,tag2,tag3,imdex;
2473   MPI_Request    *s_waits1=PETSC_NULL,*s_waits2=PETSC_NULL,*s_waits3=PETSC_NULL,
2474                  *r_waits1=PETSC_NULL,*r_waits2=PETSC_NULL,*r_waits3=PETSC_NULL;
2475   MPI_Status     recv_status,*send_status;
2476   PetscInt       *sbuf_nz=PETSC_NULL,*rbuf_nz=PETSC_NULL,count;
2477   PetscInt       **rbuf_j=PETSC_NULL;
2478   PetscScalar    **rbuf_a=PETSC_NULL;
2479   Mat_Redundant  *redund=PETSC_NULL;
2480   PetscContainer container;
2481 
2482   PetscFunctionBegin;
2483   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2484   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2485 
2486   if (reuse == MAT_REUSE_MATRIX) {
2487     ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr);
2488     if (M != N || M != mat->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong global size");
2489     ierr = MatGetLocalSize(C,&M,&N);CHKERRQ(ierr);
2490     if (M != N || M != mlocal_sub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong local size");
2491     ierr = PetscObjectQuery((PetscObject)C,"Mat_Redundant",(PetscObject *)&container);CHKERRQ(ierr);
2492     if (!container) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Container does not exist");
2493     ierr = PetscContainerGetPointer(container,(void **)&redund);CHKERRQ(ierr);
2494     if (nzlocal != redund->nzlocal) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong nzlocal");
2495 
2496     nsends    = redund->nsends;
2497     nrecvs    = redund->nrecvs;
2498     send_rank = redund->send_rank;
2499     recv_rank = redund->recv_rank;
2500     sbuf_nz   = redund->sbuf_nz;
2501     rbuf_nz   = redund->rbuf_nz;
2502     sbuf_j    = redund->sbuf_j;
2503     sbuf_a    = redund->sbuf_a;
2504     rbuf_j    = redund->rbuf_j;
2505     rbuf_a    = redund->rbuf_a;
2506   }
2507 
2508   if (reuse == MAT_INITIAL_MATRIX){
2509     PetscMPIInt  subrank,subsize;
2510     PetscInt     nleftover,np_subcomm;
2511     /* get the destination processors' id send_rank, nsends and nrecvs */
2512     ierr = MPI_Comm_rank(subcomm,&subrank);CHKERRQ(ierr);
2513     ierr = MPI_Comm_size(subcomm,&subsize);CHKERRQ(ierr);
2514     ierr = PetscMalloc2(size,PetscMPIInt,&send_rank,size,PetscMPIInt,&recv_rank);CHKERRQ(ierr);
2515     np_subcomm = size/nsubcomm;
2516     nleftover  = size - nsubcomm*np_subcomm;
2517     nsends = 0; nrecvs = 0;
2518     for (i=0; i<size; i++) { /* i = rank */
2519       if (subrank == i/nsubcomm && rank != i){ /* my_subrank == other's subrank */
2520         send_rank[nsends] = i; nsends++;
2521         recv_rank[nrecvs++] = i;
2522       }
2523     }
2524     if (rank >= size - nleftover){/* this proc is a leftover processor */
2525       i = size-nleftover-1;
2526       j = 0;
2527       while (j < nsubcomm - nleftover){
2528         send_rank[nsends++] = i;
2529         i--; j++;
2530       }
2531     }
2532 
2533     if (nleftover && subsize == size/nsubcomm && subrank==subsize-1){ /* this proc recvs from leftover processors */
2534       for (i=0; i<nleftover; i++){
2535         recv_rank[nrecvs++] = size-nleftover+i;
2536       }
2537     }
2538 
2539     /* allocate sbuf_j, sbuf_a */
2540     i = nzlocal + rowrange[rank+1] - rowrange[rank] + 2;
2541     ierr = PetscMalloc(i*sizeof(PetscInt),&sbuf_j);CHKERRQ(ierr);
2542     ierr = PetscMalloc((nzlocal+1)*sizeof(PetscScalar),&sbuf_a);CHKERRQ(ierr);
2543   } /* end of if (reuse == MAT_INITIAL_MATRIX) */
2544 
2545   /* copy mat's local entries into the buffers */
2546   if (reuse == MAT_INITIAL_MATRIX){
2547     rownz_max = 0;
2548     rptr = sbuf_j;
2549     cols = sbuf_j + rend-rstart + 1;
2550     vals = sbuf_a;
2551     rptr[0] = 0;
2552     for (i=0; i<rend-rstart; i++){
2553       row = i + rstart;
2554       nzA    = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i];
2555       ncols  = nzA + nzB;
2556       cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i];
2557       aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i];
2558       /* load the column indices for this row into cols */
2559       lwrite = 0;
2560       for (l=0; l<nzB; l++) {
2561         if ((ctmp = bmap[cworkB[l]]) < cstart){
2562           vals[lwrite]   = aworkB[l];
2563           cols[lwrite++] = ctmp;
2564         }
2565       }
2566       for (l=0; l<nzA; l++){
2567         vals[lwrite]   = aworkA[l];
2568         cols[lwrite++] = cstart + cworkA[l];
2569       }
2570       for (l=0; l<nzB; l++) {
2571         if ((ctmp = bmap[cworkB[l]]) >= cend){
2572           vals[lwrite]   = aworkB[l];
2573           cols[lwrite++] = ctmp;
2574         }
2575       }
2576       vals += ncols;
2577       cols += ncols;
2578       rptr[i+1] = rptr[i] + ncols;
2579       if (rownz_max < ncols) rownz_max = ncols;
2580     }
2581     if (rptr[rend-rstart] != a->nz + b->nz) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB,"rptr[%D] %D != %D + %D",rend-rstart,rptr[rend-rstart],a->nz,b->nz);
2582   } else { /* only copy matrix values into sbuf_a */
2583     rptr = sbuf_j;
2584     vals = sbuf_a;
2585     rptr[0] = 0;
2586     for (i=0; i<rend-rstart; i++){
2587       row = i + rstart;
2588       nzA    = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i];
2589       ncols  = nzA + nzB;
2590       cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i];
2591       aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i];
2592       lwrite = 0;
2593       for (l=0; l<nzB; l++) {
2594         if ((ctmp = bmap[cworkB[l]]) < cstart) vals[lwrite++] = aworkB[l];
2595       }
2596       for (l=0; l<nzA; l++) vals[lwrite++] = aworkA[l];
2597       for (l=0; l<nzB; l++) {
2598         if ((ctmp = bmap[cworkB[l]]) >= cend) vals[lwrite++] = aworkB[l];
2599       }
2600       vals += ncols;
2601       rptr[i+1] = rptr[i] + ncols;
2602     }
2603   } /* end of if (reuse == MAT_INITIAL_MATRIX) */
2604 
2605   /* send nzlocal to others, and recv other's nzlocal */
2606   /*--------------------------------------------------*/
2607   if (reuse == MAT_INITIAL_MATRIX){
2608     ierr = PetscMalloc2(3*(nsends + nrecvs)+1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr);
2609     s_waits2 = s_waits3 + nsends;
2610     s_waits1 = s_waits2 + nsends;
2611     r_waits1 = s_waits1 + nsends;
2612     r_waits2 = r_waits1 + nrecvs;
2613     r_waits3 = r_waits2 + nrecvs;
2614   } else {
2615     ierr = PetscMalloc2(nsends + nrecvs +1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr);
2616     r_waits3 = s_waits3 + nsends;
2617   }
2618 
2619   ierr = PetscObjectGetNewTag((PetscObject)mat,&tag3);CHKERRQ(ierr);
2620   if (reuse == MAT_INITIAL_MATRIX){
2621     /* get new tags to keep the communication clean */
2622     ierr = PetscObjectGetNewTag((PetscObject)mat,&tag1);CHKERRQ(ierr);
2623     ierr = PetscObjectGetNewTag((PetscObject)mat,&tag2);CHKERRQ(ierr);
2624     ierr = PetscMalloc4(nsends,PetscInt,&sbuf_nz,nrecvs,PetscInt,&rbuf_nz,nrecvs,PetscInt*,&rbuf_j,nrecvs,PetscScalar*,&rbuf_a);CHKERRQ(ierr);
2625 
2626     /* post receives of others' nzlocal */
2627     for (i=0; i<nrecvs; i++){
2628       ierr = MPI_Irecv(rbuf_nz+i,1,MPIU_INT,MPI_ANY_SOURCE,tag1,comm,r_waits1+i);CHKERRQ(ierr);
2629     }
2630     /* send nzlocal to others */
2631     for (i=0; i<nsends; i++){
2632       sbuf_nz[i] = nzlocal;
2633       ierr = MPI_Isend(sbuf_nz+i,1,MPIU_INT,send_rank[i],tag1,comm,s_waits1+i);CHKERRQ(ierr);
2634     }
2635     /* wait on receives of nzlocal; allocate space for rbuf_j, rbuf_a */
2636     count = nrecvs;
2637     while (count) {
2638       ierr = MPI_Waitany(nrecvs,r_waits1,&imdex,&recv_status);CHKERRQ(ierr);
2639       recv_rank[imdex] = recv_status.MPI_SOURCE;
2640       /* allocate rbuf_a and rbuf_j; then post receives of rbuf_j */
2641       ierr = PetscMalloc((rbuf_nz[imdex]+1)*sizeof(PetscScalar),&rbuf_a[imdex]);CHKERRQ(ierr);
2642 
2643       i = rowrange[recv_status.MPI_SOURCE+1] - rowrange[recv_status.MPI_SOURCE]; /* number of expected mat->i */
2644       rbuf_nz[imdex] += i + 2;
2645       ierr = PetscMalloc(rbuf_nz[imdex]*sizeof(PetscInt),&rbuf_j[imdex]);CHKERRQ(ierr);
2646       ierr = MPI_Irecv(rbuf_j[imdex],rbuf_nz[imdex],MPIU_INT,recv_status.MPI_SOURCE,tag2,comm,r_waits2+imdex);CHKERRQ(ierr);
2647       count--;
2648     }
2649     /* wait on sends of nzlocal */
2650     if (nsends) {ierr = MPI_Waitall(nsends,s_waits1,send_status);CHKERRQ(ierr);}
2651     /* send mat->i,j to others, and recv from others */
2652     /*------------------------------------------------*/
2653     for (i=0; i<nsends; i++){
2654       j = nzlocal + rowrange[rank+1] - rowrange[rank] + 1;
2655       ierr = MPI_Isend(sbuf_j,j,MPIU_INT,send_rank[i],tag2,comm,s_waits2+i);CHKERRQ(ierr);
2656     }
2657     /* wait on receives of mat->i,j */
2658     /*------------------------------*/
2659     count = nrecvs;
2660     while (count) {
2661       ierr = MPI_Waitany(nrecvs,r_waits2,&imdex,&recv_status);CHKERRQ(ierr);
2662       if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE);
2663       count--;
2664     }
2665     /* wait on sends of mat->i,j */
2666     /*---------------------------*/
2667     if (nsends) {
2668       ierr = MPI_Waitall(nsends,s_waits2,send_status);CHKERRQ(ierr);
2669     }
2670   } /* end of if (reuse == MAT_INITIAL_MATRIX) */
2671 
2672   /* post receives, send and receive mat->a */
2673   /*----------------------------------------*/
2674   for (imdex=0; imdex<nrecvs; imdex++) {
2675     ierr = MPI_Irecv(rbuf_a[imdex],rbuf_nz[imdex],MPIU_SCALAR,recv_rank[imdex],tag3,comm,r_waits3+imdex);CHKERRQ(ierr);
2676   }
2677   for (i=0; i<nsends; i++){
2678     ierr = MPI_Isend(sbuf_a,nzlocal,MPIU_SCALAR,send_rank[i],tag3,comm,s_waits3+i);CHKERRQ(ierr);
2679   }
2680   count = nrecvs;
2681   while (count) {
2682     ierr = MPI_Waitany(nrecvs,r_waits3,&imdex,&recv_status);CHKERRQ(ierr);
2683     if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE);
2684     count--;
2685   }
2686   if (nsends) {
2687     ierr = MPI_Waitall(nsends,s_waits3,send_status);CHKERRQ(ierr);
2688   }
2689 
2690   ierr = PetscFree2(s_waits3,send_status);CHKERRQ(ierr);
2691 
2692   /* create redundant matrix */
2693   /*-------------------------*/
2694   if (reuse == MAT_INITIAL_MATRIX){
2695     /* compute rownz_max for preallocation */
2696     for (imdex=0; imdex<nrecvs; imdex++){
2697       j = rowrange[recv_rank[imdex]+1] - rowrange[recv_rank[imdex]];
2698       rptr = rbuf_j[imdex];
2699       for (i=0; i<j; i++){
2700         ncols = rptr[i+1] - rptr[i];
2701         if (rownz_max < ncols) rownz_max = ncols;
2702       }
2703     }
2704 
2705     ierr = MatCreate(subcomm,&C);CHKERRQ(ierr);
2706     ierr = MatSetSizes(C,mlocal_sub,mlocal_sub,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr);
2707     ierr = MatSetFromOptions(C);CHKERRQ(ierr);
2708     ierr = MatSeqAIJSetPreallocation(C,rownz_max,PETSC_NULL);CHKERRQ(ierr);
2709     ierr = MatMPIAIJSetPreallocation(C,rownz_max,PETSC_NULL,rownz_max,PETSC_NULL);CHKERRQ(ierr);
2710   } else {
2711     C = *matredundant;
2712   }
2713 
2714   /* insert local matrix entries */
2715   rptr = sbuf_j;
2716   cols = sbuf_j + rend-rstart + 1;
2717   vals = sbuf_a;
2718   for (i=0; i<rend-rstart; i++){
2719     row   = i + rstart;
2720     ncols = rptr[i+1] - rptr[i];
2721     ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
2722     vals += ncols;
2723     cols += ncols;
2724   }
2725   /* insert received matrix entries */
2726   for (imdex=0; imdex<nrecvs; imdex++){
2727     rstart = rowrange[recv_rank[imdex]];
2728     rend   = rowrange[recv_rank[imdex]+1];
2729     rptr = rbuf_j[imdex];
2730     cols = rbuf_j[imdex] + rend-rstart + 1;
2731     vals = rbuf_a[imdex];
2732     for (i=0; i<rend-rstart; i++){
2733       row   = i + rstart;
2734       ncols = rptr[i+1] - rptr[i];
2735       ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
2736       vals += ncols;
2737       cols += ncols;
2738     }
2739   }
2740   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2741   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2742   ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr);
2743   if (M != mat->rmap->N || N != mat->cmap->N) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"redundant mat size %d,%d != input mat size %d,%d",M,N,mat->rmap->N,mat->cmap->N);
2744   if (reuse == MAT_INITIAL_MATRIX) {
2745     PetscContainer container;
2746     *matredundant = C;
2747     /* create a supporting struct and attach it to C for reuse */
2748     ierr = PetscNewLog(C,Mat_Redundant,&redund);CHKERRQ(ierr);
2749     ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
2750     ierr = PetscContainerSetPointer(container,redund);CHKERRQ(ierr);
2751     ierr = PetscContainerSetUserDestroy(container,PetscContainerDestroy_MatRedundant);CHKERRQ(ierr);
2752     ierr = PetscObjectCompose((PetscObject)C,"Mat_Redundant",(PetscObject)container);CHKERRQ(ierr);
2753     ierr = PetscContainerDestroy(&container);CHKERRQ(ierr);
2754 
2755     redund->nzlocal = nzlocal;
2756     redund->nsends  = nsends;
2757     redund->nrecvs  = nrecvs;
2758     redund->send_rank = send_rank;
2759     redund->recv_rank = recv_rank;
2760     redund->sbuf_nz = sbuf_nz;
2761     redund->rbuf_nz = rbuf_nz;
2762     redund->sbuf_j  = sbuf_j;
2763     redund->sbuf_a  = sbuf_a;
2764     redund->rbuf_j  = rbuf_j;
2765     redund->rbuf_a  = rbuf_a;
2766 
2767     redund->Destroy = C->ops->destroy;
2768     C->ops->destroy = MatDestroy_MatRedundant;
2769   }
2770   PetscFunctionReturn(0);
2771 }
2772 
2773 #undef __FUNCT__
2774 #define __FUNCT__ "MatGetRowMaxAbs_MPIAIJ"
2775 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2776 {
2777   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2778   PetscErrorCode ierr;
2779   PetscInt       i,*idxb = 0;
2780   PetscScalar    *va,*vb;
2781   Vec            vtmp;
2782 
2783   PetscFunctionBegin;
2784   ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr);
2785   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2786   if (idx) {
2787     for (i=0; i<A->rmap->n; i++) {
2788       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2789     }
2790   }
2791 
2792   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
2793   if (idx) {
2794     ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr);
2795   }
2796   ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
2797   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
2798 
2799   for (i=0; i<A->rmap->n; i++){
2800     if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
2801       va[i] = vb[i];
2802       if (idx) idx[i] = a->garray[idxb[i]];
2803     }
2804   }
2805 
2806   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2807   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
2808   ierr = PetscFree(idxb);CHKERRQ(ierr);
2809   ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
2810   PetscFunctionReturn(0);
2811 }
2812 
2813 #undef __FUNCT__
2814 #define __FUNCT__ "MatGetRowMinAbs_MPIAIJ"
2815 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2816 {
2817   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2818   PetscErrorCode ierr;
2819   PetscInt       i,*idxb = 0;
2820   PetscScalar    *va,*vb;
2821   Vec            vtmp;
2822 
2823   PetscFunctionBegin;
2824   ierr = MatGetRowMinAbs(a->A,v,idx);CHKERRQ(ierr);
2825   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2826   if (idx) {
2827     for (i=0; i<A->rmap->n; i++) {
2828       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2829     }
2830   }
2831 
2832   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
2833   if (idx) {
2834     ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr);
2835   }
2836   ierr = MatGetRowMinAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
2837   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
2838 
2839   for (i=0; i<A->rmap->n; i++){
2840     if (PetscAbsScalar(va[i]) > PetscAbsScalar(vb[i])) {
2841       va[i] = vb[i];
2842       if (idx) idx[i] = a->garray[idxb[i]];
2843     }
2844   }
2845 
2846   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2847   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
2848   ierr = PetscFree(idxb);CHKERRQ(ierr);
2849   ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
2850   PetscFunctionReturn(0);
2851 }
2852 
2853 #undef __FUNCT__
2854 #define __FUNCT__ "MatGetRowMin_MPIAIJ"
2855 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2856 {
2857   Mat_MPIAIJ    *mat    = (Mat_MPIAIJ *) A->data;
2858   PetscInt       n      = A->rmap->n;
2859   PetscInt       cstart = A->cmap->rstart;
2860   PetscInt      *cmap   = mat->garray;
2861   PetscInt      *diagIdx, *offdiagIdx;
2862   Vec            diagV, offdiagV;
2863   PetscScalar   *a, *diagA, *offdiagA;
2864   PetscInt       r;
2865   PetscErrorCode ierr;
2866 
2867   PetscFunctionBegin;
2868   ierr = PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);CHKERRQ(ierr);
2869   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr);
2870   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr);
2871   ierr = MatGetRowMin(mat->A, diagV,    diagIdx);CHKERRQ(ierr);
2872   ierr = MatGetRowMin(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
2873   ierr = VecGetArray(v,        &a);CHKERRQ(ierr);
2874   ierr = VecGetArray(diagV,    &diagA);CHKERRQ(ierr);
2875   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2876   for(r = 0; r < n; ++r) {
2877     if (PetscAbsScalar(diagA[r]) <= PetscAbsScalar(offdiagA[r])) {
2878       a[r]   = diagA[r];
2879       idx[r] = cstart + diagIdx[r];
2880     } else {
2881       a[r]   = offdiagA[r];
2882       idx[r] = cmap[offdiagIdx[r]];
2883     }
2884   }
2885   ierr = VecRestoreArray(v,        &a);CHKERRQ(ierr);
2886   ierr = VecRestoreArray(diagV,    &diagA);CHKERRQ(ierr);
2887   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2888   ierr = VecDestroy(&diagV);CHKERRQ(ierr);
2889   ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
2890   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
2891   PetscFunctionReturn(0);
2892 }
2893 
2894 #undef __FUNCT__
2895 #define __FUNCT__ "MatGetRowMax_MPIAIJ"
2896 PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2897 {
2898   Mat_MPIAIJ    *mat    = (Mat_MPIAIJ *) A->data;
2899   PetscInt       n      = A->rmap->n;
2900   PetscInt       cstart = A->cmap->rstart;
2901   PetscInt      *cmap   = mat->garray;
2902   PetscInt      *diagIdx, *offdiagIdx;
2903   Vec            diagV, offdiagV;
2904   PetscScalar   *a, *diagA, *offdiagA;
2905   PetscInt       r;
2906   PetscErrorCode ierr;
2907 
2908   PetscFunctionBegin;
2909   ierr = PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);CHKERRQ(ierr);
2910   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr);
2911   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr);
2912   ierr = MatGetRowMax(mat->A, diagV,    diagIdx);CHKERRQ(ierr);
2913   ierr = MatGetRowMax(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
2914   ierr = VecGetArray(v,        &a);CHKERRQ(ierr);
2915   ierr = VecGetArray(diagV,    &diagA);CHKERRQ(ierr);
2916   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2917   for(r = 0; r < n; ++r) {
2918     if (PetscAbsScalar(diagA[r]) >= PetscAbsScalar(offdiagA[r])) {
2919       a[r]   = diagA[r];
2920       idx[r] = cstart + diagIdx[r];
2921     } else {
2922       a[r]   = offdiagA[r];
2923       idx[r] = cmap[offdiagIdx[r]];
2924     }
2925   }
2926   ierr = VecRestoreArray(v,        &a);CHKERRQ(ierr);
2927   ierr = VecRestoreArray(diagV,    &diagA);CHKERRQ(ierr);
2928   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2929   ierr = VecDestroy(&diagV);CHKERRQ(ierr);
2930   ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
2931   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
2932   PetscFunctionReturn(0);
2933 }
2934 
2935 #undef __FUNCT__
2936 #define __FUNCT__ "MatGetSeqNonzeroStructure_MPIAIJ"
2937 PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat,Mat *newmat)
2938 {
2939   PetscErrorCode ierr;
2940   Mat            *dummy;
2941 
2942   PetscFunctionBegin;
2943   ierr = MatGetSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy);CHKERRQ(ierr);
2944   *newmat = *dummy;
2945   ierr = PetscFree(dummy);CHKERRQ(ierr);
2946   PetscFunctionReturn(0);
2947 }
2948 
2949 extern PetscErrorCode  MatFDColoringApply_AIJ(Mat,MatFDColoring,Vec,MatStructure*,void*);
2950 
2951 #undef __FUNCT__
2952 #define __FUNCT__ "MatInvertBlockDiagonal_MPIAIJ"
2953 PetscErrorCode  MatInvertBlockDiagonal_MPIAIJ(Mat A,PetscScalar **values)
2954 {
2955   Mat_MPIAIJ    *a = (Mat_MPIAIJ*) A->data;
2956   PetscErrorCode ierr;
2957 
2958   PetscFunctionBegin;
2959   ierr = MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr);
2960   PetscFunctionReturn(0);
2961 }
2962 
2963 
2964 /* -------------------------------------------------------------------*/
2965 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
2966        MatGetRow_MPIAIJ,
2967        MatRestoreRow_MPIAIJ,
2968        MatMult_MPIAIJ,
2969 /* 4*/ MatMultAdd_MPIAIJ,
2970        MatMultTranspose_MPIAIJ,
2971        MatMultTransposeAdd_MPIAIJ,
2972 #ifdef PETSC_HAVE_PBGL
2973        MatSolve_MPIAIJ,
2974 #else
2975        0,
2976 #endif
2977        0,
2978        0,
2979 /*10*/ 0,
2980        0,
2981        0,
2982        MatSOR_MPIAIJ,
2983        MatTranspose_MPIAIJ,
2984 /*15*/ MatGetInfo_MPIAIJ,
2985        MatEqual_MPIAIJ,
2986        MatGetDiagonal_MPIAIJ,
2987        MatDiagonalScale_MPIAIJ,
2988        MatNorm_MPIAIJ,
2989 /*20*/ MatAssemblyBegin_MPIAIJ,
2990        MatAssemblyEnd_MPIAIJ,
2991        MatSetOption_MPIAIJ,
2992        MatZeroEntries_MPIAIJ,
2993 /*24*/ MatZeroRows_MPIAIJ,
2994        0,
2995 #ifdef PETSC_HAVE_PBGL
2996        0,
2997 #else
2998        0,
2999 #endif
3000        0,
3001        0,
3002 /*29*/ MatSetUpPreallocation_MPIAIJ,
3003 #ifdef PETSC_HAVE_PBGL
3004        0,
3005 #else
3006        0,
3007 #endif
3008        0,
3009        0,
3010        0,
3011 /*34*/ MatDuplicate_MPIAIJ,
3012        0,
3013        0,
3014        0,
3015        0,
3016 /*39*/ MatAXPY_MPIAIJ,
3017        MatGetSubMatrices_MPIAIJ,
3018        MatIncreaseOverlap_MPIAIJ,
3019        MatGetValues_MPIAIJ,
3020        MatCopy_MPIAIJ,
3021 /*44*/ MatGetRowMax_MPIAIJ,
3022        MatScale_MPIAIJ,
3023        0,
3024        0,
3025        MatZeroRowsColumns_MPIAIJ,
3026 /*49*/ MatSetBlockSize_MPIAIJ,
3027        0,
3028        0,
3029        0,
3030        0,
3031 /*54*/ MatFDColoringCreate_MPIAIJ,
3032        0,
3033        MatSetUnfactored_MPIAIJ,
3034        MatPermute_MPIAIJ,
3035        0,
3036 /*59*/ MatGetSubMatrix_MPIAIJ,
3037        MatDestroy_MPIAIJ,
3038        MatView_MPIAIJ,
3039        0,
3040        0,
3041 /*64*/ 0,
3042        0,
3043        0,
3044        0,
3045        0,
3046 /*69*/ MatGetRowMaxAbs_MPIAIJ,
3047        MatGetRowMinAbs_MPIAIJ,
3048        0,
3049        MatSetColoring_MPIAIJ,
3050 #if defined(PETSC_HAVE_ADIC)
3051        MatSetValuesAdic_MPIAIJ,
3052 #else
3053        0,
3054 #endif
3055        MatSetValuesAdifor_MPIAIJ,
3056 /*75*/ MatFDColoringApply_AIJ,
3057        0,
3058        0,
3059        0,
3060        0,
3061 /*80*/ 0,
3062        0,
3063        0,
3064 /*83*/ MatLoad_MPIAIJ,
3065        0,
3066        0,
3067        0,
3068        0,
3069        0,
3070 /*89*/ MatMatMult_MPIAIJ_MPIAIJ,
3071        MatMatMultSymbolic_MPIAIJ_MPIAIJ,
3072        MatMatMultNumeric_MPIAIJ_MPIAIJ,
3073        MatPtAP_Basic,
3074        MatPtAPSymbolic_MPIAIJ,
3075 /*94*/ MatPtAPNumeric_MPIAIJ,
3076        0,
3077        0,
3078        0,
3079        0,
3080 /*99*/ 0,
3081        MatPtAPSymbolic_MPIAIJ_MPIAIJ,
3082        MatPtAPNumeric_MPIAIJ_MPIAIJ,
3083        MatConjugate_MPIAIJ,
3084        0,
3085 /*104*/MatSetValuesRow_MPIAIJ,
3086        MatRealPart_MPIAIJ,
3087        MatImaginaryPart_MPIAIJ,
3088        0,
3089        0,
3090 /*109*/0,
3091        MatGetRedundantMatrix_MPIAIJ,
3092        MatGetRowMin_MPIAIJ,
3093        0,
3094        0,
3095 /*114*/MatGetSeqNonzeroStructure_MPIAIJ,
3096        0,
3097        0,
3098        0,
3099        0,
3100 /*119*/0,
3101        0,
3102        0,
3103        0,
3104        MatGetMultiProcBlock_MPIAIJ,
3105 /*124*/MatFindNonZeroRows_MPIAIJ,
3106        MatGetColumnNorms_MPIAIJ,
3107        MatInvertBlockDiagonal_MPIAIJ,
3108        0,
3109        MatGetSubMatricesParallel_MPIAIJ,
3110 /*129*/0,
3111        MatTransposeMatMult_MPIAIJ_MPIAIJ,
3112        MatTransposeMatMultSymbolic_MPIAIJ_MPIAIJ,
3113        MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ,
3114        0,
3115 /*134*/0,
3116        0,
3117        0,
3118        0,
3119        0
3120 };
3121 
3122 /* ----------------------------------------------------------------------------------------*/
3123 
3124 EXTERN_C_BEGIN
3125 #undef __FUNCT__
3126 #define __FUNCT__ "MatStoreValues_MPIAIJ"
3127 PetscErrorCode  MatStoreValues_MPIAIJ(Mat mat)
3128 {
3129   Mat_MPIAIJ     *aij = (Mat_MPIAIJ *)mat->data;
3130   PetscErrorCode ierr;
3131 
3132   PetscFunctionBegin;
3133   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
3134   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
3135   PetscFunctionReturn(0);
3136 }
3137 EXTERN_C_END
3138 
3139 EXTERN_C_BEGIN
3140 #undef __FUNCT__
3141 #define __FUNCT__ "MatRetrieveValues_MPIAIJ"
3142 PetscErrorCode  MatRetrieveValues_MPIAIJ(Mat mat)
3143 {
3144   Mat_MPIAIJ     *aij = (Mat_MPIAIJ *)mat->data;
3145   PetscErrorCode ierr;
3146 
3147   PetscFunctionBegin;
3148   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
3149   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
3150   PetscFunctionReturn(0);
3151 }
3152 EXTERN_C_END
3153 
3154 EXTERN_C_BEGIN
3155 #undef __FUNCT__
3156 #define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ"
3157 PetscErrorCode  MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
3158 {
3159   Mat_MPIAIJ     *b;
3160   PetscErrorCode ierr;
3161   PetscInt       i;
3162   PetscBool      d_realalloc = PETSC_FALSE,o_realalloc = PETSC_FALSE;
3163 
3164   PetscFunctionBegin;
3165   if (d_nz >= 0 || d_nnz) d_realalloc = PETSC_TRUE;
3166   if (o_nz >= 0 || o_nnz) o_realalloc = PETSC_TRUE;
3167   if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5;
3168   if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2;
3169   if (d_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz);
3170   if (o_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz);
3171 
3172   ierr = PetscLayoutSetBlockSize(B->rmap,1);CHKERRQ(ierr);
3173   ierr = PetscLayoutSetBlockSize(B->cmap,1);CHKERRQ(ierr);
3174   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3175   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3176   if (d_nnz) {
3177     for (i=0; i<B->rmap->n; i++) {
3178       if (d_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %D value %D",i,d_nnz[i]);
3179     }
3180   }
3181   if (o_nnz) {
3182     for (i=0; i<B->rmap->n; i++) {
3183       if (o_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %D value %D",i,o_nnz[i]);
3184     }
3185   }
3186   b = (Mat_MPIAIJ*)B->data;
3187 
3188   if (!B->preallocated) {
3189     /* Explicitly create 2 MATSEQAIJ matrices. */
3190     ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
3191     ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr);
3192     ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr);
3193     ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr);
3194     ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
3195     ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr);
3196     ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr);
3197     ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr);
3198   }
3199 
3200   ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr);
3201   ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr);
3202   /* Do not error if the user did not give real preallocation information. Ugly because this would overwrite a previous user call to MatSetOption(). */
3203   if (!d_realalloc) {ierr = MatSetOption(b->A,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);}
3204   if (!o_realalloc) {ierr = MatSetOption(b->B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);}
3205   B->preallocated = PETSC_TRUE;
3206   PetscFunctionReturn(0);
3207 }
3208 EXTERN_C_END
3209 
3210 #undef __FUNCT__
3211 #define __FUNCT__ "MatDuplicate_MPIAIJ"
3212 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
3213 {
3214   Mat            mat;
3215   Mat_MPIAIJ     *a,*oldmat = (Mat_MPIAIJ*)matin->data;
3216   PetscErrorCode ierr;
3217 
3218   PetscFunctionBegin;
3219   *newmat       = 0;
3220   ierr = MatCreate(((PetscObject)matin)->comm,&mat);CHKERRQ(ierr);
3221   ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr);
3222   ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr);
3223   ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
3224   a    = (Mat_MPIAIJ*)mat->data;
3225 
3226   mat->factortype    = matin->factortype;
3227   mat->rmap->bs      = matin->rmap->bs;
3228   mat->assembled    = PETSC_TRUE;
3229   mat->insertmode   = NOT_SET_VALUES;
3230   mat->preallocated = PETSC_TRUE;
3231 
3232   a->size           = oldmat->size;
3233   a->rank           = oldmat->rank;
3234   a->donotstash     = oldmat->donotstash;
3235   a->roworiented    = oldmat->roworiented;
3236   a->rowindices     = 0;
3237   a->rowvalues      = 0;
3238   a->getrowactive   = PETSC_FALSE;
3239 
3240   ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr);
3241   ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr);
3242 
3243   if (oldmat->colmap) {
3244 #if defined (PETSC_USE_CTABLE)
3245     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
3246 #else
3247     ierr = PetscMalloc((mat->cmap->N)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr);
3248     ierr = PetscLogObjectMemory(mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
3249     ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
3250 #endif
3251   } else a->colmap = 0;
3252   if (oldmat->garray) {
3253     PetscInt len;
3254     len  = oldmat->B->cmap->n;
3255     ierr = PetscMalloc((len+1)*sizeof(PetscInt),&a->garray);CHKERRQ(ierr);
3256     ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr);
3257     if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); }
3258   } else a->garray = 0;
3259 
3260   ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
3261   ierr = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr);
3262   ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
3263   ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr);
3264   ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
3265   ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr);
3266   ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
3267   ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr);
3268   ierr = PetscFListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr);
3269   *newmat = mat;
3270   PetscFunctionReturn(0);
3271 }
3272 
3273 
3274 
3275 #undef __FUNCT__
3276 #define __FUNCT__ "MatLoad_MPIAIJ"
3277 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer)
3278 {
3279   PetscScalar    *vals,*svals;
3280   MPI_Comm       comm = ((PetscObject)viewer)->comm;
3281   PetscErrorCode ierr;
3282   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
3283   PetscInt       i,nz,j,rstart,rend,mmax,maxnz = 0,grows,gcols;
3284   PetscInt       header[4],*rowlengths = 0,M,N,m,*cols;
3285   PetscInt       *ourlens = PETSC_NULL,*procsnz = PETSC_NULL,*offlens = PETSC_NULL,jj,*mycols,*smycols;
3286   PetscInt       cend,cstart,n,*rowners,sizesset=1;
3287   int            fd;
3288 
3289   PetscFunctionBegin;
3290   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3291   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3292   if (!rank) {
3293     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
3294     ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
3295     if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
3296   }
3297 
3298   if (newMat->rmap->n < 0 && newMat->rmap->N < 0 && newMat->cmap->n < 0 && newMat->cmap->N < 0) sizesset = 0;
3299 
3300   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
3301   M = header[1]; N = header[2];
3302   /* If global rows/cols are set to PETSC_DECIDE, set it to the sizes given in the file */
3303   if (sizesset && newMat->rmap->N < 0) newMat->rmap->N = M;
3304   if (sizesset && newMat->cmap->N < 0) newMat->cmap->N = N;
3305 
3306   /* If global sizes are set, check if they are consistent with that given in the file */
3307   if (sizesset) {
3308     ierr = MatGetSize(newMat,&grows,&gcols);CHKERRQ(ierr);
3309   }
3310   if (sizesset && newMat->rmap->N != grows) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows: Matrix in file has (%d) and input matrix has (%d)",M,grows);
3311   if (sizesset && newMat->cmap->N != gcols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols: Matrix in file has (%d) and input matrix has (%d)",N,gcols);
3312 
3313   /* determine ownership of all rows */
3314   if (newMat->rmap->n < 0 ) m    = M/size + ((M % size) > rank); /* PETSC_DECIDE */
3315   else m = newMat->rmap->n; /* Set by user */
3316 
3317   ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr);
3318   ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
3319 
3320   /* First process needs enough room for process with most rows */
3321   if (!rank) {
3322     mmax       = rowners[1];
3323     for (i=2; i<=size; i++) {
3324       mmax = PetscMax(mmax,rowners[i]);
3325     }
3326   } else mmax = m;
3327 
3328   rowners[0] = 0;
3329   for (i=2; i<=size; i++) {
3330     rowners[i] += rowners[i-1];
3331   }
3332   rstart = rowners[rank];
3333   rend   = rowners[rank+1];
3334 
3335   /* distribute row lengths to all processors */
3336   ierr    = PetscMalloc2(mmax,PetscInt,&ourlens,mmax,PetscInt,&offlens);CHKERRQ(ierr);
3337   if (!rank) {
3338     ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr);
3339     ierr = PetscMalloc(m*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
3340     ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr);
3341     ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr);
3342     for (j=0; j<m; j++) {
3343       procsnz[0] += ourlens[j];
3344     }
3345     for (i=1; i<size; i++) {
3346       ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr);
3347       /* calculate the number of nonzeros on each processor */
3348       for (j=0; j<rowners[i+1]-rowners[i]; j++) {
3349         procsnz[i] += rowlengths[j];
3350       }
3351       ierr = MPILong_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
3352     }
3353     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
3354   } else {
3355     ierr = MPILong_Recv(ourlens,m,MPIU_INT,0,tag,comm);CHKERRQ(ierr);
3356   }
3357 
3358   if (!rank) {
3359     /* determine max buffer needed and allocate it */
3360     maxnz = 0;
3361     for (i=0; i<size; i++) {
3362       maxnz = PetscMax(maxnz,procsnz[i]);
3363     }
3364     ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr);
3365 
3366     /* read in my part of the matrix column indices  */
3367     nz   = procsnz[0];
3368     ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr);
3369     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
3370 
3371     /* read in every one elses and ship off */
3372     for (i=1; i<size; i++) {
3373       nz     = procsnz[i];
3374       ierr   = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
3375       ierr   = MPILong_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
3376     }
3377     ierr = PetscFree(cols);CHKERRQ(ierr);
3378   } else {
3379     /* determine buffer space needed for message */
3380     nz = 0;
3381     for (i=0; i<m; i++) {
3382       nz += ourlens[i];
3383     }
3384     ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr);
3385 
3386     /* receive message of column indices*/
3387     ierr = MPILong_Recv(mycols,nz,MPIU_INT,0,tag,comm);CHKERRQ(ierr);
3388   }
3389 
3390   /* determine column ownership if matrix is not square */
3391   if (N != M) {
3392     if (newMat->cmap->n < 0) n      = N/size + ((N % size) > rank);
3393     else n = newMat->cmap->n;
3394     ierr   = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3395     cstart = cend - n;
3396   } else {
3397     cstart = rstart;
3398     cend   = rend;
3399     n      = cend - cstart;
3400   }
3401 
3402   /* loop over local rows, determining number of off diagonal entries */
3403   ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr);
3404   jj = 0;
3405   for (i=0; i<m; i++) {
3406     for (j=0; j<ourlens[i]; j++) {
3407       if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++;
3408       jj++;
3409     }
3410   }
3411 
3412   for (i=0; i<m; i++) {
3413     ourlens[i] -= offlens[i];
3414   }
3415   if (!sizesset) {
3416     ierr = MatSetSizes(newMat,m,n,M,N);CHKERRQ(ierr);
3417   }
3418   ierr = MatMPIAIJSetPreallocation(newMat,0,ourlens,0,offlens);CHKERRQ(ierr);
3419 
3420   for (i=0; i<m; i++) {
3421     ourlens[i] += offlens[i];
3422   }
3423 
3424   if (!rank) {
3425     ierr = PetscMalloc((maxnz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
3426 
3427     /* read in my part of the matrix numerical values  */
3428     nz   = procsnz[0];
3429     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3430 
3431     /* insert into matrix */
3432     jj      = rstart;
3433     smycols = mycols;
3434     svals   = vals;
3435     for (i=0; i<m; i++) {
3436       ierr = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
3437       smycols += ourlens[i];
3438       svals   += ourlens[i];
3439       jj++;
3440     }
3441 
3442     /* read in other processors and ship out */
3443     for (i=1; i<size; i++) {
3444       nz     = procsnz[i];
3445       ierr   = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3446       ierr   = MPILong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr);
3447     }
3448     ierr = PetscFree(procsnz);CHKERRQ(ierr);
3449   } else {
3450     /* receive numeric values */
3451     ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
3452 
3453     /* receive message of values */
3454     ierr   = MPILong_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr);
3455 
3456     /* insert into matrix */
3457     jj      = rstart;
3458     smycols = mycols;
3459     svals   = vals;
3460     for (i=0; i<m; i++) {
3461       ierr     = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
3462       smycols += ourlens[i];
3463       svals   += ourlens[i];
3464       jj++;
3465     }
3466   }
3467   ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr);
3468   ierr = PetscFree(vals);CHKERRQ(ierr);
3469   ierr = PetscFree(mycols);CHKERRQ(ierr);
3470   ierr = PetscFree(rowners);CHKERRQ(ierr);
3471 
3472   ierr = MatAssemblyBegin(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3473   ierr = MatAssemblyEnd(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3474   PetscFunctionReturn(0);
3475 }
3476 
3477 #undef __FUNCT__
3478 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ"
3479 PetscErrorCode MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat)
3480 {
3481   PetscErrorCode ierr;
3482   IS             iscol_local;
3483   PetscInt       csize;
3484 
3485   PetscFunctionBegin;
3486   ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
3487   if (call == MAT_REUSE_MATRIX) {
3488     ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr);
3489     if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3490   } else {
3491     ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
3492   }
3493   ierr = MatGetSubMatrix_MPIAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr);
3494   if (call == MAT_INITIAL_MATRIX) {
3495     ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
3496     ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
3497   }
3498   PetscFunctionReturn(0);
3499 }
3500 
3501 #undef __FUNCT__
3502 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ_Private"
3503 /*
3504     Not great since it makes two copies of the submatrix: first a SeqAIJ copy
3505   of the local part, then the end result obtained by concatenating the local
3506   matrices. Writing it directly would be much like MatGetSubMatrices_MPIAIJ().
3507 
3508   Note: This requires a sequential iscol with all indices.
3509 */
3510 PetscErrorCode MatGetSubMatrix_MPIAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat)
3511 {
3512   PetscErrorCode ierr;
3513   PetscMPIInt    rank,size;
3514   PetscInt       i,m,n,rstart,row,rend,nz,*cwork,j;
3515   PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal;
3516   Mat            *local,M,Mreuse;
3517   MatScalar      *vwork,*aa;
3518   MPI_Comm       comm = ((PetscObject)mat)->comm;
3519   Mat_SeqAIJ     *aij;
3520 
3521 
3522   PetscFunctionBegin;
3523   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3524   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3525 
3526   if (call ==  MAT_REUSE_MATRIX) {
3527     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr);
3528     if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3529     local = &Mreuse;
3530     ierr  = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);CHKERRQ(ierr);
3531   } else {
3532     ierr   = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr);
3533     Mreuse = *local;
3534     ierr   = PetscFree(local);CHKERRQ(ierr);
3535   }
3536 
3537   /*
3538       m - number of local rows
3539       n - number of columns (same on all processors)
3540       rstart - first row in new global matrix generated
3541   */
3542   ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
3543   if (call == MAT_INITIAL_MATRIX) {
3544     aij = (Mat_SeqAIJ*)(Mreuse)->data;
3545     ii  = aij->i;
3546     jj  = aij->j;
3547 
3548     /*
3549         Determine the number of non-zeros in the diagonal and off-diagonal
3550         portions of the matrix in order to do correct preallocation
3551     */
3552 
3553     /* first get start and end of "diagonal" columns */
3554     if (csize == PETSC_DECIDE) {
3555       ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
3556       if (mglobal == n) { /* square matrix */
3557         nlocal = m;
3558       } else {
3559         nlocal = n/size + ((n % size) > rank);
3560       }
3561     } else {
3562       nlocal = csize;
3563     }
3564     ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3565     rstart = rend - nlocal;
3566     if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n);
3567 
3568     /* next, compute all the lengths */
3569     ierr  = PetscMalloc((2*m+1)*sizeof(PetscInt),&dlens);CHKERRQ(ierr);
3570     olens = dlens + m;
3571     for (i=0; i<m; i++) {
3572       jend = ii[i+1] - ii[i];
3573       olen = 0;
3574       dlen = 0;
3575       for (j=0; j<jend; j++) {
3576         if (*jj < rstart || *jj >= rend) olen++;
3577         else dlen++;
3578         jj++;
3579       }
3580       olens[i] = olen;
3581       dlens[i] = dlen;
3582     }
3583     ierr = MatCreate(comm,&M);CHKERRQ(ierr);
3584     ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr);
3585     ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr);
3586     ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr);
3587     ierr = PetscFree(dlens);CHKERRQ(ierr);
3588   } else {
3589     PetscInt ml,nl;
3590 
3591     M = *newmat;
3592     ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
3593     if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
3594     ierr = MatZeroEntries(M);CHKERRQ(ierr);
3595     /*
3596          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
3597        rather than the slower MatSetValues().
3598     */
3599     M->was_assembled = PETSC_TRUE;
3600     M->assembled     = PETSC_FALSE;
3601   }
3602   ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
3603   aij = (Mat_SeqAIJ*)(Mreuse)->data;
3604   ii  = aij->i;
3605   jj  = aij->j;
3606   aa  = aij->a;
3607   for (i=0; i<m; i++) {
3608     row   = rstart + i;
3609     nz    = ii[i+1] - ii[i];
3610     cwork = jj;     jj += nz;
3611     vwork = aa;     aa += nz;
3612     ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
3613   }
3614 
3615   ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3616   ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3617   *newmat = M;
3618 
3619   /* save submatrix used in processor for next request */
3620   if (call ==  MAT_INITIAL_MATRIX) {
3621     ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
3622     ierr = MatDestroy(&Mreuse);CHKERRQ(ierr);
3623   }
3624 
3625   PetscFunctionReturn(0);
3626 }
3627 
3628 EXTERN_C_BEGIN
3629 #undef __FUNCT__
3630 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR_MPIAIJ"
3631 PetscErrorCode  MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[])
3632 {
3633   PetscInt       m,cstart, cend,j,nnz,i,d;
3634   PetscInt       *d_nnz,*o_nnz,nnz_max = 0,rstart,ii;
3635   const PetscInt *JJ;
3636   PetscScalar    *values;
3637   PetscErrorCode ierr;
3638 
3639   PetscFunctionBegin;
3640   if (Ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0, it is %D",Ii[0]);
3641 
3642   ierr = PetscLayoutSetBlockSize(B->rmap,1);CHKERRQ(ierr);
3643   ierr = PetscLayoutSetBlockSize(B->cmap,1);CHKERRQ(ierr);
3644   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3645   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3646   m      = B->rmap->n;
3647   cstart = B->cmap->rstart;
3648   cend   = B->cmap->rend;
3649   rstart = B->rmap->rstart;
3650 
3651   ierr  = PetscMalloc2(m,PetscInt,&d_nnz,m,PetscInt,&o_nnz);CHKERRQ(ierr);
3652 
3653 #if defined(PETSC_USE_DEBUG)
3654   for (i=0; i<m; i++) {
3655     nnz     = Ii[i+1]- Ii[i];
3656     JJ      = J + Ii[i];
3657     if (nnz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative %D number of columns",i,nnz);
3658     if (nnz && (JJ[0] < 0)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index %D",i,JJ[0]);
3659     if (nnz && (JJ[nnz-1] >= B->cmap->N)) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N);
3660   }
3661 #endif
3662 
3663   for (i=0; i<m; i++) {
3664     nnz     = Ii[i+1]- Ii[i];
3665     JJ      = J + Ii[i];
3666     nnz_max = PetscMax(nnz_max,nnz);
3667     d       = 0;
3668     for (j=0; j<nnz; j++) {
3669       if (cstart <= JJ[j] && JJ[j] < cend) d++;
3670     }
3671     d_nnz[i] = d;
3672     o_nnz[i] = nnz - d;
3673   }
3674   ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
3675   ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);
3676 
3677   if (v) values = (PetscScalar*)v;
3678   else {
3679     ierr = PetscMalloc((nnz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr);
3680     ierr = PetscMemzero(values,nnz_max*sizeof(PetscScalar));CHKERRQ(ierr);
3681   }
3682 
3683   for (i=0; i<m; i++) {
3684     ii   = i + rstart;
3685     nnz  = Ii[i+1]- Ii[i];
3686     ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ? Ii[i] : 0),INSERT_VALUES);CHKERRQ(ierr);
3687   }
3688   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3689   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3690 
3691   if (!v) {
3692     ierr = PetscFree(values);CHKERRQ(ierr);
3693   }
3694   ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3695   PetscFunctionReturn(0);
3696 }
3697 EXTERN_C_END
3698 
3699 #undef __FUNCT__
3700 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR"
3701 /*@
3702    MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format
3703    (the default parallel PETSc format).
3704 
3705    Collective on MPI_Comm
3706 
3707    Input Parameters:
3708 +  B - the matrix
3709 .  i - the indices into j for the start of each local row (starts with zero)
3710 .  j - the column indices for each local row (starts with zero)
3711 -  v - optional values in the matrix
3712 
3713    Level: developer
3714 
3715    Notes:
3716        The i, j, and v arrays ARE copied by this routine into the internal format used by PETSc;
3717      thus you CANNOT change the matrix entries by changing the values of v[] after you have
3718      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3719 
3720        The i and j indices are 0 based, and i indices are indices corresponding to the local j array.
3721 
3722        The format which is used for the sparse matrix input, is equivalent to a
3723     row-major ordering, i.e., for the following matrix, the input data expected is
3724     as shown:
3725 
3726         1 0 0
3727         2 0 3     P0
3728        -------
3729         4 5 6     P1
3730 
3731      Process0 [P0]: rows_owned=[0,1]
3732         i =  {0,1,3}  [size = nrow+1  = 2+1]
3733         j =  {0,0,2}  [size = nz = 3]
3734         v =  {1,2,3}  [size = nz = 3]
3735 
3736      Process1 [P1]: rows_owned=[2]
3737         i =  {0,3}    [size = nrow+1  = 1+1]
3738         j =  {0,1,2}  [size = nz = 3]
3739         v =  {4,5,6}  [size = nz = 3]
3740 
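     A minimal sketch of the corresponding calls on P0 (using the arrays above;
     P1 passes its own local i,j,v; error checking abbreviated):
.vb
        PetscInt    i[] = {0,1,3}, j[] = {0,0,2};
        PetscScalar v[] = {1,2,3};
        ierr = MatCreate(comm,&B);CHKERRQ(ierr);
        ierr = MatSetSizes(B,2,PETSC_DECIDE,3,3);CHKERRQ(ierr);  /* P0 owns 2 of the 3 rows */
        ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr);
        ierr = MatMPIAIJSetPreallocationCSR(B,i,j,v);CHKERRQ(ierr);
.ve
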
3741 .keywords: matrix, aij, compressed row, sparse, parallel
3742 
3743 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateMPIAIJ(), MPIAIJ,
3744           MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays()
3745 @*/
3746 PetscErrorCode  MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
3747 {
3748   PetscErrorCode ierr;
3749 
3750   PetscFunctionBegin;
3751   ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocationCSR_C",(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,i,j,v));CHKERRQ(ierr);
3752   PetscFunctionReturn(0);
3753 }
3754 
3755 #undef __FUNCT__
3756 #define __FUNCT__ "MatMPIAIJSetPreallocation"
3757 /*@C
3758    MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format
3759    (the default parallel PETSc format).  For good matrix assembly performance
3760    the user should preallocate the matrix storage by setting the parameters
3761    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3762    performance can be increased by more than a factor of 50.
3763 
3764    Collective on MPI_Comm
3765 
3766    Input Parameters:
3767 +  A - the matrix
3768 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
3769            (same value is used for all local rows)
3770 .  d_nnz - array containing the number of nonzeros in the various rows of the
3771            DIAGONAL portion of the local submatrix (possibly different for each row)
3772            or PETSC_NULL, if d_nz is used to specify the nonzero structure.
3773            The size of this array is equal to the number of local rows, i.e 'm'.
3774            For matrices that will be factored, you must leave room for (and set)
3775            the diagonal entry even if it is zero.
3776 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
3777            submatrix (same value is used for all local rows).
3778 -  o_nnz - array containing the number of nonzeros in the various rows of the
3779            OFF-DIAGONAL portion of the local submatrix (possibly different for
3780            each row) or PETSC_NULL, if o_nz is used to specify the nonzero
3781            structure. The size of this array is equal to the number
3782            of local rows, i.e 'm'.
3783 
3784    If the *_nnz parameter is given then the *_nz parameter is ignored
3785 
3786    The AIJ format (also called the Yale sparse matrix format or
3787    compressed row storage (CSR)), is fully compatible with standard Fortran 77
3788    storage.  The stored row and column indices begin with zero.
3789    See the <A href="../../docs/manual.pdf#nameddest=ch_mat">Mat chapter of the users manual</A> for details.
3790 
3791    The parallel matrix is partitioned such that the first m0 rows belong to
3792    process 0, the next m1 rows belong to process 1, the next m2 rows belong
3793    to process 2 etc., where m0,m1,m2... are the input parameter 'm'.
3794 
3795    The DIAGONAL portion of the local submatrix of a processor can be defined
3796    as the submatrix which is obtained by extracting the part corresponding to
3797    the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the
3798    first row that belongs to the processor, r2 is the last row belonging to
3799    this processor, and c1-c2 is the range of indices of the local part of a
3800    vector suitable for applying the matrix to.  This is an mxn matrix.  In the
3801    common case of a square matrix, the row and column ranges are the same and
3802    the DIAGONAL part is also square. The remaining portion of the local
3803    submatrix (m x (N-n)) constitutes the OFF-DIAGONAL portion.
3804 
3805    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
3806 
3807    You can call MatGetInfo() to get information on how effective the preallocation was;
3808    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
3809    You can also run with the option -info and look for messages with the string
3810    malloc in them to see if additional memory allocation was needed.
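
   For example, a sketch of querying the local counts after assembly:
.vb
     MatInfo info;
     ierr = MatGetInfo(A,MAT_LOCAL,&info);CHKERRQ(ierr);
     /* inspect info.mallocs, info.nz_allocated, info.nz_used, info.nz_unneeded */
.ve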
3811 
3812    Example usage:
3813 
3814    Consider the following 8x8 matrix with 34 non-zero values, that is
3815    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
3816    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
3817    as follows:
3818 
3819 .vb
3820             1  2  0  |  0  3  0  |  0  4
3821     Proc0   0  5  6  |  7  0  0  |  8  0
3822             9  0 10  | 11  0  0  | 12  0
3823     -------------------------------------
3824            13  0 14  | 15 16 17  |  0  0
3825     Proc1   0 18  0  | 19 20 21  |  0  0
3826             0  0  0  | 22 23  0  | 24  0
3827     -------------------------------------
3828     Proc2  25 26 27  |  0  0 28  | 29  0
3829            30  0  0  | 31 32 33  |  0 34
3830 .ve
3831 
3832    This can be represented as a collection of submatrices as:
3833 
3834 .vb
3835       A B C
3836       D E F
3837       G H I
3838 .ve
3839 
3840    Where the submatrices A,B,C are owned by proc0, D,E,F are
3841    owned by proc1, G,H,I are owned by proc2.
3842 
3843    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3844    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3845    The 'M','N' parameters are 8,8, and have the same values on all procs.
3846 
3847    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
3848    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
3849    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
3850    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
3851    part as SeqAIJ matrices. for eg: proc1 will store [E] as a SeqAIJ
3852    matrix, ans [DF] as another SeqAIJ matrix.
3853 
3854    When d_nz, o_nz parameters are specified, d_nz storage elements are
3855    allocated for every row of the local diagonal submatrix, and o_nz
3856    storage locations are allocated for every row of the OFF-DIAGONAL submat.
3857    One way to choose d_nz and o_nz is to use the maximum number of nonzeros per
3858    local row in the local DIAGONAL and OFF-DIAGONAL submatrices, respectively.
3859    In this case, the values of d_nz,o_nz are:
3860 .vb
3861      proc0 : d_nz = 2, o_nz = 2
3862      proc1 : d_nz = 3, o_nz = 2
3863      proc2 : d_nz = 1, o_nz = 4
3864 .ve
3865    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
3866    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
3867    for proc2, i.e., we are using 12+15+10=37 storage locations to store
3868    34 values.
3869 
3870    When d_nnz, o_nnz parameters are specified, the storage is specified
3871    for every row, corresponding to both the DIAGONAL and OFF-DIAGONAL submatrices.
3872    In the above case the values for d_nnz,o_nnz are:
3873 .vb
3874      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
3875      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
3876      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
3877 .ve
3878    Here the space allocated is sum of all the above values i.e 34, and
3879    hence pre-allocation is perfect.
3880 
3881    Level: intermediate
3882 
3883 .keywords: matrix, aij, compressed row, sparse, parallel
3884 
3885 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIAIJ(), MatMPIAIJSetPreallocationCSR(),
3886           MPIAIJ, MatGetInfo()
3887 @*/
3888 PetscErrorCode  MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
3889 {
3890   PetscErrorCode ierr;
3891 
3892   PetscFunctionBegin;
3893   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
3894   PetscValidType(B,1);
3895   ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocation_C",(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr);
3896   PetscFunctionReturn(0);
3897 }
3898 
3899 #undef __FUNCT__
3900 #define __FUNCT__ "MatCreateMPIAIJWithArrays"
3901 /*@
3902      MatCreateMPIAIJWithArrays - creates a MPI AIJ matrix using arrays that contain in standard
3903          CSR format the local rows.
3904 
3905    Collective on MPI_Comm
3906 
3907    Input Parameters:
3908 +  comm - MPI communicator
3909 .  m - number of local rows (Cannot be PETSC_DECIDE)
3910 .  n - This value should be the same as the local size used in creating the
3911        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
3912        calculated if N is given) For square matrices n is almost always m.
3913 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
3914 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
3915 .   i - row indices
3916 .   j - column indices
3917 -   a - matrix values
3918 
3919    Output Parameter:
3920 .   mat - the matrix
3921 
3922    Level: intermediate
3923 
3924    Notes:
3925        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
3926      thus you CANNOT change the matrix entries by changing the values of a[] after you have
3927      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3928 
3929        The i and j indices are 0 based, and i indices are indices corresponding to the local j array.
3930 
3931        The format which is used for the sparse matrix input, is equivalent to a
3932     row-major ordering, i.e., for the following matrix, the input data expected is
3933     as shown:
3934 
3935         1 0 0
3936         2 0 3     P0
3937        -------
3938         4 5 6     P1
3939 
3940      Process0 [P0]: rows_owned=[0,1]
3941         i =  {0,1,3}  [size = nrow+1  = 2+1]
3942         j =  {0,0,2}  [size = nz = 3]
3943         v =  {1,2,3}  [size = nz = 3]
3944 
3945      Process1 [P1]: rows_owned=[2]
3946         i =  {0,3}    [size = nrow+1  = 1+1]
3947         j =  {0,1,2}  [size = nz = 3]
3948         v =  {4,5,6}  [size = nz = 3]
3949 
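     A minimal sketch of the call on P0 for the example above (m=2 local rows,
     M=N=3; error checking abbreviated):
.vb
        PetscInt    i[] = {0,1,3}, j[] = {0,0,2};
        PetscScalar a[] = {1,2,3};
        ierr = MatCreateMPIAIJWithArrays(comm,2,PETSC_DECIDE,3,3,i,j,a,&A);CHKERRQ(ierr);
.ve
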
3950 .keywords: matrix, aij, compressed row, sparse, parallel
3951 
3952 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
3953           MPIAIJ, MatCreateMPIAIJ(), MatCreateMPIAIJWithSplitArrays()
3954 @*/
3955 PetscErrorCode  MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
3956 {
3957   PetscErrorCode ierr;
3958 
3959   PetscFunctionBegin;
3960   if (i[0]) {
3961     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
3962   }
3963   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
3964   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
3965   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
3966   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
3967   ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr);
3968   PetscFunctionReturn(0);
3969 }
3970 
3971 #undef __FUNCT__
3972 #define __FUNCT__ "MatCreateMPIAIJ"
3973 /*@C
3974    MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format
3975    (the default parallel PETSc format).  For good matrix assembly performance
3976    the user should preallocate the matrix storage by setting the parameters
3977    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3978    performance can be increased by more than a factor of 50.
3979 
3980    Collective on MPI_Comm
3981 
3982    Input Parameters:
3983 +  comm - MPI communicator
3984 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
3985            This value should be the same as the local size used in creating the
3986            y vector for the matrix-vector product y = Ax.
3987 .  n - This value should be the same as the local size used in creating the
3988        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
3989        calculated if N is given) For square matrices n is almost always m.
3990 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
3991 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
3992 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
3993            (same value is used for all local rows)
3994 .  d_nnz - array containing the number of nonzeros in the various rows of the
3995            DIAGONAL portion of the local submatrix (possibly different for each row)
3996            or PETSC_NULL, if d_nz is used to specify the nonzero structure.
3997            The size of this array is equal to the number of local rows, i.e 'm'.
3998 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
3999            submatrix (same value is used for all local rows).
4000 -  o_nnz - array containing the number of nonzeros in the various rows of the
4001            OFF-DIAGONAL portion of the local submatrix (possibly different for
4002            each row) or PETSC_NULL, if o_nz is used to specify the nonzero
4003            structure. The size of this array is equal to the number
4004            of local rows, i.e 'm'.
4005 
4006    Output Parameter:
4007 .  A - the matrix
4008 
   It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
   MatXXXXSetPreallocation() paradigm instead of this routine directly.
   [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation()]
4012 
4013    Notes:
4014    If the *_nnz parameter is given then the *_nz parameter is ignored
4015 
4016    m,n,M,N parameters specify the size of the matrix, and its partitioning across
4017    processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
4018    storage requirements for this matrix.
4019 
   If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one
   processor then it must be used on all processors that share the object for
   that argument.
4023 
4024    The user MUST specify either the local or global matrix dimensions
4025    (possibly both).
4026 
   The parallel matrix is partitioned across processors such that the
   first m0 rows belong to process 0, the next m1 rows belong to
   process 1, the next m2 rows belong to process 2, etc., where
   m0,m1,m2,... are given by the input parameter 'm', i.e., each processor stores
   values corresponding to an [m x N] submatrix.
4032 
   The columns are logically partitioned with the n0 columns belonging
   to the 0th partition, the next n1 columns belonging to the next
   partition, etc., where n0,n1,n2,... are given by the input parameter 'n'.
4036 
   The DIAGONAL portion of the local submatrix on any given processor
   is the submatrix corresponding to the rows and columns m,n owned by
   the given processor, i.e., the diagonal matrix on
   process 0 is [m0 x n0], the diagonal matrix on process 1 is [m1 x n1],
   etc. The remaining portion of the local submatrix [m x (N-n)]
   constitutes the OFF-DIAGONAL portion. The example below
   illustrates this concept.
4044 
4045    For a square global matrix we define each processor's diagonal portion
4046    to be its local rows and the corresponding columns (a square submatrix);
4047    each processor's off-diagonal portion encompasses the remainder of the
4048    local matrix (a rectangular submatrix).
4049 
   If o_nnz and d_nnz are specified, then o_nz and d_nz are ignored.
4051 
4052    When calling this routine with a single process communicator, a matrix of
4053    type SEQAIJ is returned.  If a matrix of type MPIAIJ is desired for this
4054    type of communicator, use the construction mechanism:
4055      MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatSetSizes(A, m,n,M,N); MatMPIAIJSetPreallocation(A,...);
4056 
4057    By default, this format uses inodes (identical nodes) when possible.
4058    We search for consecutive rows with the same nonzero structure, thereby
4059    reusing matrix information to achieve increased efficiency.
4060 
4061    Options Database Keys:
4062 +  -mat_no_inode  - Do not use inodes
4063 .  -mat_inode_limit <limit> - Sets inode limit (max limit=5)
4064 -  -mat_aij_oneindex - Internally use indexing starting at 1
4065         rather than 0.  Note that when calling MatSetValues(),
4066         the user still MUST index entries starting at 0!
4067 
4068 
4069    Example usage:
4070 
   Consider the following 8x8 matrix with 34 non-zero values that is
   assembled across 3 processors. Let's assume that proc0 owns 3 rows,
   proc1 owns 3 rows, and proc2 owns 2 rows. This division can be shown
4074    as follows:
4075 
4076 .vb
4077             1  2  0  |  0  3  0  |  0  4
4078     Proc0   0  5  6  |  7  0  0  |  8  0
4079             9  0 10  | 11  0  0  | 12  0
4080     -------------------------------------
4081            13  0 14  | 15 16 17  |  0  0
4082     Proc1   0 18  0  | 19 20 21  |  0  0
4083             0  0  0  | 22 23  0  | 24  0
4084     -------------------------------------
4085     Proc2  25 26 27  |  0  0 28  | 29  0
4086            30  0  0  | 31 32 33  |  0 34
4087 .ve
4088 
4089    This can be represented as a collection of submatrices as:
4090 
4091 .vb
4092       A B C
4093       D E F
4094       G H I
4095 .ve
4096 
4097    Where the submatrices A,B,C are owned by proc0, D,E,F are
4098    owned by proc1, G,H,I are owned by proc2.
4099 
4100    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4101    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4102    The 'M','N' parameters are 8,8, and have the same values on all procs.
4103 
4104    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
4105    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
4106    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
   Internally, each processor stores the DIAGONAL part and the OFF-DIAGONAL
   part as SeqAIJ matrices; e.g., proc1 will store [E] as a SeqAIJ
   matrix, and [DF] as another SeqAIJ matrix.
4110 
4111    When d_nz, o_nz parameters are specified, d_nz storage elements are
4112    allocated for every row of the local diagonal submatrix, and o_nz
4113    storage locations are allocated for every row of the OFF-DIAGONAL submat.
   One way to choose d_nz and o_nz is to use the maximum number of nonzeros per
   local row in each of the local DIAGONAL and OFF-DIAGONAL submatrices.
   In this case, the values of d_nz and o_nz are:
.vb
     proc0 : d_nz = 2, o_nz = 2
     proc1 : d_nz = 3, o_nz = 2
     proc2 : d_nz = 1, o_nz = 4
4121 .ve
   We are allocating m*(d_nz+o_nz) storage locations on every proc. This
   translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, and 2*(1+4)=10
   for proc2, i.e., we are using 12+15+10=37 storage locations to store
   34 values.
4126 
   When the d_nnz, o_nnz parameters are specified, the storage is specified
   for every row, corresponding to both the DIAGONAL and OFF-DIAGONAL submatrices.
4129    In the above case the values for d_nnz,o_nnz are:
4130 .vb
4131      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
4132      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
4133      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
4134 .ve
   Here the space allocated is the sum of all the above values, i.e., 34, and
   hence the preallocation is perfect.
4137 
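   A minimal sketch of the corresponding call on each process, assuming the
   rank-dependent m and the d_nnz/o_nnz arrays have been filled from the
   tables above (error checking omitted):
.vb
     PetscInt d_nnz[3],o_nnz[3],m;  /* set from this rank's row of the tables */
     Mat      A;
     MatCreateMPIAIJ(PETSC_COMM_WORLD,m,m,8,8,0,d_nnz,0,o_nnz,&A);
.ve
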
4138    Level: intermediate
4139 
4140 .keywords: matrix, aij, compressed row, sparse, parallel
4141 
4142 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
4143           MPIAIJ, MatCreateMPIAIJWithArrays()
4144 @*/
4145 PetscErrorCode  MatCreateMPIAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
4146 {
4147   PetscErrorCode ierr;
4148   PetscMPIInt    size;
4149 
4150   PetscFunctionBegin;
4151   ierr = MatCreate(comm,A);CHKERRQ(ierr);
4152   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
4153   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4154   if (size > 1) {
4155     ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr);
4156     ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
4157   } else {
4158     ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr);
4159     ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr);
4160   }
4161   PetscFunctionReturn(0);
4162 }
4163 
4164 #undef __FUNCT__
4165 #define __FUNCT__ "MatMPIAIJGetSeqAIJ"
4166 PetscErrorCode  MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,PetscInt *colmap[])
4167 {
4168   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
4169 
4170   PetscFunctionBegin;
4171   *Ad     = a->A;
4172   *Ao     = a->B;
4173   *colmap = a->garray;
4174   PetscFunctionReturn(0);
4175 }
4176 
4177 #undef __FUNCT__
4178 #define __FUNCT__ "MatSetColoring_MPIAIJ"
4179 PetscErrorCode MatSetColoring_MPIAIJ(Mat A,ISColoring coloring)
4180 {
4181   PetscErrorCode ierr;
4182   PetscInt       i;
4183   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
4184 
4185   PetscFunctionBegin;
4186   if (coloring->ctype == IS_COLORING_GLOBAL) {
4187     ISColoringValue *allcolors,*colors;
4188     ISColoring      ocoloring;
4189 
4190     /* set coloring for diagonal portion */
4191     ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr);
4192 
4193     /* set coloring for off-diagonal portion */
4194     ierr = ISAllGatherColors(((PetscObject)A)->comm,coloring->n,coloring->colors,PETSC_NULL,&allcolors);CHKERRQ(ierr);
4195     ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
4196     for (i=0; i<a->B->cmap->n; i++) {
4197       colors[i] = allcolors[a->garray[i]];
4198     }
4199     ierr = PetscFree(allcolors);CHKERRQ(ierr);
4200     ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
4201     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
4202     ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr);
4203   } else if (coloring->ctype == IS_COLORING_GHOSTED) {
4204     ISColoringValue *colors;
4205     PetscInt        *larray;
4206     ISColoring      ocoloring;
4207 
4208     /* set coloring for diagonal portion */
4209     ierr = PetscMalloc((a->A->cmap->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr);
4210     for (i=0; i<a->A->cmap->n; i++) {
4211       larray[i] = i + A->cmap->rstart;
4212     }
4213     ierr = ISGlobalToLocalMappingApply(A->cmap->mapping,IS_GTOLM_MASK,a->A->cmap->n,larray,PETSC_NULL,larray);CHKERRQ(ierr);
4214     ierr = PetscMalloc((a->A->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
4215     for (i=0; i<a->A->cmap->n; i++) {
4216       colors[i] = coloring->colors[larray[i]];
4217     }
4218     ierr = PetscFree(larray);CHKERRQ(ierr);
4219     ierr = ISColoringCreate(PETSC_COMM_SELF,coloring->n,a->A->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
4220     ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr);
4221     ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr);
4222 
4223     /* set coloring for off-diagonal portion */
4224     ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr);
4225     ierr = ISGlobalToLocalMappingApply(A->cmap->mapping,IS_GTOLM_MASK,a->B->cmap->n,a->garray,PETSC_NULL,larray);CHKERRQ(ierr);
4226     ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
4227     for (i=0; i<a->B->cmap->n; i++) {
4228       colors[i] = coloring->colors[larray[i]];
4229     }
4230     ierr = PetscFree(larray);CHKERRQ(ierr);
4231     ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
4232     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
4233     ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr);
4234   } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support ISColoringType %d",(int)coloring->ctype);
4235 
4236   PetscFunctionReturn(0);
4237 }
4238 
4239 #if defined(PETSC_HAVE_ADIC)
4240 #undef __FUNCT__
4241 #define __FUNCT__ "MatSetValuesAdic_MPIAIJ"
4242 PetscErrorCode MatSetValuesAdic_MPIAIJ(Mat A,void *advalues)
4243 {
4244   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
4245   PetscErrorCode ierr;
4246 
4247   PetscFunctionBegin;
4248   ierr = MatSetValuesAdic_SeqAIJ(a->A,advalues);CHKERRQ(ierr);
4249   ierr = MatSetValuesAdic_SeqAIJ(a->B,advalues);CHKERRQ(ierr);
4250   PetscFunctionReturn(0);
4251 }
4252 #endif
4253 
4254 #undef __FUNCT__
4255 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ"
4256 PetscErrorCode MatSetValuesAdifor_MPIAIJ(Mat A,PetscInt nl,void *advalues)
4257 {
4258   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
4259   PetscErrorCode ierr;
4260 
4261   PetscFunctionBegin;
4262   ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr);
4263   ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr);
4264   PetscFunctionReturn(0);
4265 }
4266 
4267 #undef __FUNCT__
4268 #define __FUNCT__ "MatMergeSymbolic"
4269 PetscErrorCode  MatMergeSymbolic(MPI_Comm comm,Mat inmat,PetscInt n,Mat *outmat)
4270 {
4271   PetscErrorCode ierr;
4272   PetscInt       m,N,i,rstart,nnz,*dnz,*onz;
4273   PetscInt       *indx;
4274 
4275   PetscFunctionBegin;
4276   /* This routine will ONLY return MPIAIJ type matrix */
4277   ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
4278   if (n == PETSC_DECIDE){
4279     ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr);
4280   }
4281   ierr = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
4282   rstart -= m;
4283 
4284   ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
4285   for (i=0;i<m;i++) {
4286     ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr);
4287     ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr);
4288     ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr);
4289   }
4290 
4291   ierr = MatCreate(comm,outmat);CHKERRQ(ierr);
4292   ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
4293   ierr = MatSetType(*outmat,MATMPIAIJ);CHKERRQ(ierr);
4294   ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr);
4295   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
4296   PetscFunctionReturn(0);
4297 }
4298 
4299 #undef __FUNCT__
4300 #define __FUNCT__ "MatMergeNumeric"
4301 PetscErrorCode  MatMergeNumeric(MPI_Comm comm,Mat inmat,PetscInt n,Mat outmat)
4302 {
4303   PetscErrorCode ierr;
4304   PetscInt       m,N,i,rstart,nnz,Ii;
4305   PetscInt       *indx;
4306   PetscScalar    *values;
4307 
4308   PetscFunctionBegin;
4309   ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
4310   ierr = MatGetOwnershipRange(outmat,&rstart,PETSC_NULL);CHKERRQ(ierr);
4311   for (i=0;i<m;i++) {
4312     ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
4313     Ii    = i + rstart;
4314     ierr = MatSetValues(outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
4315     ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
4316   }
4317   ierr = MatDestroy(&inmat);CHKERRQ(ierr);
4318   ierr = MatAssemblyBegin(outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4319   ierr = MatAssemblyEnd(outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4320   PetscFunctionReturn(0);
4321 }
4322 
4323 #undef __FUNCT__
4324 #define __FUNCT__ "MatMerge"
4325 /*@
      MatMerge - Creates a single large PETSc matrix by concatenating the sequential
                 matrices from each processor
4328 
4329     Collective on MPI_Comm
4330 
4331    Input Parameters:
+    comm - the communicator the parallel matrix will live on
.    inmat - the input sequential matrix (one per process)
4334 .    n - number of local columns (or PETSC_DECIDE)
4335 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4336 
4337    Output Parameter:
4338 .    outmat - the parallel matrix generated
4339 
4340     Level: advanced
4341 
   Notes: The number of columns of the matrix on EACH processor MUST be the same.
4343 
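   Example usage (a sketch; each process contributes its own sequential matrix,
   whose values this routine merges into the parallel matrix before destroying it):
.vb
     Mat seqA,parA;
     /* ... assemble seqA on PETSC_COMM_SELF ... */
     MatMerge(PETSC_COMM_WORLD,seqA,PETSC_DECIDE,MAT_INITIAL_MATRIX,&parA);
.ve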
4344 @*/
4345 PetscErrorCode  MatMerge(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
4346 {
4347   PetscErrorCode ierr;
4348 
4349   PetscFunctionBegin;
4350   ierr = PetscLogEventBegin(MAT_Merge,inmat,0,0,0);CHKERRQ(ierr);
4351   if (scall == MAT_INITIAL_MATRIX){
4352     ierr = MatMergeSymbolic(comm,inmat,n,outmat);CHKERRQ(ierr);
4353   }
4354   ierr = MatMergeNumeric(comm,inmat,n,*outmat);CHKERRQ(ierr);
4355   ierr = PetscLogEventEnd(MAT_Merge,inmat,0,0,0);CHKERRQ(ierr);
4356   PetscFunctionReturn(0);
4357 }
4358 
4359 #undef __FUNCT__
4360 #define __FUNCT__ "MatFileSplit"
4361 PetscErrorCode MatFileSplit(Mat A,char *outfile)
4362 {
4363   PetscErrorCode    ierr;
4364   PetscMPIInt       rank;
4365   PetscInt          m,N,i,rstart,nnz;
4366   size_t            len;
4367   const PetscInt    *indx;
4368   PetscViewer       out;
4369   char              *name;
4370   Mat               B;
4371   const PetscScalar *values;
4372 
4373   PetscFunctionBegin;
4374   ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr);
4375   ierr = MatGetSize(A,0,&N);CHKERRQ(ierr);
4376   /* Should this be the type of the diagonal block of A? */
4377   ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr);
4378   ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr);
4379   ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr);
4380   ierr = MatSeqAIJSetPreallocation(B,0,PETSC_NULL);CHKERRQ(ierr);
4381   ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr);
4382   for (i=0;i<m;i++) {
4383     ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
4384     ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
4385     ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
4386   }
4387   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4388   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4389 
4390   ierr = MPI_Comm_rank(((PetscObject)A)->comm,&rank);CHKERRQ(ierr);
4391   ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr);
4392   ierr = PetscMalloc((len+5)*sizeof(char),&name);CHKERRQ(ierr);
4393   sprintf(name,"%s.%d",outfile,rank);
4394   ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr);
  ierr = PetscFree(name);CHKERRQ(ierr);
4396   ierr = MatView(B,out);CHKERRQ(ierr);
4397   ierr = PetscViewerDestroy(&out);CHKERRQ(ierr);
4398   ierr = MatDestroy(&B);CHKERRQ(ierr);
4399   PetscFunctionReturn(0);
4400 }
4401 
4402 extern PetscErrorCode MatDestroy_MPIAIJ(Mat);
4403 #undef __FUNCT__
4404 #define __FUNCT__ "MatDestroy_MPIAIJ_SeqsToMPI"
4405 PetscErrorCode  MatDestroy_MPIAIJ_SeqsToMPI(Mat A)
4406 {
4407   PetscErrorCode       ierr;
4408   Mat_Merge_SeqsToMPI  *merge;
4409   PetscContainer       container;
4410 
4411   PetscFunctionBegin;
4412   ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr);
4413   if (container) {
4414     ierr = PetscContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr);
4415     ierr = PetscFree(merge->id_r);CHKERRQ(ierr);
4416     ierr = PetscFree(merge->len_s);CHKERRQ(ierr);
4417     ierr = PetscFree(merge->len_r);CHKERRQ(ierr);
4418     ierr = PetscFree(merge->bi);CHKERRQ(ierr);
4419     ierr = PetscFree(merge->bj);CHKERRQ(ierr);
4420     ierr = PetscFree(merge->buf_ri[0]);CHKERRQ(ierr);
4421     ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr);
4422     ierr = PetscFree(merge->buf_rj[0]);CHKERRQ(ierr);
4423     ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr);
4424     ierr = PetscFree(merge->coi);CHKERRQ(ierr);
4425     ierr = PetscFree(merge->coj);CHKERRQ(ierr);
4426     ierr = PetscFree(merge->owners_co);CHKERRQ(ierr);
4427     ierr = PetscLayoutDestroy(&merge->rowmap);CHKERRQ(ierr);
4428     ierr = PetscFree(merge);CHKERRQ(ierr);
4429     ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr);
4430   }
4431   ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr);
4432   PetscFunctionReturn(0);
4433 }
4434 
4435 #include <../src/mat/utils/freespace.h>
4436 #include <petscbt.h>
4437 
4438 #undef __FUNCT__
4439 #define __FUNCT__ "MatMerge_SeqsToMPINumeric"
4440 /*@C
      MatMerge_SeqsToMPI - Creates an MPIAIJ matrix by adding together the sequential
                 matrices from each processor
4443 
4444     Collective on MPI_Comm
4445 
4446    Input Parameters:
+    comm - the communicator the parallel matrix will live on
.    seqmat - the input sequential matrix (one per process)
4449 .    m - number of local rows (or PETSC_DECIDE)
4450 .    n - number of local columns (or PETSC_DECIDE)
4451 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4452 
4453    Output Parameter:
4454 .    mpimat - the parallel matrix generated
4455 
4456     Level: advanced
4457 
4458    Notes:
     The dimensions of the sequential matrix on each processor MUST be the same.
     The input seqmat is stored in the container "Mat_Merge_SeqsToMPI", and will be
     destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat.
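
   Example usage (a sketch of the symbolic/numeric split this pair implements;
   the numeric phase can be repeated when the values of seqmat change but its
   nonzero pattern does not):
.vb
     Mat C;
     MatMerge_SeqsToMPISymbolic(comm,seqmat,m,n,&C);  /* MAT_INITIAL_MATRIX phase */
     MatMerge_SeqsToMPINumeric(seqmat,C);             /* MAT_REUSE_MATRIX phase   */
.ve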
4462 @*/
4463 PetscErrorCode  MatMerge_SeqsToMPINumeric(Mat seqmat,Mat mpimat)
4464 {
4465   PetscErrorCode       ierr;
4466   MPI_Comm             comm=((PetscObject)mpimat)->comm;
4467   Mat_SeqAIJ           *a=(Mat_SeqAIJ*)seqmat->data;
4468   PetscMPIInt          size,rank,taga,*len_s;
4469   PetscInt             N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj=a->j;
4470   PetscInt             proc,m;
4471   PetscInt             **buf_ri,**buf_rj;
4472   PetscInt             k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj;
4473   PetscInt             nrows,**buf_ri_k,**nextrow,**nextai;
4474   MPI_Request          *s_waits,*r_waits;
4475   MPI_Status           *status;
4476   MatScalar            *aa=a->a;
4477   MatScalar            **abuf_r,*ba_i;
4478   Mat_Merge_SeqsToMPI  *merge;
4479   PetscContainer       container;
4480 
4481   PetscFunctionBegin;
4482   ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
4483 
4484   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4485   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4486 
4487   ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr);
4488   ierr  = PetscContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr);
4489 
4490   bi     = merge->bi;
4491   bj     = merge->bj;
4492   buf_ri = merge->buf_ri;
4493   buf_rj = merge->buf_rj;
4494 
4495   ierr   = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr);
4496   owners = merge->rowmap->range;
4497   len_s  = merge->len_s;
4498 
4499   /* send and recv matrix values */
4500   /*-----------------------------*/
4501   ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr);
4502   ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr);
4503 
4504   ierr = PetscMalloc((merge->nsend+1)*sizeof(MPI_Request),&s_waits);CHKERRQ(ierr);
4505   for (proc=0,k=0; proc<size; proc++){
4506     if (!len_s[proc]) continue;
4507     i = owners[proc];
4508     ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr);
4509     k++;
4510   }
4511 
4512   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);}
4513   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);}
4514   ierr = PetscFree(status);CHKERRQ(ierr);
4515 
4516   ierr = PetscFree(s_waits);CHKERRQ(ierr);
4517   ierr = PetscFree(r_waits);CHKERRQ(ierr);
4518 
4519   /* insert mat values of mpimat */
4520   /*----------------------------*/
4521   ierr = PetscMalloc(N*sizeof(PetscScalar),&ba_i);CHKERRQ(ierr);
4522   ierr = PetscMalloc3(merge->nrecv,PetscInt*,&buf_ri_k,merge->nrecv,PetscInt*,&nextrow,merge->nrecv,PetscInt*,&nextai);CHKERRQ(ierr);
4523 
4524   for (k=0; k<merge->nrecv; k++){
    buf_ri_k[k] = buf_ri[k]; /* beginning of k-th received i-structure */
    nrows = *(buf_ri_k[k]);
    nextrow[k]  = buf_ri_k[k]+1;  /* next row number of k-th received i-structure */
    nextai[k]   = buf_ri_k[k] + (nrows + 1);/* points to the next i-structure of k-th received i-structure */
4529   }
4530 
4531   /* set values of ba */
4532   m = merge->rowmap->n;
4533   for (i=0; i<m; i++) {
4534     arow = owners[rank] + i;
4535     bj_i = bj+bi[i];  /* col indices of the i-th row of mpimat */
4536     bnzi = bi[i+1] - bi[i];
4537     ierr = PetscMemzero(ba_i,bnzi*sizeof(PetscScalar));CHKERRQ(ierr);
4538 
4539     /* add local non-zero vals of this proc's seqmat into ba */
4540     anzi = ai[arow+1] - ai[arow];
4541     aj   = a->j + ai[arow];
4542     aa   = a->a + ai[arow];
4543     nextaj = 0;
4544     for (j=0; nextaj<anzi; j++){
4545       if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */
4546         ba_i[j] += aa[nextaj++];
4547       }
4548     }
4549 
4550     /* add received vals into ba */
4551     for (k=0; k<merge->nrecv; k++){ /* k-th received message */
4552       /* i-th row */
4553       if (i == *nextrow[k]) {
4554         anzi = *(nextai[k]+1) - *nextai[k];
4555         aj   = buf_rj[k] + *(nextai[k]);
4556         aa   = abuf_r[k] + *(nextai[k]);
4557         nextaj = 0;
4558         for (j=0; nextaj<anzi; j++){
4559           if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */
4560             ba_i[j] += aa[nextaj++];
4561           }
4562         }
4563         nextrow[k]++; nextai[k]++;
4564       }
4565     }
4566     ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr);
4567   }
4568   ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4569   ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4570 
4571   ierr = PetscFree(abuf_r[0]);CHKERRQ(ierr);
4572   ierr = PetscFree(abuf_r);CHKERRQ(ierr);
4573   ierr = PetscFree(ba_i);CHKERRQ(ierr);
4574   ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
4575   ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
4576   PetscFunctionReturn(0);
4577 }
4578 
4579 extern PetscErrorCode  MatDestroy_MPIAIJ_SeqsToMPI(Mat);
4580 
4581 #undef __FUNCT__
4582 #define __FUNCT__ "MatMerge_SeqsToMPISymbolic"
4583 PetscErrorCode  MatMerge_SeqsToMPISymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat)
4584 {
4585   PetscErrorCode       ierr;
4586   Mat                  B_mpi;
4587   Mat_SeqAIJ           *a=(Mat_SeqAIJ*)seqmat->data;
4588   PetscMPIInt          size,rank,tagi,tagj,*len_s,*len_si,*len_ri;
4589   PetscInt             **buf_rj,**buf_ri,**buf_ri_k;
4590   PetscInt             M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j;
4591   PetscInt             len,proc,*dnz,*onz;
4592   PetscInt             k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0;
4593   PetscInt             nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai;
4594   MPI_Request          *si_waits,*sj_waits,*ri_waits,*rj_waits;
4595   MPI_Status           *status;
4596   PetscFreeSpaceList   free_space=PETSC_NULL,current_space=PETSC_NULL;
4597   PetscBT              lnkbt;
4598   Mat_Merge_SeqsToMPI  *merge;
4599   PetscContainer       container;
4600 
4601   PetscFunctionBegin;
4602   ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4603 
4604   /* make sure it is a PETSc comm */
4605   ierr = PetscCommDuplicate(comm,&comm,PETSC_NULL);CHKERRQ(ierr);
4606   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4607   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4608 
4609   ierr = PetscNew(Mat_Merge_SeqsToMPI,&merge);CHKERRQ(ierr);
4610   ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr);
4611 
4612   /* determine row ownership */
4613   /*---------------------------------------------------------*/
4614   ierr = PetscLayoutCreate(comm,&merge->rowmap);CHKERRQ(ierr);
4615   ierr = PetscLayoutSetLocalSize(merge->rowmap,m);CHKERRQ(ierr);
4616   ierr = PetscLayoutSetSize(merge->rowmap,M);CHKERRQ(ierr);
4617   ierr = PetscLayoutSetBlockSize(merge->rowmap,1);CHKERRQ(ierr);
4618   ierr = PetscLayoutSetUp(merge->rowmap);CHKERRQ(ierr);
4619   ierr = PetscMalloc(size*sizeof(PetscMPIInt),&len_si);CHKERRQ(ierr);
4620   ierr = PetscMalloc(size*sizeof(PetscMPIInt),&merge->len_s);CHKERRQ(ierr);
4621 
4622   m      = merge->rowmap->n;
4623   M      = merge->rowmap->N;
4624   owners = merge->rowmap->range;
4625 
4626   /* determine the number of messages to send, their lengths */
4627   /*---------------------------------------------------------*/
4628   len_s  = merge->len_s;
4629 
4630   len = 0;  /* length of buf_si[] */
4631   merge->nsend = 0;
4632   for (proc=0; proc<size; proc++){
4633     len_si[proc] = 0;
4634     if (proc == rank){
4635       len_s[proc] = 0;
4636     } else {
4637       len_si[proc] = owners[proc+1] - owners[proc] + 1;
4638       len_s[proc] = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */
4639     }
4640     if (len_s[proc]) {
4641       merge->nsend++;
4642       nrows = 0;
4643       for (i=owners[proc]; i<owners[proc+1]; i++){
4644         if (ai[i+1] > ai[i]) nrows++;
4645       }
4646       len_si[proc] = 2*(nrows+1);
4647       len += len_si[proc];
4648     }
4649   }
4650 
4651   /* determine the number and length of messages to receive for ij-structure */
4652   /*-------------------------------------------------------------------------*/
4653   ierr = PetscGatherNumberOfMessages(comm,PETSC_NULL,len_s,&merge->nrecv);CHKERRQ(ierr);
4654   ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr);
4655 
4656   /* post the Irecv of j-structure */
4657   /*-------------------------------*/
4658   ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr);
4659   ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr);
4660 
4661   /* post the Isend of j-structure */
4662   /*--------------------------------*/
4663   ierr = PetscMalloc2(merge->nsend,MPI_Request,&si_waits,merge->nsend,MPI_Request,&sj_waits);CHKERRQ(ierr);
4664 
4665   for (proc=0, k=0; proc<size; proc++){
4666     if (!len_s[proc]) continue;
4667     i = owners[proc];
4668     ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr);
4669     k++;
4670   }
4671 
4672   /* receives and sends of j-structure are complete */
4673   /*------------------------------------------------*/
4674   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);}
4675   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);}
4676 
4677   /* send and recv i-structure */
4678   /*---------------------------*/
4679   ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr);
4680   ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr);
4681 
4682   ierr = PetscMalloc((len+1)*sizeof(PetscInt),&buf_s);CHKERRQ(ierr);
4683   buf_si = buf_s;  /* points to the beginning of k-th msg to be sent */
4684   for (proc=0,k=0; proc<size; proc++){
4685     if (!len_s[proc]) continue;
4686     /* form outgoing message for i-structure:
4687          buf_si[0]:                 nrows to be sent
4688                [1:nrows]:           row index (global)
4689                [nrows+1:2*nrows+1]: i-structure index
4690     */
4691     /*-------------------------------------------*/
4692     nrows = len_si[proc]/2 - 1;
4693     buf_si_i    = buf_si + nrows+1;
4694     buf_si[0]   = nrows;
4695     buf_si_i[0] = 0;
4696     nrows = 0;
4697     for (i=owners[proc]; i<owners[proc+1]; i++){
4698       anzi = ai[i+1] - ai[i];
4699       if (anzi) {
4700         buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */
4701         buf_si[nrows+1] = i-owners[proc]; /* local row index */
4702         nrows++;
4703       }
4704     }
4705     ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr);
4706     k++;
4707     buf_si += len_si[proc];
4708   }
4709 
4710   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);}
4711   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);}
4712 
4713   ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr);
4714   for (i=0; i<merge->nrecv; i++){
4715     ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr);
4716   }
4717 
4718   ierr = PetscFree(len_si);CHKERRQ(ierr);
4719   ierr = PetscFree(len_ri);CHKERRQ(ierr);
4720   ierr = PetscFree(rj_waits);CHKERRQ(ierr);
4721   ierr = PetscFree2(si_waits,sj_waits);CHKERRQ(ierr);
4722   ierr = PetscFree(ri_waits);CHKERRQ(ierr);
4723   ierr = PetscFree(buf_s);CHKERRQ(ierr);
4724   ierr = PetscFree(status);CHKERRQ(ierr);
4725 
4726   /* compute a local seq matrix in each processor */
4727   /*----------------------------------------------*/
4728   /* allocate bi array and free space for accumulating nonzero column info */
4729   ierr = PetscMalloc((m+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr);
4730   bi[0] = 0;
4731 
4732   /* create and initialize a linked list */
4733   nlnk = N+1;
4734   ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4735 
4736   /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */
  len  = ai[owners[rank+1]] - ai[owners[rank]];
4739   ierr = PetscFreeSpaceGet((PetscInt)(2*len+1),&free_space);CHKERRQ(ierr);
4740   current_space = free_space;
4741 
4742   /* determine symbolic info for each local row */
4743   ierr = PetscMalloc3(merge->nrecv,PetscInt*,&buf_ri_k,merge->nrecv,PetscInt*,&nextrow,merge->nrecv,PetscInt*,&nextai);CHKERRQ(ierr);
4744 
4745   for (k=0; k<merge->nrecv; k++){
    buf_ri_k[k] = buf_ri[k]; /* beginning of k-th received i-structure */
    nrows = *buf_ri_k[k];
    nextrow[k]  = buf_ri_k[k] + 1;  /* next row number of k-th received i-structure */
    nextai[k]   = buf_ri_k[k] + (nrows + 1);/* points to the next i-structure of k-th received i-structure */
4750   }
4751 
4752   ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
4753   len = 0;
4754   for (i=0;i<m;i++) {
4755     bnzi   = 0;
4756     /* add local non-zero cols of this proc's seqmat into lnk */
4757     arow   = owners[rank] + i;
4758     anzi   = ai[arow+1] - ai[arow];
4759     aj     = a->j + ai[arow];
4760     ierr = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4761     bnzi += nlnk;
4762     /* add received col data into lnk */
4763     for (k=0; k<merge->nrecv; k++){ /* k-th received message */
4764       if (i == *nextrow[k]) { /* i-th row */
4765         anzi = *(nextai[k]+1) - *nextai[k];
4766         aj   = buf_rj[k] + *nextai[k];
4767         ierr = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4768         bnzi += nlnk;
4769         nextrow[k]++; nextai[k]++;
4770       }
4771     }
4772     if (len < bnzi) len = bnzi;  /* =max(bnzi) */
4773 
4774     /* if free space is not available, make more free space */
4775     if (current_space->local_remaining<bnzi) {
4776       ierr = PetscFreeSpaceGet(bnzi+current_space->total_array_size,&current_space);CHKERRQ(ierr);
4777       nspacedouble++;
4778     }
4779     /* copy data into free space, then initialize lnk */
4780     ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
4781     ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr);
4782 
4783     current_space->array           += bnzi;
4784     current_space->local_used      += bnzi;
4785     current_space->local_remaining -= bnzi;
4786 
4787     bi[i+1] = bi[i] + bnzi;
4788   }
4789 
4790   ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
4791 
4792   ierr = PetscMalloc((bi[m]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr);
4793   ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr);
4794   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
4795 
4796   /* create symbolic parallel matrix B_mpi */
4797   /*---------------------------------------*/
4798   ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr);
4799   if (n==PETSC_DECIDE) {
4800     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr);
4801   } else {
4802     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
4803   }
4804   ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr);
4805   ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr);
4806   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
4807 
4808   /* B_mpi is not ready for use - assembly will be done by MatMerge_SeqsToMPINumeric() */
4809   B_mpi->assembled     = PETSC_FALSE;
4810   B_mpi->ops->destroy  = MatDestroy_MPIAIJ_SeqsToMPI;
4811   merge->bi            = bi;
4812   merge->bj            = bj;
4813   merge->buf_ri        = buf_ri;
4814   merge->buf_rj        = buf_rj;
4815   merge->coi           = PETSC_NULL;
4816   merge->coj           = PETSC_NULL;
4817   merge->owners_co     = PETSC_NULL;
4818 
4819   ierr = PetscCommDestroy(&comm);CHKERRQ(ierr);
4820 
4821   /* attach the supporting struct to B_mpi for reuse */
4822   ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
4823   ierr = PetscContainerSetPointer(container,merge);CHKERRQ(ierr);
4824   ierr = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr);
4825   ierr = PetscContainerDestroy(&container);CHKERRQ(ierr);
4826   *mpimat = B_mpi;
4827 
4828   ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4829   PetscFunctionReturn(0);
4830 }
4831 
4832 #undef __FUNCT__
4833 #define __FUNCT__ "MatMerge_SeqsToMPI"
4834 PetscErrorCode  MatMerge_SeqsToMPI(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat)
4835 {
4836   PetscErrorCode   ierr;
4837 
4838   PetscFunctionBegin;
4839   ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4840   if (scall == MAT_INITIAL_MATRIX){
4841     ierr = MatMerge_SeqsToMPISymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr);
4842   }
4843   ierr = MatMerge_SeqsToMPINumeric(seqmat,*mpimat);CHKERRQ(ierr);
4844   ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4845   PetscFunctionReturn(0);
4846 }
4847 
4848 #undef __FUNCT__
4849 #define __FUNCT__ "MatMPIAIJGetLocalMat"
4850 /*@
     MatMPIAIJGetLocalMat - Creates a SeqAIJ matrix from an MPIAIJ matrix by taking all its local rows and putting them into a sequential matrix with
          mlocal rows and n columns, where mlocal is the row count obtained with MatGetLocalSize() and n is the global column count obtained
          with MatGetSize()
4854 
4855     Not Collective
4856 
4857    Input Parameters:
4858 +    A - the matrix
-    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4860 
4861    Output Parameter:
4862 .    A_loc - the local sequential matrix generated
4863 
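   Example usage (a sketch; the MAT_REUSE_MATRIX call refreshes the values of a
   previously created local matrix when A's values change but its nonzero
   pattern does not):
.vb
     Mat Aloc;
     MatMPIAIJGetLocalMat(A,MAT_INITIAL_MATRIX,&Aloc);
     /* ... A's values change ... */
     MatMPIAIJGetLocalMat(A,MAT_REUSE_MATRIX,&Aloc);
.ve
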
4864     Level: developer
4865 
.seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMatCondensed()
4867 
4868 @*/
4869 PetscErrorCode  MatMPIAIJGetLocalMat(Mat A,MatReuse scall,Mat *A_loc)
4870 {
4871   PetscErrorCode  ierr;
4872   Mat_MPIAIJ      *mpimat=(Mat_MPIAIJ*)A->data;
4873   Mat_SeqAIJ      *mat,*a=(Mat_SeqAIJ*)(mpimat->A)->data,*b=(Mat_SeqAIJ*)(mpimat->B)->data;
4874   PetscInt        *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*cmap=mpimat->garray;
4875   MatScalar       *aa=a->a,*ba=b->a,*cam;
4876   PetscScalar     *ca;
4877   PetscInt        am=A->rmap->n,i,j,k,cstart=A->cmap->rstart;
4878   PetscInt        *ci,*cj,col,ncols_d,ncols_o,jo;
4879   PetscBool       match;
4880 
4881   PetscFunctionBegin;
4882   ierr = PetscTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr);
4883   if (!match) SETERRQ(((PetscObject)A)->comm, PETSC_ERR_SUP,"Requires MPIAIJ matrix as input");
4884   ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
4885   if (scall == MAT_INITIAL_MATRIX){
4886     ierr = PetscMalloc((1+am)*sizeof(PetscInt),&ci);CHKERRQ(ierr);
4887     ci[0] = 0;
4888     for (i=0; i<am; i++){
4889       ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]);
4890     }
4891     ierr = PetscMalloc((1+ci[am])*sizeof(PetscInt),&cj);CHKERRQ(ierr);
4892     ierr = PetscMalloc((1+ci[am])*sizeof(PetscScalar),&ca);CHKERRQ(ierr);
4893     k = 0;
4894     for (i=0; i<am; i++) {
4895       ncols_o = bi[i+1] - bi[i];
4896       ncols_d = ai[i+1] - ai[i];
4897       /* off-diagonal portion of A */
4898       for (jo=0; jo<ncols_o; jo++) {
4899         col = cmap[*bj];
4900         if (col >= cstart) break;
4901         cj[k]   = col; bj++;
4902         ca[k++] = *ba++;
4903       }
4904       /* diagonal portion of A */
4905       for (j=0; j<ncols_d; j++) {
4906         cj[k]   = cstart + *aj++;
4907         ca[k++] = *aa++;
4908       }
4909       /* off-diagonal portion of A */
4910       for (j=jo; j<ncols_o; j++) {
4911         cj[k]   = cmap[*bj++];
4912         ca[k++] = *ba++;
4913       }
4914     }
4915     /* put together the new matrix */
4916     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr);
4917     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
4918     /* Since these are PETSc arrays, change flags to free them as necessary. */
4919     mat          = (Mat_SeqAIJ*)(*A_loc)->data;
4920     mat->free_a  = PETSC_TRUE;
4921     mat->free_ij = PETSC_TRUE;
4922     mat->nonew   = 0;
4923   } else if (scall == MAT_REUSE_MATRIX){
4924     mat=(Mat_SeqAIJ*)(*A_loc)->data;
4925     ci = mat->i; cj = mat->j; cam = mat->a;
4926     for (i=0; i<am; i++) {
4927       /* off-diagonal portion of A */
4928       ncols_o = bi[i+1] - bi[i];
4929       for (jo=0; jo<ncols_o; jo++) {
4930         col = cmap[*bj];
4931         if (col >= cstart) break;
4932         *cam++ = *ba++; bj++;
4933       }
4934       /* diagonal portion of A */
4935       ncols_d = ai[i+1] - ai[i];
4936       for (j=0; j<ncols_d; j++) *cam++ = *aa++;
4937       /* off-diagonal portion of A */
4938       for (j=jo; j<ncols_o; j++) {
4939         *cam++ = *ba++; bj++;
4940       }
4941     }
4942   } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
4943   ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
4944   PetscFunctionReturn(0);
4945 }
4946 
4947 #undef __FUNCT__
4948 #define __FUNCT__ "MatMPIAIJGetLocalMatCondensed"
4949 /*@C
4950      MatMPIAIJGetLocalMatCondensed - Creates a SeqAIJ matrix from an MPIAIJ matrix by taking all its local rows and NON-ZERO columns
4951 
4952     Not Collective
4953 
4954    Input Parameters:
4955 +    A - the matrix
4956 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4957 -    row, col - index sets of rows and columns to extract (or PETSC_NULL)
4958 
4959    Output Parameter:
4960 .    A_loc - the local sequential matrix generated
4961 
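   Example usage (a sketch; passing PETSC_NULL for row and col selects all
   local rows and all nonzero columns):
.vb
     Mat Aloc;
     MatMPIAIJGetLocalMatCondensed(A,MAT_INITIAL_MATRIX,PETSC_NULL,PETSC_NULL,&Aloc);
.ve
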
4962     Level: developer
4963 
4964 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMat()
4965 
4966 @*/
4967 PetscErrorCode  MatMPIAIJGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc)
4968 {
4969   Mat_MPIAIJ        *a=(Mat_MPIAIJ*)A->data;
4970   PetscErrorCode    ierr;
4971   PetscInt          i,start,end,ncols,nzA,nzB,*cmap,imark,*idx;
4972   IS                isrowa,iscola;
4973   Mat               *aloc;
4974   PetscBool       match;
4975 
4976   PetscFunctionBegin;
4977   ierr = PetscTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr);
4978   if (!match) SETERRQ(((PetscObject)A)->comm, PETSC_ERR_SUP,"Requires MPIAIJ matrix as input");
4979   ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
4980   if (!row){
4981     start = A->rmap->rstart; end = A->rmap->rend;
4982     ierr = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr);
4983   } else {
4984     isrowa = *row;
4985   }
4986   if (!col){
4987     start = A->cmap->rstart;
4988     cmap  = a->garray;
4989     nzA   = a->A->cmap->n;
4990     nzB   = a->B->cmap->n;
4991     ierr  = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr);
4992     ncols = 0;
4993     for (i=0; i<nzB; i++) {
4994       if (cmap[i] < start) idx[ncols++] = cmap[i];
4995       else break;
4996     }
4997     imark = i;
4998     for (i=0; i<nzA; i++) idx[ncols++] = start + i;
4999     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i];
5000     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&iscola);CHKERRQ(ierr);
5001   } else {
5002     iscola = *col;
5003   }
5004   if (scall != MAT_INITIAL_MATRIX){
5005     ierr = PetscMalloc(sizeof(Mat),&aloc);CHKERRQ(ierr);
5006     aloc[0] = *A_loc;
5007   }
5008   ierr = MatGetSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr);
5009   *A_loc = aloc[0];
5010   ierr = PetscFree(aloc);CHKERRQ(ierr);
5011   if (!row){
5012     ierr = ISDestroy(&isrowa);CHKERRQ(ierr);
5013   }
5014   if (!col){
5015     ierr = ISDestroy(&iscola);CHKERRQ(ierr);
5016   }
5017   ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
5018   PetscFunctionReturn(0);
5019 }
5020 
5021 #undef __FUNCT__
5022 #define __FUNCT__ "MatGetBrowsOfAcols"
5023 /*@C
    MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns of local A
5025 
5026     Collective on Mat
5027 
5028    Input Parameters:
5029 +    A,B - the matrices in mpiaij format
5030 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5031 -    rowb, colb - index sets of rows and columns of B to extract (or PETSC_NULL)
5032 
5033    Output Parameter:
5034 +    rowb, colb - index sets of rows and columns of B to extract
5035 -    B_seq - the sequential matrix generated
5036 
5037     Level: developer
5038 
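   Example usage (a sketch; rowb, colb, and B_seq are created on the first call
   and must be passed back in unchanged for MAT_REUSE_MATRIX):
.vb
     IS  rowb = PETSC_NULL,colb = PETSC_NULL;
     Mat Bseq = PETSC_NULL;
     MatGetBrowsOfAcols(A,B,MAT_INITIAL_MATRIX,&rowb,&colb,&Bseq);
     /* ... B's values change, nonzero pattern fixed ... */
     MatGetBrowsOfAcols(A,B,MAT_REUSE_MATRIX,&rowb,&colb,&Bseq);
.ve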
5039 @*/
5040 PetscErrorCode  MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,Mat *B_seq)
5041 {
5042   Mat_MPIAIJ        *a=(Mat_MPIAIJ*)A->data;
5043   PetscErrorCode    ierr;
5044   PetscInt          *idx,i,start,ncols,nzA,nzB,*cmap,imark;
5045   IS                isrowb,iscolb;
5046   Mat               *bseq=PETSC_NULL;
5047 
5048   PetscFunctionBegin;
5049   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend){
5050     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
5051   }
5052   ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
5053 
5054   if (scall == MAT_INITIAL_MATRIX){
5055     start = A->cmap->rstart;
5056     cmap  = a->garray;
5057     nzA   = a->A->cmap->n;
5058     nzB   = a->B->cmap->n;
5059     ierr  = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr);
5060     ncols = 0;
5061     for (i=0; i<nzB; i++) {  /* row < local row index */
5062       if (cmap[i] < start) idx[ncols++] = cmap[i];
5063       else break;
5064     }
5065     imark = i;
5066     for (i=0; i<nzA; i++) idx[ncols++] = start + i;  /* local rows */
5067     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */
5068     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&isrowb);CHKERRQ(ierr);
5069     ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr);
5070   } else {
5071     if (!rowb || !colb) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX");
5072     isrowb = *rowb; iscolb = *colb;
5073     ierr = PetscMalloc(sizeof(Mat),&bseq);CHKERRQ(ierr);
5074     bseq[0] = *B_seq;
5075   }
5076   ierr = MatGetSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr);
5077   *B_seq = bseq[0];
5078   ierr = PetscFree(bseq);CHKERRQ(ierr);
5079   if (!rowb){
5080     ierr = ISDestroy(&isrowb);CHKERRQ(ierr);
5081   } else {
5082     *rowb = isrowb;
5083   }
5084   if (!colb){
5085     ierr = ISDestroy(&iscolb);CHKERRQ(ierr);
5086   } else {
5087     *colb = iscolb;
5088   }
5089   ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
5090   PetscFunctionReturn(0);
5091 }
5092 
5093 #undef __FUNCT__
5094 #define __FUNCT__ "MatGetBrowsOfAoCols_MPIAIJ"
5095 /*
    MatGetBrowsOfAoCols_MPIAIJ - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns
    of the OFF-DIAGONAL portion of local A
5098 
5099     Collective on Mat
5100 
5101    Input Parameters:
5102 +    A,B - the matrices in mpiaij format
5103 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5104 
5105    Output Parameter:
5106 +    startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or PETSC_NULL)
5107 .    startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or PETSC_NULL)
5108 .    bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or PETSC_NULL)
5109 -    B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N
5110 
5111     Level: developer
5112 
5113 */
5114 PetscErrorCode  MatGetBrowsOfAoCols_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscInt **startsj_s,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth)
5115 {
5116   VecScatter_MPI_General *gen_to,*gen_from;
5117   PetscErrorCode         ierr;
5118   Mat_MPIAIJ             *a=(Mat_MPIAIJ*)A->data;
5119   Mat_SeqAIJ             *b_oth;
5120   VecScatter             ctx=a->Mvctx;
5121   MPI_Comm               comm=((PetscObject)ctx)->comm;
5122   PetscMPIInt            *rprocs,*sprocs,tag=((PetscObject)ctx)->tag,rank;
5123   PetscInt               *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap->n,row,*b_othi,*b_othj;
5124   PetscScalar            *rvalues,*svalues;
5125   MatScalar              *b_otha,*bufa,*bufA;
5126   PetscInt               i,j,k,l,ll,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len;
5127   MPI_Request            *rwaits = PETSC_NULL,*swaits = PETSC_NULL;
5128   MPI_Status             *sstatus,rstatus;
5129   PetscMPIInt            jj;
5130   PetscInt               *cols,sbs,rbs;
5131   PetscScalar            *vals;
5132 
5133   PetscFunctionBegin;
5134   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend){
5135     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
5136   }
5137   ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
5138   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
5139 
5140   gen_to   = (VecScatter_MPI_General*)ctx->todata;
5141   gen_from = (VecScatter_MPI_General*)ctx->fromdata;
5142   rvalues  = gen_from->values; /* holds the length of receiving row */
5143   svalues  = gen_to->values;   /* holds the length of sending row */
5144   nrecvs   = gen_from->n;
5145   nsends   = gen_to->n;
5146 
5147   ierr = PetscMalloc2(nrecvs,MPI_Request,&rwaits,nsends,MPI_Request,&swaits);CHKERRQ(ierr);
5148   srow     = gen_to->indices;   /* local row index to be sent */
5149   sstarts  = gen_to->starts;
5150   sprocs   = gen_to->procs;
5151   sstatus  = gen_to->sstatus;
5152   sbs      = gen_to->bs;
5153   rstarts  = gen_from->starts;
5154   rprocs   = gen_from->procs;
5155   rbs      = gen_from->bs;
5156 
5157   if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX;
5158   if (scall == MAT_INITIAL_MATRIX){
5159     /* i-array */
5160     /*---------*/
5161     /*  post receives */
5162     for (i=0; i<nrecvs; i++){
5163       rowlen = (PetscInt*)rvalues + rstarts[i]*rbs;
5164       nrows = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */
5165       ierr = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5166     }
5167 
5168     /* pack the outgoing message */
5169     ierr = PetscMalloc2(nsends+1,PetscInt,&sstartsj,nrecvs+1,PetscInt,&rstartsj);CHKERRQ(ierr);
5170     sstartsj[0] = 0;  rstartsj[0] = 0;
5171     len = 0; /* total length of j or a array to be sent */
5172     k = 0;
5173     for (i=0; i<nsends; i++){
5174       rowlen = (PetscInt*)svalues + sstarts[i]*sbs;
5175       nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5176       for (j=0; j<nrows; j++) {
5177         row = srow[k] + B->rmap->range[rank]; /* global row idx */
5178         for (l=0; l<sbs; l++){
5179           ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); /* rowlength */
5180           rowlen[j*sbs+l] = ncols;
5181           len += ncols;
5182           ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr);
5183         }
5184         k++;
5185       }
5186       ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5187       sstartsj[i+1] = len;  /* starting point of (i+1)-th outgoing msg in bufj and bufa */
5188     }
5189     /* recvs and sends of i-array are completed */
5190     i = nrecvs;
5191     while (i--) {
5192       ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5193     }
5194     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
5195 
5196     /* allocate buffers for sending j and a arrays */
5197     ierr = PetscMalloc((len+1)*sizeof(PetscInt),&bufj);CHKERRQ(ierr);
5198     ierr = PetscMalloc((len+1)*sizeof(PetscScalar),&bufa);CHKERRQ(ierr);
5199 
5200     /* create i-array of B_oth */
5201     ierr = PetscMalloc((aBn+2)*sizeof(PetscInt),&b_othi);CHKERRQ(ierr);
5202     b_othi[0] = 0;
5203     len = 0; /* total length of j or a array to be received */
5204     k = 0;
5205     for (i=0; i<nrecvs; i++){
5206       rowlen = (PetscInt*)rvalues + rstarts[i]*rbs;
      nrows = rbs*(rstarts[i+1]-rstarts[i]); /* num of rows to be received */
5208       for (j=0; j<nrows; j++) {
5209         b_othi[k+1] = b_othi[k] + rowlen[j];
5210         len += rowlen[j]; k++;
5211       }
5212       rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */
5213     }
5214 
    /* allocate space for j and a arrays of B_oth */
5216     ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(PetscInt),&b_othj);CHKERRQ(ierr);
5217     ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(MatScalar),&b_otha);CHKERRQ(ierr);
5218 
5219     /* j-array */
5220     /*---------*/
5221     /*  post receives of j-array */
5222     for (i=0; i<nrecvs; i++){
5223       nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
5224       ierr = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5225     }
5226 
5227     /* pack the outgoing message j-array */
5228     k = 0;
5229     for (i=0; i<nsends; i++){
5230       nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5231       bufJ = bufj+sstartsj[i];
5232       for (j=0; j<nrows; j++) {
5233         row  = srow[k++] + B->rmap->range[rank]; /* global row idx */
5234         for (ll=0; ll<sbs; ll++){
5235           ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr);
5236           for (l=0; l<ncols; l++){
5237             *bufJ++ = cols[l];
5238           }
5239           ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr);
5240         }
5241       }
5242       ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5243     }
5244 
5245     /* recvs and sends of j-array are completed */
5246     i = nrecvs;
5247     while (i--) {
5248       ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5249     }
5250     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
5251   } else if (scall == MAT_REUSE_MATRIX){
5252     sstartsj = *startsj_s;
5253     rstartsj = *startsj_r;
5254     bufa     = *bufa_ptr;
5255     b_oth    = (Mat_SeqAIJ*)(*B_oth)->data;
5256     b_otha   = b_oth->a;
5257   } else {
    SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not possess an object container");
5259   }
5260 
5261   /* a-array */
5262   /*---------*/
5263   /*  post receives of a-array */
5264   for (i=0; i<nrecvs; i++){
5265     nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
5266     ierr = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5267   }
5268 
5269   /* pack the outgoing message a-array */
5270   k = 0;
5271   for (i=0; i<nsends; i++){
5272     nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5273     bufA = bufa+sstartsj[i];
5274     for (j=0; j<nrows; j++) {
5275       row  = srow[k++] + B->rmap->range[rank]; /* global row idx */
5276       for (ll=0; ll<sbs; ll++){
5277         ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr);
5278         for (l=0; l<ncols; l++){
5279           *bufA++ = vals[l];
5280         }
5281         ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr);
5282       }
5283     }
5284     ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5285   }
5286   /* recvs and sends of a-array are completed */
5287   i = nrecvs;
5288   while (i--) {
5289     ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5290   }
5291   if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
5292   ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr);
5293 
5294   if (scall == MAT_INITIAL_MATRIX){
5295     /* put together the new matrix */
5296     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr);
5297 
5298     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
5299     /* Since these are PETSc arrays, change flags to free them as necessary. */
5300     b_oth          = (Mat_SeqAIJ *)(*B_oth)->data;
5301     b_oth->free_a  = PETSC_TRUE;
5302     b_oth->free_ij = PETSC_TRUE;
5303     b_oth->nonew   = 0;
5304 
5305     ierr = PetscFree(bufj);CHKERRQ(ierr);
    if (!startsj_s || !bufa_ptr){
      ierr = PetscFree2(sstartsj,rstartsj);CHKERRQ(ierr);
      ierr = PetscFree(bufa);CHKERRQ(ierr);  /* free the send buffer itself, not the (possibly NULL) caller pointer */
5309     } else {
5310       *startsj_s = sstartsj;
5311       *startsj_r = rstartsj;
5312       *bufa_ptr  = bufa;
5313     }
5314   }
5315   ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
5316   PetscFunctionReturn(0);
5317 }
5318 
5319 #undef __FUNCT__
5320 #define __FUNCT__ "MatGetCommunicationStructs"
5321 /*@C
5322   MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication.
5323 
5324   Not Collective
5325 
5326   Input Parameter:
5327 . A - The matrix, of type MATMPIAIJ
5328 
5329   Output Parameters:
5330 + lvec - The local vector holding off-process values from the argument to a matrix-vector product
5331 . colmap - A map from global column index to local index into lvec
5332 - multScatter - A scatter from the argument of a matrix-vector product to lvec
5333 
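  Example usage (a minimal sketch; A is assumed to be an assembled MATMPIAIJ matrix, and the
  returned structures are borrowed references owned by A, so they must not be destroyed):
.vb
  Vec        lvec;
  VecScatter scatter;
#if defined(PETSC_USE_CTABLE)
  PetscTable colmap;
#else
  PetscInt   *colmap;
#endif
  ierr = MatGetCommunicationStructs(A,&lvec,&colmap,&scatter);CHKERRQ(ierr);
.ve
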
5334   Level: developer
5335 
5336 @*/
5337 #if defined (PETSC_USE_CTABLE)
5338 PetscErrorCode  MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter)
5339 #else
5340 PetscErrorCode  MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter)
5341 #endif
5342 {
5343   Mat_MPIAIJ *a;
5344 
5345   PetscFunctionBegin;
5346   PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
5347   PetscValidPointer(lvec, 2);
5348   PetscValidPointer(colmap, 3);
5349   PetscValidPointer(multScatter, 4);
5350   a = (Mat_MPIAIJ *) A->data;
5351   if (lvec) *lvec = a->lvec;
5352   if (colmap) *colmap = a->colmap;
5353   if (multScatter) *multScatter = a->Mvctx;
5354   PetscFunctionReturn(0);
5355 }
5356 
5357 EXTERN_C_BEGIN
5358 extern PetscErrorCode  MatConvert_MPIAIJ_MPIAIJCRL(Mat,const MatType,MatReuse,Mat*);
5359 extern PetscErrorCode  MatConvert_MPIAIJ_MPIAIJPERM(Mat,const MatType,MatReuse,Mat*);
5360 extern PetscErrorCode  MatConvert_MPIAIJ_MPISBAIJ(Mat,const MatType,MatReuse,Mat*);
5361 EXTERN_C_END
5362 
5363 #undef __FUNCT__
5364 #define __FUNCT__ "MatMatMultNumeric_MPIDense_MPIAIJ"
5365 /*
5366     Computes C = A*B as C = (B'*A')', since computing the dense*sparse product A*B directly is untenable
5367 
5368                n                       p                          p
5369         (              )       (              )         (                  )
5370       m (      A       )  *  n (       B      )   =   m (         C        )
5371         (              )       (              )         (                  )
5372 
5373 */
5374 PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C)
5375 {
5376   PetscErrorCode     ierr;
5377   Mat                At,Bt,Ct;
5378 
5379   PetscFunctionBegin;
5380   ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr);
5381   ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr);
5382   ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,1.0,&Ct);CHKERRQ(ierr);
5383   ierr = MatDestroy(&At);CHKERRQ(ierr);
5384   ierr = MatDestroy(&Bt);CHKERRQ(ierr);
5385   ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr);
5386   ierr = MatDestroy(&Ct);CHKERRQ(ierr);
5387   PetscFunctionReturn(0);
5388 }
5389 
5390 #undef __FUNCT__
5391 #define __FUNCT__ "MatMatMultSymbolic_MPIDense_MPIAIJ"
5392 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat *C)
5393 {
5394   PetscErrorCode ierr;
5395   PetscInt       m=A->rmap->n,n=B->cmap->n;
5396   Mat            Cmat;
5397 
5398   PetscFunctionBegin;
5399   if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %D != B->rmap->n %D",A->cmap->n,B->rmap->n);
5400   ierr = MatCreate(((PetscObject)A)->comm,&Cmat);CHKERRQ(ierr);
5401   ierr = MatSetSizes(Cmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
5402   ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr);
5403   ierr = MatMPIDenseSetPreallocation(Cmat,PETSC_NULL);CHKERRQ(ierr);
5404   ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5405   ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5406   *C   = Cmat;
5407   (*C)->ops->matmult = MatMatMult_MPIDense_MPIAIJ;
5408   PetscFunctionReturn(0);
5409 }
5410 
5411 /* ----------------------------------------------------------------*/
5412 #undef __FUNCT__
5413 #define __FUNCT__ "MatMatMult_MPIDense_MPIAIJ"
5414 PetscErrorCode MatMatMult_MPIDense_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C)
5415 {
5416   PetscErrorCode ierr;
5417 
5418   PetscFunctionBegin;
5419   if (scall == MAT_INITIAL_MATRIX){
5420     ierr = MatMatMultSymbolic_MPIDense_MPIAIJ(A,B,fill,C);CHKERRQ(ierr);
5421   }
5422   ierr = MatMatMultNumeric_MPIDense_MPIAIJ(A,B,*C);CHKERRQ(ierr);
5423   PetscFunctionReturn(0);
5424 }
5425 
5426 EXTERN_C_BEGIN
5427 #if defined(PETSC_HAVE_MUMPS)
5428 extern PetscErrorCode MatGetFactor_aij_mumps(Mat,MatFactorType,Mat*);
5429 #endif
5430 #if defined(PETSC_HAVE_PASTIX)
5431 extern PetscErrorCode MatGetFactor_mpiaij_pastix(Mat,MatFactorType,Mat*);
5432 #endif
5433 #if defined(PETSC_HAVE_SUPERLU_DIST)
5434 extern PetscErrorCode MatGetFactor_mpiaij_superlu_dist(Mat,MatFactorType,Mat*);
5435 #endif
5436 #if defined(PETSC_HAVE_SPOOLES)
5437 extern PetscErrorCode MatGetFactor_mpiaij_spooles(Mat,MatFactorType,Mat*);
5438 #endif
5439 EXTERN_C_END
5440 
5441 /*MC
5442    MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.
5443 
5444    Options Database Keys:
5445 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions()
5446 
5447   Level: beginner
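  Example usage (a minimal sketch; the preallocation counts of 5 diagonal and 2 off-diagonal
  nonzeros per row are illustrative placeholders):
.vb
  Mat A;
  ierr = MatCreate(comm,&A);CHKERRQ(ierr);
  ierr = MatSetSizes(A,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
  ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
  ierr = MatMPIAIJSetPreallocation(A,5,PETSC_NULL,2,PETSC_NULL);CHKERRQ(ierr);
.ve
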
5448 
5449 .seealso: MatCreateMPIAIJ()
5450 M*/
5451 
5452 EXTERN_C_BEGIN
5453 #undef __FUNCT__
5454 #define __FUNCT__ "MatCreate_MPIAIJ"
5455 PetscErrorCode  MatCreate_MPIAIJ(Mat B)
5456 {
5457   Mat_MPIAIJ     *b;
5458   PetscErrorCode ierr;
5459   PetscMPIInt    size;
5460 
5461   PetscFunctionBegin;
5462   ierr = MPI_Comm_size(((PetscObject)B)->comm,&size);CHKERRQ(ierr);
5463 
5464   ierr            = PetscNewLog(B,Mat_MPIAIJ,&b);CHKERRQ(ierr);
5465   B->data         = (void*)b;
5466   ierr            = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
5467   B->rmap->bs     = 1;
5468   B->assembled    = PETSC_FALSE;
5469 
5470   B->insertmode   = NOT_SET_VALUES;
5471   b->size         = size;
5472   ierr = MPI_Comm_rank(((PetscObject)B)->comm,&b->rank);CHKERRQ(ierr);
5473 
5474   /* build cache for off array entries formed */
5475   ierr = MatStashCreate_Private(((PetscObject)B)->comm,1,&B->stash);CHKERRQ(ierr);
5476   b->donotstash  = PETSC_FALSE;
5477   b->colmap      = 0;
5478   b->garray      = 0;
5479   b->roworiented = PETSC_TRUE;
5480 
5481   /* stuff used for matrix vector multiply */
5482   b->lvec      = PETSC_NULL;
5483   b->Mvctx     = PETSC_NULL;
5484 
5485   /* stuff for MatGetRow() */
5486   b->rowindices   = 0;
5487   b->rowvalues    = 0;
5488   b->getrowactive = PETSC_FALSE;
5489 
5490 #if defined(PETSC_HAVE_SPOOLES)
5491   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_spooles_C",
5492                                      "MatGetFactor_mpiaij_spooles",
5493                                      MatGetFactor_mpiaij_spooles);CHKERRQ(ierr);
5494 #endif
5495 #if defined(PETSC_HAVE_MUMPS)
5496   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_mumps_C",
5497                                      "MatGetFactor_aij_mumps",
5498                                      MatGetFactor_aij_mumps);CHKERRQ(ierr);
5499 #endif
5500 #if defined(PETSC_HAVE_PASTIX)
5501   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_pastix_C",
5502                                      "MatGetFactor_mpiaij_pastix",
5503                                      MatGetFactor_mpiaij_pastix);CHKERRQ(ierr);
5504 #endif
5505 #if defined(PETSC_HAVE_SUPERLU_DIST)
5506   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_superlu_dist_C",
5507                                      "MatGetFactor_mpiaij_superlu_dist",
5508                                      MatGetFactor_mpiaij_superlu_dist);CHKERRQ(ierr);
5509 #endif
5510   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
5511                                      "MatStoreValues_MPIAIJ",
5512                                      MatStoreValues_MPIAIJ);CHKERRQ(ierr);
5513   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
5514                                      "MatRetrieveValues_MPIAIJ",
5515                                      MatRetrieveValues_MPIAIJ);CHKERRQ(ierr);
5516   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
5517                                      "MatGetDiagonalBlock_MPIAIJ",
5518                                      MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr);
5519   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatIsTranspose_C",
5520                                      "MatIsTranspose_MPIAIJ",
5521                                      MatIsTranspose_MPIAIJ);CHKERRQ(ierr);
5522   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocation_C",
5523                                      "MatMPIAIJSetPreallocation_MPIAIJ",
5524                                      MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr);
5525   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",
5526                                      "MatMPIAIJSetPreallocationCSR_MPIAIJ",
5527                                      MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr);
5528   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C",
5529                                      "MatDiagonalScaleLocal_MPIAIJ",
5530                                      MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr);
5531   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpiaijperm_C",
5532                                      "MatConvert_MPIAIJ_MPIAIJPERM",
5533                                       MatConvert_MPIAIJ_MPIAIJPERM);CHKERRQ(ierr);
5534   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpiaijcrl_C",
5535                                      "MatConvert_MPIAIJ_MPIAIJCRL",
5536                                       MatConvert_MPIAIJ_MPIAIJCRL);CHKERRQ(ierr);
5537   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C",
5538                                      "MatConvert_MPIAIJ_MPISBAIJ",
5539                                       MatConvert_MPIAIJ_MPISBAIJ);CHKERRQ(ierr);
5540   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMult_mpidense_mpiaij_C",
5541                                      "MatMatMult_MPIDense_MPIAIJ",
5542                                       MatMatMult_MPIDense_MPIAIJ);CHKERRQ(ierr);
5543   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMultSymbolic_mpidense_mpiaij_C",
5544                                      "MatMatMultSymbolic_MPIDense_MPIAIJ",
5545                                      MatMatMultSymbolic_MPIDense_MPIAIJ);CHKERRQ(ierr);
5546   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMultNumeric_mpidense_mpiaij_C",
5547                                      "MatMatMultNumeric_MPIDense_MPIAIJ",
5548                                       MatMatMultNumeric_MPIDense_MPIAIJ);CHKERRQ(ierr);
5549   ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr);
5550   PetscFunctionReturn(0);
5551 }
5552 EXTERN_C_END
5553 
5554 #undef __FUNCT__
5555 #define __FUNCT__ "MatCreateMPIAIJWithSplitArrays"
5556 /*@
5557      MatCreateMPIAIJWithSplitArrays - creates an MPI AIJ matrix using arrays that contain the "diagonal"
5558          and "off-diagonal" part of the matrix in CSR format.
5559 
5560    Collective on MPI_Comm
5561 
5562    Input Parameters:
5563 +  comm - MPI communicator
5564 .  m - number of local rows (Cannot be PETSC_DECIDE)
5565 .  n - This value should be the same as the local size used in creating the
5566        x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have it
5567        calculated if N is given). For square matrices n is almost always m.
5568 .  M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given)
5569 .  N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given)
5570 .   i - row indices for "diagonal" portion of matrix
5571 .   j - column indices
5572 .   a - matrix values
5573 .   oi - row indices for "off-diagonal" portion of matrix
5574 .   oj - column indices
5575 -   oa - matrix values
5576 
5577    Output Parameter:
5578 .   mat - the matrix
5579 
5580    Level: advanced
5581 
5582    Notes:
5583        The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc. The user
5584        must free the arrays once the matrix has been destroyed and not before.
5585 
5586        The i and j indices are 0-based
5587 
5588        See MatCreateMPIAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix
5589 
5590        This sets local rows and cannot be used to set off-processor values.
5591 
5592        You cannot later use MatSetValues() to change values in this matrix.
5593 
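       Example usage: a minimal sketch that builds the 2x2 matrix [2 -1; -1 2] on two processes,
       where rank is the result of MPI_Comm_rank() on comm; note that oj holds a global column
       index, since the "off-diagonal" block is created with the full global column width, and
       that all six arrays must remain valid until the matrix is destroyed:
.vb
  PetscInt    i[2]  = {0,1}, j[1]  = {0};  /* one local row; diagonal entry in local column 0 */
  PetscInt    oi[2] = {0,1}, oj[1];        /* one off-diagonal entry in the local row         */
  PetscScalar a[1]  = {2.0}, oa[1] = {-1.0};
  Mat         A;

  oj[0] = rank ? 0 : 1;                    /* global column owned by the other process        */
  ierr  = MatCreateMPIAIJWithSplitArrays(comm,1,1,2,2,i,j,a,oi,oj,oa,&A);CHKERRQ(ierr);
.ve
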
5594 .keywords: matrix, aij, compressed row, sparse, parallel
5595 
5596 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
5597           MPIAIJ, MatCreateMPIAIJ(), MatCreateMPIAIJWithArrays()
5598 @*/
5599 PetscErrorCode  MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],
5600 								PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat)
5601 {
5602   PetscErrorCode ierr;
5603   Mat_MPIAIJ     *maij;
5604 
5605   PetscFunctionBegin;
5606   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
5607   if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
5608   if (oi[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0");
5609   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
5610   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
5611   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
5612   maij = (Mat_MPIAIJ*) (*mat)->data;
5613   maij->donotstash     = PETSC_TRUE;
5614   (*mat)->preallocated = PETSC_TRUE;
5615 
5616   ierr = PetscLayoutSetBlockSize((*mat)->rmap,1);CHKERRQ(ierr);
5617   ierr = PetscLayoutSetBlockSize((*mat)->cmap,1);CHKERRQ(ierr);
5618   ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr);
5619   ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr);
5620 
5621   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr);
5622   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr);
5623 
5624   ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5625   ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5626   ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5627   ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5628 
5629   ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5630   ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5631   PetscFunctionReturn(0);
5632 }
5633 
5634 /*
5635     Special version for direct calls from Fortran
5636 */
5637 #include <private/fortranimpl.h>
5638 
5639 #if defined(PETSC_HAVE_FORTRAN_CAPS)
5640 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ
5641 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
5642 #define matsetvaluesmpiaij_ matsetvaluesmpiaij
5643 #endif
5644 
5645 /* Change these macros so they can be used in a void function */
5646 #undef CHKERRQ
5647 #define CHKERRQ(ierr) CHKERRABORT(PETSC_COMM_WORLD,ierr)
5648 #undef SETERRQ2
5649 #define SETERRQ2(comm,ierr,b,c,d) CHKERRABORT(comm,ierr)
5650 #undef SETERRQ
5651 #define SETERRQ(c,ierr,b) CHKERRABORT(c,ierr)
5652 
5653 EXTERN_C_BEGIN
5654 #undef __FUNCT__
5655 #define __FUNCT__ "matsetvaluesmpiaij_"
5656 void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr)
5657 {
5658   Mat             mat = *mmat;
5659   PetscInt        m = *mm, n = *mn;
5660   InsertMode      addv = *maddv;
5661   Mat_MPIAIJ      *aij = (Mat_MPIAIJ*)mat->data;
5662   PetscScalar     value;
5663   PetscErrorCode  ierr;
5664 
5665   ierr = MatPreallocated(mat);CHKERRQ(ierr);
5666   if (mat->insertmode == NOT_SET_VALUES) {
5667     mat->insertmode = addv;
5668   }
5669 #if defined(PETSC_USE_DEBUG)
5670   else if (mat->insertmode != addv) {
5671     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
5672   }
5673 #endif
5674   {
5675   PetscInt        i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
5676   PetscInt        cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
5677   PetscBool       roworiented = aij->roworiented;
5678 
5679   /* Some Variables required in the macro */
5680   Mat             A = aij->A;
5681   Mat_SeqAIJ      *a = (Mat_SeqAIJ*)A->data;
5682   PetscInt        *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
5683   MatScalar       *aa = a->a;
5684   PetscBool       ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES))?PETSC_TRUE:PETSC_FALSE);
5685   Mat             B = aij->B;
5686   Mat_SeqAIJ      *b = (Mat_SeqAIJ*)B->data;
5687   PetscInt        *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
5688   MatScalar       *ba = b->a;
5689 
5690   PetscInt        *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
5691   PetscInt        nonew = a->nonew;
5692   MatScalar       *ap1,*ap2;
5693 
5694   PetscFunctionBegin;
5695   for (i=0; i<m; i++) {
5696     if (im[i] < 0) continue;
5697 #if defined(PETSC_USE_DEBUG)
5698     if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
5699 #endif
5700     if (im[i] >= rstart && im[i] < rend) {
5701       row      = im[i] - rstart;
5702       lastcol1 = -1;
5703       rp1      = aj + ai[row];
5704       ap1      = aa + ai[row];
5705       rmax1    = aimax[row];
5706       nrow1    = ailen[row];
5707       low1     = 0;
5708       high1    = nrow1;
5709       lastcol2 = -1;
5710       rp2      = bj + bi[row];
5711       ap2      = ba + bi[row];
5712       rmax2    = bimax[row];
5713       nrow2    = bilen[row];
5714       low2     = 0;
5715       high2    = nrow2;
5716 
5717       for (j=0; j<n; j++) {
5718         if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
5719         if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
5720         if (in[j] >= cstart && in[j] < cend){
5721           col = in[j] - cstart;
5722           MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
5723         } else if (in[j] < 0) continue;
5724 #if defined(PETSC_USE_DEBUG)
5725         else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
5726 #endif
5727         else {
5728           if (mat->was_assembled) {
5729             if (!aij->colmap) {
5730               ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
5731             }
5732 #if defined (PETSC_USE_CTABLE)
5733             ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
5734             col--;
5735 #else
5736             col = aij->colmap[in[j]] - 1;
5737 #endif
5738             if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
5739               ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
5740               col =  in[j];
5741               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
5742               B = aij->B;
5743               b = (Mat_SeqAIJ*)B->data;
5744               bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
5745               rp2      = bj + bi[row];
5746               ap2      = ba + bi[row];
5747               rmax2    = bimax[row];
5748               nrow2    = bilen[row];
5749               low2     = 0;
5750               high2    = nrow2;
5751               bm       = aij->B->rmap->n;
5752               ba = b->a;
5753             }
5754           } else col = in[j];
5755           MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
5756         }
5757       }
5758     } else {
5759       if (!aij->donotstash) {
5760         if (roworiented) {
5761           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
5762         } else {
5763           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
5764         }
5765       }
5766     }
5767   }}
5768   PetscFunctionReturnVoid();
5769 }
5770 EXTERN_C_END
5771 
5772