xref: /petsc/src/mat/impls/aij/mpi/mpiaij.c (revision fbada4e6d62d82be69a0720ca41ff3cebed5b071)
1 
2 #include <../src/mat/impls/aij/mpi/mpiaij.h>   /*I "petscmat.h" I*/
3 #include <petscblaslapack.h>
4 
5 /*MC
6    MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices.
7 
8    This matrix type is identical to MATSEQAIJ when constructed with a single process communicator,
9    and MATMPIAIJ otherwise.  As a result, for single process communicators,
10   MatSeqAIJSetPreallocation is supported, and similarly MatMPIAIJSetPreallocation is supported
11   for communicators controlling multiple processes.  It is recommended that you call both of
12   the above preallocation routines for simplicity.
13 
14    Options Database Keys:
15 . -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions()
16 
17   Developer Notes: Subclasses include MATAIJCUSP, MATAIJPERM, MATAIJCRL, and also automatically switches over to use inodes when
18    enough exist.
19 
20   Level: beginner
21 
22 .seealso: MatCreateMPIAIJ(), MatCreateSeqAIJ(), MATSEQAIJ,MATMPIAIJ
23 M*/
24 
25 /*MC
26    MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices.
27 
28    This matrix type is identical to MATSEQAIJCRL when constructed with a single process communicator,
29    and MATMPIAIJCRL otherwise.  As a result, for single process communicators,
30    MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
31   for communicators controlling multiple processes.  It is recommended that you call both of
32   the above preallocation routines for simplicity.
33 
34    Options Database Keys:
35 . -mat_type aijcrl - sets the matrix type to "aijcrl" during a call to MatSetFromOptions()
36 
37   Level: beginner
38 
39 .seealso: MatCreateMPIAIJCRL,MATSEQAIJCRL,MATMPIAIJCRL, MATSEQAIJCRL, MATMPIAIJCRL
40 M*/
41 
42 #undef __FUNCT__
43 #define __FUNCT__ "MatFindNonZeroRows_MPIAIJ"
44 PetscErrorCode MatFindNonZeroRows_MPIAIJ(Mat M,IS *keptrows)
45 {
46   PetscErrorCode  ierr;
47   Mat_MPIAIJ      *mat = (Mat_MPIAIJ*)M->data;
48   Mat_SeqAIJ      *a = (Mat_SeqAIJ*)mat->A->data;
49   Mat_SeqAIJ      *b = (Mat_SeqAIJ*)mat->B->data;
50   const PetscInt  *ia,*ib;
51   const MatScalar *aa,*bb;
52   PetscInt        na,nb,i,j,*rows,cnt=0,n0rows;
53   PetscInt        m = M->rmap->n,rstart = M->rmap->rstart;
54 
55   PetscFunctionBegin;
56   *keptrows = 0;
57   ia = a->i;
58   ib = b->i;
59   for (i=0; i<m; i++) {
60     na = ia[i+1] - ia[i];
61     nb = ib[i+1] - ib[i];
62     if (!na && !nb) {
63       cnt++;
64       goto ok1;
65     }
66     aa = a->a + ia[i];
67     for (j=0; j<na; j++) {
68       if (aa[j] != 0.0) goto ok1;
69     }
70     bb = b->a + ib[i];
71     for (j=0; j <nb; j++) {
72       if (bb[j] != 0.0) goto ok1;
73     }
74     cnt++;
75     ok1:;
76   }
77   ierr = MPI_Allreduce(&cnt,&n0rows,1,MPIU_INT,MPI_SUM,((PetscObject)M)->comm);CHKERRQ(ierr);
78   if (!n0rows) PetscFunctionReturn(0);
79   ierr = PetscMalloc((M->rmap->n-cnt)*sizeof(PetscInt),&rows);CHKERRQ(ierr);
80   cnt = 0;
81   for (i=0; i<m; i++) {
82     na = ia[i+1] - ia[i];
83     nb = ib[i+1] - ib[i];
84     if (!na && !nb) continue;
85     aa = a->a + ia[i];
86     for(j=0; j<na;j++) {
87       if (aa[j] != 0.0) {
88         rows[cnt++] = rstart + i;
89         goto ok2;
90       }
91     }
92     bb = b->a + ib[i];
93     for (j=0; j<nb; j++) {
94       if (bb[j] != 0.0) {
95         rows[cnt++] = rstart + i;
96         goto ok2;
97       }
98     }
99     ok2:;
100   }
101   ierr = ISCreateGeneral(PETSC_COMM_WORLD,cnt,rows,PETSC_OWN_POINTER,keptrows);CHKERRQ(ierr);
102   PetscFunctionReturn(0);
103 }
104 
105 #undef __FUNCT__
106 #define __FUNCT__ "MatGetColumnNorms_MPIAIJ"
107 PetscErrorCode MatGetColumnNorms_MPIAIJ(Mat A,NormType type,PetscReal *norms)
108 {
109   PetscErrorCode ierr;
110   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)A->data;
111   PetscInt       i,n,*garray = aij->garray;
112   Mat_SeqAIJ     *a_aij = (Mat_SeqAIJ*) aij->A->data;
113   Mat_SeqAIJ     *b_aij = (Mat_SeqAIJ*) aij->B->data;
114   PetscReal      *work;
115 
116   PetscFunctionBegin;
117   ierr = MatGetSize(A,PETSC_NULL,&n);CHKERRQ(ierr);
118   ierr = PetscMalloc(n*sizeof(PetscReal),&work);CHKERRQ(ierr);
119   ierr = PetscMemzero(work,n*sizeof(PetscReal));CHKERRQ(ierr);
120   if (type == NORM_2) {
121     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
122       work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]*a_aij->a[i]);
123     }
124     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
125       work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]*b_aij->a[i]);
126     }
127   } else if (type == NORM_1) {
128     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
129       work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]);
130     }
131     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
132       work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]);
133     }
134   } else if (type == NORM_INFINITY) {
135     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
136       work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]);
137     }
138     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
139       work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]),work[garray[b_aij->j[i]]]);
140     }
141 
142   } else SETERRQ(((PetscObject)A)->comm,PETSC_ERR_ARG_WRONG,"Unknown NormType");
143   if (type == NORM_INFINITY) {
144     ierr = MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,A->hdr.comm);CHKERRQ(ierr);
145   } else {
146     ierr = MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,A->hdr.comm);CHKERRQ(ierr);
147   }
148   ierr = PetscFree(work);CHKERRQ(ierr);
149   if (type == NORM_2) {
150     for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]);
151   }
152   PetscFunctionReturn(0);
153 }
154 
155 #undef __FUNCT__
156 #define __FUNCT__ "MatDistribute_MPIAIJ"
157 /*
158     Distributes a SeqAIJ matrix across a set of processes. Code stolen from
159     MatLoad_MPIAIJ(). Horrible lack of reuse. Should be a routine for each matrix type.
160 
161     Only for square matrices
162 */
163 PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat)
164 {
165   PetscMPIInt    rank,size;
166   PetscInt       *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz,*gmataj,cnt,row,*ld;
167   PetscErrorCode ierr;
168   Mat            mat;
169   Mat_SeqAIJ     *gmata;
170   PetscMPIInt    tag;
171   MPI_Status     status;
172   PetscBool      aij;
173   MatScalar      *gmataa,*ao,*ad,*gmataarestore=0;
174 
175   PetscFunctionBegin;
176   CHKMEMQ;
177   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
178   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
179   if (!rank) {
180     ierr = PetscTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);CHKERRQ(ierr);
181     if (!aij) SETERRQ1(((PetscObject)gmat)->comm,PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name);
182   }
183   if (reuse == MAT_INITIAL_MATRIX) {
184     ierr = MatCreate(comm,&mat);CHKERRQ(ierr);
185     ierr = MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
186     ierr = MatSetType(mat,MATAIJ);CHKERRQ(ierr);
187     ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr);
188     ierr = PetscMalloc2(m,PetscInt,&dlens,m,PetscInt,&olens);CHKERRQ(ierr);
189     ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
190     rowners[0] = 0;
191     for (i=2; i<=size; i++) {
192       rowners[i] += rowners[i-1];
193     }
194     rstart = rowners[rank];
195     rend   = rowners[rank+1];
196     ierr   = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
197     if (!rank) {
198       gmata = (Mat_SeqAIJ*) gmat->data;
199       /* send row lengths to all processors */
200       for (i=0; i<m; i++) dlens[i] = gmata->ilen[i];
201       for (i=1; i<size; i++) {
202 	ierr = MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
203       }
204       /* determine number diagonal and off-diagonal counts */
205       ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
206       ierr = PetscMalloc(m*sizeof(PetscInt),&ld);CHKERRQ(ierr);
207       ierr = PetscMemzero(ld,m*sizeof(PetscInt));CHKERRQ(ierr);
208       jj = 0;
209       for (i=0; i<m; i++) {
210 	for (j=0; j<dlens[i]; j++) {
211           if (gmata->j[jj] < rstart) ld[i]++;
212 	  if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++;
213 	  jj++;
214 	}
215       }
216       /* send column indices to other processes */
217       for (i=1; i<size; i++) {
218 	nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
219 	ierr = MPI_Send(&nz,1,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
220 	ierr = MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
221       }
222 
223       /* send numerical values to other processes */
224       for (i=1; i<size; i++) {
225         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
226         ierr = MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
227       }
228       gmataa = gmata->a;
229       gmataj = gmata->j;
230 
231     } else {
232       /* receive row lengths */
233       ierr = MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
234       /* receive column indices */
235       ierr = MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
236       ierr = PetscMalloc2(nz,PetscScalar,&gmataa,nz,PetscInt,&gmataj);CHKERRQ(ierr);
237       ierr = MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
238       /* determine number diagonal and off-diagonal counts */
239       ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
240       ierr = PetscMalloc(m*sizeof(PetscInt),&ld);CHKERRQ(ierr);
241       ierr = PetscMemzero(ld,m*sizeof(PetscInt));CHKERRQ(ierr);
242       jj = 0;
243       for (i=0; i<m; i++) {
244 	for (j=0; j<dlens[i]; j++) {
245           if (gmataj[jj] < rstart) ld[i]++;
246 	  if (gmataj[jj] < rstart || gmataj[jj] >= rend) olens[i]++;
247 	  jj++;
248 	}
249       }
250       /* receive numerical values */
251       ierr = PetscMemzero(gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr);
252       ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
253     }
254     /* set preallocation */
255     for (i=0; i<m; i++) {
256       dlens[i] -= olens[i];
257     }
258     ierr = MatSeqAIJSetPreallocation(mat,0,dlens);CHKERRQ(ierr);
259     ierr = MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);CHKERRQ(ierr);
260 
261     for (i=0; i<m; i++) {
262       dlens[i] += olens[i];
263     }
264     cnt  = 0;
265     for (i=0; i<m; i++) {
266       row  = rstart + i;
267       ierr = MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);CHKERRQ(ierr);
268       cnt += dlens[i];
269     }
270     if (rank) {
271       ierr = PetscFree2(gmataa,gmataj);CHKERRQ(ierr);
272     }
273     ierr = PetscFree2(dlens,olens);CHKERRQ(ierr);
274     ierr = PetscFree(rowners);CHKERRQ(ierr);
275     ((Mat_MPIAIJ*)(mat->data))->ld = ld;
276     *inmat = mat;
277   } else {   /* column indices are already set; only need to move over numerical values from process 0 */
278     Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data;
279     Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data;
280     mat   = *inmat;
281     ierr  = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
282     if (!rank) {
283       /* send numerical values to other processes */
284       gmata = (Mat_SeqAIJ*) gmat->data;
285       ierr   = MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);CHKERRQ(ierr);
286       gmataa = gmata->a;
287       for (i=1; i<size; i++) {
288         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
289         ierr = MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
290       }
291       nz   = gmata->i[rowners[1]]-gmata->i[rowners[0]];
292     } else {
293       /* receive numerical values from process 0*/
294       nz   = Ad->nz + Ao->nz;
295       ierr = PetscMalloc(nz*sizeof(PetscScalar),&gmataa);CHKERRQ(ierr); gmataarestore = gmataa;
296       ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
297     }
298     /* transfer numerical values into the diagonal A and off diagonal B parts of mat */
299     ld = ((Mat_MPIAIJ*)(mat->data))->ld;
300     ad = Ad->a;
301     ao = Ao->a;
302     if (mat->rmap->n) {
303       i  = 0;
304       nz = ld[i];                                   ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
305       nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
306     }
307     for (i=1; i<mat->rmap->n; i++) {
308       nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
309       nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
310     }
311     i--;
312     if (mat->rmap->n) {
313       nz = Ao->i[i+1] - Ao->i[i] - ld[i];           ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
314     }
315     if (rank) {
316       ierr = PetscFree(gmataarestore);CHKERRQ(ierr);
317     }
318   }
319   ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
320   ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
321   CHKMEMQ;
322   PetscFunctionReturn(0);
323 }
324 
325 /*
326   Local utility routine that creates a mapping from the global column
327 number to the local number in the off-diagonal part of the local
328 storage of the matrix.  When PETSC_USE_CTABLE is used this is scalable at
329 a slightly higher hash table cost; without it it is not scalable (each processor
330 has an order N integer array but is fast to acess.
331 */
332 #undef __FUNCT__
333 #define __FUNCT__ "CreateColmap_MPIAIJ_Private"
334 PetscErrorCode CreateColmap_MPIAIJ_Private(Mat mat)
335 {
336   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
337   PetscErrorCode ierr;
338   PetscInt       n = aij->B->cmap->n,i;
339 
340   PetscFunctionBegin;
341 #if defined (PETSC_USE_CTABLE)
342   ierr = PetscTableCreate(n,mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr);
343   for (i=0; i<n; i++){
344     ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1);CHKERRQ(ierr);
345   }
346 #else
347   ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscInt),&aij->colmap);CHKERRQ(ierr);
348   ierr = PetscLogObjectMemory(mat,mat->cmap->N*sizeof(PetscInt));CHKERRQ(ierr);
349   ierr = PetscMemzero(aij->colmap,mat->cmap->N*sizeof(PetscInt));CHKERRQ(ierr);
350   for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
351 #endif
352   PetscFunctionReturn(0);
353 }
354 
355 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \
356 { \
357     if (col <= lastcol1) low1 = 0; else high1 = nrow1; \
358     lastcol1 = col;\
359     while (high1-low1 > 5) { \
360       t = (low1+high1)/2; \
361       if (rp1[t] > col) high1 = t; \
362       else             low1  = t; \
363     } \
364       for (_i=low1; _i<high1; _i++) { \
365         if (rp1[_i] > col) break; \
366         if (rp1[_i] == col) { \
367           if (addv == ADD_VALUES) ap1[_i] += value;   \
368           else                    ap1[_i] = value; \
369           goto a_noinsert; \
370         } \
371       }  \
372       if (value == 0.0 && ignorezeroentries) {low1 = 0; high1 = nrow1;goto a_noinsert;} \
373       if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;}		\
374       if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
375       MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \
376       N = nrow1++ - 1; a->nz++; high1++; \
377       /* shift up all the later entries in this row */ \
378       for (ii=N; ii>=_i; ii--) { \
379         rp1[ii+1] = rp1[ii]; \
380         ap1[ii+1] = ap1[ii]; \
381       } \
382       rp1[_i] = col;  \
383       ap1[_i] = value;  \
384       a_noinsert: ; \
385       ailen[row] = nrow1; \
386 }
387 
388 
389 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \
390 { \
391     if (col <= lastcol2) low2 = 0; else high2 = nrow2; \
392     lastcol2 = col;\
393     while (high2-low2 > 5) { \
394       t = (low2+high2)/2; \
395       if (rp2[t] > col) high2 = t; \
396       else             low2  = t; \
397     } \
398     for (_i=low2; _i<high2; _i++) {		\
399       if (rp2[_i] > col) break;			\
400       if (rp2[_i] == col) {			      \
401 	if (addv == ADD_VALUES) ap2[_i] += value;     \
402 	else                    ap2[_i] = value;      \
403 	goto b_noinsert;			      \
404       }						      \
405     }							      \
406     if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
407     if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;}		\
408     if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
409     MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \
410     N = nrow2++ - 1; b->nz++; high2++;					\
411     /* shift up all the later entries in this row */			\
412     for (ii=N; ii>=_i; ii--) {						\
413       rp2[ii+1] = rp2[ii];						\
414       ap2[ii+1] = ap2[ii];						\
415     }									\
416     rp2[_i] = col;							\
417     ap2[_i] = value;							\
418     b_noinsert: ;								\
419     bilen[row] = nrow2;							\
420 }
421 
422 #undef __FUNCT__
423 #define __FUNCT__ "MatSetValuesRow_MPIAIJ"
424 PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[])
425 {
426   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)A->data;
427   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data;
428   PetscErrorCode ierr;
429   PetscInt       l,*garray = mat->garray,diag;
430 
431   PetscFunctionBegin;
432   /* code only works for square matrices A */
433 
434   /* find size of row to the left of the diagonal part */
435   ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr);
436   row  = row - diag;
437   for (l=0; l<b->i[row+1]-b->i[row]; l++) {
438     if (garray[b->j[b->i[row]+l]] > diag) break;
439   }
440   ierr = PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));CHKERRQ(ierr);
441 
442   /* diagonal part */
443   ierr = PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));CHKERRQ(ierr);
444 
445   /* right of diagonal part */
446   ierr = PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));CHKERRQ(ierr);
447   PetscFunctionReturn(0);
448 }
449 
450 #undef __FUNCT__
451 #define __FUNCT__ "MatSetValues_MPIAIJ"
452 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
453 {
454   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
455   PetscScalar    value;
456   PetscErrorCode ierr;
457   PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
458   PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
459   PetscBool      roworiented = aij->roworiented;
460 
461   /* Some Variables required in the macro */
462   Mat            A = aij->A;
463   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data;
464   PetscInt       *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
465   MatScalar      *aa = a->a;
466   PetscBool      ignorezeroentries = a->ignorezeroentries;
467   Mat            B = aij->B;
468   Mat_SeqAIJ     *b = (Mat_SeqAIJ*)B->data;
469   PetscInt       *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
470   MatScalar      *ba = b->a;
471 
472   PetscInt       *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
473   PetscInt       nonew = a->nonew;
474   MatScalar      *ap1,*ap2;
475 
476   PetscFunctionBegin;
477   if (v) PetscValidScalarPointer(v,6);
478   for (i=0; i<m; i++) {
479     if (im[i] < 0) continue;
480 #if defined(PETSC_USE_DEBUG)
481     if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
482 #endif
483     if (im[i] >= rstart && im[i] < rend) {
484       row      = im[i] - rstart;
485       lastcol1 = -1;
486       rp1      = aj + ai[row];
487       ap1      = aa + ai[row];
488       rmax1    = aimax[row];
489       nrow1    = ailen[row];
490       low1     = 0;
491       high1    = nrow1;
492       lastcol2 = -1;
493       rp2      = bj + bi[row];
494       ap2      = ba + bi[row];
495       rmax2    = bimax[row];
496       nrow2    = bilen[row];
497       low2     = 0;
498       high2    = nrow2;
499 
500       for (j=0; j<n; j++) {
501         if (v) {if (roworiented) value = v[i*n+j]; else value = v[i+j*m];} else value = 0.0;
502         if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
503         if (in[j] >= cstart && in[j] < cend){
504           col = in[j] - cstart;
505           MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
506         } else if (in[j] < 0) continue;
507 #if defined(PETSC_USE_DEBUG)
508         else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
509 #endif
510         else {
511           if (mat->was_assembled) {
512             if (!aij->colmap) {
513               ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
514             }
515 #if defined (PETSC_USE_CTABLE)
516             ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
517 	    col--;
518 #else
519             col = aij->colmap[in[j]] - 1;
520 #endif
521             if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
522               ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
523               col =  in[j];
524               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
525               B = aij->B;
526               b = (Mat_SeqAIJ*)B->data;
527               bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a;
528               rp2      = bj + bi[row];
529               ap2      = ba + bi[row];
530               rmax2    = bimax[row];
531               nrow2    = bilen[row];
532               low2     = 0;
533               high2    = nrow2;
534               bm       = aij->B->rmap->n;
535               ba = b->a;
536             }
537           } else col = in[j];
538           MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
539         }
540       }
541     } else {
542       if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
543       if (!aij->donotstash) {
544         if (roworiented) {
545           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
546         } else {
547           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
548         }
549       }
550     }
551   }
552   PetscFunctionReturn(0);
553 }
554 
555 #undef __FUNCT__
556 #define __FUNCT__ "MatGetValues_MPIAIJ"
557 PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
558 {
559   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
560   PetscErrorCode ierr;
561   PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
562   PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
563 
564   PetscFunctionBegin;
565   for (i=0; i<m; i++) {
566     if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/
567     if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
568     if (idxm[i] >= rstart && idxm[i] < rend) {
569       row = idxm[i] - rstart;
570       for (j=0; j<n; j++) {
571         if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */
572         if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
573         if (idxn[j] >= cstart && idxn[j] < cend){
574           col = idxn[j] - cstart;
575           ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
576         } else {
577           if (!aij->colmap) {
578             ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
579           }
580 #if defined (PETSC_USE_CTABLE)
581           ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
582           col --;
583 #else
584           col = aij->colmap[idxn[j]] - 1;
585 #endif
586           if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
587           else {
588             ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
589           }
590         }
591       }
592     } else {
593       SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
594     }
595   }
596   PetscFunctionReturn(0);
597 }
598 
599 extern PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat,Vec,Vec);
600 
601 #undef __FUNCT__
602 #define __FUNCT__ "MatAssemblyBegin_MPIAIJ"
603 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
604 {
605   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
606   PetscErrorCode ierr;
607   PetscInt       nstash,reallocs;
608   InsertMode     addv;
609 
610   PetscFunctionBegin;
611   if (aij->donotstash || mat->nooffprocentries) {
612     PetscFunctionReturn(0);
613   }
614 
615   /* make sure all processors are either in INSERTMODE or ADDMODE */
616   ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,((PetscObject)mat)->comm);CHKERRQ(ierr);
617   if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
618   mat->insertmode = addv; /* in case this processor had no cache */
619 
620   ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
621   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
622   ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
623   PetscFunctionReturn(0);
624 }
625 
626 #undef __FUNCT__
627 #define __FUNCT__ "MatAssemblyEnd_MPIAIJ"
628 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
629 {
630   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
631   Mat_SeqAIJ     *a=(Mat_SeqAIJ *)aij->A->data;
632   PetscErrorCode ierr;
633   PetscMPIInt    n;
634   PetscInt       i,j,rstart,ncols,flg;
635   PetscInt       *row,*col;
636   PetscBool      other_disassembled;
637   PetscScalar    *val;
638   InsertMode     addv = mat->insertmode;
639 
640   /* do not use 'b = (Mat_SeqAIJ *)aij->B->data' as B can be reset in disassembly */
641   PetscFunctionBegin;
642   if (!aij->donotstash && !mat->nooffprocentries) {
643     while (1) {
644       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
645       if (!flg) break;
646 
647       for (i=0; i<n;) {
648         /* Now identify the consecutive vals belonging to the same row */
649         for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
650         if (j < n) ncols = j-i;
651         else       ncols = n-i;
652         /* Now assemble all these values with a single function call */
653         ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
654         i = j;
655       }
656     }
657     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
658   }
659   ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
660   ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);
661 
662   /* determine if any processor has disassembled, if so we must
663      also disassemble ourselfs, in order that we may reassemble. */
664   /*
665      if nonzero structure of submatrix B cannot change then we know that
666      no processor disassembled thus we can skip this stuff
667   */
668   if (!((Mat_SeqAIJ*)aij->B->data)->nonew)  {
669     ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,((PetscObject)mat)->comm);CHKERRQ(ierr);
670     if (mat->was_assembled && !other_disassembled) {
671       ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
672     }
673   }
674   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
675     ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
676   }
677   ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr);
678   ierr = MatSetOption(aij->B,MAT_CHECK_COMPRESSED_ROW,PETSC_FALSE);CHKERRQ(ierr);
679   ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
680   ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);
681 
682   ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
683   aij->rowvalues = 0;
684 
685   /* used by MatAXPY() */
686   a->xtoy = 0; ((Mat_SeqAIJ *)aij->B->data)->xtoy = 0;  /* b->xtoy = 0 */
687   a->XtoY = 0; ((Mat_SeqAIJ *)aij->B->data)->XtoY = 0;  /* b->XtoY = 0 */
688 
689   ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
690   if (a->inode.size) mat->ops->multdiagonalblock = MatMultDiagonalBlock_MPIAIJ;
691   PetscFunctionReturn(0);
692 }
693 
694 #undef __FUNCT__
695 #define __FUNCT__ "MatZeroEntries_MPIAIJ"
696 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
697 {
698   Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
699   PetscErrorCode ierr;
700 
701   PetscFunctionBegin;
702   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
703   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
704   PetscFunctionReturn(0);
705 }
706 
707 #undef __FUNCT__
708 #define __FUNCT__ "MatZeroRows_MPIAIJ"
709 PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
710 {
711   Mat_MPIAIJ        *l = (Mat_MPIAIJ*)A->data;
712   PetscErrorCode    ierr;
713   PetscMPIInt       size = l->size,imdex,n,rank = l->rank,tag = ((PetscObject)A)->tag,lastidx = -1;
714   PetscInt          i,*owners = A->rmap->range;
715   PetscInt          *nprocs,j,idx,nsends,row;
716   PetscInt          nmax,*svalues,*starts,*owner,nrecvs;
717   PetscInt          *rvalues,count,base,slen,*source;
718   PetscInt          *lens,*lrows,*values,rstart=A->rmap->rstart;
719   MPI_Comm          comm = ((PetscObject)A)->comm;
720   MPI_Request       *send_waits,*recv_waits;
721   MPI_Status        recv_status,*send_status;
722   const PetscScalar *xx;
723   PetscScalar       *bb;
724 #if defined(PETSC_DEBUG)
725   PetscBool      found = PETSC_FALSE;
726 #endif
727 
728   PetscFunctionBegin;
729   /*  first count number of contributors to each processor */
730   ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
731   ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
732   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/
733   j = 0;
734   for (i=0; i<N; i++) {
735     if (lastidx > (idx = rows[i])) j = 0;
736     lastidx = idx;
737     for (; j<size; j++) {
738       if (idx >= owners[j] && idx < owners[j+1]) {
739         nprocs[2*j]++;
740         nprocs[2*j+1] = 1;
741         owner[i] = j;
742 #if defined(PETSC_DEBUG)
743         found = PETSC_TRUE;
744 #endif
745         break;
746       }
747     }
748 #if defined(PETSC_DEBUG)
749     if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
750     found = PETSC_FALSE;
751 #endif
752   }
753   nsends = 0;  for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}
754 
755   if (A->nooffproczerorows) {
756     if (nsends > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"You called MatSetOption(,MAT_NO_OFF_PROC_ZERO_ROWS,PETSC_TRUE) but set an off process zero row");
757     nrecvs = nsends;
758     nmax   = N;
759   } else {
760     /* inform other processors of number of messages and max length*/
761     ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);
762   }
763 
764   /* post receives:   */
765   ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr);
766   ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
767   for (i=0; i<nrecvs; i++) {
768     ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
769   }
770 
771   /* do sends:
772       1) starts[i] gives the starting index in svalues for stuff going to
773          the ith processor
774   */
775   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr);
776   ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
777   ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr);
778   starts[0] = 0;
779   for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
780   for (i=0; i<N; i++) {
781     svalues[starts[owner[i]]++] = rows[i];
782   }
783 
784   starts[0] = 0;
785   for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
786   count = 0;
787   for (i=0; i<size; i++) {
788     if (nprocs[2*i+1]) {
789       ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
790     }
791   }
792   ierr = PetscFree(starts);CHKERRQ(ierr);
793 
794   base = owners[rank];
795 
796   /*  wait on receives */
797   ierr   = PetscMalloc2(nrecvs,PetscInt,&lens,nrecvs,PetscInt,&source);CHKERRQ(ierr);
798   count  = nrecvs; slen = 0;
799   while (count) {
800     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
801     /* unpack receives into our local space */
802     ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);
803     source[imdex]  = recv_status.MPI_SOURCE;
804     lens[imdex]    = n;
805     slen          += n;
806     count--;
807   }
808   ierr = PetscFree(recv_waits);CHKERRQ(ierr);
809 
810   /* move the data into the send scatter */
811   ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr);
812   count = 0;
813   for (i=0; i<nrecvs; i++) {
814     values = rvalues + i*nmax;
815     for (j=0; j<lens[i]; j++) {
816       lrows[count++] = values[j] - base;
817     }
818   }
819   ierr = PetscFree(rvalues);CHKERRQ(ierr);
820   ierr = PetscFree2(lens,source);CHKERRQ(ierr);
821   ierr = PetscFree(owner);CHKERRQ(ierr);
822   ierr = PetscFree(nprocs);CHKERRQ(ierr);
823 
824   /* fix right hand side if needed */
825   if (x && b) {
826     ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr);
827     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
828     for (i=0; i<slen; i++) {
829       bb[lrows[i]] = diag*xx[lrows[i]];
830     }
831     ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr);
832     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
833   }
834   /*
835         Zero the required rows. If the "diagonal block" of the matrix
836      is square and the user wishes to set the diagonal we use separate
837      code so that MatSetValues() is not called for each diagonal allocating
838      new memory, thus calling lots of mallocs and slowing things down.
839 
840   */
841   /* must zero l->B before l->A because the (diag) case below may put values into l->B*/
842   ierr = MatZeroRows(l->B,slen,lrows,0.0,0,0);CHKERRQ(ierr);
843   if ((diag != 0.0) && (l->A->rmap->N == l->A->cmap->N)) {
844     ierr = MatZeroRows(l->A,slen,lrows,diag,0,0);CHKERRQ(ierr);
845   } else if (diag != 0.0) {
846     ierr = MatZeroRows(l->A,slen,lrows,0.0,0,0);CHKERRQ(ierr);
847     if (((Mat_SeqAIJ*)l->A->data)->nonew) {
848       SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\n\
849 MAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
850     }
851     for (i = 0; i < slen; i++) {
852       row  = lrows[i] + rstart;
853       ierr = MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr);
854     }
855     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
856     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
857   } else {
858     ierr = MatZeroRows(l->A,slen,lrows,0.0,0,0);CHKERRQ(ierr);
859   }
860   ierr = PetscFree(lrows);CHKERRQ(ierr);
861 
862   /* wait on sends */
863   if (nsends) {
864     ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
865     ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
866     ierr = PetscFree(send_status);CHKERRQ(ierr);
867   }
868   ierr = PetscFree(send_waits);CHKERRQ(ierr);
869   ierr = PetscFree(svalues);CHKERRQ(ierr);
870   PetscFunctionReturn(0);
871 }
872 
873 #undef __FUNCT__
874 #define __FUNCT__ "MatZeroRowsColumns_MPIAIJ"
875 PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
876 {
877   Mat_MPIAIJ        *l = (Mat_MPIAIJ*)A->data;
878   PetscErrorCode    ierr;
879   PetscMPIInt       size = l->size,imdex,n,rank = l->rank,tag = ((PetscObject)A)->tag,lastidx = -1;
880   PetscInt          i,*owners = A->rmap->range;
881   PetscInt          *nprocs,j,idx,nsends;
882   PetscInt          nmax,*svalues,*starts,*owner,nrecvs;
883   PetscInt          *rvalues,count,base,slen,*source;
884   PetscInt          *lens,*lrows,*values,m;
885   MPI_Comm          comm = ((PetscObject)A)->comm;
886   MPI_Request       *send_waits,*recv_waits;
887   MPI_Status        recv_status,*send_status;
888   const PetscScalar *xx;
889   PetscScalar       *bb,*mask;
890   Vec               xmask,lmask;
891   Mat_SeqAIJ        *aij = (Mat_SeqAIJ*)l->B->data;
892   const PetscInt    *aj, *ii,*ridx;
893   PetscScalar       *aa;
894 #if defined(PETSC_DEBUG)
895   PetscBool         found = PETSC_FALSE;
896 #endif
897 
898   PetscFunctionBegin;
899   /*  first count number of contributors to each processor */
900   ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
901   ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
902   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/
903   j = 0;
904   for (i=0; i<N; i++) {
905     if (lastidx > (idx = rows[i])) j = 0;
906     lastidx = idx;
907     for (; j<size; j++) {
908       if (idx >= owners[j] && idx < owners[j+1]) {
909         nprocs[2*j]++;
910         nprocs[2*j+1] = 1;
911         owner[i] = j;
912 #if defined(PETSC_DEBUG)
913         found = PETSC_TRUE;
914 #endif
915         break;
916       }
917     }
918 #if defined(PETSC_DEBUG)
919     if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
920     found = PETSC_FALSE;
921 #endif
922   }
923   nsends = 0;  for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}
924 
925   /* inform other processors of number of messages and max length*/
926   ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);
927 
928   /* post receives:   */
929   ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr);
930   ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
931   for (i=0; i<nrecvs; i++) {
932     ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
933   }
934 
935   /* do sends:
936       1) starts[i] gives the starting index in svalues for stuff going to
937          the ith processor
938   */
939   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr);
940   ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
941   ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr);
942   starts[0] = 0;
943   for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
944   for (i=0; i<N; i++) {
945     svalues[starts[owner[i]]++] = rows[i];
946   }
947 
948   starts[0] = 0;
949   for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
950   count = 0;
951   for (i=0; i<size; i++) {
952     if (nprocs[2*i+1]) {
953       ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
954     }
955   }
956   ierr = PetscFree(starts);CHKERRQ(ierr);
957 
958   base = owners[rank];
959 
960   /*  wait on receives */
961   ierr   = PetscMalloc2(nrecvs,PetscInt,&lens,nrecvs,PetscInt,&source);CHKERRQ(ierr);
962   count  = nrecvs; slen = 0;
963   while (count) {
964     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
965     /* unpack receives into our local space */
966     ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);
967     source[imdex]  = recv_status.MPI_SOURCE;
968     lens[imdex]    = n;
969     slen          += n;
970     count--;
971   }
972   ierr = PetscFree(recv_waits);CHKERRQ(ierr);
973 
974   /* move the data into the send scatter */
975   ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr);
976   count = 0;
977   for (i=0; i<nrecvs; i++) {
978     values = rvalues + i*nmax;
979     for (j=0; j<lens[i]; j++) {
980       lrows[count++] = values[j] - base;
981     }
982   }
983   ierr = PetscFree(rvalues);CHKERRQ(ierr);
984   ierr = PetscFree2(lens,source);CHKERRQ(ierr);
985   ierr = PetscFree(owner);CHKERRQ(ierr);
986   ierr = PetscFree(nprocs);CHKERRQ(ierr);
987   /* lrows are the local rows to be zeroed, slen is the number of local rows */
988 
989   /* zero diagonal part of matrix */
990   ierr = MatZeroRowsColumns(l->A,slen,lrows,diag,x,b);CHKERRQ(ierr);
991 
992   /* handle off diagonal part of matrix */
993   ierr = MatGetVecs(A,&xmask,PETSC_NULL);CHKERRQ(ierr);
994   ierr = VecDuplicate(l->lvec,&lmask);CHKERRQ(ierr);
995   ierr = VecGetArray(xmask,&bb);CHKERRQ(ierr);
996   for (i=0; i<slen; i++) {
997     bb[lrows[i]] = 1;
998   }
999   ierr = VecRestoreArray(xmask,&bb);CHKERRQ(ierr);
1000   ierr = VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1001   ierr = VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1002   ierr = VecDestroy(&xmask);CHKERRQ(ierr);
1003   ierr = VecScatterBegin(l->Mvctx,x,l->lvec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1004   ierr = VecScatterEnd(l->Mvctx,x,l->lvec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1005   ierr = VecGetArrayRead(l->lvec,&xx);CHKERRQ(ierr);
1006   ierr = VecGetArray(lmask,&mask);CHKERRQ(ierr);
1007   ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
1008 
1009   /* remove zeroed rows of off diagonal matrix */
1010   ii = aij->i;
1011   for (i=0; i<slen; i++) {
1012     ierr = PetscMemzero(aij->a + ii[lrows[i]],(ii[lrows[i]+1] - ii[lrows[i]])*sizeof(PetscScalar));CHKERRQ(ierr);
1013   }
1014 
1015   /* loop over all elements of off process part of matrix zeroing removed columns*/
1016   if (aij->compressedrow.use){
1017     m    = aij->compressedrow.nrows;
1018     ii   = aij->compressedrow.i;
1019     ridx = aij->compressedrow.rindex;
1020     for (i=0; i<m; i++){
1021       n   = ii[i+1] - ii[i];
1022       aj  = aij->j + ii[i];
1023       aa  = aij->a + ii[i];
1024 
1025       for (j=0; j<n; j++) {
1026         if (PetscAbsScalar(mask[*aj])) {
1027           bb[*ridx] -= *aa*xx[*aj];
1028           *aa        = 0.0;
1029         }
1030         aa++;
1031         aj++;
1032       }
1033       ridx++;
1034     }
1035   } else { /* do not use compressed row format */
1036     m = l->B->rmap->n;
1037     for (i=0; i<m; i++) {
1038       n   = ii[i+1] - ii[i];
1039       aj  = aij->j + ii[i];
1040       aa  = aij->a + ii[i];
1041       for (j=0; j<n; j++) {
1042         if (PetscAbsScalar(mask[*aj])) {
1043           bb[i] -= *aa*xx[*aj];
1044           *aa    = 0.0;
1045         }
1046         aa++;
1047         aj++;
1048       }
1049     }
1050   }
1051   ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
1052   ierr = VecRestoreArray(lmask,&mask);CHKERRQ(ierr);
1053   ierr = VecRestoreArrayRead(l->lvec,&xx);CHKERRQ(ierr);
1054   ierr = VecDestroy(&lmask);CHKERRQ(ierr);
1055   ierr = PetscFree(lrows);CHKERRQ(ierr);
1056 
1057   /* wait on sends */
1058   if (nsends) {
1059     ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
1060     ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
1061     ierr = PetscFree(send_status);CHKERRQ(ierr);
1062   }
1063   ierr = PetscFree(send_waits);CHKERRQ(ierr);
1064   ierr = PetscFree(svalues);CHKERRQ(ierr);
1065 
1066   PetscFunctionReturn(0);
1067 }
1068 
1069 #undef __FUNCT__
1070 #define __FUNCT__ "MatMult_MPIAIJ"
1071 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
1072 {
1073   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1074   PetscErrorCode ierr;
1075   PetscInt       nt;
1076 
1077   PetscFunctionBegin;
1078   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
1079   if (nt != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt);
1080   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1081   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
1082   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1083   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
1084   PetscFunctionReturn(0);
1085 }
1086 
1087 #undef __FUNCT__
1088 #define __FUNCT__ "MatMultDiagonalBlock_MPIAIJ"
1089 PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx)
1090 {
1091   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1092   PetscErrorCode ierr;
1093 
1094   PetscFunctionBegin;
1095   ierr = MatMultDiagonalBlock(a->A,bb,xx);CHKERRQ(ierr);
1096   PetscFunctionReturn(0);
1097 }
1098 
1099 #undef __FUNCT__
1100 #define __FUNCT__ "MatMultAdd_MPIAIJ"
1101 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1102 {
1103   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1104   PetscErrorCode ierr;
1105 
1106   PetscFunctionBegin;
1107   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1108   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1109   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1110   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
1111   PetscFunctionReturn(0);
1112 }
1113 
1114 #undef __FUNCT__
1115 #define __FUNCT__ "MatMultTranspose_MPIAIJ"
1116 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
1117 {
1118   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1119   PetscErrorCode ierr;
1120   PetscBool      merged;
1121 
1122   PetscFunctionBegin;
1123   ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr);
1124   /* do nondiagonal part */
1125   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1126   if (!merged) {
1127     /* send it on its way */
1128     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1129     /* do local part */
1130     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
1131     /* receive remote parts: note this assumes the values are not actually */
1132     /* added in yy until the next line, */
1133     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1134   } else {
1135     /* do local part */
1136     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
1137     /* send it on its way */
1138     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1139     /* values actually were received in the Begin() but we need to call this nop */
1140     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1141   }
1142   PetscFunctionReturn(0);
1143 }
1144 
1145 EXTERN_C_BEGIN
1146 #undef __FUNCT__
1147 #define __FUNCT__ "MatIsTranspose_MPIAIJ"
1148 PetscErrorCode  MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscBool  *f)
1149 {
1150   MPI_Comm       comm;
1151   Mat_MPIAIJ     *Aij = (Mat_MPIAIJ *) Amat->data, *Bij;
1152   Mat            Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs;
1153   IS             Me,Notme;
1154   PetscErrorCode ierr;
1155   PetscInt       M,N,first,last,*notme,i;
1156   PetscMPIInt    size;
1157 
1158   PetscFunctionBegin;
1159 
1160   /* Easy test: symmetric diagonal block */
1161   Bij = (Mat_MPIAIJ *) Bmat->data; Bdia = Bij->A;
1162   ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr);
1163   if (!*f) PetscFunctionReturn(0);
1164   ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr);
1165   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1166   if (size == 1) PetscFunctionReturn(0);
1167 
1168   /* Hard test: off-diagonal block. This takes a MatGetSubMatrix. */
1169   ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr);
1170   ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr);
1171   ierr = PetscMalloc((N-last+first)*sizeof(PetscInt),&notme);CHKERRQ(ierr);
1172   for (i=0; i<first; i++) notme[i] = i;
1173   for (i=last; i<M; i++) notme[i-last+first] = i;
1174   ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,PETSC_COPY_VALUES,&Notme);CHKERRQ(ierr);
1175   ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr);
1176   ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr);
1177   Aoff = Aoffs[0];
1178   ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr);
1179   Boff = Boffs[0];
1180   ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr);
1181   ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr);
1182   ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr);
1183   ierr = ISDestroy(&Me);CHKERRQ(ierr);
1184   ierr = ISDestroy(&Notme);CHKERRQ(ierr);
1185   ierr = PetscFree(notme);CHKERRQ(ierr);
1186    PetscFunctionReturn(0);
1187 }
1188 EXTERN_C_END
1189 
1190 #undef __FUNCT__
1191 #define __FUNCT__ "MatMultTransposeAdd_MPIAIJ"
1192 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1193 {
1194   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1195   PetscErrorCode ierr;
1196 
1197   PetscFunctionBegin;
1198   /* do nondiagonal part */
1199   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1200   /* send it on its way */
1201   ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1202   /* do local part */
1203   ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1204   /* receive remote parts */
1205   ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1206   PetscFunctionReturn(0);
1207 }
1208 
1209 /*
1210   This only works correctly for square matrices where the subblock A->A is the
1211    diagonal block
1212 */
1213 #undef __FUNCT__
1214 #define __FUNCT__ "MatGetDiagonal_MPIAIJ"
1215 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v)
1216 {
1217   PetscErrorCode ierr;
1218   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1219 
1220   PetscFunctionBegin;
1221   if (A->rmap->N != A->cmap->N) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
1222   if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
1223   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
1224   PetscFunctionReturn(0);
1225 }
1226 
1227 #undef __FUNCT__
1228 #define __FUNCT__ "MatScale_MPIAIJ"
1229 PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa)
1230 {
1231   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1232   PetscErrorCode ierr;
1233 
1234   PetscFunctionBegin;
1235   ierr = MatScale(a->A,aa);CHKERRQ(ierr);
1236   ierr = MatScale(a->B,aa);CHKERRQ(ierr);
1237   PetscFunctionReturn(0);
1238 }
1239 
1240 #undef __FUNCT__
1241 #define __FUNCT__ "MatDestroy_MPIAIJ"
1242 PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
1243 {
1244   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1245   PetscErrorCode ierr;
1246 
1247   PetscFunctionBegin;
1248 #if defined(PETSC_USE_LOG)
1249   PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N);
1250 #endif
1251   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
1252   ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
1253   ierr = MatDestroy(&aij->A);CHKERRQ(ierr);
1254   ierr = MatDestroy(&aij->B);CHKERRQ(ierr);
1255 #if defined (PETSC_USE_CTABLE)
1256   ierr = PetscTableDestroy(&aij->colmap);CHKERRQ(ierr);
1257 #else
1258   ierr = PetscFree(aij->colmap);CHKERRQ(ierr);
1259 #endif
1260   ierr = PetscFree(aij->garray);CHKERRQ(ierr);
1261   ierr = VecDestroy(&aij->lvec);CHKERRQ(ierr);
1262   ierr = VecScatterDestroy(&aij->Mvctx);CHKERRQ(ierr);
1263   ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
1264   ierr = PetscFree(aij->ld);CHKERRQ(ierr);
1265   ierr = PetscFree(mat->data);CHKERRQ(ierr);
1266 
1267   ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
1268   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr);
1269   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr);
1270   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr);
1271   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C","",PETSC_NULL);CHKERRQ(ierr);
1272   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
1273   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr);
1274   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C","",PETSC_NULL);CHKERRQ(ierr);
1275   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C","",PETSC_NULL);CHKERRQ(ierr);
1276   PetscFunctionReturn(0);
1277 }
1278 
1279 #undef __FUNCT__
1280 #define __FUNCT__ "MatView_MPIAIJ_Binary"
1281 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
1282 {
1283   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
1284   Mat_SeqAIJ*       A = (Mat_SeqAIJ*)aij->A->data;
1285   Mat_SeqAIJ*       B = (Mat_SeqAIJ*)aij->B->data;
1286   PetscErrorCode    ierr;
1287   PetscMPIInt       rank,size,tag = ((PetscObject)viewer)->tag;
1288   int               fd;
1289   PetscInt          nz,header[4],*row_lengths,*range=0,rlen,i;
1290   PetscInt          nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap->rstart,rnz;
1291   PetscScalar       *column_values;
1292   PetscInt          message_count,flowcontrolcount;
1293 
1294   PetscFunctionBegin;
1295   ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr);
1296   ierr = MPI_Comm_size(((PetscObject)mat)->comm,&size);CHKERRQ(ierr);
1297   nz   = A->nz + B->nz;
1298   if (!rank) {
1299     header[0] = MAT_FILE_CLASSID;
1300     header[1] = mat->rmap->N;
1301     header[2] = mat->cmap->N;
1302     ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
1303     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1304     ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1305     /* get largest number of rows any processor has */
1306     rlen = mat->rmap->n;
1307     range = mat->rmap->range;
1308     for (i=1; i<size; i++) {
1309       rlen = PetscMax(rlen,range[i+1] - range[i]);
1310     }
1311   } else {
1312     ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
1313     rlen = mat->rmap->n;
1314   }
1315 
1316   /* load up the local row counts */
1317   ierr = PetscMalloc((rlen+1)*sizeof(PetscInt),&row_lengths);CHKERRQ(ierr);
1318   for (i=0; i<mat->rmap->n; i++) {
1319     row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
1320   }
1321 
1322   /* store the row lengths to the file */
1323   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1324   if (!rank) {
1325     ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap->n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1326     for (i=1; i<size; i++) {
1327       ierr = PetscViewerFlowControlStepMaster(viewer,i,message_count,flowcontrolcount);CHKERRQ(ierr);
1328       rlen = range[i+1] - range[i];
1329       ierr = MPILong_Recv(row_lengths,rlen,MPIU_INT,i,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1330       ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1331     }
1332     ierr = PetscViewerFlowControlEndMaster(viewer,message_count);CHKERRQ(ierr);
1333   } else {
1334     ierr = PetscViewerFlowControlStepWorker(viewer,rank,message_count);CHKERRQ(ierr);
1335     ierr = MPILong_Send(row_lengths,mat->rmap->n,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1336     ierr = PetscViewerFlowControlEndWorker(viewer,message_count);CHKERRQ(ierr);
1337   }
1338   ierr = PetscFree(row_lengths);CHKERRQ(ierr);
1339 
1340   /* load up the local column indices */
1341   nzmax = nz; /* )th processor needs space a largest processor needs */
1342   ierr = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
1343   ierr = PetscMalloc((nzmax+1)*sizeof(PetscInt),&column_indices);CHKERRQ(ierr);
1344   cnt  = 0;
1345   for (i=0; i<mat->rmap->n; i++) {
1346     for (j=B->i[i]; j<B->i[i+1]; j++) {
1347       if ( (col = garray[B->j[j]]) > cstart) break;
1348       column_indices[cnt++] = col;
1349     }
1350     for (k=A->i[i]; k<A->i[i+1]; k++) {
1351       column_indices[cnt++] = A->j[k] + cstart;
1352     }
1353     for (; j<B->i[i+1]; j++) {
1354       column_indices[cnt++] = garray[B->j[j]];
1355     }
1356   }
1357   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
1358 
1359   /* store the column indices to the file */
1360    ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1361   if (!rank) {
1362     MPI_Status status;
1363     ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1364     for (i=1; i<size; i++) {
1365       ierr = PetscViewerFlowControlStepMaster(viewer,i,message_count,flowcontrolcount);CHKERRQ(ierr);
1366       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
1367       if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax);
1368       ierr = MPILong_Recv(column_indices,rnz,MPIU_INT,i,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1369       ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1370     }
1371      ierr = PetscViewerFlowControlEndMaster(viewer,message_count);CHKERRQ(ierr);
1372   } else {
1373     ierr = PetscViewerFlowControlStepWorker(viewer,rank,message_count);CHKERRQ(ierr);
1374     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1375     ierr = MPILong_Send(column_indices,nz,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1376     ierr = PetscViewerFlowControlEndWorker(viewer,message_count);CHKERRQ(ierr);
1377   }
1378   ierr = PetscFree(column_indices);CHKERRQ(ierr);
1379 
1380   /* load up the local column values */
1381   ierr = PetscMalloc((nzmax+1)*sizeof(PetscScalar),&column_values);CHKERRQ(ierr);
1382   cnt  = 0;
1383   for (i=0; i<mat->rmap->n; i++) {
1384     for (j=B->i[i]; j<B->i[i+1]; j++) {
1385       if ( garray[B->j[j]] > cstart) break;
1386       column_values[cnt++] = B->a[j];
1387     }
1388     for (k=A->i[i]; k<A->i[i+1]; k++) {
1389       column_values[cnt++] = A->a[k];
1390     }
1391     for (; j<B->i[i+1]; j++) {
1392       column_values[cnt++] = B->a[j];
1393     }
1394   }
1395   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
1396 
1397   /* store the column values to the file */
1398    ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1399   if (!rank) {
1400     MPI_Status status;
1401     ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1402     for (i=1; i<size; i++) {
1403        ierr = PetscViewerFlowControlStepMaster(viewer,i,message_count,flowcontrolcount);CHKERRQ(ierr);
1404       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
1405       if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax);
1406       ierr = MPILong_Recv(column_values,rnz,MPIU_SCALAR,i,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1407       ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1408     }
1409     ierr = PetscViewerFlowControlEndMaster(viewer,message_count);CHKERRQ(ierr);
1410   } else {
1411     ierr = PetscViewerFlowControlStepWorker(viewer,rank,message_count);CHKERRQ(ierr);
1412     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1413     ierr = MPILong_Send(column_values,nz,MPIU_SCALAR,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1414     ierr = PetscViewerFlowControlEndWorker(viewer,message_count);CHKERRQ(ierr);
1415   }
1416   ierr = PetscFree(column_values);CHKERRQ(ierr);
1417   PetscFunctionReturn(0);
1418 }
1419 
1420 #undef __FUNCT__
1421 #define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket"
1422 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
1423 {
1424   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
1425   PetscErrorCode    ierr;
1426   PetscMPIInt       rank = aij->rank,size = aij->size;
1427   PetscBool         isdraw,iascii,isbinary;
1428   PetscViewer       sviewer;
1429   PetscViewerFormat format;
1430 
1431   PetscFunctionBegin;
1432   ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1433   ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1434   ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1435   if (iascii) {
1436     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1437     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1438       MatInfo    info;
1439       PetscBool  inodes;
1440 
1441       ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr);
1442       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
1443       ierr = MatInodeGetInodeSizes(aij->A,PETSC_NULL,(PetscInt **)&inodes,PETSC_NULL);CHKERRQ(ierr);
1444       ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_TRUE);CHKERRQ(ierr);
1445       if (!inodes) {
1446         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, not using I-node routines\n",
1447 					      rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
1448       } else {
1449         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, using I-node routines\n",
1450 		    rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
1451       }
1452       ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
1453       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1454       ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
1455       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1456       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1457       ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_FALSE);CHKERRQ(ierr);
1458       ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
1459       ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr);
1460       PetscFunctionReturn(0);
1461     } else if (format == PETSC_VIEWER_ASCII_INFO) {
1462       PetscInt   inodecount,inodelimit,*inodes;
1463       ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr);
1464       if (inodes) {
1465         ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr);
1466       } else {
1467         ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr);
1468       }
1469       PetscFunctionReturn(0);
1470     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1471       PetscFunctionReturn(0);
1472     }
1473   } else if (isbinary) {
1474     if (size == 1) {
1475       ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1476       ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
1477     } else {
1478       ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr);
1479     }
1480     PetscFunctionReturn(0);
1481   } else if (isdraw) {
1482     PetscDraw  draw;
1483     PetscBool  isnull;
1484     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
1485     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
1486   }
1487 
1488   if (size == 1) {
1489     ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1490     ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
1491   } else {
1492     /* assemble the entire matrix onto first processor. */
1493     Mat         A;
1494     Mat_SeqAIJ  *Aloc;
1495     PetscInt    M = mat->rmap->N,N = mat->cmap->N,m,*ai,*aj,row,*cols,i,*ct;
1496     MatScalar   *a;
1497 
1498     if (mat->rmap->N > 1024) {
1499       PetscBool  flg = PETSC_FALSE;
1500 
1501       ierr = PetscOptionsGetBool(((PetscObject) mat)->prefix, "-mat_ascii_output_large", &flg,PETSC_NULL);CHKERRQ(ierr);
1502       if (!flg) {
1503         SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_OUTOFRANGE,"ASCII matrix output not allowed for matrices with more than 1024 rows, use binary format instead.\nYou can override this restriction using -mat_ascii_output_large.");
1504       }
1505     }
1506 
1507     ierr = MatCreate(((PetscObject)mat)->comm,&A);CHKERRQ(ierr);
1508     if (!rank) {
1509       ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
1510     } else {
1511       ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
1512     }
1513     /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */
1514     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
1515     ierr = MatMPIAIJSetPreallocation(A,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
1516     ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr);
1517 
1518     /* copy over the A part */
1519     Aloc = (Mat_SeqAIJ*)aij->A->data;
1520     m = aij->A->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1521     row = mat->rmap->rstart;
1522     for (i=0; i<ai[m]; i++) {aj[i] += mat->cmap->rstart ;}
1523     for (i=0; i<m; i++) {
1524       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
1525       row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
1526     }
1527     aj = Aloc->j;
1528     for (i=0; i<ai[m]; i++) {aj[i] -= mat->cmap->rstart;}
1529 
1530     /* copy over the B part */
1531     Aloc = (Mat_SeqAIJ*)aij->B->data;
1532     m    = aij->B->rmap->n;  ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1533     row  = mat->rmap->rstart;
1534     ierr = PetscMalloc((ai[m]+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr);
1535     ct   = cols;
1536     for (i=0; i<ai[m]; i++) {cols[i] = aij->garray[aj[i]];}
1537     for (i=0; i<m; i++) {
1538       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
1539       row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
1540     }
1541     ierr = PetscFree(ct);CHKERRQ(ierr);
1542     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1543     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1544     /*
1545        Everyone has to call to draw the matrix since the graphics waits are
1546        synchronized across all processors that share the PetscDraw object
1547     */
1548     ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
1549     if (!rank) {
1550       ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1551       /* Set the type name to MATMPIAIJ so that the correct type can be printed out by PetscObjectPrintClassNamePrefixType() in MatView_SeqAIJ_ASCII()*/
1552       PetscStrcpy(((PetscObject)((Mat_MPIAIJ*)(A->data))->A)->type_name,MATMPIAIJ);
1553       ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
1554     }
1555     ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
1556     ierr = MatDestroy(&A);CHKERRQ(ierr);
1557   }
1558   PetscFunctionReturn(0);
1559 }
1560 
1561 #undef __FUNCT__
1562 #define __FUNCT__ "MatView_MPIAIJ"
1563 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer)
1564 {
1565   PetscErrorCode ierr;
1566   PetscBool      iascii,isdraw,issocket,isbinary;
1567 
1568   PetscFunctionBegin;
1569   ierr  = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1570   ierr  = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1571   ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1572   ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
1573   if (iascii || isdraw || isbinary || issocket) {
1574     ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
1575   } else {
1576     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Viewer type %s not supported by MPIAIJ matrices",((PetscObject)viewer)->type_name);
1577   }
1578   PetscFunctionReturn(0);
1579 }
1580 
1581 #undef __FUNCT__
1582 #define __FUNCT__ "MatSOR_MPIAIJ"
1583 PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
1584 {
1585   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1586   PetscErrorCode ierr;
1587   Vec            bb1 = 0;
1588   PetscBool      hasop;
1589 
1590   PetscFunctionBegin;
1591   if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) {
1592     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
1593   }
1594 
1595   if (flag == SOR_APPLY_UPPER) {
1596     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1597     PetscFunctionReturn(0);
1598   }
1599 
1600   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
1601     if (flag & SOR_ZERO_INITIAL_GUESS) {
1602       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1603       its--;
1604     }
1605 
1606     while (its--) {
1607       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1608       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1609 
1610       /* update rhs: bb1 = bb - B*x */
1611       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1612       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1613 
1614       /* local sweep */
1615       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1616     }
1617   } else if (flag & SOR_LOCAL_FORWARD_SWEEP){
1618     if (flag & SOR_ZERO_INITIAL_GUESS) {
1619       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1620       its--;
1621     }
1622     while (its--) {
1623       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1624       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1625 
1626       /* update rhs: bb1 = bb - B*x */
1627       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1628       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1629 
1630       /* local sweep */
1631       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1632     }
1633   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){
1634     if (flag & SOR_ZERO_INITIAL_GUESS) {
1635       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1636       its--;
1637     }
1638     while (its--) {
1639       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1640       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1641 
1642       /* update rhs: bb1 = bb - B*x */
1643       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1644       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1645 
1646       /* local sweep */
1647       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1648     }
1649   }  else if (flag & SOR_EISENSTAT) {
1650     Vec         xx1;
1651 
1652     ierr = VecDuplicate(bb,&xx1);CHKERRQ(ierr);
1653     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr);
1654 
1655     ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1656     ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1657     if (!mat->diag) {
1658       ierr = MatGetVecs(matin,&mat->diag,PETSC_NULL);CHKERRQ(ierr);
1659       ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr);
1660     }
1661     ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr);
1662     if (hasop) {
1663       ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr);
1664     } else {
1665       ierr = VecPointwiseMult(bb1,mat->diag,xx);CHKERRQ(ierr);
1666     }
1667     ierr = VecAYPX(bb1,(omega-2.0)/omega,bb);CHKERRQ(ierr);
1668 
1669     ierr = MatMultAdd(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr);
1670 
1671     /* local sweep */
1672     ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr);
1673     ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr);
1674     ierr = VecDestroy(&xx1);CHKERRQ(ierr);
1675   } else SETERRQ(((PetscObject)matin)->comm,PETSC_ERR_SUP,"Parallel SOR not supported");
1676 
1677   ierr = VecDestroy(&bb1);CHKERRQ(ierr);
1678   PetscFunctionReturn(0);
1679 }
1680 
1681 #undef __FUNCT__
1682 #define __FUNCT__ "MatPermute_MPIAIJ"
1683 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B)
1684 {
1685   MPI_Comm       comm,pcomm;
1686   PetscInt       first,local_size,nrows;
1687   const PetscInt *rows;
1688   PetscMPIInt    size;
1689   IS             crowp,growp,irowp,lrowp,lcolp,icolp;
1690   PetscErrorCode ierr;
1691 
1692   PetscFunctionBegin;
1693   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
1694   /* make a collective version of 'rowp' */
1695   ierr = PetscObjectGetComm((PetscObject)rowp,&pcomm);CHKERRQ(ierr);
1696   if (pcomm==comm) {
1697     crowp = rowp;
1698   } else {
1699     ierr = ISGetSize(rowp,&nrows);CHKERRQ(ierr);
1700     ierr = ISGetIndices(rowp,&rows);CHKERRQ(ierr);
1701     ierr = ISCreateGeneral(comm,nrows,rows,PETSC_COPY_VALUES,&crowp);CHKERRQ(ierr);
1702     ierr = ISRestoreIndices(rowp,&rows);CHKERRQ(ierr);
1703   }
1704   /* collect the global row permutation and invert it */
1705   ierr = ISAllGather(crowp,&growp);CHKERRQ(ierr);
1706   ierr = ISSetPermutation(growp);CHKERRQ(ierr);
1707   if (pcomm!=comm) {
1708     ierr = ISDestroy(&crowp);CHKERRQ(ierr);
1709   }
1710   ierr = ISInvertPermutation(growp,PETSC_DECIDE,&irowp);CHKERRQ(ierr);
1711   /* get the local target indices */
1712   ierr = MatGetOwnershipRange(A,&first,PETSC_NULL);CHKERRQ(ierr);
1713   ierr = MatGetLocalSize(A,&local_size,PETSC_NULL);CHKERRQ(ierr);
1714   ierr = ISGetIndices(irowp,&rows);CHKERRQ(ierr);
1715   ierr = ISCreateGeneral(MPI_COMM_SELF,local_size,rows+first,PETSC_COPY_VALUES,&lrowp);CHKERRQ(ierr);
1716   ierr = ISRestoreIndices(irowp,&rows);CHKERRQ(ierr);
1717   ierr = ISDestroy(&irowp);CHKERRQ(ierr);
1718   /* the column permutation is so much easier;
1719      make a local version of 'colp' and invert it */
1720   ierr = PetscObjectGetComm((PetscObject)colp,&pcomm);CHKERRQ(ierr);
1721   ierr = MPI_Comm_size(pcomm,&size);CHKERRQ(ierr);
1722   if (size==1) {
1723     lcolp = colp;
1724   } else {
1725     ierr = ISGetSize(colp,&nrows);CHKERRQ(ierr);
1726     ierr = ISGetIndices(colp,&rows);CHKERRQ(ierr);
1727     ierr = ISCreateGeneral(MPI_COMM_SELF,nrows,rows,PETSC_COPY_VALUES,&lcolp);CHKERRQ(ierr);
1728   }
1729   ierr = ISSetPermutation(lcolp);CHKERRQ(ierr);
1730   ierr = ISInvertPermutation(lcolp,PETSC_DECIDE,&icolp);CHKERRQ(ierr);
1731   ierr = ISSetPermutation(icolp);CHKERRQ(ierr);
1732   if (size>1) {
1733     ierr = ISRestoreIndices(colp,&rows);CHKERRQ(ierr);
1734     ierr = ISDestroy(&lcolp);CHKERRQ(ierr);
1735   }
1736   /* now we just get the submatrix */
1737   ierr = MatGetSubMatrix_MPIAIJ_Private(A,lrowp,icolp,local_size,MAT_INITIAL_MATRIX,B);CHKERRQ(ierr);
1738   /* clean up */
1739   ierr = ISDestroy(&lrowp);CHKERRQ(ierr);
1740   ierr = ISDestroy(&icolp);CHKERRQ(ierr);
1741   PetscFunctionReturn(0);
1742 }
1743 
1744 #undef __FUNCT__
1745 #define __FUNCT__ "MatGetInfo_MPIAIJ"
1746 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1747 {
1748   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1749   Mat            A = mat->A,B = mat->B;
1750   PetscErrorCode ierr;
1751   PetscReal      isend[5],irecv[5];
1752 
1753   PetscFunctionBegin;
1754   info->block_size     = 1.0;
1755   ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1756   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1757   isend[3] = info->memory;  isend[4] = info->mallocs;
1758   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1759   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1760   isend[3] += info->memory;  isend[4] += info->mallocs;
1761   if (flag == MAT_LOCAL) {
1762     info->nz_used      = isend[0];
1763     info->nz_allocated = isend[1];
1764     info->nz_unneeded  = isend[2];
1765     info->memory       = isend[3];
1766     info->mallocs      = isend[4];
1767   } else if (flag == MAT_GLOBAL_MAX) {
1768     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,((PetscObject)matin)->comm);CHKERRQ(ierr);
1769     info->nz_used      = irecv[0];
1770     info->nz_allocated = irecv[1];
1771     info->nz_unneeded  = irecv[2];
1772     info->memory       = irecv[3];
1773     info->mallocs      = irecv[4];
1774   } else if (flag == MAT_GLOBAL_SUM) {
1775     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,((PetscObject)matin)->comm);CHKERRQ(ierr);
1776     info->nz_used      = irecv[0];
1777     info->nz_allocated = irecv[1];
1778     info->nz_unneeded  = irecv[2];
1779     info->memory       = irecv[3];
1780     info->mallocs      = irecv[4];
1781   }
1782   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1783   info->fill_ratio_needed = 0;
1784   info->factor_mallocs    = 0;
1785 
1786   PetscFunctionReturn(0);
1787 }
1788 
1789 #undef __FUNCT__
1790 #define __FUNCT__ "MatSetOption_MPIAIJ"
1791 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscBool  flg)
1792 {
1793   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1794   PetscErrorCode ierr;
1795 
1796   PetscFunctionBegin;
1797   switch (op) {
1798   case MAT_NEW_NONZERO_LOCATIONS:
1799   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1800   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1801   case MAT_KEEP_NONZERO_PATTERN:
1802   case MAT_NEW_NONZERO_LOCATION_ERR:
1803   case MAT_USE_INODES:
1804   case MAT_IGNORE_ZERO_ENTRIES:
1805     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1806     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1807     break;
1808   case MAT_ROW_ORIENTED:
1809     a->roworiented = flg;
1810     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1811     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1812     break;
1813   case MAT_NEW_DIAGONALS:
1814     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1815     break;
1816   case MAT_IGNORE_OFF_PROC_ENTRIES:
1817     a->donotstash = flg;
1818     break;
1819   case MAT_SPD:
1820     A->spd_set                         = PETSC_TRUE;
1821     A->spd                             = flg;
1822     if (flg) {
1823       A->symmetric                     = PETSC_TRUE;
1824       A->structurally_symmetric        = PETSC_TRUE;
1825       A->symmetric_set                 = PETSC_TRUE;
1826       A->structurally_symmetric_set    = PETSC_TRUE;
1827     }
1828     break;
1829   case MAT_SYMMETRIC:
1830     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1831     break;
1832   case MAT_STRUCTURALLY_SYMMETRIC:
1833     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1834     break;
1835   case MAT_HERMITIAN:
1836     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1837     break;
1838   case MAT_SYMMETRY_ETERNAL:
1839     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1840     break;
1841   default:
1842     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op);
1843   }
1844   PetscFunctionReturn(0);
1845 }
1846 
1847 #undef __FUNCT__
1848 #define __FUNCT__ "MatGetRow_MPIAIJ"
1849 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1850 {
1851   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1852   PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
1853   PetscErrorCode ierr;
1854   PetscInt       i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart;
1855   PetscInt       nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend;
1856   PetscInt       *cmap,*idx_p;
1857 
1858   PetscFunctionBegin;
1859   if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
1860   mat->getrowactive = PETSC_TRUE;
1861 
1862   if (!mat->rowvalues && (idx || v)) {
1863     /*
1864         allocate enough space to hold information from the longest row.
1865     */
1866     Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data;
1867     PetscInt   max = 1,tmp;
1868     for (i=0; i<matin->rmap->n; i++) {
1869       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1870       if (max < tmp) { max = tmp; }
1871     }
1872     ierr = PetscMalloc2(max,PetscScalar,&mat->rowvalues,max,PetscInt,&mat->rowindices);CHKERRQ(ierr);
1873   }
1874 
1875   if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows");
1876   lrow = row - rstart;
1877 
1878   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1879   if (!v)   {pvA = 0; pvB = 0;}
1880   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1881   ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1882   ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1883   nztot = nzA + nzB;
1884 
1885   cmap  = mat->garray;
1886   if (v  || idx) {
1887     if (nztot) {
1888       /* Sort by increasing column numbers, assuming A and B already sorted */
1889       PetscInt imark = -1;
1890       if (v) {
1891         *v = v_p = mat->rowvalues;
1892         for (i=0; i<nzB; i++) {
1893           if (cmap[cworkB[i]] < cstart)   v_p[i] = vworkB[i];
1894           else break;
1895         }
1896         imark = i;
1897         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1898         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1899       }
1900       if (idx) {
1901         *idx = idx_p = mat->rowindices;
1902         if (imark > -1) {
1903           for (i=0; i<imark; i++) {
1904             idx_p[i] = cmap[cworkB[i]];
1905           }
1906         } else {
1907           for (i=0; i<nzB; i++) {
1908             if (cmap[cworkB[i]] < cstart)   idx_p[i] = cmap[cworkB[i]];
1909             else break;
1910           }
1911           imark = i;
1912         }
1913         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart + cworkA[i];
1914         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]];
1915       }
1916     } else {
1917       if (idx) *idx = 0;
1918       if (v)   *v   = 0;
1919     }
1920   }
1921   *nz = nztot;
1922   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1923   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1924   PetscFunctionReturn(0);
1925 }
1926 
1927 #undef __FUNCT__
1928 #define __FUNCT__ "MatRestoreRow_MPIAIJ"
1929 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1930 {
1931   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1932 
1933   PetscFunctionBegin;
1934   if (!aij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
1935   aij->getrowactive = PETSC_FALSE;
1936   PetscFunctionReturn(0);
1937 }
1938 
1939 #undef __FUNCT__
1940 #define __FUNCT__ "MatNorm_MPIAIJ"
1941 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm)
1942 {
1943   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1944   Mat_SeqAIJ     *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data;
1945   PetscErrorCode ierr;
1946   PetscInt       i,j,cstart = mat->cmap->rstart;
1947   PetscReal      sum = 0.0;
1948   MatScalar      *v;
1949 
1950   PetscFunctionBegin;
1951   if (aij->size == 1) {
1952     ierr =  MatNorm(aij->A,type,norm);CHKERRQ(ierr);
1953   } else {
1954     if (type == NORM_FROBENIUS) {
1955       v = amat->a;
1956       for (i=0; i<amat->nz; i++) {
1957 #if defined(PETSC_USE_COMPLEX)
1958         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1959 #else
1960         sum += (*v)*(*v); v++;
1961 #endif
1962       }
1963       v = bmat->a;
1964       for (i=0; i<bmat->nz; i++) {
1965 #if defined(PETSC_USE_COMPLEX)
1966         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1967 #else
1968         sum += (*v)*(*v); v++;
1969 #endif
1970       }
1971       ierr = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPIU_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr);
1972       *norm = PetscSqrtReal(*norm);
1973     } else if (type == NORM_1) { /* max column norm */
1974       PetscReal *tmp,*tmp2;
1975       PetscInt  *jj,*garray = aij->garray;
1976       ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr);
1977       ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscReal),&tmp2);CHKERRQ(ierr);
1978       ierr = PetscMemzero(tmp,mat->cmap->N*sizeof(PetscReal));CHKERRQ(ierr);
1979       *norm = 0.0;
1980       v = amat->a; jj = amat->j;
1981       for (j=0; j<amat->nz; j++) {
1982         tmp[cstart + *jj++ ] += PetscAbsScalar(*v);  v++;
1983       }
1984       v = bmat->a; jj = bmat->j;
1985       for (j=0; j<bmat->nz; j++) {
1986         tmp[garray[*jj++]] += PetscAbsScalar(*v); v++;
1987       }
1988       ierr = MPI_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr);
1989       for (j=0; j<mat->cmap->N; j++) {
1990         if (tmp2[j] > *norm) *norm = tmp2[j];
1991       }
1992       ierr = PetscFree(tmp);CHKERRQ(ierr);
1993       ierr = PetscFree(tmp2);CHKERRQ(ierr);
1994     } else if (type == NORM_INFINITY) { /* max row norm */
1995       PetscReal ntemp = 0.0;
1996       for (j=0; j<aij->A->rmap->n; j++) {
1997         v = amat->a + amat->i[j];
1998         sum = 0.0;
1999         for (i=0; i<amat->i[j+1]-amat->i[j]; i++) {
2000           sum += PetscAbsScalar(*v); v++;
2001         }
2002         v = bmat->a + bmat->i[j];
2003         for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) {
2004           sum += PetscAbsScalar(*v); v++;
2005         }
2006         if (sum > ntemp) ntemp = sum;
2007       }
2008       ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPIU_MAX,((PetscObject)mat)->comm);CHKERRQ(ierr);
2009     } else {
2010       SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_SUP,"No support for two norm");
2011     }
2012   }
2013   PetscFunctionReturn(0);
2014 }
2015 
2016 #undef __FUNCT__
2017 #define __FUNCT__ "MatTranspose_MPIAIJ"
2018 PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout)
2019 {
2020   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2021   Mat_SeqAIJ     *Aloc=(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data;
2022   PetscErrorCode ierr;
2023   PetscInt       M = A->rmap->N,N = A->cmap->N,ma,na,mb,*ai,*aj,*bi,*bj,row,*cols,*cols_tmp,i,*d_nnz;
2024   PetscInt       cstart=A->cmap->rstart,ncol;
2025   Mat            B;
2026   MatScalar      *array;
2027 
2028   PetscFunctionBegin;
2029   if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
2030 
2031   ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n;
2032   ai = Aloc->i; aj = Aloc->j;
2033   bi = Bloc->i; bj = Bloc->j;
2034   if (reuse == MAT_INITIAL_MATRIX || *matout == A) {
2035     /* compute d_nnz for preallocation; o_nnz is approximated by d_nnz to avoid communication */
2036     ierr = PetscMalloc((1+na)*sizeof(PetscInt),&d_nnz);CHKERRQ(ierr);
2037     ierr = PetscMemzero(d_nnz,(1+na)*sizeof(PetscInt));CHKERRQ(ierr);
2038     for (i=0; i<ai[ma]; i++){
2039       d_nnz[aj[i]] ++;
2040       aj[i] += cstart; /* global col index to be used by MatSetValues() */
2041     }
2042 
2043     ierr = MatCreate(((PetscObject)A)->comm,&B);CHKERRQ(ierr);
2044     ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
2045     ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
2046     ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,d_nnz);CHKERRQ(ierr);
2047     ierr = PetscFree(d_nnz);CHKERRQ(ierr);
2048   } else {
2049     B = *matout;
2050   }
2051 
2052   /* copy over the A part */
2053   array = Aloc->a;
2054   row = A->rmap->rstart;
2055   for (i=0; i<ma; i++) {
2056     ncol = ai[i+1]-ai[i];
2057     ierr = MatSetValues(B,ncol,aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
2058     row++; array += ncol; aj += ncol;
2059   }
2060   aj = Aloc->j;
2061   for (i=0; i<ai[ma]; i++) aj[i] -= cstart; /* resume local col index */
2062 
2063   /* copy over the B part */
2064   ierr = PetscMalloc(bi[mb]*sizeof(PetscInt),&cols);CHKERRQ(ierr);
2065   ierr = PetscMemzero(cols,bi[mb]*sizeof(PetscInt));CHKERRQ(ierr);
2066   array = Bloc->a;
2067   row = A->rmap->rstart;
2068   for (i=0; i<bi[mb]; i++) {cols[i] = a->garray[bj[i]];}
2069   cols_tmp = cols;
2070   for (i=0; i<mb; i++) {
2071     ncol = bi[i+1]-bi[i];
2072     ierr = MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
2073     row++; array += ncol; cols_tmp += ncol;
2074   }
2075   ierr = PetscFree(cols);CHKERRQ(ierr);
2076 
2077   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2078   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2079   if (reuse == MAT_INITIAL_MATRIX || *matout != A) {
2080     *matout = B;
2081   } else {
2082     ierr = MatHeaderMerge(A,B);CHKERRQ(ierr);
2083   }
2084   PetscFunctionReturn(0);
2085 }
2086 
2087 #undef __FUNCT__
2088 #define __FUNCT__ "MatDiagonalScale_MPIAIJ"
2089 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
2090 {
2091   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2092   Mat            a = aij->A,b = aij->B;
2093   PetscErrorCode ierr;
2094   PetscInt       s1,s2,s3;
2095 
2096   PetscFunctionBegin;
2097   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
2098   if (rr) {
2099     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
2100     if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
2101     /* Overlap communication with computation. */
2102     ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2103   }
2104   if (ll) {
2105     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
2106     if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
2107     ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr);
2108   }
2109   /* scale  the diagonal block */
2110   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
2111 
2112   if (rr) {
2113     /* Do a scatter end and then right scale the off-diagonal block */
2114     ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2115     ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr);
2116   }
2117 
2118   PetscFunctionReturn(0);
2119 }
2120 
2121 #undef __FUNCT__
2122 #define __FUNCT__ "MatSetBlockSize_MPIAIJ"
2123 PetscErrorCode MatSetBlockSize_MPIAIJ(Mat A,PetscInt bs)
2124 {
2125   Mat_MPIAIJ     *a   = (Mat_MPIAIJ*)A->data;
2126   PetscErrorCode ierr;
2127 
2128   PetscFunctionBegin;
2129   ierr = MatSetBlockSize(a->A,bs);CHKERRQ(ierr);
2130   ierr = MatSetBlockSize(a->B,bs);CHKERRQ(ierr);
2131   ierr = PetscLayoutSetBlockSize(A->rmap,bs);CHKERRQ(ierr);
2132   ierr = PetscLayoutSetBlockSize(A->cmap,bs);CHKERRQ(ierr);
2133   PetscFunctionReturn(0);
2134 }
2135 #undef __FUNCT__
2136 #define __FUNCT__ "MatSetUnfactored_MPIAIJ"
2137 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A)
2138 {
2139   Mat_MPIAIJ     *a   = (Mat_MPIAIJ*)A->data;
2140   PetscErrorCode ierr;
2141 
2142   PetscFunctionBegin;
2143   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
2144   PetscFunctionReturn(0);
2145 }
2146 
2147 #undef __FUNCT__
2148 #define __FUNCT__ "MatEqual_MPIAIJ"
2149 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscBool  *flag)
2150 {
2151   Mat_MPIAIJ     *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data;
2152   Mat            a,b,c,d;
2153   PetscBool      flg;
2154   PetscErrorCode ierr;
2155 
2156   PetscFunctionBegin;
2157   a = matA->A; b = matA->B;
2158   c = matB->A; d = matB->B;
2159 
2160   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
2161   if (flg) {
2162     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
2163   }
2164   ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,((PetscObject)A)->comm);CHKERRQ(ierr);
2165   PetscFunctionReturn(0);
2166 }
2167 
2168 #undef __FUNCT__
2169 #define __FUNCT__ "MatCopy_MPIAIJ"
2170 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
2171 {
2172   PetscErrorCode ierr;
2173   Mat_MPIAIJ     *a = (Mat_MPIAIJ *)A->data;
2174   Mat_MPIAIJ     *b = (Mat_MPIAIJ *)B->data;
2175 
2176   PetscFunctionBegin;
2177   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
2178   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
2179     /* because of the column compression in the off-processor part of the matrix a->B,
2180        the number of columns in a->B and b->B may be different, hence we cannot call
2181        the MatCopy() directly on the two parts. If need be, we can provide a more
2182        efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices
2183        then copying the submatrices */
2184     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
2185   } else {
2186     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
2187     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
2188   }
2189   PetscFunctionReturn(0);
2190 }
2191 
2192 #undef __FUNCT__
2193 #define __FUNCT__ "MatSetUpPreallocation_MPIAIJ"
2194 PetscErrorCode MatSetUpPreallocation_MPIAIJ(Mat A)
2195 {
2196   PetscErrorCode ierr;
2197 
2198   PetscFunctionBegin;
2199   ierr =  MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
2200   PetscFunctionReturn(0);
2201 }
2202 
2203 #undef __FUNCT__
2204 #define __FUNCT__ "MatAXPY_MPIAIJ"
2205 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
2206 {
2207   PetscErrorCode ierr;
2208   PetscInt       i;
2209   Mat_MPIAIJ     *xx = (Mat_MPIAIJ *)X->data,*yy = (Mat_MPIAIJ *)Y->data;
2210   PetscBLASInt   bnz,one=1;
2211   Mat_SeqAIJ     *x,*y;
2212 
2213   PetscFunctionBegin;
2214   if (str == SAME_NONZERO_PATTERN) {
2215     PetscScalar alpha = a;
2216     x = (Mat_SeqAIJ *)xx->A->data;
2217     y = (Mat_SeqAIJ *)yy->A->data;
2218     bnz = PetscBLASIntCast(x->nz);
2219     BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
2220     x = (Mat_SeqAIJ *)xx->B->data;
2221     y = (Mat_SeqAIJ *)yy->B->data;
2222     bnz = PetscBLASIntCast(x->nz);
2223     BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
2224   } else if (str == SUBSET_NONZERO_PATTERN) {
2225     ierr = MatAXPY_SeqAIJ(yy->A,a,xx->A,str);CHKERRQ(ierr);
2226 
2227     x = (Mat_SeqAIJ *)xx->B->data;
2228     y = (Mat_SeqAIJ *)yy->B->data;
2229     if (y->xtoy && y->XtoY != xx->B) {
2230       ierr = PetscFree(y->xtoy);CHKERRQ(ierr);
2231       ierr = MatDestroy(&y->XtoY);CHKERRQ(ierr);
2232     }
2233     if (!y->xtoy) { /* get xtoy */
2234       ierr = MatAXPYGetxtoy_Private(xx->B->rmap->n,x->i,x->j,xx->garray,y->i,y->j,yy->garray,&y->xtoy);CHKERRQ(ierr);
2235       y->XtoY = xx->B;
2236       ierr = PetscObjectReference((PetscObject)xx->B);CHKERRQ(ierr);
2237     }
2238     for (i=0; i<x->nz; i++) y->a[y->xtoy[i]] += a*(x->a[i]);
2239   } else {
2240     Mat B;
2241     PetscInt *nnz_d,*nnz_o;
2242     ierr = PetscMalloc(yy->A->rmap->N*sizeof(PetscInt),&nnz_d);CHKERRQ(ierr);
2243     ierr = PetscMalloc(yy->B->rmap->N*sizeof(PetscInt),&nnz_o);CHKERRQ(ierr);
2244     ierr = MatCreate(((PetscObject)Y)->comm,&B);CHKERRQ(ierr);
2245     ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr);
2246     ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr);
2247     ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr);
2248     ierr = MatAXPYGetPreallocation_SeqAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr);
2249     ierr = MatAXPYGetPreallocation_SeqAIJ(yy->B,xx->B,nnz_o);CHKERRQ(ierr);
2250     ierr = MatMPIAIJSetPreallocation(B,PETSC_NULL,nnz_d,PETSC_NULL,nnz_o);CHKERRQ(ierr);
2251     ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr);
2252     ierr = MatHeaderReplace(Y,B);
2253     ierr = PetscFree(nnz_d);CHKERRQ(ierr);
2254     ierr = PetscFree(nnz_o);CHKERRQ(ierr);
2255   }
2256   PetscFunctionReturn(0);
2257 }
2258 
2259 extern PetscErrorCode  MatConjugate_SeqAIJ(Mat);
2260 
2261 #undef __FUNCT__
2262 #define __FUNCT__ "MatConjugate_MPIAIJ"
2263 PetscErrorCode  MatConjugate_MPIAIJ(Mat mat)
2264 {
2265 #if defined(PETSC_USE_COMPLEX)
2266   PetscErrorCode ierr;
2267   Mat_MPIAIJ     *aij = (Mat_MPIAIJ *)mat->data;
2268 
2269   PetscFunctionBegin;
2270   ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr);
2271   ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr);
2272 #else
2273   PetscFunctionBegin;
2274 #endif
2275   PetscFunctionReturn(0);
2276 }
2277 
2278 #undef __FUNCT__
2279 #define __FUNCT__ "MatRealPart_MPIAIJ"
2280 PetscErrorCode MatRealPart_MPIAIJ(Mat A)
2281 {
2282   Mat_MPIAIJ   *a = (Mat_MPIAIJ*)A->data;
2283   PetscErrorCode ierr;
2284 
2285   PetscFunctionBegin;
2286   ierr = MatRealPart(a->A);CHKERRQ(ierr);
2287   ierr = MatRealPart(a->B);CHKERRQ(ierr);
2288   PetscFunctionReturn(0);
2289 }
2290 
2291 #undef __FUNCT__
2292 #define __FUNCT__ "MatImaginaryPart_MPIAIJ"
2293 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A)
2294 {
2295   Mat_MPIAIJ   *a = (Mat_MPIAIJ*)A->data;
2296   PetscErrorCode ierr;
2297 
2298   PetscFunctionBegin;
2299   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
2300   ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
2301   PetscFunctionReturn(0);
2302 }
2303 
2304 #ifdef PETSC_HAVE_PBGL
2305 
2306 #include <boost/parallel/mpi/bsp_process_group.hpp>
2307 #include <boost/graph/distributed/ilu_default_graph.hpp>
2308 #include <boost/graph/distributed/ilu_0_block.hpp>
2309 #include <boost/graph/distributed/ilu_preconditioner.hpp>
2310 #include <boost/graph/distributed/petsc/interface.hpp>
2311 #include <boost/multi_array.hpp>
2312 #include <boost/parallel/distributed_property_map->hpp>
2313 
2314 #undef __FUNCT__
2315 #define __FUNCT__ "MatILUFactorSymbolic_MPIAIJ"
2316 /*
2317   This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu>
2318 */
2319 PetscErrorCode MatILUFactorSymbolic_MPIAIJ(Mat fact,Mat A, IS isrow, IS iscol, const MatFactorInfo *info)
2320 {
2321   namespace petsc = boost::distributed::petsc;
2322 
2323   namespace graph_dist = boost::graph::distributed;
2324   using boost::graph::distributed::ilu_default::process_group_type;
2325   using boost::graph::ilu_permuted;
2326 
2327   PetscBool       row_identity, col_identity;
2328   PetscContainer  c;
2329   PetscInt        m, n, M, N;
2330   PetscErrorCode  ierr;
2331 
2332   PetscFunctionBegin;
2333   if (info->levels != 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only levels = 0 supported for parallel ilu");
2334   ierr = ISIdentity(isrow, &row_identity);CHKERRQ(ierr);
2335   ierr = ISIdentity(iscol, &col_identity);CHKERRQ(ierr);
2336   if (!row_identity || !col_identity) {
2337     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for parallel ILU");
2338   }
2339 
2340   process_group_type pg;
2341   typedef graph_dist::ilu_default::ilu_level_graph_type  lgraph_type;
2342   lgraph_type*   lgraph_p = new lgraph_type(petsc::num_global_vertices(A), pg, petsc::matrix_distribution(A, pg));
2343   lgraph_type&   level_graph = *lgraph_p;
2344   graph_dist::ilu_default::graph_type&            graph(level_graph.graph);
2345 
2346   petsc::read_matrix(A, graph, get(boost::edge_weight, graph));
2347   ilu_permuted(level_graph);
2348 
2349   /* put together the new matrix */
2350   ierr = MatCreate(((PetscObject)A)->comm, fact);CHKERRQ(ierr);
2351   ierr = MatGetLocalSize(A, &m, &n);CHKERRQ(ierr);
2352   ierr = MatGetSize(A, &M, &N);CHKERRQ(ierr);
2353   ierr = MatSetSizes(fact, m, n, M, N);CHKERRQ(ierr);
2354   ierr = MatSetType(fact, ((PetscObject)A)->type_name);CHKERRQ(ierr);
2355   ierr = MatAssemblyBegin(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2356   ierr = MatAssemblyEnd(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2357 
2358   ierr = PetscContainerCreate(((PetscObject)A)->comm, &c);
2359   ierr = PetscContainerSetPointer(c, lgraph_p);
2360   ierr = PetscObjectCompose((PetscObject) (fact), "graph", (PetscObject) c);
2361   ierr = PetscContainerDestroy(&c);
2362   PetscFunctionReturn(0);
2363 }
2364 
2365 #undef __FUNCT__
2366 #define __FUNCT__ "MatLUFactorNumeric_MPIAIJ"
2367 PetscErrorCode MatLUFactorNumeric_MPIAIJ(Mat B,Mat A, const MatFactorInfo *info)
2368 {
2369   PetscFunctionBegin;
2370   PetscFunctionReturn(0);
2371 }
2372 
2373 #undef __FUNCT__
2374 #define __FUNCT__ "MatSolve_MPIAIJ"
2375 /*
2376   This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu>
2377 */
2378 PetscErrorCode MatSolve_MPIAIJ(Mat A, Vec b, Vec x)
2379 {
2380   namespace graph_dist = boost::graph::distributed;
2381 
2382   typedef graph_dist::ilu_default::ilu_level_graph_type  lgraph_type;
2383   lgraph_type*   lgraph_p;
2384   PetscContainer c;
2385   PetscErrorCode ierr;
2386 
2387   PetscFunctionBegin;
2388   ierr = PetscObjectQuery((PetscObject) A, "graph", (PetscObject *) &c);CHKERRQ(ierr);
2389   ierr = PetscContainerGetPointer(c, (void **) &lgraph_p);CHKERRQ(ierr);
2390   ierr = VecCopy(b, x);CHKERRQ(ierr);
2391 
2392   PetscScalar* array_x;
2393   ierr = VecGetArray(x, &array_x);CHKERRQ(ierr);
2394   PetscInt sx;
2395   ierr = VecGetSize(x, &sx);CHKERRQ(ierr);
2396 
2397   PetscScalar* array_b;
2398   ierr = VecGetArray(b, &array_b);CHKERRQ(ierr);
2399   PetscInt sb;
2400   ierr = VecGetSize(b, &sb);CHKERRQ(ierr);
2401 
2402   lgraph_type&   level_graph = *lgraph_p;
2403   graph_dist::ilu_default::graph_type&            graph(level_graph.graph);
2404 
2405   typedef boost::multi_array_ref<PetscScalar, 1> array_ref_type;
2406   array_ref_type                                 ref_b(array_b, boost::extents[num_vertices(graph)]),
2407                                                  ref_x(array_x, boost::extents[num_vertices(graph)]);
2408 
2409   typedef boost::iterator_property_map<array_ref_type::iterator,
2410                                 boost::property_map<graph_dist::ilu_default::graph_type, boost::vertex_index_t>::type>  gvector_type;
2411   gvector_type                                   vector_b(ref_b.begin(), get(boost::vertex_index, graph)),
2412                                                  vector_x(ref_x.begin(), get(boost::vertex_index, graph));
2413 
2414   ilu_set_solve(*lgraph_p, vector_b, vector_x);
2415 
2416   PetscFunctionReturn(0);
2417 }
2418 #endif
2419 
2420 typedef struct { /* used by MatGetRedundantMatrix() for reusing matredundant */
2421   PetscInt       nzlocal,nsends,nrecvs;
2422   PetscMPIInt    *send_rank,*recv_rank;
2423   PetscInt       *sbuf_nz,*rbuf_nz,*sbuf_j,**rbuf_j;
2424   PetscScalar    *sbuf_a,**rbuf_a;
2425   PetscErrorCode (*Destroy)(Mat);
2426 } Mat_Redundant;
2427 
2428 #undef __FUNCT__
2429 #define __FUNCT__ "PetscContainerDestroy_MatRedundant"
2430 PetscErrorCode PetscContainerDestroy_MatRedundant(void *ptr)
2431 {
2432   PetscErrorCode       ierr;
2433   Mat_Redundant        *redund=(Mat_Redundant*)ptr;
2434   PetscInt             i;
2435 
2436   PetscFunctionBegin;
2437   ierr = PetscFree2(redund->send_rank,redund->recv_rank);CHKERRQ(ierr);
2438   ierr = PetscFree(redund->sbuf_j);CHKERRQ(ierr);
2439   ierr = PetscFree(redund->sbuf_a);CHKERRQ(ierr);
2440   for (i=0; i<redund->nrecvs; i++){
2441     ierr = PetscFree(redund->rbuf_j[i]);CHKERRQ(ierr);
2442     ierr = PetscFree(redund->rbuf_a[i]);CHKERRQ(ierr);
2443   }
2444   ierr = PetscFree4(redund->sbuf_nz,redund->rbuf_nz,redund->rbuf_j,redund->rbuf_a);CHKERRQ(ierr);
2445   ierr = PetscFree(redund);CHKERRQ(ierr);
2446   PetscFunctionReturn(0);
2447 }
2448 
2449 #undef __FUNCT__
2450 #define __FUNCT__ "MatDestroy_MatRedundant"
2451 PetscErrorCode MatDestroy_MatRedundant(Mat A)
2452 {
2453   PetscErrorCode  ierr;
2454   PetscContainer  container;
2455   Mat_Redundant   *redund=PETSC_NULL;
2456 
2457   PetscFunctionBegin;
2458   ierr = PetscObjectQuery((PetscObject)A,"Mat_Redundant",(PetscObject *)&container);CHKERRQ(ierr);
2459   if (!container) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Container does not exit");
2460   ierr = PetscContainerGetPointer(container,(void **)&redund);CHKERRQ(ierr);
2461   A->ops->destroy = redund->Destroy;
2462   ierr = PetscObjectCompose((PetscObject)A,"Mat_Redundant",0);CHKERRQ(ierr);
2463   if (A->ops->destroy) {
2464     ierr = (*A->ops->destroy)(A);CHKERRQ(ierr);
2465   }
2466   PetscFunctionReturn(0);
2467 }
2468 
2469 #undef __FUNCT__
2470 #define __FUNCT__ "MatGetRedundantMatrix_MPIAIJ"
2471 PetscErrorCode MatGetRedundantMatrix_MPIAIJ(Mat mat,PetscInt nsubcomm,MPI_Comm subcomm,PetscInt mlocal_sub,MatReuse reuse,Mat *matredundant)
2472 {
2473   PetscMPIInt    rank,size;
2474   MPI_Comm       comm=((PetscObject)mat)->comm;
2475   PetscErrorCode ierr;
2476   PetscInt       nsends=0,nrecvs=0,i,rownz_max=0;
2477   PetscMPIInt    *send_rank=PETSC_NULL,*recv_rank=PETSC_NULL;
2478   PetscInt       *rowrange=mat->rmap->range;
2479   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2480   Mat            A=aij->A,B=aij->B,C=*matredundant;
2481   Mat_SeqAIJ     *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data;
2482   PetscScalar    *sbuf_a;
2483   PetscInt       nzlocal=a->nz+b->nz;
2484   PetscInt       j,cstart=mat->cmap->rstart,cend=mat->cmap->rend,row,nzA,nzB,ncols,*cworkA,*cworkB;
2485   PetscInt       rstart=mat->rmap->rstart,rend=mat->rmap->rend,*bmap=aij->garray,M,N;
2486   PetscInt       *cols,ctmp,lwrite,*rptr,l,*sbuf_j;
2487   MatScalar      *aworkA,*aworkB;
2488   PetscScalar    *vals;
2489   PetscMPIInt    tag1,tag2,tag3,imdex;
2490   MPI_Request    *s_waits1=PETSC_NULL,*s_waits2=PETSC_NULL,*s_waits3=PETSC_NULL,
2491                  *r_waits1=PETSC_NULL,*r_waits2=PETSC_NULL,*r_waits3=PETSC_NULL;
2492   MPI_Status     recv_status,*send_status;
2493   PetscInt       *sbuf_nz=PETSC_NULL,*rbuf_nz=PETSC_NULL,count;
2494   PetscInt       **rbuf_j=PETSC_NULL;
2495   PetscScalar    **rbuf_a=PETSC_NULL;
2496   Mat_Redundant  *redund=PETSC_NULL;
2497   PetscContainer container;
2498 
2499   PetscFunctionBegin;
2500   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2501   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2502 
2503   if (reuse == MAT_REUSE_MATRIX) {
2504     ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr);
2505     if (M != N || M != mat->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong global size");
2506     ierr = MatGetLocalSize(C,&M,&N);CHKERRQ(ierr);
2507     if (M != N || M != mlocal_sub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong local size");
2508     ierr = PetscObjectQuery((PetscObject)C,"Mat_Redundant",(PetscObject *)&container);CHKERRQ(ierr);
2509     if (!container) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Container does not exit");
2510     ierr = PetscContainerGetPointer(container,(void **)&redund);CHKERRQ(ierr);
2511     if (nzlocal != redund->nzlocal) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong nzlocal");
2512 
2513     nsends    = redund->nsends;
2514     nrecvs    = redund->nrecvs;
2515     send_rank = redund->send_rank;
2516     recv_rank = redund->recv_rank;
2517     sbuf_nz   = redund->sbuf_nz;
2518     rbuf_nz   = redund->rbuf_nz;
2519     sbuf_j    = redund->sbuf_j;
2520     sbuf_a    = redund->sbuf_a;
2521     rbuf_j    = redund->rbuf_j;
2522     rbuf_a    = redund->rbuf_a;
2523   }
2524 
2525   if (reuse == MAT_INITIAL_MATRIX){
2526     PetscMPIInt  subrank,subsize;
2527     PetscInt     nleftover,np_subcomm;
2528     /* get the destination processors' id send_rank, nsends and nrecvs */
2529     ierr = MPI_Comm_rank(subcomm,&subrank);CHKERRQ(ierr);
2530     ierr = MPI_Comm_size(subcomm,&subsize);CHKERRQ(ierr);
2531     ierr = PetscMalloc2(size,PetscMPIInt,&send_rank,size,PetscMPIInt,&recv_rank);
2532     np_subcomm = size/nsubcomm;
2533     nleftover  = size - nsubcomm*np_subcomm;
2534     nsends = 0; nrecvs = 0;
2535     for (i=0; i<size; i++){ /* i=rank*/
2536       if (subrank == i/nsubcomm && rank != i){ /* my_subrank == other's subrank */
2537         send_rank[nsends] = i; nsends++;
2538         recv_rank[nrecvs++] = i;
2539       }
2540     }
2541     if (rank >= size - nleftover){/* this proc is a leftover processor */
2542       i = size-nleftover-1;
2543       j = 0;
2544       while (j < nsubcomm - nleftover){
2545         send_rank[nsends++] = i;
2546         i--; j++;
2547       }
2548     }
2549 
2550     if (nleftover && subsize == size/nsubcomm && subrank==subsize-1){ /* this proc recvs from leftover processors */
2551       for (i=0; i<nleftover; i++){
2552         recv_rank[nrecvs++] = size-nleftover+i;
2553       }
2554     }
2555 
2556     /* allocate sbuf_j, sbuf_a */
2557     i = nzlocal + rowrange[rank+1] - rowrange[rank] + 2;
2558     ierr = PetscMalloc(i*sizeof(PetscInt),&sbuf_j);CHKERRQ(ierr);
2559     ierr = PetscMalloc((nzlocal+1)*sizeof(PetscScalar),&sbuf_a);CHKERRQ(ierr);
2560   } /* endof if (reuse == MAT_INITIAL_MATRIX) */
2561 
2562   /* copy mat's local entries into the buffers */
2563   if (reuse == MAT_INITIAL_MATRIX){
2564     rownz_max = 0;
2565     rptr = sbuf_j;
2566     cols = sbuf_j + rend-rstart + 1;
2567     vals = sbuf_a;
2568     rptr[0] = 0;
2569     for (i=0; i<rend-rstart; i++){
2570       row = i + rstart;
2571       nzA    = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i];
2572       ncols  = nzA + nzB;
2573       cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i];
2574       aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i];
2575       /* load the column indices for this row into cols */
2576       lwrite = 0;
2577       for (l=0; l<nzB; l++) {
2578         if ((ctmp = bmap[cworkB[l]]) < cstart){
2579           vals[lwrite]   = aworkB[l];
2580           cols[lwrite++] = ctmp;
2581         }
2582       }
2583       for (l=0; l<nzA; l++){
2584         vals[lwrite]   = aworkA[l];
2585         cols[lwrite++] = cstart + cworkA[l];
2586       }
2587       for (l=0; l<nzB; l++) {
2588         if ((ctmp = bmap[cworkB[l]]) >= cend){
2589           vals[lwrite]   = aworkB[l];
2590           cols[lwrite++] = ctmp;
2591         }
2592       }
2593       vals += ncols;
2594       cols += ncols;
2595       rptr[i+1] = rptr[i] + ncols;
2596       if (rownz_max < ncols) rownz_max = ncols;
2597     }
2598     if (rptr[rend-rstart] != a->nz + b->nz) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB, "rptr[%d] %d != %d + %d",rend-rstart,rptr[rend-rstart+1],a->nz,b->nz);
2599   } else { /* only copy matrix values into sbuf_a */
2600     rptr = sbuf_j;
2601     vals = sbuf_a;
2602     rptr[0] = 0;
2603     for (i=0; i<rend-rstart; i++){
2604       row = i + rstart;
2605       nzA    = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i];
2606       ncols  = nzA + nzB;
2607       cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i];
2608       aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i];
2609       lwrite = 0;
2610       for (l=0; l<nzB; l++) {
2611         if ((ctmp = bmap[cworkB[l]]) < cstart) vals[lwrite++] = aworkB[l];
2612       }
2613       for (l=0; l<nzA; l++) vals[lwrite++] = aworkA[l];
2614       for (l=0; l<nzB; l++) {
2615         if ((ctmp = bmap[cworkB[l]]) >= cend) vals[lwrite++] = aworkB[l];
2616       }
2617       vals += ncols;
2618       rptr[i+1] = rptr[i] + ncols;
2619     }
2620   } /* endof if (reuse == MAT_INITIAL_MATRIX) */
2621 
2622   /* send nzlocal to others, and recv other's nzlocal */
2623   /*--------------------------------------------------*/
2624   if (reuse == MAT_INITIAL_MATRIX){
2625     ierr = PetscMalloc2(3*(nsends + nrecvs)+1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr);
2626     s_waits2 = s_waits3 + nsends;
2627     s_waits1 = s_waits2 + nsends;
2628     r_waits1 = s_waits1 + nsends;
2629     r_waits2 = r_waits1 + nrecvs;
2630     r_waits3 = r_waits2 + nrecvs;
2631   } else {
2632     ierr = PetscMalloc2(nsends + nrecvs +1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr);
2633     r_waits3 = s_waits3 + nsends;
2634   }
2635 
2636   ierr = PetscObjectGetNewTag((PetscObject)mat,&tag3);CHKERRQ(ierr);
2637   if (reuse == MAT_INITIAL_MATRIX){
2638     /* get new tags to keep the communication clean */
2639     ierr = PetscObjectGetNewTag((PetscObject)mat,&tag1);CHKERRQ(ierr);
2640     ierr = PetscObjectGetNewTag((PetscObject)mat,&tag2);CHKERRQ(ierr);
2641     ierr = PetscMalloc4(nsends,PetscInt,&sbuf_nz,nrecvs,PetscInt,&rbuf_nz,nrecvs,PetscInt*,&rbuf_j,nrecvs,PetscScalar*,&rbuf_a);CHKERRQ(ierr);
2642 
2643     /* post receives of other's nzlocal */
2644     for (i=0; i<nrecvs; i++){
2645       ierr = MPI_Irecv(rbuf_nz+i,1,MPIU_INT,MPI_ANY_SOURCE,tag1,comm,r_waits1+i);CHKERRQ(ierr);
2646     }
2647     /* send nzlocal to others */
2648     for (i=0; i<nsends; i++){
2649       sbuf_nz[i] = nzlocal;
2650       ierr = MPI_Isend(sbuf_nz+i,1,MPIU_INT,send_rank[i],tag1,comm,s_waits1+i);CHKERRQ(ierr);
2651     }
2652     /* wait on receives of nzlocal; allocate space for rbuf_j, rbuf_a */
2653     count = nrecvs;
2654     while (count) {
2655       ierr = MPI_Waitany(nrecvs,r_waits1,&imdex,&recv_status);CHKERRQ(ierr);
2656       recv_rank[imdex] = recv_status.MPI_SOURCE;
2657       /* allocate rbuf_a and rbuf_j; then post receives of rbuf_j */
2658       ierr = PetscMalloc((rbuf_nz[imdex]+1)*sizeof(PetscScalar),&rbuf_a[imdex]);CHKERRQ(ierr);
2659 
2660       i = rowrange[recv_status.MPI_SOURCE+1] - rowrange[recv_status.MPI_SOURCE]; /* number of expected mat->i */
2661       rbuf_nz[imdex] += i + 2;
2662       ierr = PetscMalloc(rbuf_nz[imdex]*sizeof(PetscInt),&rbuf_j[imdex]);CHKERRQ(ierr);
2663       ierr = MPI_Irecv(rbuf_j[imdex],rbuf_nz[imdex],MPIU_INT,recv_status.MPI_SOURCE,tag2,comm,r_waits2+imdex);CHKERRQ(ierr);
2664       count--;
2665     }
2666     /* wait on sends of nzlocal */
2667     if (nsends) {ierr = MPI_Waitall(nsends,s_waits1,send_status);CHKERRQ(ierr);}
2668     /* send mat->i,j to others, and recv from other's */
2669     /*------------------------------------------------*/
2670     for (i=0; i<nsends; i++){
2671       j = nzlocal + rowrange[rank+1] - rowrange[rank] + 1;
2672       ierr = MPI_Isend(sbuf_j,j,MPIU_INT,send_rank[i],tag2,comm,s_waits2+i);CHKERRQ(ierr);
2673     }
2674     /* wait on receives of mat->i,j */
2675     /*------------------------------*/
2676     count = nrecvs;
2677     while (count) {
2678       ierr = MPI_Waitany(nrecvs,r_waits2,&imdex,&recv_status);CHKERRQ(ierr);
2679       if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE);
2680       count--;
2681     }
2682     /* wait on sends of mat->i,j */
2683     /*---------------------------*/
2684     if (nsends) {
2685       ierr = MPI_Waitall(nsends,s_waits2,send_status);CHKERRQ(ierr);
2686     }
2687   } /* endof if (reuse == MAT_INITIAL_MATRIX) */
2688 
2689   /* post receives, send and receive mat->a */
2690   /*----------------------------------------*/
2691   for (imdex=0; imdex<nrecvs; imdex++) {
2692     ierr = MPI_Irecv(rbuf_a[imdex],rbuf_nz[imdex],MPIU_SCALAR,recv_rank[imdex],tag3,comm,r_waits3+imdex);CHKERRQ(ierr);
2693   }
2694   for (i=0; i<nsends; i++){
2695     ierr = MPI_Isend(sbuf_a,nzlocal,MPIU_SCALAR,send_rank[i],tag3,comm,s_waits3+i);CHKERRQ(ierr);
2696   }
2697   count = nrecvs;
2698   while (count) {
2699     ierr = MPI_Waitany(nrecvs,r_waits3,&imdex,&recv_status);CHKERRQ(ierr);
2700     if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE);
2701     count--;
2702   }
2703   if (nsends) {
2704     ierr = MPI_Waitall(nsends,s_waits3,send_status);CHKERRQ(ierr);
2705   }
2706 
2707   ierr = PetscFree2(s_waits3,send_status);CHKERRQ(ierr);
2708 
2709   /* create redundant matrix */
2710   /*-------------------------*/
2711   if (reuse == MAT_INITIAL_MATRIX){
2712     /* compute rownz_max for preallocation */
2713     for (imdex=0; imdex<nrecvs; imdex++){
2714       j = rowrange[recv_rank[imdex]+1] - rowrange[recv_rank[imdex]];
2715       rptr = rbuf_j[imdex];
2716       for (i=0; i<j; i++){
2717         ncols = rptr[i+1] - rptr[i];
2718         if (rownz_max < ncols) rownz_max = ncols;
2719       }
2720     }
2721 
2722     ierr = MatCreate(subcomm,&C);CHKERRQ(ierr);
2723     ierr = MatSetSizes(C,mlocal_sub,mlocal_sub,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr);
2724     ierr = MatSetFromOptions(C);CHKERRQ(ierr);
2725     ierr = MatSeqAIJSetPreallocation(C,rownz_max,PETSC_NULL);CHKERRQ(ierr);
2726     ierr = MatMPIAIJSetPreallocation(C,rownz_max,PETSC_NULL,rownz_max,PETSC_NULL);CHKERRQ(ierr);
2727   } else {
2728     C = *matredundant;
2729   }
2730 
2731   /* insert local matrix entries */
2732   rptr = sbuf_j;
2733   cols = sbuf_j + rend-rstart + 1;
2734   vals = sbuf_a;
2735   for (i=0; i<rend-rstart; i++){
2736     row   = i + rstart;
2737     ncols = rptr[i+1] - rptr[i];
2738     ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
2739     vals += ncols;
2740     cols += ncols;
2741   }
2742   /* insert received matrix entries */
2743   for (imdex=0; imdex<nrecvs; imdex++){
2744     rstart = rowrange[recv_rank[imdex]];
2745     rend   = rowrange[recv_rank[imdex]+1];
2746     rptr = rbuf_j[imdex];
2747     cols = rbuf_j[imdex] + rend-rstart + 1;
2748     vals = rbuf_a[imdex];
2749     for (i=0; i<rend-rstart; i++){
2750       row   = i + rstart;
2751       ncols = rptr[i+1] - rptr[i];
2752       ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
2753       vals += ncols;
2754       cols += ncols;
2755     }
2756   }
2757   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2758   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2759   ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr);
2760   if (M != mat->rmap->N || N != mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"redundant mat size %d != input mat size %d",M,mat->rmap->N);
2761   if (reuse == MAT_INITIAL_MATRIX) {
2762     PetscContainer container;
2763     *matredundant = C;
2764     /* create a supporting struct and attach it to C for reuse */
2765     ierr = PetscNewLog(C,Mat_Redundant,&redund);CHKERRQ(ierr);
2766     ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
2767     ierr = PetscContainerSetPointer(container,redund);CHKERRQ(ierr);
2768     ierr = PetscContainerSetUserDestroy(container,PetscContainerDestroy_MatRedundant);CHKERRQ(ierr);
2769     ierr = PetscObjectCompose((PetscObject)C,"Mat_Redundant",(PetscObject)container);CHKERRQ(ierr);
2770     ierr = PetscContainerDestroy(&container);CHKERRQ(ierr);
2771 
2772     redund->nzlocal = nzlocal;
2773     redund->nsends  = nsends;
2774     redund->nrecvs  = nrecvs;
2775     redund->send_rank = send_rank;
2776     redund->recv_rank = recv_rank;
2777     redund->sbuf_nz = sbuf_nz;
2778     redund->rbuf_nz = rbuf_nz;
2779     redund->sbuf_j  = sbuf_j;
2780     redund->sbuf_a  = sbuf_a;
2781     redund->rbuf_j  = rbuf_j;
2782     redund->rbuf_a  = rbuf_a;
2783 
2784     redund->Destroy = C->ops->destroy;
2785     C->ops->destroy = MatDestroy_MatRedundant;
2786   }
2787   PetscFunctionReturn(0);
2788 }
2789 
2790 #undef __FUNCT__
2791 #define __FUNCT__ "MatGetRowMaxAbs_MPIAIJ"
2792 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2793 {
2794   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2795   PetscErrorCode ierr;
2796   PetscInt       i,*idxb = 0;
2797   PetscScalar    *va,*vb;
2798   Vec            vtmp;
2799 
2800   PetscFunctionBegin;
2801   ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr);
2802   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2803   if (idx) {
2804     for (i=0; i<A->rmap->n; i++) {
2805       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2806     }
2807   }
2808 
2809   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
2810   if (idx) {
2811     ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr);
2812   }
2813   ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
2814   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
2815 
2816   for (i=0; i<A->rmap->n; i++){
2817     if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
2818       va[i] = vb[i];
2819       if (idx) idx[i] = a->garray[idxb[i]];
2820     }
2821   }
2822 
2823   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2824   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
2825   ierr = PetscFree(idxb);CHKERRQ(ierr);
2826   ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
2827   PetscFunctionReturn(0);
2828 }
2829 
2830 #undef __FUNCT__
2831 #define __FUNCT__ "MatGetRowMinAbs_MPIAIJ"
2832 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2833 {
2834   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2835   PetscErrorCode ierr;
2836   PetscInt       i,*idxb = 0;
2837   PetscScalar    *va,*vb;
2838   Vec            vtmp;
2839 
2840   PetscFunctionBegin;
2841   ierr = MatGetRowMinAbs(a->A,v,idx);CHKERRQ(ierr);
2842   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2843   if (idx) {
2844     for (i=0; i<A->cmap->n; i++) {
2845       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2846     }
2847   }
2848 
2849   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
2850   if (idx) {
2851     ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr);
2852   }
2853   ierr = MatGetRowMinAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
2854   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
2855 
2856   for (i=0; i<A->rmap->n; i++){
2857     if (PetscAbsScalar(va[i]) > PetscAbsScalar(vb[i])) {
2858       va[i] = vb[i];
2859       if (idx) idx[i] = a->garray[idxb[i]];
2860     }
2861   }
2862 
2863   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2864   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
2865   ierr = PetscFree(idxb);CHKERRQ(ierr);
2866   ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
2867   PetscFunctionReturn(0);
2868 }
2869 
2870 #undef __FUNCT__
2871 #define __FUNCT__ "MatGetRowMin_MPIAIJ"
2872 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2873 {
2874   Mat_MPIAIJ    *mat    = (Mat_MPIAIJ *) A->data;
2875   PetscInt       n      = A->rmap->n;
2876   PetscInt       cstart = A->cmap->rstart;
2877   PetscInt      *cmap   = mat->garray;
2878   PetscInt      *diagIdx, *offdiagIdx;
2879   Vec            diagV, offdiagV;
2880   PetscScalar   *a, *diagA, *offdiagA;
2881   PetscInt       r;
2882   PetscErrorCode ierr;
2883 
2884   PetscFunctionBegin;
2885   ierr = PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);CHKERRQ(ierr);
2886   ierr = VecCreateSeq(((PetscObject)A)->comm, n, &diagV);CHKERRQ(ierr);
2887   ierr = VecCreateSeq(((PetscObject)A)->comm, n, &offdiagV);CHKERRQ(ierr);
2888   ierr = MatGetRowMin(mat->A, diagV,    diagIdx);CHKERRQ(ierr);
2889   ierr = MatGetRowMin(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
2890   ierr = VecGetArray(v,        &a);CHKERRQ(ierr);
2891   ierr = VecGetArray(diagV,    &diagA);CHKERRQ(ierr);
2892   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2893   for(r = 0; r < n; ++r) {
2894     if (PetscAbsScalar(diagA[r]) <= PetscAbsScalar(offdiagA[r])) {
2895       a[r]   = diagA[r];
2896       idx[r] = cstart + diagIdx[r];
2897     } else {
2898       a[r]   = offdiagA[r];
2899       idx[r] = cmap[offdiagIdx[r]];
2900     }
2901   }
2902   ierr = VecRestoreArray(v,        &a);CHKERRQ(ierr);
2903   ierr = VecRestoreArray(diagV,    &diagA);CHKERRQ(ierr);
2904   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2905   ierr = VecDestroy(&diagV);CHKERRQ(ierr);
2906   ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
2907   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
2908   PetscFunctionReturn(0);
2909 }
2910 
2911 #undef __FUNCT__
2912 #define __FUNCT__ "MatGetRowMax_MPIAIJ"
2913 PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2914 {
2915   Mat_MPIAIJ    *mat    = (Mat_MPIAIJ *) A->data;
2916   PetscInt       n      = A->rmap->n;
2917   PetscInt       cstart = A->cmap->rstart;
2918   PetscInt      *cmap   = mat->garray;
2919   PetscInt      *diagIdx, *offdiagIdx;
2920   Vec            diagV, offdiagV;
2921   PetscScalar   *a, *diagA, *offdiagA;
2922   PetscInt       r;
2923   PetscErrorCode ierr;
2924 
2925   PetscFunctionBegin;
2926   ierr = PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);CHKERRQ(ierr);
2927   ierr = VecCreateSeq(((PetscObject)A)->comm, n, &diagV);CHKERRQ(ierr);
2928   ierr = VecCreateSeq(((PetscObject)A)->comm, n, &offdiagV);CHKERRQ(ierr);
2929   ierr = MatGetRowMax(mat->A, diagV,    diagIdx);CHKERRQ(ierr);
2930   ierr = MatGetRowMax(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
2931   ierr = VecGetArray(v,        &a);CHKERRQ(ierr);
2932   ierr = VecGetArray(diagV,    &diagA);CHKERRQ(ierr);
2933   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2934   for(r = 0; r < n; ++r) {
2935     if (PetscAbsScalar(diagA[r]) >= PetscAbsScalar(offdiagA[r])) {
2936       a[r]   = diagA[r];
2937       idx[r] = cstart + diagIdx[r];
2938     } else {
2939       a[r]   = offdiagA[r];
2940       idx[r] = cmap[offdiagIdx[r]];
2941     }
2942   }
2943   ierr = VecRestoreArray(v,        &a);CHKERRQ(ierr);
2944   ierr = VecRestoreArray(diagV,    &diagA);CHKERRQ(ierr);
2945   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2946   ierr = VecDestroy(&diagV);CHKERRQ(ierr);
2947   ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
2948   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
2949   PetscFunctionReturn(0);
2950 }
2951 
2952 #undef __FUNCT__
2953 #define __FUNCT__ "MatGetSeqNonzeroStructure_MPIAIJ"
2954 PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat,Mat *newmat)
2955 {
2956   PetscErrorCode ierr;
2957   Mat            *dummy;
2958 
2959   PetscFunctionBegin;
2960   ierr = MatGetSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy);CHKERRQ(ierr);
2961   *newmat = *dummy;
2962   ierr = PetscFree(dummy);CHKERRQ(ierr);
2963   PetscFunctionReturn(0);
2964 }
2965 
2966 extern PetscErrorCode  MatFDColoringApply_AIJ(Mat,MatFDColoring,Vec,MatStructure*,void*);
2967 
2968 #undef __FUNCT__
2969 #define __FUNCT__ "MatInvertBlockDiagonal_MPIAIJ"
2970 PetscErrorCode  MatInvertBlockDiagonal_MPIAIJ(Mat A,PetscScalar **values)
2971 {
2972   Mat_MPIAIJ    *a = (Mat_MPIAIJ*) A->data;
2973   PetscErrorCode ierr;
2974 
2975   PetscFunctionBegin;
2976   ierr = MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr);
2977   PetscFunctionReturn(0);
2978 }
2979 
2980 
2981 /* -------------------------------------------------------------------*/
2982 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
2983        MatGetRow_MPIAIJ,
2984        MatRestoreRow_MPIAIJ,
2985        MatMult_MPIAIJ,
2986 /* 4*/ MatMultAdd_MPIAIJ,
2987        MatMultTranspose_MPIAIJ,
2988        MatMultTransposeAdd_MPIAIJ,
2989 #ifdef PETSC_HAVE_PBGL
2990        MatSolve_MPIAIJ,
2991 #else
2992        0,
2993 #endif
2994        0,
2995        0,
2996 /*10*/ 0,
2997        0,
2998        0,
2999        MatSOR_MPIAIJ,
3000        MatTranspose_MPIAIJ,
3001 /*15*/ MatGetInfo_MPIAIJ,
3002        MatEqual_MPIAIJ,
3003        MatGetDiagonal_MPIAIJ,
3004        MatDiagonalScale_MPIAIJ,
3005        MatNorm_MPIAIJ,
3006 /*20*/ MatAssemblyBegin_MPIAIJ,
3007        MatAssemblyEnd_MPIAIJ,
3008        MatSetOption_MPIAIJ,
3009        MatZeroEntries_MPIAIJ,
3010 /*24*/ MatZeroRows_MPIAIJ,
3011        0,
3012 #ifdef PETSC_HAVE_PBGL
3013        0,
3014 #else
3015        0,
3016 #endif
3017        0,
3018        0,
3019 /*29*/ MatSetUpPreallocation_MPIAIJ,
3020 #ifdef PETSC_HAVE_PBGL
3021        0,
3022 #else
3023        0,
3024 #endif
3025        0,
3026        0,
3027        0,
3028 /*34*/ MatDuplicate_MPIAIJ,
3029        0,
3030        0,
3031        0,
3032        0,
3033 /*39*/ MatAXPY_MPIAIJ,
3034        MatGetSubMatrices_MPIAIJ,
3035        MatIncreaseOverlap_MPIAIJ,
3036        MatGetValues_MPIAIJ,
3037        MatCopy_MPIAIJ,
3038 /*44*/ MatGetRowMax_MPIAIJ,
3039        MatScale_MPIAIJ,
3040        0,
3041        0,
3042        MatZeroRowsColumns_MPIAIJ,
3043 /*49*/ MatSetBlockSize_MPIAIJ,
3044        0,
3045        0,
3046        0,
3047        0,
3048 /*54*/ MatFDColoringCreate_MPIAIJ,
3049        0,
3050        MatSetUnfactored_MPIAIJ,
3051        MatPermute_MPIAIJ,
3052        0,
3053 /*59*/ MatGetSubMatrix_MPIAIJ,
3054        MatDestroy_MPIAIJ,
3055        MatView_MPIAIJ,
3056        0,
3057        0,
3058 /*64*/ 0,
3059        0,
3060        0,
3061        0,
3062        0,
3063 /*69*/ MatGetRowMaxAbs_MPIAIJ,
3064        MatGetRowMinAbs_MPIAIJ,
3065        0,
3066        MatSetColoring_MPIAIJ,
3067 #if defined(PETSC_HAVE_ADIC)
3068        MatSetValuesAdic_MPIAIJ,
3069 #else
3070        0,
3071 #endif
3072        MatSetValuesAdifor_MPIAIJ,
3073 /*75*/ MatFDColoringApply_AIJ,
3074        0,
3075        0,
3076        0,
3077        0,
3078 /*80*/ 0,
3079        0,
3080        0,
3081 /*83*/ MatLoad_MPIAIJ,
3082        0,
3083        0,
3084        0,
3085        0,
3086        0,
3087 /*89*/ MatMatMult_MPIAIJ_MPIAIJ,
3088        MatMatMultSymbolic_MPIAIJ_MPIAIJ,
3089        MatMatMultNumeric_MPIAIJ_MPIAIJ,
3090        MatPtAP_Basic,
3091        MatPtAPSymbolic_MPIAIJ,
3092 /*94*/ MatPtAPNumeric_MPIAIJ,
3093        0,
3094        0,
3095        0,
3096        0,
3097 /*99*/ 0,
3098        MatPtAPSymbolic_MPIAIJ_MPIAIJ,
3099        MatPtAPNumeric_MPIAIJ_MPIAIJ,
3100        MatConjugate_MPIAIJ,
3101        0,
3102 /*104*/MatSetValuesRow_MPIAIJ,
3103        MatRealPart_MPIAIJ,
3104        MatImaginaryPart_MPIAIJ,
3105        0,
3106        0,
3107 /*109*/0,
3108        MatGetRedundantMatrix_MPIAIJ,
3109        MatGetRowMin_MPIAIJ,
3110        0,
3111        0,
3112 /*114*/MatGetSeqNonzeroStructure_MPIAIJ,
3113        0,
3114        0,
3115        0,
3116        0,
3117 /*119*/0,
3118        0,
3119        0,
3120        0,
3121        MatGetMultiProcBlock_MPIAIJ,
3122 /*124*/MatFindNonZeroRows_MPIAIJ,
3123        MatGetColumnNorms_MPIAIJ,
3124        MatInvertBlockDiagonal_MPIAIJ,
3125        0,
3126        MatGetSubMatricesParallel_MPIAIJ,
3127 /*129*/0
3128 };
3129 
3130 /* ----------------------------------------------------------------------------------------*/
3131 
3132 EXTERN_C_BEGIN
3133 #undef __FUNCT__
3134 #define __FUNCT__ "MatStoreValues_MPIAIJ"
3135 PetscErrorCode  MatStoreValues_MPIAIJ(Mat mat)
3136 {
3137   Mat_MPIAIJ     *aij = (Mat_MPIAIJ *)mat->data;
3138   PetscErrorCode ierr;
3139 
3140   PetscFunctionBegin;
3141   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
3142   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
3143   PetscFunctionReturn(0);
3144 }
3145 EXTERN_C_END
3146 
3147 EXTERN_C_BEGIN
3148 #undef __FUNCT__
3149 #define __FUNCT__ "MatRetrieveValues_MPIAIJ"
3150 PetscErrorCode  MatRetrieveValues_MPIAIJ(Mat mat)
3151 {
3152   Mat_MPIAIJ     *aij = (Mat_MPIAIJ *)mat->data;
3153   PetscErrorCode ierr;
3154 
3155   PetscFunctionBegin;
3156   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
3157   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
3158   PetscFunctionReturn(0);
3159 }
3160 EXTERN_C_END
3161 
3162 EXTERN_C_BEGIN
3163 #undef __FUNCT__
3164 #define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ"
3165 PetscErrorCode  MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
3166 {
3167   Mat_MPIAIJ     *b;
3168   PetscErrorCode ierr;
3169   PetscInt       i;
3170 
3171   PetscFunctionBegin;
3172   if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5;
3173   if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2;
3174   if (d_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz);
3175   if (o_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz);
3176 
3177   ierr = PetscLayoutSetBlockSize(B->rmap,1);CHKERRQ(ierr);
3178   ierr = PetscLayoutSetBlockSize(B->cmap,1);CHKERRQ(ierr);
3179   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3180   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3181   if (d_nnz) {
3182     for (i=0; i<B->rmap->n; i++) {
3183       if (d_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %D value %D",i,d_nnz[i]);
3184     }
3185   }
3186   if (o_nnz) {
3187     for (i=0; i<B->rmap->n; i++) {
3188       if (o_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %D value %D",i,o_nnz[i]);
3189     }
3190   }
3191   b = (Mat_MPIAIJ*)B->data;
3192 
3193   if (!B->preallocated) {
3194     /* Explicitly create 2 MATSEQAIJ matrices. */
3195     ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
3196     ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr);
3197     ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr);
3198     ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr);
3199     ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
3200     ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr);
3201     ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr);
3202     ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr);
3203   }
3204 
3205   ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr);
3206   ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr);
3207   B->preallocated = PETSC_TRUE;
3208   PetscFunctionReturn(0);
3209 }
3210 EXTERN_C_END
3211 
3212 #undef __FUNCT__
3213 #define __FUNCT__ "MatDuplicate_MPIAIJ"
3214 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
3215 {
3216   Mat            mat;
3217   Mat_MPIAIJ     *a,*oldmat = (Mat_MPIAIJ*)matin->data;
3218   PetscErrorCode ierr;
3219 
3220   PetscFunctionBegin;
3221   *newmat       = 0;
3222   ierr = MatCreate(((PetscObject)matin)->comm,&mat);CHKERRQ(ierr);
3223   ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr);
3224   ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr);
3225   ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
3226   a    = (Mat_MPIAIJ*)mat->data;
3227 
3228   mat->factortype    = matin->factortype;
3229   mat->rmap->bs      = matin->rmap->bs;
3230   mat->assembled    = PETSC_TRUE;
3231   mat->insertmode   = NOT_SET_VALUES;
3232   mat->preallocated = PETSC_TRUE;
3233 
3234   a->size           = oldmat->size;
3235   a->rank           = oldmat->rank;
3236   a->donotstash     = oldmat->donotstash;
3237   a->roworiented    = oldmat->roworiented;
3238   a->rowindices     = 0;
3239   a->rowvalues      = 0;
3240   a->getrowactive   = PETSC_FALSE;
3241 
3242   ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr);
3243   ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr);
3244 
3245   if (oldmat->colmap) {
3246 #if defined (PETSC_USE_CTABLE)
3247     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
3248 #else
3249     ierr = PetscMalloc((mat->cmap->N)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr);
3250     ierr = PetscLogObjectMemory(mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
3251     ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
3252 #endif
3253   } else a->colmap = 0;
3254   if (oldmat->garray) {
3255     PetscInt len;
3256     len  = oldmat->B->cmap->n;
3257     ierr = PetscMalloc((len+1)*sizeof(PetscInt),&a->garray);CHKERRQ(ierr);
3258     ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr);
3259     if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); }
3260   } else a->garray = 0;
3261 
3262   ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
3263   ierr = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr);
3264   ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
3265   ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr);
3266   ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
3267   ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr);
3268   ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
3269   ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr);
3270   ierr = PetscFListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr);
3271   *newmat = mat;
3272   PetscFunctionReturn(0);
3273 }
3274 
3275 
3276 
3277 #undef __FUNCT__
3278 #define __FUNCT__ "MatLoad_MPIAIJ"
3279 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer)
3280 {
3281   PetscScalar    *vals,*svals;
3282   MPI_Comm       comm = ((PetscObject)viewer)->comm;
3283   PetscErrorCode ierr;
3284   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
3285   PetscInt       i,nz,j,rstart,rend,mmax,maxnz = 0,grows,gcols;
3286   PetscInt       header[4],*rowlengths = 0,M,N,m,*cols;
3287   PetscInt       *ourlens = PETSC_NULL,*procsnz = PETSC_NULL,*offlens = PETSC_NULL,jj,*mycols,*smycols;
3288   PetscInt       cend,cstart,n,*rowners,sizesset=1;
3289   int            fd;
3290 
3291   PetscFunctionBegin;
3292   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3293   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3294   if (!rank) {
3295     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
3296     ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
3297     if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
3298   }
3299 
3300   if (newMat->rmap->n < 0 && newMat->rmap->N < 0 && newMat->cmap->n < 0 && newMat->cmap->N < 0) sizesset = 0;
3301 
3302   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
3303   M = header[1]; N = header[2];
3304   /* If global rows/cols are set to PETSC_DECIDE, set it to the sizes given in the file */
3305   if (sizesset && newMat->rmap->N < 0) newMat->rmap->N = M;
3306   if (sizesset && newMat->cmap->N < 0) newMat->cmap->N = N;
3307 
3308   /* If global sizes are set, check if they are consistent with that given in the file */
3309   if (sizesset) {
3310     ierr = MatGetSize(newMat,&grows,&gcols);CHKERRQ(ierr);
3311   }
3312   if (sizesset && newMat->rmap->N != grows) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows:Matrix in file has (%d) and input matrix has (%d)",M,grows);
3313   if (sizesset && newMat->cmap->N != gcols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols:Matrix in file has (%d) and input matrix has (%d)",N,gcols);
3314 
3315   /* determine ownership of all rows */
3316   if (newMat->rmap->n < 0 ) m    = M/size + ((M % size) > rank); /* PETSC_DECIDE */
3317   else m = newMat->rmap->n; /* Set by user */
3318 
3319   ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr);
3320   ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
3321 
3322   /* First process needs enough room for process with most rows */
3323   if (!rank) {
3324     mmax       = rowners[1];
3325     for (i=2; i<size; i++) {
3326       mmax = PetscMax(mmax,rowners[i]);
3327     }
3328   } else mmax = m;
3329 
3330   rowners[0] = 0;
3331   for (i=2; i<=size; i++) {
3332     rowners[i] += rowners[i-1];
3333   }
3334   rstart = rowners[rank];
3335   rend   = rowners[rank+1];
3336 
3337   /* distribute row lengths to all processors */
3338   ierr    = PetscMalloc2(mmax,PetscInt,&ourlens,mmax,PetscInt,&offlens);CHKERRQ(ierr);
3339   if (!rank) {
3340     ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr);
3341     ierr = PetscMalloc(m*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
3342     ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr);
3343     ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr);
3344     for (j=0; j<m; j++) {
3345       procsnz[0] += ourlens[j];
3346     }
3347     for (i=1; i<size; i++) {
3348       ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr);
3349       /* calculate the number of nonzeros on each processor */
3350       for (j=0; j<rowners[i+1]-rowners[i]; j++) {
3351         procsnz[i] += rowlengths[j];
3352       }
3353       ierr = MPILong_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
3354     }
3355     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
3356   } else {
3357     ierr = MPILong_Recv(ourlens,m,MPIU_INT,0,tag,comm);CHKERRQ(ierr);
3358   }
3359 
3360   if (!rank) {
3361     /* determine max buffer needed and allocate it */
3362     maxnz = 0;
3363     for (i=0; i<size; i++) {
3364       maxnz = PetscMax(maxnz,procsnz[i]);
3365     }
3366     ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr);
3367 
3368     /* read in my part of the matrix column indices  */
3369     nz   = procsnz[0];
3370     ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr);
3371     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
3372 
3373     /* read in every one elses and ship off */
3374     for (i=1; i<size; i++) {
3375       nz     = procsnz[i];
3376       ierr   = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
3377       ierr   = MPILong_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
3378     }
3379     ierr = PetscFree(cols);CHKERRQ(ierr);
3380   } else {
3381     /* determine buffer space needed for message */
3382     nz = 0;
3383     for (i=0; i<m; i++) {
3384       nz += ourlens[i];
3385     }
3386     ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr);
3387 
3388     /* receive message of column indices*/
3389     ierr = MPILong_Recv(mycols,nz,MPIU_INT,0,tag,comm);CHKERRQ(ierr);
3390   }
3391 
3392   /* determine column ownership if matrix is not square */
3393   if (N != M) {
3394     if (newMat->cmap->n < 0) n      = N/size + ((N % size) > rank);
3395     else n = newMat->cmap->n;
3396     ierr   = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3397     cstart = cend - n;
3398   } else {
3399     cstart = rstart;
3400     cend   = rend;
3401     n      = cend - cstart;
3402   }
3403 
3404   /* loop over local rows, determining number of off diagonal entries */
3405   ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr);
3406   jj = 0;
3407   for (i=0; i<m; i++) {
3408     for (j=0; j<ourlens[i]; j++) {
3409       if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++;
3410       jj++;
3411     }
3412   }
3413 
3414   for (i=0; i<m; i++) {
3415     ourlens[i] -= offlens[i];
3416   }
3417   if (!sizesset) {
3418     ierr = MatSetSizes(newMat,m,n,M,N);CHKERRQ(ierr);
3419   }
3420   ierr = MatMPIAIJSetPreallocation(newMat,0,ourlens,0,offlens);CHKERRQ(ierr);
3421 
3422   for (i=0; i<m; i++) {
3423     ourlens[i] += offlens[i];
3424   }
3425 
3426   if (!rank) {
3427     ierr = PetscMalloc((maxnz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
3428 
3429     /* read in my part of the matrix numerical values  */
3430     nz   = procsnz[0];
3431     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3432 
3433     /* insert into matrix */
3434     jj      = rstart;
3435     smycols = mycols;
3436     svals   = vals;
3437     for (i=0; i<m; i++) {
3438       ierr = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
3439       smycols += ourlens[i];
3440       svals   += ourlens[i];
3441       jj++;
3442     }
3443 
3444     /* read in other processors and ship out */
3445     for (i=1; i<size; i++) {
3446       nz     = procsnz[i];
3447       ierr   = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3448       ierr   = MPILong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr);
3449     }
3450     ierr = PetscFree(procsnz);CHKERRQ(ierr);
3451   } else {
3452     /* receive numeric values */
3453     ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
3454 
3455     /* receive message of values*/
3456     ierr   = MPILong_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr);
3457 
3458     /* insert into matrix */
3459     jj      = rstart;
3460     smycols = mycols;
3461     svals   = vals;
3462     for (i=0; i<m; i++) {
3463       ierr     = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
3464       smycols += ourlens[i];
3465       svals   += ourlens[i];
3466       jj++;
3467     }
3468   }
3469   ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr);
3470   ierr = PetscFree(vals);CHKERRQ(ierr);
3471   ierr = PetscFree(mycols);CHKERRQ(ierr);
3472   ierr = PetscFree(rowners);CHKERRQ(ierr);
3473 
3474   ierr = MatAssemblyBegin(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3475   ierr = MatAssemblyEnd(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3476   PetscFunctionReturn(0);
3477 }
3478 
3479 #undef __FUNCT__
3480 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ"
3481 PetscErrorCode MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat)
3482 {
3483   PetscErrorCode ierr;
3484   IS             iscol_local;
3485   PetscInt       csize;
3486 
3487   PetscFunctionBegin;
3488   ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
3489   if (call == MAT_REUSE_MATRIX) {
3490     ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr);
3491     if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3492   } else {
3493     ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
3494   }
3495   ierr = MatGetSubMatrix_MPIAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr);
3496   if (call == MAT_INITIAL_MATRIX) {
3497     ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
3498     ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
3499   }
3500   PetscFunctionReturn(0);
3501 }
3502 
3503 #undef __FUNCT__
3504 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ_Private"
3505 /*
3506     Not great since it makes two copies of the submatrix, first an SeqAIJ
3507   in local and then by concatenating the local matrices the end result.
3508   Writing it directly would be much like MatGetSubMatrices_MPIAIJ()
3509 
3510   Note: This requires a sequential iscol with all indices.
3511 */
3512 PetscErrorCode MatGetSubMatrix_MPIAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat)
3513 {
3514   PetscErrorCode ierr;
3515   PetscMPIInt    rank,size;
3516   PetscInt       i,m,n,rstart,row,rend,nz,*cwork,j;
3517   PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal;
3518   Mat            *local,M,Mreuse;
3519   MatScalar      *vwork,*aa;
3520   MPI_Comm       comm = ((PetscObject)mat)->comm;
3521   Mat_SeqAIJ     *aij;
3522 
3523 
3524   PetscFunctionBegin;
3525   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3526   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3527 
3528   if (call ==  MAT_REUSE_MATRIX) {
3529     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr);
3530     if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3531     local = &Mreuse;
3532     ierr  = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);CHKERRQ(ierr);
3533   } else {
3534     ierr   = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr);
3535     Mreuse = *local;
3536     ierr   = PetscFree(local);CHKERRQ(ierr);
3537   }
3538 
3539   /*
3540       m - number of local rows
3541       n - number of columns (same on all processors)
3542       rstart - first row in new global matrix generated
3543   */
3544   ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
3545   if (call == MAT_INITIAL_MATRIX) {
3546     aij = (Mat_SeqAIJ*)(Mreuse)->data;
3547     ii  = aij->i;
3548     jj  = aij->j;
3549 
3550     /*
3551         Determine the number of non-zeros in the diagonal and off-diagonal
3552         portions of the matrix in order to do correct preallocation
3553     */
3554 
3555     /* first get start and end of "diagonal" columns */
3556     if (csize == PETSC_DECIDE) {
3557       ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
3558       if (mglobal == n) { /* square matrix */
3559 	nlocal = m;
3560       } else {
3561         nlocal = n/size + ((n % size) > rank);
3562       }
3563     } else {
3564       nlocal = csize;
3565     }
3566     ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3567     rstart = rend - nlocal;
3568     if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n);
3569 
3570     /* next, compute all the lengths */
3571     ierr  = PetscMalloc((2*m+1)*sizeof(PetscInt),&dlens);CHKERRQ(ierr);
3572     olens = dlens + m;
3573     for (i=0; i<m; i++) {
3574       jend = ii[i+1] - ii[i];
3575       olen = 0;
3576       dlen = 0;
3577       for (j=0; j<jend; j++) {
3578         if (*jj < rstart || *jj >= rend) olen++;
3579         else dlen++;
3580         jj++;
3581       }
3582       olens[i] = olen;
3583       dlens[i] = dlen;
3584     }
3585     ierr = MatCreate(comm,&M);CHKERRQ(ierr);
3586     ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr);
3587     ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr);
3588     ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr);
3589     ierr = PetscFree(dlens);CHKERRQ(ierr);
3590   } else {
3591     PetscInt ml,nl;
3592 
3593     M = *newmat;
3594     ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
3595     if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
3596     ierr = MatZeroEntries(M);CHKERRQ(ierr);
3597     /*
3598          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
3599        rather than the slower MatSetValues().
3600     */
3601     M->was_assembled = PETSC_TRUE;
3602     M->assembled     = PETSC_FALSE;
3603   }
3604   ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
3605   aij = (Mat_SeqAIJ*)(Mreuse)->data;
3606   ii  = aij->i;
3607   jj  = aij->j;
3608   aa  = aij->a;
3609   for (i=0; i<m; i++) {
3610     row   = rstart + i;
3611     nz    = ii[i+1] - ii[i];
3612     cwork = jj;     jj += nz;
3613     vwork = aa;     aa += nz;
3614     ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
3615   }
3616 
3617   ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3618   ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3619   *newmat = M;
3620 
3621   /* save submatrix used in processor for next request */
3622   if (call ==  MAT_INITIAL_MATRIX) {
3623     ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
3624     ierr = MatDestroy(&Mreuse);CHKERRQ(ierr);
3625   }
3626 
3627   PetscFunctionReturn(0);
3628 }
3629 
3630 EXTERN_C_BEGIN
3631 #undef __FUNCT__
3632 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR_MPIAIJ"
3633 PetscErrorCode  MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[])
3634 {
3635   PetscInt       m,cstart, cend,j,nnz,i,d;
3636   PetscInt       *d_nnz,*o_nnz,nnz_max = 0,rstart,ii;
3637   const PetscInt *JJ;
3638   PetscScalar    *values;
3639   PetscErrorCode ierr;
3640 
3641   PetscFunctionBegin;
3642   if (Ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0 it is %D",Ii[0]);
3643 
3644   ierr = PetscLayoutSetBlockSize(B->rmap,1);CHKERRQ(ierr);
3645   ierr = PetscLayoutSetBlockSize(B->cmap,1);CHKERRQ(ierr);
3646   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3647   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3648   m      = B->rmap->n;
3649   cstart = B->cmap->rstart;
3650   cend   = B->cmap->rend;
3651   rstart = B->rmap->rstart;
3652 
3653   ierr  = PetscMalloc2(m,PetscInt,&d_nnz,m,PetscInt,&o_nnz);CHKERRQ(ierr);
3654 
3655 #if defined(PETSC_USE_DEBUGGING)
3656   for (i=0; i<m; i++) {
3657     nnz     = Ii[i+1]- Ii[i];
3658     JJ      = J + Ii[i];
3659     if (nnz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative %D number of columns",i,nnz);
3660     if (nnz && (JJ[0] < 0)) SETERRRQ1(PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index",i,j);
3661     if (nnz && (JJ[nnz-1] >= B->cmap->N) SETERRRQ3(PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N);
3662   }
3663 #endif
3664 
3665   for (i=0; i<m; i++) {
3666     nnz     = Ii[i+1]- Ii[i];
3667     JJ      = J + Ii[i];
3668     nnz_max = PetscMax(nnz_max,nnz);
3669     d       = 0;
3670     for (j=0; j<nnz; j++) {
3671       if (cstart <= JJ[j] && JJ[j] < cend) d++;
3672     }
3673     d_nnz[i] = d;
3674     o_nnz[i] = nnz - d;
3675   }
3676   ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
3677   ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);
3678 
3679   if (v) values = (PetscScalar*)v;
3680   else {
3681     ierr = PetscMalloc((nnz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr);
3682     ierr = PetscMemzero(values,nnz_max*sizeof(PetscScalar));CHKERRQ(ierr);
3683   }
3684 
3685   for (i=0; i<m; i++) {
3686     ii   = i + rstart;
3687     nnz  = Ii[i+1]- Ii[i];
3688     ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ? Ii[i] : 0),INSERT_VALUES);CHKERRQ(ierr);
3689   }
3690   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3691   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3692 
3693   if (!v) {
3694     ierr = PetscFree(values);CHKERRQ(ierr);
3695   }
3696   PetscFunctionReturn(0);
3697 }
3698 EXTERN_C_END
3699 
3700 #undef __FUNCT__
3701 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR"
3702 /*@
3703    MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format
3704    (the default parallel PETSc format).
3705 
3706    Collective on MPI_Comm
3707 
3708    Input Parameters:
3709 +  B - the matrix
3710 .  i - the indices into j for the start of each local row (starts with zero)
3711 .  j - the column indices for each local row (starts with zero)
3712 -  v - optional values in the matrix
3713 
3714    Level: developer
3715 
3716    Notes:
3717        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
3718      thus you CANNOT change the matrix entries by changing the values of a[] after you have
3719      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3720 
3721        The i and j indices are 0 based, and i indices are indices corresponding to the local j array.
3722 
3723        The format which is used for the sparse matrix input, is equivalent to a
3724     row-major ordering.. i.e for the following matrix, the input data expected is
3725     as shown:
3726 
3727         1 0 0
3728         2 0 3     P0
3729        -------
3730         4 5 6     P1
3731 
3732      Process0 [P0]: rows_owned=[0,1]
3733         i =  {0,1,3}  [size = nrow+1  = 2+1]
3734         j =  {0,0,2}  [size = nz = 6]
3735         v =  {1,2,3}  [size = nz = 6]
3736 
3737      Process1 [P1]: rows_owned=[2]
3738         i =  {0,3}    [size = nrow+1  = 1+1]
3739         j =  {0,1,2}  [size = nz = 6]
3740         v =  {4,5,6}  [size = nz = 6]
3741 
3742 .keywords: matrix, aij, compressed row, sparse, parallel
3743 
3744 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateMPIAIJ(), MPIAIJ,
3745           MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays()
3746 @*/
3747 PetscErrorCode  MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
3748 {
3749   PetscErrorCode ierr;
3750 
3751   PetscFunctionBegin;
3752   ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocationCSR_C",(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,i,j,v));CHKERRQ(ierr);
3753   PetscFunctionReturn(0);
3754 }
3755 
3756 #undef __FUNCT__
3757 #define __FUNCT__ "MatMPIAIJSetPreallocation"
3758 /*@C
3759    MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format
3760    (the default parallel PETSc format).  For good matrix assembly performance
3761    the user should preallocate the matrix storage by setting the parameters
3762    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3763    performance can be increased by more than a factor of 50.
3764 
3765    Collective on MPI_Comm
3766 
3767    Input Parameters:
3768 +  A - the matrix
3769 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
3770            (same value is used for all local rows)
3771 .  d_nnz - array containing the number of nonzeros in the various rows of the
3772            DIAGONAL portion of the local submatrix (possibly different for each row)
3773            or PETSC_NULL, if d_nz is used to specify the nonzero structure.
3774            The size of this array is equal to the number of local rows, i.e 'm'.
3775            You must leave room for the diagonal entry even if it is zero.
3776 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
3777            submatrix (same value is used for all local rows).
3778 -  o_nnz - array containing the number of nonzeros in the various rows of the
3779            OFF-DIAGONAL portion of the local submatrix (possibly different for
3780            each row) or PETSC_NULL, if o_nz is used to specify the nonzero
3781            structure. The size of this array is equal to the number
3782            of local rows, i.e 'm'.
3783 
3784    If the *_nnz parameter is given then the *_nz parameter is ignored
3785 
3786    The AIJ format (also called the Yale sparse matrix format or
3787    compressed row storage (CSR)), is fully compatible with standard Fortran 77
3788    storage.  The stored row and column indices begin with zero.
3789    See the <A href="../../docs/manual.pdf#nameddest=ch_mat">Mat chapter of the users manual</A> for details.
3790 
3791    The parallel matrix is partitioned such that the first m0 rows belong to
3792    process 0, the next m1 rows belong to process 1, the next m2 rows belong
3793    to process 2 etc.. where m0,m1,m2... are the input parameter 'm'.
3794 
3795    The DIAGONAL portion of the local submatrix of a processor can be defined
3796    as the submatrix which is obtained by extraction the part corresponding to
3797    the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the
3798    first row that belongs to the processor, r2 is the last row belonging to
3799    the this processor, and c1-c2 is range of indices of the local part of a
3800    vector suitable for applying the matrix to.  This is an mxn matrix.  In the
3801    common case of a square matrix, the row and column ranges are the same and
3802    the DIAGONAL part is also square. The remaining portion of the local
3803    submatrix (mxN) constitute the OFF-DIAGONAL portion.
3804 
3805    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
3806 
3807    You can call MatGetInfo() to get information on how effective the preallocation was;
3808    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
3809    You can also run with the option -info and look for messages with the string
3810    malloc in them to see if additional memory allocation was needed.
3811 
3812    Example usage:
3813 
3814    Consider the following 8x8 matrix with 34 non-zero values, that is
3815    assembled across 3 processors. Lets assume that proc0 owns 3 rows,
3816    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
3817    as follows:
3818 
3819 .vb
3820             1  2  0  |  0  3  0  |  0  4
3821     Proc0   0  5  6  |  7  0  0  |  8  0
3822             9  0 10  | 11  0  0  | 12  0
3823     -------------------------------------
3824            13  0 14  | 15 16 17  |  0  0
3825     Proc1   0 18  0  | 19 20 21  |  0  0
3826             0  0  0  | 22 23  0  | 24  0
3827     -------------------------------------
3828     Proc2  25 26 27  |  0  0 28  | 29  0
3829            30  0  0  | 31 32 33  |  0 34
3830 .ve
3831 
3832    This can be represented as a collection of submatrices as:
3833 
3834 .vb
3835       A B C
3836       D E F
3837       G H I
3838 .ve
3839 
3840    Where the submatrices A,B,C are owned by proc0, D,E,F are
3841    owned by proc1, G,H,I are owned by proc2.
3842 
3843    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3844    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3845    The 'M','N' parameters are 8,8, and have the same values on all procs.
3846 
3847    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
3848    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
3849    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
3850    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
3851    part as SeqAIJ matrices. for eg: proc1 will store [E] as a SeqAIJ
3852    matrix, ans [DF] as another SeqAIJ matrix.
3853 
3854    When d_nz, o_nz parameters are specified, d_nz storage elements are
3855    allocated for every row of the local diagonal submatrix, and o_nz
3856    storage locations are allocated for every row of the OFF-DIAGONAL submat.
3857    One way to choose d_nz and o_nz is to use the max nonzerors per local
3858    rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
3859    In this case, the values of d_nz,o_nz are:
3860 .vb
3861      proc0 : dnz = 2, o_nz = 2
3862      proc1 : dnz = 3, o_nz = 2
3863      proc2 : dnz = 1, o_nz = 4
3864 .ve
3865    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
3866    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
3867    for proc3. i.e we are using 12+15+10=37 storage locations to store
3868    34 values.
3869 
3870    When d_nnz, o_nnz parameters are specified, the storage is specified
3871    for every row, coresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
3872    In the above case the values for d_nnz,o_nnz are:
3873 .vb
3874      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
3875      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
3876      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
3877 .ve
3878    Here the space allocated is sum of all the above values i.e 34, and
3879    hence pre-allocation is perfect.
3880 
3881    Level: intermediate
3882 
3883 .keywords: matrix, aij, compressed row, sparse, parallel
3884 
3885 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIAIJ(), MatMPIAIJSetPreallocationCSR(),
3886           MPIAIJ, MatGetInfo()
3887 @*/
3888 PetscErrorCode  MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
3889 {
3890   PetscErrorCode ierr;
3891 
3892   PetscFunctionBegin;
3893   ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocation_C",(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr);
3894   PetscFunctionReturn(0);
3895 }
3896 
3897 #undef __FUNCT__
3898 #define __FUNCT__ "MatCreateMPIAIJWithArrays"
3899 /*@
3900      MatCreateMPIAIJWithArrays - creates a MPI AIJ matrix using arrays that contain in standard
3901          CSR format the local rows.
3902 
3903    Collective on MPI_Comm
3904 
3905    Input Parameters:
3906 +  comm - MPI communicator
3907 .  m - number of local rows (Cannot be PETSC_DECIDE)
3908 .  n - This value should be the same as the local size used in creating the
3909        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
3910        calculated if N is given) For square matrices n is almost always m.
3911 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
3912 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
3913 .   i - row indices
3914 .   j - column indices
3915 -   a - matrix values
3916 
3917    Output Parameter:
3918 .   mat - the matrix
3919 
3920    Level: intermediate
3921 
3922    Notes:
3923        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
3924      thus you CANNOT change the matrix entries by changing the values of a[] after you have
3925      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3926 
3927        The i and j indices are 0 based, and i indices are indices corresponding to the local j array.
3928 
3929        The format which is used for the sparse matrix input, is equivalent to a
3930     row-major ordering.. i.e for the following matrix, the input data expected is
3931     as shown:
3932 
3933         1 0 0
3934         2 0 3     P0
3935        -------
3936         4 5 6     P1
3937 
3938      Process0 [P0]: rows_owned=[0,1]
3939         i =  {0,1,3}  [size = nrow+1  = 2+1]
3940         j =  {0,0,2}  [size = nz = 6]
3941         v =  {1,2,3}  [size = nz = 6]
3942 
3943      Process1 [P1]: rows_owned=[2]
3944         i =  {0,3}    [size = nrow+1  = 1+1]
3945         j =  {0,1,2}  [size = nz = 6]
3946         v =  {4,5,6}  [size = nz = 6]
3947 
3948 .keywords: matrix, aij, compressed row, sparse, parallel
3949 
3950 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
3951           MPIAIJ, MatCreateMPIAIJ(), MatCreateMPIAIJWithSplitArrays()
3952 @*/
3953 PetscErrorCode  MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
3954 {
3955   PetscErrorCode ierr;
3956 
3957  PetscFunctionBegin;
3958   if (i[0]) {
3959     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
3960   }
3961   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
3962   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
3963   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
3964   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
3965   ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr);
3966   PetscFunctionReturn(0);
3967 }
3968 
3969 #undef __FUNCT__
3970 #define __FUNCT__ "MatCreateMPIAIJ"
3971 /*@C
3972    MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format
3973    (the default parallel PETSc format).  For good matrix assembly performance
3974    the user should preallocate the matrix storage by setting the parameters
3975    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3976    performance can be increased by more than a factor of 50.
3977 
3978    Collective on MPI_Comm
3979 
3980    Input Parameters:
3981 +  comm - MPI communicator
3982 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
3983            This value should be the same as the local size used in creating the
3984            y vector for the matrix-vector product y = Ax.
3985 .  n - This value should be the same as the local size used in creating the
3986        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
3987        calculated if N is given) For square matrices n is almost always m.
3988 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
3989 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
3990 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
3991            (same value is used for all local rows)
3992 .  d_nnz - array containing the number of nonzeros in the various rows of the
3993            DIAGONAL portion of the local submatrix (possibly different for each row)
3994            or PETSC_NULL, if d_nz is used to specify the nonzero structure.
3995            The size of this array is equal to the number of local rows, i.e 'm'.
3996            You must leave room for the diagonal entry even if it is zero.
3997 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
3998            submatrix (same value is used for all local rows).
3999 -  o_nnz - array containing the number of nonzeros in the various rows of the
4000            OFF-DIAGONAL portion of the local submatrix (possibly different for
4001            each row) or PETSC_NULL, if o_nz is used to specify the nonzero
4002            structure. The size of this array is equal to the number
4003            of local rows, i.e 'm'.
4004 
4005    Output Parameter:
4006 .  A - the matrix
4007 
4008    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
4009    MatXXXXSetPreallocation() paradgm instead of this routine directly.
4010    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
4011 
4012    Notes:
4013    If the *_nnz parameter is given then the *_nz parameter is ignored
4014 
4015    m,n,M,N parameters specify the size of the matrix, and its partitioning across
4016    processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
4017    storage requirements for this matrix.
4018 
4019    If PETSC_DECIDE or  PETSC_DETERMINE is used for a particular argument on one
4020    processor than it must be used on all processors that share the object for
4021    that argument.
4022 
4023    The user MUST specify either the local or global matrix dimensions
4024    (possibly both).
4025 
4026    The parallel matrix is partitioned across processors such that the
4027    first m0 rows belong to process 0, the next m1 rows belong to
4028    process 1, the next m2 rows belong to process 2 etc.. where
4029    m0,m1,m2,.. are the input parameter 'm'. i.e each processor stores
4030    values corresponding to [m x N] submatrix.
4031 
4032    The columns are logically partitioned with the n0 columns belonging
4033    to 0th partition, the next n1 columns belonging to the next
4034    partition etc.. where n0,n1,n2... are the the input parameter 'n'.
4035 
4036    The DIAGONAL portion of the local submatrix on any given processor
4037    is the submatrix corresponding to the rows and columns m,n
4038    corresponding to the given processor. i.e diagonal matrix on
4039    process 0 is [m0 x n0], diagonal matrix on process 1 is [m1 x n1]
4040    etc. The remaining portion of the local submatrix [m x (N-n)]
4041    constitute the OFF-DIAGONAL portion. The example below better
4042    illustrates this concept.
4043 
4044    For a square global matrix we define each processor's diagonal portion
4045    to be its local rows and the corresponding columns (a square submatrix);
4046    each processor's off-diagonal portion encompasses the remainder of the
4047    local matrix (a rectangular submatrix).
4048 
4049    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
4050 
4051    When calling this routine with a single process communicator, a matrix of
4052    type SEQAIJ is returned.  If a matrix of type MPIAIJ is desired for this
4053    type of communicator, use the construction mechanism:
4054      MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatSetSizes(A, m,n,M,N); MatMPIAIJSetPreallocation(A,...);
4055 
4056    By default, this format uses inodes (identical nodes) when possible.
4057    We search for consecutive rows with the same nonzero structure, thereby
4058    reusing matrix information to achieve increased efficiency.
4059 
4060    Options Database Keys:
4061 +  -mat_no_inode  - Do not use inodes
4062 .  -mat_inode_limit <limit> - Sets inode limit (max limit=5)
4063 -  -mat_aij_oneindex - Internally use indexing starting at 1
4064         rather than 0.  Note that when calling MatSetValues(),
4065         the user still MUST index entries starting at 0!
4066 
4067 
4068    Example usage:
4069 
4070    Consider the following 8x8 matrix with 34 non-zero values, that is
4071    assembled across 3 processors. Lets assume that proc0 owns 3 rows,
4072    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
4073    as follows:
4074 
4075 .vb
4076             1  2  0  |  0  3  0  |  0  4
4077     Proc0   0  5  6  |  7  0  0  |  8  0
4078             9  0 10  | 11  0  0  | 12  0
4079     -------------------------------------
4080            13  0 14  | 15 16 17  |  0  0
4081     Proc1   0 18  0  | 19 20 21  |  0  0
4082             0  0  0  | 22 23  0  | 24  0
4083     -------------------------------------
4084     Proc2  25 26 27  |  0  0 28  | 29  0
4085            30  0  0  | 31 32 33  |  0 34
4086 .ve
4087 
4088    This can be represented as a collection of submatrices as:
4089 
4090 .vb
4091       A B C
4092       D E F
4093       G H I
4094 .ve
4095 
4096    Where the submatrices A,B,C are owned by proc0, D,E,F are
4097    owned by proc1, G,H,I are owned by proc2.
4098 
4099    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4100    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4101    The 'M','N' parameters are 8,8, and have the same values on all procs.
4102 
4103    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
4104    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
4105    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
4106    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
4107    part as SeqAIJ matrices. for eg: proc1 will store [E] as a SeqAIJ
4108    matrix, ans [DF] as another SeqAIJ matrix.
4109 
4110    When d_nz, o_nz parameters are specified, d_nz storage elements are
4111    allocated for every row of the local diagonal submatrix, and o_nz
4112    storage locations are allocated for every row of the OFF-DIAGONAL submat.
4113    One way to choose d_nz and o_nz is to use the max nonzerors per local
4114    rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
4115    In this case, the values of d_nz,o_nz are:
4116 .vb
4117      proc0 : dnz = 2, o_nz = 2
4118      proc1 : dnz = 3, o_nz = 2
4119      proc2 : dnz = 1, o_nz = 4
4120 .ve
4121    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
4122    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
4123    for proc3. i.e we are using 12+15+10=37 storage locations to store
4124    34 values.
4125 
4126    When d_nnz, o_nnz parameters are specified, the storage is specified
4127    for every row, coresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
4128    In the above case the values for d_nnz,o_nnz are:
4129 .vb
4130      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
4131      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
4132      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
4133 .ve
4134    Here the space allocated is sum of all the above values i.e 34, and
4135    hence pre-allocation is perfect.
4136 
4137    Level: intermediate
4138 
4139 .keywords: matrix, aij, compressed row, sparse, parallel
4140 
4141 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
4142           MPIAIJ, MatCreateMPIAIJWithArrays()
4143 @*/
4144 PetscErrorCode  MatCreateMPIAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
4145 {
4146   PetscErrorCode ierr;
4147   PetscMPIInt    size;
4148 
4149   PetscFunctionBegin;
4150   ierr = MatCreate(comm,A);CHKERRQ(ierr);
4151   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
4152   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4153   if (size > 1) {
4154     ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr);
4155     ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
4156   } else {
4157     ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr);
4158     ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr);
4159   }
4160   PetscFunctionReturn(0);
4161 }
4162 
4163 #undef __FUNCT__
4164 #define __FUNCT__ "MatMPIAIJGetSeqAIJ"
4165 PetscErrorCode  MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,PetscInt *colmap[])
4166 {
4167   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
4168 
4169   PetscFunctionBegin;
4170   *Ad     = a->A;
4171   *Ao     = a->B;
4172   *colmap = a->garray;
4173   PetscFunctionReturn(0);
4174 }
4175 
4176 #undef __FUNCT__
4177 #define __FUNCT__ "MatSetColoring_MPIAIJ"
4178 PetscErrorCode MatSetColoring_MPIAIJ(Mat A,ISColoring coloring)
4179 {
4180   PetscErrorCode ierr;
4181   PetscInt       i;
4182   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
4183 
4184   PetscFunctionBegin;
4185   if (coloring->ctype == IS_COLORING_GLOBAL) {
4186     ISColoringValue *allcolors,*colors;
4187     ISColoring      ocoloring;
4188 
4189     /* set coloring for diagonal portion */
4190     ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr);
4191 
4192     /* set coloring for off-diagonal portion */
4193     ierr = ISAllGatherColors(((PetscObject)A)->comm,coloring->n,coloring->colors,PETSC_NULL,&allcolors);CHKERRQ(ierr);
4194     ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
4195     for (i=0; i<a->B->cmap->n; i++) {
4196       colors[i] = allcolors[a->garray[i]];
4197     }
4198     ierr = PetscFree(allcolors);CHKERRQ(ierr);
4199     ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
4200     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
4201     ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr);
4202   } else if (coloring->ctype == IS_COLORING_GHOSTED) {
4203     ISColoringValue *colors;
4204     PetscInt        *larray;
4205     ISColoring      ocoloring;
4206 
4207     /* set coloring for diagonal portion */
4208     ierr = PetscMalloc((a->A->cmap->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr);
4209     for (i=0; i<a->A->cmap->n; i++) {
4210       larray[i] = i + A->cmap->rstart;
4211     }
4212     ierr = ISGlobalToLocalMappingApply(A->cmap->mapping,IS_GTOLM_MASK,a->A->cmap->n,larray,PETSC_NULL,larray);CHKERRQ(ierr);
4213     ierr = PetscMalloc((a->A->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
4214     for (i=0; i<a->A->cmap->n; i++) {
4215       colors[i] = coloring->colors[larray[i]];
4216     }
4217     ierr = PetscFree(larray);CHKERRQ(ierr);
4218     ierr = ISColoringCreate(PETSC_COMM_SELF,coloring->n,a->A->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
4219     ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr);
4220     ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr);
4221 
4222     /* set coloring for off-diagonal portion */
4223     ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr);
4224     ierr = ISGlobalToLocalMappingApply(A->cmap->mapping,IS_GTOLM_MASK,a->B->cmap->n,a->garray,PETSC_NULL,larray);CHKERRQ(ierr);
4225     ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
4226     for (i=0; i<a->B->cmap->n; i++) {
4227       colors[i] = coloring->colors[larray[i]];
4228     }
4229     ierr = PetscFree(larray);CHKERRQ(ierr);
4230     ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
4231     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
4232     ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr);
4233   } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support ISColoringType %d",(int)coloring->ctype);
4234 
4235   PetscFunctionReturn(0);
4236 }
4237 
4238 #if defined(PETSC_HAVE_ADIC)
4239 #undef __FUNCT__
4240 #define __FUNCT__ "MatSetValuesAdic_MPIAIJ"
4241 PetscErrorCode MatSetValuesAdic_MPIAIJ(Mat A,void *advalues)
4242 {
4243   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
4244   PetscErrorCode ierr;
4245 
4246   PetscFunctionBegin;
4247   ierr = MatSetValuesAdic_SeqAIJ(a->A,advalues);CHKERRQ(ierr);
4248   ierr = MatSetValuesAdic_SeqAIJ(a->B,advalues);CHKERRQ(ierr);
4249   PetscFunctionReturn(0);
4250 }
4251 #endif
4252 
4253 #undef __FUNCT__
4254 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ"
4255 PetscErrorCode MatSetValuesAdifor_MPIAIJ(Mat A,PetscInt nl,void *advalues)
4256 {
4257   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
4258   PetscErrorCode ierr;
4259 
4260   PetscFunctionBegin;
4261   ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr);
4262   ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr);
4263   PetscFunctionReturn(0);
4264 }
4265 
4266 #undef __FUNCT__
4267 #define __FUNCT__ "MatMergeSymbolic"
4268 PetscErrorCode  MatMergeSymbolic(MPI_Comm comm,Mat inmat,PetscInt n,Mat *outmat)
4269 {
4270   PetscErrorCode ierr;
4271   PetscInt       m,N,i,rstart,nnz,*dnz,*onz;
4272   PetscInt       *indx;
4273 
4274   PetscFunctionBegin;
4275   /* This routine will ONLY return MPIAIJ type matrix */
4276   ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
4277   if (n == PETSC_DECIDE){
4278     ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr);
4279   }
4280   ierr = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
4281   rstart -= m;
4282 
4283   ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
4284   for (i=0;i<m;i++) {
4285     ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr);
4286     ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr);
4287     ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr);
4288   }
4289 
4290   ierr = MatCreate(comm,outmat);CHKERRQ(ierr);
4291   ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
4292   ierr = MatSetType(*outmat,MATMPIAIJ);CHKERRQ(ierr);
4293   ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr);
4294   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
4295   PetscFunctionReturn(0);
4296 }
4297 
4298 #undef __FUNCT__
4299 #define __FUNCT__ "MatMergeNumeric"
4300 PetscErrorCode  MatMergeNumeric(MPI_Comm comm,Mat inmat,PetscInt n,Mat outmat)
4301 {
4302   PetscErrorCode ierr;
4303   PetscInt       m,N,i,rstart,nnz,Ii;
4304   PetscInt       *indx;
4305   PetscScalar    *values;
4306 
4307   PetscFunctionBegin;
4308   ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
4309   ierr = MatGetOwnershipRange(outmat,&rstart,PETSC_NULL);CHKERRQ(ierr);
4310   for (i=0;i<m;i++) {
4311     ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
4312     Ii    = i + rstart;
4313     ierr = MatSetValues(outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
4314     ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
4315   }
4316   ierr = MatDestroy(&inmat);CHKERRQ(ierr);
4317   ierr = MatAssemblyBegin(outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4318   ierr = MatAssemblyEnd(outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4319   PetscFunctionReturn(0);
4320 }
4321 
4322 #undef __FUNCT__
4323 #define __FUNCT__ "MatMerge"
4324 /*@
4325       MatMerge - Creates a single large PETSc matrix by concatinating sequential
4326                  matrices from each processor
4327 
4328     Collective on MPI_Comm
4329 
4330    Input Parameters:
4331 +    comm - the communicators the parallel matrix will live on
4332 .    inmat - the input sequential matrices
4333 .    n - number of local columns (or PETSC_DECIDE)
4334 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4335 
4336    Output Parameter:
4337 .    outmat - the parallel matrix generated
4338 
4339     Level: advanced
4340 
4341    Notes: The number of columns of the matrix in EACH processor MUST be the same.
4342 
4343 @*/
4344 PetscErrorCode  MatMerge(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
4345 {
4346   PetscErrorCode ierr;
4347 
4348   PetscFunctionBegin;
4349   ierr = PetscLogEventBegin(MAT_Merge,inmat,0,0,0);CHKERRQ(ierr);
4350   if (scall == MAT_INITIAL_MATRIX){
4351     ierr = MatMergeSymbolic(comm,inmat,n,outmat);CHKERRQ(ierr);
4352   }
4353   ierr = MatMergeNumeric(comm,inmat,n,*outmat);CHKERRQ(ierr);
4354   ierr = PetscLogEventEnd(MAT_Merge,inmat,0,0,0);CHKERRQ(ierr);
4355   PetscFunctionReturn(0);
4356 }
4357 
4358 #undef __FUNCT__
4359 #define __FUNCT__ "MatFileSplit"
4360 PetscErrorCode MatFileSplit(Mat A,char *outfile)
4361 {
4362   PetscErrorCode    ierr;
4363   PetscMPIInt       rank;
4364   PetscInt          m,N,i,rstart,nnz;
4365   size_t            len;
4366   const PetscInt    *indx;
4367   PetscViewer       out;
4368   char              *name;
4369   Mat               B;
4370   const PetscScalar *values;
4371 
4372   PetscFunctionBegin;
4373   ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr);
4374   ierr = MatGetSize(A,0,&N);CHKERRQ(ierr);
4375   /* Should this be the type of the diagonal block of A? */
4376   ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr);
4377   ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr);
4378   ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr);
4379   ierr = MatSeqAIJSetPreallocation(B,0,PETSC_NULL);CHKERRQ(ierr);
4380   ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr);
4381   for (i=0;i<m;i++) {
4382     ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
4383     ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
4384     ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
4385   }
4386   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4387   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4388 
4389   ierr = MPI_Comm_rank(((PetscObject)A)->comm,&rank);CHKERRQ(ierr);
4390   ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr);
4391   ierr = PetscMalloc((len+5)*sizeof(char),&name);CHKERRQ(ierr);
4392   sprintf(name,"%s.%d",outfile,rank);
4393   ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr);
4394   ierr = PetscFree(name);
4395   ierr = MatView(B,out);CHKERRQ(ierr);
4396   ierr = PetscViewerDestroy(&out);CHKERRQ(ierr);
4397   ierr = MatDestroy(&B);CHKERRQ(ierr);
4398   PetscFunctionReturn(0);
4399 }
4400 
4401 extern PetscErrorCode MatDestroy_MPIAIJ(Mat);
4402 #undef __FUNCT__
4403 #define __FUNCT__ "MatDestroy_MPIAIJ_SeqsToMPI"
4404 PetscErrorCode  MatDestroy_MPIAIJ_SeqsToMPI(Mat A)
4405 {
4406   PetscErrorCode       ierr;
4407   Mat_Merge_SeqsToMPI  *merge;
4408   PetscContainer       container;
4409 
4410   PetscFunctionBegin;
4411   ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr);
4412   if (container) {
4413     ierr = PetscContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr);
4414     ierr = PetscFree(merge->id_r);CHKERRQ(ierr);
4415     ierr = PetscFree(merge->len_s);CHKERRQ(ierr);
4416     ierr = PetscFree(merge->len_r);CHKERRQ(ierr);
4417     ierr = PetscFree(merge->bi);CHKERRQ(ierr);
4418     ierr = PetscFree(merge->bj);CHKERRQ(ierr);
4419     ierr = PetscFree(merge->buf_ri[0]);CHKERRQ(ierr);
4420     ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr);
4421     ierr = PetscFree(merge->buf_rj[0]);CHKERRQ(ierr);
4422     ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr);
4423     ierr = PetscFree(merge->coi);CHKERRQ(ierr);
4424     ierr = PetscFree(merge->coj);CHKERRQ(ierr);
4425     ierr = PetscFree(merge->owners_co);CHKERRQ(ierr);
4426     ierr = PetscLayoutDestroy(&merge->rowmap);CHKERRQ(ierr);
4427     ierr = PetscFree(merge);CHKERRQ(ierr);
4428     ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr);
4429   }
4430   ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr);
4431   PetscFunctionReturn(0);
4432 }
4433 
4434 #include <../src/mat/utils/freespace.h>
4435 #include <petscbt.h>
4436 
4437 #undef __FUNCT__
4438 #define __FUNCT__ "MatMerge_SeqsToMPINumeric"
4439 /*@C
4440       MatMerge_SeqsToMPI - Creates a MPIAIJ matrix by adding sequential
4441                  matrices from each processor
4442 
4443     Collective on MPI_Comm
4444 
4445    Input Parameters:
4446 +    comm - the communicators the parallel matrix will live on
4447 .    seqmat - the input sequential matrices
4448 .    m - number of local rows (or PETSC_DECIDE)
4449 .    n - number of local columns (or PETSC_DECIDE)
4450 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4451 
4452    Output Parameter:
4453 .    mpimat - the parallel matrix generated
4454 
4455     Level: advanced
4456 
4457    Notes:
4458      The dimensions of the sequential matrix in each processor MUST be the same.
4459      The input seqmat is included into the container "Mat_Merge_SeqsToMPI", and will be
4460      destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat.
4461 @*/
4462 PetscErrorCode  MatMerge_SeqsToMPINumeric(Mat seqmat,Mat mpimat)
4463 {
4464   PetscErrorCode       ierr;
4465   MPI_Comm             comm=((PetscObject)mpimat)->comm;
4466   Mat_SeqAIJ           *a=(Mat_SeqAIJ*)seqmat->data;
4467   PetscMPIInt          size,rank,taga,*len_s;
4468   PetscInt             N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj=a->j;
4469   PetscInt             proc,m;
4470   PetscInt             **buf_ri,**buf_rj;
4471   PetscInt             k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj;
4472   PetscInt             nrows,**buf_ri_k,**nextrow,**nextai;
4473   MPI_Request          *s_waits,*r_waits;
4474   MPI_Status           *status;
4475   MatScalar            *aa=a->a;
4476   MatScalar            **abuf_r,*ba_i;
4477   Mat_Merge_SeqsToMPI  *merge;
4478   PetscContainer       container;
4479 
4480   PetscFunctionBegin;
4481   ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
4482 
4483   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4484   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4485 
4486   ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr);
4487   ierr  = PetscContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr);
4488 
4489   bi     = merge->bi;
4490   bj     = merge->bj;
4491   buf_ri = merge->buf_ri;
4492   buf_rj = merge->buf_rj;
4493 
4494   ierr   = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr);
4495   owners = merge->rowmap->range;
4496   len_s  = merge->len_s;
4497 
4498   /* send and recv matrix values */
4499   /*-----------------------------*/
4500   ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr);
4501   ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr);
4502 
4503   ierr = PetscMalloc((merge->nsend+1)*sizeof(MPI_Request),&s_waits);CHKERRQ(ierr);
4504   for (proc=0,k=0; proc<size; proc++){
4505     if (!len_s[proc]) continue;
4506     i = owners[proc];
4507     ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr);
4508     k++;
4509   }
4510 
4511   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);}
4512   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);}
4513   ierr = PetscFree(status);CHKERRQ(ierr);
4514 
4515   ierr = PetscFree(s_waits);CHKERRQ(ierr);
4516   ierr = PetscFree(r_waits);CHKERRQ(ierr);
4517 
4518   /* insert mat values of mpimat */
4519   /*----------------------------*/
4520   ierr = PetscMalloc(N*sizeof(PetscScalar),&ba_i);CHKERRQ(ierr);
4521   ierr = PetscMalloc3(merge->nrecv,PetscInt*,&buf_ri_k,merge->nrecv,PetscInt*,&nextrow,merge->nrecv,PetscInt*,&nextai);CHKERRQ(ierr);
4522 
4523   for (k=0; k<merge->nrecv; k++){
4524     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
4525     nrows = *(buf_ri_k[k]);
4526     nextrow[k]  = buf_ri_k[k]+1;  /* next row number of k-th recved i-structure */
4527     nextai[k]   = buf_ri_k[k] + (nrows + 1);/* poins to the next i-structure of k-th recved i-structure  */
4528   }
4529 
4530   /* set values of ba */
4531   m = merge->rowmap->n;
4532   for (i=0; i<m; i++) {
4533     arow = owners[rank] + i;
4534     bj_i = bj+bi[i];  /* col indices of the i-th row of mpimat */
4535     bnzi = bi[i+1] - bi[i];
4536     ierr = PetscMemzero(ba_i,bnzi*sizeof(PetscScalar));CHKERRQ(ierr);
4537 
4538     /* add local non-zero vals of this proc's seqmat into ba */
4539     anzi = ai[arow+1] - ai[arow];
4540     aj   = a->j + ai[arow];
4541     aa   = a->a + ai[arow];
4542     nextaj = 0;
4543     for (j=0; nextaj<anzi; j++){
4544       if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */
4545         ba_i[j] += aa[nextaj++];
4546       }
4547     }
4548 
4549     /* add received vals into ba */
4550     for (k=0; k<merge->nrecv; k++){ /* k-th received message */
4551       /* i-th row */
4552       if (i == *nextrow[k]) {
4553         anzi = *(nextai[k]+1) - *nextai[k];
4554         aj   = buf_rj[k] + *(nextai[k]);
4555         aa   = abuf_r[k] + *(nextai[k]);
4556         nextaj = 0;
4557         for (j=0; nextaj<anzi; j++){
4558           if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */
4559             ba_i[j] += aa[nextaj++];
4560           }
4561         }
4562         nextrow[k]++; nextai[k]++;
4563       }
4564     }
4565     ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr);
4566   }
4567   ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4568   ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4569 
4570   ierr = PetscFree(abuf_r[0]);CHKERRQ(ierr);
4571   ierr = PetscFree(abuf_r);CHKERRQ(ierr);
4572   ierr = PetscFree(ba_i);CHKERRQ(ierr);
4573   ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
4574   ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
4575   PetscFunctionReturn(0);
4576 }
4577 
4578 extern PetscErrorCode  MatDestroy_MPIAIJ_SeqsToMPI(Mat);
4579 
4580 #undef __FUNCT__
4581 #define __FUNCT__ "MatMerge_SeqsToMPISymbolic"
4582 PetscErrorCode  MatMerge_SeqsToMPISymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat)
4583 {
4584   PetscErrorCode       ierr;
4585   Mat                  B_mpi;
4586   Mat_SeqAIJ           *a=(Mat_SeqAIJ*)seqmat->data;
4587   PetscMPIInt          size,rank,tagi,tagj,*len_s,*len_si,*len_ri;
4588   PetscInt             **buf_rj,**buf_ri,**buf_ri_k;
4589   PetscInt             M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j;
4590   PetscInt             len,proc,*dnz,*onz;
4591   PetscInt             k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0;
4592   PetscInt             nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai;
4593   MPI_Request          *si_waits,*sj_waits,*ri_waits,*rj_waits;
4594   MPI_Status           *status;
4595   PetscFreeSpaceList   free_space=PETSC_NULL,current_space=PETSC_NULL;
4596   PetscBT              lnkbt;
4597   Mat_Merge_SeqsToMPI  *merge;
4598   PetscContainer       container;
4599 
4600   PetscFunctionBegin;
4601   ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4602 
4603   /* make sure it is a PETSc comm */
4604   ierr = PetscCommDuplicate(comm,&comm,PETSC_NULL);CHKERRQ(ierr);
4605   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4606   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4607 
4608   ierr = PetscNew(Mat_Merge_SeqsToMPI,&merge);CHKERRQ(ierr);
4609   ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr);
4610 
4611   /* determine row ownership */
4612   /*---------------------------------------------------------*/
4613   ierr = PetscLayoutCreate(comm,&merge->rowmap);CHKERRQ(ierr);
4614   ierr = PetscLayoutSetLocalSize(merge->rowmap,m);CHKERRQ(ierr);
4615   ierr = PetscLayoutSetSize(merge->rowmap,M);CHKERRQ(ierr);
4616   ierr = PetscLayoutSetBlockSize(merge->rowmap,1);CHKERRQ(ierr);
4617   ierr = PetscLayoutSetUp(merge->rowmap);CHKERRQ(ierr);
4618   ierr = PetscMalloc(size*sizeof(PetscMPIInt),&len_si);CHKERRQ(ierr);
4619   ierr = PetscMalloc(size*sizeof(PetscMPIInt),&merge->len_s);CHKERRQ(ierr);
4620 
4621   m      = merge->rowmap->n;
4622   M      = merge->rowmap->N;
4623   owners = merge->rowmap->range;
4624 
4625   /* determine the number of messages to send, their lengths */
4626   /*---------------------------------------------------------*/
4627   len_s  = merge->len_s;
4628 
4629   len = 0;  /* length of buf_si[] */
4630   merge->nsend = 0;
4631   for (proc=0; proc<size; proc++){
4632     len_si[proc] = 0;
4633     if (proc == rank){
4634       len_s[proc] = 0;
4635     } else {
4636       len_si[proc] = owners[proc+1] - owners[proc] + 1;
4637       len_s[proc] = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */
4638     }
4639     if (len_s[proc]) {
4640       merge->nsend++;
4641       nrows = 0;
4642       for (i=owners[proc]; i<owners[proc+1]; i++){
4643         if (ai[i+1] > ai[i]) nrows++;
4644       }
4645       len_si[proc] = 2*(nrows+1);
4646       len += len_si[proc];
4647     }
4648   }
4649 
4650   /* determine the number and length of messages to receive for ij-structure */
4651   /*-------------------------------------------------------------------------*/
4652   ierr = PetscGatherNumberOfMessages(comm,PETSC_NULL,len_s,&merge->nrecv);CHKERRQ(ierr);
4653   ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr);
4654 
4655   /* post the Irecv of j-structure */
4656   /*-------------------------------*/
4657   ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr);
4658   ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr);
4659 
4660   /* post the Isend of j-structure */
4661   /*--------------------------------*/
4662   ierr = PetscMalloc2(merge->nsend,MPI_Request,&si_waits,merge->nsend,MPI_Request,&sj_waits);CHKERRQ(ierr);
4663 
4664   for (proc=0, k=0; proc<size; proc++){
4665     if (!len_s[proc]) continue;
4666     i = owners[proc];
4667     ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr);
4668     k++;
4669   }
4670 
4671   /* receives and sends of j-structure are complete */
4672   /*------------------------------------------------*/
4673   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);}
4674   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);}
4675 
4676   /* send and recv i-structure */
4677   /*---------------------------*/
4678   ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr);
4679   ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr);
4680 
4681   ierr = PetscMalloc((len+1)*sizeof(PetscInt),&buf_s);CHKERRQ(ierr);
4682   buf_si = buf_s;  /* points to the beginning of k-th msg to be sent */
4683   for (proc=0,k=0; proc<size; proc++){
4684     if (!len_s[proc]) continue;
4685     /* form outgoing message for i-structure:
4686          buf_si[0]:                 nrows to be sent
4687                [1:nrows]:           row index (global)
4688                [nrows+1:2*nrows+1]: i-structure index
4689     */
4690     /*-------------------------------------------*/
4691     nrows = len_si[proc]/2 - 1;
4692     buf_si_i    = buf_si + nrows+1;
4693     buf_si[0]   = nrows;
4694     buf_si_i[0] = 0;
4695     nrows = 0;
4696     for (i=owners[proc]; i<owners[proc+1]; i++){
4697       anzi = ai[i+1] - ai[i];
4698       if (anzi) {
4699         buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */
4700         buf_si[nrows+1] = i-owners[proc]; /* local row index */
4701         nrows++;
4702       }
4703     }
4704     ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr);
4705     k++;
4706     buf_si += len_si[proc];
4707   }
4708 
4709   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);}
4710   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);}
4711 
4712   ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr);
4713   for (i=0; i<merge->nrecv; i++){
4714     ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr);
4715   }
4716 
4717   ierr = PetscFree(len_si);CHKERRQ(ierr);
4718   ierr = PetscFree(len_ri);CHKERRQ(ierr);
4719   ierr = PetscFree(rj_waits);CHKERRQ(ierr);
4720   ierr = PetscFree2(si_waits,sj_waits);CHKERRQ(ierr);
4721   ierr = PetscFree(ri_waits);CHKERRQ(ierr);
4722   ierr = PetscFree(buf_s);CHKERRQ(ierr);
4723   ierr = PetscFree(status);CHKERRQ(ierr);
4724 
4725   /* compute a local seq matrix in each processor */
4726   /*----------------------------------------------*/
4727   /* allocate bi array and free space for accumulating nonzero column info */
4728   ierr = PetscMalloc((m+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr);
4729   bi[0] = 0;
4730 
4731   /* create and initialize a linked list */
4732   nlnk = N+1;
4733   ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4734 
4735   /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */
4736   len = 0;
4737   len  = ai[owners[rank+1]] - ai[owners[rank]];
4738   ierr = PetscFreeSpaceGet((PetscInt)(2*len+1),&free_space);CHKERRQ(ierr);
4739   current_space = free_space;
4740 
4741   /* determine symbolic info for each local row */
4742   ierr = PetscMalloc3(merge->nrecv,PetscInt*,&buf_ri_k,merge->nrecv,PetscInt*,&nextrow,merge->nrecv,PetscInt*,&nextai);CHKERRQ(ierr);
4743 
4744   for (k=0; k<merge->nrecv; k++){
4745     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
4746     nrows = *buf_ri_k[k];
4747     nextrow[k]  = buf_ri_k[k] + 1;  /* next row number of k-th recved i-structure */
4748     nextai[k]   = buf_ri_k[k] + (nrows + 1);/* poins to the next i-structure of k-th recved i-structure  */
4749   }
4750 
4751   ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
4752   len = 0;
4753   for (i=0;i<m;i++) {
4754     bnzi   = 0;
4755     /* add local non-zero cols of this proc's seqmat into lnk */
4756     arow   = owners[rank] + i;
4757     anzi   = ai[arow+1] - ai[arow];
4758     aj     = a->j + ai[arow];
4759     ierr = PetscLLAdd(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4760     bnzi += nlnk;
4761     /* add received col data into lnk */
4762     for (k=0; k<merge->nrecv; k++){ /* k-th received message */
4763       if (i == *nextrow[k]) { /* i-th row */
4764         anzi = *(nextai[k]+1) - *nextai[k];
4765         aj   = buf_rj[k] + *nextai[k];
4766         ierr = PetscLLAdd(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4767         bnzi += nlnk;
4768         nextrow[k]++; nextai[k]++;
4769       }
4770     }
4771     if (len < bnzi) len = bnzi;  /* =max(bnzi) */
4772 
4773     /* if free space is not available, make more free space */
4774     if (current_space->local_remaining<bnzi) {
4775       ierr = PetscFreeSpaceGet(bnzi+current_space->total_array_size,&current_space);CHKERRQ(ierr);
4776       nspacedouble++;
4777     }
4778     /* copy data into free space, then initialize lnk */
4779     ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
4780     ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr);
4781 
4782     current_space->array           += bnzi;
4783     current_space->local_used      += bnzi;
4784     current_space->local_remaining -= bnzi;
4785 
4786     bi[i+1] = bi[i] + bnzi;
4787   }
4788 
4789   ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
4790 
4791   ierr = PetscMalloc((bi[m]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr);
4792   ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr);
4793   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
4794 
4795   /* create symbolic parallel matrix B_mpi */
4796   /*---------------------------------------*/
4797   ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr);
4798   if (n==PETSC_DECIDE) {
4799     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr);
4800   } else {
4801     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
4802   }
4803   ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr);
4804   ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr);
4805   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
4806 
4807   /* B_mpi is not ready for use - assembly will be done by MatMerge_SeqsToMPINumeric() */
4808   B_mpi->assembled     = PETSC_FALSE;
4809   B_mpi->ops->destroy  = MatDestroy_MPIAIJ_SeqsToMPI;
4810   merge->bi            = bi;
4811   merge->bj            = bj;
4812   merge->buf_ri        = buf_ri;
4813   merge->buf_rj        = buf_rj;
4814   merge->coi           = PETSC_NULL;
4815   merge->coj           = PETSC_NULL;
4816   merge->owners_co     = PETSC_NULL;
4817 
4818   ierr = PetscCommDestroy(&comm);CHKERRQ(ierr);
4819 
4820   /* attach the supporting struct to B_mpi for reuse */
4821   ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
4822   ierr = PetscContainerSetPointer(container,merge);CHKERRQ(ierr);
4823   ierr = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr);
4824   ierr = PetscContainerDestroy(&container);CHKERRQ(ierr);
4825   *mpimat = B_mpi;
4826 
4827   ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4828   PetscFunctionReturn(0);
4829 }
4830 
4831 #undef __FUNCT__
4832 #define __FUNCT__ "MatMerge_SeqsToMPI"
4833 PetscErrorCode  MatMerge_SeqsToMPI(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat)
4834 {
4835   PetscErrorCode   ierr;
4836 
4837   PetscFunctionBegin;
4838   ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4839   if (scall == MAT_INITIAL_MATRIX){
4840     ierr = MatMerge_SeqsToMPISymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr);
4841   }
4842   ierr = MatMerge_SeqsToMPINumeric(seqmat,*mpimat);CHKERRQ(ierr);
4843   ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4844   PetscFunctionReturn(0);
4845 }
4846 
4847 #undef __FUNCT__
4848 #define __FUNCT__ "MatMPIAIJGetLocalMat"
4849 /*@
4850      MatMPIAIJGetLocalMat - Creates a SeqAIJ from a MPIAIJ matrix by taking all its local rows and putting them into a sequential vector with
4851           mlocal rows and n columns. Where mlocal is the row count obtained with MatGetLocalSize() and n is the global column count obtained
4852           with MatGetSize()
4853 
4854     Not Collective
4855 
4856    Input Parameters:
4857 +    A - the matrix
4858 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4859 
4860    Output Parameter:
4861 .    A_loc - the local sequential matrix generated
4862 
4863     Level: developer
4864 
4865 .seealso: MatGetOwnerShipRange(), MatMPIAIJGetLocalMatCondensed()
4866 
4867 @*/
4868 PetscErrorCode  MatMPIAIJGetLocalMat(Mat A,MatReuse scall,Mat *A_loc)
4869 {
4870   PetscErrorCode  ierr;
4871   Mat_MPIAIJ      *mpimat=(Mat_MPIAIJ*)A->data;
4872   Mat_SeqAIJ      *mat,*a=(Mat_SeqAIJ*)(mpimat->A)->data,*b=(Mat_SeqAIJ*)(mpimat->B)->data;
4873   PetscInt        *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*cmap=mpimat->garray;
4874   MatScalar       *aa=a->a,*ba=b->a,*cam;
4875   PetscScalar     *ca;
4876   PetscInt        am=A->rmap->n,i,j,k,cstart=A->cmap->rstart;
4877   PetscInt        *ci,*cj,col,ncols_d,ncols_o,jo;
4878   PetscBool       match;
4879 
4880   PetscFunctionBegin;
4881   ierr = PetscTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr);
4882   if (!match) SETERRQ(((PetscObject)A)->comm, PETSC_ERR_SUP,"Requires MPIAIJ matrix as input");
4883   ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
4884   if (scall == MAT_INITIAL_MATRIX){
4885     ierr = PetscMalloc((1+am)*sizeof(PetscInt),&ci);CHKERRQ(ierr);
4886     ci[0] = 0;
4887     for (i=0; i<am; i++){
4888       ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]);
4889     }
4890     ierr = PetscMalloc((1+ci[am])*sizeof(PetscInt),&cj);CHKERRQ(ierr);
4891     ierr = PetscMalloc((1+ci[am])*sizeof(PetscScalar),&ca);CHKERRQ(ierr);
4892     k = 0;
4893     for (i=0; i<am; i++) {
4894       ncols_o = bi[i+1] - bi[i];
4895       ncols_d = ai[i+1] - ai[i];
4896       /* off-diagonal portion of A */
4897       for (jo=0; jo<ncols_o; jo++) {
4898         col = cmap[*bj];
4899         if (col >= cstart) break;
4900         cj[k]   = col; bj++;
4901         ca[k++] = *ba++;
4902       }
4903       /* diagonal portion of A */
4904       for (j=0; j<ncols_d; j++) {
4905         cj[k]   = cstart + *aj++;
4906         ca[k++] = *aa++;
4907       }
4908       /* off-diagonal portion of A */
4909       for (j=jo; j<ncols_o; j++) {
4910         cj[k]   = cmap[*bj++];
4911         ca[k++] = *ba++;
4912       }
4913     }
4914     /* put together the new matrix */
4915     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr);
4916     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
4917     /* Since these are PETSc arrays, change flags to free them as necessary. */
4918     mat          = (Mat_SeqAIJ*)(*A_loc)->data;
4919     mat->free_a  = PETSC_TRUE;
4920     mat->free_ij = PETSC_TRUE;
4921     mat->nonew   = 0;
4922   } else if (scall == MAT_REUSE_MATRIX){
4923     mat=(Mat_SeqAIJ*)(*A_loc)->data;
4924     ci = mat->i; cj = mat->j; cam = mat->a;
4925     for (i=0; i<am; i++) {
4926       /* off-diagonal portion of A */
4927       ncols_o = bi[i+1] - bi[i];
4928       for (jo=0; jo<ncols_o; jo++) {
4929         col = cmap[*bj];
4930         if (col >= cstart) break;
4931         *cam++ = *ba++; bj++;
4932       }
4933       /* diagonal portion of A */
4934       ncols_d = ai[i+1] - ai[i];
4935       for (j=0; j<ncols_d; j++) *cam++ = *aa++;
4936       /* off-diagonal portion of A */
4937       for (j=jo; j<ncols_o; j++) {
4938         *cam++ = *ba++; bj++;
4939       }
4940     }
4941   } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
4942   ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
4943   PetscFunctionReturn(0);
4944 }
4945 
4946 #undef __FUNCT__
4947 #define __FUNCT__ "MatMPIAIJGetLocalMatCondensed"
4948 /*@C
4949      MatMPIAIJGetLocalMatCondensed - Creates a SeqAIJ matrix from an MPIAIJ matrix by taking all its local rows and NON-ZERO columns
4950 
4951     Not Collective
4952 
4953    Input Parameters:
4954 +    A - the matrix
4955 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4956 -    row, col - index sets of rows and columns to extract (or PETSC_NULL)
4957 
4958    Output Parameter:
4959 .    A_loc - the local sequential matrix generated
4960 
4961     Level: developer
4962 
4963 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMat()
4964 
4965 @*/
4966 PetscErrorCode  MatMPIAIJGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc)
4967 {
4968   Mat_MPIAIJ        *a=(Mat_MPIAIJ*)A->data;
4969   PetscErrorCode    ierr;
4970   PetscInt          i,start,end,ncols,nzA,nzB,*cmap,imark,*idx;
4971   IS                isrowa,iscola;
4972   Mat               *aloc;
4973   PetscBool       match;
4974 
4975   PetscFunctionBegin;
4976   ierr = PetscTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr);
4977   if (!match) SETERRQ(((PetscObject)A)->comm, PETSC_ERR_SUP,"Requires MPIAIJ matrix as input");
4978   ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
4979   if (!row){
4980     start = A->rmap->rstart; end = A->rmap->rend;
4981     ierr = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr);
4982   } else {
4983     isrowa = *row;
4984   }
4985   if (!col){
4986     start = A->cmap->rstart;
4987     cmap  = a->garray;
4988     nzA   = a->A->cmap->n;
4989     nzB   = a->B->cmap->n;
4990     ierr  = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr);
4991     ncols = 0;
4992     for (i=0; i<nzB; i++) {
4993       if (cmap[i] < start) idx[ncols++] = cmap[i];
4994       else break;
4995     }
4996     imark = i;
4997     for (i=0; i<nzA; i++) idx[ncols++] = start + i;
4998     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i];
4999     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&iscola);CHKERRQ(ierr);
5000   } else {
5001     iscola = *col;
5002   }
5003   if (scall != MAT_INITIAL_MATRIX){
5004     ierr = PetscMalloc(sizeof(Mat),&aloc);CHKERRQ(ierr);
5005     aloc[0] = *A_loc;
5006   }
5007   ierr = MatGetSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr);
5008   *A_loc = aloc[0];
5009   ierr = PetscFree(aloc);CHKERRQ(ierr);
5010   if (!row){
5011     ierr = ISDestroy(&isrowa);CHKERRQ(ierr);
5012   }
5013   if (!col){
5014     ierr = ISDestroy(&iscola);CHKERRQ(ierr);
5015   }
5016   ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
5017   PetscFunctionReturn(0);
5018 }
5019 
5020 #undef __FUNCT__
5021 #define __FUNCT__ "MatGetBrowsOfAcols"
5022 /*@C
5023     MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns of local A
5024 
5025     Collective on Mat
5026 
5027    Input Parameters:
5028 +    A,B - the matrices in mpiaij format
5029 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5030 -    rowb, colb - index sets of rows and columns of B to extract (or PETSC_NULL)
5031 
5032    Output Parameter:
5033 +    rowb, colb - index sets of rows and columns of B to extract
5034 -    B_seq - the sequential matrix generated
5035 
5036     Level: developer
5037 
5038 @*/
5039 PetscErrorCode  MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,Mat *B_seq)
5040 {
5041   Mat_MPIAIJ        *a=(Mat_MPIAIJ*)A->data;
5042   PetscErrorCode    ierr;
5043   PetscInt          *idx,i,start,ncols,nzA,nzB,*cmap,imark;
5044   IS                isrowb,iscolb;
5045   Mat               *bseq=PETSC_NULL;
5046 
5047   PetscFunctionBegin;
5048   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend){
5049     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
5050   }
5051   ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
5052 
5053   if (scall == MAT_INITIAL_MATRIX){
5054     start = A->cmap->rstart;
5055     cmap  = a->garray;
5056     nzA   = a->A->cmap->n;
5057     nzB   = a->B->cmap->n;
5058     ierr  = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr);
5059     ncols = 0;
5060     for (i=0; i<nzB; i++) {  /* row < local row index */
5061       if (cmap[i] < start) idx[ncols++] = cmap[i];
5062       else break;
5063     }
5064     imark = i;
5065     for (i=0; i<nzA; i++) idx[ncols++] = start + i;  /* local rows */
5066     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */
5067     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&isrowb);CHKERRQ(ierr);
5068     ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr);
5069   } else {
5070     if (!rowb || !colb) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX");
5071     isrowb = *rowb; iscolb = *colb;
5072     ierr = PetscMalloc(sizeof(Mat),&bseq);CHKERRQ(ierr);
5073     bseq[0] = *B_seq;
5074   }
5075   ierr = MatGetSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr);
5076   *B_seq = bseq[0];
5077   ierr = PetscFree(bseq);CHKERRQ(ierr);
5078   if (!rowb){
5079     ierr = ISDestroy(&isrowb);CHKERRQ(ierr);
5080   } else {
5081     *rowb = isrowb;
5082   }
5083   if (!colb){
5084     ierr = ISDestroy(&iscolb);CHKERRQ(ierr);
5085   } else {
5086     *colb = iscolb;
5087   }
5088   ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
5089   PetscFunctionReturn(0);
5090 }
5091 
5092 #undef __FUNCT__
5093 #define __FUNCT__ "MatGetBrowsOfAoCols"
5094 /*@C
5095     MatGetBrowsOfAoCols - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns
5096     of the OFF-DIAGONAL portion of local A
5097 
5098     Collective on Mat
5099 
5100    Input Parameters:
5101 +    A,B - the matrices in mpiaij format
5102 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5103 .    startsj - starting point in B's sending and receiving j-arrays, saved for MAT_REUSE (or PETSC_NULL)
5104 .    startsj_r - similar to startsj for receives
5105 -    bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or PETSC_NULL)
5106 
5107    Output Parameter:
5108 +    B_oth - the sequential matrix generated
5109 
5110     Level: developer
5111 
5112 @*/
5113 PetscErrorCode  MatGetBrowsOfAoCols(Mat A,Mat B,MatReuse scall,PetscInt **startsj,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth)
5114 {
5115   VecScatter_MPI_General *gen_to,*gen_from;
5116   PetscErrorCode         ierr;
5117   Mat_MPIAIJ             *a=(Mat_MPIAIJ*)A->data;
5118   Mat_SeqAIJ             *b_oth;
5119   VecScatter             ctx=a->Mvctx;
5120   MPI_Comm               comm=((PetscObject)ctx)->comm;
5121   PetscMPIInt            *rprocs,*sprocs,tag=((PetscObject)ctx)->tag,rank;
5122   PetscInt               *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap->n,row,*b_othi,*b_othj;
5123   PetscScalar            *rvalues,*svalues;
5124   MatScalar              *b_otha,*bufa,*bufA;
5125   PetscInt               i,j,k,l,ll,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len;
5126   MPI_Request            *rwaits = PETSC_NULL,*swaits = PETSC_NULL;
5127   MPI_Status             *sstatus,rstatus;
5128   PetscMPIInt            jj;
5129   PetscInt               *cols,sbs,rbs;
5130   PetscScalar            *vals;
5131 
5132   PetscFunctionBegin;
5133   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend){
5134     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
5135   }
5136   ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
5137   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
5138 
5139   gen_to   = (VecScatter_MPI_General*)ctx->todata;
5140   gen_from = (VecScatter_MPI_General*)ctx->fromdata;
5141   rvalues  = gen_from->values; /* holds the length of receiving row */
5142   svalues  = gen_to->values;   /* holds the length of sending row */
5143   nrecvs   = gen_from->n;
5144   nsends   = gen_to->n;
5145 
5146   ierr = PetscMalloc2(nrecvs,MPI_Request,&rwaits,nsends,MPI_Request,&swaits);CHKERRQ(ierr);
5147   srow     = gen_to->indices;   /* local row index to be sent */
5148   sstarts  = gen_to->starts;
5149   sprocs   = gen_to->procs;
5150   sstatus  = gen_to->sstatus;
5151   sbs      = gen_to->bs;
5152   rstarts  = gen_from->starts;
5153   rprocs   = gen_from->procs;
5154   rbs      = gen_from->bs;
5155 
5156   if (!startsj || !bufa_ptr) scall = MAT_INITIAL_MATRIX;
5157   if (scall == MAT_INITIAL_MATRIX){
5158     /* i-array */
5159     /*---------*/
5160     /*  post receives */
5161     for (i=0; i<nrecvs; i++){
5162       rowlen = (PetscInt*)rvalues + rstarts[i]*rbs;
5163       nrows = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */
5164       ierr = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5165     }
5166 
5167     /* pack the outgoing message */
5168     ierr = PetscMalloc2(nsends+1,PetscInt,&sstartsj,nrecvs+1,PetscInt,&rstartsj);CHKERRQ(ierr);
5169     sstartsj[0] = 0;  rstartsj[0] = 0;
5170     len = 0; /* total length of j or a array to be sent */
5171     k = 0;
5172     for (i=0; i<nsends; i++){
5173       rowlen = (PetscInt*)svalues + sstarts[i]*sbs;
5174       nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5175       for (j=0; j<nrows; j++) {
5176         row = srow[k] + B->rmap->range[rank]; /* global row idx */
5177         for (l=0; l<sbs; l++){
5178           ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); /* rowlength */
5179           rowlen[j*sbs+l] = ncols;
5180           len += ncols;
5181           ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr);
5182         }
5183         k++;
5184       }
5185       ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5186       sstartsj[i+1] = len;  /* starting point of (i+1)-th outgoing msg in bufj and bufa */
5187     }
5188     /* recvs and sends of i-array are completed */
5189     i = nrecvs;
5190     while (i--) {
5191       ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5192     }
5193     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
5194 
5195     /* allocate buffers for sending j and a arrays */
5196     ierr = PetscMalloc((len+1)*sizeof(PetscInt),&bufj);CHKERRQ(ierr);
5197     ierr = PetscMalloc((len+1)*sizeof(PetscScalar),&bufa);CHKERRQ(ierr);
5198 
5199     /* create i-array of B_oth */
5200     ierr = PetscMalloc((aBn+2)*sizeof(PetscInt),&b_othi);CHKERRQ(ierr);
5201     b_othi[0] = 0;
5202     len = 0; /* total length of j or a array to be received */
5203     k = 0;
5204     for (i=0; i<nrecvs; i++){
5205       rowlen = (PetscInt*)rvalues + rstarts[i]*rbs;
5206       nrows = rbs*(rstarts[i+1]-rstarts[i]); /* num of rows to be recieved */
5207       for (j=0; j<nrows; j++) {
5208         b_othi[k+1] = b_othi[k] + rowlen[j];
5209         len += rowlen[j]; k++;
5210       }
5211       rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */
5212     }
5213 
5214     /* allocate space for j and a arrrays of B_oth */
5215     ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(PetscInt),&b_othj);CHKERRQ(ierr);
5216     ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(MatScalar),&b_otha);CHKERRQ(ierr);
5217 
5218     /* j-array */
5219     /*---------*/
5220     /*  post receives of j-array */
5221     for (i=0; i<nrecvs; i++){
5222       nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
5223       ierr = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5224     }
5225 
5226     /* pack the outgoing message j-array */
5227     k = 0;
5228     for (i=0; i<nsends; i++){
5229       nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5230       bufJ = bufj+sstartsj[i];
5231       for (j=0; j<nrows; j++) {
5232         row  = srow[k++] + B->rmap->range[rank]; /* global row idx */
5233         for (ll=0; ll<sbs; ll++){
5234           ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr);
5235           for (l=0; l<ncols; l++){
5236             *bufJ++ = cols[l];
5237           }
5238           ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr);
5239         }
5240       }
5241       ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5242     }
5243 
5244     /* recvs and sends of j-array are completed */
5245     i = nrecvs;
5246     while (i--) {
5247       ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5248     }
5249     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
5250   } else if (scall == MAT_REUSE_MATRIX){
5251     sstartsj = *startsj;
5252     rstartsj = *startsj_r;
5253     bufa     = *bufa_ptr;
5254     b_oth    = (Mat_SeqAIJ*)(*B_oth)->data;
5255     b_otha   = b_oth->a;
5256   } else {
5257     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not posses an object container");
5258   }
5259 
5260   /* a-array */
5261   /*---------*/
5262   /*  post receives of a-array */
5263   for (i=0; i<nrecvs; i++){
5264     nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
5265     ierr = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5266   }
5267 
5268   /* pack the outgoing message a-array */
5269   k = 0;
5270   for (i=0; i<nsends; i++){
5271     nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5272     bufA = bufa+sstartsj[i];
5273     for (j=0; j<nrows; j++) {
5274       row  = srow[k++] + B->rmap->range[rank]; /* global row idx */
5275       for (ll=0; ll<sbs; ll++){
5276         ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr);
5277         for (l=0; l<ncols; l++){
5278           *bufA++ = vals[l];
5279         }
5280         ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr);
5281       }
5282     }
5283     ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5284   }
5285   /* recvs and sends of a-array are completed */
5286   i = nrecvs;
5287   while (i--) {
5288     ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5289   }
5290   if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
5291   ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr);
5292 
5293   if (scall == MAT_INITIAL_MATRIX){
5294     /* put together the new matrix */
5295     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr);
5296 
5297     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
5298     /* Since these are PETSc arrays, change flags to free them as necessary. */
5299     b_oth          = (Mat_SeqAIJ *)(*B_oth)->data;
5300     b_oth->free_a  = PETSC_TRUE;
5301     b_oth->free_ij = PETSC_TRUE;
5302     b_oth->nonew   = 0;
5303 
5304     ierr = PetscFree(bufj);CHKERRQ(ierr);
5305     if (!startsj || !bufa_ptr){
5306       ierr = PetscFree2(sstartsj,rstartsj);CHKERRQ(ierr);
5307       ierr = PetscFree(bufa_ptr);CHKERRQ(ierr);
5308     } else {
5309       *startsj   = sstartsj;
5310       *startsj_r = rstartsj;
5311       *bufa_ptr  = bufa;
5312     }
5313   }
5314   ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
5315   PetscFunctionReturn(0);
5316 }
5317 
5318 #undef __FUNCT__
5319 #define __FUNCT__ "MatGetCommunicationStructs"
5320 /*@C
5321   MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication.
5322 
5323   Not Collective
5324 
5325   Input Parameters:
5326 . A - The matrix in mpiaij format
5327 
5328   Output Parameter:
5329 + lvec - The local vector holding off-process values from the argument to a matrix-vector product
5330 . colmap - A map from global column index to local index into lvec
5331 - multScatter - A scatter from the argument of a matrix-vector product to lvec
5332 
5333   Level: developer
5334 
5335 @*/
5336 #if defined (PETSC_USE_CTABLE)
5337 PetscErrorCode  MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter)
5338 #else
5339 PetscErrorCode  MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter)
5340 #endif
5341 {
5342   Mat_MPIAIJ *a;
5343 
5344   PetscFunctionBegin;
5345   PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
5346   PetscValidPointer(lvec, 2);
5347   PetscValidPointer(colmap, 3);
5348   PetscValidPointer(multScatter, 4);
5349   a = (Mat_MPIAIJ *) A->data;
5350   if (lvec) *lvec = a->lvec;
5351   if (colmap) *colmap = a->colmap;
5352   if (multScatter) *multScatter = a->Mvctx;
5353   PetscFunctionReturn(0);
5354 }
5355 
5356 EXTERN_C_BEGIN
5357 extern PetscErrorCode  MatConvert_MPIAIJ_MPIAIJCRL(Mat,const MatType,MatReuse,Mat*);
5358 extern PetscErrorCode  MatConvert_MPIAIJ_MPIAIJPERM(Mat,const MatType,MatReuse,Mat*);
5359 extern PetscErrorCode  MatConvert_MPIAIJ_MPISBAIJ(Mat,const MatType,MatReuse,Mat*);
5360 EXTERN_C_END
5361 
5362 #undef __FUNCT__
5363 #define __FUNCT__ "MatMatMultNumeric_MPIDense_MPIAIJ"
5364 /*
5365     Computes (B'*A')' since computing B*A directly is untenable
5366 
5367                n                       p                          p
5368         (              )       (              )         (                  )
5369       m (      A       )  *  n (       B      )   =   m (         C        )
5370         (              )       (              )         (                  )
5371 
5372 */
5373 PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C)
5374 {
5375   PetscErrorCode     ierr;
5376   Mat                At,Bt,Ct;
5377 
5378   PetscFunctionBegin;
5379   ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr);
5380   ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr);
5381   ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,1.0,&Ct);CHKERRQ(ierr);
5382   ierr = MatDestroy(&At);CHKERRQ(ierr);
5383   ierr = MatDestroy(&Bt);CHKERRQ(ierr);
5384   ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr);
5385   ierr = MatDestroy(&Ct);CHKERRQ(ierr);
5386   PetscFunctionReturn(0);
5387 }
5388 
5389 #undef __FUNCT__
5390 #define __FUNCT__ "MatMatMultSymbolic_MPIDense_MPIAIJ"
5391 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat *C)
5392 {
5393   PetscErrorCode ierr;
5394   PetscInt       m=A->rmap->n,n=B->cmap->n;
5395   Mat            Cmat;
5396 
5397   PetscFunctionBegin;
5398   if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %d != B->rmap->n %d\n",A->cmap->n,B->rmap->n);
5399   ierr = MatCreate(((PetscObject)A)->comm,&Cmat);CHKERRQ(ierr);
5400   ierr = MatSetSizes(Cmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
5401   ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr);
5402   ierr = MatMPIDenseSetPreallocation(Cmat,PETSC_NULL);CHKERRQ(ierr);
5403   ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5404   ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5405   *C   = Cmat;
5406   PetscFunctionReturn(0);
5407 }
5408 
5409 /* ----------------------------------------------------------------*/
5410 #undef __FUNCT__
5411 #define __FUNCT__ "MatMatMult_MPIDense_MPIAIJ"
5412 PetscErrorCode MatMatMult_MPIDense_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C)
5413 {
5414   PetscErrorCode ierr;
5415 
5416   PetscFunctionBegin;
5417   if (scall == MAT_INITIAL_MATRIX){
5418     ierr = MatMatMultSymbolic_MPIDense_MPIAIJ(A,B,fill,C);CHKERRQ(ierr);
5419   }
5420   ierr = MatMatMultNumeric_MPIDense_MPIAIJ(A,B,*C);CHKERRQ(ierr);
5421   PetscFunctionReturn(0);
5422 }
5423 
5424 EXTERN_C_BEGIN
5425 #if defined(PETSC_HAVE_MUMPS)
5426 extern PetscErrorCode MatGetFactor_aij_mumps(Mat,MatFactorType,Mat*);
5427 #endif
5428 #if defined(PETSC_HAVE_PASTIX)
5429 extern PetscErrorCode MatGetFactor_mpiaij_pastix(Mat,MatFactorType,Mat*);
5430 #endif
5431 #if defined(PETSC_HAVE_SUPERLU_DIST)
5432 extern PetscErrorCode MatGetFactor_mpiaij_superlu_dist(Mat,MatFactorType,Mat*);
5433 #endif
5434 #if defined(PETSC_HAVE_SPOOLES)
5435 extern PetscErrorCode MatGetFactor_mpiaij_spooles(Mat,MatFactorType,Mat*);
5436 #endif
5437 EXTERN_C_END
5438 
5439 /*MC
5440    MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.
5441 
5442    Options Database Keys:
5443 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions()
5444 
5445   Level: beginner
5446 
5447 .seealso: MatCreateMPIAIJ()
5448 M*/
5449 
5450 EXTERN_C_BEGIN
5451 #undef __FUNCT__
5452 #define __FUNCT__ "MatCreate_MPIAIJ"
5453 PetscErrorCode  MatCreate_MPIAIJ(Mat B)
5454 {
5455   Mat_MPIAIJ     *b;
5456   PetscErrorCode ierr;
5457   PetscMPIInt    size;
5458 
5459   PetscFunctionBegin;
5460   ierr = MPI_Comm_size(((PetscObject)B)->comm,&size);CHKERRQ(ierr);
5461 
5462   ierr            = PetscNewLog(B,Mat_MPIAIJ,&b);CHKERRQ(ierr);
5463   B->data         = (void*)b;
5464   ierr            = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
5465   B->rmap->bs     = 1;
5466   B->assembled    = PETSC_FALSE;
5467 
5468   B->insertmode   = NOT_SET_VALUES;
5469   b->size         = size;
5470   ierr = MPI_Comm_rank(((PetscObject)B)->comm,&b->rank);CHKERRQ(ierr);
5471 
5472   /* build cache for off array entries formed */
5473   ierr = MatStashCreate_Private(((PetscObject)B)->comm,1,&B->stash);CHKERRQ(ierr);
5474   b->donotstash  = PETSC_FALSE;
5475   b->colmap      = 0;
5476   b->garray      = 0;
5477   b->roworiented = PETSC_TRUE;
5478 
5479   /* stuff used for matrix vector multiply */
5480   b->lvec      = PETSC_NULL;
5481   b->Mvctx     = PETSC_NULL;
5482 
5483   /* stuff for MatGetRow() */
5484   b->rowindices   = 0;
5485   b->rowvalues    = 0;
5486   b->getrowactive = PETSC_FALSE;
5487 
5488 #if defined(PETSC_HAVE_SPOOLES)
5489   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_spooles_C",
5490                                      "MatGetFactor_mpiaij_spooles",
5491                                      MatGetFactor_mpiaij_spooles);CHKERRQ(ierr);
5492 #endif
5493 #if defined(PETSC_HAVE_MUMPS)
5494   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_mumps_C",
5495                                      "MatGetFactor_aij_mumps",
5496                                      MatGetFactor_aij_mumps);CHKERRQ(ierr);
5497 #endif
5498 #if defined(PETSC_HAVE_PASTIX)
5499   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_pastix_C",
5500 					   "MatGetFactor_mpiaij_pastix",
5501 					   MatGetFactor_mpiaij_pastix);CHKERRQ(ierr);
5502 #endif
5503 #if defined(PETSC_HAVE_SUPERLU_DIST)
5504   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_superlu_dist_C",
5505                                      "MatGetFactor_mpiaij_superlu_dist",
5506                                      MatGetFactor_mpiaij_superlu_dist);CHKERRQ(ierr);
5507 #endif
5508   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
5509                                      "MatStoreValues_MPIAIJ",
5510                                      MatStoreValues_MPIAIJ);CHKERRQ(ierr);
5511   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
5512                                      "MatRetrieveValues_MPIAIJ",
5513                                      MatRetrieveValues_MPIAIJ);CHKERRQ(ierr);
5514   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
5515 				     "MatGetDiagonalBlock_MPIAIJ",
5516                                      MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr);
5517   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatIsTranspose_C",
5518 				     "MatIsTranspose_MPIAIJ",
5519 				     MatIsTranspose_MPIAIJ);CHKERRQ(ierr);
5520   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocation_C",
5521 				     "MatMPIAIJSetPreallocation_MPIAIJ",
5522 				     MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr);
5523   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",
5524 				     "MatMPIAIJSetPreallocationCSR_MPIAIJ",
5525 				     MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr);
5526   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C",
5527 				     "MatDiagonalScaleLocal_MPIAIJ",
5528 				     MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr);
5529   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpiaijperm_C",
5530                                      "MatConvert_MPIAIJ_MPIAIJPERM",
5531                                       MatConvert_MPIAIJ_MPIAIJPERM);CHKERRQ(ierr);
5532   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpiaijcrl_C",
5533                                      "MatConvert_MPIAIJ_MPIAIJCRL",
5534                                       MatConvert_MPIAIJ_MPIAIJCRL);CHKERRQ(ierr);
5535   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C",
5536                                      "MatConvert_MPIAIJ_MPISBAIJ",
5537                                       MatConvert_MPIAIJ_MPISBAIJ);CHKERRQ(ierr);
5538   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMult_mpidense_mpiaij_C",
5539                                      "MatMatMult_MPIDense_MPIAIJ",
5540                                       MatMatMult_MPIDense_MPIAIJ);CHKERRQ(ierr);
5541   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMultSymbolic_mpidense_mpiaij_C",
5542                                      "MatMatMultSymbolic_MPIDense_MPIAIJ",
5543                                      MatMatMultSymbolic_MPIDense_MPIAIJ);CHKERRQ(ierr);
5544   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMultNumeric_mpidense_mpiaij_C",
5545                                      "MatMatMultNumeric_MPIDense_MPIAIJ",
5546                                       MatMatMultNumeric_MPIDense_MPIAIJ);CHKERRQ(ierr);
5547   ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr);
5548   PetscFunctionReturn(0);
5549 }
5550 EXTERN_C_END
5551 
5552 #undef __FUNCT__
5553 #define __FUNCT__ "MatCreateMPIAIJWithSplitArrays"
5554 /*@
5555      MatCreateMPIAIJWithSplitArrays - creates a MPI AIJ matrix using arrays that contain the "diagonal"
5556          and "off-diagonal" part of the matrix in CSR format.
5557 
5558    Collective on MPI_Comm
5559 
5560    Input Parameters:
5561 +  comm - MPI communicator
5562 .  m - number of local rows (Cannot be PETSC_DECIDE)
5563 .  n - This value should be the same as the local size used in creating the
5564        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
5565        calculated if N is given) For square matrices n is almost always m.
5566 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
5567 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
5568 .   i - row indices for "diagonal" portion of matrix
5569 .   j - column indices
5570 .   a - matrix values
5571 .   oi - row indices for "off-diagonal" portion of matrix
5572 .   oj - column indices
5573 -   oa - matrix values
5574 
5575    Output Parameter:
5576 .   mat - the matrix
5577 
5578    Level: advanced
5579 
5580    Notes:
5581        The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc. The user
5582        must free the arrays once the matrix has been destroyed and not before.
5583 
5584        The i and j indices are 0 based
5585 
5586        See MatCreateMPIAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix
5587 
5588        This sets local rows and cannot be used to set off-processor values.
5589 
5590        You cannot later use MatSetValues() to change values in this matrix.
5591 
5592 .keywords: matrix, aij, compressed row, sparse, parallel
5593 
5594 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
5595           MPIAIJ, MatCreateMPIAIJ(), MatCreateMPIAIJWithArrays()
5596 @*/
5597 PetscErrorCode  MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],
5598 								PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat)
5599 {
5600   PetscErrorCode ierr;
5601   Mat_MPIAIJ     *maij;
5602 
5603  PetscFunctionBegin;
5604   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
5605   if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
5606   if (oi[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0");
5607   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
5608   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
5609   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
5610   maij = (Mat_MPIAIJ*) (*mat)->data;
5611   maij->donotstash     = PETSC_TRUE;
5612   (*mat)->preallocated = PETSC_TRUE;
5613 
5614   ierr = PetscLayoutSetBlockSize((*mat)->rmap,1);CHKERRQ(ierr);
5615   ierr = PetscLayoutSetBlockSize((*mat)->cmap,1);CHKERRQ(ierr);
5616   ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr);
5617   ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr);
5618 
5619   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr);
5620   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr);
5621 
5622   ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5623   ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5624   ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5625   ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5626 
5627   ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5628   ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5629   PetscFunctionReturn(0);
5630 }
5631 
5632 /*
5633     Special version for direct calls from Fortran
5634 */
5635 #include <private/fortranimpl.h>
5636 
5637 #if defined(PETSC_HAVE_FORTRAN_CAPS)
5638 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ
5639 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
5640 #define matsetvaluesmpiaij_ matsetvaluesmpiaij
5641 #endif
5642 
5643 /* Change these macros so can be used in void function */
5644 #undef CHKERRQ
5645 #define CHKERRQ(ierr) CHKERRABORT(PETSC_COMM_WORLD,ierr)
5646 #undef SETERRQ2
5647 #define SETERRQ2(comm,ierr,b,c,d) CHKERRABORT(comm,ierr)
5648 #undef SETERRQ
5649 #define SETERRQ(c,ierr,b) CHKERRABORT(c,ierr)
5650 
5651 EXTERN_C_BEGIN
5652 #undef __FUNCT__
5653 #define __FUNCT__ "matsetvaluesmpiaij_"
5654 void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr)
5655 {
5656   Mat             mat = *mmat;
5657   PetscInt        m = *mm, n = *mn;
5658   InsertMode      addv = *maddv;
5659   Mat_MPIAIJ      *aij = (Mat_MPIAIJ*)mat->data;
5660   PetscScalar     value;
5661   PetscErrorCode  ierr;
5662 
5663   ierr = MatPreallocated(mat);CHKERRQ(ierr);
5664   if (mat->insertmode == NOT_SET_VALUES) {
5665     mat->insertmode = addv;
5666   }
5667 #if defined(PETSC_USE_DEBUG)
5668   else if (mat->insertmode != addv) {
5669     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
5670   }
5671 #endif
5672   {
5673   PetscInt        i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
5674   PetscInt        cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
5675   PetscBool       roworiented = aij->roworiented;
5676 
5677   /* Some Variables required in the macro */
5678   Mat             A = aij->A;
5679   Mat_SeqAIJ      *a = (Mat_SeqAIJ*)A->data;
5680   PetscInt        *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
5681   MatScalar       *aa = a->a;
5682   PetscBool       ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES))?PETSC_TRUE:PETSC_FALSE);
5683   Mat             B = aij->B;
5684   Mat_SeqAIJ      *b = (Mat_SeqAIJ*)B->data;
5685   PetscInt        *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
5686   MatScalar       *ba = b->a;
5687 
5688   PetscInt        *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
5689   PetscInt        nonew = a->nonew;
5690   MatScalar       *ap1,*ap2;
5691 
5692   PetscFunctionBegin;
5693   for (i=0; i<m; i++) {
5694     if (im[i] < 0) continue;
5695 #if defined(PETSC_USE_DEBUG)
5696     if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
5697 #endif
5698     if (im[i] >= rstart && im[i] < rend) {
5699       row      = im[i] - rstart;
5700       lastcol1 = -1;
5701       rp1      = aj + ai[row];
5702       ap1      = aa + ai[row];
5703       rmax1    = aimax[row];
5704       nrow1    = ailen[row];
5705       low1     = 0;
5706       high1    = nrow1;
5707       lastcol2 = -1;
5708       rp2      = bj + bi[row];
5709       ap2      = ba + bi[row];
5710       rmax2    = bimax[row];
5711       nrow2    = bilen[row];
5712       low2     = 0;
5713       high2    = nrow2;
5714 
5715       for (j=0; j<n; j++) {
5716         if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
5717         if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
5718         if (in[j] >= cstart && in[j] < cend){
5719           col = in[j] - cstart;
5720           MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
5721         } else if (in[j] < 0) continue;
5722 #if defined(PETSC_USE_DEBUG)
5723         else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
5724 #endif
5725         else {
5726           if (mat->was_assembled) {
5727             if (!aij->colmap) {
5728               ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
5729             }
5730 #if defined (PETSC_USE_CTABLE)
5731             ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
5732 	    col--;
5733 #else
5734             col = aij->colmap[in[j]] - 1;
5735 #endif
5736             if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
5737               ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
5738               col =  in[j];
5739               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
5740               B = aij->B;
5741               b = (Mat_SeqAIJ*)B->data;
5742               bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
5743               rp2      = bj + bi[row];
5744               ap2      = ba + bi[row];
5745               rmax2    = bimax[row];
5746               nrow2    = bilen[row];
5747               low2     = 0;
5748               high2    = nrow2;
5749               bm       = aij->B->rmap->n;
5750               ba = b->a;
5751             }
5752           } else col = in[j];
5753           MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
5754         }
5755       }
5756     } else {
5757       if (!aij->donotstash) {
5758         if (roworiented) {
5759           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
5760         } else {
5761           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
5762         }
5763       }
5764     }
5765   }}
5766   PetscFunctionReturnVoid();
5767 }
5768 EXTERN_C_END
5769 
5770