xref: /petsc/src/mat/impls/aij/mpi/mpiaij.c (revision 3e336fdf4e375df90ef4b65b0f00c0c010d980fd)
1 
2 #include <../src/mat/impls/aij/mpi/mpiaij.h>   /*I "petscmat.h" I*/
3 #include <petscblaslapack.h>
4 
5 /*MC
6    MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices.
7 
8    This matrix type is identical to MATSEQAIJ when constructed with a single process communicator,
9    and MATMPIAIJ otherwise.  As a result, for single process communicators,
10   MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
11   for communicators controlling multiple processes.  It is recommended that you call both of
12   the above preallocation routines for simplicity.
13 
14    Options Database Keys:
15 . -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions()
16 
17   Developer Notes: Subclasses include MATAIJCUSP, MATAIJPERM, and MATAIJCRL; the type also automatically switches over to using
18    inodes when enough of them exist.
19 
20   Level: beginner
21 
22 .seealso: MatCreateMPIAIJ(), MatCreateSeqAIJ(), MATSEQAIJ, MATMPIAIJ
23 M*/
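/*
   A minimal usage sketch (illustrative only, not part of the original source): create an
   AIJ matrix and call both preallocation routines as recommended above, so the same code
   works with any communicator size.  The function name and the preallocation counts
   (5 diagonal and 2 off-diagonal nonzeros per row) are hypothetical.
*/
#if 0
static PetscErrorCode ExampleCreateAIJ(MPI_Comm comm,PetscInt mlocal,PetscInt nlocal,Mat *newmat)
{
  PetscErrorCode ierr;
  Mat            A;

  PetscFunctionBegin;
  ierr = MatCreate(comm,&A);CHKERRQ(ierr);
  ierr = MatSetSizes(A,mlocal,nlocal,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
  ierr = MatSetType(A,MATAIJ);CHKERRQ(ierr);
  ierr = MatSetFromOptions(A);CHKERRQ(ierr);                     /* honors -mat_type aij */
  /* only the routine matching the communicator size takes effect; the other is a no-op */
  ierr = MatSeqAIJSetPreallocation(A,5,PETSC_NULL);CHKERRQ(ierr);
  ierr = MatMPIAIJSetPreallocation(A,5,PETSC_NULL,2,PETSC_NULL);CHKERRQ(ierr);
  *newmat = A;
  PetscFunctionReturn(0);
}
#endif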
24 
25 /*MC
26    MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices.
27 
28    This matrix type is identical to MATSEQAIJCRL when constructed with a single process communicator,
29    and MATMPIAIJCRL otherwise.  As a result, for single process communicators,
30    MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
31   for communicators controlling multiple processes.  It is recommended that you call both of
32   the above preallocation routines for simplicity.
33 
34    Options Database Keys:
35 . -mat_type aijcrl - sets the matrix type to "aijcrl" during a call to MatSetFromOptions()
36 
37   Level: beginner
38 
39 .seealso: MatCreateMPIAIJCRL(), MATSEQAIJCRL, MATMPIAIJCRL
40 M*/
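/*
   Illustrative runtime usage (the executable name is hypothetical): the CRL variant is
   normally selected from the options database rather than in code, provided the program
   calls MatSetFromOptions() on the matrix, e.g.

     ./ex1 -mat_type aijcrl
*/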
41 
42 #undef __FUNCT__
43 #define __FUNCT__ "MatFindNonZeroRows_MPIAIJ"
44 PetscErrorCode MatFindNonZeroRows_MPIAIJ(Mat M,IS *keptrows)
45 {
46   PetscErrorCode  ierr;
47   Mat_MPIAIJ      *mat = (Mat_MPIAIJ*)M->data;
48   Mat_SeqAIJ      *a = (Mat_SeqAIJ*)mat->A->data;
49   Mat_SeqAIJ      *b = (Mat_SeqAIJ*)mat->B->data;
50   const PetscInt  *ia,*ib;
51   const MatScalar *aa,*bb;
52   PetscInt        na,nb,i,j,*rows,cnt=0,n0rows;
53   PetscInt        m = M->rmap->n,rstart = M->rmap->rstart;
54 
55   PetscFunctionBegin;
56   *keptrows = 0;
57   ia = a->i;
58   ib = b->i;
59   for (i=0; i<m; i++) {
60     na = ia[i+1] - ia[i];
61     nb = ib[i+1] - ib[i];
62     if (!na && !nb) {
63       cnt++;
64       goto ok1;
65     }
66     aa = a->a + ia[i];
67     for (j=0; j<na; j++) {
68       if (aa[j] != 0.0) goto ok1;
69     }
70     bb = b->a + ib[i];
71     for (j=0; j <nb; j++) {
72       if (bb[j] != 0.0) goto ok1;
73     }
74     cnt++;
75     ok1:;
76   }
77   ierr = MPI_Allreduce(&cnt,&n0rows,1,MPIU_INT,MPI_SUM,((PetscObject)M)->comm);CHKERRQ(ierr);
78   if (!n0rows) PetscFunctionReturn(0);
79   ierr = PetscMalloc((M->rmap->n-cnt)*sizeof(PetscInt),&rows);CHKERRQ(ierr);
80   cnt = 0;
81   for (i=0; i<m; i++) {
82     na = ia[i+1] - ia[i];
83     nb = ib[i+1] - ib[i];
84     if (!na && !nb) continue;
85     aa = a->a + ia[i];
86     for(j=0; j<na;j++) {
87       if (aa[j] != 0.0) {
88         rows[cnt++] = rstart + i;
89         goto ok2;
90       }
91     }
92     bb = b->a + ib[i];
93     for (j=0; j<nb; j++) {
94       if (bb[j] != 0.0) {
95         rows[cnt++] = rstart + i;
96         goto ok2;
97       }
98     }
99     ok2:;
100   }
101   ierr = ISCreateGeneral(((PetscObject)M)->comm,cnt,rows,PETSC_OWN_POINTER,keptrows);CHKERRQ(ierr);
102   PetscFunctionReturn(0);
103 }
104 
105 #undef __FUNCT__
106 #define __FUNCT__ "MatGetColumnNorms_MPIAIJ"
107 PetscErrorCode MatGetColumnNorms_MPIAIJ(Mat A,NormType type,PetscReal *norms)
108 {
109   PetscErrorCode ierr;
110   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)A->data;
111   PetscInt       i,n,*garray = aij->garray;
112   Mat_SeqAIJ     *a_aij = (Mat_SeqAIJ*) aij->A->data;
113   Mat_SeqAIJ     *b_aij = (Mat_SeqAIJ*) aij->B->data;
114   PetscReal      *work;
115 
116   PetscFunctionBegin;
117   ierr = MatGetSize(A,PETSC_NULL,&n);CHKERRQ(ierr);
118   ierr = PetscMalloc(n*sizeof(PetscReal),&work);CHKERRQ(ierr);
119   ierr = PetscMemzero(work,n*sizeof(PetscReal));CHKERRQ(ierr);
120   if (type == NORM_2) {
121     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
122       work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]*a_aij->a[i]);
123     }
124     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
125       work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]*b_aij->a[i]);
126     }
127   } else if (type == NORM_1) {
128     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
129       work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]);
130     }
131     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
132       work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]);
133     }
134   } else if (type == NORM_INFINITY) {
135     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
136       work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]);
137     }
138     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
139       work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]),work[garray[b_aij->j[i]]]);
140     }
141 
142   } else SETERRQ(((PetscObject)A)->comm,PETSC_ERR_ARG_WRONG,"Unknown NormType");
143   if (type == NORM_INFINITY) {
144     ierr = MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,A->hdr.comm);CHKERRQ(ierr);
145   } else {
146     ierr = MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,A->hdr.comm);CHKERRQ(ierr);
147   }
148   ierr = PetscFree(work);CHKERRQ(ierr);
149   if (type == NORM_2) {
150     for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]);
151   }
152   PetscFunctionReturn(0);
153 }
154 
155 #undef __FUNCT__
156 #define __FUNCT__ "MatDistribute_MPIAIJ"
157 /*
158     Distributes a SeqAIJ matrix across a set of processes. Code stolen from
159     MatLoad_MPIAIJ(). Horrible lack of reuse. Should be a routine for each matrix type.
160 
161     Only for square matrices
162 */
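/*
   A minimal call sketch (illustrative, not from the original source): rank 0 holds a
   global MATSEQAIJ matrix gmat, every rank passes its local row count m, and all ranks
   receive the distributed matrix dmat.  A later call with MAT_REUSE_MATRIX reuses the
   nonzero structure and only moves the numerical values from rank 0 again.

     ierr = MatDistribute_MPIAIJ(comm,gmat,m,MAT_INITIAL_MATRIX,&dmat);CHKERRQ(ierr);
     ...
     ierr = MatDistribute_MPIAIJ(comm,gmat,m,MAT_REUSE_MATRIX,&dmat);CHKERRQ(ierr);
*/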
163 PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat)
164 {
165   PetscMPIInt    rank,size;
166   PetscInt       *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz,*gmataj,cnt,row,*ld;
167   PetscErrorCode ierr;
168   Mat            mat;
169   Mat_SeqAIJ     *gmata;
170   PetscMPIInt    tag;
171   MPI_Status     status;
172   PetscBool      aij;
173   MatScalar      *gmataa,*ao,*ad,*gmataarestore=0;
174 
175   PetscFunctionBegin;
176   CHKMEMQ;
177   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
178   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
179   if (!rank) {
180     ierr = PetscTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);CHKERRQ(ierr);
181     if (!aij) SETERRQ1(((PetscObject)gmat)->comm,PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name);
182   }
183   if (reuse == MAT_INITIAL_MATRIX) {
184     ierr = MatCreate(comm,&mat);CHKERRQ(ierr);
185     ierr = MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
186     ierr = MatSetType(mat,MATAIJ);CHKERRQ(ierr);
187     ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr);
188     ierr = PetscMalloc2(m,PetscInt,&dlens,m,PetscInt,&olens);CHKERRQ(ierr);
189     ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
190     rowners[0] = 0;
191     for (i=2; i<=size; i++) {
192       rowners[i] += rowners[i-1];
193     }
194     rstart = rowners[rank];
195     rend   = rowners[rank+1];
196     ierr   = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
197     if (!rank) {
198       gmata = (Mat_SeqAIJ*) gmat->data;
199       /* send row lengths to all processors */
200       for (i=0; i<m; i++) dlens[i] = gmata->ilen[i];
201       for (i=1; i<size; i++) {
202 	ierr = MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
203       }
204       /* determine the number of diagonal and off-diagonal nonzeros in each row */
205       ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
206       ierr = PetscMalloc(m*sizeof(PetscInt),&ld);CHKERRQ(ierr);
207       ierr = PetscMemzero(ld,m*sizeof(PetscInt));CHKERRQ(ierr);
208       jj = 0;
209       for (i=0; i<m; i++) {
210 	for (j=0; j<dlens[i]; j++) {
211           if (gmata->j[jj] < rstart) ld[i]++;
212 	  if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++;
213 	  jj++;
214 	}
215       }
216       /* send column indices to other processes */
217       for (i=1; i<size; i++) {
218 	nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
219 	ierr = MPI_Send(&nz,1,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
220 	ierr = MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
221       }
222 
223       /* send numerical values to other processes */
224       for (i=1; i<size; i++) {
225         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
226         ierr = MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
227       }
228       gmataa = gmata->a;
229       gmataj = gmata->j;
230 
231     } else {
232       /* receive row lengths */
233       ierr = MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
234       /* receive column indices */
235       ierr = MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
236       ierr = PetscMalloc2(nz,PetscScalar,&gmataa,nz,PetscInt,&gmataj);CHKERRQ(ierr);
237       ierr = MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
238       /* determine the number of diagonal and off-diagonal nonzeros in each row */
239       ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
240       ierr = PetscMalloc(m*sizeof(PetscInt),&ld);CHKERRQ(ierr);
241       ierr = PetscMemzero(ld,m*sizeof(PetscInt));CHKERRQ(ierr);
242       jj = 0;
243       for (i=0; i<m; i++) {
244 	for (j=0; j<dlens[i]; j++) {
245           if (gmataj[jj] < rstart) ld[i]++;
246 	  if (gmataj[jj] < rstart || gmataj[jj] >= rend) olens[i]++;
247 	  jj++;
248 	}
249       }
250       /* receive numerical values */
251       ierr = PetscMemzero(gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr);
252       ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
253     }
254     /* set preallocation */
255     for (i=0; i<m; i++) {
256       dlens[i] -= olens[i];
257     }
258     ierr = MatSeqAIJSetPreallocation(mat,0,dlens);CHKERRQ(ierr);
259     ierr = MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);CHKERRQ(ierr);
260 
261     for (i=0; i<m; i++) {
262       dlens[i] += olens[i];
263     }
264     cnt  = 0;
265     for (i=0; i<m; i++) {
266       row  = rstart + i;
267       ierr = MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);CHKERRQ(ierr);
268       cnt += dlens[i];
269     }
270     if (rank) {
271       ierr = PetscFree2(gmataa,gmataj);CHKERRQ(ierr);
272     }
273     ierr = PetscFree2(dlens,olens);CHKERRQ(ierr);
274     ierr = PetscFree(rowners);CHKERRQ(ierr);
275     ((Mat_MPIAIJ*)(mat->data))->ld = ld;
276     *inmat = mat;
277   } else {   /* column indices are already set; only need to move over numerical values from process 0 */
278     Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data;
279     Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data;
280     mat   = *inmat;
281     ierr  = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
282     if (!rank) {
283       /* send numerical values to other processes */
284       gmata = (Mat_SeqAIJ*) gmat->data;
285       ierr   = MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);CHKERRQ(ierr);
286       gmataa = gmata->a;
287       for (i=1; i<size; i++) {
288         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
289         ierr = MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
290       }
291       nz   = gmata->i[rowners[1]]-gmata->i[rowners[0]];
292     } else {
293       /* receive numerical values from process 0 */
294       nz   = Ad->nz + Ao->nz;
295       ierr = PetscMalloc(nz*sizeof(PetscScalar),&gmataa);CHKERRQ(ierr); gmataarestore = gmataa;
296       ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
297     }
298     /* transfer numerical values into the diagonal A and off diagonal B parts of mat */
299     ld = ((Mat_MPIAIJ*)(mat->data))->ld;
300     ad = Ad->a;
301     ao = Ao->a;
302     if (mat->rmap->n) {
303       i  = 0;
304       nz = ld[i];                                   ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
305       nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
306     }
307     for (i=1; i<mat->rmap->n; i++) {
308       nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
309       nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
310     }
311     i--;
312     if (mat->rmap->n) {
313       nz = Ao->i[i+1] - Ao->i[i] - ld[i];           ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
314     }
315     if (rank) {
316       ierr = PetscFree(gmataarestore);CHKERRQ(ierr);
317     }
318   }
319   ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
320   ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
321   CHKMEMQ;
322   PetscFunctionReturn(0);
323 }
324 
325 /*
326   Local utility routine that creates a mapping from the global column
327 number to the local number in the off-diagonal part of the local
328 storage of the matrix.  When PETSC_USE_CTABLE is used this is scalable, at
329 a slightly higher hash table cost; without it, it is not scalable (each process
330 stores an order-N integer array, but access is fast).
331 */
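/*
   Concrete example (illustrative): if the off-diagonal block references global columns
   garray = {4,9,17}, the map sends 4 -> 0, 9 -> 1, 17 -> 2.  Values (and, with
   PETSC_USE_CTABLE, also keys) are stored shifted by one so that a lookup result of 0
   means "column not present locally".  A lookup therefore looks like

     #if defined(PETSC_USE_CTABLE)
       ierr = PetscTableFind(aij->colmap,gcol+1,&lcol);CHKERRQ(ierr); lcol--;
     #else
       lcol = aij->colmap[gcol] - 1;
     #endif

   where gcol is a global column index and lcol is the resulting local index (or -1 if absent).
*/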
332 #undef __FUNCT__
333 #define __FUNCT__ "CreateColmap_MPIAIJ_Private"
334 PetscErrorCode CreateColmap_MPIAIJ_Private(Mat mat)
335 {
336   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
337   PetscErrorCode ierr;
338   PetscInt       n = aij->B->cmap->n,i;
339 
340   PetscFunctionBegin;
341 #if defined (PETSC_USE_CTABLE)
342   ierr = PetscTableCreate(n,mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr);
343   for (i=0; i<n; i++){
344     ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1);CHKERRQ(ierr);
345   }
346 #else
347   ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscInt),&aij->colmap);CHKERRQ(ierr);
348   ierr = PetscLogObjectMemory(mat,mat->cmap->N*sizeof(PetscInt));CHKERRQ(ierr);
349   ierr = PetscMemzero(aij->colmap,mat->cmap->N*sizeof(PetscInt));CHKERRQ(ierr);
350   for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
351 #endif
352   PetscFunctionReturn(0);
353 }
354 
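/*
   The two macros below insert or add a single (row,col,value) entry into the diagonal (A)
   or off-diagonal (B) sequential block: a short bisection narrows the search window within
   the row, a linear scan locates the column, and if the column is not yet present (and new
   nonzeros are allowed) the later entries of the row are shifted up to make room for it.
*/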
355 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \
356 { \
357     if (col <= lastcol1) low1 = 0; else high1 = nrow1; \
358     lastcol1 = col;\
359     while (high1-low1 > 5) { \
360       t = (low1+high1)/2; \
361       if (rp1[t] > col) high1 = t; \
362       else             low1  = t; \
363     } \
364       for (_i=low1; _i<high1; _i++) { \
365         if (rp1[_i] > col) break; \
366         if (rp1[_i] == col) { \
367           if (addv == ADD_VALUES) ap1[_i] += value;   \
368           else                    ap1[_i] = value; \
369           goto a_noinsert; \
370         } \
371       }  \
372       if (value == 0.0 && ignorezeroentries) {low1 = 0; high1 = nrow1;goto a_noinsert;} \
373       if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;}		\
374       if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
375       MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \
376       N = nrow1++ - 1; a->nz++; high1++; \
377       /* shift up all the later entries in this row */ \
378       for (ii=N; ii>=_i; ii--) { \
379         rp1[ii+1] = rp1[ii]; \
380         ap1[ii+1] = ap1[ii]; \
381       } \
382       rp1[_i] = col;  \
383       ap1[_i] = value;  \
384       a_noinsert: ; \
385       ailen[row] = nrow1; \
386 }
387 
388 
389 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \
390 { \
391     if (col <= lastcol2) low2 = 0; else high2 = nrow2; \
392     lastcol2 = col;\
393     while (high2-low2 > 5) { \
394       t = (low2+high2)/2; \
395       if (rp2[t] > col) high2 = t; \
396       else             low2  = t; \
397     } \
398     for (_i=low2; _i<high2; _i++) {		\
399       if (rp2[_i] > col) break;			\
400       if (rp2[_i] == col) {			      \
401 	if (addv == ADD_VALUES) ap2[_i] += value;     \
402 	else                    ap2[_i] = value;      \
403 	goto b_noinsert;			      \
404       }						      \
405     }							      \
406     if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
407     if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;}		\
408     if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
409     MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \
410     N = nrow2++ - 1; b->nz++; high2++;					\
411     /* shift up all the later entries in this row */			\
412     for (ii=N; ii>=_i; ii--) {						\
413       rp2[ii+1] = rp2[ii];						\
414       ap2[ii+1] = ap2[ii];						\
415     }									\
416     rp2[_i] = col;							\
417     ap2[_i] = value;							\
418     b_noinsert: ;								\
419     bilen[row] = nrow2;							\
420 }
421 
422 #undef __FUNCT__
423 #define __FUNCT__ "MatSetValuesRow_MPIAIJ"
424 PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[])
425 {
426   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)A->data;
427   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data;
428   PetscErrorCode ierr;
429   PetscInt       l,*garray = mat->garray,diag;
430 
431   PetscFunctionBegin;
432   /* code only works for square matrices A */
433 
434   /* find size of row to the left of the diagonal part */
435   ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr);
436   row  = row - diag;
437   for (l=0; l<b->i[row+1]-b->i[row]; l++) {
438     if (garray[b->j[b->i[row]+l]] > diag) break;
439   }
440   ierr = PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));CHKERRQ(ierr);
441 
442   /* diagonal part */
443   ierr = PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));CHKERRQ(ierr);
444 
445   /* right of diagonal part */
446   ierr = PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));CHKERRQ(ierr);
447   PetscFunctionReturn(0);
448 }
449 
450 #undef __FUNCT__
451 #define __FUNCT__ "MatSetValues_MPIAIJ"
452 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
453 {
454   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
455   PetscScalar    value;
456   PetscErrorCode ierr;
457   PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
458   PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
459   PetscBool      roworiented = aij->roworiented;
460 
461   /* Some Variables required in the macro */
462   Mat            A = aij->A;
463   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data;
464   PetscInt       *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
465   MatScalar      *aa = a->a;
466   PetscBool      ignorezeroentries = a->ignorezeroentries;
467   Mat            B = aij->B;
468   Mat_SeqAIJ     *b = (Mat_SeqAIJ*)B->data;
469   PetscInt       *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
470   MatScalar      *ba = b->a;
471 
472   PetscInt       *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
473   PetscInt       nonew = a->nonew;
474   MatScalar      *ap1,*ap2;
475 
476   PetscFunctionBegin;
477   if (v) PetscValidScalarPointer(v,6);
478   for (i=0; i<m; i++) {
479     if (im[i] < 0) continue;
480 #if defined(PETSC_USE_DEBUG)
481     if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
482 #endif
483     if (im[i] >= rstart && im[i] < rend) {
484       row      = im[i] - rstart;
485       lastcol1 = -1;
486       rp1      = aj + ai[row];
487       ap1      = aa + ai[row];
488       rmax1    = aimax[row];
489       nrow1    = ailen[row];
490       low1     = 0;
491       high1    = nrow1;
492       lastcol2 = -1;
493       rp2      = bj + bi[row];
494       ap2      = ba + bi[row];
495       rmax2    = bimax[row];
496       nrow2    = bilen[row];
497       low2     = 0;
498       high2    = nrow2;
499 
500       for (j=0; j<n; j++) {
501         if (v) {if (roworiented) value = v[i*n+j]; else value = v[i+j*m];} else value = 0.0;
502         if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
503         if (in[j] >= cstart && in[j] < cend){
504           col = in[j] - cstart;
505           MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
506         } else if (in[j] < 0) continue;
507 #if defined(PETSC_USE_DEBUG)
508         else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
509 #endif
510         else {
511           if (mat->was_assembled) {
512             if (!aij->colmap) {
513               ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
514             }
515 #if defined (PETSC_USE_CTABLE)
516             ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
517 	    col--;
518 #else
519             col = aij->colmap[in[j]] - 1;
520 #endif
521             if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
522               ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
523               col =  in[j];
524               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
525               B = aij->B;
526               b = (Mat_SeqAIJ*)B->data;
527               bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a;
528               rp2      = bj + bi[row];
529               ap2      = ba + bi[row];
530               rmax2    = bimax[row];
531               nrow2    = bilen[row];
532               low2     = 0;
533               high2    = nrow2;
534               bm       = aij->B->rmap->n;
535               ba = b->a;
536             }
537           } else col = in[j];
538           MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
539         }
540       }
541     } else {
542       if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
543       if (!aij->donotstash) {
544         if (roworiented) {
545           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
546         } else {
547           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
548         }
549       }
550     }
551   }
552   PetscFunctionReturn(0);
553 }
554 
555 #undef __FUNCT__
556 #define __FUNCT__ "MatGetValues_MPIAIJ"
557 PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
558 {
559   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
560   PetscErrorCode ierr;
561   PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
562   PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
563 
564   PetscFunctionBegin;
565   for (i=0; i<m; i++) {
566     if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/
567     if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
568     if (idxm[i] >= rstart && idxm[i] < rend) {
569       row = idxm[i] - rstart;
570       for (j=0; j<n; j++) {
571         if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */
572         if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
573         if (idxn[j] >= cstart && idxn[j] < cend){
574           col = idxn[j] - cstart;
575           ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
576         } else {
577           if (!aij->colmap) {
578             ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
579           }
580 #if defined (PETSC_USE_CTABLE)
581           ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
582           col --;
583 #else
584           col = aij->colmap[idxn[j]] - 1;
585 #endif
586           if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
587           else {
588             ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
589           }
590         }
591       }
592     } else {
593       SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
594     }
595   }
596   PetscFunctionReturn(0);
597 }
598 
599 extern PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat,Vec,Vec);
600 
601 #undef __FUNCT__
602 #define __FUNCT__ "MatAssemblyBegin_MPIAIJ"
603 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
604 {
605   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
606   PetscErrorCode ierr;
607   PetscInt       nstash,reallocs;
608   InsertMode     addv;
609 
610   PetscFunctionBegin;
611   if (aij->donotstash || mat->nooffprocentries) {
612     PetscFunctionReturn(0);
613   }
614 
615   /* make sure all processors are either in INSERTMODE or ADDMODE */
616   ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,((PetscObject)mat)->comm);CHKERRQ(ierr);
617   if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
618   mat->insertmode = addv; /* in case this processor had no cache */
619 
620   ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
621   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
622   ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
623   PetscFunctionReturn(0);
624 }
625 
626 #undef __FUNCT__
627 #define __FUNCT__ "MatAssemblyEnd_MPIAIJ"
628 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
629 {
630   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
631   Mat_SeqAIJ     *a=(Mat_SeqAIJ *)aij->A->data;
632   PetscErrorCode ierr;
633   PetscMPIInt    n;
634   PetscInt       i,j,rstart,ncols,flg;
635   PetscInt       *row,*col;
636   PetscBool      other_disassembled;
637   PetscScalar    *val;
638   InsertMode     addv = mat->insertmode;
639 
640   /* do not use 'b = (Mat_SeqAIJ *)aij->B->data' as B can be reset in disassembly */
641   PetscFunctionBegin;
642   if (!aij->donotstash && !mat->nooffprocentries) {
643     while (1) {
644       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
645       if (!flg) break;
646 
647       for (i=0; i<n;) {
648         /* Now identify the consecutive vals belonging to the same row */
649         for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
650         if (j < n) ncols = j-i;
651         else       ncols = n-i;
652         /* Now assemble all these values with a single function call */
653         ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
654         i = j;
655       }
656     }
657     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
658   }
659   ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
660   ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);
661 
662   /* determine if any processor has disassembled, if so we must
663      also disassemble ourselves, in order that we may reassemble. */
664   /*
665      if the nonzero structure of submatrix B cannot change then we know that
666      no processor disassembled, and thus we can skip this step
667   */
668   if (!((Mat_SeqAIJ*)aij->B->data)->nonew)  {
669     ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,((PetscObject)mat)->comm);CHKERRQ(ierr);
670     if (mat->was_assembled && !other_disassembled) {
671       ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
672     }
673   }
674   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
675     ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
676   }
677   ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr);
678   ierr = MatSetOption(aij->B,MAT_CHECK_COMPRESSED_ROW,PETSC_FALSE);CHKERRQ(ierr);
679   ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
680   ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);
681 
682   ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
683   aij->rowvalues = 0;
684 
685   /* used by MatAXPY() */
686   a->xtoy = 0; ((Mat_SeqAIJ *)aij->B->data)->xtoy = 0;  /* b->xtoy = 0 */
687   a->XtoY = 0; ((Mat_SeqAIJ *)aij->B->data)->XtoY = 0;  /* b->XtoY = 0 */
688 
689   ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
690   if (a->inode.size) mat->ops->multdiagonalblock = MatMultDiagonalBlock_MPIAIJ;
691   PetscFunctionReturn(0);
692 }
693 
694 #undef __FUNCT__
695 #define __FUNCT__ "MatZeroEntries_MPIAIJ"
696 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
697 {
698   Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
699   PetscErrorCode ierr;
700 
701   PetscFunctionBegin;
702   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
703   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
704   PetscFunctionReturn(0);
705 }
706 
707 #undef __FUNCT__
708 #define __FUNCT__ "MatZeroRows_MPIAIJ"
709 PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
710 {
711   Mat_MPIAIJ        *l = (Mat_MPIAIJ*)A->data;
712   PetscErrorCode    ierr;
713   PetscMPIInt       size = l->size,imdex,n,rank = l->rank,tag = ((PetscObject)A)->tag,lastidx = -1;
714   PetscInt          i,*owners = A->rmap->range;
715   PetscInt          *nprocs,j,idx,nsends,row;
716   PetscInt          nmax,*svalues,*starts,*owner,nrecvs;
717   PetscInt          *rvalues,count,base,slen,*source;
718   PetscInt          *lens,*lrows,*values,rstart=A->rmap->rstart;
719   MPI_Comm          comm = ((PetscObject)A)->comm;
720   MPI_Request       *send_waits,*recv_waits;
721   MPI_Status        recv_status,*send_status;
722   const PetscScalar *xx;
723   PetscScalar       *bb;
724 #if defined(PETSC_DEBUG)
725   PetscBool      found = PETSC_FALSE;
726 #endif
727 
728   PetscFunctionBegin;
729   /*  first count number of contributors to each processor */
730   ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
731   ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
732   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/
733   j = 0;
734   for (i=0; i<N; i++) {
735     if (lastidx > (idx = rows[i])) j = 0;
736     lastidx = idx;
737     for (; j<size; j++) {
738       if (idx >= owners[j] && idx < owners[j+1]) {
739         nprocs[2*j]++;
740         nprocs[2*j+1] = 1;
741         owner[i] = j;
742 #if defined(PETSC_DEBUG)
743         found = PETSC_TRUE;
744 #endif
745         break;
746       }
747     }
748 #if defined(PETSC_DEBUG)
749     if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
750     found = PETSC_FALSE;
751 #endif
752   }
753   nsends = 0;  for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}
754 
755   if (A->nooffproczerorows) {
756     if (nsends > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"You called MatSetOption(,MAT_NO_OFF_PROC_ZERO_ROWS,PETSC_TRUE) but set an off process zero row");
757     nrecvs = nsends;
758     nmax   = N;
759   } else {
760     /* inform other processors of number of messages and max length*/
761     ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);
762   }
763 
764   /* post receives:   */
765   ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr);
766   ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
767   for (i=0; i<nrecvs; i++) {
768     ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
769   }
770 
771   /* do sends:
772       1) starts[i] gives the starting index in svalues for stuff going to
773          the ith processor
774   */
775   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr);
776   ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
777   ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr);
778   starts[0] = 0;
779   for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
780   for (i=0; i<N; i++) {
781     svalues[starts[owner[i]]++] = rows[i];
782   }
783 
784   starts[0] = 0;
785   for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
786   count = 0;
787   for (i=0; i<size; i++) {
788     if (nprocs[2*i+1]) {
789       ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
790     }
791   }
792   ierr = PetscFree(starts);CHKERRQ(ierr);
793 
794   base = owners[rank];
795 
796   /*  wait on receives */
797   ierr   = PetscMalloc2(nrecvs,PetscInt,&lens,nrecvs,PetscInt,&source);CHKERRQ(ierr);
798   count  = nrecvs; slen = 0;
799   while (count) {
800     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
801     /* unpack receives into our local space */
802     ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);
803     source[imdex]  = recv_status.MPI_SOURCE;
804     lens[imdex]    = n;
805     slen          += n;
806     count--;
807   }
808   ierr = PetscFree(recv_waits);CHKERRQ(ierr);
809 
810   /* move the data into the send scatter */
811   ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr);
812   count = 0;
813   for (i=0; i<nrecvs; i++) {
814     values = rvalues + i*nmax;
815     for (j=0; j<lens[i]; j++) {
816       lrows[count++] = values[j] - base;
817     }
818   }
819   ierr = PetscFree(rvalues);CHKERRQ(ierr);
820   ierr = PetscFree2(lens,source);CHKERRQ(ierr);
821   ierr = PetscFree(owner);CHKERRQ(ierr);
822   ierr = PetscFree(nprocs);CHKERRQ(ierr);
823 
824   /* fix right hand side if needed */
825   if (x && b) {
826     ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr);
827     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
828     for (i=0; i<slen; i++) {
829       bb[lrows[i]] = diag*xx[lrows[i]];
830     }
831     ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr);
832     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
833   }
834   /*
835         Zero the required rows. If the "diagonal block" of the matrix
836      is square and the user wishes to set the diagonal, we use separate
837      code so that MatSetValues() is not called for each diagonal entry,
838      which would allocate new memory with many mallocs and slow things down.
839 
840   */
841   /* must zero l->B before l->A because the (diag) case below may put values into l->B */
842   ierr = MatZeroRows(l->B,slen,lrows,0.0,0,0);CHKERRQ(ierr);
843   if ((diag != 0.0) && (l->A->rmap->N == l->A->cmap->N)) {
844     ierr = MatZeroRows(l->A,slen,lrows,diag,0,0);CHKERRQ(ierr);
845   } else if (diag != 0.0) {
846     ierr = MatZeroRows(l->A,slen,lrows,0.0,0,0);CHKERRQ(ierr);
847     if (((Mat_SeqAIJ*)l->A->data)->nonew) {
848       SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\n\
849 MAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
850     }
851     for (i = 0; i < slen; i++) {
852       row  = lrows[i] + rstart;
853       ierr = MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr);
854     }
855     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
856     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
857   } else {
858     ierr = MatZeroRows(l->A,slen,lrows,0.0,0,0);CHKERRQ(ierr);
859   }
860   ierr = PetscFree(lrows);CHKERRQ(ierr);
861 
862   /* wait on sends */
863   if (nsends) {
864     ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
865     ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
866     ierr = PetscFree(send_status);CHKERRQ(ierr);
867   }
868   ierr = PetscFree(send_waits);CHKERRQ(ierr);
869   ierr = PetscFree(svalues);CHKERRQ(ierr);
870   PetscFunctionReturn(0);
871 }
872 
873 #undef __FUNCT__
874 #define __FUNCT__ "MatZeroRowsColumns_MPIAIJ"
875 PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
876 {
877   Mat_MPIAIJ        *l = (Mat_MPIAIJ*)A->data;
878   PetscErrorCode    ierr;
879   PetscMPIInt       size = l->size,imdex,n,rank = l->rank,tag = ((PetscObject)A)->tag,lastidx = -1;
880   PetscInt          i,*owners = A->rmap->range;
881   PetscInt          *nprocs,j,idx,nsends;
882   PetscInt          nmax,*svalues,*starts,*owner,nrecvs;
883   PetscInt          *rvalues,count,base,slen,*source;
884   PetscInt          *lens,*lrows,*values,m;
885   MPI_Comm          comm = ((PetscObject)A)->comm;
886   MPI_Request       *send_waits,*recv_waits;
887   MPI_Status        recv_status,*send_status;
888   const PetscScalar *xx;
889   PetscScalar       *bb,*mask;
890   Vec               xmask,lmask;
891   Mat_SeqAIJ        *aij = (Mat_SeqAIJ*)l->B->data;
892   const PetscInt    *aj, *ii,*ridx;
893   PetscScalar       *aa;
894 #if defined(PETSC_DEBUG)
895   PetscBool         found = PETSC_FALSE;
896 #endif
897 
898   PetscFunctionBegin;
899   /*  first count number of contributors to each processor */
900   ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
901   ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
902   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/
903   j = 0;
904   for (i=0; i<N; i++) {
905     if (lastidx > (idx = rows[i])) j = 0;
906     lastidx = idx;
907     for (; j<size; j++) {
908       if (idx >= owners[j] && idx < owners[j+1]) {
909         nprocs[2*j]++;
910         nprocs[2*j+1] = 1;
911         owner[i] = j;
912 #if defined(PETSC_DEBUG)
913         found = PETSC_TRUE;
914 #endif
915         break;
916       }
917     }
918 #if defined(PETSC_DEBUG)
919     if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
920     found = PETSC_FALSE;
921 #endif
922   }
923   nsends = 0;  for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}
924 
925   /* inform other processors of number of messages and max length*/
926   ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);
927 
928   /* post receives:   */
929   ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr);
930   ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
931   for (i=0; i<nrecvs; i++) {
932     ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
933   }
934 
935   /* do sends:
936       1) starts[i] gives the starting index in svalues for stuff going to
937          the ith processor
938   */
939   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr);
940   ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
941   ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr);
942   starts[0] = 0;
943   for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
944   for (i=0; i<N; i++) {
945     svalues[starts[owner[i]]++] = rows[i];
946   }
947 
948   starts[0] = 0;
949   for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
950   count = 0;
951   for (i=0; i<size; i++) {
952     if (nprocs[2*i+1]) {
953       ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
954     }
955   }
956   ierr = PetscFree(starts);CHKERRQ(ierr);
957 
958   base = owners[rank];
959 
960   /*  wait on receives */
961   ierr   = PetscMalloc2(nrecvs,PetscInt,&lens,nrecvs,PetscInt,&source);CHKERRQ(ierr);
962   count  = nrecvs; slen = 0;
963   while (count) {
964     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
965     /* unpack receives into our local space */
966     ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);
967     source[imdex]  = recv_status.MPI_SOURCE;
968     lens[imdex]    = n;
969     slen          += n;
970     count--;
971   }
972   ierr = PetscFree(recv_waits);CHKERRQ(ierr);
973 
974   /* move the data into the send scatter */
975   ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr);
976   count = 0;
977   for (i=0; i<nrecvs; i++) {
978     values = rvalues + i*nmax;
979     for (j=0; j<lens[i]; j++) {
980       lrows[count++] = values[j] - base;
981     }
982   }
983   ierr = PetscFree(rvalues);CHKERRQ(ierr);
984   ierr = PetscFree2(lens,source);CHKERRQ(ierr);
985   ierr = PetscFree(owner);CHKERRQ(ierr);
986   ierr = PetscFree(nprocs);CHKERRQ(ierr);
987   /* lrows are the local rows to be zeroed, slen is the number of local rows */
988 
989   /* zero diagonal part of matrix */
990   ierr = MatZeroRowsColumns(l->A,slen,lrows,diag,x,b);CHKERRQ(ierr);
991 
992   /* handle off diagonal part of matrix */
993   ierr = MatGetVecs(A,&xmask,PETSC_NULL);CHKERRQ(ierr);
994   ierr = VecDuplicate(l->lvec,&lmask);CHKERRQ(ierr);
995   ierr = VecGetArray(xmask,&bb);CHKERRQ(ierr);
996   for (i=0; i<slen; i++) {
997     bb[lrows[i]] = 1;
998   }
999   ierr = VecRestoreArray(xmask,&bb);CHKERRQ(ierr);
1000   ierr = VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1001   ierr = VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1002   ierr = VecDestroy(&xmask);CHKERRQ(ierr);
1003   ierr = VecScatterBegin(l->Mvctx,x,l->lvec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1004   ierr = VecScatterEnd(l->Mvctx,x,l->lvec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1005   ierr = VecGetArrayRead(l->lvec,&xx);CHKERRQ(ierr);
1006   ierr = VecGetArray(lmask,&mask);CHKERRQ(ierr);
1007   ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
1008 
1009   /* remove zeroed rows of off diagonal matrix */
1010   ii = aij->i;
1011   for (i=0; i<slen; i++) {
1012     ierr = PetscMemzero(aij->a + ii[lrows[i]],(ii[lrows[i]+1] - ii[lrows[i]])*sizeof(PetscScalar));CHKERRQ(ierr);
1013   }
1014 
1015   /* loop over all elements of the off-process part of the matrix, zeroing the removed columns */
1016   if (aij->compressedrow.use){
1017     m    = aij->compressedrow.nrows;
1018     ii   = aij->compressedrow.i;
1019     ridx = aij->compressedrow.rindex;
1020     for (i=0; i<m; i++){
1021       n   = ii[i+1] - ii[i];
1022       aj  = aij->j + ii[i];
1023       aa  = aij->a + ii[i];
1024 
1025       for (j=0; j<n; j++) {
1026         if (PetscAbsScalar(mask[*aj])) {
1027           bb[*ridx] -= *aa*xx[*aj];
1028           *aa        = 0.0;
1029         }
1030         aa++;
1031         aj++;
1032       }
1033       ridx++;
1034     }
1035   } else { /* do not use compressed row format */
1036     m = l->B->rmap->n;
1037     for (i=0; i<m; i++) {
1038       n   = ii[i+1] - ii[i];
1039       aj  = aij->j + ii[i];
1040       aa  = aij->a + ii[i];
1041       for (j=0; j<n; j++) {
1042         if (PetscAbsScalar(mask[*aj])) {
1043           bb[i] -= *aa*xx[*aj];
1044           *aa    = 0.0;
1045         }
1046         aa++;
1047         aj++;
1048       }
1049     }
1050   }
1051   ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
1052   ierr = VecRestoreArray(lmask,&mask);CHKERRQ(ierr);
1053   ierr = VecRestoreArrayRead(l->lvec,&xx);CHKERRQ(ierr);
1054   ierr = VecDestroy(&lmask);CHKERRQ(ierr);
1055   ierr = PetscFree(lrows);CHKERRQ(ierr);
1056 
1057   /* wait on sends */
1058   if (nsends) {
1059     ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
1060     ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
1061     ierr = PetscFree(send_status);CHKERRQ(ierr);
1062   }
1063   ierr = PetscFree(send_waits);CHKERRQ(ierr);
1064   ierr = PetscFree(svalues);CHKERRQ(ierr);
1065 
1066   PetscFunctionReturn(0);
1067 }
1068 
1069 #undef __FUNCT__
1070 #define __FUNCT__ "MatMult_MPIAIJ"
1071 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
1072 {
1073   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1074   PetscErrorCode ierr;
1075   PetscInt       nt;
1076 
1077   PetscFunctionBegin;
1078   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
1079   if (nt != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt);
1080   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1081   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
1082   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1083   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
1084   PetscFunctionReturn(0);
1085 }
1086 
1087 #undef __FUNCT__
1088 #define __FUNCT__ "MatMultDiagonalBlock_MPIAIJ"
1089 PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx)
1090 {
1091   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1092   PetscErrorCode ierr;
1093 
1094   PetscFunctionBegin;
1095   ierr = MatMultDiagonalBlock(a->A,bb,xx);CHKERRQ(ierr);
1096   PetscFunctionReturn(0);
1097 }
1098 
1099 #undef __FUNCT__
1100 #define __FUNCT__ "MatMultAdd_MPIAIJ"
1101 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1102 {
1103   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1104   PetscErrorCode ierr;
1105 
1106   PetscFunctionBegin;
1107   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1108   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1109   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1110   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
1111   PetscFunctionReturn(0);
1112 }
1113 
1114 #undef __FUNCT__
1115 #define __FUNCT__ "MatMultTranspose_MPIAIJ"
1116 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
1117 {
1118   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1119   PetscErrorCode ierr;
1120   PetscBool      merged;
1121 
1122   PetscFunctionBegin;
1123   ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr);
1124   /* do nondiagonal part */
1125   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1126   if (!merged) {
1127     /* send it on its way */
1128     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1129     /* do local part */
1130     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
1131     /* receive remote parts: note this assumes the values are not actually */
1132     /* added in yy until the next line */
1133     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1134   } else {
1135     /* do local part */
1136     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
1137     /* send it on its way */
1138     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1139     /* values actually were received in the Begin() but we need to call this nop */
1140     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1141   }
1142   PetscFunctionReturn(0);
1143 }
1144 
1145 EXTERN_C_BEGIN
1146 #undef __FUNCT__
1147 #define __FUNCT__ "MatIsTranspose_MPIAIJ"
1148 PetscErrorCode  MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscBool  *f)
1149 {
1150   MPI_Comm       comm;
1151   Mat_MPIAIJ     *Aij = (Mat_MPIAIJ *) Amat->data, *Bij;
1152   Mat            Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs;
1153   IS             Me,Notme;
1154   PetscErrorCode ierr;
1155   PetscInt       M,N,first,last,*notme,i;
1156   PetscMPIInt    size;
1157 
1158   PetscFunctionBegin;
1159 
1160   /* Easy test: the diagonal block of A must be the transpose of the diagonal block of B */
1161   Bij = (Mat_MPIAIJ *) Bmat->data; Bdia = Bij->A;
1162   ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr);
1163   if (!*f) PetscFunctionReturn(0);
1164   ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr);
1165   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1166   if (size == 1) PetscFunctionReturn(0);
1167 
1168   /* Hard test: off-diagonal block. This requires a MatGetSubMatrices() call. */
1169   ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr);
1170   ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr);
1171   ierr = PetscMalloc((N-last+first)*sizeof(PetscInt),&notme);CHKERRQ(ierr);
1172   for (i=0; i<first; i++) notme[i] = i;
1173   for (i=last; i<M; i++) notme[i-last+first] = i;
1174   ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,PETSC_COPY_VALUES,&Notme);CHKERRQ(ierr);
1175   ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr);
1176   ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr);
1177   Aoff = Aoffs[0];
1178   ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr);
1179   Boff = Boffs[0];
1180   ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr);
1181   ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr);
1182   ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr);
1183   ierr = ISDestroy(&Me);CHKERRQ(ierr);
1184   ierr = ISDestroy(&Notme);CHKERRQ(ierr);
1185   ierr = PetscFree(notme);CHKERRQ(ierr);
1186   PetscFunctionReturn(0);
1187 }
1188 EXTERN_C_END
1189 
1190 #undef __FUNCT__
1191 #define __FUNCT__ "MatMultTransposeAdd_MPIAIJ"
1192 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1193 {
1194   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1195   PetscErrorCode ierr;
1196 
1197   PetscFunctionBegin;
1198   /* do nondiagonal part */
1199   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1200   /* send it on its way */
1201   ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1202   /* do local part */
1203   ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1204   /* receive remote parts */
1205   ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1206   PetscFunctionReturn(0);
1207 }
1208 
1209 /*
1210   This only works correctly for square matrices where the subblock A->A is the
1211    diagonal block
1212 */
1213 #undef __FUNCT__
1214 #define __FUNCT__ "MatGetDiagonal_MPIAIJ"
1215 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v)
1216 {
1217   PetscErrorCode ierr;
1218   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1219 
1220   PetscFunctionBegin;
1221   if (A->rmap->N != A->cmap->N) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
1222   if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
1223   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
1224   PetscFunctionReturn(0);
1225 }
1226 
1227 #undef __FUNCT__
1228 #define __FUNCT__ "MatScale_MPIAIJ"
1229 PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa)
1230 {
1231   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1232   PetscErrorCode ierr;
1233 
1234   PetscFunctionBegin;
1235   ierr = MatScale(a->A,aa);CHKERRQ(ierr);
1236   ierr = MatScale(a->B,aa);CHKERRQ(ierr);
1237   PetscFunctionReturn(0);
1238 }
1239 
1240 #undef __FUNCT__
1241 #define __FUNCT__ "MatDestroy_MPIAIJ"
1242 PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
1243 {
1244   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1245   PetscErrorCode ierr;
1246 
1247   PetscFunctionBegin;
1248 #if defined(PETSC_USE_LOG)
1249   PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N);
1250 #endif
1251   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
1252   ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
1253   ierr = MatDestroy(&aij->A);CHKERRQ(ierr);
1254   ierr = MatDestroy(&aij->B);CHKERRQ(ierr);
1255 #if defined (PETSC_USE_CTABLE)
1256   ierr = PetscTableDestroy(&aij->colmap);CHKERRQ(ierr);
1257 #else
1258   ierr = PetscFree(aij->colmap);CHKERRQ(ierr);
1259 #endif
1260   ierr = PetscFree(aij->garray);CHKERRQ(ierr);
1261   ierr = VecDestroy(&aij->lvec);CHKERRQ(ierr);
1262   ierr = VecScatterDestroy(&aij->Mvctx);CHKERRQ(ierr);
1263   ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
1264   ierr = PetscFree(aij->ld);CHKERRQ(ierr);
1265   ierr = PetscFree(mat->data);CHKERRQ(ierr);
1266 
1267   ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
1268   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr);
1269   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr);
1270   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr);
1271   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C","",PETSC_NULL);CHKERRQ(ierr);
1272   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
1273   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr);
1274   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C","",PETSC_NULL);CHKERRQ(ierr);
1275   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C","",PETSC_NULL);CHKERRQ(ierr);
1276   PetscFunctionReturn(0);
1277 }
1278 
1279 #undef __FUNCT__
1280 #define __FUNCT__ "MatView_MPIAIJ_Binary"
1281 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
1282 {
1283   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
1284   Mat_SeqAIJ*       A = (Mat_SeqAIJ*)aij->A->data;
1285   Mat_SeqAIJ*       B = (Mat_SeqAIJ*)aij->B->data;
1286   PetscErrorCode    ierr;
1287   PetscMPIInt       rank,size,tag = ((PetscObject)viewer)->tag;
1288   int               fd;
1289   PetscInt          nz,header[4],*row_lengths,*range=0,rlen,i;
1290   PetscInt          nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap->rstart,rnz;
1291   PetscScalar       *column_values;
1292   PetscInt          message_count,flowcontrolcount;
1293 
1294   PetscFunctionBegin;
1295   ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr);
1296   ierr = MPI_Comm_size(((PetscObject)mat)->comm,&size);CHKERRQ(ierr);
1297   nz   = A->nz + B->nz;
1298   if (!rank) {
1299     header[0] = MAT_FILE_CLASSID;
1300     header[1] = mat->rmap->N;
1301     header[2] = mat->cmap->N;
1302     ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
1303     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1304     ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1305     /* get largest number of rows any processor has */
1306     rlen = mat->rmap->n;
1307     range = mat->rmap->range;
1308     for (i=1; i<size; i++) {
1309       rlen = PetscMax(rlen,range[i+1] - range[i]);
1310     }
1311   } else {
1312     ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
1313     rlen = mat->rmap->n;
1314   }
1315 
1316   /* load up the local row counts */
1317   ierr = PetscMalloc((rlen+1)*sizeof(PetscInt),&row_lengths);CHKERRQ(ierr);
1318   for (i=0; i<mat->rmap->n; i++) {
1319     row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
1320   }
1321 
1322   /* store the row lengths to the file */
1323   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1324   if (!rank) {
1325     ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap->n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1326     for (i=1; i<size; i++) {
1327       ierr = PetscViewerFlowControlStepMaster(viewer,i,message_count,flowcontrolcount);CHKERRQ(ierr);
1328       rlen = range[i+1] - range[i];
1329       ierr = MPILong_Recv(row_lengths,rlen,MPIU_INT,i,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1330       ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1331     }
1332     ierr = PetscViewerFlowControlEndMaster(viewer,message_count);CHKERRQ(ierr);
1333   } else {
1334     ierr = PetscViewerFlowControlStepWorker(viewer,rank,message_count);CHKERRQ(ierr);
1335     ierr = MPILong_Send(row_lengths,mat->rmap->n,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1336     ierr = PetscViewerFlowControlEndWorker(viewer,message_count);CHKERRQ(ierr);
1337   }
1338   ierr = PetscFree(row_lengths);CHKERRQ(ierr);
1339 
1340   /* load up the local column indices */
1341   nzmax = nz; /* 0th processor needs as much space as the largest processor needs */
1342   ierr = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
1343   ierr = PetscMalloc((nzmax+1)*sizeof(PetscInt),&column_indices);CHKERRQ(ierr);
1344   cnt  = 0;
1345   for (i=0; i<mat->rmap->n; i++) {
1346     for (j=B->i[i]; j<B->i[i+1]; j++) {
1347       if ( (col = garray[B->j[j]]) > cstart) break;
1348       column_indices[cnt++] = col;
1349     }
1350     for (k=A->i[i]; k<A->i[i+1]; k++) {
1351       column_indices[cnt++] = A->j[k] + cstart;
1352     }
1353     for (; j<B->i[i+1]; j++) {
1354       column_indices[cnt++] = garray[B->j[j]];
1355     }
1356   }
1357   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
1358 
1359   /* store the column indices to the file */
1360   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1361   if (!rank) {
1362     MPI_Status status;
1363     ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1364     for (i=1; i<size; i++) {
1365       ierr = PetscViewerFlowControlStepMaster(viewer,i,message_count,flowcontrolcount);CHKERRQ(ierr);
1366       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
1367       if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: rnz = %D nzmax = %D",rnz,nzmax);
1368       ierr = MPILong_Recv(column_indices,rnz,MPIU_INT,i,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1369       ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1370     }
1371     ierr = PetscViewerFlowControlEndMaster(viewer,message_count);CHKERRQ(ierr);
1372   } else {
1373     ierr = PetscViewerFlowControlStepWorker(viewer,rank,message_count);CHKERRQ(ierr);
1374     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1375     ierr = MPILong_Send(column_indices,nz,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1376     ierr = PetscViewerFlowControlEndWorker(viewer,message_count);CHKERRQ(ierr);
1377   }
1378   ierr = PetscFree(column_indices);CHKERRQ(ierr);
1379 
1380   /* load up the local column values */
1381   ierr = PetscMalloc((nzmax+1)*sizeof(PetscScalar),&column_values);CHKERRQ(ierr);
1382   cnt  = 0;
1383   for (i=0; i<mat->rmap->n; i++) {
1384     for (j=B->i[i]; j<B->i[i+1]; j++) {
1385       if ( garray[B->j[j]] > cstart) break;
1386       column_values[cnt++] = B->a[j];
1387     }
1388     for (k=A->i[i]; k<A->i[i+1]; k++) {
1389       column_values[cnt++] = A->a[k];
1390     }
1391     for (; j<B->i[i+1]; j++) {
1392       column_values[cnt++] = B->a[j];
1393     }
1394   }
1395   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
1396 
1397   /* store the column values to the file */
1398   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1399   if (!rank) {
1400     MPI_Status status;
1401     ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1402     for (i=1; i<size; i++) {
1403       ierr = PetscViewerFlowControlStepMaster(viewer,i,message_count,flowcontrolcount);CHKERRQ(ierr);
1404       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
1405       if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: rnz = %D nzmax = %D",rnz,nzmax);
1406       ierr = MPILong_Recv(column_values,rnz,MPIU_SCALAR,i,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1407       ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1408     }
1409     ierr = PetscViewerFlowControlEndMaster(viewer,message_count);CHKERRQ(ierr);
1410   } else {
1411     ierr = PetscViewerFlowControlStepWorker(viewer,rank,message_count);CHKERRQ(ierr);
1412     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1413     ierr = MPILong_Send(column_values,nz,MPIU_SCALAR,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1414     ierr = PetscViewerFlowControlEndWorker(viewer,message_count);CHKERRQ(ierr);
1415   }
1416   ierr = PetscFree(column_values);CHKERRQ(ierr);
1417   PetscFunctionReturn(0);
1418 }
1419 
1420 #undef __FUNCT__
1421 #define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket"
1422 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
1423 {
1424   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
1425   PetscErrorCode    ierr;
1426   PetscMPIInt       rank = aij->rank,size = aij->size;
1427   PetscBool         isdraw,iascii,isbinary;
1428   PetscViewer       sviewer;
1429   PetscViewerFormat format;
1430 
1431   PetscFunctionBegin;
1432   ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1433   ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1434   ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1435   if (iascii) {
1436     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1437     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1438       MatInfo    info;
1439       PetscBool  inodes;
1440 
1441       ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr);
1442       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
1443       ierr = MatInodeGetInodeSizes(aij->A,PETSC_NULL,(PetscInt **)&inodes,PETSC_NULL);CHKERRQ(ierr);
1444       ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_TRUE);CHKERRQ(ierr);
1445       if (!inodes) {
1446         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, not using I-node routines\n",
1447 					      rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
1448       } else {
1449         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, using I-node routines\n",
1450 		    rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
1451       }
1452       ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
1453       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1454       ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
1455       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1456       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1457       ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_FALSE);CHKERRQ(ierr);
1458       ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
1459       ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr);
1460       PetscFunctionReturn(0);
1461     } else if (format == PETSC_VIEWER_ASCII_INFO) {
1462       PetscInt   inodecount,inodelimit,*inodes;
1463       ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr);
1464       if (inodes) {
1465         ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr);
1466       } else {
1467         ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr);
1468       }
1469       PetscFunctionReturn(0);
1470     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1471       PetscFunctionReturn(0);
1472     }
1473   } else if (isbinary) {
1474     if (size == 1) {
1475       ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1476       ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
1477     } else {
1478       ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr);
1479     }
1480     PetscFunctionReturn(0);
1481   } else if (isdraw) {
1482     PetscDraw  draw;
1483     PetscBool  isnull;
1484     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
1485     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
1486   }
1487 
1488   if (size == 1) {
1489     ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1490     ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
1491   } else {
1492     /* assemble the entire matrix onto first processor. */
1493     Mat         A;
1494     Mat_SeqAIJ  *Aloc;
1495     PetscInt    M = mat->rmap->N,N = mat->cmap->N,m,*ai,*aj,row,*cols,i,*ct;
1496     MatScalar   *a;
1497 
1498     if (mat->rmap->N > 1024) {
1499       PetscBool  flg = PETSC_FALSE;
1500 
1501       ierr = PetscOptionsGetBool(((PetscObject) mat)->prefix, "-mat_ascii_output_large", &flg,PETSC_NULL);CHKERRQ(ierr);
1502       if (!flg) {
1503         SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_OUTOFRANGE,"ASCII matrix output not allowed for matrices with more than 1024 rows, use binary format instead.\nYou can override this restriction using -mat_ascii_output_large.");
1504       }
1505     }
1506 
1507     ierr = MatCreate(((PetscObject)mat)->comm,&A);CHKERRQ(ierr);
1508     if (!rank) {
1509       ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
1510     } else {
1511       ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
1512     }
1513     /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */
1514     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
1515     ierr = MatMPIAIJSetPreallocation(A,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
1516     ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr);
1517 
1518     /* copy over the A part */
1519     Aloc = (Mat_SeqAIJ*)aij->A->data;
1520     m = aij->A->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1521     row = mat->rmap->rstart;
1522     for (i=0; i<ai[m]; i++) {aj[i] += mat->cmap->rstart ;}
1523     for (i=0; i<m; i++) {
1524       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
1525       row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
1526     }
1527     aj = Aloc->j;
1528     for (i=0; i<ai[m]; i++) {aj[i] -= mat->cmap->rstart;}
1529 
1530     /* copy over the B part */
1531     Aloc = (Mat_SeqAIJ*)aij->B->data;
1532     m    = aij->B->rmap->n;  ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1533     row  = mat->rmap->rstart;
1534     ierr = PetscMalloc((ai[m]+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr);
1535     ct   = cols;
1536     for (i=0; i<ai[m]; i++) {cols[i] = aij->garray[aj[i]];}
1537     for (i=0; i<m; i++) {
1538       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
1539       row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
1540     }
1541     ierr = PetscFree(ct);CHKERRQ(ierr);
1542     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1543     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1544     /*
1545        Everyone has to call to draw the matrix since the graphics waits are
1546        synchronized across all processors that share the PetscDraw object
1547     */
1548     ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
1549     if (!rank) {
1550       ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1551       /* Set the type name to MATMPIAIJ so that the correct type can be printed out by PetscObjectPrintClassNamePrefixType() in MatView_SeqAIJ_ASCII()*/
1552       ierr = PetscStrcpy(((PetscObject)((Mat_MPIAIJ*)(A->data))->A)->type_name,MATMPIAIJ);CHKERRQ(ierr);
1553       ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
1554     }
1555     ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
1556     ierr = MatDestroy(&A);CHKERRQ(ierr);
1557   }
1558   PetscFunctionReturn(0);
1559 }
1560 
1561 #undef __FUNCT__
1562 #define __FUNCT__ "MatView_MPIAIJ"
1563 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer)
1564 {
1565   PetscErrorCode ierr;
1566   PetscBool      iascii,isdraw,issocket,isbinary;
1567 
1568   PetscFunctionBegin;
1569   ierr  = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1570   ierr  = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1571   ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1572   ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
1573   if (iascii || isdraw || isbinary || issocket) {
1574     ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
1575   } else {
1576     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Viewer type %s not supported by MPIAIJ matrices",((PetscObject)viewer)->type_name);
1577   }
1578   PetscFunctionReturn(0);
1579 }
1580 
1581 #undef __FUNCT__
1582 #define __FUNCT__ "MatSOR_MPIAIJ"
1583 PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
1584 {
1585   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1586   PetscErrorCode ierr;
1587   Vec            bb1 = 0;
1588   PetscBool      hasop;
1589 
1590   PetscFunctionBegin;
1591   if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) {
1592     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
1593   }
1594 
1595   if (flag == SOR_APPLY_UPPER) {
1596     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1597     PetscFunctionReturn(0);
1598   }
1599 
1600   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
1601     if (flag & SOR_ZERO_INITIAL_GUESS) {
1602       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1603       its--;
1604     }
1605 
1606     while (its--) {
1607       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1608       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1609 
1610       /* update rhs: bb1 = bb - B*x */
1611       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1612       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1613 
1614       /* local sweep */
1615       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1616     }
1617   } else if (flag & SOR_LOCAL_FORWARD_SWEEP){
1618     if (flag & SOR_ZERO_INITIAL_GUESS) {
1619       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1620       its--;
1621     }
1622     while (its--) {
1623       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1624       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1625 
1626       /* update rhs: bb1 = bb - B*x */
1627       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1628       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1629 
1630       /* local sweep */
1631       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1632     }
1633   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){
1634     if (flag & SOR_ZERO_INITIAL_GUESS) {
1635       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1636       its--;
1637     }
1638     while (its--) {
1639       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1640       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1641 
1642       /* update rhs: bb1 = bb - B*x */
1643       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1644       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1645 
1646       /* local sweep */
1647       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1648     }
1649   }  else if (flag & SOR_EISENSTAT) {
1650     Vec         xx1;
1651 
1652     ierr = VecDuplicate(bb,&xx1);CHKERRQ(ierr);
1653     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr);
1654 
1655     ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1656     ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1657     if (!mat->diag) {
1658       ierr = MatGetVecs(matin,&mat->diag,PETSC_NULL);CHKERRQ(ierr);
1659       ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr);
1660     }
1661     ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr);
1662     if (hasop) {
1663       ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr);
1664     } else {
1665       ierr = VecPointwiseMult(bb1,mat->diag,xx);CHKERRQ(ierr);
1666     }
1667     ierr = VecAYPX(bb1,(omega-2.0)/omega,bb);CHKERRQ(ierr);
1668 
1669     ierr = MatMultAdd(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr);
1670 
1671     /* local sweep */
1672     ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr);
1673     ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr);
1674     ierr = VecDestroy(&xx1);CHKERRQ(ierr);
1675   } else SETERRQ(((PetscObject)matin)->comm,PETSC_ERR_SUP,"Parallel SOR not supported");
1676 
1677   ierr = VecDestroy(&bb1);CHKERRQ(ierr);
1678   PetscFunctionReturn(0);
1679 }
1680 
1681 #undef __FUNCT__
1682 #define __FUNCT__ "MatPermute_MPIAIJ"
1683 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B)
1684 {
1685   MPI_Comm       comm,pcomm;
1686   PetscInt       first,local_size,nrows;
1687   const PetscInt *rows;
1688   PetscMPIInt    size;
1689   IS             crowp,growp,irowp,lrowp,lcolp,icolp;
1690   PetscErrorCode ierr;
1691 
1692   PetscFunctionBegin;
1693   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
1694   /* make a collective version of 'rowp' */
1695   ierr = PetscObjectGetComm((PetscObject)rowp,&pcomm);CHKERRQ(ierr);
1696   if (pcomm==comm) {
1697     crowp = rowp;
1698   } else {
1699     ierr = ISGetSize(rowp,&nrows);CHKERRQ(ierr);
1700     ierr = ISGetIndices(rowp,&rows);CHKERRQ(ierr);
1701     ierr = ISCreateGeneral(comm,nrows,rows,PETSC_COPY_VALUES,&crowp);CHKERRQ(ierr);
1702     ierr = ISRestoreIndices(rowp,&rows);CHKERRQ(ierr);
1703   }
1704   /* collect the global row permutation and invert it */
1705   ierr = ISAllGather(crowp,&growp);CHKERRQ(ierr);
1706   ierr = ISSetPermutation(growp);CHKERRQ(ierr);
1707   if (pcomm!=comm) {
1708     ierr = ISDestroy(&crowp);CHKERRQ(ierr);
1709   }
1710   ierr = ISInvertPermutation(growp,PETSC_DECIDE,&irowp);CHKERRQ(ierr);
1711   /* get the local target indices */
1712   ierr = MatGetOwnershipRange(A,&first,PETSC_NULL);CHKERRQ(ierr);
1713   ierr = MatGetLocalSize(A,&local_size,PETSC_NULL);CHKERRQ(ierr);
1714   ierr = ISGetIndices(irowp,&rows);CHKERRQ(ierr);
1715   ierr = ISCreateGeneral(MPI_COMM_SELF,local_size,rows+first,PETSC_COPY_VALUES,&lrowp);CHKERRQ(ierr);
1716   ierr = ISRestoreIndices(irowp,&rows);CHKERRQ(ierr);
1717   ierr = ISDestroy(&irowp);CHKERRQ(ierr);
1718     /* the column permutation is simpler:
1719        make a local version of 'colp' and invert it */
1720   ierr = PetscObjectGetComm((PetscObject)colp,&pcomm);CHKERRQ(ierr);
1721   ierr = MPI_Comm_size(pcomm,&size);CHKERRQ(ierr);
1722   if (size==1) {
1723     lcolp = colp;
1724   } else {
1725     ierr = ISGetSize(colp,&nrows);CHKERRQ(ierr);
1726     ierr = ISGetIndices(colp,&rows);CHKERRQ(ierr);
1727     ierr = ISCreateGeneral(MPI_COMM_SELF,nrows,rows,PETSC_COPY_VALUES,&lcolp);CHKERRQ(ierr);
1728   }
1729   ierr = ISSetPermutation(lcolp);CHKERRQ(ierr);
1730   ierr = ISInvertPermutation(lcolp,PETSC_DECIDE,&icolp);CHKERRQ(ierr);
1731   ierr = ISSetPermutation(icolp);CHKERRQ(ierr);
1732   if (size>1) {
1733     ierr = ISRestoreIndices(colp,&rows);CHKERRQ(ierr);
1734     ierr = ISDestroy(&lcolp);CHKERRQ(ierr);
1735   }
1736   /* now we just get the submatrix */
1737   ierr = MatGetSubMatrix_MPIAIJ_Private(A,lrowp,icolp,local_size,MAT_INITIAL_MATRIX,B);CHKERRQ(ierr);
1738   /* clean up */
1739   ierr = ISDestroy(&lrowp);CHKERRQ(ierr);
1740   ierr = ISDestroy(&icolp);CHKERRQ(ierr);
1741   PetscFunctionReturn(0);
1742 }
1743 
1744 #undef __FUNCT__
1745 #define __FUNCT__ "MatGetInfo_MPIAIJ"
1746 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1747 {
1748   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1749   Mat            A = mat->A,B = mat->B;
1750   PetscErrorCode ierr;
1751   PetscReal      isend[5],irecv[5];
1752 
1753   PetscFunctionBegin;
1754   info->block_size     = 1.0;
1755   ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1756   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1757   isend[3] = info->memory;  isend[4] = info->mallocs;
1758   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1759   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1760   isend[3] += info->memory;  isend[4] += info->mallocs;
1761   if (flag == MAT_LOCAL) {
1762     info->nz_used      = isend[0];
1763     info->nz_allocated = isend[1];
1764     info->nz_unneeded  = isend[2];
1765     info->memory       = isend[3];
1766     info->mallocs      = isend[4];
1767   } else if (flag == MAT_GLOBAL_MAX) {
1768     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,((PetscObject)matin)->comm);CHKERRQ(ierr);
1769     info->nz_used      = irecv[0];
1770     info->nz_allocated = irecv[1];
1771     info->nz_unneeded  = irecv[2];
1772     info->memory       = irecv[3];
1773     info->mallocs      = irecv[4];
1774   } else if (flag == MAT_GLOBAL_SUM) {
1775     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,((PetscObject)matin)->comm);CHKERRQ(ierr);
1776     info->nz_used      = irecv[0];
1777     info->nz_allocated = irecv[1];
1778     info->nz_unneeded  = irecv[2];
1779     info->memory       = irecv[3];
1780     info->mallocs      = irecv[4];
1781   }
1782   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1783   info->fill_ratio_needed = 0;
1784   info->factor_mallocs    = 0;
1785 
1786   PetscFunctionReturn(0);
1787 }
1788 
1789 #undef __FUNCT__
1790 #define __FUNCT__ "MatSetOption_MPIAIJ"
1791 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscBool  flg)
1792 {
1793   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1794   PetscErrorCode ierr;
1795 
1796   PetscFunctionBegin;
1797   switch (op) {
1798   case MAT_NEW_NONZERO_LOCATIONS:
1799   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1800   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1801   case MAT_KEEP_NONZERO_PATTERN:
1802   case MAT_NEW_NONZERO_LOCATION_ERR:
1803   case MAT_USE_INODES:
1804   case MAT_IGNORE_ZERO_ENTRIES:
1805     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1806     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1807     break;
1808   case MAT_ROW_ORIENTED:
1809     a->roworiented = flg;
1810     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1811     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1812     break;
1813   case MAT_NEW_DIAGONALS:
1814     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1815     break;
1816   case MAT_IGNORE_OFF_PROC_ENTRIES:
1817     a->donotstash = flg;
1818     break;
1819   case MAT_SPD:
1820     A->spd_set                         = PETSC_TRUE;
1821     A->spd                             = flg;
1822     if (flg) {
1823       A->symmetric                     = PETSC_TRUE;
1824       A->structurally_symmetric        = PETSC_TRUE;
1825       A->symmetric_set                 = PETSC_TRUE;
1826       A->structurally_symmetric_set    = PETSC_TRUE;
1827     }
1828     break;
1829   case MAT_SYMMETRIC:
1830     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1831     break;
1832   case MAT_STRUCTURALLY_SYMMETRIC:
1833     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1834     break;
1835   case MAT_HERMITIAN:
1836     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1837     break;
1838   case MAT_SYMMETRY_ETERNAL:
1839     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1840     break;
1841   default:
1842     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op);
1843   }
1844   PetscFunctionReturn(0);
1845 }
1846 
1847 #undef __FUNCT__
1848 #define __FUNCT__ "MatGetRow_MPIAIJ"
1849 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1850 {
1851   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1852   PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
1853   PetscErrorCode ierr;
1854   PetscInt       i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart;
1855   PetscInt       nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend;
1856   PetscInt       *cmap,*idx_p;
1857 
1858   PetscFunctionBegin;
1859   if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
1860   mat->getrowactive = PETSC_TRUE;
1861 
1862   if (!mat->rowvalues && (idx || v)) {
1863     /*
1864         allocate enough space to hold information from the longest row.
1865     */
1866     Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data;
1867     PetscInt   max = 1,tmp;
1868     for (i=0; i<matin->rmap->n; i++) {
1869       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1870       if (max < tmp) { max = tmp; }
1871     }
1872     ierr = PetscMalloc2(max,PetscScalar,&mat->rowvalues,max,PetscInt,&mat->rowindices);CHKERRQ(ierr);
1873   }
1874 
1875   if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows");
1876   lrow = row - rstart;
1877 
1878   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1879   if (!v)   {pvA = 0; pvB = 0;}
1880   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1881   ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1882   ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1883   nztot = nzA + nzB;
1884 
1885   cmap  = mat->garray;
1886   if (v  || idx) {
1887     if (nztot) {
1888       /* Sort by increasing column numbers, assuming A and B already sorted */
1889       PetscInt imark = -1;
1890       if (v) {
1891         *v = v_p = mat->rowvalues;
1892         for (i=0; i<nzB; i++) {
1893           if (cmap[cworkB[i]] < cstart)   v_p[i] = vworkB[i];
1894           else break;
1895         }
1896         imark = i;
1897         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1898         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1899       }
1900       if (idx) {
1901         *idx = idx_p = mat->rowindices;
1902         if (imark > -1) {
1903           for (i=0; i<imark; i++) {
1904             idx_p[i] = cmap[cworkB[i]];
1905           }
1906         } else {
1907           for (i=0; i<nzB; i++) {
1908             if (cmap[cworkB[i]] < cstart)   idx_p[i] = cmap[cworkB[i]];
1909             else break;
1910           }
1911           imark = i;
1912         }
1913         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart + cworkA[i];
1914         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]];
1915       }
1916     } else {
1917       if (idx) *idx = 0;
1918       if (v)   *v   = 0;
1919     }
1920   }
1921   *nz = nztot;
1922   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1923   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1924   PetscFunctionReturn(0);
1925 }
1926 
1927 #undef __FUNCT__
1928 #define __FUNCT__ "MatRestoreRow_MPIAIJ"
1929 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1930 {
1931   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1932 
1933   PetscFunctionBegin;
1934   if (!aij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
1935   aij->getrowactive = PETSC_FALSE;
1936   PetscFunctionReturn(0);
1937 }
1938 
1939 #undef __FUNCT__
1940 #define __FUNCT__ "MatNorm_MPIAIJ"
1941 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm)
1942 {
1943   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1944   Mat_SeqAIJ     *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data;
1945   PetscErrorCode ierr;
1946   PetscInt       i,j,cstart = mat->cmap->rstart;
1947   PetscReal      sum = 0.0;
1948   MatScalar      *v;
1949 
1950   PetscFunctionBegin;
1951   if (aij->size == 1) {
1952     ierr =  MatNorm(aij->A,type,norm);CHKERRQ(ierr);
1953   } else {
1954     if (type == NORM_FROBENIUS) {
1955       v = amat->a;
1956       for (i=0; i<amat->nz; i++) {
1957 #if defined(PETSC_USE_COMPLEX)
1958         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1959 #else
1960         sum += (*v)*(*v); v++;
1961 #endif
1962       }
1963       v = bmat->a;
1964       for (i=0; i<bmat->nz; i++) {
1965 #if defined(PETSC_USE_COMPLEX)
1966         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1967 #else
1968         sum += (*v)*(*v); v++;
1969 #endif
1970       }
1971       ierr = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPIU_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr);
1972       *norm = PetscSqrtReal(*norm);
1973     } else if (type == NORM_1) { /* max column norm */
1974       PetscReal *tmp,*tmp2;
1975       PetscInt  *jj,*garray = aij->garray;
1976       ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr);
1977       ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscReal),&tmp2);CHKERRQ(ierr);
1978       ierr = PetscMemzero(tmp,mat->cmap->N*sizeof(PetscReal));CHKERRQ(ierr);
1979       *norm = 0.0;
1980       v = amat->a; jj = amat->j;
1981       for (j=0; j<amat->nz; j++) {
1982         tmp[cstart + *jj++ ] += PetscAbsScalar(*v);  v++;
1983       }
1984       v = bmat->a; jj = bmat->j;
1985       for (j=0; j<bmat->nz; j++) {
1986         tmp[garray[*jj++]] += PetscAbsScalar(*v); v++;
1987       }
1988       ierr = MPI_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr);
1989       for (j=0; j<mat->cmap->N; j++) {
1990         if (tmp2[j] > *norm) *norm = tmp2[j];
1991       }
1992       ierr = PetscFree(tmp);CHKERRQ(ierr);
1993       ierr = PetscFree(tmp2);CHKERRQ(ierr);
1994     } else if (type == NORM_INFINITY) { /* max row norm */
1995       PetscReal ntemp = 0.0;
1996       for (j=0; j<aij->A->rmap->n; j++) {
1997         v = amat->a + amat->i[j];
1998         sum = 0.0;
1999         for (i=0; i<amat->i[j+1]-amat->i[j]; i++) {
2000           sum += PetscAbsScalar(*v); v++;
2001         }
2002         v = bmat->a + bmat->i[j];
2003         for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) {
2004           sum += PetscAbsScalar(*v); v++;
2005         }
2006         if (sum > ntemp) ntemp = sum;
2007       }
2008       ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPIU_MAX,((PetscObject)mat)->comm);CHKERRQ(ierr);
2009     } else {
2010       SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_SUP,"No support for two norm");
2011     }
2012   }
2013   PetscFunctionReturn(0);
2014 }
2015 
2016 #undef __FUNCT__
2017 #define __FUNCT__ "MatTranspose_MPIAIJ"
2018 PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout)
2019 {
2020   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2021   Mat_SeqAIJ     *Aloc=(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data;
2022   PetscErrorCode ierr;
2023   PetscInt       M = A->rmap->N,N = A->cmap->N,ma,na,mb,*ai,*aj,*bi,*bj,row,*cols,*cols_tmp,i,*d_nnz;
2024   PetscInt       cstart=A->cmap->rstart,ncol;
2025   Mat            B;
2026   MatScalar      *array;
2027 
2028   PetscFunctionBegin;
2029   if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
2030 
2031   ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n;
2032   ai = Aloc->i; aj = Aloc->j;
2033   bi = Bloc->i; bj = Bloc->j;
2034   if (reuse == MAT_INITIAL_MATRIX || *matout == A) {
2035     /* compute d_nnz for preallocation; o_nnz is approximated by d_nnz to avoid communication */
2036     ierr = PetscMalloc((1+na)*sizeof(PetscInt),&d_nnz);CHKERRQ(ierr);
2037     ierr = PetscMemzero(d_nnz,(1+na)*sizeof(PetscInt));CHKERRQ(ierr);
2038     for (i=0; i<ai[ma]; i++){
2039       d_nnz[aj[i]] ++;
2040       aj[i] += cstart; /* global col index to be used by MatSetValues() */
2041     }
2042 
2043     ierr = MatCreate(((PetscObject)A)->comm,&B);CHKERRQ(ierr);
2044     ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
2045     ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
2046     ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,d_nnz);CHKERRQ(ierr);
2047     ierr = PetscFree(d_nnz);CHKERRQ(ierr);
2048   } else {
2049     B = *matout;
2050   }
2051 
2052   /* copy over the A part */
2053   array = Aloc->a;
2054   row = A->rmap->rstart;
2055   for (i=0; i<ma; i++) {
2056     ncol = ai[i+1]-ai[i];
2057     ierr = MatSetValues(B,ncol,aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
2058     row++; array += ncol; aj += ncol;
2059   }
2060   aj = Aloc->j;
2061   for (i=0; i<ai[ma]; i++) aj[i] -= cstart; /* restore the local column indices */
2062 
2063   /* copy over the B part */
2064   ierr = PetscMalloc(bi[mb]*sizeof(PetscInt),&cols);CHKERRQ(ierr);
2065   ierr = PetscMemzero(cols,bi[mb]*sizeof(PetscInt));CHKERRQ(ierr);
2066   array = Bloc->a;
2067   row = A->rmap->rstart;
2068   for (i=0; i<bi[mb]; i++) {cols[i] = a->garray[bj[i]];}
2069   cols_tmp = cols;
2070   for (i=0; i<mb; i++) {
2071     ncol = bi[i+1]-bi[i];
2072     ierr = MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
2073     row++; array += ncol; cols_tmp += ncol;
2074   }
2075   ierr = PetscFree(cols);CHKERRQ(ierr);
2076 
2077   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2078   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2079   if (reuse == MAT_INITIAL_MATRIX || *matout != A) {
2080     *matout = B;
2081   } else {
2082     ierr = MatHeaderMerge(A,B);CHKERRQ(ierr);
2083   }
2084   PetscFunctionReturn(0);
2085 }
2086 
2087 #undef __FUNCT__
2088 #define __FUNCT__ "MatDiagonalScale_MPIAIJ"
2089 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
2090 {
2091   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2092   Mat            a = aij->A,b = aij->B;
2093   PetscErrorCode ierr;
2094   PetscInt       s1,s2,s3;
2095 
2096   PetscFunctionBegin;
2097   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
2098   if (rr) {
2099     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
2100     if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
2101     /* Overlap communication with computation. */
2102     ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2103   }
2104   if (ll) {
2105     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
2106     if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
2107     ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr);
2108   }
2109   /* scale the diagonal block */
2110   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
2111 
2112   if (rr) {
2113     /* Do a scatter end and then right scale the off-diagonal block */
2114     ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2115     ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr);
2116   }
2117 
2118   PetscFunctionReturn(0);
2119 }
2120 
2121 #undef __FUNCT__
2122 #define __FUNCT__ "MatSetBlockSize_MPIAIJ"
2123 PetscErrorCode MatSetBlockSize_MPIAIJ(Mat A,PetscInt bs)
2124 {
2125   Mat_MPIAIJ     *a   = (Mat_MPIAIJ*)A->data;
2126   PetscErrorCode ierr;
2127 
2128   PetscFunctionBegin;
2129   ierr = MatSetBlockSize(a->A,bs);CHKERRQ(ierr);
2130   ierr = MatSetBlockSize(a->B,bs);CHKERRQ(ierr);
2131   ierr = PetscLayoutSetBlockSize(A->rmap,bs);CHKERRQ(ierr);
2132   ierr = PetscLayoutSetBlockSize(A->cmap,bs);CHKERRQ(ierr);
2133   PetscFunctionReturn(0);
2134 }
2135 #undef __FUNCT__
2136 #define __FUNCT__ "MatSetUnfactored_MPIAIJ"
2137 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A)
2138 {
2139   Mat_MPIAIJ     *a   = (Mat_MPIAIJ*)A->data;
2140   PetscErrorCode ierr;
2141 
2142   PetscFunctionBegin;
2143   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
2144   PetscFunctionReturn(0);
2145 }
2146 
2147 #undef __FUNCT__
2148 #define __FUNCT__ "MatEqual_MPIAIJ"
2149 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscBool  *flag)
2150 {
2151   Mat_MPIAIJ     *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data;
2152   Mat            a,b,c,d;
2153   PetscBool      flg;
2154   PetscErrorCode ierr;
2155 
2156   PetscFunctionBegin;
2157   a = matA->A; b = matA->B;
2158   c = matB->A; d = matB->B;
2159 
2160   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
2161   if (flg) {
2162     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
2163   }
2164   ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,((PetscObject)A)->comm);CHKERRQ(ierr);
2165   PetscFunctionReturn(0);
2166 }
2167 
2168 #undef __FUNCT__
2169 #define __FUNCT__ "MatCopy_MPIAIJ"
2170 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
2171 {
2172   PetscErrorCode ierr;
2173   Mat_MPIAIJ     *a = (Mat_MPIAIJ *)A->data;
2174   Mat_MPIAIJ     *b = (Mat_MPIAIJ *)B->data;
2175 
2176   PetscFunctionBegin;
2177   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
2178   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
2179     /* because of the column compression in the off-processor part of the matrix a->B,
2180        the number of columns in a->B and b->B may be different, hence we cannot call
2181        the MatCopy() directly on the two parts. If need be, we can provide a more
2182        efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices
2183        then copying the submatrices */
2184     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
2185   } else {
2186     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
2187     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
2188   }
2189   PetscFunctionReturn(0);
2190 }
2191 
2192 #undef __FUNCT__
2193 #define __FUNCT__ "MatSetUpPreallocation_MPIAIJ"
2194 PetscErrorCode MatSetUpPreallocation_MPIAIJ(Mat A)
2195 {
2196   PetscErrorCode ierr;
2197 
2198   PetscFunctionBegin;
2199   ierr =  MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
2200   PetscFunctionReturn(0);
2201 }
2202 
2203 #undef __FUNCT__
2204 #define __FUNCT__ "MatAXPY_MPIAIJ"
2205 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
2206 {
2207   PetscErrorCode ierr;
2208   PetscInt       i;
2209   Mat_MPIAIJ     *xx = (Mat_MPIAIJ *)X->data,*yy = (Mat_MPIAIJ *)Y->data;
2210   PetscBLASInt   bnz,one=1;
2211   Mat_SeqAIJ     *x,*y;
2212 
2213   PetscFunctionBegin;
2214   if (str == SAME_NONZERO_PATTERN) {
2215     PetscScalar alpha = a;
2216     x = (Mat_SeqAIJ *)xx->A->data;
2217     y = (Mat_SeqAIJ *)yy->A->data;
2218     bnz = PetscBLASIntCast(x->nz);
2219     BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
2220     x = (Mat_SeqAIJ *)xx->B->data;
2221     y = (Mat_SeqAIJ *)yy->B->data;
2222     bnz = PetscBLASIntCast(x->nz);
2223     BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
2224   } else if (str == SUBSET_NONZERO_PATTERN) {
2225     ierr = MatAXPY_SeqAIJ(yy->A,a,xx->A,str);CHKERRQ(ierr);
2226 
2227     x = (Mat_SeqAIJ *)xx->B->data;
2228     y = (Mat_SeqAIJ *)yy->B->data;
2229     if (y->xtoy && y->XtoY != xx->B) {
2230       ierr = PetscFree(y->xtoy);CHKERRQ(ierr);
2231       ierr = MatDestroy(&y->XtoY);CHKERRQ(ierr);
2232     }
2233     if (!y->xtoy) { /* get xtoy */
2234       ierr = MatAXPYGetxtoy_Private(xx->B->rmap->n,x->i,x->j,xx->garray,y->i,y->j,yy->garray,&y->xtoy);CHKERRQ(ierr);
2235       y->XtoY = xx->B;
2236       ierr = PetscObjectReference((PetscObject)xx->B);CHKERRQ(ierr);
2237     }
2238     for (i=0; i<x->nz; i++) y->a[y->xtoy[i]] += a*(x->a[i]);
2239   } else {
2240     Mat B;
2241     PetscInt *nnz_d,*nnz_o;
2242     ierr = PetscMalloc(yy->A->rmap->N*sizeof(PetscInt),&nnz_d);CHKERRQ(ierr);
2243     ierr = PetscMalloc(yy->B->rmap->N*sizeof(PetscInt),&nnz_o);CHKERRQ(ierr);
2244     ierr = MatCreate(((PetscObject)Y)->comm,&B);CHKERRQ(ierr);
2245     ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr);
2246     ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr);
2247     ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr);
2248     ierr = MatAXPYGetPreallocation_SeqAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr);
2249     ierr = MatAXPYGetPreallocation_SeqAIJ(yy->B,xx->B,nnz_o);CHKERRQ(ierr);
2250     ierr = MatMPIAIJSetPreallocation(B,PETSC_NULL,nnz_d,PETSC_NULL,nnz_o);CHKERRQ(ierr);
2251     ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr);
2252     ierr = MatHeaderReplace(Y,B);CHKERRQ(ierr);
2253     ierr = PetscFree(nnz_d);CHKERRQ(ierr);
2254     ierr = PetscFree(nnz_o);CHKERRQ(ierr);
2255   }
2256   PetscFunctionReturn(0);
2257 }
2258 
2259 extern PetscErrorCode  MatConjugate_SeqAIJ(Mat);
2260 
2261 #undef __FUNCT__
2262 #define __FUNCT__ "MatConjugate_MPIAIJ"
2263 PetscErrorCode  MatConjugate_MPIAIJ(Mat mat)
2264 {
2265 #if defined(PETSC_USE_COMPLEX)
2266   PetscErrorCode ierr;
2267   Mat_MPIAIJ     *aij = (Mat_MPIAIJ *)mat->data;
2268 
2269   PetscFunctionBegin;
2270   ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr);
2271   ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr);
2272 #else
2273   PetscFunctionBegin;
2274 #endif
2275   PetscFunctionReturn(0);
2276 }
2277 
2278 #undef __FUNCT__
2279 #define __FUNCT__ "MatRealPart_MPIAIJ"
2280 PetscErrorCode MatRealPart_MPIAIJ(Mat A)
2281 {
2282   Mat_MPIAIJ   *a = (Mat_MPIAIJ*)A->data;
2283   PetscErrorCode ierr;
2284 
2285   PetscFunctionBegin;
2286   ierr = MatRealPart(a->A);CHKERRQ(ierr);
2287   ierr = MatRealPart(a->B);CHKERRQ(ierr);
2288   PetscFunctionReturn(0);
2289 }
2290 
2291 #undef __FUNCT__
2292 #define __FUNCT__ "MatImaginaryPart_MPIAIJ"
2293 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A)
2294 {
2295   Mat_MPIAIJ   *a = (Mat_MPIAIJ*)A->data;
2296   PetscErrorCode ierr;
2297 
2298   PetscFunctionBegin;
2299   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
2300   ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
2301   PetscFunctionReturn(0);
2302 }
2303 
2304 #ifdef PETSC_HAVE_PBGL
2305 
2306 #include <boost/parallel/mpi/bsp_process_group.hpp>
2307 #include <boost/graph/distributed/ilu_default_graph.hpp>
2308 #include <boost/graph/distributed/ilu_0_block.hpp>
2309 #include <boost/graph/distributed/ilu_preconditioner.hpp>
2310 #include <boost/graph/distributed/petsc/interface.hpp>
2311 #include <boost/multi_array.hpp>
2312 #include <boost/parallel/distributed_property_map.hpp>
2313 
2314 #undef __FUNCT__
2315 #define __FUNCT__ "MatILUFactorSymbolic_MPIAIJ"
2316 /*
2317   This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu>
2318 */
2319 PetscErrorCode MatILUFactorSymbolic_MPIAIJ(Mat fact,Mat A, IS isrow, IS iscol, const MatFactorInfo *info)
2320 {
2321   namespace petsc = boost::distributed::petsc;
2322 
2323   namespace graph_dist = boost::graph::distributed;
2324   using boost::graph::distributed::ilu_default::process_group_type;
2325   using boost::graph::ilu_permuted;
2326 
2327   PetscBool       row_identity, col_identity;
2328   PetscContainer  c;
2329   PetscInt        m, n, M, N;
2330   PetscErrorCode  ierr;
2331 
2332   PetscFunctionBegin;
2333   if (info->levels != 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only levels = 0 supported for parallel ilu");
2334   ierr = ISIdentity(isrow, &row_identity);CHKERRQ(ierr);
2335   ierr = ISIdentity(iscol, &col_identity);CHKERRQ(ierr);
2336   if (!row_identity || !col_identity) {
2337     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for parallel ILU");
2338   }
2339 
2340   process_group_type pg;
2341   typedef graph_dist::ilu_default::ilu_level_graph_type  lgraph_type;
2342   lgraph_type*   lgraph_p = new lgraph_type(petsc::num_global_vertices(A), pg, petsc::matrix_distribution(A, pg));
2343   lgraph_type&   level_graph = *lgraph_p;
2344   graph_dist::ilu_default::graph_type&            graph(level_graph.graph);
2345 
2346   petsc::read_matrix(A, graph, get(boost::edge_weight, graph));
2347   ilu_permuted(level_graph);
2348 
2349   /* put together the new matrix */
2350   ierr = MatCreate(((PetscObject)A)->comm, fact);CHKERRQ(ierr);
2351   ierr = MatGetLocalSize(A, &m, &n);CHKERRQ(ierr);
2352   ierr = MatGetSize(A, &M, &N);CHKERRQ(ierr);
2353   ierr = MatSetSizes(fact, m, n, M, N);CHKERRQ(ierr);
2354   ierr = MatSetType(fact, ((PetscObject)A)->type_name);CHKERRQ(ierr);
2355   ierr = MatAssemblyBegin(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2356   ierr = MatAssemblyEnd(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2357 
2358   ierr = PetscContainerCreate(((PetscObject)A)->comm, &c);CHKERRQ(ierr);
2359   ierr = PetscContainerSetPointer(c, lgraph_p);CHKERRQ(ierr);
2360   ierr = PetscObjectCompose((PetscObject) (fact), "graph", (PetscObject) c);CHKERRQ(ierr);
2361   ierr = PetscContainerDestroy(&c);CHKERRQ(ierr);
2362   PetscFunctionReturn(0);
2363 }
2364 
2365 #undef __FUNCT__
2366 #define __FUNCT__ "MatLUFactorNumeric_MPIAIJ"
2367 PetscErrorCode MatLUFactorNumeric_MPIAIJ(Mat B,Mat A, const MatFactorInfo *info)
2368 {
2369   PetscFunctionBegin;
2370   PetscFunctionReturn(0);
2371 }
2372 
2373 #undef __FUNCT__
2374 #define __FUNCT__ "MatSolve_MPIAIJ"
2375 /*
2376   This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu>
2377 */
2378 PetscErrorCode MatSolve_MPIAIJ(Mat A, Vec b, Vec x)
2379 {
2380   namespace graph_dist = boost::graph::distributed;
2381 
2382   typedef graph_dist::ilu_default::ilu_level_graph_type  lgraph_type;
2383   lgraph_type*   lgraph_p;
2384   PetscContainer c;
2385   PetscErrorCode ierr;
2386 
2387   PetscFunctionBegin;
2388   ierr = PetscObjectQuery((PetscObject) A, "graph", (PetscObject *) &c);CHKERRQ(ierr);
2389   ierr = PetscContainerGetPointer(c, (void **) &lgraph_p);CHKERRQ(ierr);
2390   ierr = VecCopy(b, x);CHKERRQ(ierr);
2391 
2392   PetscScalar* array_x;
2393   ierr = VecGetArray(x, &array_x);CHKERRQ(ierr);
2394   PetscInt sx;
2395   ierr = VecGetSize(x, &sx);CHKERRQ(ierr);
2396 
2397   PetscScalar* array_b;
2398   ierr = VecGetArray(b, &array_b);CHKERRQ(ierr);
2399   PetscInt sb;
2400   ierr = VecGetSize(b, &sb);CHKERRQ(ierr);
2401 
2402   lgraph_type&   level_graph = *lgraph_p;
2403   graph_dist::ilu_default::graph_type&            graph(level_graph.graph);
2404 
2405   typedef boost::multi_array_ref<PetscScalar, 1> array_ref_type;
2406   array_ref_type                                 ref_b(array_b, boost::extents[num_vertices(graph)]),
2407                                                  ref_x(array_x, boost::extents[num_vertices(graph)]);
2408 
2409   typedef boost::iterator_property_map<array_ref_type::iterator,
2410                                 boost::property_map<graph_dist::ilu_default::graph_type, boost::vertex_index_t>::type>  gvector_type;
2411   gvector_type                                   vector_b(ref_b.begin(), get(boost::vertex_index, graph)),
2412                                                  vector_x(ref_x.begin(), get(boost::vertex_index, graph));
2413 
2414   ilu_set_solve(*lgraph_p, vector_b, vector_x);
2415 
2416   PetscFunctionReturn(0);
2417 }
2418 #endif
2419 
2420 typedef struct { /* used by MatGetRedundantMatrix() for reusing matredundant */
2421   PetscInt       nzlocal,nsends,nrecvs;
2422   PetscMPIInt    *send_rank,*recv_rank;
2423   PetscInt       *sbuf_nz,*rbuf_nz,*sbuf_j,**rbuf_j;
2424   PetscScalar    *sbuf_a,**rbuf_a;
2425   PetscErrorCode (*Destroy)(Mat);
2426 } Mat_Redundant;
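/*
   The Mat_Redundant context is attached to the redundant matrix as a PetscContainer
   so that a later call with MAT_REUSE_MATRIX can reuse the communication buffers and
   only resend the numerical values.

   A minimal usage sketch, assuming subcomm is a sub-communicator of the matrix
   communicator and mlocal_sub is the desired local size of the redundant copy:

     Mat Cred;
     ierr = MatGetRedundantMatrix(mat,nsubcomm,subcomm,mlocal_sub,MAT_INITIAL_MATRIX,&Cred);CHKERRQ(ierr);
*/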
2427 
2428 #undef __FUNCT__
2429 #define __FUNCT__ "PetscContainerDestroy_MatRedundant"
2430 PetscErrorCode PetscContainerDestroy_MatRedundant(void *ptr)
2431 {
2432   PetscErrorCode       ierr;
2433   Mat_Redundant        *redund=(Mat_Redundant*)ptr;
2434   PetscInt             i;
2435 
2436   PetscFunctionBegin;
2437   ierr = PetscFree2(redund->send_rank,redund->recv_rank);CHKERRQ(ierr);
2438   ierr = PetscFree(redund->sbuf_j);CHKERRQ(ierr);
2439   ierr = PetscFree(redund->sbuf_a);CHKERRQ(ierr);
2440   for (i=0; i<redund->nrecvs; i++){
2441     ierr = PetscFree(redund->rbuf_j[i]);CHKERRQ(ierr);
2442     ierr = PetscFree(redund->rbuf_a[i]);CHKERRQ(ierr);
2443   }
2444   ierr = PetscFree4(redund->sbuf_nz,redund->rbuf_nz,redund->rbuf_j,redund->rbuf_a);CHKERRQ(ierr);
2445   ierr = PetscFree(redund);CHKERRQ(ierr);
2446   PetscFunctionReturn(0);
2447 }
2448 
2449 #undef __FUNCT__
2450 #define __FUNCT__ "MatDestroy_MatRedundant"
2451 PetscErrorCode MatDestroy_MatRedundant(Mat A)
2452 {
2453   PetscErrorCode  ierr;
2454   PetscContainer  container;
2455   Mat_Redundant   *redund=PETSC_NULL;
2456 
2457   PetscFunctionBegin;
2458   ierr = PetscObjectQuery((PetscObject)A,"Mat_Redundant",(PetscObject *)&container);CHKERRQ(ierr);
2459   if (!container) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Container does not exist");
2460   ierr = PetscContainerGetPointer(container,(void **)&redund);CHKERRQ(ierr);
2461   A->ops->destroy = redund->Destroy;
2462   ierr = PetscObjectCompose((PetscObject)A,"Mat_Redundant",0);CHKERRQ(ierr);
2463   if (A->ops->destroy) {
2464     ierr = (*A->ops->destroy)(A);CHKERRQ(ierr);
2465   }
2466   PetscFunctionReturn(0);
2467 }
2468 
2469 #undef __FUNCT__
2470 #define __FUNCT__ "MatGetRedundantMatrix_MPIAIJ"
2471 PetscErrorCode MatGetRedundantMatrix_MPIAIJ(Mat mat,PetscInt nsubcomm,MPI_Comm subcomm,PetscInt mlocal_sub,MatReuse reuse,Mat *matredundant)
2472 {
2473   PetscMPIInt    rank,size;
2474   MPI_Comm       comm=((PetscObject)mat)->comm;
2475   PetscErrorCode ierr;
2476   PetscInt       nsends=0,nrecvs=0,i,rownz_max=0;
2477   PetscMPIInt    *send_rank=PETSC_NULL,*recv_rank=PETSC_NULL;
2478   PetscInt       *rowrange=mat->rmap->range;
2479   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2480   Mat            A=aij->A,B=aij->B,C=*matredundant;
2481   Mat_SeqAIJ     *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data;
2482   PetscScalar    *sbuf_a;
2483   PetscInt       nzlocal=a->nz+b->nz;
2484   PetscInt       j,cstart=mat->cmap->rstart,cend=mat->cmap->rend,row,nzA,nzB,ncols,*cworkA,*cworkB;
2485   PetscInt       rstart=mat->rmap->rstart,rend=mat->rmap->rend,*bmap=aij->garray,M,N;
2486   PetscInt       *cols,ctmp,lwrite,*rptr,l,*sbuf_j;
2487   MatScalar      *aworkA,*aworkB;
2488   PetscScalar    *vals;
2489   PetscMPIInt    tag1,tag2,tag3,imdex;
2490   MPI_Request    *s_waits1=PETSC_NULL,*s_waits2=PETSC_NULL,*s_waits3=PETSC_NULL,
2491                  *r_waits1=PETSC_NULL,*r_waits2=PETSC_NULL,*r_waits3=PETSC_NULL;
2492   MPI_Status     recv_status,*send_status;
2493   PetscInt       *sbuf_nz=PETSC_NULL,*rbuf_nz=PETSC_NULL,count;
2494   PetscInt       **rbuf_j=PETSC_NULL;
2495   PetscScalar    **rbuf_a=PETSC_NULL;
2496   Mat_Redundant  *redund=PETSC_NULL;
2497   PetscContainer container;
2498 
2499   PetscFunctionBegin;
2500   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2501   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2502 
2503   if (reuse == MAT_REUSE_MATRIX) {
2504     ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr);
2505     if (M != N || M != mat->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong global size");
2506     ierr = MatGetLocalSize(C,&M,&N);CHKERRQ(ierr);
2507     if (M != N || M != mlocal_sub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong local size");
2508     ierr = PetscObjectQuery((PetscObject)C,"Mat_Redundant",(PetscObject *)&container);CHKERRQ(ierr);
2509     if (!container) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Container does not exist");
2510     ierr = PetscContainerGetPointer(container,(void **)&redund);CHKERRQ(ierr);
2511     if (nzlocal != redund->nzlocal) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong nzlocal");
2512 
2513     nsends    = redund->nsends;
2514     nrecvs    = redund->nrecvs;
2515     send_rank = redund->send_rank;
2516     recv_rank = redund->recv_rank;
2517     sbuf_nz   = redund->sbuf_nz;
2518     rbuf_nz   = redund->rbuf_nz;
2519     sbuf_j    = redund->sbuf_j;
2520     sbuf_a    = redund->sbuf_a;
2521     rbuf_j    = redund->rbuf_j;
2522     rbuf_a    = redund->rbuf_a;
2523   }
2524 
2525   if (reuse == MAT_INITIAL_MATRIX){
2526     PetscMPIInt  subrank,subsize;
2527     PetscInt     nleftover,np_subcomm;
2528     /* get the destination processors' id send_rank, nsends and nrecvs */
2529     ierr = MPI_Comm_rank(subcomm,&subrank);CHKERRQ(ierr);
2530     ierr = MPI_Comm_size(subcomm,&subsize);CHKERRQ(ierr);
2531     ierr = PetscMalloc2(size,PetscMPIInt,&send_rank,size,PetscMPIInt,&recv_rank);CHKERRQ(ierr);
2532     np_subcomm = size/nsubcomm;
2533     nleftover  = size - nsubcomm*np_subcomm;
2534     nsends = 0; nrecvs = 0;
2535     for (i=0; i<size; i++){ /* i=rank*/
2536       if (subrank == i/nsubcomm && rank != i){ /* my_subrank == other's subrank */
2537         send_rank[nsends] = i; nsends++;
2538         recv_rank[nrecvs++] = i;
2539       }
2540     }
2541     if (rank >= size - nleftover){/* this proc is a leftover processor */
2542       i = size-nleftover-1;
2543       j = 0;
2544       while (j < nsubcomm - nleftover){
2545         send_rank[nsends++] = i;
2546         i--; j++;
2547       }
2548     }
2549 
2550     if (nleftover && subsize == size/nsubcomm && subrank==subsize-1){ /* this proc recvs from leftover processors */
2551       for (i=0; i<nleftover; i++){
2552         recv_rank[nrecvs++] = size-nleftover+i;
2553       }
2554     }
2555 
2556     /* allocate sbuf_j, sbuf_a */
2557     i = nzlocal + rowrange[rank+1] - rowrange[rank] + 2;
2558     ierr = PetscMalloc(i*sizeof(PetscInt),&sbuf_j);CHKERRQ(ierr);
2559     ierr = PetscMalloc((nzlocal+1)*sizeof(PetscScalar),&sbuf_a);CHKERRQ(ierr);
2560   } /* end of if (reuse == MAT_INITIAL_MATRIX) */
2561 
2562   /* copy mat's local entries into the buffers */
2563   if (reuse == MAT_INITIAL_MATRIX){
2564     rownz_max = 0;
2565     rptr = sbuf_j;
2566     cols = sbuf_j + rend-rstart + 1;
2567     vals = sbuf_a;
2568     rptr[0] = 0;
2569     for (i=0; i<rend-rstart; i++){
2570       row = i + rstart;
2571       nzA    = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i];
2572       ncols  = nzA + nzB;
2573       cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i];
2574       aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i];
2575       /* load the column indices for this row into cols */
2576       lwrite = 0;
2577       for (l=0; l<nzB; l++) {
2578         if ((ctmp = bmap[cworkB[l]]) < cstart){
2579           vals[lwrite]   = aworkB[l];
2580           cols[lwrite++] = ctmp;
2581         }
2582       }
2583       for (l=0; l<nzA; l++){
2584         vals[lwrite]   = aworkA[l];
2585         cols[lwrite++] = cstart + cworkA[l];
2586       }
2587       for (l=0; l<nzB; l++) {
2588         if ((ctmp = bmap[cworkB[l]]) >= cend){
2589           vals[lwrite]   = aworkB[l];
2590           cols[lwrite++] = ctmp;
2591         }
2592       }
2593       vals += ncols;
2594       cols += ncols;
2595       rptr[i+1] = rptr[i] + ncols;
2596       if (rownz_max < ncols) rownz_max = ncols;
2597     }
2598     if (rptr[rend-rstart] != a->nz + b->nz) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB, "rptr[%d] %d != %d + %d",rend-rstart,rptr[rend-rstart],a->nz,b->nz);
2599   } else { /* only copy matrix values into sbuf_a */
2600     rptr = sbuf_j;
2601     vals = sbuf_a;
2602     rptr[0] = 0;
2603     for (i=0; i<rend-rstart; i++){
2604       row = i + rstart;
2605       nzA    = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i];
2606       ncols  = nzA + nzB;
2607       cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i];
2608       aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i];
2609       lwrite = 0;
2610       for (l=0; l<nzB; l++) {
2611         if ((ctmp = bmap[cworkB[l]]) < cstart) vals[lwrite++] = aworkB[l];
2612       }
2613       for (l=0; l<nzA; l++) vals[lwrite++] = aworkA[l];
2614       for (l=0; l<nzB; l++) {
2615         if ((ctmp = bmap[cworkB[l]]) >= cend) vals[lwrite++] = aworkB[l];
2616       }
2617       vals += ncols;
2618       rptr[i+1] = rptr[i] + ncols;
2619     }
2620   } /* end of if (reuse == MAT_INITIAL_MATRIX) */
2621 
2622   /* send nzlocal to others, and recv other's nzlocal */
2623   /*--------------------------------------------------*/
2624   if (reuse == MAT_INITIAL_MATRIX){
2625     ierr = PetscMalloc2(3*(nsends + nrecvs)+1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr);
2626     s_waits2 = s_waits3 + nsends;
2627     s_waits1 = s_waits2 + nsends;
2628     r_waits1 = s_waits1 + nsends;
2629     r_waits2 = r_waits1 + nrecvs;
2630     r_waits3 = r_waits2 + nrecvs;
2631   } else {
2632     ierr = PetscMalloc2(nsends + nrecvs +1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr);
2633     r_waits3 = s_waits3 + nsends;
2634   }
2635 
2636   ierr = PetscObjectGetNewTag((PetscObject)mat,&tag3);CHKERRQ(ierr);
2637   if (reuse == MAT_INITIAL_MATRIX){
2638     /* get new tags to keep the communication clean */
2639     ierr = PetscObjectGetNewTag((PetscObject)mat,&tag1);CHKERRQ(ierr);
2640     ierr = PetscObjectGetNewTag((PetscObject)mat,&tag2);CHKERRQ(ierr);
2641     ierr = PetscMalloc4(nsends,PetscInt,&sbuf_nz,nrecvs,PetscInt,&rbuf_nz,nrecvs,PetscInt*,&rbuf_j,nrecvs,PetscScalar*,&rbuf_a);CHKERRQ(ierr);
2642 
2643     /* post receives of other's nzlocal */
2644     for (i=0; i<nrecvs; i++){
2645       ierr = MPI_Irecv(rbuf_nz+i,1,MPIU_INT,MPI_ANY_SOURCE,tag1,comm,r_waits1+i);CHKERRQ(ierr);
2646     }
2647     /* send nzlocal to others */
2648     for (i=0; i<nsends; i++){
2649       sbuf_nz[i] = nzlocal;
2650       ierr = MPI_Isend(sbuf_nz+i,1,MPIU_INT,send_rank[i],tag1,comm,s_waits1+i);CHKERRQ(ierr);
2651     }
2652     /* wait on receives of nzlocal; allocate space for rbuf_j, rbuf_a */
2653     count = nrecvs;
2654     while (count) {
2655       ierr = MPI_Waitany(nrecvs,r_waits1,&imdex,&recv_status);CHKERRQ(ierr);
2656       recv_rank[imdex] = recv_status.MPI_SOURCE;
2657       /* allocate rbuf_a and rbuf_j; then post receives of rbuf_j */
2658       ierr = PetscMalloc((rbuf_nz[imdex]+1)*sizeof(PetscScalar),&rbuf_a[imdex]);CHKERRQ(ierr);
2659 
2660       i = rowrange[recv_status.MPI_SOURCE+1] - rowrange[recv_status.MPI_SOURCE]; /* number of expected mat->i */
2661       rbuf_nz[imdex] += i + 2;
2662       ierr = PetscMalloc(rbuf_nz[imdex]*sizeof(PetscInt),&rbuf_j[imdex]);CHKERRQ(ierr);
2663       ierr = MPI_Irecv(rbuf_j[imdex],rbuf_nz[imdex],MPIU_INT,recv_status.MPI_SOURCE,tag2,comm,r_waits2+imdex);CHKERRQ(ierr);
2664       count--;
2665     }
2666     /* wait on sends of nzlocal */
2667     if (nsends) {ierr = MPI_Waitall(nsends,s_waits1,send_status);CHKERRQ(ierr);}
2668     /* send mat->i,j to others, and recv from others */
2669     /*------------------------------------------------*/
2670     for (i=0; i<nsends; i++){
2671       j = nzlocal + rowrange[rank+1] - rowrange[rank] + 1;
2672       ierr = MPI_Isend(sbuf_j,j,MPIU_INT,send_rank[i],tag2,comm,s_waits2+i);CHKERRQ(ierr);
2673     }
2674     /* wait on receives of mat->i,j */
2675     /*------------------------------*/
2676     count = nrecvs;
2677     while (count) {
2678       ierr = MPI_Waitany(nrecvs,r_waits2,&imdex,&recv_status);CHKERRQ(ierr);
2679       if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE);
2680       count--;
2681     }
2682     /* wait on sends of mat->i,j */
2683     /*---------------------------*/
2684     if (nsends) {
2685       ierr = MPI_Waitall(nsends,s_waits2,send_status);CHKERRQ(ierr);
2686     }
2687   } /* end of if (reuse == MAT_INITIAL_MATRIX) */
2688 
2689   /* post receives, send and receive mat->a */
2690   /*----------------------------------------*/
2691   for (imdex=0; imdex<nrecvs; imdex++) {
2692     ierr = MPI_Irecv(rbuf_a[imdex],rbuf_nz[imdex],MPIU_SCALAR,recv_rank[imdex],tag3,comm,r_waits3+imdex);CHKERRQ(ierr);
2693   }
2694   for (i=0; i<nsends; i++){
2695     ierr = MPI_Isend(sbuf_a,nzlocal,MPIU_SCALAR,send_rank[i],tag3,comm,s_waits3+i);CHKERRQ(ierr);
2696   }
2697   count = nrecvs;
2698   while (count) {
2699     ierr = MPI_Waitany(nrecvs,r_waits3,&imdex,&recv_status);CHKERRQ(ierr);
2700     if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE);
2701     count--;
2702   }
2703   if (nsends) {
2704     ierr = MPI_Waitall(nsends,s_waits3,send_status);CHKERRQ(ierr);
2705   }
2706 
2707   ierr = PetscFree2(s_waits3,send_status);CHKERRQ(ierr);
2708 
2709   /* create redundant matrix */
2710   /*-------------------------*/
2711   if (reuse == MAT_INITIAL_MATRIX){
2712     /* compute rownz_max for preallocation */
2713     for (imdex=0; imdex<nrecvs; imdex++){
2714       j = rowrange[recv_rank[imdex]+1] - rowrange[recv_rank[imdex]];
2715       rptr = rbuf_j[imdex];
2716       for (i=0; i<j; i++){
2717         ncols = rptr[i+1] - rptr[i];
2718         if (rownz_max < ncols) rownz_max = ncols;
2719       }
2720     }
2721 
2722     ierr = MatCreate(subcomm,&C);CHKERRQ(ierr);
2723     ierr = MatSetSizes(C,mlocal_sub,mlocal_sub,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr);
2724     ierr = MatSetFromOptions(C);CHKERRQ(ierr);
2725     ierr = MatSeqAIJSetPreallocation(C,rownz_max,PETSC_NULL);CHKERRQ(ierr);
2726     ierr = MatMPIAIJSetPreallocation(C,rownz_max,PETSC_NULL,rownz_max,PETSC_NULL);CHKERRQ(ierr);
2727   } else {
2728     C = *matredundant;
2729   }
2730 
2731   /* insert local matrix entries */
2732   rptr = sbuf_j;
2733   cols = sbuf_j + rend-rstart + 1;
2734   vals = sbuf_a;
2735   for (i=0; i<rend-rstart; i++){
2736     row   = i + rstart;
2737     ncols = rptr[i+1] - rptr[i];
2738     ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
2739     vals += ncols;
2740     cols += ncols;
2741   }
2742   /* insert received matrix entries */
2743   for (imdex=0; imdex<nrecvs; imdex++){
2744     rstart = rowrange[recv_rank[imdex]];
2745     rend   = rowrange[recv_rank[imdex]+1];
2746     rptr = rbuf_j[imdex];
2747     cols = rbuf_j[imdex] + rend-rstart + 1;
2748     vals = rbuf_a[imdex];
2749     for (i=0; i<rend-rstart; i++){
2750       row   = i + rstart;
2751       ncols = rptr[i+1] - rptr[i];
2752       ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
2753       vals += ncols;
2754       cols += ncols;
2755     }
2756   }
2757   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2758   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2759   ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr);
2760   if (M != mat->rmap->N || N != mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"redundant mat size %d != input mat size %d",M,mat->rmap->N);
2761   if (reuse == MAT_INITIAL_MATRIX) {
2762     PetscContainer container;
2763     *matredundant = C;
2764     /* create a supporting struct and attach it to C for reuse */
2765     ierr = PetscNewLog(C,Mat_Redundant,&redund);CHKERRQ(ierr);
2766     ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
2767     ierr = PetscContainerSetPointer(container,redund);CHKERRQ(ierr);
2768     ierr = PetscContainerSetUserDestroy(container,PetscContainerDestroy_MatRedundant);CHKERRQ(ierr);
2769     ierr = PetscObjectCompose((PetscObject)C,"Mat_Redundant",(PetscObject)container);CHKERRQ(ierr);
2770     ierr = PetscContainerDestroy(&container);CHKERRQ(ierr);
2771 
2772     redund->nzlocal = nzlocal;
2773     redund->nsends  = nsends;
2774     redund->nrecvs  = nrecvs;
2775     redund->send_rank = send_rank;
2776     redund->recv_rank = recv_rank;
2777     redund->sbuf_nz = sbuf_nz;
2778     redund->rbuf_nz = rbuf_nz;
2779     redund->sbuf_j  = sbuf_j;
2780     redund->sbuf_a  = sbuf_a;
2781     redund->rbuf_j  = rbuf_j;
2782     redund->rbuf_a  = rbuf_a;
2783 
2784     redund->Destroy = C->ops->destroy;
2785     C->ops->destroy = MatDestroy_MatRedundant;
2786   }
2787   PetscFunctionReturn(0);
2788 }
2789 
2790 #undef __FUNCT__
2791 #define __FUNCT__ "MatGetRowMaxAbs_MPIAIJ"
2792 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2793 {
2794   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2795   PetscErrorCode ierr;
2796   PetscInt       i,*idxb = 0;
2797   PetscScalar    *va,*vb;
2798   Vec            vtmp;
2799 
2800   PetscFunctionBegin;
2801   ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr);
2802   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2803   if (idx) {
2804     for (i=0; i<A->rmap->n; i++) {
2805       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2806     }
2807   }
2808 
2809   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
2810   if (idx) {
2811     ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr);
2812   }
2813   ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
2814   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
2815 
2816   for (i=0; i<A->rmap->n; i++){
2817     if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
2818       va[i] = vb[i];
2819       if (idx) idx[i] = a->garray[idxb[i]];
2820     }
2821   }
2822 
2823   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2824   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
2825   ierr = PetscFree(idxb);CHKERRQ(ierr);
2826   ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
2827   PetscFunctionReturn(0);
2828 }
2829 
2830 #undef __FUNCT__
2831 #define __FUNCT__ "MatGetRowMinAbs_MPIAIJ"
2832 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2833 {
2834   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2835   PetscErrorCode ierr;
2836   PetscInt       i,*idxb = 0;
2837   PetscScalar    *va,*vb;
2838   Vec            vtmp;
2839 
2840   PetscFunctionBegin;
2841   ierr = MatGetRowMinAbs(a->A,v,idx);CHKERRQ(ierr);
2842   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2843   if (idx) {
2844       for (i=0; i<A->rmap->n; i++) {
2845       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2846     }
2847   }
2848 
2849   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
2850   if (idx) {
2851     ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr);
2852   }
2853   ierr = MatGetRowMinAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
2854   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
2855 
2856   for (i=0; i<A->rmap->n; i++){
2857     if (PetscAbsScalar(va[i]) > PetscAbsScalar(vb[i])) {
2858       va[i] = vb[i];
2859       if (idx) idx[i] = a->garray[idxb[i]];
2860     }
2861   }
2862 
2863   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2864   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
2865   ierr = PetscFree(idxb);CHKERRQ(ierr);
2866   ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
2867   PetscFunctionReturn(0);
2868 }
2869 
2870 #undef __FUNCT__
2871 #define __FUNCT__ "MatGetRowMin_MPIAIJ"
2872 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2873 {
2874   Mat_MPIAIJ    *mat    = (Mat_MPIAIJ *) A->data;
2875   PetscInt       n      = A->rmap->n;
2876   PetscInt       cstart = A->cmap->rstart;
2877   PetscInt      *cmap   = mat->garray;
2878   PetscInt      *diagIdx, *offdiagIdx;
2879   Vec            diagV, offdiagV;
2880   PetscScalar   *a, *diagA, *offdiagA;
2881   PetscInt       r;
2882   PetscErrorCode ierr;
2883 
2884   PetscFunctionBegin;
2885   ierr = PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);CHKERRQ(ierr);
2886   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr);
2887   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr);
2888   ierr = MatGetRowMin(mat->A, diagV,    diagIdx);CHKERRQ(ierr);
2889   ierr = MatGetRowMin(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
2890   ierr = VecGetArray(v,        &a);CHKERRQ(ierr);
2891   ierr = VecGetArray(diagV,    &diagA);CHKERRQ(ierr);
2892   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2893   for(r = 0; r < n; ++r) {
2894     if (PetscAbsScalar(diagA[r]) <= PetscAbsScalar(offdiagA[r])) {
2895       a[r]   = diagA[r];
2896       idx[r] = cstart + diagIdx[r];
2897     } else {
2898       a[r]   = offdiagA[r];
2899       idx[r] = cmap[offdiagIdx[r]];
2900     }
2901   }
2902   ierr = VecRestoreArray(v,        &a);CHKERRQ(ierr);
2903   ierr = VecRestoreArray(diagV,    &diagA);CHKERRQ(ierr);
2904   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2905   ierr = VecDestroy(&diagV);CHKERRQ(ierr);
2906   ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
2907   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
2908   PetscFunctionReturn(0);
2909 }
2910 
2911 #undef __FUNCT__
2912 #define __FUNCT__ "MatGetRowMax_MPIAIJ"
2913 PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2914 {
2915   Mat_MPIAIJ    *mat    = (Mat_MPIAIJ *) A->data;
2916   PetscInt       n      = A->rmap->n;
2917   PetscInt       cstart = A->cmap->rstart;
2918   PetscInt      *cmap   = mat->garray;
2919   PetscInt      *diagIdx, *offdiagIdx;
2920   Vec            diagV, offdiagV;
2921   PetscScalar   *a, *diagA, *offdiagA;
2922   PetscInt       r;
2923   PetscErrorCode ierr;
2924 
2925   PetscFunctionBegin;
2926   ierr = PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);CHKERRQ(ierr);
2927   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr);
2928   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr);
2929   ierr = MatGetRowMax(mat->A, diagV,    diagIdx);CHKERRQ(ierr);
2930   ierr = MatGetRowMax(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
2931   ierr = VecGetArray(v,        &a);CHKERRQ(ierr);
2932   ierr = VecGetArray(diagV,    &diagA);CHKERRQ(ierr);
2933   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2934   for(r = 0; r < n; ++r) {
2935     if (PetscAbsScalar(diagA[r]) >= PetscAbsScalar(offdiagA[r])) {
2936       a[r]   = diagA[r];
2937       idx[r] = cstart + diagIdx[r];
2938     } else {
2939       a[r]   = offdiagA[r];
2940       idx[r] = cmap[offdiagIdx[r]];
2941     }
2942   }
2943   ierr = VecRestoreArray(v,        &a);CHKERRQ(ierr);
2944   ierr = VecRestoreArray(diagV,    &diagA);CHKERRQ(ierr);
2945   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2946   ierr = VecDestroy(&diagV);CHKERRQ(ierr);
2947   ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
2948   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
2949   PetscFunctionReturn(0);
2950 }
2951 
2952 #undef __FUNCT__
2953 #define __FUNCT__ "MatGetSeqNonzeroStructure_MPIAIJ"
2954 PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat,Mat *newmat)
2955 {
2956   PetscErrorCode ierr;
2957   Mat            *dummy;
2958 
2959   PetscFunctionBegin;
2960   ierr = MatGetSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy);CHKERRQ(ierr);
2961   *newmat = *dummy;
2962   ierr = PetscFree(dummy);CHKERRQ(ierr);
2963   PetscFunctionReturn(0);
2964 }
2965 
2966 extern PetscErrorCode  MatFDColoringApply_AIJ(Mat,MatFDColoring,Vec,MatStructure*,void*);
2967 
2968 #undef __FUNCT__
2969 #define __FUNCT__ "MatInvertBlockDiagonal_MPIAIJ"
2970 PetscErrorCode  MatInvertBlockDiagonal_MPIAIJ(Mat A,PetscScalar **values)
2971 {
2972   Mat_MPIAIJ    *a = (Mat_MPIAIJ*) A->data;
2973   PetscErrorCode ierr;
2974 
2975   PetscFunctionBegin;
2976   ierr = MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr);
2977   PetscFunctionReturn(0);
2978 }
2979 
2980 
2981 /* -------------------------------------------------------------------*/
2982 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
2983        MatGetRow_MPIAIJ,
2984        MatRestoreRow_MPIAIJ,
2985        MatMult_MPIAIJ,
2986 /* 4*/ MatMultAdd_MPIAIJ,
2987        MatMultTranspose_MPIAIJ,
2988        MatMultTransposeAdd_MPIAIJ,
2989 #ifdef PETSC_HAVE_PBGL
2990        MatSolve_MPIAIJ,
2991 #else
2992        0,
2993 #endif
2994        0,
2995        0,
2996 /*10*/ 0,
2997        0,
2998        0,
2999        MatSOR_MPIAIJ,
3000        MatTranspose_MPIAIJ,
3001 /*15*/ MatGetInfo_MPIAIJ,
3002        MatEqual_MPIAIJ,
3003        MatGetDiagonal_MPIAIJ,
3004        MatDiagonalScale_MPIAIJ,
3005        MatNorm_MPIAIJ,
3006 /*20*/ MatAssemblyBegin_MPIAIJ,
3007        MatAssemblyEnd_MPIAIJ,
3008        MatSetOption_MPIAIJ,
3009        MatZeroEntries_MPIAIJ,
3010 /*24*/ MatZeroRows_MPIAIJ,
3011        0,
3012 #ifdef PETSC_HAVE_PBGL
3013        0,
3014 #else
3015        0,
3016 #endif
3017        0,
3018        0,
3019 /*29*/ MatSetUpPreallocation_MPIAIJ,
3020 #ifdef PETSC_HAVE_PBGL
3021        0,
3022 #else
3023        0,
3024 #endif
3025        0,
3026        0,
3027        0,
3028 /*34*/ MatDuplicate_MPIAIJ,
3029        0,
3030        0,
3031        0,
3032        0,
3033 /*39*/ MatAXPY_MPIAIJ,
3034        MatGetSubMatrices_MPIAIJ,
3035        MatIncreaseOverlap_MPIAIJ,
3036        MatGetValues_MPIAIJ,
3037        MatCopy_MPIAIJ,
3038 /*44*/ MatGetRowMax_MPIAIJ,
3039        MatScale_MPIAIJ,
3040        0,
3041        0,
3042        MatZeroRowsColumns_MPIAIJ,
3043 /*49*/ MatSetBlockSize_MPIAIJ,
3044        0,
3045        0,
3046        0,
3047        0,
3048 /*54*/ MatFDColoringCreate_MPIAIJ,
3049        0,
3050        MatSetUnfactored_MPIAIJ,
3051        MatPermute_MPIAIJ,
3052        0,
3053 /*59*/ MatGetSubMatrix_MPIAIJ,
3054        MatDestroy_MPIAIJ,
3055        MatView_MPIAIJ,
3056        0,
3057        0,
3058 /*64*/ 0,
3059        0,
3060        0,
3061        0,
3062        0,
3063 /*69*/ MatGetRowMaxAbs_MPIAIJ,
3064        MatGetRowMinAbs_MPIAIJ,
3065        0,
3066        MatSetColoring_MPIAIJ,
3067 #if defined(PETSC_HAVE_ADIC)
3068        MatSetValuesAdic_MPIAIJ,
3069 #else
3070        0,
3071 #endif
3072        MatSetValuesAdifor_MPIAIJ,
3073 /*75*/ MatFDColoringApply_AIJ,
3074        0,
3075        0,
3076        0,
3077        0,
3078 /*80*/ 0,
3079        0,
3080        0,
3081 /*83*/ MatLoad_MPIAIJ,
3082        0,
3083        0,
3084        0,
3085        0,
3086        0,
3087 /*89*/ MatMatMult_MPIAIJ_MPIAIJ,
3088        MatMatMultSymbolic_MPIAIJ_MPIAIJ,
3089        MatMatMultNumeric_MPIAIJ_MPIAIJ,
3090        MatPtAP_Basic,
3091        MatPtAPSymbolic_MPIAIJ,
3092 /*94*/ MatPtAPNumeric_MPIAIJ,
3093        0,
3094        0,
3095        0,
3096        0,
3097 /*99*/ 0,
3098        MatPtAPSymbolic_MPIAIJ_MPIAIJ,
3099        MatPtAPNumeric_MPIAIJ_MPIAIJ,
3100        MatConjugate_MPIAIJ,
3101        0,
3102 /*104*/MatSetValuesRow_MPIAIJ,
3103        MatRealPart_MPIAIJ,
3104        MatImaginaryPart_MPIAIJ,
3105        0,
3106        0,
3107 /*109*/0,
3108        MatGetRedundantMatrix_MPIAIJ,
3109        MatGetRowMin_MPIAIJ,
3110        0,
3111        0,
3112 /*114*/MatGetSeqNonzeroStructure_MPIAIJ,
3113        0,
3114        0,
3115        0,
3116        0,
3117 /*119*/0,
3118        0,
3119        0,
3120        0,
3121        MatGetMultiProcBlock_MPIAIJ,
3122 /*124*/MatFindNonZeroRows_MPIAIJ,
3123        MatGetColumnNorms_MPIAIJ,
3124        MatInvertBlockDiagonal_MPIAIJ,
3125        0,
3126        MatGetSubMatricesParallel_MPIAIJ,
3127 /*129*/0
3128 };
3129 
3130 /* ----------------------------------------------------------------------------------------*/
3131 
3132 EXTERN_C_BEGIN
3133 #undef __FUNCT__
3134 #define __FUNCT__ "MatStoreValues_MPIAIJ"
3135 PetscErrorCode  MatStoreValues_MPIAIJ(Mat mat)
3136 {
3137   Mat_MPIAIJ     *aij = (Mat_MPIAIJ *)mat->data;
3138   PetscErrorCode ierr;
3139 
3140   PetscFunctionBegin;
3141   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
3142   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
3143   PetscFunctionReturn(0);
3144 }
3145 EXTERN_C_END
3146 
3147 EXTERN_C_BEGIN
3148 #undef __FUNCT__
3149 #define __FUNCT__ "MatRetrieveValues_MPIAIJ"
3150 PetscErrorCode  MatRetrieveValues_MPIAIJ(Mat mat)
3151 {
3152   Mat_MPIAIJ     *aij = (Mat_MPIAIJ *)mat->data;
3153   PetscErrorCode ierr;
3154 
3155   PetscFunctionBegin;
3156   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
3157   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
3158   PetscFunctionReturn(0);
3159 }
3160 EXTERN_C_END
3161 
3162 EXTERN_C_BEGIN
3163 #undef __FUNCT__
3164 #define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ"
3165 PetscErrorCode  MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
3166 {
3167   Mat_MPIAIJ     *b;
3168   PetscErrorCode ierr;
3169   PetscInt       i;
3170 
3171   PetscFunctionBegin;
3172   if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5;
3173   if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2;
3174   if (d_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz);
3175   if (o_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz);
3176 
3177   ierr = PetscLayoutSetBlockSize(B->rmap,1);CHKERRQ(ierr);
3178   ierr = PetscLayoutSetBlockSize(B->cmap,1);CHKERRQ(ierr);
3179   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3180   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3181   if (d_nnz) {
3182     for (i=0; i<B->rmap->n; i++) {
3183       if (d_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %D value %D",i,d_nnz[i]);
3184     }
3185   }
3186   if (o_nnz) {
3187     for (i=0; i<B->rmap->n; i++) {
3188       if (o_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %D value %D",i,o_nnz[i]);
3189     }
3190   }
3191   b = (Mat_MPIAIJ*)B->data;
3192 
3193   if (!B->preallocated) {
3194     /* Explicitly create 2 MATSEQAIJ matrices. */
3195     ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
3196     ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr);
3197     ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr);
3198     ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr);
3199     ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
3200     ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr);
3201     ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr);
3202     ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr);
3203   }
3204 
3205   ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr);
3206   ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr);
3207   B->preallocated = PETSC_TRUE;
3208   PetscFunctionReturn(0);
3209 }
3210 EXTERN_C_END
3211 
3212 #undef __FUNCT__
3213 #define __FUNCT__ "MatDuplicate_MPIAIJ"
3214 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
3215 {
3216   Mat            mat;
3217   Mat_MPIAIJ     *a,*oldmat = (Mat_MPIAIJ*)matin->data;
3218   PetscErrorCode ierr;
3219 
3220   PetscFunctionBegin;
3221   *newmat       = 0;
3222   ierr = MatCreate(((PetscObject)matin)->comm,&mat);CHKERRQ(ierr);
3223   ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr);
3224   ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr);
3225   ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
3226   a    = (Mat_MPIAIJ*)mat->data;
3227 
3228   mat->factortype    = matin->factortype;
3229   mat->rmap->bs      = matin->rmap->bs;
3230   mat->assembled    = PETSC_TRUE;
3231   mat->insertmode   = NOT_SET_VALUES;
3232   mat->preallocated = PETSC_TRUE;
3233 
3234   a->size           = oldmat->size;
3235   a->rank           = oldmat->rank;
3236   a->donotstash     = oldmat->donotstash;
3237   a->roworiented    = oldmat->roworiented;
3238   a->rowindices     = 0;
3239   a->rowvalues      = 0;
3240   a->getrowactive   = PETSC_FALSE;
3241 
3242   ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr);
3243   ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr);
3244 
3245   if (oldmat->colmap) {
3246 #if defined (PETSC_USE_CTABLE)
3247     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
3248 #else
3249     ierr = PetscMalloc((mat->cmap->N)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr);
3250     ierr = PetscLogObjectMemory(mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
3251     ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
3252 #endif
3253   } else a->colmap = 0;
3254   if (oldmat->garray) {
3255     PetscInt len;
3256     len  = oldmat->B->cmap->n;
3257     ierr = PetscMalloc((len+1)*sizeof(PetscInt),&a->garray);CHKERRQ(ierr);
3258     ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr);
3259     if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); }
3260   } else a->garray = 0;
3261 
3262   ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
3263   ierr = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr);
3264   ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
3265   ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr);
3266   ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
3267   ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr);
3268   ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
3269   ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr);
3270   ierr = PetscFListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr);
3271   *newmat = mat;
3272   PetscFunctionReturn(0);
3273 }
3274 
3275 
3276 
3277 #undef __FUNCT__
3278 #define __FUNCT__ "MatLoad_MPIAIJ"
3279 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer)
3280 {
3281   PetscScalar    *vals,*svals;
3282   MPI_Comm       comm = ((PetscObject)viewer)->comm;
3283   PetscErrorCode ierr;
3284   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
3285   PetscInt       i,nz,j,rstart,rend,mmax,maxnz = 0,grows,gcols;
3286   PetscInt       header[4],*rowlengths = 0,M,N,m,*cols;
3287   PetscInt       *ourlens = PETSC_NULL,*procsnz = PETSC_NULL,*offlens = PETSC_NULL,jj,*mycols,*smycols;
3288   PetscInt       cend,cstart,n,*rowners,sizesset=1;
3289   int            fd;
3290 
3291   PetscFunctionBegin;
3292   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3293   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3294   if (!rank) {
3295     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
3296     ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
3297     if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
3298   }
3299 
3300   if (newMat->rmap->n < 0 && newMat->rmap->N < 0 && newMat->cmap->n < 0 && newMat->cmap->N < 0) sizesset = 0;
3301 
3302   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
3303   M = header[1]; N = header[2];
3304   /* If global rows/cols are set to PETSC_DECIDE, set it to the sizes given in the file */
3305   if (sizesset && newMat->rmap->N < 0) newMat->rmap->N = M;
3306   if (sizesset && newMat->cmap->N < 0) newMat->cmap->N = N;
3307 
3308   /* If global sizes are set, check if they are consistent with that given in the file */
3309   if (sizesset) {
3310     ierr = MatGetSize(newMat,&grows,&gcols);CHKERRQ(ierr);
3311   }
3312   if (sizesset && newMat->rmap->N != grows) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows:Matrix in file has (%d) and input matrix has (%d)",M,grows);
3313   if (sizesset && newMat->cmap->N != gcols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols:Matrix in file has (%d) and input matrix has (%d)",N,gcols);
3314 
3315   /* determine ownership of all rows */
3316   if (newMat->rmap->n < 0 ) m    = M/size + ((M % size) > rank); /* PETSC_DECIDE */
3317   else m = newMat->rmap->n; /* Set by user */
3318 
3319   ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr);
3320   ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
3321 
3322   /* First process needs enough room for process with most rows */
3323   if (!rank) {
3324     mmax       = rowners[1];
3325     for (i=2; i<=size; i++) {
3326       mmax = PetscMax(mmax,rowners[i]);
3327     }
3328   } else mmax = m;
3329 
3330   rowners[0] = 0;
3331   for (i=2; i<=size; i++) {
3332     rowners[i] += rowners[i-1];
3333   }
3334   rstart = rowners[rank];
3335   rend   = rowners[rank+1];
3336 
3337   /* distribute row lengths to all processors */
3338   ierr    = PetscMalloc2(mmax,PetscInt,&ourlens,mmax,PetscInt,&offlens);CHKERRQ(ierr);
3339   if (!rank) {
3340     ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr);
3341     ierr = PetscMalloc(m*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
3342     ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr);
3343     ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr);
3344     for (j=0; j<m; j++) {
3345       procsnz[0] += ourlens[j];
3346     }
3347     for (i=1; i<size; i++) {
3348       ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr);
3349       /* calculate the number of nonzeros on each processor */
3350       for (j=0; j<rowners[i+1]-rowners[i]; j++) {
3351         procsnz[i] += rowlengths[j];
3352       }
3353       ierr = MPILong_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
3354     }
3355     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
3356   } else {
3357     ierr = MPILong_Recv(ourlens,m,MPIU_INT,0,tag,comm);CHKERRQ(ierr);
3358   }
3359 
3360   if (!rank) {
3361     /* determine max buffer needed and allocate it */
3362     maxnz = 0;
3363     for (i=0; i<size; i++) {
3364       maxnz = PetscMax(maxnz,procsnz[i]);
3365     }
3366     ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr);
3367 
3368     /* read in my part of the matrix column indices  */
3369     nz   = procsnz[0];
3370     ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr);
3371     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
3372 
3373     /* read in everyone else's and ship off */
3374     for (i=1; i<size; i++) {
3375       nz     = procsnz[i];
3376       ierr   = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
3377       ierr   = MPILong_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
3378     }
3379     ierr = PetscFree(cols);CHKERRQ(ierr);
3380   } else {
3381     /* determine buffer space needed for message */
3382     nz = 0;
3383     for (i=0; i<m; i++) {
3384       nz += ourlens[i];
3385     }
3386     ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr);
3387 
3388     /* receive message of column indices*/
3389     ierr = MPILong_Recv(mycols,nz,MPIU_INT,0,tag,comm);CHKERRQ(ierr);
3390   }
3391 
3392   /* determine column ownership if matrix is not square */
3393   if (N != M) {
3394     if (newMat->cmap->n < 0) n      = N/size + ((N % size) > rank);
3395     else n = newMat->cmap->n;
3396     ierr   = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3397     cstart = cend - n;
3398   } else {
3399     cstart = rstart;
3400     cend   = rend;
3401     n      = cend - cstart;
3402   }
3403 
3404   /* loop over local rows, determining number of off diagonal entries */
3405   ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr);
3406   jj = 0;
3407   for (i=0; i<m; i++) {
3408     for (j=0; j<ourlens[i]; j++) {
3409       if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++;
3410       jj++;
3411     }
3412   }
3413 
3414   for (i=0; i<m; i++) {
3415     ourlens[i] -= offlens[i];
3416   }
3417   if (!sizesset) {
3418     ierr = MatSetSizes(newMat,m,n,M,N);CHKERRQ(ierr);
3419   }
3420   ierr = MatMPIAIJSetPreallocation(newMat,0,ourlens,0,offlens);CHKERRQ(ierr);
3421 
3422   for (i=0; i<m; i++) {
3423     ourlens[i] += offlens[i];
3424   }
3425 
3426   if (!rank) {
3427     ierr = PetscMalloc((maxnz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
3428 
3429     /* read in my part of the matrix numerical values  */
3430     nz   = procsnz[0];
3431     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3432 
3433     /* insert into matrix */
3434     jj      = rstart;
3435     smycols = mycols;
3436     svals   = vals;
3437     for (i=0; i<m; i++) {
3438       ierr = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
3439       smycols += ourlens[i];
3440       svals   += ourlens[i];
3441       jj++;
3442     }
3443 
3444     /* read in other processors and ship out */
3445     for (i=1; i<size; i++) {
3446       nz     = procsnz[i];
3447       ierr   = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3448       ierr   = MPILong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr);
3449     }
3450     ierr = PetscFree(procsnz);CHKERRQ(ierr);
3451   } else {
3452     /* receive numeric values */
3453     ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
3454 
3455     /* receive message of values*/
3456     ierr   = MPILong_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr);
3457 
3458     /* insert into matrix */
3459     jj      = rstart;
3460     smycols = mycols;
3461     svals   = vals;
3462     for (i=0; i<m; i++) {
3463       ierr     = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
3464       smycols += ourlens[i];
3465       svals   += ourlens[i];
3466       jj++;
3467     }
3468   }
3469   ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr);
3470   ierr = PetscFree(vals);CHKERRQ(ierr);
3471   ierr = PetscFree(mycols);CHKERRQ(ierr);
3472   ierr = PetscFree(rowners);CHKERRQ(ierr);
3473 
3474   ierr = MatAssemblyBegin(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3475   ierr = MatAssemblyEnd(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3476   PetscFunctionReturn(0);
3477 }
3478 
3479 #undef __FUNCT__
3480 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ"
3481 PetscErrorCode MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat)
3482 {
3483   PetscErrorCode ierr;
3484   IS             iscol_local;
3485   PetscInt       csize;
3486 
3487   PetscFunctionBegin;
3488   ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
3489   if (call == MAT_REUSE_MATRIX) {
3490     ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr);
3491     if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3492   } else {
3493     ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
3494   }
3495   ierr = MatGetSubMatrix_MPIAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr);
3496   if (call == MAT_INITIAL_MATRIX) {
3497     ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
3498     ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
3499   }
3500   PetscFunctionReturn(0);
3501 }
3502 
3503 #undef __FUNCT__
3504 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ_Private"
3505 /*
3506     Not great since it makes two copies of the submatrix: first a SeqAIJ
3507   locally, and then the end result by concatenating the local matrices.
3508   Writing it directly would be much like MatGetSubMatrices_MPIAIJ()
3509 
3510   Note: This requires a sequential iscol with all indices.
3511 */
3512 PetscErrorCode MatGetSubMatrix_MPIAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat)
3513 {
3514   PetscErrorCode ierr;
3515   PetscMPIInt    rank,size;
3516   PetscInt       i,m,n,rstart,row,rend,nz,*cwork,j;
3517   PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal;
3518   Mat            *local,M,Mreuse;
3519   MatScalar      *vwork,*aa;
3520   MPI_Comm       comm = ((PetscObject)mat)->comm;
3521   Mat_SeqAIJ     *aij;
3522 
3523 
3524   PetscFunctionBegin;
3525   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3526   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3527 
3528   if (call ==  MAT_REUSE_MATRIX) {
3529     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr);
3530     if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3531     local = &Mreuse;
3532     ierr  = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);CHKERRQ(ierr);
3533   } else {
3534     ierr   = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr);
3535     Mreuse = *local;
3536     ierr   = PetscFree(local);CHKERRQ(ierr);
3537   }
3538 
3539   /*
3540       m - number of local rows
3541       n - number of columns (same on all processors)
3542       rstart - first row in new global matrix generated
3543   */
3544   ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
3545   if (call == MAT_INITIAL_MATRIX) {
3546     aij = (Mat_SeqAIJ*)(Mreuse)->data;
3547     ii  = aij->i;
3548     jj  = aij->j;
3549 
3550     /*
3551         Determine the number of non-zeros in the diagonal and off-diagonal
3552         portions of the matrix in order to do correct preallocation
3553     */
3554 
3555     /* first get start and end of "diagonal" columns */
3556     if (csize == PETSC_DECIDE) {
3557       ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
3558       if (mglobal == n) { /* square matrix */
3559 	nlocal = m;
3560       } else {
3561         nlocal = n/size + ((n % size) > rank);
3562       }
3563     } else {
3564       nlocal = csize;
3565     }
3566     ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3567     rstart = rend - nlocal;
3568     if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n);
3569 
3570     /* next, compute all the lengths */
3571     ierr  = PetscMalloc((2*m+1)*sizeof(PetscInt),&dlens);CHKERRQ(ierr);
3572     olens = dlens + m;
3573     for (i=0; i<m; i++) {
3574       jend = ii[i+1] - ii[i];
3575       olen = 0;
3576       dlen = 0;
3577       for (j=0; j<jend; j++) {
3578         if (*jj < rstart || *jj >= rend) olen++;
3579         else dlen++;
3580         jj++;
3581       }
3582       olens[i] = olen;
3583       dlens[i] = dlen;
3584     }
3585     ierr = MatCreate(comm,&M);CHKERRQ(ierr);
3586     ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr);
3587     ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr);
3588     ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr);
3589     ierr = PetscFree(dlens);CHKERRQ(ierr);
3590   } else {
3591     PetscInt ml,nl;
3592 
3593     M = *newmat;
3594     ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
3595     if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
3596     ierr = MatZeroEntries(M);CHKERRQ(ierr);
3597     /*
3598          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
3599        rather than the slower MatSetValues().
3600     */
3601     M->was_assembled = PETSC_TRUE;
3602     M->assembled     = PETSC_FALSE;
3603   }
3604   ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
3605   aij = (Mat_SeqAIJ*)(Mreuse)->data;
3606   ii  = aij->i;
3607   jj  = aij->j;
3608   aa  = aij->a;
3609   for (i=0; i<m; i++) {
3610     row   = rstart + i;
3611     nz    = ii[i+1] - ii[i];
3612     cwork = jj;     jj += nz;
3613     vwork = aa;     aa += nz;
3614     ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
3615   }
3616 
3617   ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3618   ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3619   *newmat = M;
3620 
3621   /* save submatrix used in processor for next request */
3622   if (call ==  MAT_INITIAL_MATRIX) {
3623     ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
3624     ierr = MatDestroy(&Mreuse);CHKERRQ(ierr);
3625   }
3626 
3627   PetscFunctionReturn(0);
3628 }
3629 
3630 EXTERN_C_BEGIN
3631 #undef __FUNCT__
3632 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR_MPIAIJ"
3633 PetscErrorCode  MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[])
3634 {
3635   PetscInt       m,cstart, cend,j,nnz,i,d;
3636   PetscInt       *d_nnz,*o_nnz,nnz_max = 0,rstart,ii;
3637   const PetscInt *JJ;
3638   PetscScalar    *values;
3639   PetscErrorCode ierr;
3640 
3641   PetscFunctionBegin;
3642   if (Ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0 it is %D",Ii[0]);
3643 
3644   ierr = PetscLayoutSetBlockSize(B->rmap,1);CHKERRQ(ierr);
3645   ierr = PetscLayoutSetBlockSize(B->cmap,1);CHKERRQ(ierr);
3646   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3647   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3648   m      = B->rmap->n;
3649   cstart = B->cmap->rstart;
3650   cend   = B->cmap->rend;
3651   rstart = B->rmap->rstart;
3652 
3653   ierr  = PetscMalloc2(m,PetscInt,&d_nnz,m,PetscInt,&o_nnz);CHKERRQ(ierr);
3654 
3655 #if defined(PETSC_USE_DEBUG)
3656   for (i=0; i<m; i++) {
3657     nnz     = Ii[i+1]- Ii[i];
3658     JJ      = J + Ii[i];
3659     if (nnz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative %D number of columns",i,nnz);
3660     if (nnz && (JJ[0] < 0)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index %D",i,JJ[0]);
3661     if (nnz && (JJ[nnz-1] >= B->cmap->N)) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N);
3662   }
3663 #endif
3664 
3665   for (i=0; i<m; i++) {
3666     nnz     = Ii[i+1]- Ii[i];
3667     JJ      = J + Ii[i];
3668     nnz_max = PetscMax(nnz_max,nnz);
3669     d       = 0;
3670     for (j=0; j<nnz; j++) {
3671       if (cstart <= JJ[j] && JJ[j] < cend) d++;
3672     }
3673     d_nnz[i] = d;
3674     o_nnz[i] = nnz - d;
3675   }
3676   ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
3677   ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);
3678 
3679   if (v) values = (PetscScalar*)v;
3680   else {
3681     ierr = PetscMalloc((nnz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr);
3682     ierr = PetscMemzero(values,nnz_max*sizeof(PetscScalar));CHKERRQ(ierr);
3683   }
3684 
3685   for (i=0; i<m; i++) {
3686     ii   = i + rstart;
3687     nnz  = Ii[i+1]- Ii[i];
3688     ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ? Ii[i] : 0),INSERT_VALUES);CHKERRQ(ierr);
3689   }
3690   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3691   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3692 
3693   if (!v) {
3694     ierr = PetscFree(values);CHKERRQ(ierr);
3695   }
3696   PetscFunctionReturn(0);
3697 }
3698 EXTERN_C_END
3699 
3700 #undef __FUNCT__
3701 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR"
3702 /*@
3703    MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format
3704    (the default parallel PETSc format).
3705 
3706    Collective on MPI_Comm
3707 
3708    Input Parameters:
3709 +  B - the matrix
3710 .  i - the indices into j for the start of each local row (starts with zero)
3711 .  j - the column indices for each local row (starts with zero)
3712 -  v - optional values in the matrix
3713 
3714    Level: developer
3715 
3716    Notes:
3717        The i, j, and v arrays ARE copied by this routine into the internal format used by PETSc;
3718      thus you CANNOT change the matrix entries by changing the values of v[] after you have
3719      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3720 
3721        The i and j indices are 0 based, and the i indices are offsets into the local j array.
3722 
3723        The format used for the sparse matrix input is equivalent to a
3724     row-major ordering, i.e. for the following matrix, the input data expected is
3725     as shown:
3726 
3727         1 0 0
3728         2 0 3     P0
3729        -------
3730         4 5 6     P1
3731 
3732      Process0 [P0]: rows_owned=[0,1]
3733         i =  {0,1,3}  [size = nrow+1  = 2+1]
3734        j =  {0,0,2}  [size = nz = 3]
3735        v =  {1,2,3}  [size = nz = 3]
3736 
3737      Process1 [P1]: rows_owned=[2]
3738         i =  {0,3}    [size = nrow+1  = 1+1]
3739        j =  {0,1,2}  [size = nz = 3]
3740        v =  {4,5,6}  [size = nz = 3]
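
       As a minimal calling sketch (not part of this routine's interface): on process 0 of the
     example above, i = {0,1,3} gives the row offsets into j for the 2 local rows, j = {0,0,2}
     holds the global column indices of the 3 local nonzeros, and v = {1,2,3} their values.
     The communicator, the use of PETSC_DECIDE for the local column size, and the declared
     PetscErrorCode ierr are assumptions of this illustration.

.vb
     PetscInt    i[] = {0,1,3};
     PetscInt    j[] = {0,0,2};
     PetscScalar v[] = {1.0,2.0,3.0};
     Mat         B;

     ierr = MatCreate(PETSC_COMM_WORLD,&B);CHKERRQ(ierr);
     ierr = MatSetSizes(B,2,PETSC_DECIDE,3,3);CHKERRQ(ierr);
     ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr);
     ierr = MatMPIAIJSetPreallocationCSR(B,i,j,v);CHKERRQ(ierr);
.ve
     The routine is collective, so process 1 makes the same sequence of calls with its own
     i, j, and v arrays.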
3741 
3742 .keywords: matrix, aij, compressed row, sparse, parallel
3743 
3744 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateMPIAIJ(), MPIAIJ,
3745           MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays()
3746 @*/
3747 PetscErrorCode  MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
3748 {
3749   PetscErrorCode ierr;
3750 
3751   PetscFunctionBegin;
3752   ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocationCSR_C",(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,i,j,v));CHKERRQ(ierr);
3753   PetscFunctionReturn(0);
3754 }
3755 
3756 #undef __FUNCT__
3757 #define __FUNCT__ "MatMPIAIJSetPreallocation"
3758 /*@C
3759    MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format
3760    (the default parallel PETSc format).  For good matrix assembly performance
3761    the user should preallocate the matrix storage by setting the parameters
3762    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3763    performance can be increased by more than a factor of 50.
3764 
3765    Collective on MPI_Comm
3766 
3767    Input Parameters:
3768 +  A - the matrix
3769 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
3770            (same value is used for all local rows)
3771 .  d_nnz - array containing the number of nonzeros in the various rows of the
3772            DIAGONAL portion of the local submatrix (possibly different for each row)
3773            or PETSC_NULL, if d_nz is used to specify the nonzero structure.
3774            The size of this array is equal to the number of local rows, i.e 'm'.
3775            You must leave room for the diagonal entry even if it is zero.
3776 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
3777            submatrix (same value is used for all local rows).
3778 -  o_nnz - array containing the number of nonzeros in the various rows of the
3779            OFF-DIAGONAL portion of the local submatrix (possibly different for
3780            each row) or PETSC_NULL, if o_nz is used to specify the nonzero
3781            structure. The size of this array is equal to the number
3782            of local rows, i.e 'm'.
3783 
3784    If the *_nnz parameter is given then the *_nz parameter is ignored
3785 
3786    The AIJ format (also called the Yale sparse matrix format or
3787    compressed row storage (CSR)) is fully compatible with standard Fortran 77
3788    storage.  The stored row and column indices begin with zero.
3789    See the <A href="../../docs/manual.pdf#nameddest=ch_mat">Mat chapter of the users manual</A> for details.
3790 
3791    The parallel matrix is partitioned such that the first m0 rows belong to
3792    process 0, the next m1 rows belong to process 1, the next m2 rows belong
3793    to process 2 etc.. where m0,m1,m2... are the input parameter 'm'.
3794 
3795    The DIAGONAL portion of the local submatrix of a processor can be defined
3796    as the submatrix which is obtained by extracting the part corresponding to
3797    the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the
3798    first row that belongs to the processor, r2 is the last row belonging to
3799    this processor, and c1-c2 is the range of indices of the local part of a
3800    vector suitable for applying the matrix to.  This is an mxn matrix.  In the
3801    common case of a square matrix, the row and column ranges are the same and
3802    the DIAGONAL part is also square. The remaining portion of the local
3803    submatrix (m x (N-n)) constitutes the OFF-DIAGONAL portion.
3804 
3805    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
3806 
3807    You can call MatGetInfo() to get information on how effective the preallocation was;
3808    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
3809    You can also run with the option -info and look for messages with the string
3810    malloc in them to see if additional memory allocation was needed.
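
   For instance, a sketch of such a check after assembly (the MAT_LOCAL flag, the print
   format, and the declared PetscErrorCode ierr are assumptions of this illustration):

.vb
     MatInfo info;
     ierr = MatGetInfo(A,MAT_LOCAL,&info);CHKERRQ(ierr);
     ierr = PetscPrintf(PETSC_COMM_SELF,"mallocs %g nz_allocated %g nz_used %g nz_unneeded %g\n",
                        info.mallocs,info.nz_allocated,info.nz_used,info.nz_unneeded);CHKERRQ(ierr);
.ve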
3811 
3812    Example usage:
3813 
3814    Consider the following 8x8 matrix with 34 non-zero values, that is
3815    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
3816    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
3817    as follows:
3818 
3819 .vb
3820             1  2  0  |  0  3  0  |  0  4
3821     Proc0   0  5  6  |  7  0  0  |  8  0
3822             9  0 10  | 11  0  0  | 12  0
3823     -------------------------------------
3824            13  0 14  | 15 16 17  |  0  0
3825     Proc1   0 18  0  | 19 20 21  |  0  0
3826             0  0  0  | 22 23  0  | 24  0
3827     -------------------------------------
3828     Proc2  25 26 27  |  0  0 28  | 29  0
3829            30  0  0  | 31 32 33  |  0 34
3830 .ve
3831 
3832    This can be represented as a collection of submatrices as:
3833 
3834 .vb
3835       A B C
3836       D E F
3837       G H I
3838 .ve
3839 
3840    Where the submatrices A,B,C are owned by proc0, D,E,F are
3841    owned by proc1, G,H,I are owned by proc2.
3842 
3843    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3844    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3845    The 'M','N' parameters are 8,8, and have the same values on all procs.
3846 
3847    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
3848    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
3849    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
3850    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
3851    part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
3852    matrix, and [DF] as another SeqAIJ matrix.
3853 
3854    When d_nz, o_nz parameters are specified, d_nz storage elements are
3855    allocated for every row of the local diagonal submatrix, and o_nz
3856    storage locations are allocated for every row of the OFF-DIAGONAL submat.
3857    One way to choose d_nz and o_nz is to use the maximum number of nonzeros over
3858    the local rows for each of the local DIAGONAL and OFF-DIAGONAL submatrices.
3859    In this case, the values of d_nz,o_nz are:
3860 .vb
3861      proc0 : d_nz = 2, o_nz = 2
3862      proc1 : d_nz = 3, o_nz = 2
3863      proc2 : d_nz = 1, o_nz = 4
3864 .ve
3865    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
3866    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
3867    for proc2, i.e. we are using 12+15+10=37 storage locations to store
3868    34 values.
3869 
3870    When d_nnz, o_nnz parameters are specified, the storage is specified
3871    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
3872    In the above case the values for d_nnz,o_nnz are:
3873 .vb
3874      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
3875      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
3876      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
3877 .ve
3878    Here the space allocated is the sum of all the above values, i.e. 34, and
3879    hence pre-allocation is perfect.
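
   With those per-row arrays, a minimal calling sketch on proc1 would be the following
   (the matrix A is assumed to already have its local/global sizes and type set, and a
   PetscErrorCode ierr declared; the d_nz/o_nz arguments are ignored here because the
   arrays are given):

.vb
     PetscInt d_nnz[] = {3,3,2}, o_nnz[] = {2,1,1};
     ierr = MatMPIAIJSetPreallocation(A,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
.ve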
3880 
3881    Level: intermediate
3882 
3883 .keywords: matrix, aij, compressed row, sparse, parallel
3884 
3885 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIAIJ(), MatMPIAIJSetPreallocationCSR(),
3886           MPIAIJ, MatGetInfo()
3887 @*/
3888 PetscErrorCode  MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
3889 {
3890   PetscErrorCode ierr;
3891 
3892   PetscFunctionBegin;
3893   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
3894   PetscValidType(B,1);
3895   ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocation_C",(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr);
3896   PetscFunctionReturn(0);
3897 }
3898 
3899 #undef __FUNCT__
3900 #define __FUNCT__ "MatCreateMPIAIJWithArrays"
3901 /*@
3902      MatCreateMPIAIJWithArrays - creates a MPI AIJ matrix using arrays that contain in standard
3903          CSR format the local rows.
3904 
3905    Collective on MPI_Comm
3906 
3907    Input Parameters:
3908 +  comm - MPI communicator
3909 .  m - number of local rows (Cannot be PETSC_DECIDE)
3910 .  n - This value should be the same as the local size used in creating the
3911        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
3912        calculated if N is given) For square matrices n is almost always m.
3913 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
3914 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
3915 .   i - row indices
3916 .   j - column indices
3917 -   a - matrix values
3918 
3919    Output Parameter:
3920 .   mat - the matrix
3921 
3922    Level: intermediate
3923 
3924    Notes:
3925        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
3926      thus you CANNOT change the matrix entries by changing the values of a[] after you have
3927      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3928 
3929        The i and j indices are 0 based, and the i indices are offsets into the local j array.
3930 
3931        The format used for the sparse matrix input is equivalent to a
3932     row-major ordering, i.e. for the following matrix, the input data expected is
3933     as shown:
3934 
3935         1 0 0
3936         2 0 3     P0
3937        -------
3938         4 5 6     P1
3939 
3940      Process0 [P0]: rows_owned=[0,1]
3941         i =  {0,1,3}  [size = nrow+1  = 2+1]
3942        j =  {0,0,2}  [size = nz = 3]
3943        v =  {1,2,3}  [size = nz = 3]
3944 
3945      Process1 [P1]: rows_owned=[2]
3946         i =  {0,3}    [size = nrow+1  = 1+1]
3947        j =  {0,1,2}  [size = nz = 3]
3948        v =  {4,5,6}  [size = nz = 3]
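
       As a sketch, the call on process 1 of this example could be written as below (the
     variable names, the communicator, and the declared PetscErrorCode ierr are
     illustrative assumptions; each process passes its own local arrays to this
     collective routine):

.vb
     PetscInt    i[] = {0,3};
     PetscInt    j[] = {0,1,2};
     PetscScalar a[] = {4.0,5.0,6.0};
     Mat         A;

     ierr = MatCreateMPIAIJWithArrays(PETSC_COMM_WORLD,1,PETSC_DECIDE,3,3,i,j,a,&A);CHKERRQ(ierr);
.ve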
3949 
3950 .keywords: matrix, aij, compressed row, sparse, parallel
3951 
3952 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
3953           MPIAIJ, MatCreateMPIAIJ(), MatCreateMPIAIJWithSplitArrays()
3954 @*/
3955 PetscErrorCode  MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
3956 {
3957   PetscErrorCode ierr;
3958 
3959   PetscFunctionBegin;
3960   if (i[0]) {
3961     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
3962   }
3963   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
3964   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
3965   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
3966   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
3967   ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr);
3968   PetscFunctionReturn(0);
3969 }
3970 
3971 #undef __FUNCT__
3972 #define __FUNCT__ "MatCreateMPIAIJ"
3973 /*@C
3974    MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format
3975    (the default parallel PETSc format).  For good matrix assembly performance
3976    the user should preallocate the matrix storage by setting the parameters
3977    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3978    performance can be increased by more than a factor of 50.
3979 
3980    Collective on MPI_Comm
3981 
3982    Input Parameters:
3983 +  comm - MPI communicator
3984 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
3985            This value should be the same as the local size used in creating the
3986            y vector for the matrix-vector product y = Ax.
3987 .  n - This value should be the same as the local size used in creating the
3988        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
3989        calculated if N is given) For square matrices n is almost always m.
3990 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
3991 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
3992 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
3993            (same value is used for all local rows)
3994 .  d_nnz - array containing the number of nonzeros in the various rows of the
3995            DIAGONAL portion of the local submatrix (possibly different for each row)
3996            or PETSC_NULL, if d_nz is used to specify the nonzero structure.
3997            The size of this array is equal to the number of local rows, i.e 'm'.
3998            You must leave room for the diagonal entry even if it is zero.
3999 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
4000            submatrix (same value is used for all local rows).
4001 -  o_nnz - array containing the number of nonzeros in the various rows of the
4002            OFF-DIAGONAL portion of the local submatrix (possibly different for
4003            each row) or PETSC_NULL, if o_nz is used to specify the nonzero
4004            structure. The size of this array is equal to the number
4005            of local rows, i.e 'm'.
4006 
4007    Output Parameter:
4008 .  A - the matrix
4009 
4010    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
4011    MatXXXXSetPreallocation() paradigm instead of this routine directly.
4012    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
4013 
4014    Notes:
4015    If the *_nnz parameter is given then the *_nz parameter is ignored
4016 
4017    m,n,M,N parameters specify the size of the matrix, and its partitioning across
4018    processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
4019    storage requirements for this matrix.
4020 
4021    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one
4022    processor then it must be used on all processors that share the object for
4023    that argument.
4024 
4025    The user MUST specify either the local or global matrix dimensions
4026    (possibly both).
4027 
4028    The parallel matrix is partitioned across processors such that the
4029    first m0 rows belong to process 0, the next m1 rows belong to
4030    process 1, the next m2 rows belong to process 2, etc., where
4031    m0,m1,m2,... are the input parameter 'm', i.e., each processor stores
4032    values corresponding to an [m x N] submatrix.
4033 
4034    The columns are logically partitioned with the n0 columns belonging
4035    to 0th partition, the next n1 columns belonging to the next
4036    partition, etc., where n0,n1,n2,... are the input parameter 'n'.
4037 
4038    The DIAGONAL portion of the local submatrix on any given processor
4039    is the submatrix corresponding to the rows and columns m,n
4040    owned by that processor, i.e., the diagonal matrix on
4041    process 0 is [m0 x n0], the diagonal matrix on process 1 is [m1 x n1],
4042    etc. The remaining portion of the local submatrix [m x (N-n)]
4043    constitutes the OFF-DIAGONAL portion. The example below
4044    illustrates this concept.
4045 
4046    For a square global matrix we define each processor's diagonal portion
4047    to be its local rows and the corresponding columns (a square submatrix);
4048    each processor's off-diagonal portion encompasses the remainder of the
4049    local matrix (a rectangular submatrix).
4050 
4051    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
4052 
4053    When calling this routine with a single process communicator, a matrix of
4054    type SEQAIJ is returned.  If a matrix of type MPIAIJ is desired for this
4055    type of communicator, use the construction mechanism:
4056      MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatSetSizes(A, m,n,M,N); MatMPIAIJSetPreallocation(A,...);
4057 
4058    By default, this format uses inodes (identical nodes) when possible.
4059    We search for consecutive rows with the same nonzero structure, thereby
4060    reusing matrix information to achieve increased efficiency.
4061 
4062    Options Database Keys:
4063 +  -mat_no_inode  - Do not use inodes
4064 .  -mat_inode_limit <limit> - Sets inode limit (max limit=5)
4065 -  -mat_aij_oneindex - Internally use indexing starting at 1
4066         rather than 0.  Note that when calling MatSetValues(),
4067         the user still MUST index entries starting at 0!
4068 
4069 
4070    Example usage:
4071 
4072    Consider the following 8x8 matrix with 34 non-zero values that is
4073    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
4074    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
4075    as follows:
4076 
4077 .vb
4078             1  2  0  |  0  3  0  |  0  4
4079     Proc0   0  5  6  |  7  0  0  |  8  0
4080             9  0 10  | 11  0  0  | 12  0
4081     -------------------------------------
4082            13  0 14  | 15 16 17  |  0  0
4083     Proc1   0 18  0  | 19 20 21  |  0  0
4084             0  0  0  | 22 23  0  | 24  0
4085     -------------------------------------
4086     Proc2  25 26 27  |  0  0 28  | 29  0
4087            30  0  0  | 31 32 33  |  0 34
4088 .ve
4089 
4090    This can be represented as a collection of submatrices as:
4091 
4092 .vb
4093       A B C
4094       D E F
4095       G H I
4096 .ve
4097 
4098    Where the submatrices A,B,C are owned by proc0, D,E,F are
4099    owned by proc1, G,H,I are owned by proc2.
4100 
4101    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4102    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4103    The 'M','N' parameters are 8,8, and have the same values on all procs.
4104 
4105    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
4106    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
4107    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
4108    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
4109    part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
4110    matrix, and [DF] as another SeqAIJ matrix.
4111 
4112    When d_nz, o_nz parameters are specified, d_nz storage elements are
4113    allocated for every row of the local diagonal submatrix, and o_nz
4114    storage locations are allocated for every row of the OFF-DIAGONAL submatrix.
4115    One way to choose d_nz and o_nz is to use the maximum number of nonzeros per
4116    local row in the local DIAGONAL and OFF-DIAGONAL submatrices, respectively.
4117    In this case, the values of d_nz,o_nz are:
4118 .vb
4119      proc0 : d_nz = 2, o_nz = 2
4120      proc1 : d_nz = 3, o_nz = 2
4121      proc2 : d_nz = 1, o_nz = 4
4122 .ve
4123    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
4124    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
4125    for proc2, i.e., we are using 12+15+10=37 storage locations to store
4126    34 values.
4127 
4128    When d_nnz, o_nnz parameters are specified, the storage is specified
4129    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
4130    In the above case the values for d_nnz,o_nnz are:
4131 .vb
4132      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
4133      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
4134      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
4135 .ve
4136    Here the space allocated is the sum of all the above values, i.e., 34, and
4137    hence the preallocation is perfect.
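
   As an illustrative sketch (not part of the PETSc example above), the call made on
   proc1 with the d_nnz/o_nnz values listed would look like:

.vb
     PetscInt d_nnz[] = {3,3,2}, o_nnz[] = {2,1,1};
     Mat      A;
     ierr = MatCreateMPIAIJ(comm,3,3,8,8,0,d_nnz,0,o_nnz,&A);CHKERRQ(ierr);
.ve
   Each process makes the same collective call with its own m,n and nnz arrays; the
   d_nz and o_nz arguments are ignored here because the nnz arrays are provided.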
4138 
4139    Level: intermediate
4140 
4141 .keywords: matrix, aij, compressed row, sparse, parallel
4142 
4143 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
4144           MPIAIJ, MatCreateMPIAIJWithArrays()
4145 @*/
4146 PetscErrorCode  MatCreateMPIAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
4147 {
4148   PetscErrorCode ierr;
4149   PetscMPIInt    size;
4150 
4151   PetscFunctionBegin;
4152   ierr = MatCreate(comm,A);CHKERRQ(ierr);
4153   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
4154   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4155   if (size > 1) {
4156     ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr);
4157     ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
4158   } else {
4159     ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr);
4160     ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr);
4161   }
4162   PetscFunctionReturn(0);
4163 }
4164 
4165 #undef __FUNCT__
4166 #define __FUNCT__ "MatMPIAIJGetSeqAIJ"
4167 PetscErrorCode  MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,PetscInt *colmap[])
4168 {
4169   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
4170 
4171   PetscFunctionBegin;
4172   *Ad     = a->A;
4173   *Ao     = a->B;
4174   *colmap = a->garray;
4175   PetscFunctionReturn(0);
4176 }
4177 
4178 #undef __FUNCT__
4179 #define __FUNCT__ "MatSetColoring_MPIAIJ"
4180 PetscErrorCode MatSetColoring_MPIAIJ(Mat A,ISColoring coloring)
4181 {
4182   PetscErrorCode ierr;
4183   PetscInt       i;
4184   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
4185 
4186   PetscFunctionBegin;
4187   if (coloring->ctype == IS_COLORING_GLOBAL) {
4188     ISColoringValue *allcolors,*colors;
4189     ISColoring      ocoloring;
4190 
4191     /* set coloring for diagonal portion */
4192     ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr);
4193 
4194     /* set coloring for off-diagonal portion */
4195     ierr = ISAllGatherColors(((PetscObject)A)->comm,coloring->n,coloring->colors,PETSC_NULL,&allcolors);CHKERRQ(ierr);
4196     ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
4197     for (i=0; i<a->B->cmap->n; i++) {
4198       colors[i] = allcolors[a->garray[i]];
4199     }
4200     ierr = PetscFree(allcolors);CHKERRQ(ierr);
4201     ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
4202     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
4203     ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr);
4204   } else if (coloring->ctype == IS_COLORING_GHOSTED) {
4205     ISColoringValue *colors;
4206     PetscInt        *larray;
4207     ISColoring      ocoloring;
4208 
4209     /* set coloring for diagonal portion */
4210     ierr = PetscMalloc((a->A->cmap->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr);
4211     for (i=0; i<a->A->cmap->n; i++) {
4212       larray[i] = i + A->cmap->rstart;
4213     }
4214     ierr = ISGlobalToLocalMappingApply(A->cmap->mapping,IS_GTOLM_MASK,a->A->cmap->n,larray,PETSC_NULL,larray);CHKERRQ(ierr);
4215     ierr = PetscMalloc((a->A->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
4216     for (i=0; i<a->A->cmap->n; i++) {
4217       colors[i] = coloring->colors[larray[i]];
4218     }
4219     ierr = PetscFree(larray);CHKERRQ(ierr);
4220     ierr = ISColoringCreate(PETSC_COMM_SELF,coloring->n,a->A->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
4221     ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr);
4222     ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr);
4223 
4224     /* set coloring for off-diagonal portion */
4225     ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr);
4226     ierr = ISGlobalToLocalMappingApply(A->cmap->mapping,IS_GTOLM_MASK,a->B->cmap->n,a->garray,PETSC_NULL,larray);CHKERRQ(ierr);
4227     ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
4228     for (i=0; i<a->B->cmap->n; i++) {
4229       colors[i] = coloring->colors[larray[i]];
4230     }
4231     ierr = PetscFree(larray);CHKERRQ(ierr);
4232     ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
4233     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
4234     ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr);
4235   } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support ISColoringType %d",(int)coloring->ctype);
4236 
4237   PetscFunctionReturn(0);
4238 }
4239 
4240 #if defined(PETSC_HAVE_ADIC)
4241 #undef __FUNCT__
4242 #define __FUNCT__ "MatSetValuesAdic_MPIAIJ"
4243 PetscErrorCode MatSetValuesAdic_MPIAIJ(Mat A,void *advalues)
4244 {
4245   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
4246   PetscErrorCode ierr;
4247 
4248   PetscFunctionBegin;
4249   ierr = MatSetValuesAdic_SeqAIJ(a->A,advalues);CHKERRQ(ierr);
4250   ierr = MatSetValuesAdic_SeqAIJ(a->B,advalues);CHKERRQ(ierr);
4251   PetscFunctionReturn(0);
4252 }
4253 #endif
4254 
4255 #undef __FUNCT__
4256 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ"
4257 PetscErrorCode MatSetValuesAdifor_MPIAIJ(Mat A,PetscInt nl,void *advalues)
4258 {
4259   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
4260   PetscErrorCode ierr;
4261 
4262   PetscFunctionBegin;
4263   ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr);
4264   ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr);
4265   PetscFunctionReturn(0);
4266 }
4267 
4268 #undef __FUNCT__
4269 #define __FUNCT__ "MatMergeSymbolic"
4270 PetscErrorCode  MatMergeSymbolic(MPI_Comm comm,Mat inmat,PetscInt n,Mat *outmat)
4271 {
4272   PetscErrorCode ierr;
4273   PetscInt       m,N,i,rstart,nnz,*dnz,*onz;
4274   PetscInt       *indx;
4275 
4276   PetscFunctionBegin;
4277   /* This routine will ONLY return MPIAIJ type matrix */
4278   ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
4279   if (n == PETSC_DECIDE){
4280     ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr);
4281   }
4282   ierr = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
4283   rstart -= m;
4284 
4285   ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
4286   for (i=0;i<m;i++) {
4287     ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr);
4288     ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr);
4289     ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr);
4290   }
4291 
4292   ierr = MatCreate(comm,outmat);CHKERRQ(ierr);
4293   ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
4294   ierr = MatSetType(*outmat,MATMPIAIJ);CHKERRQ(ierr);
4295   ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr);
4296   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
4297   PetscFunctionReturn(0);
4298 }
4299 
4300 #undef __FUNCT__
4301 #define __FUNCT__ "MatMergeNumeric"
4302 PetscErrorCode  MatMergeNumeric(MPI_Comm comm,Mat inmat,PetscInt n,Mat outmat)
4303 {
4304   PetscErrorCode ierr;
4305   PetscInt       m,N,i,rstart,nnz,Ii;
4306   PetscInt       *indx;
4307   PetscScalar    *values;
4308 
4309   PetscFunctionBegin;
4310   ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
4311   ierr = MatGetOwnershipRange(outmat,&rstart,PETSC_NULL);CHKERRQ(ierr);
4312   for (i=0;i<m;i++) {
4313     ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
4314     Ii    = i + rstart;
4315     ierr = MatSetValues(outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
4316     ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
4317   }
4318   ierr = MatDestroy(&inmat);CHKERRQ(ierr);
4319   ierr = MatAssemblyBegin(outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4320   ierr = MatAssemblyEnd(outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4321   PetscFunctionReturn(0);
4322 }
4323 
4324 #undef __FUNCT__
4325 #define __FUNCT__ "MatMerge"
4326 /*@
4327       MatMerge - Creates a single large PETSc matrix by concatenating sequential
4328                  matrices from each processor
4329 
4330     Collective on MPI_Comm
4331 
4332    Input Parameters:
4333 +    comm - the communicator the parallel matrix will live on
4334 .    inmat - the input sequential matrix (one per process)
4335 .    n - number of local columns (or PETSC_DECIDE)
4336 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4337 
4338    Output Parameter:
4339 .    outmat - the parallel matrix generated
4340 
4341     Level: advanced
4342 
4343    Notes: The number of columns of the matrix in EACH processor MUST be the same.
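
   A minimal calling sketch (illustrative; inmat is each process's assembled sequential matrix):

.vb
     Mat parmat;
     ierr = MatMerge(comm,inmat,PETSC_DECIDE,MAT_INITIAL_MATRIX,&parmat);CHKERRQ(ierr);
     /* note: inmat is destroyed internally during the numeric phase */
.ve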
4344 
4345 @*/
4346 PetscErrorCode  MatMerge(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
4347 {
4348   PetscErrorCode ierr;
4349 
4350   PetscFunctionBegin;
4351   ierr = PetscLogEventBegin(MAT_Merge,inmat,0,0,0);CHKERRQ(ierr);
4352   if (scall == MAT_INITIAL_MATRIX){
4353     ierr = MatMergeSymbolic(comm,inmat,n,outmat);CHKERRQ(ierr);
4354   }
4355   ierr = MatMergeNumeric(comm,inmat,n,*outmat);CHKERRQ(ierr);
4356   ierr = PetscLogEventEnd(MAT_Merge,inmat,0,0,0);CHKERRQ(ierr);
4357   PetscFunctionReturn(0);
4358 }
4359 
4360 #undef __FUNCT__
4361 #define __FUNCT__ "MatFileSplit"
4362 PetscErrorCode MatFileSplit(Mat A,char *outfile)
4363 {
4364   PetscErrorCode    ierr;
4365   PetscMPIInt       rank;
4366   PetscInt          m,N,i,rstart,nnz;
4367   size_t            len;
4368   const PetscInt    *indx;
4369   PetscViewer       out;
4370   char              *name;
4371   Mat               B;
4372   const PetscScalar *values;
4373 
4374   PetscFunctionBegin;
4375   ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr);
4376   ierr = MatGetSize(A,0,&N);CHKERRQ(ierr);
4377   /* Should this be the type of the diagonal block of A? */
4378   ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr);
4379   ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr);
4380   ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr);
4381   ierr = MatSeqAIJSetPreallocation(B,0,PETSC_NULL);CHKERRQ(ierr);
4382   ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr);
4383   for (i=0;i<m;i++) {
4384     ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
4385     ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
4386     ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
4387   }
4388   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4389   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4390 
4391   ierr = MPI_Comm_rank(((PetscObject)A)->comm,&rank);CHKERRQ(ierr);
4392   ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr);
4393   ierr = PetscMalloc((len+5)*sizeof(char),&name);CHKERRQ(ierr);
4394   sprintf(name,"%s.%d",outfile,rank);
4395   ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr);
4396   ierr = PetscFree(name);CHKERRQ(ierr);
4397   ierr = MatView(B,out);CHKERRQ(ierr);
4398   ierr = PetscViewerDestroy(&out);CHKERRQ(ierr);
4399   ierr = MatDestroy(&B);CHKERRQ(ierr);
4400   PetscFunctionReturn(0);
4401 }
4402 
4403 extern PetscErrorCode MatDestroy_MPIAIJ(Mat);
4404 #undef __FUNCT__
4405 #define __FUNCT__ "MatDestroy_MPIAIJ_SeqsToMPI"
4406 PetscErrorCode  MatDestroy_MPIAIJ_SeqsToMPI(Mat A)
4407 {
4408   PetscErrorCode       ierr;
4409   Mat_Merge_SeqsToMPI  *merge;
4410   PetscContainer       container;
4411 
4412   PetscFunctionBegin;
4413   ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr);
4414   if (container) {
4415     ierr = PetscContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr);
4416     ierr = PetscFree(merge->id_r);CHKERRQ(ierr);
4417     ierr = PetscFree(merge->len_s);CHKERRQ(ierr);
4418     ierr = PetscFree(merge->len_r);CHKERRQ(ierr);
4419     ierr = PetscFree(merge->bi);CHKERRQ(ierr);
4420     ierr = PetscFree(merge->bj);CHKERRQ(ierr);
4421     ierr = PetscFree(merge->buf_ri[0]);CHKERRQ(ierr);
4422     ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr);
4423     ierr = PetscFree(merge->buf_rj[0]);CHKERRQ(ierr);
4424     ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr);
4425     ierr = PetscFree(merge->coi);CHKERRQ(ierr);
4426     ierr = PetscFree(merge->coj);CHKERRQ(ierr);
4427     ierr = PetscFree(merge->owners_co);CHKERRQ(ierr);
4428     ierr = PetscLayoutDestroy(&merge->rowmap);CHKERRQ(ierr);
4429     ierr = PetscFree(merge);CHKERRQ(ierr);
4430     ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr);
4431   }
4432   ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr);
4433   PetscFunctionReturn(0);
4434 }
4435 
4436 #include <../src/mat/utils/freespace.h>
4437 #include <petscbt.h>
4438 
4439 #undef __FUNCT__
4440 #define __FUNCT__ "MatMerge_SeqsToMPINumeric"
4441 /*@C
4442       MatMerge_SeqsToMPI - Creates an MPIAIJ matrix by adding sequential
4443                  matrices from each processor
4444 
4445     Collective on MPI_Comm
4446 
4447    Input Parameters:
4448 +    comm - the communicator the parallel matrix will live on
4449 .    seqmat - the input sequential matrix (one per process)
4450 .    m - number of local rows (or PETSC_DECIDE)
4451 .    n - number of local columns (or PETSC_DECIDE)
4452 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4453 
4454    Output Parameter:
4455 .    mpimat - the parallel matrix generated
4456 
4457     Level: advanced
4458 
4459    Notes:
4460      The dimensions of the sequential matrix in each processor MUST be the same.
4461      The input seqmat is included in the container "Mat_Merge_SeqsToMPI", and will be
4462      destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat.
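
     A typical usage sketch (illustrative): do the symbolic setup once, then reuse it
     whenever the numerical values of seqmat change but its nonzero pattern does not:

.vb
     Mat C;
     ierr = MatMerge_SeqsToMPI(comm,seqmat,m,n,MAT_INITIAL_MATRIX,&C);CHKERRQ(ierr);
     /* ... update the values of seqmat, same nonzero pattern ... */
     ierr = MatMerge_SeqsToMPI(comm,seqmat,m,n,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr);
.ve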
4463 @*/
4464 PetscErrorCode  MatMerge_SeqsToMPINumeric(Mat seqmat,Mat mpimat)
4465 {
4466   PetscErrorCode       ierr;
4467   MPI_Comm             comm=((PetscObject)mpimat)->comm;
4468   Mat_SeqAIJ           *a=(Mat_SeqAIJ*)seqmat->data;
4469   PetscMPIInt          size,rank,taga,*len_s;
4470   PetscInt             N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj=a->j;
4471   PetscInt             proc,m;
4472   PetscInt             **buf_ri,**buf_rj;
4473   PetscInt             k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj;
4474   PetscInt             nrows,**buf_ri_k,**nextrow,**nextai;
4475   MPI_Request          *s_waits,*r_waits;
4476   MPI_Status           *status;
4477   MatScalar            *aa=a->a;
4478   MatScalar            **abuf_r,*ba_i;
4479   Mat_Merge_SeqsToMPI  *merge;
4480   PetscContainer       container;
4481 
4482   PetscFunctionBegin;
4483   ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
4484 
4485   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4486   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4487 
4488   ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr);
4489   ierr  = PetscContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr);
4490 
4491   bi     = merge->bi;
4492   bj     = merge->bj;
4493   buf_ri = merge->buf_ri;
4494   buf_rj = merge->buf_rj;
4495 
4496   ierr   = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr);
4497   owners = merge->rowmap->range;
4498   len_s  = merge->len_s;
4499 
4500   /* send and recv matrix values */
4501   /*-----------------------------*/
4502   ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr);
4503   ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr);
4504 
4505   ierr = PetscMalloc((merge->nsend+1)*sizeof(MPI_Request),&s_waits);CHKERRQ(ierr);
4506   for (proc=0,k=0; proc<size; proc++){
4507     if (!len_s[proc]) continue;
4508     i = owners[proc];
4509     ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr);
4510     k++;
4511   }
4512 
4513   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);}
4514   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);}
4515   ierr = PetscFree(status);CHKERRQ(ierr);
4516 
4517   ierr = PetscFree(s_waits);CHKERRQ(ierr);
4518   ierr = PetscFree(r_waits);CHKERRQ(ierr);
4519 
4520   /* insert mat values of mpimat */
4521   /*----------------------------*/
4522   ierr = PetscMalloc(N*sizeof(PetscScalar),&ba_i);CHKERRQ(ierr);
4523   ierr = PetscMalloc3(merge->nrecv,PetscInt*,&buf_ri_k,merge->nrecv,PetscInt*,&nextrow,merge->nrecv,PetscInt*,&nextai);CHKERRQ(ierr);
4524 
4525   for (k=0; k<merge->nrecv; k++){
4526     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
4527     nrows = *(buf_ri_k[k]);
4528     nextrow[k]  = buf_ri_k[k]+1;  /* next row number of k-th recved i-structure */
4529     nextai[k]   = buf_ri_k[k] + (nrows + 1);/* points to the next i-structure of k-th recved i-structure  */
4530   }
4531 
4532   /* set values of ba */
4533   m = merge->rowmap->n;
4534   for (i=0; i<m; i++) {
4535     arow = owners[rank] + i;
4536     bj_i = bj+bi[i];  /* col indices of the i-th row of mpimat */
4537     bnzi = bi[i+1] - bi[i];
4538     ierr = PetscMemzero(ba_i,bnzi*sizeof(PetscScalar));CHKERRQ(ierr);
4539 
4540     /* add local non-zero vals of this proc's seqmat into ba */
4541     anzi = ai[arow+1] - ai[arow];
4542     aj   = a->j + ai[arow];
4543     aa   = a->a + ai[arow];
4544     nextaj = 0;
4545     for (j=0; nextaj<anzi; j++){
4546       if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */
4547         ba_i[j] += aa[nextaj++];
4548       }
4549     }
4550 
4551     /* add received vals into ba */
4552     for (k=0; k<merge->nrecv; k++){ /* k-th received message */
4553       /* i-th row */
4554       if (i == *nextrow[k]) {
4555         anzi = *(nextai[k]+1) - *nextai[k];
4556         aj   = buf_rj[k] + *(nextai[k]);
4557         aa   = abuf_r[k] + *(nextai[k]);
4558         nextaj = 0;
4559         for (j=0; nextaj<anzi; j++){
4560           if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */
4561             ba_i[j] += aa[nextaj++];
4562           }
4563         }
4564         nextrow[k]++; nextai[k]++;
4565       }
4566     }
4567     ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr);
4568   }
4569   ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4570   ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4571 
4572   ierr = PetscFree(abuf_r[0]);CHKERRQ(ierr);
4573   ierr = PetscFree(abuf_r);CHKERRQ(ierr);
4574   ierr = PetscFree(ba_i);CHKERRQ(ierr);
4575   ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
4576   ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
4577   PetscFunctionReturn(0);
4578 }
4579 
4580 extern PetscErrorCode  MatDestroy_MPIAIJ_SeqsToMPI(Mat);
4581 
4582 #undef __FUNCT__
4583 #define __FUNCT__ "MatMerge_SeqsToMPISymbolic"
4584 PetscErrorCode  MatMerge_SeqsToMPISymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat)
4585 {
4586   PetscErrorCode       ierr;
4587   Mat                  B_mpi;
4588   Mat_SeqAIJ           *a=(Mat_SeqAIJ*)seqmat->data;
4589   PetscMPIInt          size,rank,tagi,tagj,*len_s,*len_si,*len_ri;
4590   PetscInt             **buf_rj,**buf_ri,**buf_ri_k;
4591   PetscInt             M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j;
4592   PetscInt             len,proc,*dnz,*onz;
4593   PetscInt             k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0;
4594   PetscInt             nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai;
4595   MPI_Request          *si_waits,*sj_waits,*ri_waits,*rj_waits;
4596   MPI_Status           *status;
4597   PetscFreeSpaceList   free_space=PETSC_NULL,current_space=PETSC_NULL;
4598   PetscBT              lnkbt;
4599   Mat_Merge_SeqsToMPI  *merge;
4600   PetscContainer       container;
4601 
4602   PetscFunctionBegin;
4603   ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4604 
4605   /* make sure it is a PETSc comm */
4606   ierr = PetscCommDuplicate(comm,&comm,PETSC_NULL);CHKERRQ(ierr);
4607   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4608   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4609 
4610   ierr = PetscNew(Mat_Merge_SeqsToMPI,&merge);CHKERRQ(ierr);
4611   ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr);
4612 
4613   /* determine row ownership */
4614   /*---------------------------------------------------------*/
4615   ierr = PetscLayoutCreate(comm,&merge->rowmap);CHKERRQ(ierr);
4616   ierr = PetscLayoutSetLocalSize(merge->rowmap,m);CHKERRQ(ierr);
4617   ierr = PetscLayoutSetSize(merge->rowmap,M);CHKERRQ(ierr);
4618   ierr = PetscLayoutSetBlockSize(merge->rowmap,1);CHKERRQ(ierr);
4619   ierr = PetscLayoutSetUp(merge->rowmap);CHKERRQ(ierr);
4620   ierr = PetscMalloc(size*sizeof(PetscMPIInt),&len_si);CHKERRQ(ierr);
4621   ierr = PetscMalloc(size*sizeof(PetscMPIInt),&merge->len_s);CHKERRQ(ierr);
4622 
4623   m      = merge->rowmap->n;
4624   M      = merge->rowmap->N;
4625   owners = merge->rowmap->range;
4626 
4627   /* determine the number of messages to send, their lengths */
4628   /*---------------------------------------------------------*/
4629   len_s  = merge->len_s;
4630 
4631   len = 0;  /* length of buf_si[] */
4632   merge->nsend = 0;
4633   for (proc=0; proc<size; proc++){
4634     len_si[proc] = 0;
4635     if (proc == rank){
4636       len_s[proc] = 0;
4637     } else {
4638       len_si[proc] = owners[proc+1] - owners[proc] + 1;
4639       len_s[proc] = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */
4640     }
4641     if (len_s[proc]) {
4642       merge->nsend++;
4643       nrows = 0;
4644       for (i=owners[proc]; i<owners[proc+1]; i++){
4645         if (ai[i+1] > ai[i]) nrows++;
4646       }
4647       len_si[proc] = 2*(nrows+1);
4648       len += len_si[proc];
4649     }
4650   }
4651 
4652   /* determine the number and length of messages to receive for ij-structure */
4653   /*-------------------------------------------------------------------------*/
4654   ierr = PetscGatherNumberOfMessages(comm,PETSC_NULL,len_s,&merge->nrecv);CHKERRQ(ierr);
4655   ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr);
4656 
4657   /* post the Irecv of j-structure */
4658   /*-------------------------------*/
4659   ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr);
4660   ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr);
4661 
4662   /* post the Isend of j-structure */
4663   /*--------------------------------*/
4664   ierr = PetscMalloc2(merge->nsend,MPI_Request,&si_waits,merge->nsend,MPI_Request,&sj_waits);CHKERRQ(ierr);
4665 
4666   for (proc=0, k=0; proc<size; proc++){
4667     if (!len_s[proc]) continue;
4668     i = owners[proc];
4669     ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr);
4670     k++;
4671   }
4672 
4673   /* receives and sends of j-structure are complete */
4674   /*------------------------------------------------*/
4675   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);}
4676   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);}
4677 
4678   /* send and recv i-structure */
4679   /*---------------------------*/
4680   ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr);
4681   ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr);
4682 
4683   ierr = PetscMalloc((len+1)*sizeof(PetscInt),&buf_s);CHKERRQ(ierr);
4684   buf_si = buf_s;  /* points to the beginning of k-th msg to be sent */
4685   for (proc=0,k=0; proc<size; proc++){
4686     if (!len_s[proc]) continue;
4687     /* form outgoing message for i-structure:
4688          buf_si[0]:                 nrows to be sent
4689                [1:nrows]:           row index (local to the receiving process)
4690                [nrows+1:2*nrows+1]: i-structure index
4691     */
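    /* Illustrative (hypothetical) data: if the receiving process [proc] owns global rows
       4..7 (owners[proc]=4) and this rank's seqmat has nonzeros only in rows 5 and 7 of
       that range (2 and 4 nonzeros respectively), then nrows=2 and
       buf_si = {2, 1,3, 0,2,6}: the row count, the local row indices, and the running
       i-structure offsets; len_si[proc] = 2*(nrows+1) = 6. */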
4692     /*-------------------------------------------*/
4693     nrows = len_si[proc]/2 - 1;
4694     buf_si_i    = buf_si + nrows+1;
4695     buf_si[0]   = nrows;
4696     buf_si_i[0] = 0;
4697     nrows = 0;
4698     for (i=owners[proc]; i<owners[proc+1]; i++){
4699       anzi = ai[i+1] - ai[i];
4700       if (anzi) {
4701         buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */
4702         buf_si[nrows+1] = i-owners[proc]; /* local row index */
4703         nrows++;
4704       }
4705     }
4706     ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr);
4707     k++;
4708     buf_si += len_si[proc];
4709   }
4710 
4711   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);}
4712   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);}
4713 
4714   ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr);
4715   for (i=0; i<merge->nrecv; i++){
4716     ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr);
4717   }
4718 
4719   ierr = PetscFree(len_si);CHKERRQ(ierr);
4720   ierr = PetscFree(len_ri);CHKERRQ(ierr);
4721   ierr = PetscFree(rj_waits);CHKERRQ(ierr);
4722   ierr = PetscFree2(si_waits,sj_waits);CHKERRQ(ierr);
4723   ierr = PetscFree(ri_waits);CHKERRQ(ierr);
4724   ierr = PetscFree(buf_s);CHKERRQ(ierr);
4725   ierr = PetscFree(status);CHKERRQ(ierr);
4726 
4727   /* compute a local seq matrix in each processor */
4728   /*----------------------------------------------*/
4729   /* allocate bi array and free space for accumulating nonzero column info */
4730   ierr = PetscMalloc((m+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr);
4731   bi[0] = 0;
4732 
4733   /* create and initialize a linked list */
4734   nlnk = N+1;
4735   ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4736 
4737   /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */
4738   len = 0;
4739   len  = ai[owners[rank+1]] - ai[owners[rank]];
4740   ierr = PetscFreeSpaceGet((PetscInt)(2*len+1),&free_space);CHKERRQ(ierr);
4741   current_space = free_space;
4742 
4743   /* determine symbolic info for each local row */
4744   ierr = PetscMalloc3(merge->nrecv,PetscInt*,&buf_ri_k,merge->nrecv,PetscInt*,&nextrow,merge->nrecv,PetscInt*,&nextai);CHKERRQ(ierr);
4745 
4746   for (k=0; k<merge->nrecv; k++){
4747     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
4748     nrows = *buf_ri_k[k];
4749     nextrow[k]  = buf_ri_k[k] + 1;  /* next row number of k-th recved i-structure */
4750     nextai[k]   = buf_ri_k[k] + (nrows + 1);/* points to the next i-structure of k-th recved i-structure  */
4751   }
4752 
4753   ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
4754   len = 0;
4755   for (i=0;i<m;i++) {
4756     bnzi   = 0;
4757     /* add local non-zero cols of this proc's seqmat into lnk */
4758     arow   = owners[rank] + i;
4759     anzi   = ai[arow+1] - ai[arow];
4760     aj     = a->j + ai[arow];
4761     ierr = PetscLLAdd(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4762     bnzi += nlnk;
4763     /* add received col data into lnk */
4764     for (k=0; k<merge->nrecv; k++){ /* k-th received message */
4765       if (i == *nextrow[k]) { /* i-th row */
4766         anzi = *(nextai[k]+1) - *nextai[k];
4767         aj   = buf_rj[k] + *nextai[k];
4768         ierr = PetscLLAdd(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4769         bnzi += nlnk;
4770         nextrow[k]++; nextai[k]++;
4771       }
4772     }
4773     if (len < bnzi) len = bnzi;  /* =max(bnzi) */
4774 
4775     /* if free space is not available, make more free space */
4776     if (current_space->local_remaining<bnzi) {
4777       ierr = PetscFreeSpaceGet(bnzi+current_space->total_array_size,&current_space);CHKERRQ(ierr);
4778       nspacedouble++;
4779     }
4780     /* copy data into free space, then initialize lnk */
4781     ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
4782     ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr);
4783 
4784     current_space->array           += bnzi;
4785     current_space->local_used      += bnzi;
4786     current_space->local_remaining -= bnzi;
4787 
4788     bi[i+1] = bi[i] + bnzi;
4789   }
4790 
4791   ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
4792 
4793   ierr = PetscMalloc((bi[m]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr);
4794   ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr);
4795   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
4796 
4797   /* create symbolic parallel matrix B_mpi */
4798   /*---------------------------------------*/
4799   ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr);
4800   if (n==PETSC_DECIDE) {
4801     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr);
4802   } else {
4803     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
4804   }
4805   ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr);
4806   ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr);
4807   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
4808 
4809   /* B_mpi is not ready for use - assembly will be done by MatMerge_SeqsToMPINumeric() */
4810   B_mpi->assembled     = PETSC_FALSE;
4811   B_mpi->ops->destroy  = MatDestroy_MPIAIJ_SeqsToMPI;
4812   merge->bi            = bi;
4813   merge->bj            = bj;
4814   merge->buf_ri        = buf_ri;
4815   merge->buf_rj        = buf_rj;
4816   merge->coi           = PETSC_NULL;
4817   merge->coj           = PETSC_NULL;
4818   merge->owners_co     = PETSC_NULL;
4819 
4820   ierr = PetscCommDestroy(&comm);CHKERRQ(ierr);
4821 
4822   /* attach the supporting struct to B_mpi for reuse */
4823   ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
4824   ierr = PetscContainerSetPointer(container,merge);CHKERRQ(ierr);
4825   ierr = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr);
4826   ierr = PetscContainerDestroy(&container);CHKERRQ(ierr);
4827   *mpimat = B_mpi;
4828 
4829   ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4830   PetscFunctionReturn(0);
4831 }
4832 
4833 #undef __FUNCT__
4834 #define __FUNCT__ "MatMerge_SeqsToMPI"
4835 PetscErrorCode  MatMerge_SeqsToMPI(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat)
4836 {
4837   PetscErrorCode   ierr;
4838 
4839   PetscFunctionBegin;
4840   ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4841   if (scall == MAT_INITIAL_MATRIX){
4842     ierr = MatMerge_SeqsToMPISymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr);
4843   }
4844   ierr = MatMerge_SeqsToMPINumeric(seqmat,*mpimat);CHKERRQ(ierr);
4845   ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4846   PetscFunctionReturn(0);
4847 }
4848 
4849 #undef __FUNCT__
4850 #define __FUNCT__ "MatMPIAIJGetLocalMat"
4851 /*@
4852      MatMPIAIJGetLocalMat - Creates a SeqAIJ matrix from an MPIAIJ matrix by taking all its local rows and putting them into a sequential matrix with
4853           mlocal rows and n columns, where mlocal is the row count obtained with MatGetLocalSize() and n is the global column count obtained
4854           with MatGetSize()
4855 
4856     Not Collective
4857 
4858    Input Parameters:
4859 +    A - the matrix
4860 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4861 
4862    Output Parameter:
4863 .    A_loc - the local sequential matrix generated
4864 
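   A calling sketch (illustrative; MAT_REUSE_MATRIX assumes the nonzero pattern of A is unchanged between calls):

.vb
     Mat Aloc;
     ierr = MatMPIAIJGetLocalMat(A,MAT_INITIAL_MATRIX,&Aloc);CHKERRQ(ierr);
     /* ... the values of A change, pattern unchanged ... */
     ierr = MatMPIAIJGetLocalMat(A,MAT_REUSE_MATRIX,&Aloc);CHKERRQ(ierr);
.ve
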
4865     Level: developer
4866 
4867 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMatCondensed()
4868 
4869 @*/
4870 PetscErrorCode  MatMPIAIJGetLocalMat(Mat A,MatReuse scall,Mat *A_loc)
4871 {
4872   PetscErrorCode  ierr;
4873   Mat_MPIAIJ      *mpimat=(Mat_MPIAIJ*)A->data;
4874   Mat_SeqAIJ      *mat,*a=(Mat_SeqAIJ*)(mpimat->A)->data,*b=(Mat_SeqAIJ*)(mpimat->B)->data;
4875   PetscInt        *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*cmap=mpimat->garray;
4876   MatScalar       *aa=a->a,*ba=b->a,*cam;
4877   PetscScalar     *ca;
4878   PetscInt        am=A->rmap->n,i,j,k,cstart=A->cmap->rstart;
4879   PetscInt        *ci,*cj,col,ncols_d,ncols_o,jo;
4880   PetscBool       match;
4881 
4882   PetscFunctionBegin;
4883   ierr = PetscTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr);
4884   if (!match) SETERRQ(((PetscObject)A)->comm, PETSC_ERR_SUP,"Requires MPIAIJ matrix as input");
4885   ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
4886   if (scall == MAT_INITIAL_MATRIX){
4887     ierr = PetscMalloc((1+am)*sizeof(PetscInt),&ci);CHKERRQ(ierr);
4888     ci[0] = 0;
4889     for (i=0; i<am; i++){
4890       ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]);
4891     }
4892     ierr = PetscMalloc((1+ci[am])*sizeof(PetscInt),&cj);CHKERRQ(ierr);
4893     ierr = PetscMalloc((1+ci[am])*sizeof(PetscScalar),&ca);CHKERRQ(ierr);
4894     k = 0;
4895     for (i=0; i<am; i++) {
4896       ncols_o = bi[i+1] - bi[i];
4897       ncols_d = ai[i+1] - ai[i];
4898       /* off-diagonal portion of A */
4899       for (jo=0; jo<ncols_o; jo++) {
4900         col = cmap[*bj];
4901         if (col >= cstart) break;
4902         cj[k]   = col; bj++;
4903         ca[k++] = *ba++;
4904       }
4905       /* diagonal portion of A */
4906       for (j=0; j<ncols_d; j++) {
4907         cj[k]   = cstart + *aj++;
4908         ca[k++] = *aa++;
4909       }
4910       /* off-diagonal portion of A */
4911       for (j=jo; j<ncols_o; j++) {
4912         cj[k]   = cmap[*bj++];
4913         ca[k++] = *ba++;
4914       }
4915     }
4916     /* put together the new matrix */
4917     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr);
4918     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
4919     /* Since these are PETSc arrays, change flags to free them as necessary. */
4920     mat          = (Mat_SeqAIJ*)(*A_loc)->data;
4921     mat->free_a  = PETSC_TRUE;
4922     mat->free_ij = PETSC_TRUE;
4923     mat->nonew   = 0;
4924   } else if (scall == MAT_REUSE_MATRIX){
4925     mat=(Mat_SeqAIJ*)(*A_loc)->data;
4926     ci = mat->i; cj = mat->j; cam = mat->a;
4927     for (i=0; i<am; i++) {
4928       /* off-diagonal portion of A */
4929       ncols_o = bi[i+1] - bi[i];
4930       for (jo=0; jo<ncols_o; jo++) {
4931         col = cmap[*bj];
4932         if (col >= cstart) break;
4933         *cam++ = *ba++; bj++;
4934       }
4935       /* diagonal portion of A */
4936       ncols_d = ai[i+1] - ai[i];
4937       for (j=0; j<ncols_d; j++) *cam++ = *aa++;
4938       /* off-diagonal portion of A */
4939       for (j=jo; j<ncols_o; j++) {
4940         *cam++ = *ba++; bj++;
4941       }
4942     }
4943   } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
4944   ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
4945   PetscFunctionReturn(0);
4946 }
4947 
4948 #undef __FUNCT__
4949 #define __FUNCT__ "MatMPIAIJGetLocalMatCondensed"
4950 /*@C
4951      MatMPIAIJGetLocalMatCondensed - Creates a SeqAIJ matrix from an MPIAIJ matrix by taking all its local rows and NON-ZERO columns
4952 
4953     Not Collective
4954 
4955    Input Parameters:
4956 +    A - the matrix
4957 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4958 -    row, col - index sets of rows and columns to extract (or PETSC_NULL)
4959 
4960    Output Parameter:
4961 .    A_loc - the local sequential matrix generated
4962 
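   A calling sketch (illustrative; passing PETSC_NULL for row and col selects all local rows and all nonzero columns):

.vb
     Mat Aloc;
     ierr = MatMPIAIJGetLocalMatCondensed(A,MAT_INITIAL_MATRIX,PETSC_NULL,PETSC_NULL,&Aloc);CHKERRQ(ierr);
.ve
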
4963     Level: developer
4964 
4965 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMat()
4966 
4967 @*/
4968 PetscErrorCode  MatMPIAIJGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc)
4969 {
4970   Mat_MPIAIJ        *a=(Mat_MPIAIJ*)A->data;
4971   PetscErrorCode    ierr;
4972   PetscInt          i,start,end,ncols,nzA,nzB,*cmap,imark,*idx;
4973   IS                isrowa,iscola;
4974   Mat               *aloc;
4975   PetscBool       match;
4976 
4977   PetscFunctionBegin;
4978   ierr = PetscTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr);
4979   if (!match) SETERRQ(((PetscObject)A)->comm, PETSC_ERR_SUP,"Requires MPIAIJ matrix as input");
4980   ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
4981   if (!row){
4982     start = A->rmap->rstart; end = A->rmap->rend;
4983     ierr = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr);
4984   } else {
4985     isrowa = *row;
4986   }
4987   if (!col){
4988     start = A->cmap->rstart;
4989     cmap  = a->garray;
4990     nzA   = a->A->cmap->n;
4991     nzB   = a->B->cmap->n;
4992     ierr  = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr);
4993     ncols = 0;
4994     for (i=0; i<nzB; i++) {
4995       if (cmap[i] < start) idx[ncols++] = cmap[i];
4996       else break;
4997     }
4998     imark = i;
4999     for (i=0; i<nzA; i++) idx[ncols++] = start + i;
5000     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i];
5001     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&iscola);CHKERRQ(ierr);
5002   } else {
5003     iscola = *col;
5004   }
5005   if (scall != MAT_INITIAL_MATRIX){
5006     ierr = PetscMalloc(sizeof(Mat),&aloc);CHKERRQ(ierr);
5007     aloc[0] = *A_loc;
5008   }
5009   ierr = MatGetSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr);
5010   *A_loc = aloc[0];
5011   ierr = PetscFree(aloc);CHKERRQ(ierr);
5012   if (!row){
5013     ierr = ISDestroy(&isrowa);CHKERRQ(ierr);
5014   }
5015   if (!col){
5016     ierr = ISDestroy(&iscola);CHKERRQ(ierr);
5017   }
5018   ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
5019   PetscFunctionReturn(0);
5020 }
5021 
5022 #undef __FUNCT__
5023 #define __FUNCT__ "MatGetBrowsOfAcols"
5024 /*@C
5025     MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns of local A
5026 
5027     Collective on Mat
5028 
5029    Input Parameters:
5030 +    A,B - the matrices in mpiaij format
5031 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5032 -    rowb, colb - index sets of rows and columns of B to extract (or PETSC_NULL)
5033 
5034    Output Parameter:
5035 +    rowb, colb - index sets of rows and columns of B to extract
5036 -    B_seq - the sequential matrix generated
5037 
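   A calling sketch (illustrative; the index sets returned by the first call are passed back for reuse):

.vb
     IS  rowb,colb;
     Mat Bseq;
     ierr = MatGetBrowsOfAcols(A,B,MAT_INITIAL_MATRIX,&rowb,&colb,&Bseq);CHKERRQ(ierr);
     /* ... the values of B change, same nonzero pattern ... */
     ierr = MatGetBrowsOfAcols(A,B,MAT_REUSE_MATRIX,&rowb,&colb,&Bseq);CHKERRQ(ierr);
.ve
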
5038     Level: developer
5039 
5040 @*/
5041 PetscErrorCode  MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,Mat *B_seq)
5042 {
5043   Mat_MPIAIJ        *a=(Mat_MPIAIJ*)A->data;
5044   PetscErrorCode    ierr;
5045   PetscInt          *idx,i,start,ncols,nzA,nzB,*cmap,imark;
5046   IS                isrowb,iscolb;
5047   Mat               *bseq=PETSC_NULL;
5048 
5049   PetscFunctionBegin;
5050   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend){
5051     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
5052   }
5053   ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
5054 
5055   if (scall == MAT_INITIAL_MATRIX){
5056     start = A->cmap->rstart;
5057     cmap  = a->garray;
5058     nzA   = a->A->cmap->n;
5059     nzB   = a->B->cmap->n;
5060     ierr  = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr);
5061     ncols = 0;
5062     for (i=0; i<nzB; i++) {  /* row < local row index */
5063       if (cmap[i] < start) idx[ncols++] = cmap[i];
5064       else break;
5065     }
5066     imark = i;
5067     for (i=0; i<nzA; i++) idx[ncols++] = start + i;  /* local rows */
5068     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */
5069     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&isrowb);CHKERRQ(ierr);
5070     ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr);
5071   } else {
5072     if (!rowb || !colb) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX");
5073     isrowb = *rowb; iscolb = *colb;
5074     ierr = PetscMalloc(sizeof(Mat),&bseq);CHKERRQ(ierr);
5075     bseq[0] = *B_seq;
5076   }
5077   ierr = MatGetSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr);
5078   *B_seq = bseq[0];
5079   ierr = PetscFree(bseq);CHKERRQ(ierr);
5080   if (!rowb){
5081     ierr = ISDestroy(&isrowb);CHKERRQ(ierr);
5082   } else {
5083     *rowb = isrowb;
5084   }
5085   if (!colb){
5086     ierr = ISDestroy(&iscolb);CHKERRQ(ierr);
5087   } else {
5088     *colb = iscolb;
5089   }
5090   ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
5091   PetscFunctionReturn(0);
5092 }
5093 
5094 #undef __FUNCT__
5095 #define __FUNCT__ "MatGetBrowsOfAoCols"
5096 /*@C
5097     MatGetBrowsOfAoCols - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns
5098     of the OFF-DIAGONAL portion of local A
5099 
5100     Collective on Mat
5101 
5102    Input Parameters:
5103 +    A,B - the matrices in mpiaij format
5104 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5105 .    startsj - starting points of the outgoing (send) j- and a-messages, saved for MAT_REUSE (or PETSC_NULL)
5106 .    startsj_r - similar to startsj, for the incoming (receive) messages
5107 -    bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or PETSC_NULL)
5108 
5109    Output Parameter:
5110 .    B_oth - the sequential matrix generated
5111 
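   A calling sketch (illustrative; the arrays returned by the first call are passed back for reuse):

.vb
     PetscInt  *startsj = PETSC_NULL,*startsj_r = PETSC_NULL;
     MatScalar *bufa = PETSC_NULL;
     Mat       Both;
     ierr = MatGetBrowsOfAoCols(A,B,MAT_INITIAL_MATRIX,&startsj,&startsj_r,&bufa,&Both);CHKERRQ(ierr);
     /* ... the values of B change, same nonzero pattern ... */
     ierr = MatGetBrowsOfAoCols(A,B,MAT_REUSE_MATRIX,&startsj,&startsj_r,&bufa,&Both);CHKERRQ(ierr);
.ve
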
5112     Level: developer
5113 
5114 @*/
5115 PetscErrorCode  MatGetBrowsOfAoCols(Mat A,Mat B,MatReuse scall,PetscInt **startsj,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth)
5116 {
5117   VecScatter_MPI_General *gen_to,*gen_from;
5118   PetscErrorCode         ierr;
5119   Mat_MPIAIJ             *a=(Mat_MPIAIJ*)A->data;
5120   Mat_SeqAIJ             *b_oth;
5121   VecScatter             ctx=a->Mvctx;
5122   MPI_Comm               comm=((PetscObject)ctx)->comm;
5123   PetscMPIInt            *rprocs,*sprocs,tag=((PetscObject)ctx)->tag,rank;
5124   PetscInt               *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap->n,row,*b_othi,*b_othj;
5125   PetscScalar            *rvalues,*svalues;
5126   MatScalar              *b_otha,*bufa,*bufA;
5127   PetscInt               i,j,k,l,ll,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len;
5128   MPI_Request            *rwaits = PETSC_NULL,*swaits = PETSC_NULL;
5129   MPI_Status             *sstatus,rstatus;
5130   PetscMPIInt            jj;
5131   PetscInt               *cols,sbs,rbs;
5132   PetscScalar            *vals;
5133 
5134   PetscFunctionBegin;
5135   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend){
5136     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
5137   }
5138   ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
5139   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
5140 
5141   gen_to   = (VecScatter_MPI_General*)ctx->todata;
5142   gen_from = (VecScatter_MPI_General*)ctx->fromdata;
5143   rvalues  = gen_from->values; /* holds the length of receiving row */
5144   svalues  = gen_to->values;   /* holds the length of sending row */
5145   nrecvs   = gen_from->n;
5146   nsends   = gen_to->n;
5147 
5148   ierr = PetscMalloc2(nrecvs,MPI_Request,&rwaits,nsends,MPI_Request,&swaits);CHKERRQ(ierr);
5149   srow     = gen_to->indices;   /* local row index to be sent */
5150   sstarts  = gen_to->starts;
5151   sprocs   = gen_to->procs;
5152   sstatus  = gen_to->sstatus;
5153   sbs      = gen_to->bs;
5154   rstarts  = gen_from->starts;
5155   rprocs   = gen_from->procs;
5156   rbs      = gen_from->bs;
5157 
5158   if (!startsj || !bufa_ptr) scall = MAT_INITIAL_MATRIX;
5159   if (scall == MAT_INITIAL_MATRIX){
5160     /* i-array */
5161     /*---------*/
5162     /*  post receives */
5163     for (i=0; i<nrecvs; i++){
5164       rowlen = (PetscInt*)rvalues + rstarts[i]*rbs;
5165       nrows = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */
5166       ierr = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5167     }
5168 
5169     /* pack the outgoing message */
5170     ierr = PetscMalloc2(nsends+1,PetscInt,&sstartsj,nrecvs+1,PetscInt,&rstartsj);CHKERRQ(ierr);
5171     sstartsj[0] = 0;  rstartsj[0] = 0;
5172     len = 0; /* total length of j or a array to be sent */
5173     k = 0;
5174     for (i=0; i<nsends; i++){
5175       rowlen = (PetscInt*)svalues + sstarts[i]*sbs;
5176       nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5177       for (j=0; j<nrows; j++) {
5178         row = srow[k] + B->rmap->range[rank]; /* global row idx */
5179         for (l=0; l<sbs; l++){
5180           ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); /* rowlength */
5181           rowlen[j*sbs+l] = ncols;
5182           len += ncols;
5183           ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr);
5184         }
5185         k++;
5186       }
5187       ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5188       sstartsj[i+1] = len;  /* starting point of (i+1)-th outgoing msg in bufj and bufa */
5189     }
5190     /* recvs and sends of i-array are completed */
5191     i = nrecvs;
5192     while (i--) {
5193       ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5194     }
5195     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
5196 
5197     /* allocate buffers for sending j and a arrays */
5198     ierr = PetscMalloc((len+1)*sizeof(PetscInt),&bufj);CHKERRQ(ierr);
5199     ierr = PetscMalloc((len+1)*sizeof(PetscScalar),&bufa);CHKERRQ(ierr);
5200 
5201     /* create i-array of B_oth */
5202     ierr = PetscMalloc((aBn+2)*sizeof(PetscInt),&b_othi);CHKERRQ(ierr);
5203     b_othi[0] = 0;
5204     len = 0; /* total length of j or a array to be received */
5205     k = 0;
5206     for (i=0; i<nrecvs; i++){
5207       rowlen = (PetscInt*)rvalues + rstarts[i]*rbs;
5208       nrows = rbs*(rstarts[i+1]-rstarts[i]); /* num of rows to be received */
5209       for (j=0; j<nrows; j++) {
5210         b_othi[k+1] = b_othi[k] + rowlen[j];
5211         len += rowlen[j]; k++;
5212       }
5213       rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */
5214     }
5215 
5216     /* allocate space for j and a arrays of B_oth */
5217     ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(PetscInt),&b_othj);CHKERRQ(ierr);
5218     ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(MatScalar),&b_otha);CHKERRQ(ierr);
5219 
5220     /* j-array */
5221     /*---------*/
5222     /*  post receives of j-array */
5223     for (i=0; i<nrecvs; i++){
5224       nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
5225       ierr = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5226     }
5227 
5228     /* pack the outgoing message j-array */
5229     k = 0;
5230     for (i=0; i<nsends; i++){
5231       nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5232       bufJ = bufj+sstartsj[i];
5233       for (j=0; j<nrows; j++) {
5234         row  = srow[k++] + B->rmap->range[rank]; /* global row idx */
5235         for (ll=0; ll<sbs; ll++){
5236           ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr);
5237           for (l=0; l<ncols; l++){
5238             *bufJ++ = cols[l];
5239           }
5240           ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr);
5241         }
5242       }
5243       ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5244     }
5245 
5246     /* recvs and sends of j-array are completed */
5247     i = nrecvs;
5248     while (i--) {
5249       ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5250     }
5251     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
5252   } else if (scall == MAT_REUSE_MATRIX){
5253     sstartsj = *startsj;
5254     rstartsj = *startsj_r;
5255     bufa     = *bufa_ptr;
5256     b_oth    = (Mat_SeqAIJ*)(*B_oth)->data;
5257     b_otha   = b_oth->a;
5258   } else {
5259     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not possess an object container");
5260   }
5261 
5262   /* a-array */
5263   /*---------*/
5264   /*  post receives of a-array */
5265   for (i=0; i<nrecvs; i++){
5266     nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
5267     ierr = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5268   }
5269 
5270   /* pack the outgoing message a-array */
5271   k = 0;
5272   for (i=0; i<nsends; i++){
5273     nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5274     bufA = bufa+sstartsj[i];
5275     for (j=0; j<nrows; j++) {
5276       row  = srow[k++] + B->rmap->range[rank]; /* global row idx */
5277       for (ll=0; ll<sbs; ll++){
5278         ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr);
5279         for (l=0; l<ncols; l++){
5280           *bufA++ = vals[l];
5281         }
5282         ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr);
5283       }
5284     }
5285     ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5286   }
5287   /* recvs and sends of a-array are completed */
5288   i = nrecvs;
5289   while (i--) {
5290     ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5291   }
5292   if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
5293   ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr);
5294 
5295   if (scall == MAT_INITIAL_MATRIX){
5296     /* put together the new matrix */
5297     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr);
5298 
5299     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
5300     /* Since these are PETSc arrays, change flags to free them as necessary. */
5301     b_oth          = (Mat_SeqAIJ *)(*B_oth)->data;
5302     b_oth->free_a  = PETSC_TRUE;
5303     b_oth->free_ij = PETSC_TRUE;
5304     b_oth->nonew   = 0;
5305 
5306     ierr = PetscFree(bufj);CHKERRQ(ierr);
5307     if (!startsj || !bufa_ptr){
5308       ierr = PetscFree2(sstartsj,rstartsj);CHKERRQ(ierr);
5309       ierr = PetscFree(bufa_ptr);CHKERRQ(ierr);
5310     } else {
5311       *startsj   = sstartsj;
5312       *startsj_r = rstartsj;
5313       *bufa_ptr  = bufa;
5314     }
5315   }
5316   ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
5317   PetscFunctionReturn(0);
5318 }
5319 
5320 #undef __FUNCT__
5321 #define __FUNCT__ "MatGetCommunicationStructs"
5322 /*@C
5323   MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication.
5324 
5325   Not Collective
5326 
5327   Input Parameter:
5328 . A - The matrix in mpiaij format
5329 
5330   Output Parameters:
5331 + lvec - The local vector holding off-process values from the argument to a matrix-vector product
5332 . colmap - A map from global column index to local index into lvec
5333 - multScatter - A scatter from the argument of a matrix-vector product to lvec
5334 
5335   Level: developer
5336 
5337 @*/
5338 #if defined (PETSC_USE_CTABLE)
5339 PetscErrorCode  MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter)
5340 #else
5341 PetscErrorCode  MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter)
5342 #endif
5343 {
5344   Mat_MPIAIJ *a;
5345 
5346   PetscFunctionBegin;
5347   PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
5348   PetscValidPointer(lvec, 2);
5349   PetscValidPointer(colmap, 3);
5350   PetscValidPointer(multScatter, 4);
5351   a = (Mat_MPIAIJ *) A->data;
5352   if (lvec) *lvec = a->lvec;
5353   if (colmap) *colmap = a->colmap;
5354   if (multScatter) *multScatter = a->Mvctx;
5355   PetscFunctionReturn(0);
5356 }
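/*
   A minimal usage sketch for MatGetCommunicationStructs() (not taken from the PETSc sources;
   the variable names are hypothetical).  Given an assembled MATMPIAIJ matrix A, and assuming
   a configuration without PETSC_USE_CTABLE so that colmap is returned as a plain PetscInt
   array:

      Vec            lvec;
      VecScatter     Mvctx;
      PetscInt       *colmap;
      PetscErrorCode ierr;

      ierr = MatGetCommunicationStructs(A,&lvec,&colmap,&Mvctx);CHKERRQ(ierr);

   The returned objects are owned by the matrix and must not be destroyed by the caller.
*/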
5357 
5358 EXTERN_C_BEGIN
5359 extern PetscErrorCode  MatConvert_MPIAIJ_MPIAIJCRL(Mat,const MatType,MatReuse,Mat*);
5360 extern PetscErrorCode  MatConvert_MPIAIJ_MPIAIJPERM(Mat,const MatType,MatReuse,Mat*);
5361 extern PetscErrorCode  MatConvert_MPIAIJ_MPISBAIJ(Mat,const MatType,MatReuse,Mat*);
5362 EXTERN_C_END
5363 
5364 #undef __FUNCT__
5365 #define __FUNCT__ "MatMatMultNumeric_MPIDense_MPIAIJ"
5366 /*
5367     Computes C = A*B as (B'*A')', since computing the dense (A) times sparse (B) product directly is impractical
5368 
5369                n                       p                          p
5370         (              )       (              )         (                  )
5371       m (      A       )  *  n (       B      )   =   m (         C        )
5372         (              )       (              )         (                  )
5373 
5374 */
5375 PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C)
5376 {
5377   PetscErrorCode     ierr;
5378   Mat                At,Bt,Ct;
5379 
5380   PetscFunctionBegin;
5381   ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr);
5382   ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr);
5383   ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,1.0,&Ct);CHKERRQ(ierr);
5384   ierr = MatDestroy(&At);CHKERRQ(ierr);
5385   ierr = MatDestroy(&Bt);CHKERRQ(ierr);
5386   ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr);
5387   ierr = MatDestroy(&Ct);CHKERRQ(ierr);
5388   PetscFunctionReturn(0);
5389 }
5390 
5391 #undef __FUNCT__
5392 #define __FUNCT__ "MatMatMultSymbolic_MPIDense_MPIAIJ"
5393 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat *C)
5394 {
5395   PetscErrorCode ierr;
5396   PetscInt       m=A->rmap->n,n=B->cmap->n;
5397   Mat            Cmat;
5398 
5399   PetscFunctionBegin;
5400   if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %D != B->rmap->n %D",A->cmap->n,B->rmap->n);
5401   ierr = MatCreate(((PetscObject)A)->comm,&Cmat);CHKERRQ(ierr);
5402   ierr = MatSetSizes(Cmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
5403   ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr);
5404   ierr = MatMPIDenseSetPreallocation(Cmat,PETSC_NULL);CHKERRQ(ierr);
5405   ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5406   ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5407   *C   = Cmat;
5408   PetscFunctionReturn(0);
5409 }
5410 
5411 /* ----------------------------------------------------------------*/
5412 #undef __FUNCT__
5413 #define __FUNCT__ "MatMatMult_MPIDense_MPIAIJ"
5414 PetscErrorCode MatMatMult_MPIDense_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C)
5415 {
5416   PetscErrorCode ierr;
5417 
5418   PetscFunctionBegin;
5419   if (scall == MAT_INITIAL_MATRIX){
5420     ierr = MatMatMultSymbolic_MPIDense_MPIAIJ(A,B,fill,C);CHKERRQ(ierr);
5421   }
5422   ierr = MatMatMultNumeric_MPIDense_MPIAIJ(A,B,*C);CHKERRQ(ierr);
5423   PetscFunctionReturn(0);
5424 }
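/*
   A minimal usage sketch of the routines above (hypothetical variable names): with A of type
   MATMPIDENSE and B of type MATMPIAIJ, both assembled and with A->cmap->n == B->rmap->n,
   MatMatMult() should dispatch, via the composed function registered in MatCreate_MPIAIJ()
   below, to MatMatMult_MPIDense_MPIAIJ().  The fill argument is irrelevant because the
   result C is dense, so PETSC_DEFAULT is passed.

      Mat C;

      ierr = MatMatMult(A,B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&C);CHKERRQ(ierr);
      ierr = MatDestroy(&C);CHKERRQ(ierr);
*/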
5425 
5426 EXTERN_C_BEGIN
5427 #if defined(PETSC_HAVE_MUMPS)
5428 extern PetscErrorCode MatGetFactor_aij_mumps(Mat,MatFactorType,Mat*);
5429 #endif
5430 #if defined(PETSC_HAVE_PASTIX)
5431 extern PetscErrorCode MatGetFactor_mpiaij_pastix(Mat,MatFactorType,Mat*);
5432 #endif
5433 #if defined(PETSC_HAVE_SUPERLU_DIST)
5434 extern PetscErrorCode MatGetFactor_mpiaij_superlu_dist(Mat,MatFactorType,Mat*);
5435 #endif
5436 #if defined(PETSC_HAVE_SPOOLES)
5437 extern PetscErrorCode MatGetFactor_mpiaij_spooles(Mat,MatFactorType,Mat*);
5438 #endif
5439 EXTERN_C_END
5440 
5441 /*MC
5442    MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.
5443 
5444    Options Database Keys:
5445 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions()
5446 
5447   Level: beginner
5448 
5449 .seealso: MatCreateMPIAIJ()
5450 M*/
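/*
   A minimal sketch of selecting this type programmatically instead of through -mat_type mpiaij;
   M, N and the preallocation counts below are placeholders for illustration only.  The call to
   MatSetType() with MATMPIAIJ invokes MatCreate_MPIAIJ() below.

      Mat A;

      ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
      ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,M,N);CHKERRQ(ierr);
      ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
      ierr = MatMPIAIJSetPreallocation(A,5,PETSC_NULL,2,PETSC_NULL);CHKERRQ(ierr);
*/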
5451 
5452 EXTERN_C_BEGIN
5453 #undef __FUNCT__
5454 #define __FUNCT__ "MatCreate_MPIAIJ"
5455 PetscErrorCode  MatCreate_MPIAIJ(Mat B)
5456 {
5457   Mat_MPIAIJ     *b;
5458   PetscErrorCode ierr;
5459   PetscMPIInt    size;
5460 
5461   PetscFunctionBegin;
5462   ierr = MPI_Comm_size(((PetscObject)B)->comm,&size);CHKERRQ(ierr);
5463 
5464   ierr            = PetscNewLog(B,Mat_MPIAIJ,&b);CHKERRQ(ierr);
5465   B->data         = (void*)b;
5466   ierr            = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
5467   B->rmap->bs     = 1;
5468   B->assembled    = PETSC_FALSE;
5469 
5470   B->insertmode   = NOT_SET_VALUES;
5471   b->size         = size;
5472   ierr = MPI_Comm_rank(((PetscObject)B)->comm,&b->rank);CHKERRQ(ierr);
5473 
5474   /* build cache for off array entries formed */
5475   ierr = MatStashCreate_Private(((PetscObject)B)->comm,1,&B->stash);CHKERRQ(ierr);
5476   b->donotstash  = PETSC_FALSE;
5477   b->colmap      = 0;
5478   b->garray      = 0;
5479   b->roworiented = PETSC_TRUE;
5480 
5481   /* stuff used for matrix vector multiply */
5482   b->lvec      = PETSC_NULL;
5483   b->Mvctx     = PETSC_NULL;
5484 
5485   /* stuff for MatGetRow() */
5486   b->rowindices   = 0;
5487   b->rowvalues    = 0;
5488   b->getrowactive = PETSC_FALSE;
5489 
5490 #if defined(PETSC_HAVE_SPOOLES)
5491   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_spooles_C",
5492                                      "MatGetFactor_mpiaij_spooles",
5493                                      MatGetFactor_mpiaij_spooles);CHKERRQ(ierr);
5494 #endif
5495 #if defined(PETSC_HAVE_MUMPS)
5496   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_mumps_C",
5497                                      "MatGetFactor_aij_mumps",
5498                                      MatGetFactor_aij_mumps);CHKERRQ(ierr);
5499 #endif
5500 #if defined(PETSC_HAVE_PASTIX)
5501   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_pastix_C",
5502 					   "MatGetFactor_mpiaij_pastix",
5503 					   MatGetFactor_mpiaij_pastix);CHKERRQ(ierr);
5504 #endif
5505 #if defined(PETSC_HAVE_SUPERLU_DIST)
5506   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_superlu_dist_C",
5507                                      "MatGetFactor_mpiaij_superlu_dist",
5508                                      MatGetFactor_mpiaij_superlu_dist);CHKERRQ(ierr);
5509 #endif
5510   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
5511                                      "MatStoreValues_MPIAIJ",
5512                                      MatStoreValues_MPIAIJ);CHKERRQ(ierr);
5513   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
5514                                      "MatRetrieveValues_MPIAIJ",
5515                                      MatRetrieveValues_MPIAIJ);CHKERRQ(ierr);
5516   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
5517 				     "MatGetDiagonalBlock_MPIAIJ",
5518                                      MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr);
5519   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatIsTranspose_C",
5520 				     "MatIsTranspose_MPIAIJ",
5521 				     MatIsTranspose_MPIAIJ);CHKERRQ(ierr);
5522   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocation_C",
5523 				     "MatMPIAIJSetPreallocation_MPIAIJ",
5524 				     MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr);
5525   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",
5526 				     "MatMPIAIJSetPreallocationCSR_MPIAIJ",
5527 				     MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr);
5528   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C",
5529 				     "MatDiagonalScaleLocal_MPIAIJ",
5530 				     MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr);
5531   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpiaijperm_C",
5532                                      "MatConvert_MPIAIJ_MPIAIJPERM",
5533                                       MatConvert_MPIAIJ_MPIAIJPERM);CHKERRQ(ierr);
5534   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpiaijcrl_C",
5535                                      "MatConvert_MPIAIJ_MPIAIJCRL",
5536                                       MatConvert_MPIAIJ_MPIAIJCRL);CHKERRQ(ierr);
5537   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C",
5538                                      "MatConvert_MPIAIJ_MPISBAIJ",
5539                                       MatConvert_MPIAIJ_MPISBAIJ);CHKERRQ(ierr);
5540   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMult_mpidense_mpiaij_C",
5541                                      "MatMatMult_MPIDense_MPIAIJ",
5542                                       MatMatMult_MPIDense_MPIAIJ);CHKERRQ(ierr);
5543   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMultSymbolic_mpidense_mpiaij_C",
5544                                      "MatMatMultSymbolic_MPIDense_MPIAIJ",
5545                                      MatMatMultSymbolic_MPIDense_MPIAIJ);CHKERRQ(ierr);
5546   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMultNumeric_mpidense_mpiaij_C",
5547                                      "MatMatMultNumeric_MPIDense_MPIAIJ",
5548                                       MatMatMultNumeric_MPIDense_MPIAIJ);CHKERRQ(ierr);
5549   ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr);
5550   PetscFunctionReturn(0);
5551 }
5552 EXTERN_C_END
5553 
5554 #undef __FUNCT__
5555 #define __FUNCT__ "MatCreateMPIAIJWithSplitArrays"
5556 /*@
5557      MatCreateMPIAIJWithSplitArrays - creates an MPI AIJ matrix using arrays that contain the "diagonal"
5558          and "off-diagonal" part of the matrix in CSR format.
5559 
5560    Collective on MPI_Comm
5561 
5562    Input Parameters:
5563 +  comm - MPI communicator
5564 .  m - number of local rows (cannot be PETSC_DECIDE)
5565 .  n - This value should be the same as the local size used in creating the
5566        x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have it
5567        calculated if N is given). For square matrices n is almost always m.
5568 .  M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given)
5569 .  N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given)
5570 .   i - row indices for "diagonal" portion of matrix
5571 .   j - column indices for "diagonal" portion of matrix (local indices, relative to the start of the diagonal block)
5572 .   a - matrix values for "diagonal" portion of matrix
5573 .   oi - row indices for "off-diagonal" portion of matrix
5574 .   oj - column indices for "off-diagonal" portion of matrix (global indices)
5575 -   oa - matrix values for "off-diagonal" portion of matrix
5576 
5577    Output Parameter:
5578 .   mat - the matrix
5579 
5580    Level: advanced
5581 
5582    Notes:
5583        The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc. The user
5584        must free the arrays once the matrix has been destroyed and not before.
5585 
5586        The i and j indices are 0 based
5587 
5588        See MatCreateMPIAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix
5589 
5590        This sets local rows and cannot be used to set off-processor values.
5591 
5592        You cannot later use MatSetValues() to change values in this matrix.
5593 
5594 .keywords: matrix, aij, compressed row, sparse, parallel
5595 
5596 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
5597           MPIAIJ, MatCreateMPIAIJ(), MatCreateMPIAIJWithArrays()
5598 @*/
5599 PetscErrorCode  MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],
5600 								PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat)
5601 {
5602   PetscErrorCode ierr;
5603   Mat_MPIAIJ     *maij;
5604 
5605   PetscFunctionBegin;
5606   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE or negative");
5607   if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
5608   if (oi[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0");
5609   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
5610   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
5611   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
5612   maij = (Mat_MPIAIJ*) (*mat)->data;
5613   maij->donotstash     = PETSC_TRUE;
5614   (*mat)->preallocated = PETSC_TRUE;
5615 
5616   ierr = PetscLayoutSetBlockSize((*mat)->rmap,1);CHKERRQ(ierr);
5617   ierr = PetscLayoutSetBlockSize((*mat)->cmap,1);CHKERRQ(ierr);
5618   ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr);
5619   ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr);
5620 
5621   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr);
5622   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr);
5623 
5624   ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5625   ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5626   ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5627   ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5628 
5629   ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5630   ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5631   PetscFunctionReturn(0);
5632 }
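/*
   A minimal two-process sketch of MatCreateMPIAIJWithSplitArrays() (run on exactly two MPI
   processes; the numerical values are placeholders chosen only for illustration).  Each rank
   owns two rows and two columns of a 4x4 matrix and supplies its "diagonal" block in local
   column indices and its "off-diagonal" block in global column indices.  Rank 0 holds the
   entries (0,0)=1, (1,1)=3, (0,2)=2, (1,3)=4; rank 1 holds (2,2)=6, (3,3)=8, (2,0)=5, (3,1)=7.
   The arrays must remain allocated until the matrix is destroyed.

      Mat         A;
      PetscMPIInt rank;
      PetscInt    i[]  = {0,1,2},  j[]  = {0,1};
      PetscInt    oi[] = {0,1,2},  oj[2];
      PetscScalar a[2],oa[2];

      ierr = MPI_Comm_rank(PETSC_COMM_WORLD,&rank);CHKERRQ(ierr);
      if (!rank) { a[0] = 1.0; a[1] = 3.0; oj[0] = 2; oj[1] = 3; oa[0] = 2.0; oa[1] = 4.0; }
      else       { a[0] = 6.0; a[1] = 8.0; oj[0] = 0; oj[1] = 1; oa[0] = 5.0; oa[1] = 7.0; }
      ierr = MatCreateMPIAIJWithSplitArrays(PETSC_COMM_WORLD,2,2,PETSC_DETERMINE,PETSC_DETERMINE,
                                            i,j,a,oi,oj,oa,&A);CHKERRQ(ierr);
*/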
5633 
5634 /*
5635     Special version for direct calls from Fortran
5636 */
5637 #include <private/fortranimpl.h>
5638 
5639 #if defined(PETSC_HAVE_FORTRAN_CAPS)
5640 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ
5641 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
5642 #define matsetvaluesmpiaij_ matsetvaluesmpiaij
5643 #endif
5644 
5645 /* Change these macros so can be used in void function */
5646 #undef CHKERRQ
5647 #define CHKERRQ(ierr) CHKERRABORT(PETSC_COMM_WORLD,ierr)
5648 #undef SETERRQ2
5649 #define SETERRQ2(comm,ierr,b,c,d) CHKERRABORT(comm,ierr)
5650 #undef SETERRQ
5651 #define SETERRQ(c,ierr,b) CHKERRABORT(c,ierr)
5652 
5653 EXTERN_C_BEGIN
5654 #undef __FUNCT__
5655 #define __FUNCT__ "matsetvaluesmpiaij_"
5656 void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr)
5657 {
5658   Mat             mat = *mmat;
5659   PetscInt        m = *mm, n = *mn;
5660   InsertMode      addv = *maddv;
5661   Mat_MPIAIJ      *aij = (Mat_MPIAIJ*)mat->data;
5662   PetscScalar     value;
5663   PetscErrorCode  ierr;
5664 
5665   ierr = MatPreallocated(mat);CHKERRQ(ierr);
5666   if (mat->insertmode == NOT_SET_VALUES) {
5667     mat->insertmode = addv;
5668   }
5669 #if defined(PETSC_USE_DEBUG)
5670   else if (mat->insertmode != addv) {
5671     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
5672   }
5673 #endif
5674   {
5675   PetscInt        i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
5676   PetscInt        cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
5677   PetscBool       roworiented = aij->roworiented;
5678 
5679   /* Some Variables required in the macro */
5680   Mat             A = aij->A;
5681   Mat_SeqAIJ      *a = (Mat_SeqAIJ*)A->data;
5682   PetscInt        *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
5683   MatScalar       *aa = a->a;
5684   PetscBool       ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES))?PETSC_TRUE:PETSC_FALSE);
5685   Mat             B = aij->B;
5686   Mat_SeqAIJ      *b = (Mat_SeqAIJ*)B->data;
5687   PetscInt        *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
5688   MatScalar       *ba = b->a;
5689 
5690   PetscInt        *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
5691   PetscInt        nonew = a->nonew;
5692   MatScalar       *ap1,*ap2;
5693 
5694   PetscFunctionBegin;
5695   for (i=0; i<m; i++) {
5696     if (im[i] < 0) continue;
5697 #if defined(PETSC_USE_DEBUG)
5698     if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
5699 #endif
5700     if (im[i] >= rstart && im[i] < rend) {
5701       row      = im[i] - rstart;
5702       lastcol1 = -1;
5703       rp1      = aj + ai[row];
5704       ap1      = aa + ai[row];
5705       rmax1    = aimax[row];
5706       nrow1    = ailen[row];
5707       low1     = 0;
5708       high1    = nrow1;
5709       lastcol2 = -1;
5710       rp2      = bj + bi[row];
5711       ap2      = ba + bi[row];
5712       rmax2    = bimax[row];
5713       nrow2    = bilen[row];
5714       low2     = 0;
5715       high2    = nrow2;
5716 
5717       for (j=0; j<n; j++) {
5718         if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
5719         if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
5720         if (in[j] >= cstart && in[j] < cend){
5721           col = in[j] - cstart;
5722           MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
5723         } else if (in[j] < 0) continue;
5724 #if defined(PETSC_USE_DEBUG)
5725         else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
5726 #endif
5727         else {
5728           if (mat->was_assembled) {
5729             if (!aij->colmap) {
5730               ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
5731             }
5732 #if defined (PETSC_USE_CTABLE)
5733             ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
5734 	    col--;
5735 #else
5736             col = aij->colmap[in[j]] - 1;
5737 #endif
5738             if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
5739               ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
5740               col =  in[j];
5741               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
5742               B = aij->B;
5743               b = (Mat_SeqAIJ*)B->data;
5744               bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
5745               ba       = b->a;  /* refresh ba before computing ap2: DisAssemble_MPIAIJ() replaced aij->B and its value array */
5746               rp2      = bj + bi[row];
5747               ap2      = ba + bi[row];
5748               rmax2    = bimax[row];
5749               nrow2    = bilen[row];
5750               low2     = 0;
5751               high2    = nrow2;
5752               bm       = aij->B->rmap->n;
5753             }
5754           } else col = in[j];
5755           MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
5756         }
5757       }
5758     } else {
5759       if (!aij->donotstash) {
5760         if (roworiented) {
5761           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
5762         } else {
5763           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
5764         }
5765       }
5766     }
5767   }}
5768   PetscFunctionReturnVoid();
5769 }
5770 EXTERN_C_END
5771 
5772