xref: /petsc/src/mat/impls/aij/mpi/mpiaij.c (revision 7b6bb2c608b6fc6714ef38fda02c2dbb91c82665)
1 
2 #include <../src/mat/impls/aij/mpi/mpiaij.h>   /*I "petscmat.h" I*/
3 #include <petscblaslapack.h>
4 
5 /*MC
6    MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices.
7 
8    This matrix type is identical to MATSEQAIJ when constructed with a single process communicator,
9    and MATMPIAIJ otherwise.  As a result, for single process communicators,
10   MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
11   for communicators controlling multiple processes.  It is recommended that you call both of
12   the above preallocation routines for simplicity.
13 
14    Options Database Keys:
15 . -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions()
16 
17   Developer Notes: Subclasses include MATAIJCUSP, MATAIJPERM, and MATAIJCRL; the type also automatically switches over to use inodes when
18    enough of them exist.
19 
20   Level: beginner
21 
22 .seealso: MatCreateMPIAIJ(), MatCreateSeqAIJ(), MATSEQAIJ, MATMPIAIJ
23 M*/
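/*
   Usage sketch (illustration only, not part of this source; assumes an existing communicator
   comm, global sizes M and N, and the usual PetscErrorCode ierr): create an AIJ matrix and
   call both preallocation routines as recommended above; whichever one does not match the
   communicator size is simply ignored.

     Mat A;
     ierr = MatCreate(comm,&A);CHKERRQ(ierr);
     ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,M,N);CHKERRQ(ierr);
     ierr = MatSetType(A,MATAIJ);CHKERRQ(ierr);
     ierr = MatSeqAIJSetPreallocation(A,5,PETSC_NULL);CHKERRQ(ierr);
     ierr = MatMPIAIJSetPreallocation(A,5,PETSC_NULL,2,PETSC_NULL);CHKERRQ(ierr);
*/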
24 
25 /*MC
26    MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices.
27 
28    This matrix type is identical to MATSEQAIJCRL when constructed with a single process communicator,
29    and MATMPIAIJCRL otherwise.  As a result, for single process communicators,
30    MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
31   for communicators controlling multiple processes.  It is recommended that you call both of
32   the above preallocation routines for simplicity.
33 
34    Options Database Keys:
35 . -mat_type aijcrl - sets the matrix type to "aijcrl" during a call to MatSetFromOptions()
36 
37   Level: beginner
38 
39 .seealso: MatCreateMPIAIJCRL(), MATSEQAIJCRL, MATMPIAIJCRL
40 M*/
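/*
   Command-line sketch (illustration only; ./ex1 stands for any PETSc program that calls
   MatSetFromOptions() on its matrix): select the AIJCRL format at runtime instead of in code.

     mpiexec -n 4 ./ex1 -mat_type aijcrl
*/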
41 
42 #undef __FUNCT__
43 #define __FUNCT__ "MatFindNonZeroRows_MPIAIJ"
44 PetscErrorCode MatFindNonZeroRows_MPIAIJ(Mat M,IS *keptrows)
45 {
46   PetscErrorCode  ierr;
47   Mat_MPIAIJ      *mat = (Mat_MPIAIJ*)M->data;
48   Mat_SeqAIJ      *a = (Mat_SeqAIJ*)mat->A->data;
49   Mat_SeqAIJ      *b = (Mat_SeqAIJ*)mat->B->data;
50   const PetscInt  *ia,*ib;
51   const MatScalar *aa,*bb;
52   PetscInt        na,nb,i,j,*rows,cnt=0,n0rows;
53   PetscInt        m = M->rmap->n,rstart = M->rmap->rstart;
54 
55   PetscFunctionBegin;
56   *keptrows = 0;
57   ia = a->i;
58   ib = b->i;
59   for (i=0; i<m; i++) {
60     na = ia[i+1] - ia[i];
61     nb = ib[i+1] - ib[i];
62     if (!na && !nb) {
63       cnt++;
64       goto ok1;
65     }
66     aa = a->a + ia[i];
67     for (j=0; j<na; j++) {
68       if (aa[j] != 0.0) goto ok1;
69     }
70     bb = b->a + ib[i];
71     for (j=0; j <nb; j++) {
72       if (bb[j] != 0.0) goto ok1;
73     }
74     cnt++;
75     ok1:;
76   }
77   ierr = MPI_Allreduce(&cnt,&n0rows,1,MPIU_INT,MPI_SUM,((PetscObject)M)->comm);CHKERRQ(ierr);
78   if (!n0rows) PetscFunctionReturn(0);
79   ierr = PetscMalloc((M->rmap->n-cnt)*sizeof(PetscInt),&rows);CHKERRQ(ierr);
80   cnt = 0;
81   for (i=0; i<m; i++) {
82     na = ia[i+1] - ia[i];
83     nb = ib[i+1] - ib[i];
84     if (!na && !nb) continue;
85     aa = a->a + ia[i];
86     for(j=0; j<na;j++) {
87       if (aa[j] != 0.0) {
88         rows[cnt++] = rstart + i;
89         goto ok2;
90       }
91     }
92     bb = b->a + ib[i];
93     for (j=0; j<nb; j++) {
94       if (bb[j] != 0.0) {
95         rows[cnt++] = rstart + i;
96         goto ok2;
97       }
98     }
99     ok2:;
100   }
101   ierr = ISCreateGeneral(((PetscObject)M)->comm,cnt,rows,PETSC_OWN_POINTER,keptrows);CHKERRQ(ierr);
102   PetscFunctionReturn(0);
103 }
104 
105 #undef __FUNCT__
106 #define __FUNCT__ "MatGetColumnNorms_MPIAIJ"
107 PetscErrorCode MatGetColumnNorms_MPIAIJ(Mat A,NormType type,PetscReal *norms)
108 {
109   PetscErrorCode ierr;
110   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)A->data;
111   PetscInt       i,n,*garray = aij->garray;
112   Mat_SeqAIJ     *a_aij = (Mat_SeqAIJ*) aij->A->data;
113   Mat_SeqAIJ     *b_aij = (Mat_SeqAIJ*) aij->B->data;
114   PetscReal      *work;
115 
116   PetscFunctionBegin;
117   ierr = MatGetSize(A,PETSC_NULL,&n);CHKERRQ(ierr);
118   ierr = PetscMalloc(n*sizeof(PetscReal),&work);CHKERRQ(ierr);
119   ierr = PetscMemzero(work,n*sizeof(PetscReal));CHKERRQ(ierr);
120   if (type == NORM_2) {
121     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
122       work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]*a_aij->a[i]);
123     }
124     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
125       work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]*b_aij->a[i]);
126     }
127   } else if (type == NORM_1) {
128     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
129       work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]);
130     }
131     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
132       work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]);
133     }
134   } else if (type == NORM_INFINITY) {
135     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
136       work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]);
137     }
138     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
139       work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]),work[garray[b_aij->j[i]]]);
140     }
141 
142   } else SETERRQ(((PetscObject)A)->comm,PETSC_ERR_ARG_WRONG,"Unknown NormType");
143   if (type == NORM_INFINITY) {
144     ierr = MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,A->hdr.comm);CHKERRQ(ierr);
145   } else {
146     ierr = MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,A->hdr.comm);CHKERRQ(ierr);
147   }
148   ierr = PetscFree(work);CHKERRQ(ierr);
149   if (type == NORM_2) {
150     for (i=0; i<n; i++) norms[i] = sqrt(norms[i]);
151   }
152   PetscFunctionReturn(0);
153 }
154 
155 #undef __FUNCT__
156 #define __FUNCT__ "MatDistribute_MPIAIJ"
157 /*
158     Distributes a SeqAIJ matrix across a set of processes. Code stolen from
159     MatLoad_MPIAIJ(). Horrible lack of reuse. Should be a routine for each matrix type.
160 
161     Only for square matrices
162 */
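/*
   Usage sketch (illustration only; gmat is a MATSEQAIJ matrix assembled on process 0 and m is
   the number of rows this process should own): the first call builds the distributed matrix,
   a later call with MAT_REUSE_MATRIX only re-sends the numerical values.

     Mat dmat;
     ierr = MatDistribute_MPIAIJ(comm,gmat,m,MAT_INITIAL_MATRIX,&dmat);CHKERRQ(ierr);
     ierr = MatDistribute_MPIAIJ(comm,gmat,m,MAT_REUSE_MATRIX,&dmat);CHKERRQ(ierr);
*/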
163 PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat)
164 {
165   PetscMPIInt    rank,size;
166   PetscInt       *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz,*gmataj,cnt,row,*ld;
167   PetscErrorCode ierr;
168   Mat            mat;
169   Mat_SeqAIJ     *gmata;
170   PetscMPIInt    tag;
171   MPI_Status     status;
172   PetscBool      aij;
173   MatScalar      *gmataa,*ao,*ad,*gmataarestore=0;
174 
175   PetscFunctionBegin;
176   CHKMEMQ;
177   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
178   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
179   if (!rank) {
180     ierr = PetscTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);CHKERRQ(ierr);
181     if (!aij) SETERRQ1(((PetscObject)gmat)->comm,PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name);
182   }
183   if (reuse == MAT_INITIAL_MATRIX) {
184     ierr = MatCreate(comm,&mat);CHKERRQ(ierr);
185     ierr = MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
186     ierr = MatSetType(mat,MATAIJ);CHKERRQ(ierr);
187     ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr);
188     ierr = PetscMalloc2(m,PetscInt,&dlens,m,PetscInt,&olens);CHKERRQ(ierr);
189     ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
190     rowners[0] = 0;
191     for (i=2; i<=size; i++) {
192       rowners[i] += rowners[i-1];
193     }
194     rstart = rowners[rank];
195     rend   = rowners[rank+1];
196     ierr   = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
197     if (!rank) {
198       gmata = (Mat_SeqAIJ*) gmat->data;
199       /* send row lengths to all processors */
200       for (i=0; i<m; i++) dlens[i] = gmata->ilen[i];
201       for (i=1; i<size; i++) {
202 	ierr = MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
203       }
204       /* determine the number of diagonal and off-diagonal nonzeros in each row */
205       ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
206       ierr = PetscMalloc(m*sizeof(PetscInt),&ld);CHKERRQ(ierr);
207       ierr = PetscMemzero(ld,m*sizeof(PetscInt));CHKERRQ(ierr);
208       jj = 0;
209       for (i=0; i<m; i++) {
210 	for (j=0; j<dlens[i]; j++) {
211           if (gmata->j[jj] < rstart) ld[i]++;
212 	  if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++;
213 	  jj++;
214 	}
215       }
216       /* send column indices to other processes */
217       for (i=1; i<size; i++) {
218 	nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
219 	ierr = MPI_Send(&nz,1,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
220 	ierr = MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
221       }
222 
223       /* send numerical values to other processes */
224       for (i=1; i<size; i++) {
225         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
226         ierr = MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
227       }
228       gmataa = gmata->a;
229       gmataj = gmata->j;
230 
231     } else {
232       /* receive row lengths */
233       ierr = MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
234       /* receive column indices */
235       ierr = MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
236       ierr = PetscMalloc2(nz,PetscScalar,&gmataa,nz,PetscInt,&gmataj);CHKERRQ(ierr);
237       ierr = MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
238       /* determine the number of diagonal and off-diagonal nonzeros in each row */
239       ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
240       ierr = PetscMalloc(m*sizeof(PetscInt),&ld);CHKERRQ(ierr);
241       ierr = PetscMemzero(ld,m*sizeof(PetscInt));CHKERRQ(ierr);
242       jj = 0;
243       for (i=0; i<m; i++) {
244 	for (j=0; j<dlens[i]; j++) {
245           if (gmataj[jj] < rstart) ld[i]++;
246 	  if (gmataj[jj] < rstart || gmataj[jj] >= rend) olens[i]++;
247 	  jj++;
248 	}
249       }
250       /* receive numerical values */
251       ierr = PetscMemzero(gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr);
252       ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
253     }
254     /* set preallocation */
255     for (i=0; i<m; i++) {
256       dlens[i] -= olens[i];
257     }
258     ierr = MatSeqAIJSetPreallocation(mat,0,dlens);CHKERRQ(ierr);
259     ierr = MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);CHKERRQ(ierr);
260 
261     for (i=0; i<m; i++) {
262       dlens[i] += olens[i];
263     }
264     cnt  = 0;
265     for (i=0; i<m; i++) {
266       row  = rstart + i;
267       ierr = MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);CHKERRQ(ierr);
268       cnt += dlens[i];
269     }
270     if (rank) {
271       ierr = PetscFree2(gmataa,gmataj);CHKERRQ(ierr);
272     }
273     ierr = PetscFree2(dlens,olens);CHKERRQ(ierr);
274     ierr = PetscFree(rowners);CHKERRQ(ierr);
275     ((Mat_MPIAIJ*)(mat->data))->ld = ld;
276     *inmat = mat;
277   } else {   /* column indices are already set; only need to move over numerical values from process 0 */
278     Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data;
279     Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data;
280     mat   = *inmat;
281     ierr  = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
282     if (!rank) {
283       /* send numerical values to other processes */
284       gmata = (Mat_SeqAIJ*) gmat->data;
285       ierr   = MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);CHKERRQ(ierr);
286       gmataa = gmata->a;
287       for (i=1; i<size; i++) {
288         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
289         ierr = MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
290       }
291       nz   = gmata->i[rowners[1]]-gmata->i[rowners[0]];
292     } else {
293       /* receive numerical values from process 0*/
294       nz   = Ad->nz + Ao->nz;
295       ierr = PetscMalloc(nz*sizeof(PetscScalar),&gmataa);CHKERRQ(ierr); gmataarestore = gmataa;
296       ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
297     }
298     /* transfer numerical values into the diagonal A and off diagonal B parts of mat */
299     ld = ((Mat_MPIAIJ*)(mat->data))->ld;
300     ad = Ad->a;
301     ao = Ao->a;
302     if (mat->rmap->n) {
303       i  = 0;
304       nz = ld[i];                                   ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
305       nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
306     }
307     for (i=1; i<mat->rmap->n; i++) {
308       nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
309       nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
310     }
311     i--;
312     if (mat->rmap->n) {
313       nz = Ao->i[i+1] - Ao->i[i] - ld[i];           ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
314     }
315     if (rank) {
316       ierr = PetscFree(gmataarestore);CHKERRQ(ierr);
317     }
318   }
319   ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
320   ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
321   CHKMEMQ;
322   PetscFunctionReturn(0);
323 }
324 
325 /*
326   Local utility routine that creates a mapping from the global column
327 number to the local number in the off-diagonal part of the local
328 storage of the matrix.  When PETSC_USE_CTABLE is used this is scalable,
329 at a slightly higher hash table cost; without it, it is not scalable (each process
330 has an order N integer array) but access is fast.
331 */
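/*
   For illustration (hypothetical numbers): if garray = {3,7,12} then the colmap built below
   answers "global column 3 -> local off-diagonal column 0", 7 -> 1, 12 -> 2; the stored values
   are shifted by one so that a lookup result of 0 means the global column has no local slot.
*/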
332 #undef __FUNCT__
333 #define __FUNCT__ "CreateColmap_MPIAIJ_Private"
334 PetscErrorCode CreateColmap_MPIAIJ_Private(Mat mat)
335 {
336   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
337   PetscErrorCode ierr;
338   PetscInt       n = aij->B->cmap->n,i;
339 
340   PetscFunctionBegin;
341 #if defined (PETSC_USE_CTABLE)
342   ierr = PetscTableCreate(n,&aij->colmap);CHKERRQ(ierr);
343   for (i=0; i<n; i++){
344     ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1);CHKERRQ(ierr);
345   }
346 #else
347   ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscInt),&aij->colmap);CHKERRQ(ierr);
348   ierr = PetscLogObjectMemory(mat,mat->cmap->N*sizeof(PetscInt));CHKERRQ(ierr);
349   ierr = PetscMemzero(aij->colmap,mat->cmap->N*sizeof(PetscInt));CHKERRQ(ierr);
350   for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
351 #endif
352   PetscFunctionReturn(0);
353 }
354 
355 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \
356 { \
357     if (col <= lastcol1) low1 = 0; else high1 = nrow1; \
358     lastcol1 = col;\
359     while (high1-low1 > 5) { \
360       t = (low1+high1)/2; \
361       if (rp1[t] > col) high1 = t; \
362       else             low1  = t; \
363     } \
364       for (_i=low1; _i<high1; _i++) { \
365         if (rp1[_i] > col) break; \
366         if (rp1[_i] == col) { \
367           if (addv == ADD_VALUES) ap1[_i] += value;   \
368           else                    ap1[_i] = value; \
369           goto a_noinsert; \
370         } \
371       }  \
372       if (value == 0.0 && ignorezeroentries) {low1 = 0; high1 = nrow1;goto a_noinsert;} \
373       if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;}		\
374       if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
375       MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \
376       N = nrow1++ - 1; a->nz++; high1++; \
377       /* shift up all the later entries in this row */ \
378       for (ii=N; ii>=_i; ii--) { \
379         rp1[ii+1] = rp1[ii]; \
380         ap1[ii+1] = ap1[ii]; \
381       } \
382       rp1[_i] = col;  \
383       ap1[_i] = value;  \
384       a_noinsert: ; \
385       ailen[row] = nrow1; \
386 }
387 
388 
389 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \
390 { \
391     if (col <= lastcol2) low2 = 0; else high2 = nrow2; \
392     lastcol2 = col;\
393     while (high2-low2 > 5) { \
394       t = (low2+high2)/2; \
395       if (rp2[t] > col) high2 = t; \
396       else             low2  = t; \
397     } \
398     for (_i=low2; _i<high2; _i++) {		\
399       if (rp2[_i] > col) break;			\
400       if (rp2[_i] == col) {			      \
401 	if (addv == ADD_VALUES) ap2[_i] += value;     \
402 	else                    ap2[_i] = value;      \
403 	goto b_noinsert;			      \
404       }						      \
405     }							      \
406     if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
407     if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;}		\
408     if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
409     MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \
410     N = nrow2++ - 1; b->nz++; high2++;					\
411     /* shift up all the later entries in this row */			\
412     for (ii=N; ii>=_i; ii--) {						\
413       rp2[ii+1] = rp2[ii];						\
414       ap2[ii+1] = ap2[ii];						\
415     }									\
416     rp2[_i] = col;							\
417     ap2[_i] = value;							\
418     b_noinsert: ;								\
419     bilen[row] = nrow2;							\
420 }
421 
422 #undef __FUNCT__
423 #define __FUNCT__ "MatSetValuesRow_MPIAIJ"
424 PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[])
425 {
426   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)A->data;
427   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data;
428   PetscErrorCode ierr;
429   PetscInt       l,*garray = mat->garray,diag;
430 
431   PetscFunctionBegin;
432   /* code only works for square matrices A */
433 
434   /* find size of row to the left of the diagonal part */
435   ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr);
436   row  = row - diag;
437   for (l=0; l<b->i[row+1]-b->i[row]; l++) {
438     if (garray[b->j[b->i[row]+l]] > diag) break;
439   }
440   ierr = PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));CHKERRQ(ierr);
441 
442   /* diagonal part */
443   ierr = PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));CHKERRQ(ierr);
444 
445   /* right of diagonal part */
446   ierr = PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));CHKERRQ(ierr);
447   PetscFunctionReturn(0);
448 }
449 
450 #undef __FUNCT__
451 #define __FUNCT__ "MatSetValues_MPIAIJ"
452 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
453 {
454   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
455   PetscScalar    value;
456   PetscErrorCode ierr;
457   PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
458   PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
459   PetscBool      roworiented = aij->roworiented;
460 
461   /* Some Variables required in the macro */
462   Mat            A = aij->A;
463   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data;
464   PetscInt       *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
465   MatScalar      *aa = a->a;
466   PetscBool      ignorezeroentries = a->ignorezeroentries;
467   Mat            B = aij->B;
468   Mat_SeqAIJ     *b = (Mat_SeqAIJ*)B->data;
469   PetscInt       *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
470   MatScalar      *ba = b->a;
471 
472   PetscInt       *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
473   PetscInt       nonew = a->nonew;
474   MatScalar      *ap1,*ap2;
475 
476   PetscFunctionBegin;
477   if (v) PetscValidScalarPointer(v,6);
478   for (i=0; i<m; i++) {
479     if (im[i] < 0) continue;
480 #if defined(PETSC_USE_DEBUG)
481     if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
482 #endif
483     if (im[i] >= rstart && im[i] < rend) {
484       row      = im[i] - rstart;
485       lastcol1 = -1;
486       rp1      = aj + ai[row];
487       ap1      = aa + ai[row];
488       rmax1    = aimax[row];
489       nrow1    = ailen[row];
490       low1     = 0;
491       high1    = nrow1;
492       lastcol2 = -1;
493       rp2      = bj + bi[row];
494       ap2      = ba + bi[row];
495       rmax2    = bimax[row];
496       nrow2    = bilen[row];
497       low2     = 0;
498       high2    = nrow2;
499 
500       for (j=0; j<n; j++) {
501         if (v) {if (roworiented) value = v[i*n+j]; else value = v[i+j*m];} else value = 0.0;
502         if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
503         if (in[j] >= cstart && in[j] < cend){
504           col = in[j] - cstart;
505           MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
506         } else if (in[j] < 0) continue;
507 #if defined(PETSC_USE_DEBUG)
508         else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
509 #endif
510         else {
511           if (mat->was_assembled) {
512             if (!aij->colmap) {
513               ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
514             }
515 #if defined (PETSC_USE_CTABLE)
516             ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
517 	    col--;
518 #else
519             col = aij->colmap[in[j]] - 1;
520 #endif
521             if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
522               ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
523               col =  in[j];
524               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
525               B = aij->B;
526               b = (Mat_SeqAIJ*)B->data;
527               bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a;
528               rp2      = bj + bi[row];
529               ap2      = ba + bi[row];
530               rmax2    = bimax[row];
531               nrow2    = bilen[row];
532               low2     = 0;
533               high2    = nrow2;
534               bm       = aij->B->rmap->n;
535               ba = b->a;
536             }
537           } else col = in[j];
538           MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
539         }
540       }
541     } else {
542       if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
543       if (!aij->donotstash) {
544         if (roworiented) {
545           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
546         } else {
547           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
548         }
549       }
550     }
551   }
552   PetscFunctionReturn(0);
553 }
554 
555 #undef __FUNCT__
556 #define __FUNCT__ "MatGetValues_MPIAIJ"
557 PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
558 {
559   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
560   PetscErrorCode ierr;
561   PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
562   PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
563 
564   PetscFunctionBegin;
565   for (i=0; i<m; i++) {
566     if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/
567     if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
568     if (idxm[i] >= rstart && idxm[i] < rend) {
569       row = idxm[i] - rstart;
570       for (j=0; j<n; j++) {
571         if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */
572         if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
573         if (idxn[j] >= cstart && idxn[j] < cend){
574           col = idxn[j] - cstart;
575           ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
576         } else {
577           if (!aij->colmap) {
578             ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
579           }
580 #if defined (PETSC_USE_CTABLE)
581           ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
582           col --;
583 #else
584           col = aij->colmap[idxn[j]] - 1;
585 #endif
586           if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
587           else {
588             ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
589           }
590         }
591       }
592     } else {
593       SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
594     }
595   }
596   PetscFunctionReturn(0);
597 }
598 
599 extern PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat,Vec,Vec);
600 
601 #undef __FUNCT__
602 #define __FUNCT__ "MatAssemblyBegin_MPIAIJ"
603 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
604 {
605   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
606   PetscErrorCode ierr;
607   PetscInt       nstash,reallocs;
608   InsertMode     addv;
609 
610   PetscFunctionBegin;
611   if (aij->donotstash || mat->nooffprocentries) {
612     PetscFunctionReturn(0);
613   }
614 
615   /* make sure all processors are either in INSERTMODE or ADDMODE */
616   ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,((PetscObject)mat)->comm);CHKERRQ(ierr);
617   if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
618   mat->insertmode = addv; /* in case this processor had no cache */
619 
620   ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
621   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
622   ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
623   PetscFunctionReturn(0);
624 }
625 
626 #undef __FUNCT__
627 #define __FUNCT__ "MatAssemblyEnd_MPIAIJ"
628 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
629 {
630   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
631   Mat_SeqAIJ     *a=(Mat_SeqAIJ *)aij->A->data;
632   PetscErrorCode ierr;
633   PetscMPIInt    n;
634   PetscInt       i,j,rstart,ncols,flg;
635   PetscInt       *row,*col;
636   PetscBool      other_disassembled;
637   PetscScalar    *val;
638   InsertMode     addv = mat->insertmode;
639 
640   /* do not use 'b = (Mat_SeqAIJ *)aij->B->data' as B can be reset in disassembly */
641   PetscFunctionBegin;
642   if (!aij->donotstash && !mat->nooffprocentries) {
643     while (1) {
644       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
645       if (!flg) break;
646 
647       for (i=0; i<n;) {
648         /* Now identify the consecutive vals belonging to the same row */
649         for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
650         if (j < n) ncols = j-i;
651         else       ncols = n-i;
652         /* Now assemble all these values with a single function call */
653         ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
654         i = j;
655       }
656     }
657     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
658   }
659   ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
660   ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);
661 
662   /* determine if any processor has disassembled; if so we must
663      also disassemble ourselves, in order that we may reassemble. */
664   /*
665      if nonzero structure of submatrix B cannot change then we know that
666      no processor disassembled thus we can skip this stuff
667   */
668   if (!((Mat_SeqAIJ*)aij->B->data)->nonew)  {
669     ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,((PetscObject)mat)->comm);CHKERRQ(ierr);
670     if (mat->was_assembled && !other_disassembled) {
671       ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
672     }
673   }
674   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
675     ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
676   }
677   ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr);
678   ierr = MatSetOption(aij->B,MAT_CHECK_COMPRESSED_ROW,PETSC_FALSE);CHKERRQ(ierr);
679   ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
680   ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);
681 
682   ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
683   aij->rowvalues = 0;
684 
685   /* used by MatAXPY() */
686   a->xtoy = 0; ((Mat_SeqAIJ *)aij->B->data)->xtoy = 0;  /* b->xtoy = 0 */
687   a->XtoY = 0; ((Mat_SeqAIJ *)aij->B->data)->XtoY = 0;  /* b->XtoY = 0 */
688 
689   ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
690   if (a->inode.size) mat->ops->multdiagonalblock = MatMultDiagonalBlock_MPIAIJ;
691   PetscFunctionReturn(0);
692 }
693 
694 #undef __FUNCT__
695 #define __FUNCT__ "MatZeroEntries_MPIAIJ"
696 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
697 {
698   Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
699   PetscErrorCode ierr;
700 
701   PetscFunctionBegin;
702   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
703   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
704   PetscFunctionReturn(0);
705 }
706 
707 #undef __FUNCT__
708 #define __FUNCT__ "MatZeroRows_MPIAIJ"
709 PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
710 {
711   Mat_MPIAIJ        *l = (Mat_MPIAIJ*)A->data;
712   PetscErrorCode    ierr;
713   PetscMPIInt       size = l->size,imdex,n,rank = l->rank,tag = ((PetscObject)A)->tag,lastidx = -1;
714   PetscInt          i,*owners = A->rmap->range;
715   PetscInt          *nprocs,j,idx,nsends,row;
716   PetscInt          nmax,*svalues,*starts,*owner,nrecvs;
717   PetscInt          *rvalues,count,base,slen,*source;
718   PetscInt          *lens,*lrows,*values,rstart=A->rmap->rstart;
719   MPI_Comm          comm = ((PetscObject)A)->comm;
720   MPI_Request       *send_waits,*recv_waits;
721   MPI_Status        recv_status,*send_status;
722   const PetscScalar *xx;
723   PetscScalar       *bb;
724 #if defined(PETSC_DEBUG)
725   PetscBool      found = PETSC_FALSE;
726 #endif
727 
728   PetscFunctionBegin;
729   /*  first count number of contributors to each processor */
730   ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
731   ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
732   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/
733   j = 0;
734   for (i=0; i<N; i++) {
735     if (lastidx > (idx = rows[i])) j = 0;
736     lastidx = idx;
737     for (; j<size; j++) {
738       if (idx >= owners[j] && idx < owners[j+1]) {
739         nprocs[2*j]++;
740         nprocs[2*j+1] = 1;
741         owner[i] = j;
742 #if defined(PETSC_DEBUG)
743         found = PETSC_TRUE;
744 #endif
745         break;
746       }
747     }
748 #if defined(PETSC_DEBUG)
749     if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
750     found = PETSC_FALSE;
751 #endif
752   }
753   nsends = 0;  for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}
754 
755   if (A->nooffproczerorows) {
756     if (nsends > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"You called MatSetOption(,MAT_NO_OFF_PROC_ZERO_ROWS,PETSC_TRUE) but set an off process zero row");
757     nrecvs = nsends;
758     nmax   = N;
759   } else {
760     /* inform other processors of number of messages and max length*/
761     ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);
762   }
763 
764   /* post receives:   */
765   ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr);
766   ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
767   for (i=0; i<nrecvs; i++) {
768     ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
769   }
770 
771   /* do sends:
772       1) starts[i] gives the starting index in svalues for stuff going to
773          the ith processor
774   */
775   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr);
776   ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
777   ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr);
778   starts[0] = 0;
779   for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
780   for (i=0; i<N; i++) {
781     svalues[starts[owner[i]]++] = rows[i];
782   }
783 
784   starts[0] = 0;
785   for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
786   count = 0;
787   for (i=0; i<size; i++) {
788     if (nprocs[2*i+1]) {
789       ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
790     }
791   }
792   ierr = PetscFree(starts);CHKERRQ(ierr);
793 
794   base = owners[rank];
795 
796   /*  wait on receives */
797   ierr   = PetscMalloc2(nrecvs,PetscInt,&lens,nrecvs,PetscInt,&source);CHKERRQ(ierr);
798   count  = nrecvs; slen = 0;
799   while (count) {
800     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
801     /* unpack receives into our local space */
802     ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);
803     source[imdex]  = recv_status.MPI_SOURCE;
804     lens[imdex]    = n;
805     slen          += n;
806     count--;
807   }
808   ierr = PetscFree(recv_waits);CHKERRQ(ierr);
809 
810   /* move the data into the send scatter */
811   ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr);
812   count = 0;
813   for (i=0; i<nrecvs; i++) {
814     values = rvalues + i*nmax;
815     for (j=0; j<lens[i]; j++) {
816       lrows[count++] = values[j] - base;
817     }
818   }
819   ierr = PetscFree(rvalues);CHKERRQ(ierr);
820   ierr = PetscFree2(lens,source);CHKERRQ(ierr);
821   ierr = PetscFree(owner);CHKERRQ(ierr);
822   ierr = PetscFree(nprocs);CHKERRQ(ierr);
823 
824   /* fix right hand side if needed */
825   if (x && b) {
826     ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr);
827     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
828     for (i=0; i<slen; i++) {
829       bb[lrows[i]] = diag*xx[lrows[i]];
830     }
831     ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr);
832     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
833   }
834   /*
835         Zero the required rows. If the "diagonal block" of the matrix
836      is square and the user wishes to set the diagonal we use separate
837      code so that MatSetValues() is not called for each diagonal entry;
838      doing so would allocate new memory repeatedly (lots of mallocs) and slow things down.
839 
840   */
841   /* must zero l->B before l->A because the (diag) case below may put values into l->B*/
842   ierr = MatZeroRows(l->B,slen,lrows,0.0,0,0);CHKERRQ(ierr);
843   if ((diag != 0.0) && (l->A->rmap->N == l->A->cmap->N)) {
844     ierr = MatZeroRows(l->A,slen,lrows,diag,0,0);CHKERRQ(ierr);
845   } else if (diag != 0.0) {
846     ierr = MatZeroRows(l->A,slen,lrows,0.0,0,0);CHKERRQ(ierr);
847     if (((Mat_SeqAIJ*)l->A->data)->nonew) {
848       SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\n\
849 MAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
850     }
851     for (i = 0; i < slen; i++) {
852       row  = lrows[i] + rstart;
853       ierr = MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr);
854     }
855     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
856     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
857   } else {
858     ierr = MatZeroRows(l->A,slen,lrows,0.0,0,0);CHKERRQ(ierr);
859   }
860   ierr = PetscFree(lrows);CHKERRQ(ierr);
861 
862   /* wait on sends */
863   if (nsends) {
864     ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
865     ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
866     ierr = PetscFree(send_status);CHKERRQ(ierr);
867   }
868   ierr = PetscFree(send_waits);CHKERRQ(ierr);
869   ierr = PetscFree(svalues);CHKERRQ(ierr);
870 
871   PetscFunctionReturn(0);
872 }
873 
874 #undef __FUNCT__
875 #define __FUNCT__ "MatZeroRowsColumns_MPIAIJ"
876 PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
877 {
878   Mat_MPIAIJ        *l = (Mat_MPIAIJ*)A->data;
879   PetscErrorCode    ierr;
880   PetscMPIInt       size = l->size,imdex,n,rank = l->rank,tag = ((PetscObject)A)->tag,lastidx = -1;
881   PetscInt          i,*owners = A->rmap->range;
882   PetscInt          *nprocs,j,idx,nsends;
883   PetscInt          nmax,*svalues,*starts,*owner,nrecvs;
884   PetscInt          *rvalues,count,base,slen,*source;
885   PetscInt          *lens,*lrows,*values,m;
886   MPI_Comm          comm = ((PetscObject)A)->comm;
887   MPI_Request       *send_waits,*recv_waits;
888   MPI_Status        recv_status,*send_status;
889   const PetscScalar *xx;
890   PetscScalar       *bb,*mask;
891   Vec               xmask,lmask;
892   Mat_SeqAIJ        *aij = (Mat_SeqAIJ*)l->B->data;
893   const PetscInt    *aj, *ii,*ridx;
894   PetscScalar       *aa;
895 #if defined(PETSC_DEBUG)
896   PetscBool         found = PETSC_FALSE;
897 #endif
898 
899   PetscFunctionBegin;
900   /*  first count number of contributors to each processor */
901   ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
902   ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
903   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/
904   j = 0;
905   for (i=0; i<N; i++) {
906     if (lastidx > (idx = rows[i])) j = 0;
907     lastidx = idx;
908     for (; j<size; j++) {
909       if (idx >= owners[j] && idx < owners[j+1]) {
910         nprocs[2*j]++;
911         nprocs[2*j+1] = 1;
912         owner[i] = j;
913 #if defined(PETSC_DEBUG)
914         found = PETSC_TRUE;
915 #endif
916         break;
917       }
918     }
919 #if defined(PETSC_DEBUG)
920     if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
921     found = PETSC_FALSE;
922 #endif
923   }
924   nsends = 0;  for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}
925 
926   /* inform other processors of number of messages and max length*/
927   ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);
928 
929   /* post receives:   */
930   ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr);
931   ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
932   for (i=0; i<nrecvs; i++) {
933     ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
934   }
935 
936   /* do sends:
937       1) starts[i] gives the starting index in svalues for stuff going to
938          the ith processor
939   */
940   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr);
941   ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
942   ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr);
943   starts[0] = 0;
944   for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
945   for (i=0; i<N; i++) {
946     svalues[starts[owner[i]]++] = rows[i];
947   }
948 
949   starts[0] = 0;
950   for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
951   count = 0;
952   for (i=0; i<size; i++) {
953     if (nprocs[2*i+1]) {
954       ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
955     }
956   }
957   ierr = PetscFree(starts);CHKERRQ(ierr);
958 
959   base = owners[rank];
960 
961   /*  wait on receives */
962   ierr   = PetscMalloc2(nrecvs,PetscInt,&lens,nrecvs,PetscInt,&source);CHKERRQ(ierr);
963   count  = nrecvs; slen = 0;
964   while (count) {
965     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
966     /* unpack receives into our local space */
967     ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);
968     source[imdex]  = recv_status.MPI_SOURCE;
969     lens[imdex]    = n;
970     slen          += n;
971     count--;
972   }
973   ierr = PetscFree(recv_waits);CHKERRQ(ierr);
974 
975   /* move the data into the send scatter */
976   ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr);
977   count = 0;
978   for (i=0; i<nrecvs; i++) {
979     values = rvalues + i*nmax;
980     for (j=0; j<lens[i]; j++) {
981       lrows[count++] = values[j] - base;
982     }
983   }
984   ierr = PetscFree(rvalues);CHKERRQ(ierr);
985   ierr = PetscFree2(lens,source);CHKERRQ(ierr);
986   ierr = PetscFree(owner);CHKERRQ(ierr);
987   ierr = PetscFree(nprocs);CHKERRQ(ierr);
988   /* lrows are the local rows to be zeroed, slen is the number of local rows */
989 
990   /* zero diagonal part of matrix */
991   ierr = MatZeroRowsColumns(l->A,slen,lrows,diag,x,b);CHKERRQ(ierr);
992 
993   /* handle off diagonal part of matrix */
994   ierr = MatGetVecs(A,&xmask,PETSC_NULL);CHKERRQ(ierr);
995   ierr = VecDuplicate(l->lvec,&lmask);CHKERRQ(ierr);
996   ierr = VecGetArray(xmask,&bb);CHKERRQ(ierr);
997   for (i=0; i<slen; i++) {
998     bb[lrows[i]] = 1;
999   }
1000   ierr = VecRestoreArray(xmask,&bb);CHKERRQ(ierr);
1001   ierr = VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1002   ierr = VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1003   ierr = VecDestroy(&xmask);CHKERRQ(ierr);
1004   ierr = VecScatterBegin(l->Mvctx,x,l->lvec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1005   ierr = VecScatterEnd(l->Mvctx,x,l->lvec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1006   ierr = VecGetArrayRead(l->lvec,&xx);CHKERRQ(ierr);
1007   ierr = VecGetArray(lmask,&mask);CHKERRQ(ierr);
1008   ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
1009 
1010   /* remove zeroed rows of off diagonal matrix */
1011   ii = aij->i;
1012   for (i=0; i<slen; i++) {
1013     ierr = PetscMemzero(aij->a + ii[lrows[i]],(ii[lrows[i]+1] - ii[lrows[i]])*sizeof(PetscScalar));CHKERRQ(ierr);
1014   }
1015 
1016   /* loop over all elements of the off-process part of the matrix, zeroing the removed columns */
1017   if (aij->compressedrow.use){
1018     m    = aij->compressedrow.nrows;
1019     ii   = aij->compressedrow.i;
1020     ridx = aij->compressedrow.rindex;
1021     for (i=0; i<m; i++){
1022       n   = ii[i+1] - ii[i];
1023       aj  = aij->j + ii[i];
1024       aa  = aij->a + ii[i];
1025 
1026       for (j=0; j<n; j++) {
1027         if (PetscAbsScalar(mask[*aj])) {
1028           bb[*ridx] -= *aa*xx[*aj];
1029           *aa        = 0.0;
1030         }
1031         aa++;
1032         aj++;
1033       }
1034       ridx++;
1035     }
1036   } else { /* do not use compressed row format */
1037     m = l->B->rmap->n;
1038    for (i=0; i<m; i++) {
1039      n   = ii[i+1] - ii[i];
1040      aj  = aij->j + ii[i];
1041      aa  = aij->a + ii[i];
1042      for (j=0; j<n; j++) {
1043         if (PetscAbsScalar(mask[*aj])) {
1044           bb[i] -= *aa*xx[*aj];
1045           *aa    = 0.0;
1046         }
1047         aa++;
1048         aj++;
1049       }
1050     }
1051   }
1052 
1053   ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
1054   ierr = VecRestoreArray(lmask,&mask);CHKERRQ(ierr);
1055   ierr = VecRestoreArrayRead(l->lvec,&xx);CHKERRQ(ierr);
1056   ierr = VecDestroy(&lmask);CHKERRQ(ierr);
1057   ierr = PetscFree(lrows);CHKERRQ(ierr);
1058 
1059   /* wait on sends */
1060   if (nsends) {
1061     ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
1062     ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
1063     ierr = PetscFree(send_status);CHKERRQ(ierr);
1064   }
1065   ierr = PetscFree(send_waits);CHKERRQ(ierr);
1066   ierr = PetscFree(svalues);CHKERRQ(ierr);
1067 
1068   PetscFunctionReturn(0);
1069 }
1070 
1071 #undef __FUNCT__
1072 #define __FUNCT__ "MatMult_MPIAIJ"
1073 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
1074 {
1075   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1076   PetscErrorCode ierr;
1077   PetscInt       nt;
1078 
1079   PetscFunctionBegin;
1080   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
1081   if (nt != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt);
1082   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1083   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
1084   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1085   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
1086   PetscFunctionReturn(0);
1087 }
1088 
1089 #undef __FUNCT__
1090 #define __FUNCT__ "MatMultDiagonalBlock_MPIAIJ"
1091 PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx)
1092 {
1093   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1094   PetscErrorCode ierr;
1095 
1096   PetscFunctionBegin;
1097   ierr = MatMultDiagonalBlock(a->A,bb,xx);CHKERRQ(ierr);
1098   PetscFunctionReturn(0);
1099 }
1100 
1101 #undef __FUNCT__
1102 #define __FUNCT__ "MatMultAdd_MPIAIJ"
1103 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1104 {
1105   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1106   PetscErrorCode ierr;
1107 
1108   PetscFunctionBegin;
1109   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1110   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1111   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1112   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
1113   PetscFunctionReturn(0);
1114 }
1115 
1116 #undef __FUNCT__
1117 #define __FUNCT__ "MatMultTranspose_MPIAIJ"
1118 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
1119 {
1120   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1121   PetscErrorCode ierr;
1122   PetscBool      merged;
1123 
1124   PetscFunctionBegin;
1125   ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr);
1126   /* do nondiagonal part */
1127   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1128   if (!merged) {
1129     /* send it on its way */
1130     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1131     /* do local part */
1132     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
1133     /* receive remote parts: note this assumes the values are not actually */
1134     /* added into yy until the next line */
1135     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1136   } else {
1137     /* do local part */
1138     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
1139     /* send it on its way */
1140     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1141     /* values actually were received in the Begin() but we need to call this nop */
1142     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1143   }
1144   PetscFunctionReturn(0);
1145 }
1146 
1147 EXTERN_C_BEGIN
1148 #undef __FUNCT__
1149 #define __FUNCT__ "MatIsTranspose_MPIAIJ"
1150 PetscErrorCode  MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscBool  *f)
1151 {
1152   MPI_Comm       comm;
1153   Mat_MPIAIJ     *Aij = (Mat_MPIAIJ *) Amat->data, *Bij;
1154   Mat            Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs;
1155   IS             Me,Notme;
1156   PetscErrorCode ierr;
1157   PetscInt       M,N,first,last,*notme,i;
1158   PetscMPIInt    size;
1159 
1160   PetscFunctionBegin;
1161 
1162   /* Easy test: the diagonal blocks must be transposes of each other */
1163   Bij = (Mat_MPIAIJ *) Bmat->data; Bdia = Bij->A;
1164   ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr);
1165   if (!*f) PetscFunctionReturn(0);
1166   ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr);
1167   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1168   if (size == 1) PetscFunctionReturn(0);
1169 
1170   /* Hard test: off-diagonal block. This takes a MatGetSubMatrix. */
1171   ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr);
1172   ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr);
1173   ierr = PetscMalloc((N-last+first)*sizeof(PetscInt),&notme);CHKERRQ(ierr);
1174   for (i=0; i<first; i++) notme[i] = i;
1175   for (i=last; i<M; i++) notme[i-last+first] = i;
1176   ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,PETSC_COPY_VALUES,&Notme);CHKERRQ(ierr);
1177   ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr);
1178   ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr);
1179   Aoff = Aoffs[0];
1180   ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr);
1181   Boff = Boffs[0];
1182   ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr);
1183   ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr);
1184   ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr);
1185   ierr = ISDestroy(&Me);CHKERRQ(ierr);
1186   ierr = ISDestroy(&Notme);CHKERRQ(ierr);
1187   ierr = PetscFree(notme);CHKERRQ(ierr);
1188    PetscFunctionReturn(0);
1189 }
1190 EXTERN_C_END
1191 
1192 #undef __FUNCT__
1193 #define __FUNCT__ "MatMultTransposeAdd_MPIAIJ"
1194 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1195 {
1196   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1197   PetscErrorCode ierr;
1198 
1199   PetscFunctionBegin;
1200   /* do nondiagonal part */
1201   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1202   /* send it on its way */
1203   ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1204   /* do local part */
1205   ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1206   /* receive remote parts */
1207   ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1208   PetscFunctionReturn(0);
1209 }
1210 
1211 /*
1212   This only works correctly for square matrices where the subblock A->A is the
1213    diagonal block
1214 */
1215 #undef __FUNCT__
1216 #define __FUNCT__ "MatGetDiagonal_MPIAIJ"
1217 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v)
1218 {
1219   PetscErrorCode ierr;
1220   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1221 
1222   PetscFunctionBegin;
1223   if (A->rmap->N != A->cmap->N) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
1224   if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
1225   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
1226   PetscFunctionReturn(0);
1227 }
1228 
1229 #undef __FUNCT__
1230 #define __FUNCT__ "MatScale_MPIAIJ"
1231 PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa)
1232 {
1233   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1234   PetscErrorCode ierr;
1235 
1236   PetscFunctionBegin;
1237   ierr = MatScale(a->A,aa);CHKERRQ(ierr);
1238   ierr = MatScale(a->B,aa);CHKERRQ(ierr);
1239   PetscFunctionReturn(0);
1240 }
1241 
1242 #undef __FUNCT__
1243 #define __FUNCT__ "MatDestroy_MPIAIJ"
1244 PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
1245 {
1246   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1247   PetscErrorCode ierr;
1248 
1249   PetscFunctionBegin;
1250 #if defined(PETSC_USE_LOG)
1251   PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N);
1252 #endif
1253   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
1254   ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
1255   ierr = MatDestroy(&aij->A);CHKERRQ(ierr);
1256   ierr = MatDestroy(&aij->B);CHKERRQ(ierr);
1257 #if defined (PETSC_USE_CTABLE)
1258   ierr = PetscTableDestroy(&aij->colmap);CHKERRQ(ierr);
1259 #else
1260   ierr = PetscFree(aij->colmap);CHKERRQ(ierr);
1261 #endif
1262   ierr = PetscFree(aij->garray);CHKERRQ(ierr);
1263   ierr = VecDestroy(&aij->lvec);CHKERRQ(ierr);
1264   ierr = VecScatterDestroy(&aij->Mvctx);CHKERRQ(ierr);
1265   ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
1266   ierr = PetscFree(aij->ld);CHKERRQ(ierr);
1267   ierr = PetscFree(mat->data);CHKERRQ(ierr);
1268 
1269   ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
1270   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr);
1271   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr);
1272   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr);
1273   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C","",PETSC_NULL);CHKERRQ(ierr);
1274   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
1275   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr);
1276   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C","",PETSC_NULL);CHKERRQ(ierr);
1277   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C","",PETSC_NULL);CHKERRQ(ierr);
1278   PetscFunctionReturn(0);
1279 }
1280 
1281 #undef __FUNCT__
1282 #define __FUNCT__ "MatView_MPIAIJ_Binary"
1283 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
1284 {
1285   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
1286   Mat_SeqAIJ*       A = (Mat_SeqAIJ*)aij->A->data;
1287   Mat_SeqAIJ*       B = (Mat_SeqAIJ*)aij->B->data;
1288   PetscErrorCode    ierr;
1289   PetscMPIInt       rank,size,tag = ((PetscObject)viewer)->tag;
1290   int               fd;
1291   PetscInt          nz,header[4],*row_lengths,*range=0,rlen,i;
1292   PetscInt          nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap->rstart,rnz;
1293   PetscScalar       *column_values;
1294   PetscInt          message_count,flowcontrolcount;
1295 
1296   PetscFunctionBegin;
1297   ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr);
1298   ierr = MPI_Comm_size(((PetscObject)mat)->comm,&size);CHKERRQ(ierr);
1299   nz   = A->nz + B->nz;
1300   if (!rank) {
1301     header[0] = MAT_FILE_CLASSID;
1302     header[1] = mat->rmap->N;
1303     header[2] = mat->cmap->N;
1304     ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
1305     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1306     ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1307     /* get largest number of rows any processor has */
1308     rlen = mat->rmap->n;
1309     range = mat->rmap->range;
1310     for (i=1; i<size; i++) {
1311       rlen = PetscMax(rlen,range[i+1] - range[i]);
1312     }
1313   } else {
1314     ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
1315     rlen = mat->rmap->n;
1316   }
1317 
1318   /* load up the local row counts */
1319   ierr = PetscMalloc((rlen+1)*sizeof(PetscInt),&row_lengths);CHKERRQ(ierr);
1320   for (i=0; i<mat->rmap->n; i++) {
1321     row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
1322   }
1323 
1324   /* store the row lengths to the file */
1325   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1326   if (!rank) {
1327     MPI_Status status;
1328     ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap->n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1329     for (i=1; i<size; i++) {
1330       ierr = PetscViewerFlowControlStepMaster(viewer,i,message_count,flowcontrolcount);CHKERRQ(ierr);
1331       rlen = range[i+1] - range[i];
1332       ierr = MPI_Recv(row_lengths,rlen,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
1333       ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1334     }
1335     ierr = PetscViewerFlowControlEndMaster(viewer,message_count);CHKERRQ(ierr);
1336   } else {
1337     ierr = PetscViewerFlowControlStepWorker(viewer,rank,message_count);CHKERRQ(ierr);
1338     ierr = MPI_Send(row_lengths,mat->rmap->n,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1339     ierr = PetscViewerFlowControlEndWorker(viewer,message_count);CHKERRQ(ierr);
1340   }
1341   ierr = PetscFree(row_lengths);CHKERRQ(ierr);
1342 
1343   /* load up the local column indices */
1344   nzmax = nz; /* 0th processor needs space that the largest processor needs */
1345   ierr = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
1346   ierr = PetscMalloc((nzmax+1)*sizeof(PetscInt),&column_indices);CHKERRQ(ierr);
1347   cnt  = 0;
1348   for (i=0; i<mat->rmap->n; i++) {
1349     for (j=B->i[i]; j<B->i[i+1]; j++) {
1350       if ( (col = garray[B->j[j]]) > cstart) break;
1351       column_indices[cnt++] = col;
1352     }
1353     for (k=A->i[i]; k<A->i[i+1]; k++) {
1354       column_indices[cnt++] = A->j[k] + cstart;
1355     }
1356     for (; j<B->i[i+1]; j++) {
1357       column_indices[cnt++] = garray[B->j[j]];
1358     }
1359   }
1360   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
1361 
1362   /* store the column indices to the file */
1363   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1364   if (!rank) {
1365     MPI_Status status;
1366     ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1367     for (i=1; i<size; i++) {
1368       ierr = PetscViewerFlowControlStepMaster(viewer,i,message_count,flowcontrolcount);CHKERRQ(ierr);
1369       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
1370       if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: rnz = %D nzmax = %D",rnz,nzmax);
1371       ierr = MPI_Recv(column_indices,rnz,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
1372       ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1373     }
1374     ierr = PetscViewerFlowControlEndMaster(viewer,message_count);CHKERRQ(ierr);
1375   } else {
1376     ierr = PetscViewerFlowControlStepWorker(viewer,rank,message_count);CHKERRQ(ierr);
1377     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1378     ierr = MPI_Send(column_indices,nz,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1379     ierr = PetscViewerFlowControlEndWorker(viewer,message_count);CHKERRQ(ierr);
1380   }
1381   ierr = PetscFree(column_indices);CHKERRQ(ierr);
1382 
1383   /* load up the local column values */
1384   ierr = PetscMalloc((nzmax+1)*sizeof(PetscScalar),&column_values);CHKERRQ(ierr);
1385   cnt  = 0;
1386   for (i=0; i<mat->rmap->n; i++) {
1387     for (j=B->i[i]; j<B->i[i+1]; j++) {
1388       if ( garray[B->j[j]] > cstart) break;
1389       column_values[cnt++] = B->a[j];
1390     }
1391     for (k=A->i[i]; k<A->i[i+1]; k++) {
1392       column_values[cnt++] = A->a[k];
1393     }
1394     for (; j<B->i[i+1]; j++) {
1395       column_values[cnt++] = B->a[j];
1396     }
1397   }
1398   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
1399 
1400   /* store the column values to the file */
1401   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1402   if (!rank) {
1403     MPI_Status status;
1404     ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1405     for (i=1; i<size; i++) {
1406       ierr = PetscViewerFlowControlStepMaster(viewer,i,message_count,flowcontrolcount);CHKERRQ(ierr);
1407       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
1408       if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: rnz = %D nzmax = %D",rnz,nzmax);
1409       ierr = MPI_Recv(column_values,rnz,MPIU_SCALAR,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
1410       ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1411     }
1412     ierr = PetscViewerFlowControlEndMaster(viewer,message_count);CHKERRQ(ierr);
1413   } else {
1414     ierr = PetscViewerFlowControlStepWorker(viewer,rank,message_count);CHKERRQ(ierr);
1415     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1416     ierr = MPI_Send(column_values,nz,MPIU_SCALAR,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1417     ierr = PetscViewerFlowControlEndWorker(viewer,message_count);CHKERRQ(ierr);
1418   }
1419   ierr = PetscFree(column_values);CHKERRQ(ierr);
1420   PetscFunctionReturn(0);
1421 }
1422 
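/*
   Illustrative sketch (not part of PETSc; the helper name and file name are hypothetical):
   how a caller could exercise the binary path above.  On a multi-process communicator
   MatView() reaches MatView_MPIAIJ_Binary(), which writes the header, the row lengths,
   the column indices, and finally the values, with rank 0 collecting each piece in turn.
*/
#if 0
static PetscErrorCode ExampleSaveMatrixBinary(Mat A)
{
  PetscErrorCode ierr;
  PetscViewer    viewer;

  PetscFunctionBegin;
  ierr = PetscViewerBinaryOpen(((PetscObject)A)->comm,"A.dat",FILE_MODE_WRITE,&viewer);CHKERRQ(ierr);
  ierr = MatView(A,viewer);CHKERRQ(ierr);           /* dispatches to the binary writer above */
  ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#endif
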
1423 #undef __FUNCT__
1424 #define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket"
1425 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
1426 {
1427   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
1428   PetscErrorCode    ierr;
1429   PetscMPIInt       rank = aij->rank,size = aij->size;
1430   PetscBool         isdraw,iascii,isbinary;
1431   PetscViewer       sviewer;
1432   PetscViewerFormat format;
1433 
1434   PetscFunctionBegin;
1435   ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1436   ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1437   ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1438   if (iascii) {
1439     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1440     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1441       MatInfo    info;
1442       PetscBool  inodes;
1443 
1444       ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr);
1445       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
1446       ierr = MatInodeGetInodeSizes(aij->A,PETSC_NULL,(PetscInt **)&inodes,PETSC_NULL);CHKERRQ(ierr);
1447       ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_TRUE);CHKERRQ(ierr);
1448       if (!inodes) {
1449         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, not using I-node routines\n",
1450 					      rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
1451       } else {
1452         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, using I-node routines\n",
1453 		    rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
1454       }
1455       ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
1456       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1457       ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
1458       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1459       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1460       ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_FALSE);CHKERRQ(ierr);
1461       ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
1462       ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr);
1463       PetscFunctionReturn(0);
1464     } else if (format == PETSC_VIEWER_ASCII_INFO) {
1465       PetscInt   inodecount,inodelimit,*inodes;
1466       ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr);
1467       if (inodes) {
1468         ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr);
1469       } else {
1470         ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr);
1471       }
1472       PetscFunctionReturn(0);
1473     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1474       PetscFunctionReturn(0);
1475     }
1476   } else if (isbinary) {
1477     if (size == 1) {
1478       ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1479       ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
1480     } else {
1481       ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr);
1482     }
1483     PetscFunctionReturn(0);
1484   } else if (isdraw) {
1485     PetscDraw  draw;
1486     PetscBool  isnull;
1487     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
1488     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
1489   }
1490 
1491   if (size == 1) {
1492     ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1493     ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
1494   } else {
1495     /* assemble the entire matrix onto first processor. */
1496     Mat         A;
1497     Mat_SeqAIJ  *Aloc;
1498     PetscInt    M = mat->rmap->N,N = mat->cmap->N,m,*ai,*aj,row,*cols,i,*ct;
1499     MatScalar   *a;
1500 
1501     if (mat->rmap->N > 1024) {
1502       PetscBool  flg = PETSC_FALSE;
1503 
1504       ierr = PetscOptionsGetBool(((PetscObject) mat)->prefix, "-mat_ascii_output_large", &flg,PETSC_NULL);CHKERRQ(ierr);
1505       if (!flg) {
1506         SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_OUTOFRANGE,"ASCII matrix output not allowed for matrices with more than 1024 rows, use binary format instead.\nYou can override this restriction using -mat_ascii_output_large.");
1507       }
1508     }
1509 
1510     ierr = MatCreate(((PetscObject)mat)->comm,&A);CHKERRQ(ierr);
1511     if (!rank) {
1512       ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
1513     } else {
1514       ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
1515     }
1516     /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */
1517     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
1518     ierr = MatMPIAIJSetPreallocation(A,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
1519     ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr);
1520 
1521     /* copy over the A part */
1522     Aloc = (Mat_SeqAIJ*)aij->A->data;
1523     m = aij->A->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1524     row = mat->rmap->rstart;
1525     for (i=0; i<ai[m]; i++) {aj[i] += mat->cmap->rstart;}
1526     for (i=0; i<m; i++) {
1527       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
1528       row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
1529     }
1530     aj = Aloc->j;
1531     for (i=0; i<ai[m]; i++) {aj[i] -= mat->cmap->rstart;}
1532 
1533     /* copy over the B part */
1534     Aloc = (Mat_SeqAIJ*)aij->B->data;
1535     m    = aij->B->rmap->n;  ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1536     row  = mat->rmap->rstart;
1537     ierr = PetscMalloc((ai[m]+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr);
1538     ct   = cols;
1539     for (i=0; i<ai[m]; i++) {cols[i] = aij->garray[aj[i]];}
1540     for (i=0; i<m; i++) {
1541       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
1542       row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
1543     }
1544     ierr = PetscFree(ct);CHKERRQ(ierr);
1545     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1546     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1547     /*
1548        Every process has to make this call to draw the matrix since the graphics waits
1549        are synchronized across all processes that share the PetscDraw object
1550     */
1551     ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
1552     if (!rank) {
1553       ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1554       /* Set the type name to MATMPIAIJ so that the correct type can be printed out by PetscObjectPrintClassNamePrefixType() in MatView_SeqAIJ_ASCII()*/
1555       ierr = PetscStrcpy(((PetscObject)((Mat_MPIAIJ*)(A->data))->A)->type_name,MATMPIAIJ);CHKERRQ(ierr);
1556       ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
1557     }
1558     ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
1559     ierr = MatDestroy(&A);CHKERRQ(ierr);
1560   }
1561   PetscFunctionReturn(0);
1562 }
1563 
1564 #undef __FUNCT__
1565 #define __FUNCT__ "MatView_MPIAIJ"
1566 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer)
1567 {
1568   PetscErrorCode ierr;
1569   PetscBool      iascii,isdraw,issocket,isbinary;
1570 
1571   PetscFunctionBegin;
1572   ierr  = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1573   ierr  = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1574   ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1575   ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
1576   if (iascii || isdraw || isbinary || issocket) {
1577     ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
1578   } else {
1579     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Viewer type %s not supported by MPIAIJ matrices",((PetscObject)viewer)->type_name);
1580   }
1581   PetscFunctionReturn(0);
1582 }
1583 
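/*
   Illustrative sketch (hypothetical helper): requesting the ASCII_INFO summary handled by
   MatView_MPIAIJ_ASCIIorDraworSocket() above, which reports I-node usage on process 0.
*/
#if 0
static PetscErrorCode ExampleViewMatrixInfo(Mat A)
{
  PetscErrorCode ierr;
  PetscViewer    viewer = PETSC_VIEWER_STDOUT_(((PetscObject)A)->comm);

  PetscFunctionBegin;
  ierr = PetscViewerSetFormat(viewer,PETSC_VIEWER_ASCII_INFO);CHKERRQ(ierr);
  ierr = MatView(A,viewer);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#endif
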
1584 #undef __FUNCT__
1585 #define __FUNCT__ "MatSOR_MPIAIJ"
1586 PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
1587 {
1588   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1589   PetscErrorCode ierr;
1590   Vec            bb1 = 0;
1591   PetscBool      hasop;
1592 
1593   PetscFunctionBegin;
1594   if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) {
1595     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
1596   }
1597 
1598   if (flag == SOR_APPLY_UPPER) {
1599     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1600     PetscFunctionReturn(0);
1601   }
1602 
1603   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
1604     if (flag & SOR_ZERO_INITIAL_GUESS) {
1605       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1606       its--;
1607     }
1608 
1609     while (its--) {
1610       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1611       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1612 
1613       /* update rhs: bb1 = bb - B*x */
1614       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1615       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1616 
1617       /* local sweep */
1618       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1619     }
1620   } else if (flag & SOR_LOCAL_FORWARD_SWEEP){
1621     if (flag & SOR_ZERO_INITIAL_GUESS) {
1622       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1623       its--;
1624     }
1625     while (its--) {
1626       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1627       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1628 
1629       /* update rhs: bb1 = bb - B*x */
1630       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1631       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1632 
1633       /* local sweep */
1634       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1635     }
1636   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){
1637     if (flag & SOR_ZERO_INITIAL_GUESS) {
1638       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1639       its--;
1640     }
1641     while (its--) {
1642       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1643       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1644 
1645       /* update rhs: bb1 = bb - B*x */
1646       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1647       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1648 
1649       /* local sweep */
1650       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1651     }
1652   }  else if (flag & SOR_EISENSTAT) {
1653     Vec         xx1;
1654 
1655     ierr = VecDuplicate(bb,&xx1);CHKERRQ(ierr);
1656     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr);
1657 
1658     ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1659     ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1660     if (!mat->diag) {
1661       ierr = MatGetVecs(matin,&mat->diag,PETSC_NULL);CHKERRQ(ierr);
1662       ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr);
1663     }
1664     ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr);
1665     if (hasop) {
1666       ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr);
1667     } else {
1668       ierr = VecPointwiseMult(bb1,mat->diag,xx);CHKERRQ(ierr);
1669     }
1670     ierr = VecAYPX(bb1,(omega-2.0)/omega,bb);CHKERRQ(ierr);
1671 
1672     ierr = MatMultAdd(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr);
1673 
1674     /* local sweep */
1675     ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr);
1676     ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr);
1677     ierr = VecDestroy(&xx1);CHKERRQ(ierr);
1678   } else SETERRQ(((PetscObject)matin)->comm,PETSC_ERR_SUP,"Parallel SOR not supported");
1679 
1680   ierr = VecDestroy(&bb1);CHKERRQ(ierr);
1681   PetscFunctionReturn(0);
1682 }
1683 
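/*
   Illustrative sketch (hypothetical helper): driving the processor-local SOR implemented
   above.  Each outer iteration scatters the current x to refresh the off-diagonal
   contribution B*x and then runs local symmetric sweeps on the diagonal block.
*/
#if 0
static PetscErrorCode ExampleLocalSORSmoother(Mat A,Vec b,Vec x)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* two outer iterations, one local sweep each, omega = 1.0, no diagonal shift */
  ierr = MatSOR(A,b,1.0,(MatSORType)(SOR_LOCAL_SYMMETRIC_SWEEP | SOR_ZERO_INITIAL_GUESS),0.0,2,1,x);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#endif
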
1684 #undef __FUNCT__
1685 #define __FUNCT__ "MatPermute_MPIAIJ"
1686 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B)
1687 {
1688   MPI_Comm       comm,pcomm;
1689   PetscInt       first,local_size,nrows;
1690   const PetscInt *rows;
1691   PetscMPIInt    size;
1692   IS             crowp,growp,irowp,lrowp,lcolp,icolp;
1693   PetscErrorCode ierr;
1694 
1695   PetscFunctionBegin;
1696   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
1697   /* make a collective version of 'rowp' */
1698   ierr = PetscObjectGetComm((PetscObject)rowp,&pcomm);CHKERRQ(ierr);
1699   if (pcomm==comm) {
1700     crowp = rowp;
1701   } else {
1702     ierr = ISGetSize(rowp,&nrows);CHKERRQ(ierr);
1703     ierr = ISGetIndices(rowp,&rows);CHKERRQ(ierr);
1704     ierr = ISCreateGeneral(comm,nrows,rows,PETSC_COPY_VALUES,&crowp);CHKERRQ(ierr);
1705     ierr = ISRestoreIndices(rowp,&rows);CHKERRQ(ierr);
1706   }
1707   /* collect the global row permutation and invert it */
1708   ierr = ISAllGather(crowp,&growp);CHKERRQ(ierr);
1709   ierr = ISSetPermutation(growp);CHKERRQ(ierr);
1710   if (pcomm!=comm) {
1711     ierr = ISDestroy(&crowp);CHKERRQ(ierr);
1712   }
1713   ierr = ISInvertPermutation(growp,PETSC_DECIDE,&irowp);CHKERRQ(ierr);
1714   /* get the local target indices */
1715   ierr = MatGetOwnershipRange(A,&first,PETSC_NULL);CHKERRQ(ierr);
1716   ierr = MatGetLocalSize(A,&local_size,PETSC_NULL);CHKERRQ(ierr);
1717   ierr = ISGetIndices(irowp,&rows);CHKERRQ(ierr);
1718   ierr = ISCreateGeneral(MPI_COMM_SELF,local_size,rows+first,PETSC_COPY_VALUES,&lrowp);CHKERRQ(ierr);
1719   ierr = ISRestoreIndices(irowp,&rows);CHKERRQ(ierr);
1720   ierr = ISDestroy(&irowp);CHKERRQ(ierr);
1721   /* the column permutation is so much easier;
1722      make a local version of 'colp' and invert it */
1723   ierr = PetscObjectGetComm((PetscObject)colp,&pcomm);CHKERRQ(ierr);
1724   ierr = MPI_Comm_size(pcomm,&size);CHKERRQ(ierr);
1725   if (size==1) {
1726     lcolp = colp;
1727   } else {
1728     ierr = ISGetSize(colp,&nrows);CHKERRQ(ierr);
1729     ierr = ISGetIndices(colp,&rows);CHKERRQ(ierr);
1730     ierr = ISCreateGeneral(MPI_COMM_SELF,nrows,rows,PETSC_COPY_VALUES,&lcolp);CHKERRQ(ierr);
1731   }
1732   ierr = ISSetPermutation(lcolp);CHKERRQ(ierr);
1733   ierr = ISInvertPermutation(lcolp,PETSC_DECIDE,&icolp);CHKERRQ(ierr);
1734   ierr = ISSetPermutation(icolp);CHKERRQ(ierr);
1735   if (size>1) {
1736     ierr = ISRestoreIndices(colp,&rows);CHKERRQ(ierr);
1737     ierr = ISDestroy(&lcolp);CHKERRQ(ierr);
1738   }
1739   /* now we just get the submatrix */
1740   ierr = MatGetSubMatrix_MPIAIJ_Private(A,lrowp,icolp,local_size,MAT_INITIAL_MATRIX,B);CHKERRQ(ierr);
1741   /* clean up */
1742   ierr = ISDestroy(&lrowp);CHKERRQ(ierr);
1743   ierr = ISDestroy(&icolp);CHKERRQ(ierr);
1744   PetscFunctionReturn(0);
1745 }
1746 
1747 #undef __FUNCT__
1748 #define __FUNCT__ "MatGetInfo_MPIAIJ"
1749 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1750 {
1751   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1752   Mat            A = mat->A,B = mat->B;
1753   PetscErrorCode ierr;
1754   PetscReal      isend[5],irecv[5];
1755 
1756   PetscFunctionBegin;
1757   info->block_size     = 1.0;
1758   ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1759   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1760   isend[3] = info->memory;  isend[4] = info->mallocs;
1761   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1762   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1763   isend[3] += info->memory;  isend[4] += info->mallocs;
1764   if (flag == MAT_LOCAL) {
1765     info->nz_used      = isend[0];
1766     info->nz_allocated = isend[1];
1767     info->nz_unneeded  = isend[2];
1768     info->memory       = isend[3];
1769     info->mallocs      = isend[4];
1770   } else if (flag == MAT_GLOBAL_MAX) {
1771     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,((PetscObject)matin)->comm);CHKERRQ(ierr);
1772     info->nz_used      = irecv[0];
1773     info->nz_allocated = irecv[1];
1774     info->nz_unneeded  = irecv[2];
1775     info->memory       = irecv[3];
1776     info->mallocs      = irecv[4];
1777   } else if (flag == MAT_GLOBAL_SUM) {
1778     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,((PetscObject)matin)->comm);CHKERRQ(ierr);
1779     info->nz_used      = irecv[0];
1780     info->nz_allocated = irecv[1];
1781     info->nz_unneeded  = irecv[2];
1782     info->memory       = irecv[3];
1783     info->mallocs      = irecv[4];
1784   }
1785   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1786   info->fill_ratio_needed = 0;
1787   info->factor_mallocs    = 0;
1788 
1789   PetscFunctionReturn(0);
1790 }
1791 
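/*
   Illustrative sketch (hypothetical helper): querying the counts reduced across the
   communicator by MatGetInfo_MPIAIJ() above; MAT_GLOBAL_MAX would report per-process
   maxima instead of sums.
*/
#if 0
static PetscErrorCode ExampleReportNonzeros(Mat A)
{
  PetscErrorCode ierr;
  MatInfo        info;

  PetscFunctionBegin;
  ierr = MatGetInfo(A,MAT_GLOBAL_SUM,&info);CHKERRQ(ierr);
  ierr = PetscPrintf(((PetscObject)A)->comm,"nz used %g  nz allocated %g  mallocs %g\n",
                     (double)info.nz_used,(double)info.nz_allocated,(double)info.mallocs);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#endif
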
1792 #undef __FUNCT__
1793 #define __FUNCT__ "MatSetOption_MPIAIJ"
1794 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscBool  flg)
1795 {
1796   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1797   PetscErrorCode ierr;
1798 
1799   PetscFunctionBegin;
1800   switch (op) {
1801   case MAT_NEW_NONZERO_LOCATIONS:
1802   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1803   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1804   case MAT_KEEP_NONZERO_PATTERN:
1805   case MAT_NEW_NONZERO_LOCATION_ERR:
1806   case MAT_USE_INODES:
1807   case MAT_IGNORE_ZERO_ENTRIES:
1808     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1809     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1810     break;
1811   case MAT_ROW_ORIENTED:
1812     a->roworiented = flg;
1813     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1814     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1815     break;
1816   case MAT_NEW_DIAGONALS:
1817     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1818     break;
1819   case MAT_IGNORE_OFF_PROC_ENTRIES:
1820     a->donotstash = PETSC_TRUE;
1821     break;
1822   case MAT_SPD:
1823     A->spd_set                         = PETSC_TRUE;
1824     A->spd                             = flg;
1825     if (flg) {
1826       A->symmetric                     = PETSC_TRUE;
1827       A->structurally_symmetric        = PETSC_TRUE;
1828       A->symmetric_set                 = PETSC_TRUE;
1829       A->structurally_symmetric_set    = PETSC_TRUE;
1830     }
1831     break;
1832   case MAT_SYMMETRIC:
1833     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1834     break;
1835   case MAT_STRUCTURALLY_SYMMETRIC:
1836     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1837     break;
1838   case MAT_HERMITIAN:
1839     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1840     break;
1841   case MAT_SYMMETRY_ETERNAL:
1842     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1843     break;
1844   default:
1845     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op);
1846   }
1847   PetscFunctionReturn(0);
1848 }
1849 
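/*
   Illustrative sketch (hypothetical helper): two of the options dispatched by the switch
   above.  MAT_IGNORE_OFF_PROC_ENTRIES only sets the donotstash flag, while MAT_SPD also
   marks the matrix as (structurally) symmetric.
*/
#if 0
static PetscErrorCode ExampleSetOptions(Mat A)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatSetOption(A,MAT_IGNORE_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr); /* drop values destined for other ranks */
  ierr = MatSetOption(A,MAT_SPD,PETSC_TRUE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#endif
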
1850 #undef __FUNCT__
1851 #define __FUNCT__ "MatGetRow_MPIAIJ"
1852 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1853 {
1854   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1855   PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
1856   PetscErrorCode ierr;
1857   PetscInt       i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart;
1858   PetscInt       nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend;
1859   PetscInt       *cmap,*idx_p;
1860 
1861   PetscFunctionBegin;
1862   if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
1863   mat->getrowactive = PETSC_TRUE;
1864 
1865   if (!mat->rowvalues && (idx || v)) {
1866     /*
1867         allocate enough space to hold information from the longest row.
1868     */
1869     Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data;
1870     PetscInt   max = 1,tmp;
1871     for (i=0; i<matin->rmap->n; i++) {
1872       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1873       if (max < tmp) { max = tmp; }
1874     }
1875     ierr = PetscMalloc2(max,PetscScalar,&mat->rowvalues,max,PetscInt,&mat->rowindices);CHKERRQ(ierr);
1876   }
1877 
1878   if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows");
1879   lrow = row - rstart;
1880 
1881   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1882   if (!v)   {pvA = 0; pvB = 0;}
1883   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1884   ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1885   ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1886   nztot = nzA + nzB;
1887 
1888   cmap  = mat->garray;
1889   if (v  || idx) {
1890     if (nztot) {
1891       /* Sort by increasing column numbers, assuming A and B already sorted */
1892       PetscInt imark = -1;
1893       if (v) {
1894         *v = v_p = mat->rowvalues;
1895         for (i=0; i<nzB; i++) {
1896           if (cmap[cworkB[i]] < cstart)   v_p[i] = vworkB[i];
1897           else break;
1898         }
1899         imark = i;
1900         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1901         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1902       }
1903       if (idx) {
1904         *idx = idx_p = mat->rowindices;
1905         if (imark > -1) {
1906           for (i=0; i<imark; i++) {
1907             idx_p[i] = cmap[cworkB[i]];
1908           }
1909         } else {
1910           for (i=0; i<nzB; i++) {
1911             if (cmap[cworkB[i]] < cstart)   idx_p[i] = cmap[cworkB[i]];
1912             else break;
1913           }
1914           imark = i;
1915         }
1916         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart + cworkA[i];
1917         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]];
1918       }
1919     } else {
1920       if (idx) *idx = 0;
1921       if (v)   *v   = 0;
1922     }
1923   }
1924   *nz = nztot;
1925   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1926   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1927   PetscFunctionReturn(0);
1928 }
1929 
1930 #undef __FUNCT__
1931 #define __FUNCT__ "MatRestoreRow_MPIAIJ"
1932 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1933 {
1934   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1935 
1936   PetscFunctionBegin;
1937   if (!aij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
1938   aij->getrowactive = PETSC_FALSE;
1939   PetscFunctionReturn(0);
1940 }
1941 
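/*
   Illustrative sketch (hypothetical helper): walking the locally owned rows through the
   MatGetRow()/MatRestoreRow() interface above.  Only rows in [rstart,rend) may be
   requested, and each MatGetRow() must be matched by MatRestoreRow() before the next row.
*/
#if 0
static PetscErrorCode ExampleCountLocalNonzeros(Mat A,PetscInt *nztotal)
{
  PetscErrorCode    ierr;
  PetscInt          row,rstart,rend,ncols;
  const PetscInt    *cols;
  const PetscScalar *vals;

  PetscFunctionBegin;
  *nztotal = 0;
  ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
  for (row=rstart; row<rend; row++) {
    ierr = MatGetRow(A,row,&ncols,&cols,&vals);CHKERRQ(ierr);
    *nztotal += ncols;
    ierr = MatRestoreRow(A,row,&ncols,&cols,&vals);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
#endif
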
1942 #undef __FUNCT__
1943 #define __FUNCT__ "MatNorm_MPIAIJ"
1944 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm)
1945 {
1946   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1947   Mat_SeqAIJ     *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data;
1948   PetscErrorCode ierr;
1949   PetscInt       i,j,cstart = mat->cmap->rstart;
1950   PetscReal      sum = 0.0;
1951   MatScalar      *v;
1952 
1953   PetscFunctionBegin;
1954   if (aij->size == 1) {
1955     ierr =  MatNorm(aij->A,type,norm);CHKERRQ(ierr);
1956   } else {
1957     if (type == NORM_FROBENIUS) {
1958       v = amat->a;
1959       for (i=0; i<amat->nz; i++) {
1960 #if defined(PETSC_USE_COMPLEX)
1961         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1962 #else
1963         sum += (*v)*(*v); v++;
1964 #endif
1965       }
1966       v = bmat->a;
1967       for (i=0; i<bmat->nz; i++) {
1968 #if defined(PETSC_USE_COMPLEX)
1969         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1970 #else
1971         sum += (*v)*(*v); v++;
1972 #endif
1973       }
1974       ierr = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPIU_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr);
1975       *norm = sqrt(*norm);
1976     } else if (type == NORM_1) { /* max column norm */
1977       PetscReal *tmp,*tmp2;
1978       PetscInt  *jj,*garray = aij->garray;
1979       ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr);
1980       ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscReal),&tmp2);CHKERRQ(ierr);
1981       ierr = PetscMemzero(tmp,mat->cmap->N*sizeof(PetscReal));CHKERRQ(ierr);
1982       *norm = 0.0;
1983       v = amat->a; jj = amat->j;
1984       for (j=0; j<amat->nz; j++) {
1985         tmp[cstart + *jj++ ] += PetscAbsScalar(*v);  v++;
1986       }
1987       v = bmat->a; jj = bmat->j;
1988       for (j=0; j<bmat->nz; j++) {
1989         tmp[garray[*jj++]] += PetscAbsScalar(*v); v++;
1990       }
1991       ierr = MPI_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr);
1992       for (j=0; j<mat->cmap->N; j++) {
1993         if (tmp2[j] > *norm) *norm = tmp2[j];
1994       }
1995       ierr = PetscFree(tmp);CHKERRQ(ierr);
1996       ierr = PetscFree(tmp2);CHKERRQ(ierr);
1997     } else if (type == NORM_INFINITY) { /* max row norm */
1998       PetscReal ntemp = 0.0;
1999       for (j=0; j<aij->A->rmap->n; j++) {
2000         v = amat->a + amat->i[j];
2001         sum = 0.0;
2002         for (i=0; i<amat->i[j+1]-amat->i[j]; i++) {
2003           sum += PetscAbsScalar(*v); v++;
2004         }
2005         v = bmat->a + bmat->i[j];
2006         for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) {
2007           sum += PetscAbsScalar(*v); v++;
2008         }
2009         if (sum > ntemp) ntemp = sum;
2010       }
2011       ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPIU_MAX,((PetscObject)mat)->comm);CHKERRQ(ierr);
2012     } else {
2013       SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_SUP,"No support for two norm");
2014     }
2015   }
2016   PetscFunctionReturn(0);
2017 }
2018 
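/*
   Illustrative sketch (hypothetical helper): the three norms supported above.  NORM_1 is
   the maximum absolute column sum, NORM_INFINITY the maximum absolute row sum, and
   NORM_FROBENIUS the square root of the sum of squares of all entries.
*/
#if 0
static PetscErrorCode ExampleMatrixNorms(Mat A)
{
  PetscErrorCode ierr;
  PetscReal      n1,nf,ninf;

  PetscFunctionBegin;
  ierr = MatNorm(A,NORM_1,&n1);CHKERRQ(ierr);
  ierr = MatNorm(A,NORM_FROBENIUS,&nf);CHKERRQ(ierr);
  ierr = MatNorm(A,NORM_INFINITY,&ninf);CHKERRQ(ierr);
  ierr = PetscPrintf(((PetscObject)A)->comm,"1-norm %g  Frobenius %g  inf-norm %g\n",(double)n1,(double)nf,(double)ninf);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#endif
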
2019 #undef __FUNCT__
2020 #define __FUNCT__ "MatTranspose_MPIAIJ"
2021 PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout)
2022 {
2023   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2024   Mat_SeqAIJ     *Aloc=(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data;
2025   PetscErrorCode ierr;
2026   PetscInt       M = A->rmap->N,N = A->cmap->N,ma,na,mb,*ai,*aj,*bi,*bj,row,*cols,*cols_tmp,i,*d_nnz;
2027   PetscInt       cstart=A->cmap->rstart,ncol;
2028   Mat            B;
2029   MatScalar      *array;
2030 
2031   PetscFunctionBegin;
2032   if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
2033 
2034   ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n;
2035   ai = Aloc->i; aj = Aloc->j;
2036   bi = Bloc->i; bj = Bloc->j;
2037   if (reuse == MAT_INITIAL_MATRIX || *matout == A) {
2038     /* compute d_nnz for preallocation; o_nnz is approximated by d_nnz to avoid communication */
2039     ierr = PetscMalloc((1+na)*sizeof(PetscInt),&d_nnz);CHKERRQ(ierr);
2040     ierr = PetscMemzero(d_nnz,(1+na)*sizeof(PetscInt));CHKERRQ(ierr);
2041     for (i=0; i<ai[ma]; i++){
2042       d_nnz[aj[i]] ++;
2043       aj[i] += cstart; /* global col index to be used by MatSetValues() */
2044     }
2045 
2046     ierr = MatCreate(((PetscObject)A)->comm,&B);CHKERRQ(ierr);
2047     ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
2048     ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
2049     ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,d_nnz);CHKERRQ(ierr);
2050     ierr = PetscFree(d_nnz);CHKERRQ(ierr);
2051   } else {
2052     B = *matout;
2053   }
2054 
2055   /* copy over the A part */
2056   array = Aloc->a;
2057   row = A->rmap->rstart;
2058   for (i=0; i<ma; i++) {
2059     ncol = ai[i+1]-ai[i];
2060     ierr = MatSetValues(B,ncol,aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
2061     row++; array += ncol; aj += ncol;
2062   }
2063   aj = Aloc->j;
2064   for (i=0; i<ai[ma]; i++) aj[i] -= cstart; /* restore the local column indices */
2065 
2066   /* copy over the B part */
2067   ierr = PetscMalloc(bi[mb]*sizeof(PetscInt),&cols);CHKERRQ(ierr);
2068   ierr = PetscMemzero(cols,bi[mb]*sizeof(PetscInt));CHKERRQ(ierr);
2069   array = Bloc->a;
2070   row = A->rmap->rstart;
2071   for (i=0; i<bi[mb]; i++) {cols[i] = a->garray[bj[i]];}
2072   cols_tmp = cols;
2073   for (i=0; i<mb; i++) {
2074     ncol = bi[i+1]-bi[i];
2075     ierr = MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
2076     row++; array += ncol; cols_tmp += ncol;
2077   }
2078   ierr = PetscFree(cols);CHKERRQ(ierr);
2079 
2080   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2081   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2082   if (reuse == MAT_INITIAL_MATRIX || *matout != A) {
2083     *matout = B;
2084   } else {
2085     ierr = MatHeaderMerge(A,B);CHKERRQ(ierr);
2086   }
2087   PetscFunctionReturn(0);
2088 }
2089 
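/*
   Illustrative sketch (hypothetical helper): out-of-place use of the transpose above.
   MAT_INITIAL_MATRIX creates a new parallel matrix holding A^T; for a square matrix,
   passing MAT_REUSE_MATRIX with &A instead transposes in place.
*/
#if 0
static PetscErrorCode ExampleTranspose(Mat A,Mat *At)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatTranspose(A,MAT_INITIAL_MATRIX,At);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#endif
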
2090 #undef __FUNCT__
2091 #define __FUNCT__ "MatDiagonalScale_MPIAIJ"
2092 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
2093 {
2094   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2095   Mat            a = aij->A,b = aij->B;
2096   PetscErrorCode ierr;
2097   PetscInt       s1,s2,s3;
2098 
2099   PetscFunctionBegin;
2100   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
2101   if (rr) {
2102     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
2103     if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
2104     /* Overlap communication with computation. */
2105     ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2106   }
2107   if (ll) {
2108     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
2109     if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
2110     ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr);
2111   }
2112   /* scale the diagonal block */
2113   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
2114 
2115   if (rr) {
2116     /* Do a scatter end and then right scale the off-diagonal block */
2117     ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2118     ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr);
2119   }
2120 
2121   PetscFunctionReturn(0);
2122 }
2123 
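/*
   Illustrative sketch (hypothetical helper): A <- diag(l) A diag(r) through the routine
   above; l must conform to the row layout of A and r to the column layout.
*/
#if 0
static PetscErrorCode ExampleDiagonalScale(Mat A)
{
  PetscErrorCode ierr;
  Vec            l,r;

  PetscFunctionBegin;
  ierr = MatGetVecs(A,&r,&l);CHKERRQ(ierr);   /* r conforms to the columns, l to the rows */
  ierr = VecSet(l,2.0);CHKERRQ(ierr);
  ierr = VecSet(r,0.5);CHKERRQ(ierr);
  ierr = MatDiagonalScale(A,l,r);CHKERRQ(ierr);
  ierr = VecDestroy(&l);CHKERRQ(ierr);
  ierr = VecDestroy(&r);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#endif
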
2124 #undef __FUNCT__
2125 #define __FUNCT__ "MatSetBlockSize_MPIAIJ"
2126 PetscErrorCode MatSetBlockSize_MPIAIJ(Mat A,PetscInt bs)
2127 {
2128   Mat_MPIAIJ     *a   = (Mat_MPIAIJ*)A->data;
2129   PetscErrorCode ierr;
2130 
2131   PetscFunctionBegin;
2132   ierr = MatSetBlockSize(a->A,bs);CHKERRQ(ierr);
2133   ierr = MatSetBlockSize(a->B,bs);CHKERRQ(ierr);
2134   ierr = PetscLayoutSetBlockSize(A->rmap,bs);CHKERRQ(ierr);
2135   ierr = PetscLayoutSetBlockSize(A->cmap,bs);CHKERRQ(ierr);
2136   PetscFunctionReturn(0);
2137 }
2138 #undef __FUNCT__
2139 #define __FUNCT__ "MatSetUnfactored_MPIAIJ"
2140 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A)
2141 {
2142   Mat_MPIAIJ     *a   = (Mat_MPIAIJ*)A->data;
2143   PetscErrorCode ierr;
2144 
2145   PetscFunctionBegin;
2146   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
2147   PetscFunctionReturn(0);
2148 }
2149 
2150 #undef __FUNCT__
2151 #define __FUNCT__ "MatEqual_MPIAIJ"
2152 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscBool  *flag)
2153 {
2154   Mat_MPIAIJ     *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data;
2155   Mat            a,b,c,d;
2156   PetscBool      flg;
2157   PetscErrorCode ierr;
2158 
2159   PetscFunctionBegin;
2160   a = matA->A; b = matA->B;
2161   c = matB->A; d = matB->B;
2162 
2163   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
2164   if (flg) {
2165     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
2166   }
2167   ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,((PetscObject)A)->comm);CHKERRQ(ierr);
2168   PetscFunctionReturn(0);
2169 }
2170 
2171 #undef __FUNCT__
2172 #define __FUNCT__ "MatCopy_MPIAIJ"
2173 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
2174 {
2175   PetscErrorCode ierr;
2176   Mat_MPIAIJ     *a = (Mat_MPIAIJ *)A->data;
2177   Mat_MPIAIJ     *b = (Mat_MPIAIJ *)B->data;
2178 
2179   PetscFunctionBegin;
2180   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
2181   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
2182     /* because of the column compression in the off-processor part of the matrix a->B,
2183        the number of columns in a->B and b->B may differ, hence we cannot call
2184        MatCopy() directly on the two parts. If need be, a copy more efficient than
2185        MatCopy_Basic() could be provided by first uncompressing the a->B matrices
2186        and then copying the submatrices */
2187     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
2188   } else {
2189     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
2190     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
2191   }
2192   PetscFunctionReturn(0);
2193 }
2194 
2195 #undef __FUNCT__
2196 #define __FUNCT__ "MatSetUpPreallocation_MPIAIJ"
2197 PetscErrorCode MatSetUpPreallocation_MPIAIJ(Mat A)
2198 {
2199   PetscErrorCode ierr;
2200 
2201   PetscFunctionBegin;
2202   ierr =  MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
2203   PetscFunctionReturn(0);
2204 }
2205 
2206 #undef __FUNCT__
2207 #define __FUNCT__ "MatAXPY_MPIAIJ"
2208 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
2209 {
2210   PetscErrorCode ierr;
2211   PetscInt       i;
2212   Mat_MPIAIJ     *xx = (Mat_MPIAIJ *)X->data,*yy = (Mat_MPIAIJ *)Y->data;
2213   PetscBLASInt   bnz,one=1;
2214   Mat_SeqAIJ     *x,*y;
2215 
2216   PetscFunctionBegin;
2217   if (str == SAME_NONZERO_PATTERN) {
2218     PetscScalar alpha = a;
2219     x = (Mat_SeqAIJ *)xx->A->data;
2220     y = (Mat_SeqAIJ *)yy->A->data;
2221     bnz = PetscBLASIntCast(x->nz);
2222     BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
2223     x = (Mat_SeqAIJ *)xx->B->data;
2224     y = (Mat_SeqAIJ *)yy->B->data;
2225     bnz = PetscBLASIntCast(x->nz);
2226     BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
2227   } else if (str == SUBSET_NONZERO_PATTERN) {
2228     ierr = MatAXPY_SeqAIJ(yy->A,a,xx->A,str);CHKERRQ(ierr);
2229 
2230     x = (Mat_SeqAIJ *)xx->B->data;
2231     y = (Mat_SeqAIJ *)yy->B->data;
2232     if (y->xtoy && y->XtoY != xx->B) {
2233       ierr = PetscFree(y->xtoy);CHKERRQ(ierr);
2234       ierr = MatDestroy(&y->XtoY);CHKERRQ(ierr);
2235     }
2236     if (!y->xtoy) { /* get xtoy */
2237       ierr = MatAXPYGetxtoy_Private(xx->B->rmap->n,x->i,x->j,xx->garray,y->i,y->j,yy->garray,&y->xtoy);CHKERRQ(ierr);
2238       y->XtoY = xx->B;
2239       ierr = PetscObjectReference((PetscObject)xx->B);CHKERRQ(ierr);
2240     }
2241     for (i=0; i<x->nz; i++) y->a[y->xtoy[i]] += a*(x->a[i]);
2242   } else {
2243     Mat B;
2244     PetscInt *nnz_d,*nnz_o;
2245     ierr = PetscMalloc(yy->A->rmap->N*sizeof(PetscInt),&nnz_d);CHKERRQ(ierr);
2246     ierr = PetscMalloc(yy->B->rmap->N*sizeof(PetscInt),&nnz_o);CHKERRQ(ierr);
2247     ierr = MatCreate(((PetscObject)Y)->comm,&B);CHKERRQ(ierr);
2248     ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr);
2249     ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr);
2250     ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr);
2251     ierr = MatAXPYGetPreallocation_SeqAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr);
2252     ierr = MatAXPYGetPreallocation_SeqAIJ(yy->B,xx->B,nnz_o);CHKERRQ(ierr);
2253     ierr = MatMPIAIJSetPreallocation(B,PETSC_NULL,nnz_d,PETSC_NULL,nnz_o);CHKERRQ(ierr);
2254     ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr);
2255     ierr = MatHeaderReplace(Y,B);CHKERRQ(ierr);
2256     ierr = PetscFree(nnz_d);CHKERRQ(ierr);
2257     ierr = PetscFree(nnz_o);CHKERRQ(ierr);
2258   }
2259   PetscFunctionReturn(0);
2260 }
2261 
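/*
   Illustrative sketch (hypothetical helper): Y <- a*X + Y through MatAXPY_MPIAIJ() above.
   With SAME_NONZERO_PATTERN the update reduces to the two BLAS axpy calls on the diagonal
   and off-diagonal blocks; DIFFERENT_NONZERO_PATTERN rebuilds Y with merged preallocation.
*/
#if 0
static PetscErrorCode ExampleAXPY(Mat Y,Mat X)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatAXPY(Y,2.0,X,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);   /* Y = 2*X + Y */
  PetscFunctionReturn(0);
}
#endif
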
2262 extern PetscErrorCode  MatConjugate_SeqAIJ(Mat);
2263 
2264 #undef __FUNCT__
2265 #define __FUNCT__ "MatConjugate_MPIAIJ"
2266 PetscErrorCode  MatConjugate_MPIAIJ(Mat mat)
2267 {
2268 #if defined(PETSC_USE_COMPLEX)
2269   PetscErrorCode ierr;
2270   Mat_MPIAIJ     *aij = (Mat_MPIAIJ *)mat->data;
2271 
2272   PetscFunctionBegin;
2273   ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr);
2274   ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr);
2275 #else
2276   PetscFunctionBegin;
2277 #endif
2278   PetscFunctionReturn(0);
2279 }
2280 
2281 #undef __FUNCT__
2282 #define __FUNCT__ "MatRealPart_MPIAIJ"
2283 PetscErrorCode MatRealPart_MPIAIJ(Mat A)
2284 {
2285   Mat_MPIAIJ   *a = (Mat_MPIAIJ*)A->data;
2286   PetscErrorCode ierr;
2287 
2288   PetscFunctionBegin;
2289   ierr = MatRealPart(a->A);CHKERRQ(ierr);
2290   ierr = MatRealPart(a->B);CHKERRQ(ierr);
2291   PetscFunctionReturn(0);
2292 }
2293 
2294 #undef __FUNCT__
2295 #define __FUNCT__ "MatImaginaryPart_MPIAIJ"
2296 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A)
2297 {
2298   Mat_MPIAIJ   *a = (Mat_MPIAIJ*)A->data;
2299   PetscErrorCode ierr;
2300 
2301   PetscFunctionBegin;
2302   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
2303   ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
2304   PetscFunctionReturn(0);
2305 }
2306 
2307 #ifdef PETSC_HAVE_PBGL
2308 
2309 #include <boost/parallel/mpi/bsp_process_group.hpp>
2310 #include <boost/graph/distributed/ilu_default_graph.hpp>
2311 #include <boost/graph/distributed/ilu_0_block.hpp>
2312 #include <boost/graph/distributed/ilu_preconditioner.hpp>
2313 #include <boost/graph/distributed/petsc/interface.hpp>
2314 #include <boost/multi_array.hpp>
2315 #include <boost/parallel/distributed_property_map.hpp>
2316 
2317 #undef __FUNCT__
2318 #define __FUNCT__ "MatILUFactorSymbolic_MPIAIJ"
2319 /*
2320   This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu>
2321 */
2322 PetscErrorCode MatILUFactorSymbolic_MPIAIJ(Mat fact,Mat A, IS isrow, IS iscol, const MatFactorInfo *info)
2323 {
2324   namespace petsc = boost::distributed::petsc;
2325 
2326   namespace graph_dist = boost::graph::distributed;
2327   using boost::graph::distributed::ilu_default::process_group_type;
2328   using boost::graph::ilu_permuted;
2329 
2330   PetscBool       row_identity, col_identity;
2331   PetscContainer  c;
2332   PetscInt        m, n, M, N;
2333   PetscErrorCode  ierr;
2334 
2335   PetscFunctionBegin;
2336   if (info->levels != 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only levels = 0 supported for parallel ilu");
2337   ierr = ISIdentity(isrow, &row_identity);CHKERRQ(ierr);
2338   ierr = ISIdentity(iscol, &col_identity);CHKERRQ(ierr);
2339   if (!row_identity || !col_identity) {
2340     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for parallel ILU");
2341   }
2342 
2343   process_group_type pg;
2344   typedef graph_dist::ilu_default::ilu_level_graph_type  lgraph_type;
2345   lgraph_type*   lgraph_p = new lgraph_type(petsc::num_global_vertices(A), pg, petsc::matrix_distribution(A, pg));
2346   lgraph_type&   level_graph = *lgraph_p;
2347   graph_dist::ilu_default::graph_type&            graph(level_graph.graph);
2348 
2349   petsc::read_matrix(A, graph, get(boost::edge_weight, graph));
2350   ilu_permuted(level_graph);
2351 
2352   /* put together the new matrix */
2353   ierr = MatCreate(((PetscObject)A)->comm, fact);CHKERRQ(ierr);
2354   ierr = MatGetLocalSize(A, &m, &n);CHKERRQ(ierr);
2355   ierr = MatGetSize(A, &M, &N);CHKERRQ(ierr);
2356   ierr = MatSetSizes(fact, m, n, M, N);CHKERRQ(ierr);
2357   ierr = MatSetType(fact, ((PetscObject)A)->type_name);CHKERRQ(ierr);
2358   ierr = MatAssemblyBegin(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2359   ierr = MatAssemblyEnd(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2360 
2361   ierr = PetscContainerCreate(((PetscObject)A)->comm, &c);CHKERRQ(ierr);
2362   ierr = PetscContainerSetPointer(c, lgraph_p);CHKERRQ(ierr);
2363   ierr = PetscObjectCompose((PetscObject) (fact), "graph", (PetscObject) c);CHKERRQ(ierr);
2364   ierr = PetscContainerDestroy(&c);CHKERRQ(ierr);
2365   PetscFunctionReturn(0);
2366 }
2367 
2368 #undef __FUNCT__
2369 #define __FUNCT__ "MatLUFactorNumeric_MPIAIJ"
2370 PetscErrorCode MatLUFactorNumeric_MPIAIJ(Mat B,Mat A, const MatFactorInfo *info)
2371 {
2372   PetscFunctionBegin;
2373   PetscFunctionReturn(0);
2374 }
2375 
2376 #undef __FUNCT__
2377 #define __FUNCT__ "MatSolve_MPIAIJ"
2378 /*
2379   This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu>
2380 */
2381 PetscErrorCode MatSolve_MPIAIJ(Mat A, Vec b, Vec x)
2382 {
2383   namespace graph_dist = boost::graph::distributed;
2384 
2385   typedef graph_dist::ilu_default::ilu_level_graph_type  lgraph_type;
2386   lgraph_type*   lgraph_p;
2387   PetscContainer c;
2388   PetscErrorCode ierr;
2389 
2390   PetscFunctionBegin;
2391   ierr = PetscObjectQuery((PetscObject) A, "graph", (PetscObject *) &c);CHKERRQ(ierr);
2392   ierr = PetscContainerGetPointer(c, (void **) &lgraph_p);CHKERRQ(ierr);
2393   ierr = VecCopy(b, x);CHKERRQ(ierr);
2394 
2395   PetscScalar* array_x;
2396   ierr = VecGetArray(x, &array_x);CHKERRQ(ierr);
2397   PetscInt sx;
2398   ierr = VecGetSize(x, &sx);CHKERRQ(ierr);
2399 
2400   PetscScalar* array_b;
2401   ierr = VecGetArray(b, &array_b);CHKERRQ(ierr);
2402   PetscInt sb;
2403   ierr = VecGetSize(b, &sb);CHKERRQ(ierr);
2404 
2405   lgraph_type&   level_graph = *lgraph_p;
2406   graph_dist::ilu_default::graph_type&            graph(level_graph.graph);
2407 
2408   typedef boost::multi_array_ref<PetscScalar, 1> array_ref_type;
2409   array_ref_type                                 ref_b(array_b, boost::extents[num_vertices(graph)]),
2410                                                  ref_x(array_x, boost::extents[num_vertices(graph)]);
2411 
2412   typedef boost::iterator_property_map<array_ref_type::iterator,
2413                                 boost::property_map<graph_dist::ilu_default::graph_type, boost::vertex_index_t>::type>  gvector_type;
2414   gvector_type                                   vector_b(ref_b.begin(), get(boost::vertex_index, graph)),
2415                                                  vector_x(ref_x.begin(), get(boost::vertex_index, graph));
2416 
2417   ilu_set_solve(*lgraph_p, vector_b, vector_x);
2418 
2419   PetscFunctionReturn(0);
2420 }
2421 #endif
2422 
2423 typedef struct { /* used by MatGetRedundantMatrix() for reusing matredundant */
2424   PetscInt       nzlocal,nsends,nrecvs;
2425   PetscMPIInt    *send_rank,*recv_rank;
2426   PetscInt       *sbuf_nz,*rbuf_nz,*sbuf_j,**rbuf_j;
2427   PetscScalar    *sbuf_a,**rbuf_a;
2428   PetscErrorCode (*Destroy)(Mat);
2429 } Mat_Redundant;
2430 
2431 #undef __FUNCT__
2432 #define __FUNCT__ "PetscContainerDestroy_MatRedundant"
2433 PetscErrorCode PetscContainerDestroy_MatRedundant(void *ptr)
2434 {
2435   PetscErrorCode       ierr;
2436   Mat_Redundant        *redund=(Mat_Redundant*)ptr;
2437   PetscInt             i;
2438 
2439   PetscFunctionBegin;
2440   ierr = PetscFree2(redund->send_rank,redund->recv_rank);CHKERRQ(ierr);
2441   ierr = PetscFree(redund->sbuf_j);CHKERRQ(ierr);
2442   ierr = PetscFree(redund->sbuf_a);CHKERRQ(ierr);
2443   for (i=0; i<redund->nrecvs; i++){
2444     ierr = PetscFree(redund->rbuf_j[i]);CHKERRQ(ierr);
2445     ierr = PetscFree(redund->rbuf_a[i]);CHKERRQ(ierr);
2446   }
2447   ierr = PetscFree4(redund->sbuf_nz,redund->rbuf_nz,redund->rbuf_j,redund->rbuf_a);CHKERRQ(ierr);
2448   ierr = PetscFree(redund);CHKERRQ(ierr);
2449   PetscFunctionReturn(0);
2450 }
2451 
2452 #undef __FUNCT__
2453 #define __FUNCT__ "MatDestroy_MatRedundant"
2454 PetscErrorCode MatDestroy_MatRedundant(Mat A)
2455 {
2456   PetscErrorCode  ierr;
2457   PetscContainer  container;
2458   Mat_Redundant   *redund=PETSC_NULL;
2459 
2460   PetscFunctionBegin;
2461   ierr = PetscObjectQuery((PetscObject)A,"Mat_Redundant",(PetscObject *)&container);CHKERRQ(ierr);
2462   if (!container) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Container does not exist");
2463   ierr = PetscContainerGetPointer(container,(void **)&redund);CHKERRQ(ierr);
2464   A->ops->destroy = redund->Destroy;
2465   ierr = PetscObjectCompose((PetscObject)A,"Mat_Redundant",0);CHKERRQ(ierr);
2466   if (A->ops->destroy) {
2467     ierr = (*A->ops->destroy)(A);CHKERRQ(ierr);
2468   }
2469   PetscFunctionReturn(0);
2470 }
2471 
2472 #undef __FUNCT__
2473 #define __FUNCT__ "MatGetRedundantMatrix_MPIAIJ"
2474 PetscErrorCode MatGetRedundantMatrix_MPIAIJ(Mat mat,PetscInt nsubcomm,MPI_Comm subcomm,PetscInt mlocal_sub,MatReuse reuse,Mat *matredundant)
2475 {
2476   PetscMPIInt    rank,size;
2477   MPI_Comm       comm=((PetscObject)mat)->comm;
2478   PetscErrorCode ierr;
2479   PetscInt       nsends=0,nrecvs=0,i,rownz_max=0;
2480   PetscMPIInt    *send_rank=PETSC_NULL,*recv_rank=PETSC_NULL;
2481   PetscInt       *rowrange=mat->rmap->range;
2482   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2483   Mat            A=aij->A,B=aij->B,C=*matredundant;
2484   Mat_SeqAIJ     *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data;
2485   PetscScalar    *sbuf_a;
2486   PetscInt       nzlocal=a->nz+b->nz;
2487   PetscInt       j,cstart=mat->cmap->rstart,cend=mat->cmap->rend,row,nzA,nzB,ncols,*cworkA,*cworkB;
2488   PetscInt       rstart=mat->rmap->rstart,rend=mat->rmap->rend,*bmap=aij->garray,M,N;
2489   PetscInt       *cols,ctmp,lwrite,*rptr,l,*sbuf_j;
2490   MatScalar      *aworkA,*aworkB;
2491   PetscScalar    *vals;
2492   PetscMPIInt    tag1,tag2,tag3,imdex;
2493   MPI_Request    *s_waits1=PETSC_NULL,*s_waits2=PETSC_NULL,*s_waits3=PETSC_NULL,
2494                  *r_waits1=PETSC_NULL,*r_waits2=PETSC_NULL,*r_waits3=PETSC_NULL;
2495   MPI_Status     recv_status,*send_status;
2496   PetscInt       *sbuf_nz=PETSC_NULL,*rbuf_nz=PETSC_NULL,count;
2497   PetscInt       **rbuf_j=PETSC_NULL;
2498   PetscScalar    **rbuf_a=PETSC_NULL;
2499   Mat_Redundant  *redund=PETSC_NULL;
2500   PetscContainer container;
2501 
2502   PetscFunctionBegin;
2503   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2504   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2505 
2506   if (reuse == MAT_REUSE_MATRIX) {
2507     ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr);
2508     if (M != N || M != mat->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong global size");
2509     ierr = MatGetLocalSize(C,&M,&N);CHKERRQ(ierr);
2510     if (M != N || M != mlocal_sub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong local size");
2511     ierr = PetscObjectQuery((PetscObject)C,"Mat_Redundant",(PetscObject *)&container);CHKERRQ(ierr);
2512     if (!container) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Container does not exist");
2513     ierr = PetscContainerGetPointer(container,(void **)&redund);CHKERRQ(ierr);
2514     if (nzlocal != redund->nzlocal) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong nzlocal");
2515 
2516     nsends    = redund->nsends;
2517     nrecvs    = redund->nrecvs;
2518     send_rank = redund->send_rank;
2519     recv_rank = redund->recv_rank;
2520     sbuf_nz   = redund->sbuf_nz;
2521     rbuf_nz   = redund->rbuf_nz;
2522     sbuf_j    = redund->sbuf_j;
2523     sbuf_a    = redund->sbuf_a;
2524     rbuf_j    = redund->rbuf_j;
2525     rbuf_a    = redund->rbuf_a;
2526   }
2527 
2528   if (reuse == MAT_INITIAL_MATRIX){
2529     PetscMPIInt  subrank,subsize;
2530     PetscInt     nleftover,np_subcomm;
2531     /* get the destination processors' id send_rank, nsends and nrecvs */
2532     ierr = MPI_Comm_rank(subcomm,&subrank);CHKERRQ(ierr);
2533     ierr = MPI_Comm_size(subcomm,&subsize);CHKERRQ(ierr);
2534     ierr = PetscMalloc2(size,PetscMPIInt,&send_rank,size,PetscMPIInt,&recv_rank);CHKERRQ(ierr);
2535     np_subcomm = size/nsubcomm;
2536     nleftover  = size - nsubcomm*np_subcomm;
2537     nsends = 0; nrecvs = 0;
2538     for (i=0; i<size; i++){ /* i=rank*/
2539       if (subrank == i/nsubcomm && rank != i){ /* my_subrank == other's subrank */
2540         send_rank[nsends] = i; nsends++;
2541         recv_rank[nrecvs++] = i;
2542       }
2543     }
2544     if (rank >= size - nleftover){/* this proc is a leftover processor */
2545       i = size-nleftover-1;
2546       j = 0;
2547       while (j < nsubcomm - nleftover){
2548         send_rank[nsends++] = i;
2549         i--; j++;
2550       }
2551     }
2552 
2553     if (nleftover && subsize == size/nsubcomm && subrank==subsize-1){ /* this proc recvs from leftover processors */
2554       for (i=0; i<nleftover; i++){
2555         recv_rank[nrecvs++] = size-nleftover+i;
2556       }
2557     }
2558 
2559     /* allocate sbuf_j, sbuf_a */
2560     i = nzlocal + rowrange[rank+1] - rowrange[rank] + 2;
2561     ierr = PetscMalloc(i*sizeof(PetscInt),&sbuf_j);CHKERRQ(ierr);
2562     ierr = PetscMalloc((nzlocal+1)*sizeof(PetscScalar),&sbuf_a);CHKERRQ(ierr);
2563   } /* end of if (reuse == MAT_INITIAL_MATRIX) */
2564 
2565   /* copy mat's local entries into the buffers */
2566   if (reuse == MAT_INITIAL_MATRIX){
2567     rownz_max = 0;
2568     rptr = sbuf_j;
2569     cols = sbuf_j + rend-rstart + 1;
2570     vals = sbuf_a;
2571     rptr[0] = 0;
2572     for (i=0; i<rend-rstart; i++){
2573       row = i + rstart;
2574       nzA    = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i];
2575       ncols  = nzA + nzB;
2576       cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i];
2577       aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i];
2578       /* load the column indices for this row into cols */
2579       lwrite = 0;
2580       for (l=0; l<nzB; l++) {
2581         if ((ctmp = bmap[cworkB[l]]) < cstart){
2582           vals[lwrite]   = aworkB[l];
2583           cols[lwrite++] = ctmp;
2584         }
2585       }
2586       for (l=0; l<nzA; l++){
2587         vals[lwrite]   = aworkA[l];
2588         cols[lwrite++] = cstart + cworkA[l];
2589       }
2590       for (l=0; l<nzB; l++) {
2591         if ((ctmp = bmap[cworkB[l]]) >= cend){
2592           vals[lwrite]   = aworkB[l];
2593           cols[lwrite++] = ctmp;
2594         }
2595       }
2596       vals += ncols;
2597       cols += ncols;
2598       rptr[i+1] = rptr[i] + ncols;
2599       if (rownz_max < ncols) rownz_max = ncols;
2600     }
2601     if (rptr[rend-rstart] != a->nz + b->nz) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB,"rptr[%D] %D != %D + %D",rend-rstart,rptr[rend-rstart],a->nz,b->nz);
2602   } else { /* only copy matrix values into sbuf_a */
2603     rptr = sbuf_j;
2604     vals = sbuf_a;
2605     rptr[0] = 0;
2606     for (i=0; i<rend-rstart; i++){
2607       row = i + rstart;
2608       nzA    = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i];
2609       ncols  = nzA + nzB;
2610       cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i];
2611       aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i];
2612       lwrite = 0;
2613       for (l=0; l<nzB; l++) {
2614         if ((ctmp = bmap[cworkB[l]]) < cstart) vals[lwrite++] = aworkB[l];
2615       }
2616       for (l=0; l<nzA; l++) vals[lwrite++] = aworkA[l];
2617       for (l=0; l<nzB; l++) {
2618         if ((ctmp = bmap[cworkB[l]]) >= cend) vals[lwrite++] = aworkB[l];
2619       }
2620       vals += ncols;
2621       rptr[i+1] = rptr[i] + ncols;
2622     }
2623   } /* end of if (reuse == MAT_INITIAL_MATRIX) */
2624 
2625   /* send nzlocal to others, and recv other's nzlocal */
2626   /*--------------------------------------------------*/
2627   if (reuse == MAT_INITIAL_MATRIX){
2628     ierr = PetscMalloc2(3*(nsends + nrecvs)+1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr);
2629     s_waits2 = s_waits3 + nsends;
2630     s_waits1 = s_waits2 + nsends;
2631     r_waits1 = s_waits1 + nsends;
2632     r_waits2 = r_waits1 + nrecvs;
2633     r_waits3 = r_waits2 + nrecvs;
2634   } else {
2635     ierr = PetscMalloc2(nsends + nrecvs +1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr);
2636     r_waits3 = s_waits3 + nsends;
2637   }
2638 
2639   ierr = PetscObjectGetNewTag((PetscObject)mat,&tag3);CHKERRQ(ierr);
2640   if (reuse == MAT_INITIAL_MATRIX){
2641     /* get new tags to keep the communication clean */
2642     ierr = PetscObjectGetNewTag((PetscObject)mat,&tag1);CHKERRQ(ierr);
2643     ierr = PetscObjectGetNewTag((PetscObject)mat,&tag2);CHKERRQ(ierr);
2644     ierr = PetscMalloc4(nsends,PetscInt,&sbuf_nz,nrecvs,PetscInt,&rbuf_nz,nrecvs,PetscInt*,&rbuf_j,nrecvs,PetscScalar*,&rbuf_a);CHKERRQ(ierr);
2645 
2646     /* post receives of other's nzlocal */
2647     for (i=0; i<nrecvs; i++){
2648       ierr = MPI_Irecv(rbuf_nz+i,1,MPIU_INT,MPI_ANY_SOURCE,tag1,comm,r_waits1+i);CHKERRQ(ierr);
2649     }
2650     /* send nzlocal to others */
2651     for (i=0; i<nsends; i++){
2652       sbuf_nz[i] = nzlocal;
2653       ierr = MPI_Isend(sbuf_nz+i,1,MPIU_INT,send_rank[i],tag1,comm,s_waits1+i);CHKERRQ(ierr);
2654     }
2655     /* wait on receives of nzlocal; allocate space for rbuf_j, rbuf_a */
2656     count = nrecvs;
2657     while (count) {
2658       ierr = MPI_Waitany(nrecvs,r_waits1,&imdex,&recv_status);CHKERRQ(ierr);
2659       recv_rank[imdex] = recv_status.MPI_SOURCE;
2660       /* allocate rbuf_a and rbuf_j; then post receives of rbuf_j */
2661       ierr = PetscMalloc((rbuf_nz[imdex]+1)*sizeof(PetscScalar),&rbuf_a[imdex]);CHKERRQ(ierr);
2662 
2663       i = rowrange[recv_status.MPI_SOURCE+1] - rowrange[recv_status.MPI_SOURCE]; /* number of expected mat->i */
2664       rbuf_nz[imdex] += i + 2;
2665       ierr = PetscMalloc(rbuf_nz[imdex]*sizeof(PetscInt),&rbuf_j[imdex]);CHKERRQ(ierr);
2666       ierr = MPI_Irecv(rbuf_j[imdex],rbuf_nz[imdex],MPIU_INT,recv_status.MPI_SOURCE,tag2,comm,r_waits2+imdex);CHKERRQ(ierr);
2667       count--;
2668     }
2669     /* wait on sends of nzlocal */
2670     if (nsends) {ierr = MPI_Waitall(nsends,s_waits1,send_status);CHKERRQ(ierr);}
2671     /* send mat->i,j to others, and recv from other's */
2672     /*------------------------------------------------*/
2673     for (i=0; i<nsends; i++){
2674       j = nzlocal + rowrange[rank+1] - rowrange[rank] + 1;
2675       ierr = MPI_Isend(sbuf_j,j,MPIU_INT,send_rank[i],tag2,comm,s_waits2+i);CHKERRQ(ierr);
2676     }
2677     /* wait on receives of mat->i,j */
2678     /*------------------------------*/
2679     count = nrecvs;
2680     while (count) {
2681       ierr = MPI_Waitany(nrecvs,r_waits2,&imdex,&recv_status);CHKERRQ(ierr);
2682       if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE);
2683       count--;
2684     }
2685     /* wait on sends of mat->i,j */
2686     /*---------------------------*/
2687     if (nsends) {
2688       ierr = MPI_Waitall(nsends,s_waits2,send_status);CHKERRQ(ierr);
2689     }
2690   } /* endof if (reuse == MAT_INITIAL_MATRIX) */
2691 
2692   /* post receives, send and receive mat->a */
2693   /*----------------------------------------*/
2694   for (imdex=0; imdex<nrecvs; imdex++) {
2695     ierr = MPI_Irecv(rbuf_a[imdex],rbuf_nz[imdex],MPIU_SCALAR,recv_rank[imdex],tag3,comm,r_waits3+imdex);CHKERRQ(ierr);
2696   }
2697   for (i=0; i<nsends; i++){
2698     ierr = MPI_Isend(sbuf_a,nzlocal,MPIU_SCALAR,send_rank[i],tag3,comm,s_waits3+i);CHKERRQ(ierr);
2699   }
2700   count = nrecvs;
2701   while (count) {
2702     ierr = MPI_Waitany(nrecvs,r_waits3,&imdex,&recv_status);CHKERRQ(ierr);
2703     if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE);
2704     count--;
2705   }
2706   if (nsends) {
2707     ierr = MPI_Waitall(nsends,s_waits3,send_status);CHKERRQ(ierr);
2708   }
2709 
2710   ierr = PetscFree2(s_waits3,send_status);CHKERRQ(ierr);
2711 
2712   /* create redundant matrix */
2713   /*-------------------------*/
2714   if (reuse == MAT_INITIAL_MATRIX){
2715     /* compute rownz_max for preallocation */
2716     for (imdex=0; imdex<nrecvs; imdex++){
2717       j = rowrange[recv_rank[imdex]+1] - rowrange[recv_rank[imdex]];
2718       rptr = rbuf_j[imdex];
2719       for (i=0; i<j; i++){
2720         ncols = rptr[i+1] - rptr[i];
2721         if (rownz_max < ncols) rownz_max = ncols;
2722       }
2723     }
2724 
2725     ierr = MatCreate(subcomm,&C);CHKERRQ(ierr);
2726     ierr = MatSetSizes(C,mlocal_sub,mlocal_sub,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr);
2727     ierr = MatSetFromOptions(C);CHKERRQ(ierr);
2728     ierr = MatSeqAIJSetPreallocation(C,rownz_max,PETSC_NULL);CHKERRQ(ierr);
2729     ierr = MatMPIAIJSetPreallocation(C,rownz_max,PETSC_NULL,rownz_max,PETSC_NULL);CHKERRQ(ierr);
2730   } else {
2731     C = *matredundant;
2732   }
2733 
2734   /* insert local matrix entries */
2735   rptr = sbuf_j;
2736   cols = sbuf_j + rend-rstart + 1;
2737   vals = sbuf_a;
2738   for (i=0; i<rend-rstart; i++){
2739     row   = i + rstart;
2740     ncols = rptr[i+1] - rptr[i];
2741     ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
2742     vals += ncols;
2743     cols += ncols;
2744   }
2745   /* insert received matrix entries */
2746   for (imdex=0; imdex<nrecvs; imdex++){
2747     rstart = rowrange[recv_rank[imdex]];
2748     rend   = rowrange[recv_rank[imdex]+1];
2749     rptr = rbuf_j[imdex];
2750     cols = rbuf_j[imdex] + rend-rstart + 1;
2751     vals = rbuf_a[imdex];
2752     for (i=0; i<rend-rstart; i++){
2753       row   = i + rstart;
2754       ncols = rptr[i+1] - rptr[i];
2755       ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
2756       vals += ncols;
2757       cols += ncols;
2758     }
2759   }
2760   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2761   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2762   ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr);
2763   if (M != mat->rmap->N || N != mat->cmap->N) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"redundant mat size %d x %d != input mat size %d x %d",M,N,mat->rmap->N,mat->cmap->N);
2764   if (reuse == MAT_INITIAL_MATRIX) {
2765     PetscContainer container;
2766     *matredundant = C;
2767     /* create a supporting struct and attach it to C for reuse */
2768     ierr = PetscNewLog(C,Mat_Redundant,&redund);CHKERRQ(ierr);
2769     ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
2770     ierr = PetscContainerSetPointer(container,redund);CHKERRQ(ierr);
2771     ierr = PetscContainerSetUserDestroy(container,PetscContainerDestroy_MatRedundant);CHKERRQ(ierr);
2772     ierr = PetscObjectCompose((PetscObject)C,"Mat_Redundant",(PetscObject)container);CHKERRQ(ierr);
2773     ierr = PetscContainerDestroy(&container);CHKERRQ(ierr);
2774 
2775     redund->nzlocal = nzlocal;
2776     redund->nsends  = nsends;
2777     redund->nrecvs  = nrecvs;
2778     redund->send_rank = send_rank;
2779     redund->recv_rank = recv_rank;
2780     redund->sbuf_nz = sbuf_nz;
2781     redund->rbuf_nz = rbuf_nz;
2782     redund->sbuf_j  = sbuf_j;
2783     redund->sbuf_a  = sbuf_a;
2784     redund->rbuf_j  = rbuf_j;
2785     redund->rbuf_a  = rbuf_a;
2786 
2787     redund->Destroy = C->ops->destroy;
2788     C->ops->destroy = MatDestroy_MatRedundant;
2789   }
2790   PetscFunctionReturn(0);
2791 }
2792 
2793 #undef __FUNCT__
2794 #define __FUNCT__ "MatGetRowMaxAbs_MPIAIJ"
2795 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2796 {
2797   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2798   PetscErrorCode ierr;
2799   PetscInt       i,*idxb = 0;
2800   PetscScalar    *va,*vb;
2801   Vec            vtmp;
2802 
2803   PetscFunctionBegin;
2804   ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr);
2805   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2806   if (idx) {
2807     for (i=0; i<A->rmap->n; i++) {
2808       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2809     }
2810   }
2811 
2812   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
2813   if (idx) {
2814     ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr);
2815   }
2816   ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
2817   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
2818 
2819   for (i=0; i<A->rmap->n; i++){
2820     if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
2821       va[i] = vb[i];
2822       if (idx) idx[i] = a->garray[idxb[i]];
2823     }
2824   }
2825 
2826   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2827   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
2828   ierr = PetscFree(idxb);CHKERRQ(ierr);
2829   ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
2830   PetscFunctionReturn(0);
2831 }
2832 
2833 #undef __FUNCT__
2834 #define __FUNCT__ "MatGetRowMinAbs_MPIAIJ"
2835 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2836 {
2837   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2838   PetscErrorCode ierr;
2839   PetscInt       i,*idxb = 0;
2840   PetscScalar    *va,*vb;
2841   Vec            vtmp;
2842 
2843   PetscFunctionBegin;
2844   ierr = MatGetRowMinAbs(a->A,v,idx);CHKERRQ(ierr);
2845   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2846   if (idx) {
2847     for (i=0; i<A->rmap->n; i++) {
2848       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2849     }
2850   }
2851 
2852   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
2853   if (idx) {
2854     ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr);
2855   }
2856   ierr = MatGetRowMinAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
2857   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
2858 
2859   for (i=0; i<A->rmap->n; i++){
2860     if (PetscAbsScalar(va[i]) > PetscAbsScalar(vb[i])) {
2861       va[i] = vb[i];
2862       if (idx) idx[i] = a->garray[idxb[i]];
2863     }
2864   }
2865 
2866   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2867   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
2868   ierr = PetscFree(idxb);CHKERRQ(ierr);
2869   ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
2870   PetscFunctionReturn(0);
2871 }
2872 
2873 #undef __FUNCT__
2874 #define __FUNCT__ "MatGetRowMin_MPIAIJ"
2875 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2876 {
2877   Mat_MPIAIJ    *mat    = (Mat_MPIAIJ *) A->data;
2878   PetscInt       n      = A->rmap->n;
2879   PetscInt       cstart = A->cmap->rstart;
2880   PetscInt      *cmap   = mat->garray;
2881   PetscInt      *diagIdx, *offdiagIdx;
2882   Vec            diagV, offdiagV;
2883   PetscScalar   *a, *diagA, *offdiagA;
2884   PetscInt       r;
2885   PetscErrorCode ierr;
2886 
2887   PetscFunctionBegin;
2888   ierr = PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);CHKERRQ(ierr);
2889   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr);
2890   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr);
2891   ierr = MatGetRowMin(mat->A, diagV,    diagIdx);CHKERRQ(ierr);
2892   ierr = MatGetRowMin(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
2893   ierr = VecGetArray(v,        &a);CHKERRQ(ierr);
2894   ierr = VecGetArray(diagV,    &diagA);CHKERRQ(ierr);
2895   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2896   for(r = 0; r < n; ++r) {
2897     if (PetscAbsScalar(diagA[r]) <= PetscAbsScalar(offdiagA[r])) {
2898       a[r]   = diagA[r];
2899       idx[r] = cstart + diagIdx[r];
2900     } else {
2901       a[r]   = offdiagA[r];
2902       idx[r] = cmap[offdiagIdx[r]];
2903     }
2904   }
2905   ierr = VecRestoreArray(v,        &a);CHKERRQ(ierr);
2906   ierr = VecRestoreArray(diagV,    &diagA);CHKERRQ(ierr);
2907   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2908   ierr = VecDestroy(&diagV);CHKERRQ(ierr);
2909   ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
2910   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
2911   PetscFunctionReturn(0);
2912 }
2913 
2914 #undef __FUNCT__
2915 #define __FUNCT__ "MatGetRowMax_MPIAIJ"
2916 PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2917 {
2918   Mat_MPIAIJ    *mat    = (Mat_MPIAIJ *) A->data;
2919   PetscInt       n      = A->rmap->n;
2920   PetscInt       cstart = A->cmap->rstart;
2921   PetscInt      *cmap   = mat->garray;
2922   PetscInt      *diagIdx, *offdiagIdx;
2923   Vec            diagV, offdiagV;
2924   PetscScalar   *a, *diagA, *offdiagA;
2925   PetscInt       r;
2926   PetscErrorCode ierr;
2927 
2928   PetscFunctionBegin;
2929   ierr = PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);CHKERRQ(ierr);
2930   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr);
2931   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr);
2932   ierr = MatGetRowMax(mat->A, diagV,    diagIdx);CHKERRQ(ierr);
2933   ierr = MatGetRowMax(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
2934   ierr = VecGetArray(v,        &a);CHKERRQ(ierr);
2935   ierr = VecGetArray(diagV,    &diagA);CHKERRQ(ierr);
2936   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2937   for(r = 0; r < n; ++r) {
2938     if (PetscAbsScalar(diagA[r]) >= PetscAbsScalar(offdiagA[r])) {
2939       a[r]   = diagA[r];
2940       idx[r] = cstart + diagIdx[r];
2941     } else {
2942       a[r]   = offdiagA[r];
2943       idx[r] = cmap[offdiagIdx[r]];
2944     }
2945   }
2946   ierr = VecRestoreArray(v,        &a);CHKERRQ(ierr);
2947   ierr = VecRestoreArray(diagV,    &diagA);CHKERRQ(ierr);
2948   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2949   ierr = VecDestroy(&diagV);CHKERRQ(ierr);
2950   ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
2951   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
2952   PetscFunctionReturn(0);
2953 }
2954 
2955 #undef __FUNCT__
2956 #define __FUNCT__ "MatGetSeqNonzerostructure_MPIAIJ"
2957 PetscErrorCode MatGetSeqNonzerostructure_MPIAIJ(Mat mat,Mat *newmat)
2958 {
2959   PetscErrorCode ierr;
2960   Mat            *dummy;
2961 
2962   PetscFunctionBegin;
2963   ierr = MatGetSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy);CHKERRQ(ierr);
2964   *newmat = *dummy;
2965   ierr = PetscFree(dummy);CHKERRQ(ierr);
2966   PetscFunctionReturn(0);
2967 }
2968 
2969 extern PetscErrorCode  MatFDColoringApply_AIJ(Mat,MatFDColoring,Vec,MatStructure*,void*);
2970 /* -------------------------------------------------------------------*/
2971 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
2972        MatGetRow_MPIAIJ,
2973        MatRestoreRow_MPIAIJ,
2974        MatMult_MPIAIJ,
2975 /* 4*/ MatMultAdd_MPIAIJ,
2976        MatMultTranspose_MPIAIJ,
2977        MatMultTransposeAdd_MPIAIJ,
2978 #ifdef PETSC_HAVE_PBGL
2979        MatSolve_MPIAIJ,
2980 #else
2981        0,
2982 #endif
2983        0,
2984        0,
2985 /*10*/ 0,
2986        0,
2987        0,
2988        MatSOR_MPIAIJ,
2989        MatTranspose_MPIAIJ,
2990 /*15*/ MatGetInfo_MPIAIJ,
2991        MatEqual_MPIAIJ,
2992        MatGetDiagonal_MPIAIJ,
2993        MatDiagonalScale_MPIAIJ,
2994        MatNorm_MPIAIJ,
2995 /*20*/ MatAssemblyBegin_MPIAIJ,
2996        MatAssemblyEnd_MPIAIJ,
2997        MatSetOption_MPIAIJ,
2998        MatZeroEntries_MPIAIJ,
2999 /*24*/ MatZeroRows_MPIAIJ,
3000        0,
3001 #ifdef PETSC_HAVE_PBGL
3002        0,
3003 #else
3004        0,
3005 #endif
3006        0,
3007        0,
3008 /*29*/ MatSetUpPreallocation_MPIAIJ,
3009 #ifdef PETSC_HAVE_PBGL
3010        0,
3011 #else
3012        0,
3013 #endif
3014        0,
3015        0,
3016        0,
3017 /*34*/ MatDuplicate_MPIAIJ,
3018        0,
3019        0,
3020        0,
3021        0,
3022 /*39*/ MatAXPY_MPIAIJ,
3023        MatGetSubMatrices_MPIAIJ,
3024        MatIncreaseOverlap_MPIAIJ,
3025        MatGetValues_MPIAIJ,
3026        MatCopy_MPIAIJ,
3027 /*44*/ MatGetRowMax_MPIAIJ,
3028        MatScale_MPIAIJ,
3029        0,
3030        0,
3031        MatZeroRowsColumns_MPIAIJ,
3032 /*49*/ MatSetBlockSize_MPIAIJ,
3033        0,
3034        0,
3035        0,
3036        0,
3037 /*54*/ MatFDColoringCreate_MPIAIJ,
3038        0,
3039        MatSetUnfactored_MPIAIJ,
3040        MatPermute_MPIAIJ,
3041        0,
3042 /*59*/ MatGetSubMatrix_MPIAIJ,
3043        MatDestroy_MPIAIJ,
3044        MatView_MPIAIJ,
3045        0,
3046        0,
3047 /*64*/ 0,
3048        0,
3049        0,
3050        0,
3051        0,
3052 /*69*/ MatGetRowMaxAbs_MPIAIJ,
3053        MatGetRowMinAbs_MPIAIJ,
3054        0,
3055        MatSetColoring_MPIAIJ,
3056 #if defined(PETSC_HAVE_ADIC)
3057        MatSetValuesAdic_MPIAIJ,
3058 #else
3059        0,
3060 #endif
3061        MatSetValuesAdifor_MPIAIJ,
3062 /*75*/ MatFDColoringApply_AIJ,
3063        0,
3064        0,
3065        0,
3066        0,
3067 /*80*/ 0,
3068        0,
3069        0,
3070 /*83*/ MatLoad_MPIAIJ,
3071        0,
3072        0,
3073        0,
3074        0,
3075        0,
3076 /*89*/ MatMatMult_MPIAIJ_MPIAIJ,
3077        MatMatMultSymbolic_MPIAIJ_MPIAIJ,
3078        MatMatMultNumeric_MPIAIJ_MPIAIJ,
3079        MatPtAP_Basic,
3080        MatPtAPSymbolic_MPIAIJ,
3081 /*94*/ MatPtAPNumeric_MPIAIJ,
3082        0,
3083        0,
3084        0,
3085        0,
3086 /*99*/ 0,
3087        MatPtAPSymbolic_MPIAIJ_MPIAIJ,
3088        MatPtAPNumeric_MPIAIJ_MPIAIJ,
3089        MatConjugate_MPIAIJ,
3090        0,
3091 /*104*/MatSetValuesRow_MPIAIJ,
3092        MatRealPart_MPIAIJ,
3093        MatImaginaryPart_MPIAIJ,
3094        0,
3095        0,
3096 /*109*/0,
3097        MatGetRedundantMatrix_MPIAIJ,
3098        MatGetRowMin_MPIAIJ,
3099        0,
3100        0,
3101 /*114*/MatGetSeqNonzerostructure_MPIAIJ,
3102        0,
3103        0,
3104        0,
3105        0,
3106 /*119*/0,
3107        0,
3108        0,
3109        0,
3110        MatGetMultiProcBlock_MPIAIJ,
3111 /*124*/MatFindNonZeroRows_MPIAIJ,
3112        MatGetColumnNorms_MPIAIJ,
3113        0,
3114        0,
3115        MatGetSubMatricesParallel_MPIAIJ
3116 };
3117 
3118 /* ----------------------------------------------------------------------------------------*/
3119 
3120 EXTERN_C_BEGIN
3121 #undef __FUNCT__
3122 #define __FUNCT__ "MatStoreValues_MPIAIJ"
3123 PetscErrorCode  MatStoreValues_MPIAIJ(Mat mat)
3124 {
3125   Mat_MPIAIJ     *aij = (Mat_MPIAIJ *)mat->data;
3126   PetscErrorCode ierr;
3127 
3128   PetscFunctionBegin;
3129   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
3130   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
3131   PetscFunctionReturn(0);
3132 }
3133 EXTERN_C_END
3134 
3135 EXTERN_C_BEGIN
3136 #undef __FUNCT__
3137 #define __FUNCT__ "MatRetrieveValues_MPIAIJ"
3138 PetscErrorCode  MatRetrieveValues_MPIAIJ(Mat mat)
3139 {
3140   Mat_MPIAIJ     *aij = (Mat_MPIAIJ *)mat->data;
3141   PetscErrorCode ierr;
3142 
3143   PetscFunctionBegin;
3144   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
3145   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
3146   PetscFunctionReturn(0);
3147 }
3148 EXTERN_C_END
3149 
3150 EXTERN_C_BEGIN
3151 #undef __FUNCT__
3152 #define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ"
3153 PetscErrorCode  MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
3154 {
3155   Mat_MPIAIJ     *b;
3156   PetscErrorCode ierr;
3157   PetscInt       i;
3158 
3159   PetscFunctionBegin;
3160   if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5;
3161   if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2;
3162   if (d_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz);
3163   if (o_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz);
3164 
3165   ierr = PetscLayoutSetBlockSize(B->rmap,1);CHKERRQ(ierr);
3166   ierr = PetscLayoutSetBlockSize(B->cmap,1);CHKERRQ(ierr);
3167   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3168   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3169   if (d_nnz) {
3170     for (i=0; i<B->rmap->n; i++) {
3171       if (d_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %D value %D",i,d_nnz[i]);
3172     }
3173   }
3174   if (o_nnz) {
3175     for (i=0; i<B->rmap->n; i++) {
3176       if (o_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %D value %D",i,o_nnz[i]);
3177     }
3178   }
3179   b = (Mat_MPIAIJ*)B->data;
3180 
3181   if (!B->preallocated) {
3182     /* Explicitly create 2 MATSEQAIJ matrices. */
3183     ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
3184     ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr);
3185     ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr);
3186     ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr);
3187     ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
3188     ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr);
3189     ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr);
3190     ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr);
3191   }
3192 
3193   ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr);
3194   ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr);
3195   B->preallocated = PETSC_TRUE;
3196   PetscFunctionReturn(0);
3197 }
3198 EXTERN_C_END
3199 
3200 #undef __FUNCT__
3201 #define __FUNCT__ "MatDuplicate_MPIAIJ"
3202 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
3203 {
3204   Mat            mat;
3205   Mat_MPIAIJ     *a,*oldmat = (Mat_MPIAIJ*)matin->data;
3206   PetscErrorCode ierr;
3207 
3208   PetscFunctionBegin;
3209   *newmat       = 0;
3210   ierr = MatCreate(((PetscObject)matin)->comm,&mat);CHKERRQ(ierr);
3211   ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr);
3212   ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr);
3213   ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
3214   a    = (Mat_MPIAIJ*)mat->data;
3215 
3216   mat->factortype    = matin->factortype;
3217   mat->rmap->bs      = matin->rmap->bs;
3218   mat->assembled    = PETSC_TRUE;
3219   mat->insertmode   = NOT_SET_VALUES;
3220   mat->preallocated = PETSC_TRUE;
3221 
3222   a->size           = oldmat->size;
3223   a->rank           = oldmat->rank;
3224   a->donotstash     = oldmat->donotstash;
3225   a->roworiented    = oldmat->roworiented;
3226   a->rowindices     = 0;
3227   a->rowvalues      = 0;
3228   a->getrowactive   = PETSC_FALSE;
3229 
3230   ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr);
3231   ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr);
3232 
3233   if (oldmat->colmap) {
3234 #if defined (PETSC_USE_CTABLE)
3235     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
3236 #else
3237     ierr = PetscMalloc((mat->cmap->N)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr);
3238     ierr = PetscLogObjectMemory(mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
3239     ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
3240 #endif
3241   } else a->colmap = 0;
3242   if (oldmat->garray) {
3243     PetscInt len;
3244     len  = oldmat->B->cmap->n;
3245     ierr = PetscMalloc((len+1)*sizeof(PetscInt),&a->garray);CHKERRQ(ierr);
3246     ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr);
3247     if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); }
3248   } else a->garray = 0;
3249 
3250   ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
3251   ierr = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr);
3252   ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
3253   ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr);
3254   ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
3255   ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr);
3256   ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
3257   ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr);
3258   ierr = PetscFListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr);
3259   *newmat = mat;
3260   PetscFunctionReturn(0);
3261 }
3262 
3263 
3264 
3265 #undef __FUNCT__
3266 #define __FUNCT__ "MatLoad_MPIAIJ"
3267 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer)
3268 {
3269   PetscScalar    *vals,*svals;
3270   MPI_Comm       comm = ((PetscObject)viewer)->comm;
3271   PetscErrorCode ierr;
3272   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
3273   PetscInt       i,nz,j,rstart,rend,mmax,maxnz = 0,grows,gcols;
3274   PetscInt       header[4],*rowlengths = 0,M,N,m,*cols;
3275   PetscInt       *ourlens = PETSC_NULL,*procsnz = PETSC_NULL,*offlens = PETSC_NULL,jj,*mycols,*smycols;
3276   PetscInt       cend,cstart,n,*rowners,sizesset=1;
3277   int            fd;
3278 
3279   PetscFunctionBegin;
3280   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3281   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3282   if (!rank) {
3283     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
3284     ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
3285     if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
3286   }
3287 
3288   if (newMat->rmap->n < 0 && newMat->rmap->N < 0 && newMat->cmap->n < 0 && newMat->cmap->N < 0) sizesset = 0;
3289 
3290   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
3291   M = header[1]; N = header[2];
3292   /* If global rows/cols are set to PETSC_DECIDE, set it to the sizes given in the file */
3293   if (sizesset && newMat->rmap->N < 0) newMat->rmap->N = M;
3294   if (sizesset && newMat->cmap->N < 0) newMat->cmap->N = N;
3295 
3296   /* If global sizes are set, check if they are consistent with that given in the file */
3297   if (sizesset) {
3298     ierr = MatGetSize(newMat,&grows,&gcols);CHKERRQ(ierr);
3299   }
3300   if (sizesset && newMat->rmap->N != grows) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows:Matrix in file has (%d) and input matrix has (%d)",M,grows);
3301   if (sizesset && newMat->cmap->N != gcols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols:Matrix in file has (%d) and input matrix has (%d)",N,gcols);
3302 
3303   /* determine ownership of all rows */
3304   if (newMat->rmap->n < 0 ) m    = M/size + ((M % size) > rank); /* PETSC_DECIDE */
3305   else m = newMat->rmap->n; /* Set by user */
3306 
3307   ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr);
3308   ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
3309 
3310   /* First process needs enough room for process with most rows */
3311   if (!rank) {
3312     mmax       = rowners[1];
3313     for (i=2; i<=size; i++) {
3314       mmax = PetscMax(mmax,rowners[i]);
3315     }
3316   } else mmax = m;
3317 
3318   rowners[0] = 0;
3319   for (i=2; i<=size; i++) {
3320     rowners[i] += rowners[i-1];
3321   }
3322   rstart = rowners[rank];
3323   rend   = rowners[rank+1];
3324 
3325   /* distribute row lengths to all processors */
3326   ierr    = PetscMalloc2(mmax,PetscInt,&ourlens,mmax,PetscInt,&offlens);CHKERRQ(ierr);
3327   if (!rank) {
3328     ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr);
3329     ierr = PetscMalloc(m*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
3330     ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr);
3331     ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr);
3332     for (j=0; j<m; j++) {
3333       procsnz[0] += ourlens[j];
3334     }
3335     for (i=1; i<size; i++) {
3336       ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr);
3337       /* calculate the number of nonzeros on each processor */
3338       for (j=0; j<rowners[i+1]-rowners[i]; j++) {
3339         procsnz[i] += rowlengths[j];
3340       }
3341       ierr = MPILong_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
3342     }
3343     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
3344   } else {
3345     ierr = MPILong_Recv(ourlens,m,MPIU_INT,0,tag,comm);CHKERRQ(ierr);
3346   }
3347 
3348   if (!rank) {
3349     /* determine max buffer needed and allocate it */
3350     maxnz = 0;
3351     for (i=0; i<size; i++) {
3352       maxnz = PetscMax(maxnz,procsnz[i]);
3353     }
3354     ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr);
3355 
3356     /* read in my part of the matrix column indices  */
3357     nz   = procsnz[0];
3358     ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr);
3359     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
3360 
3361     /* read in everyone else's part and ship it off */
3362     for (i=1; i<size; i++) {
3363       nz     = procsnz[i];
3364       ierr   = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
3365       ierr   = MPILong_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
3366     }
3367     ierr = PetscFree(cols);CHKERRQ(ierr);
3368   } else {
3369     /* determine buffer space needed for message */
3370     nz = 0;
3371     for (i=0; i<m; i++) {
3372       nz += ourlens[i];
3373     }
3374     ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr);
3375 
3376     /* receive message of column indices*/
3377     ierr = MPILong_Recv(mycols,nz,MPIU_INT,0,tag,comm);CHKERRQ(ierr);
3378   }
3379 
3380   /* determine column ownership if matrix is not square */
3381   if (N != M) {
3382     if (newMat->cmap->n < 0) n      = N/size + ((N % size) > rank);
3383     else n = newMat->cmap->n;
3384     ierr   = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3385     cstart = cend - n;
3386   } else {
3387     cstart = rstart;
3388     cend   = rend;
3389     n      = cend - cstart;
3390   }
3391 
3392   /* loop over local rows, determining number of off diagonal entries */
3393   ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr);
3394   jj = 0;
3395   for (i=0; i<m; i++) {
3396     for (j=0; j<ourlens[i]; j++) {
3397       if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++;
3398       jj++;
3399     }
3400   }
3401 
3402   for (i=0; i<m; i++) {
3403     ourlens[i] -= offlens[i];
3404   }
3405   if (!sizesset) {
3406     ierr = MatSetSizes(newMat,m,n,M,N);CHKERRQ(ierr);
3407   }
3408   ierr = MatMPIAIJSetPreallocation(newMat,0,ourlens,0,offlens);CHKERRQ(ierr);
3409 
3410   for (i=0; i<m; i++) {
3411     ourlens[i] += offlens[i];
3412   }
3413 
3414   if (!rank) {
3415     ierr = PetscMalloc((maxnz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
3416 
3417     /* read in my part of the matrix numerical values  */
3418     nz   = procsnz[0];
3419     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3420 
3421     /* insert into matrix */
3422     jj      = rstart;
3423     smycols = mycols;
3424     svals   = vals;
3425     for (i=0; i<m; i++) {
3426       ierr = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
3427       smycols += ourlens[i];
3428       svals   += ourlens[i];
3429       jj++;
3430     }
3431 
3432     /* read in other processors and ship out */
3433     for (i=1; i<size; i++) {
3434       nz     = procsnz[i];
3435       ierr   = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3436       ierr   = MPILong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr);
3437     }
3438     ierr = PetscFree(procsnz);CHKERRQ(ierr);
3439   } else {
3440     /* receive numeric values */
3441     ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
3442 
3443     /* receive message of values*/
3444     ierr   = MPILong_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr);
3445 
3446     /* insert into matrix */
3447     jj      = rstart;
3448     smycols = mycols;
3449     svals   = vals;
3450     for (i=0; i<m; i++) {
3451       ierr     = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
3452       smycols += ourlens[i];
3453       svals   += ourlens[i];
3454       jj++;
3455     }
3456   }
3457   ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr);
3458   ierr = PetscFree(vals);CHKERRQ(ierr);
3459   ierr = PetscFree(mycols);CHKERRQ(ierr);
3460   ierr = PetscFree(rowners);CHKERRQ(ierr);
3461 
3462   ierr = MatAssemblyBegin(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3463   ierr = MatAssemblyEnd(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3464   PetscFunctionReturn(0);
3465 }
3466 
3467 #undef __FUNCT__
3468 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ"
3469 PetscErrorCode MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat)
3470 {
3471   PetscErrorCode ierr;
3472   IS             iscol_local;
3473   PetscInt       csize;
3474 
3475   PetscFunctionBegin;
3476   ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
3477   if (call == MAT_REUSE_MATRIX) {
3478     ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr);
3479     if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3480   } else {
3481     ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
3482   }
3483   ierr = MatGetSubMatrix_MPIAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr);
3484   if (call == MAT_INITIAL_MATRIX) {
3485     ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
3486     ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
3487   }
3488   PetscFunctionReturn(0);
3489 }
3490 
3491 #undef __FUNCT__
3492 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ_Private"
3493 /*
3494     Not great since it makes two copies of the submatrix: first a local SeqAIJ
3495   copy, and then the end result obtained by concatenating the local matrices.
3496   Writing it directly would be much like MatGetSubMatrices_MPIAIJ().
3497 
3498   Note: This requires a sequential iscol with all indices.
3499 */
3500 PetscErrorCode MatGetSubMatrix_MPIAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat)
3501 {
3502   PetscErrorCode ierr;
3503   PetscMPIInt    rank,size;
3504   PetscInt       i,m,n,rstart,row,rend,nz,*cwork,j;
3505   PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal;
3506   Mat            *local,M,Mreuse;
3507   MatScalar      *vwork,*aa;
3508   MPI_Comm       comm = ((PetscObject)mat)->comm;
3509   Mat_SeqAIJ     *aij;
3510 
3511 
3512   PetscFunctionBegin;
3513   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3514   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3515 
3516   if (call ==  MAT_REUSE_MATRIX) {
3517     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr);
3518     if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3519     local = &Mreuse;
3520     ierr  = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);CHKERRQ(ierr);
3521   } else {
3522     ierr   = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr);
3523     Mreuse = *local;
3524     ierr   = PetscFree(local);CHKERRQ(ierr);
3525   }
3526 
3527   /*
3528       m - number of local rows
3529       n - number of columns (same on all processors)
3530       rstart - first row in new global matrix generated
3531   */
3532   ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
3533   if (call == MAT_INITIAL_MATRIX) {
3534     aij = (Mat_SeqAIJ*)(Mreuse)->data;
3535     ii  = aij->i;
3536     jj  = aij->j;
3537 
3538     /*
3539         Determine the number of non-zeros in the diagonal and off-diagonal
3540         portions of the matrix in order to do correct preallocation
3541     */
3542 
3543     /* first get start and end of "diagonal" columns */
3544     if (csize == PETSC_DECIDE) {
3545       ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
3546       if (mglobal == n) { /* square matrix */
3547         nlocal = m;
3548       } else {
3549         nlocal = n/size + ((n % size) > rank);
3550       }
3551     } else {
3552       nlocal = csize;
3553     }
3554     ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3555     rstart = rend - nlocal;
3556     if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n);
3557 
3558     /* next, compute all the lengths */
3559     ierr  = PetscMalloc((2*m+1)*sizeof(PetscInt),&dlens);CHKERRQ(ierr);
3560     olens = dlens + m;
3561     for (i=0; i<m; i++) {
3562       jend = ii[i+1] - ii[i];
3563       olen = 0;
3564       dlen = 0;
3565       for (j=0; j<jend; j++) {
3566         if (*jj < rstart || *jj >= rend) olen++;
3567         else dlen++;
3568         jj++;
3569       }
3570       olens[i] = olen;
3571       dlens[i] = dlen;
3572     }
3573     ierr = MatCreate(comm,&M);CHKERRQ(ierr);
3574     ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr);
3575     ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr);
3576     ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr);
3577     ierr = PetscFree(dlens);CHKERRQ(ierr);
3578   } else {
3579     PetscInt ml,nl;
3580 
3581     M = *newmat;
3582     ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
3583     if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
3584     ierr = MatZeroEntries(M);CHKERRQ(ierr);
3585     /*
3586          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
3587        rather than the slower MatSetValues().
3588     */
3589     M->was_assembled = PETSC_TRUE;
3590     M->assembled     = PETSC_FALSE;
3591   }
3592   ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
3593   aij = (Mat_SeqAIJ*)(Mreuse)->data;
3594   ii  = aij->i;
3595   jj  = aij->j;
3596   aa  = aij->a;
3597   for (i=0; i<m; i++) {
3598     row   = rstart + i;
3599     nz    = ii[i+1] - ii[i];
3600     cwork = jj;     jj += nz;
3601     vwork = aa;     aa += nz;
3602     ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
3603   }
3604 
3605   ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3606   ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3607   *newmat = M;
3608 
3609   /* save submatrix used in processor for next request */
3610   if (call ==  MAT_INITIAL_MATRIX) {
3611     ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
3612     ierr = MatDestroy(&Mreuse);CHKERRQ(ierr);
3613   }
3614 
3615   PetscFunctionReturn(0);
3616 }
3617 
3618 EXTERN_C_BEGIN
3619 #undef __FUNCT__
3620 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR_MPIAIJ"
3621 PetscErrorCode  MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[])
3622 {
3623   PetscInt       m,cstart, cend,j,nnz,i,d;
3624   PetscInt       *d_nnz,*o_nnz,nnz_max = 0,rstart,ii;
3625   const PetscInt *JJ;
3626   PetscScalar    *values;
3627   PetscErrorCode ierr;
3628 
3629   PetscFunctionBegin;
3630   if (Ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0 it is %D",Ii[0]);
3631 
3632   ierr = PetscLayoutSetBlockSize(B->rmap,1);CHKERRQ(ierr);
3633   ierr = PetscLayoutSetBlockSize(B->cmap,1);CHKERRQ(ierr);
3634   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3635   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3636   m      = B->rmap->n;
3637   cstart = B->cmap->rstart;
3638   cend   = B->cmap->rend;
3639   rstart = B->rmap->rstart;
3640 
3641   ierr  = PetscMalloc2(m,PetscInt,&d_nnz,m,PetscInt,&o_nnz);CHKERRQ(ierr);
3642 
3643 #if defined(PETSC_USE_DEBUG)
3644   for (i=0; i<m; i++) {
3645     nnz     = Ii[i+1]- Ii[i];
3646     JJ      = J + Ii[i];
3647     if (nnz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative %D number of columns",i,nnz);
3648     if (nnz && (JJ[0] < 0)) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index",i);
3649     if (nnz && (JJ[nnz-1] >= B->cmap->N)) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N);
3650   }
3651 #endif
3652 
3653   for (i=0; i<m; i++) {
3654     nnz     = Ii[i+1]- Ii[i];
3655     JJ      = J + Ii[i];
3656     nnz_max = PetscMax(nnz_max,nnz);
3657     d       = 0;
3658     for (j=0; j<nnz; j++) {
3659       if (cstart <= JJ[j] && JJ[j] < cend) d++;
3660     }
3661     d_nnz[i] = d;
3662     o_nnz[i] = nnz - d;
3663   }
3664   ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
3665   ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);
3666 
3667   if (v) values = (PetscScalar*)v;
3668   else {
3669     ierr = PetscMalloc((nnz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr);
3670     ierr = PetscMemzero(values,nnz_max*sizeof(PetscScalar));CHKERRQ(ierr);
3671   }
3672 
3673   for (i=0; i<m; i++) {
3674     ii   = i + rstart;
3675     nnz  = Ii[i+1]- Ii[i];
3676     ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ? Ii[i] : 0),INSERT_VALUES);CHKERRQ(ierr);
3677   }
3678   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3679   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3680 
3681   if (!v) {
3682     ierr = PetscFree(values);CHKERRQ(ierr);
3683   }
3684   PetscFunctionReturn(0);
3685 }
3686 EXTERN_C_END
3687 
3688 #undef __FUNCT__
3689 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR"
3690 /*@
3691    MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format
3692    (the default parallel PETSc format).
3693 
3694    Collective on MPI_Comm
3695 
3696    Input Parameters:
3697 +  B - the matrix
3698 .  i - the indices into j for the start of each local row (starts with zero)
3699 .  j - the column indices for each local row (starts with zero)
3700 -  v - optional values in the matrix
3701 
3702    Level: developer
3703 
3704    Notes:
3705        The i, j, and v arrays ARE copied by this routine into the internal format used by PETSc;
3706      thus you CANNOT change the matrix entries by changing the values of v[] after you have
3707      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3708 
3709        The i and j indices are 0 based, and i indices are indices corresponding to the local j array.
3710 
3711        The format which is used for the sparse matrix input is equivalent to a
3712     row-major ordering, i.e., for the following matrix, the input data expected is
3713     as shown:
3714 
3715         1 0 0
3716         2 0 3     P0
3717        -------
3718         4 5 6     P1
3719 
3720      Process0 [P0]: rows_owned=[0,1]
3721         i =  {0,1,3}  [size = nrow+1  = 2+1]
3722         j =  {0,0,2}  [size = nz = 3]
3723         v =  {1,2,3}  [size = nz = 3]
3724 
3725      Process1 [P1]: rows_owned=[2]
3726         i =  {0,3}    [size = nrow+1  = 1+1]
3727         j =  {0,1,2}  [size = nz = 3]
3728         v =  {4,5,6}  [size = nz = 3]
3729 
3730 .keywords: matrix, aij, compressed row, sparse, parallel
3731 
3732 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateMPIAIJ(), MPIAIJ,
3733           MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays()
3734 @*/
3735 PetscErrorCode  MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
3736 {
3737   PetscErrorCode ierr;
3738 
3739   PetscFunctionBegin;
3740   ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocationCSR_C",(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,i,j,v));CHKERRQ(ierr);
3741   PetscFunctionReturn(0);
3742 }
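
/*
   Illustrative usage sketch (not part of the library and not compiled; the function
   name BuildCSRExample and the assumption of exactly two MPI processes are made up
   for the example): assembling the 3x3 matrix from the manual page above, where each
   process passes only its own rows in CSR form.
*/
#if 0
static PetscErrorCode BuildCSRExample(MPI_Comm comm,Mat *A)
{
  PetscErrorCode    ierr;
  PetscMPIInt       rank;
  /* local CSR data for the matrix [1 0 0; 2 0 3; 4 5 6] split over two processes */
  const PetscInt    i0[] = {0,1,3}, j0[] = {0,0,2};   /* rank 0 owns rows 0 and 1 */
  const PetscScalar v0[] = {1.0,2.0,3.0};
  const PetscInt    i1[] = {0,3},   j1[] = {0,1,2};   /* rank 1 owns row 2 */
  const PetscScalar v1[] = {4.0,5.0,6.0};

  PetscFunctionBegin;
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  ierr = MatCreate(comm,A);CHKERRQ(ierr);
  ierr = MatSetSizes(*A,!rank ? 2 : 1,PETSC_DECIDE,3,3);CHKERRQ(ierr);
  ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr);
  if (!rank) {
    ierr = MatMPIAIJSetPreallocationCSR(*A,i0,j0,v0);CHKERRQ(ierr);
  } else {
    ierr = MatMPIAIJSetPreallocationCSR(*A,i1,j1,v1);CHKERRQ(ierr);
  }
  /* the values have already been inserted and the matrix assembled by the call above */
  PetscFunctionReturn(0);
}
#endif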
3743 
3744 #undef __FUNCT__
3745 #define __FUNCT__ "MatMPIAIJSetPreallocation"
3746 /*@C
3747    MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format
3748    (the default parallel PETSc format).  For good matrix assembly performance
3749    the user should preallocate the matrix storage by setting the parameters
3750    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3751    performance can be increased by more than a factor of 50.
3752 
3753    Collective on MPI_Comm
3754 
3755    Input Parameters:
3756 +  A - the matrix
3757 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
3758            (same value is used for all local rows)
3759 .  d_nnz - array containing the number of nonzeros in the various rows of the
3760            DIAGONAL portion of the local submatrix (possibly different for each row)
3761            or PETSC_NULL, if d_nz is used to specify the nonzero structure.
3762            The size of this array is equal to the number of local rows, i.e 'm'.
3763            You must leave room for the diagonal entry even if it is zero.
3764 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
3765            submatrix (same value is used for all local rows).
3766 -  o_nnz - array containing the number of nonzeros in the various rows of the
3767            OFF-DIAGONAL portion of the local submatrix (possibly different for
3768            each row) or PETSC_NULL, if o_nz is used to specify the nonzero
3769            structure. The size of this array is equal to the number
3770            of local rows, i.e 'm'.
3771 
3772    If the *_nnz parameter is given then the *_nz parameter is ignored
3773 
3774    The AIJ format (also called the Yale sparse matrix format or
3775    compressed row storage (CSR)), is fully compatible with standard Fortran 77
3776    storage.  The stored row and column indices begin with zero.
3777    See the <A href="../../docs/manual.pdf#nameddest=ch_mat">Mat chapter of the users manual</A> for details.
3778 
3779    The parallel matrix is partitioned such that the first m0 rows belong to
3780    process 0, the next m1 rows belong to process 1, the next m2 rows belong
3781    to process 2 etc.. where m0,m1,m2... are the input parameter 'm'.
3782 
3783    The DIAGONAL portion of the local submatrix of a processor can be defined
3784    as the submatrix which is obtained by extracting the part corresponding to
3785    the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the
3786    first row that belongs to the processor, r2 is the last row belonging to
3787    this processor, and c1-c2 is the range of indices of the local part of a
3788    vector suitable for applying the matrix to.  This is an mxn matrix.  In the
3789    common case of a square matrix, the row and column ranges are the same and
3790    the DIAGONAL part is also square. The remaining portion of the local
3791    submatrix (mxN) constitute the OFF-DIAGONAL portion.
3792 
3793    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
3794 
3795    You can call MatGetInfo() to get information on how effective the preallocation was;
3796    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
3797    You can also run with the option -info and look for messages with the string
3798    malloc in them to see if additional memory allocation was needed.
3799 
3800    Example usage:
3801 
3802    Consider the following 8x8 matrix with 34 non-zero values, that is
3803    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
3804    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
3805    as follows:
3806 
3807 .vb
3808             1  2  0  |  0  3  0  |  0  4
3809     Proc0   0  5  6  |  7  0  0  |  8  0
3810             9  0 10  | 11  0  0  | 12  0
3811     -------------------------------------
3812            13  0 14  | 15 16 17  |  0  0
3813     Proc1   0 18  0  | 19 20 21  |  0  0
3814             0  0  0  | 22 23  0  | 24  0
3815     -------------------------------------
3816     Proc2  25 26 27  |  0  0 28  | 29  0
3817            30  0  0  | 31 32 33  |  0 34
3818 .ve
3819 
3820    This can be represented as a collection of submatrices as:
3821 
3822 .vb
3823       A B C
3824       D E F
3825       G H I
3826 .ve
3827 
3828    Where the submatrices A,B,C are owned by proc0, D,E,F are
3829    owned by proc1, G,H,I are owned by proc2.
3830 
3831    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3832    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3833    The 'M','N' parameters are 8,8, and have the same values on all procs.
3834 
3835    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
3836    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
3837    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
3838    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
3839    part as SeqAIJ matrices. for eg: proc1 will store [E] as a SeqAIJ
3840    part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
3841    matrix, and [DF] as another SeqAIJ matrix.
3842    When d_nz, o_nz parameters are specified, d_nz storage elements are
3843    allocated for every row of the local diagonal submatrix, and o_nz
3844    storage locations are allocated for every row of the OFF-DIAGONAL submat.
3845    One way to choose d_nz and o_nz is to use the max nonzeros per local
3846    row for each of the local DIAGONAL and OFF-DIAGONAL submatrices.
3847    In this case, the values of d_nz,o_nz are:
3848 .vb
3849      proc0 : d_nz = 2, o_nz = 2
3850      proc1 : d_nz = 3, o_nz = 2
3851      proc2 : d_nz = 1, o_nz = 4
3852 .ve
3853    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
3854    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
3855    for proc2, i.e., we are using 12+15+10=37 storage locations to store
3856    34 values.
3857 
3858    When d_nnz, o_nnz parameters are specified, the storage is specified
3859    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
3860    In the above case the values for d_nnz,o_nnz are:
3861 .vb
3862      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
3863      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
3864      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
3865 .ve
3866    Here the space allocated is the sum of all the above values, i.e., 34, and
3867    hence the preallocation is perfect.
3868 
3869    Level: intermediate
3870 
3871 .keywords: matrix, aij, compressed row, sparse, parallel
3872 
3873 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIAIJ(), MatMPIAIJSetPreallocationCSR(),
3874           MPIAIJ, MatGetInfo()
3875 @*/
3876 PetscErrorCode  MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
3877 {
3878   PetscErrorCode ierr;
3879 
3880   PetscFunctionBegin;
3881   ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocation_C",(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr);
3882   PetscFunctionReturn(0);
3883 }
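
/*
   Illustrative sketch (not part of the library and not compiled; the function name
   PreallocateExample and the assumption of exactly three MPI processes are made up
   for the example): per-row preallocation with d_nnz/o_nnz using the values worked
   out for the 8x8 example in the manual page above.
*/
#if 0
static PetscErrorCode PreallocateExample(MPI_Comm comm,Mat *A)
{
  PetscErrorCode ierr;
  PetscMPIInt    rank;
  const PetscInt d_nnz0[] = {2,2,2}, o_nnz0[] = {2,2,2};   /* proc0: rows 0-2 */
  const PetscInt d_nnz1[] = {3,3,2}, o_nnz1[] = {2,1,1};   /* proc1: rows 3-5 */
  const PetscInt d_nnz2[] = {1,1},   o_nnz2[] = {4,4};     /* proc2: rows 6-7 */

  PetscFunctionBegin;
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  ierr = MatCreate(comm,A);CHKERRQ(ierr);
  ierr = MatSetSizes(*A,rank < 2 ? 3 : 2,rank < 2 ? 3 : 2,8,8);CHKERRQ(ierr);
  ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr);
  if (!rank)          {ierr = MatMPIAIJSetPreallocation(*A,0,d_nnz0,0,o_nnz0);CHKERRQ(ierr);}
  else if (rank == 1) {ierr = MatMPIAIJSetPreallocation(*A,0,d_nnz1,0,o_nnz1);CHKERRQ(ierr);}
  else                {ierr = MatMPIAIJSetPreallocation(*A,0,d_nnz2,0,o_nnz2);CHKERRQ(ierr);}
  /* ... fill the rows with MatSetValues() and then call MatAssemblyBegin/End() ... */
  PetscFunctionReturn(0);
}
#endif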
3884 
3885 #undef __FUNCT__
3886 #define __FUNCT__ "MatCreateMPIAIJWithArrays"
3887 /*@
3888      MatCreateMPIAIJWithArrays - creates a MPI AIJ matrix using arrays that contain in standard
3889          CSR format the local rows.
3890 
3891    Collective on MPI_Comm
3892 
3893    Input Parameters:
3894 +  comm - MPI communicator
3895 .  m - number of local rows (Cannot be PETSC_DECIDE)
3896 .  n - This value should be the same as the local size used in creating the
3897        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
3898        calculated if N is given) For square matrices n is almost always m.
3899 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
3900 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
3901 .   i - row indices
3902 .   j - column indices
3903 -   a - matrix values
3904 
3905    Output Parameter:
3906 .   mat - the matrix
3907 
3908    Level: intermediate
3909 
3910    Notes:
3911        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
3912      thus you CANNOT change the matrix entries by changing the values of a[] after you have
3913      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3914 
3915        The i and j indices are 0 based, and i indices are indices corresponding to the local j array.
3916 
3917        The format which is used for the sparse matrix input is equivalent to a
3918     row-major ordering, i.e., for the following matrix, the input data expected is
3919     as shown:
3920 
3921         1 0 0
3922         2 0 3     P0
3923        -------
3924         4 5 6     P1
3925 
3926      Process0 [P0]: rows_owned=[0,1]
3927         i =  {0,1,3}  [size = nrow+1  = 2+1]
3928         j =  {0,0,2}  [size = nz = 3]
3929         v =  {1,2,3}  [size = nz = 3]
3930 
3931      Process1 [P1]: rows_owned=[2]
3932         i =  {0,3}    [size = nrow+1  = 1+1]
3933         j =  {0,1,2}  [size = nz = 3]
3934         v =  {4,5,6}  [size = nz = 3]
3935 
3936 .keywords: matrix, aij, compressed row, sparse, parallel
3937 
3938 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
3939           MPIAIJ, MatCreateMPIAIJ(), MatCreateMPIAIJWithSplitArrays()
3940 @*/
3941 PetscErrorCode  MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
3942 {
3943   PetscErrorCode ierr;
3944 
3945   PetscFunctionBegin;
3946   if (i[0]) {
3947     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
3948   }
3949   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
3950   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
3951   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
3952   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
3953   ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr);
3954   PetscFunctionReturn(0);
3955 }
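
/*
   Illustrative sketch (not part of the library and not compiled; the function name
   CreateWithArraysExample and the assumption of exactly two MPI processes are made
   up for the example): creating the 3x3 matrix from the manual page above in a
   single call.  Since the arrays are copied, they may be modified or freed once the
   routine returns.
*/
#if 0
static PetscErrorCode CreateWithArraysExample(MPI_Comm comm,Mat *A)
{
  PetscErrorCode    ierr;
  PetscMPIInt       rank;
  const PetscInt    i0[] = {0,1,3}, j0[] = {0,0,2};   /* rank 0 owns rows 0 and 1 */
  const PetscScalar v0[] = {1.0,2.0,3.0};
  const PetscInt    i1[] = {0,3},   j1[] = {0,1,2};   /* rank 1 owns row 2 */
  const PetscScalar v1[] = {4.0,5.0,6.0};

  PetscFunctionBegin;
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  if (!rank) {
    ierr = MatCreateMPIAIJWithArrays(comm,2,PETSC_DECIDE,3,3,i0,j0,v0,A);CHKERRQ(ierr);
  } else {
    ierr = MatCreateMPIAIJWithArrays(comm,1,PETSC_DECIDE,3,3,i1,j1,v1,A);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
#endif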
3956 
3957 #undef __FUNCT__
3958 #define __FUNCT__ "MatCreateMPIAIJ"
3959 /*@C
3960    MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format
3961    (the default parallel PETSc format).  For good matrix assembly performance
3962    the user should preallocate the matrix storage by setting the parameters
3963    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3964    performance can be increased by more than a factor of 50.
3965 
3966    Collective on MPI_Comm
3967 
3968    Input Parameters:
3969 +  comm - MPI communicator
3970 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
3971            This value should be the same as the local size used in creating the
3972            y vector for the matrix-vector product y = Ax.
3973 .  n - This value should be the same as the local size used in creating the
3974        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
3975        calculated if N is given) For square matrices n is almost always m.
3976 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
3977 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
3978 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
3979            (same value is used for all local rows)
3980 .  d_nnz - array containing the number of nonzeros in the various rows of the
3981            DIAGONAL portion of the local submatrix (possibly different for each row)
3982            or PETSC_NULL, if d_nz is used to specify the nonzero structure.
3983            The size of this array is equal to the number of local rows, i.e 'm'.
3984            You must leave room for the diagonal entry even if it is zero.
3985 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
3986            submatrix (same value is used for all local rows).
3987 -  o_nnz - array containing the number of nonzeros in the various rows of the
3988            OFF-DIAGONAL portion of the local submatrix (possibly different for
3989            each row) or PETSC_NULL, if o_nz is used to specify the nonzero
3990            structure. The size of this array is equal to the number
3991            of local rows, i.e 'm'.
3992 
3993    Output Parameter:
3994 .  A - the matrix
3995 
3996    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
3997    MatXXXXSetPreallocation() paradigm instead of this routine directly.
3998    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
3999 
4000    Notes:
4001    If the *_nnz parameter is given then the *_nz parameter is ignored
4002 
4003    m,n,M,N parameters specify the size of the matrix, and its partitioning across
4004    processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
4005    storage requirements for this matrix.
4006 
4007    If PETSC_DECIDE or  PETSC_DETERMINE is used for a particular argument on one
4008    processor then it must be used on all processors that share the object for
4009    that argument.
4010 
4011    The user MUST specify either the local or global matrix dimensions
4012    (possibly both).
4013 
4014    The parallel matrix is partitioned across processors such that the
4015    first m0 rows belong to process 0, the next m1 rows belong to
4016    process 1, the next m2 rows belong to process 2, etc., where
4017    m0,m1,m2,.. are the input parameter 'm'. That is, each processor stores
4018    values corresponding to an [m x N] submatrix.
4019 
4020    The columns are logically partitioned with the n0 columns belonging
4021    to the 0th partition, the next n1 columns belonging to the next
4022    partition, etc., where n0,n1,n2... are the input parameter 'n'.
4023 
4024    The DIAGONAL portion of the local submatrix on any given processor
4025    is the submatrix corresponding to the rows and columns m,n that belong
4026    to the given processor. That is, the diagonal matrix on
4027    process 0 is [m0 x n0], the diagonal matrix on process 1 is [m1 x n1],
4028    etc. The remaining portion of the local submatrix [m x (N-n)]
4029    constitutes the OFF-DIAGONAL portion. The example below better
4030    illustrates this concept.
4031 
4032    For a square global matrix we define each processor's diagonal portion
4033    to be its local rows and the corresponding columns (a square submatrix);
4034    each processor's off-diagonal portion encompasses the remainder of the
4035    local matrix (a rectangular submatrix).
4036 
4037    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
4038 
4039    When calling this routine with a single process communicator, a matrix of
4040    type SEQAIJ is returned.  If a matrix of type MPIAIJ is desired for this
4041    type of communicator, use the construction mechanism:
4042      MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatSetSizes(A, m,n,M,N); MatMPIAIJSetPreallocation(A,...);
4043 
4044    By default, this format uses inodes (identical nodes) when possible.
4045    We search for consecutive rows with the same nonzero structure, thereby
4046    reusing matrix information to achieve increased efficiency.
4047 
4048    Options Database Keys:
4049 +  -mat_no_inode  - Do not use inodes
4050 .  -mat_inode_limit <limit> - Sets inode limit (max limit=5)
4051 -  -mat_aij_oneindex - Internally use indexing starting at 1
4052         rather than 0.  Note that when calling MatSetValues(),
4053         the user still MUST index entries starting at 0!
4054 
4055 
4056    Example usage:
4057 
4058    Consider the following 8x8 matrix with 34 non-zero values, that is
4059    assembled across 3 processors. Let us assume that proc0 owns 3 rows,
4060    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
4061    as follows:
4062 
4063 .vb
4064             1  2  0  |  0  3  0  |  0  4
4065     Proc0   0  5  6  |  7  0  0  |  8  0
4066             9  0 10  | 11  0  0  | 12  0
4067     -------------------------------------
4068            13  0 14  | 15 16 17  |  0  0
4069     Proc1   0 18  0  | 19 20 21  |  0  0
4070             0  0  0  | 22 23  0  | 24  0
4071     -------------------------------------
4072     Proc2  25 26 27  |  0  0 28  | 29  0
4073            30  0  0  | 31 32 33  |  0 34
4074 .ve
4075 
4076    This can be represented as a collection of submatrices as:
4077 
4078 .vb
4079       A B C
4080       D E F
4081       G H I
4082 .ve
4083 
4084    Where the submatrices A,B,C are owned by proc0, D,E,F are
4085    owned by proc1, G,H,I are owned by proc2.
4086 
4087    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4088    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4089    The 'M','N' parameters are 8,8, and have the same values on all procs.
4090 
4091    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
4092    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
4093    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
4094    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
4095    part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
4096    matrix, and [DF] as another SeqAIJ matrix.
4097 
4098    When d_nz, o_nz parameters are specified, d_nz storage elements are
4099    allocated for every row of the local diagonal submatrix, and o_nz
4100    storage locations are allocated for every row of the OFF-DIAGONAL submatrix.
4101    One way to choose d_nz and o_nz is to use the maximum number of nonzeros over
4102    the local rows of the DIAGONAL and the OFF-DIAGONAL submatrices, respectively.
4103    In this case, the values of d_nz,o_nz are:
4104 .vb
4105      proc0 : d_nz = 2, o_nz = 2
4106      proc1 : d_nz = 3, o_nz = 2
4107      proc2 : d_nz = 1, o_nz = 4
4108 .ve
4109    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
4110    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
4111    for proc2. That is, we are using 12+15+10=37 storage locations to store
4112    34 values.
4113 
4114    When d_nnz, o_nnz parameters are specified, the storage is specified
4115    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
4116    In the above case the values for d_nnz,o_nnz are:
4117 .vb
4118      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
4119      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
4120      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
4121 .ve
4122    Here the space allocated is the sum of all the above values, i.e. 34, and
4123    hence the preallocation is exact.
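
   For instance, a sketch of how proc0 in the example above could pass these arrays
   (variable names are illustrative only):
.vb
     PetscInt d_nnz[3] = {2,2,2}, o_nnz[3] = {2,2,2};
     MatCreateMPIAIJ(comm,3,3,8,8,0,d_nnz,0,o_nnz,&A);
.ve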
4124 
4125    Level: intermediate
4126 
4127 .keywords: matrix, aij, compressed row, sparse, parallel
4128 
4129 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
4130           MPIAIJ, MatCreateMPIAIJWithArrays()
4131 @*/
4132 PetscErrorCode  MatCreateMPIAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
4133 {
4134   PetscErrorCode ierr;
4135   PetscMPIInt    size;
4136 
4137   PetscFunctionBegin;
4138   ierr = MatCreate(comm,A);CHKERRQ(ierr);
4139   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
4140   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4141   if (size > 1) {
4142     ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr);
4143     ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
4144   } else {
4145     ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr);
4146     ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr);
4147   }
4148   PetscFunctionReturn(0);
4149 }
4150 
4151 #undef __FUNCT__
4152 #define __FUNCT__ "MatMPIAIJGetSeqAIJ"
4153 PetscErrorCode  MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,PetscInt *colmap[])
4154 {
4155   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
4156 
4157   PetscFunctionBegin;
4158   *Ad     = a->A;
4159   *Ao     = a->B;
4160   *colmap = a->garray;
4161   PetscFunctionReturn(0);
4162 }
4163 
4164 #undef __FUNCT__
4165 #define __FUNCT__ "MatSetColoring_MPIAIJ"
4166 PetscErrorCode MatSetColoring_MPIAIJ(Mat A,ISColoring coloring)
4167 {
4168   PetscErrorCode ierr;
4169   PetscInt       i;
4170   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
4171 
4172   PetscFunctionBegin;
4173   if (coloring->ctype == IS_COLORING_GLOBAL) {
4174     ISColoringValue *allcolors,*colors;
4175     ISColoring      ocoloring;
4176 
4177     /* set coloring for diagonal portion */
4178     ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr);
4179 
4180     /* set coloring for off-diagonal portion */
4181     ierr = ISAllGatherColors(((PetscObject)A)->comm,coloring->n,coloring->colors,PETSC_NULL,&allcolors);CHKERRQ(ierr);
4182     ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
4183     for (i=0; i<a->B->cmap->n; i++) {
4184       colors[i] = allcolors[a->garray[i]];
4185     }
4186     ierr = PetscFree(allcolors);CHKERRQ(ierr);
4187     ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
4188     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
4189     ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr);
4190   } else if (coloring->ctype == IS_COLORING_GHOSTED) {
4191     ISColoringValue *colors;
4192     PetscInt        *larray;
4193     ISColoring      ocoloring;
4194 
4195     /* set coloring for diagonal portion */
4196     ierr = PetscMalloc((a->A->cmap->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr);
4197     for (i=0; i<a->A->cmap->n; i++) {
4198       larray[i] = i + A->cmap->rstart;
4199     }
4200     ierr = ISGlobalToLocalMappingApply(A->cmap->mapping,IS_GTOLM_MASK,a->A->cmap->n,larray,PETSC_NULL,larray);CHKERRQ(ierr);
4201     ierr = PetscMalloc((a->A->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
4202     for (i=0; i<a->A->cmap->n; i++) {
4203       colors[i] = coloring->colors[larray[i]];
4204     }
4205     ierr = PetscFree(larray);CHKERRQ(ierr);
4206     ierr = ISColoringCreate(PETSC_COMM_SELF,coloring->n,a->A->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
4207     ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr);
4208     ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr);
4209 
4210     /* set coloring for off-diagonal portion */
4211     ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr);
4212     ierr = ISGlobalToLocalMappingApply(A->cmap->mapping,IS_GTOLM_MASK,a->B->cmap->n,a->garray,PETSC_NULL,larray);CHKERRQ(ierr);
4213     ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
4214     for (i=0; i<a->B->cmap->n; i++) {
4215       colors[i] = coloring->colors[larray[i]];
4216     }
4217     ierr = PetscFree(larray);CHKERRQ(ierr);
4218     ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
4219     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
4220     ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr);
4221   } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support ISColoringType %d",(int)coloring->ctype);
4222 
4223   PetscFunctionReturn(0);
4224 }
4225 
4226 #if defined(PETSC_HAVE_ADIC)
4227 #undef __FUNCT__
4228 #define __FUNCT__ "MatSetValuesAdic_MPIAIJ"
4229 PetscErrorCode MatSetValuesAdic_MPIAIJ(Mat A,void *advalues)
4230 {
4231   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
4232   PetscErrorCode ierr;
4233 
4234   PetscFunctionBegin;
4235   ierr = MatSetValuesAdic_SeqAIJ(a->A,advalues);CHKERRQ(ierr);
4236   ierr = MatSetValuesAdic_SeqAIJ(a->B,advalues);CHKERRQ(ierr);
4237   PetscFunctionReturn(0);
4238 }
4239 #endif
4240 
4241 #undef __FUNCT__
4242 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ"
4243 PetscErrorCode MatSetValuesAdifor_MPIAIJ(Mat A,PetscInt nl,void *advalues)
4244 {
4245   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
4246   PetscErrorCode ierr;
4247 
4248   PetscFunctionBegin;
4249   ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr);
4250   ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr);
4251   PetscFunctionReturn(0);
4252 }
4253 
4254 #undef __FUNCT__
4255 #define __FUNCT__ "MatMerge"
4256 /*@
4257       MatMerge - Creates a single large PETSc matrix by concatenating sequential
4258                  matrices from each processor
4259 
4260     Collective on MPI_Comm
4261 
4262    Input Parameters:
4263 +    comm - the communicator the parallel matrix will live on
4264 .    inmat - the input sequential matrix on each process
4265 .    n - number of local columns (or PETSC_DECIDE)
4266 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4267 
4268    Output Parameter:
4269 .    outmat - the parallel matrix generated
4270 
4271     Level: advanced
4272 
4273    Notes: The number of columns of the matrix in EACH processor MUST be the same.
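
   A minimal usage sketch (inmat is a SeqAIJ matrix already assembled on each process;
   note that inmat is destroyed inside this routine):
.vb
     Mat C;
     MatMerge(PETSC_COMM_WORLD,inmat,PETSC_DECIDE,MAT_INITIAL_MATRIX,&C);
.ve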
4274 
4275 @*/
4276 PetscErrorCode  MatMerge(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
4277 {
4278   PetscErrorCode ierr;
4279   PetscInt       m,N,i,rstart,nnz,Ii,*dnz,*onz;
4280   PetscInt       *indx;
4281   PetscScalar    *values;
4282 
4283   PetscFunctionBegin;
4284   ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
4285   if (scall == MAT_INITIAL_MATRIX){
4286     /* count nonzeros in each row, for diagonal and off diagonal portion of matrix */
4287     if (n == PETSC_DECIDE){
4288       ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr);
4289     }
4290     ierr = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
4291     rstart -= m;
4292 
4293     ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
4294     for (i=0;i<m;i++) {
4295       ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr);
4296       ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr);
4297       ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr);
4298     }
4299     /* This routine will ONLY return MPIAIJ type matrix */
4300     ierr = MatCreate(comm,outmat);CHKERRQ(ierr);
4301     ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
4302     ierr = MatSetType(*outmat,MATMPIAIJ);CHKERRQ(ierr);
4303     ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr);
4304     ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
4305 
4306   } else if (scall == MAT_REUSE_MATRIX){
4307     ierr = MatGetOwnershipRange(*outmat,&rstart,PETSC_NULL);CHKERRQ(ierr);
4308   } else {
4309     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
4310   }
4311 
4312   for (i=0;i<m;i++) {
4313     ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
4314     Ii    = i + rstart;
4315     ierr = MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
4316     ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
4317   }
4318   ierr = MatDestroy(&inmat);CHKERRQ(ierr);
4319   ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4320   ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4321 
4322   PetscFunctionReturn(0);
4323 }
4324 
4325 #undef __FUNCT__
4326 #define __FUNCT__ "MatFileSplit"
4327 PetscErrorCode MatFileSplit(Mat A,char *outfile)
4328 {
4329   PetscErrorCode    ierr;
4330   PetscMPIInt       rank;
4331   PetscInt          m,N,i,rstart,nnz;
4332   size_t            len;
4333   const PetscInt    *indx;
4334   PetscViewer       out;
4335   char              *name;
4336   Mat               B;
4337   const PetscScalar *values;
4338 
4339   PetscFunctionBegin;
4340   ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr);
4341   ierr = MatGetSize(A,0,&N);CHKERRQ(ierr);
4342   /* Should this be the type of the diagonal block of A? */
4343   ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr);
4344   ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr);
4345   ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr);
4346   ierr = MatSeqAIJSetPreallocation(B,0,PETSC_NULL);CHKERRQ(ierr);
4347   ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr);
4348   for (i=0;i<m;i++) {
4349     ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
4350     ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
4351     ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
4352   }
4353   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4354   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4355 
4356   ierr = MPI_Comm_rank(((PetscObject)A)->comm,&rank);CHKERRQ(ierr);
4357   ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr);
4358   ierr = PetscMalloc((len+5)*sizeof(char),&name);CHKERRQ(ierr);
4359   sprintf(name,"%s.%d",outfile,rank);
4360   ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr);
4361   ierr = PetscFree(name);CHKERRQ(ierr);
4362   ierr = MatView(B,out);CHKERRQ(ierr);
4363   ierr = PetscViewerDestroy(&out);CHKERRQ(ierr);
4364   ierr = MatDestroy(&B);CHKERRQ(ierr);
4365   PetscFunctionReturn(0);
4366 }
4367 
4368 extern PetscErrorCode MatDestroy_MPIAIJ(Mat);
4369 #undef __FUNCT__
4370 #define __FUNCT__ "MatDestroy_MPIAIJ_SeqsToMPI"
4371 PetscErrorCode  MatDestroy_MPIAIJ_SeqsToMPI(Mat A)
4372 {
4373   PetscErrorCode       ierr;
4374   Mat_Merge_SeqsToMPI  *merge;
4375   PetscContainer       container;
4376 
4377   PetscFunctionBegin;
4378   ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr);
4379   if (container) {
4380     ierr = PetscContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr);
4381     ierr = PetscFree(merge->id_r);CHKERRQ(ierr);
4382     ierr = PetscFree(merge->len_s);CHKERRQ(ierr);
4383     ierr = PetscFree(merge->len_r);CHKERRQ(ierr);
4384     ierr = PetscFree(merge->bi);CHKERRQ(ierr);
4385     ierr = PetscFree(merge->bj);CHKERRQ(ierr);
4386     ierr = PetscFree(merge->buf_ri[0]);CHKERRQ(ierr);
4387     ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr);
4388     ierr = PetscFree(merge->buf_rj[0]);CHKERRQ(ierr);
4389     ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr);
4390     ierr = PetscFree(merge->coi);CHKERRQ(ierr);
4391     ierr = PetscFree(merge->coj);CHKERRQ(ierr);
4392     ierr = PetscFree(merge->owners_co);CHKERRQ(ierr);
4393     ierr = PetscLayoutDestroy(&merge->rowmap);CHKERRQ(ierr);
4394     ierr = PetscFree(merge);CHKERRQ(ierr);
4395     ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr);
4396   }
4397   ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr);
4398   PetscFunctionReturn(0);
4399 }
4400 
4401 #include <../src/mat/utils/freespace.h>
4402 #include <petscbt.h>
4403 
4404 #undef __FUNCT__
4405 #define __FUNCT__ "MatMerge_SeqsToMPINumeric"
4406 /*@C
4407       MatMerge_SeqsToMPI - Creates an MPIAIJ matrix by adding together sequential
4408                  matrices from each processor
4409 
4410     Collective on MPI_Comm
4411 
4412    Input Parameters:
4413 +    comm - the communicator the parallel matrix will live on
4414 .    seqmat - the input sequential matrix on each process
4415 .    m - number of local rows (or PETSC_DECIDE)
4416 .    n - number of local columns (or PETSC_DECIDE)
4417 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4418 
4419    Output Parameter:
4420 .    mpimat - the parallel matrix generated
4421 
4422     Level: advanced
4423 
4424    Notes:
4425      The dimensions of the sequential matrix in each processor MUST be the same.
4426      The input seqmat is included in the container "Mat_Merge_SeqsToMPI", and will be
4427      destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat.
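
     A minimal sketch of the intended calling sequence (the symbolic setup happens on the first
     call; only the numeric phase is repeated on reuse with the same nonzero pattern):
.vb
     Mat C;
     MatMerge_SeqsToMPI(comm,seqmat,m,n,MAT_INITIAL_MATRIX,&C);
     /* ... change the numerical values of seqmat, keeping its nonzero structure ... */
     MatMerge_SeqsToMPI(comm,seqmat,m,n,MAT_REUSE_MATRIX,&C);
.ve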
4428 @*/
4429 PetscErrorCode  MatMerge_SeqsToMPINumeric(Mat seqmat,Mat mpimat)
4430 {
4431   PetscErrorCode       ierr;
4432   MPI_Comm             comm=((PetscObject)mpimat)->comm;
4433   Mat_SeqAIJ           *a=(Mat_SeqAIJ*)seqmat->data;
4434   PetscMPIInt          size,rank,taga,*len_s;
4435   PetscInt             N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj=a->j;
4436   PetscInt             proc,m;
4437   PetscInt             **buf_ri,**buf_rj;
4438   PetscInt             k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj;
4439   PetscInt             nrows,**buf_ri_k,**nextrow,**nextai;
4440   MPI_Request          *s_waits,*r_waits;
4441   MPI_Status           *status;
4442   MatScalar            *aa=a->a;
4443   MatScalar            **abuf_r,*ba_i;
4444   Mat_Merge_SeqsToMPI  *merge;
4445   PetscContainer       container;
4446 
4447   PetscFunctionBegin;
4448   ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
4449 
4450   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4451   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4452 
4453   ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr);
4454   ierr  = PetscContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr);
4455 
4456   bi     = merge->bi;
4457   bj     = merge->bj;
4458   buf_ri = merge->buf_ri;
4459   buf_rj = merge->buf_rj;
4460 
4461   ierr   = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr);
4462   owners = merge->rowmap->range;
4463   len_s  = merge->len_s;
4464 
4465   /* send and recv matrix values */
4466   /*-----------------------------*/
4467   ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr);
4468   ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr);
4469 
4470   ierr = PetscMalloc((merge->nsend+1)*sizeof(MPI_Request),&s_waits);CHKERRQ(ierr);
4471   for (proc=0,k=0; proc<size; proc++){
4472     if (!len_s[proc]) continue;
4473     i = owners[proc];
4474     ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr);
4475     k++;
4476   }
4477 
4478   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);}
4479   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);}
4480   ierr = PetscFree(status);CHKERRQ(ierr);
4481 
4482   ierr = PetscFree(s_waits);CHKERRQ(ierr);
4483   ierr = PetscFree(r_waits);CHKERRQ(ierr);
4484 
4485   /* insert mat values of mpimat */
4486   /*----------------------------*/
4487   ierr = PetscMalloc(N*sizeof(PetscScalar),&ba_i);CHKERRQ(ierr);
4488   ierr = PetscMalloc3(merge->nrecv,PetscInt*,&buf_ri_k,merge->nrecv,PetscInt*,&nextrow,merge->nrecv,PetscInt*,&nextai);CHKERRQ(ierr);
4489 
4490   for (k=0; k<merge->nrecv; k++){
4491     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th received i-structure */
4492     nrows = *(buf_ri_k[k]);
4493     nextrow[k]  = buf_ri_k[k]+1;  /* next row number of k-th received i-structure */
4494     nextai[k]   = buf_ri_k[k] + (nrows + 1);/* points to the next i-structure of k-th received i-structure */
4495   }
4496 
4497   /* set values of ba */
4498   m = merge->rowmap->n;
4499   for (i=0; i<m; i++) {
4500     arow = owners[rank] + i;
4501     bj_i = bj+bi[i];  /* col indices of the i-th row of mpimat */
4502     bnzi = bi[i+1] - bi[i];
4503     ierr = PetscMemzero(ba_i,bnzi*sizeof(PetscScalar));CHKERRQ(ierr);
4504 
4505     /* add local non-zero vals of this proc's seqmat into ba */
4506     anzi = ai[arow+1] - ai[arow];
4507     aj   = a->j + ai[arow];
4508     aa   = a->a + ai[arow];
4509     nextaj = 0;
4510     for (j=0; nextaj<anzi; j++){
4511       if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */
4512         ba_i[j] += aa[nextaj++];
4513       }
4514     }
4515 
4516     /* add received vals into ba */
4517     for (k=0; k<merge->nrecv; k++){ /* k-th received message */
4518       /* i-th row */
4519       if (i == *nextrow[k]) {
4520         anzi = *(nextai[k]+1) - *nextai[k];
4521         aj   = buf_rj[k] + *(nextai[k]);
4522         aa   = abuf_r[k] + *(nextai[k]);
4523         nextaj = 0;
4524         for (j=0; nextaj<anzi; j++){
4525           if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */
4526             ba_i[j] += aa[nextaj++];
4527           }
4528         }
4529         nextrow[k]++; nextai[k]++;
4530       }
4531     }
4532     ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr);
4533   }
4534   ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4535   ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4536 
4537   ierr = PetscFree(abuf_r[0]);CHKERRQ(ierr);
4538   ierr = PetscFree(abuf_r);CHKERRQ(ierr);
4539   ierr = PetscFree(ba_i);CHKERRQ(ierr);
4540   ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
4541   ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
4542   PetscFunctionReturn(0);
4543 }
4544 
4545 extern PetscErrorCode  MatDestroy_MPIAIJ_SeqsToMPI(Mat);
4546 
4547 #undef __FUNCT__
4548 #define __FUNCT__ "MatMerge_SeqsToMPISymbolic"
4549 PetscErrorCode  MatMerge_SeqsToMPISymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat)
4550 {
4551   PetscErrorCode       ierr;
4552   Mat                  B_mpi;
4553   Mat_SeqAIJ           *a=(Mat_SeqAIJ*)seqmat->data;
4554   PetscMPIInt          size,rank,tagi,tagj,*len_s,*len_si,*len_ri;
4555   PetscInt             **buf_rj,**buf_ri,**buf_ri_k;
4556   PetscInt             M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j;
4557   PetscInt             len,proc,*dnz,*onz;
4558   PetscInt             k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0;
4559   PetscInt             nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai;
4560   MPI_Request          *si_waits,*sj_waits,*ri_waits,*rj_waits;
4561   MPI_Status           *status;
4562   PetscFreeSpaceList   free_space=PETSC_NULL,current_space=PETSC_NULL;
4563   PetscBT              lnkbt;
4564   Mat_Merge_SeqsToMPI  *merge;
4565   PetscContainer       container;
4566 
4567   PetscFunctionBegin;
4568   ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4569 
4570   /* make sure it is a PETSc comm */
4571   ierr = PetscCommDuplicate(comm,&comm,PETSC_NULL);CHKERRQ(ierr);
4572   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4573   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4574 
4575   ierr = PetscNew(Mat_Merge_SeqsToMPI,&merge);CHKERRQ(ierr);
4576   ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr);
4577 
4578   /* determine row ownership */
4579   /*---------------------------------------------------------*/
4580   ierr = PetscLayoutCreate(comm,&merge->rowmap);CHKERRQ(ierr);
4581   ierr = PetscLayoutSetLocalSize(merge->rowmap,m);CHKERRQ(ierr);
4582   ierr = PetscLayoutSetSize(merge->rowmap,M);CHKERRQ(ierr);
4583   ierr = PetscLayoutSetBlockSize(merge->rowmap,1);CHKERRQ(ierr);
4584   ierr = PetscLayoutSetUp(merge->rowmap);CHKERRQ(ierr);
4585   ierr = PetscMalloc(size*sizeof(PetscMPIInt),&len_si);CHKERRQ(ierr);
4586   ierr = PetscMalloc(size*sizeof(PetscMPIInt),&merge->len_s);CHKERRQ(ierr);
4587 
4588   m      = merge->rowmap->n;
4589   M      = merge->rowmap->N;
4590   owners = merge->rowmap->range;
4591 
4592   /* determine the number of messages to send, their lengths */
4593   /*---------------------------------------------------------*/
4594   len_s  = merge->len_s;
4595 
4596   len = 0;  /* length of buf_si[] */
4597   merge->nsend = 0;
4598   for (proc=0; proc<size; proc++){
4599     len_si[proc] = 0;
4600     if (proc == rank){
4601       len_s[proc] = 0;
4602     } else {
4603       len_si[proc] = owners[proc+1] - owners[proc] + 1;
4604       len_s[proc] = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */
4605     }
4606     if (len_s[proc]) {
4607       merge->nsend++;
4608       nrows = 0;
4609       for (i=owners[proc]; i<owners[proc+1]; i++){
4610         if (ai[i+1] > ai[i]) nrows++;
4611       }
4612       len_si[proc] = 2*(nrows+1);
4613       len += len_si[proc];
4614     }
4615   }
4616 
4617   /* determine the number and length of messages to receive for ij-structure */
4618   /*-------------------------------------------------------------------------*/
4619   ierr = PetscGatherNumberOfMessages(comm,PETSC_NULL,len_s,&merge->nrecv);CHKERRQ(ierr);
4620   ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr);
4621 
4622   /* post the Irecv of j-structure */
4623   /*-------------------------------*/
4624   ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr);
4625   ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr);
4626 
4627   /* post the Isend of j-structure */
4628   /*--------------------------------*/
4629   ierr = PetscMalloc2(merge->nsend,MPI_Request,&si_waits,merge->nsend,MPI_Request,&sj_waits);CHKERRQ(ierr);
4630 
4631   for (proc=0, k=0; proc<size; proc++){
4632     if (!len_s[proc]) continue;
4633     i = owners[proc];
4634     ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr);
4635     k++;
4636   }
4637 
4638   /* receives and sends of j-structure are complete */
4639   /*------------------------------------------------*/
4640   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);}
4641   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);}
4642 
4643   /* send and recv i-structure */
4644   /*---------------------------*/
4645   ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr);
4646   ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr);
4647 
4648   ierr = PetscMalloc((len+1)*sizeof(PetscInt),&buf_s);CHKERRQ(ierr);
4649   buf_si = buf_s;  /* points to the beginning of k-th msg to be sent */
4650   for (proc=0,k=0; proc<size; proc++){
4651     if (!len_s[proc]) continue;
4652     /* form outgoing message for i-structure:
4653          buf_si[0]:                 nrows to be sent
4654                [1:nrows]:           row index (global)
4655                [nrows+1:2*nrows+1]: i-structure index
4656     */
4657     /*-------------------------------------------*/
4658     nrows = len_si[proc]/2 - 1;
4659     buf_si_i    = buf_si + nrows+1;
4660     buf_si[0]   = nrows;
4661     buf_si_i[0] = 0;
4662     nrows = 0;
4663     for (i=owners[proc]; i<owners[proc+1]; i++){
4664       anzi = ai[i+1] - ai[i];
4665       if (anzi) {
4666         buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */
4667         buf_si[nrows+1] = i-owners[proc]; /* local row index */
4668         nrows++;
4669       }
4670     }
4671     ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr);
4672     k++;
4673     buf_si += len_si[proc];
4674   }
4675 
4676   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);}
4677   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);}
4678 
4679   ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr);
4680   for (i=0; i<merge->nrecv; i++){
4681     ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr);
4682   }
4683 
4684   ierr = PetscFree(len_si);CHKERRQ(ierr);
4685   ierr = PetscFree(len_ri);CHKERRQ(ierr);
4686   ierr = PetscFree(rj_waits);CHKERRQ(ierr);
4687   ierr = PetscFree2(si_waits,sj_waits);CHKERRQ(ierr);
4688   ierr = PetscFree(ri_waits);CHKERRQ(ierr);
4689   ierr = PetscFree(buf_s);CHKERRQ(ierr);
4690   ierr = PetscFree(status);CHKERRQ(ierr);
4691 
4692   /* compute a local seq matrix in each processor */
4693   /*----------------------------------------------*/
4694   /* allocate bi array and free space for accumulating nonzero column info */
4695   ierr = PetscMalloc((m+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr);
4696   bi[0] = 0;
4697 
4698   /* create and initialize a linked list */
4699   nlnk = N+1;
4700   ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4701 
4702   /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */
4703   len = 0;
4704   len  = ai[owners[rank+1]] - ai[owners[rank]];
4705   ierr = PetscFreeSpaceGet((PetscInt)(2*len+1),&free_space);CHKERRQ(ierr);
4706   current_space = free_space;
4707 
4708   /* determine symbolic info for each local row */
4709   ierr = PetscMalloc3(merge->nrecv,PetscInt*,&buf_ri_k,merge->nrecv,PetscInt*,&nextrow,merge->nrecv,PetscInt*,&nextai);CHKERRQ(ierr);
4710 
4711   for (k=0; k<merge->nrecv; k++){
4712     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th received i-structure */
4713     nrows = *buf_ri_k[k];
4714     nextrow[k]  = buf_ri_k[k] + 1;  /* next row number of k-th received i-structure */
4715     nextai[k]   = buf_ri_k[k] + (nrows + 1);/* points to the next i-structure of k-th received i-structure */
4716   }
4717 
4718   ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
4719   len = 0;
4720   for (i=0;i<m;i++) {
4721     bnzi   = 0;
4722     /* add local non-zero cols of this proc's seqmat into lnk */
4723     arow   = owners[rank] + i;
4724     anzi   = ai[arow+1] - ai[arow];
4725     aj     = a->j + ai[arow];
4726     ierr = PetscLLAdd(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4727     bnzi += nlnk;
4728     /* add received col data into lnk */
4729     for (k=0; k<merge->nrecv; k++){ /* k-th received message */
4730       if (i == *nextrow[k]) { /* i-th row */
4731         anzi = *(nextai[k]+1) - *nextai[k];
4732         aj   = buf_rj[k] + *nextai[k];
4733         ierr = PetscLLAdd(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4734         bnzi += nlnk;
4735         nextrow[k]++; nextai[k]++;
4736       }
4737     }
4738     if (len < bnzi) len = bnzi;  /* =max(bnzi) */
4739 
4740     /* if free space is not available, make more free space */
4741     if (current_space->local_remaining<bnzi) {
4742       ierr = PetscFreeSpaceGet(bnzi+current_space->total_array_size,&current_space);CHKERRQ(ierr);
4743       nspacedouble++;
4744     }
4745     /* copy data into free space, then initialize lnk */
4746     ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
4747     ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr);
4748 
4749     current_space->array           += bnzi;
4750     current_space->local_used      += bnzi;
4751     current_space->local_remaining -= bnzi;
4752 
4753     bi[i+1] = bi[i] + bnzi;
4754   }
4755 
4756   ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
4757 
4758   ierr = PetscMalloc((bi[m]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr);
4759   ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr);
4760   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
4761 
4762   /* create symbolic parallel matrix B_mpi */
4763   /*---------------------------------------*/
4764   ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr);
4765   if (n==PETSC_DECIDE) {
4766     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr);
4767   } else {
4768     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
4769   }
4770   ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr);
4771   ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr);
4772   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
4773 
4774   /* B_mpi is not ready for use - assembly will be done by MatMerge_SeqsToMPINumeric() */
4775   B_mpi->assembled     = PETSC_FALSE;
4776   B_mpi->ops->destroy  = MatDestroy_MPIAIJ_SeqsToMPI;
4777   merge->bi            = bi;
4778   merge->bj            = bj;
4779   merge->buf_ri        = buf_ri;
4780   merge->buf_rj        = buf_rj;
4781   merge->coi           = PETSC_NULL;
4782   merge->coj           = PETSC_NULL;
4783   merge->owners_co     = PETSC_NULL;
4784 
4785   ierr = PetscCommDestroy(&comm);CHKERRQ(ierr);
4786 
4787   /* attach the supporting struct to B_mpi for reuse */
4788   ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
4789   ierr = PetscContainerSetPointer(container,merge);CHKERRQ(ierr);
4790   ierr = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr);
4791   ierr = PetscContainerDestroy(&container);CHKERRQ(ierr);
4792   *mpimat = B_mpi;
4793 
4794   ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4795   PetscFunctionReturn(0);
4796 }
4797 
4798 #undef __FUNCT__
4799 #define __FUNCT__ "MatMerge_SeqsToMPI"
4800 PetscErrorCode  MatMerge_SeqsToMPI(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat)
4801 {
4802   PetscErrorCode   ierr;
4803 
4804   PetscFunctionBegin;
4805   ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4806   if (scall == MAT_INITIAL_MATRIX){
4807     ierr = MatMerge_SeqsToMPISymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr);
4808   }
4809   ierr = MatMerge_SeqsToMPINumeric(seqmat,*mpimat);CHKERRQ(ierr);
4810   ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4811   PetscFunctionReturn(0);
4812 }
4813 
4814 #undef __FUNCT__
4815 #define __FUNCT__ "MatMPIAIJGetLocalMat"
4816 /*@
4817      MatMPIAIJGetLocalMat - Creates a SeqAIJ matrix from an MPIAIJ matrix by taking all its local rows and putting them into a sequential matrix with
4818           mlocal rows and n columns, where mlocal is the local row count obtained with MatGetLocalSize() and n is the global column count obtained
4819           with MatGetSize().
4820 
4821     Not Collective
4822 
4823    Input Parameters:
4824 +    A - the matrix
4825 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4826 
4827    Output Parameter:
4828 .    A_loc - the local sequential matrix generated
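
   A minimal usage sketch:
.vb
     Mat A_loc;
     MatMPIAIJGetLocalMat(A,MAT_INITIAL_MATRIX,&A_loc);
     /* ... use A_loc as an ordinary SeqAIJ matrix ... */
     MatMPIAIJGetLocalMat(A,MAT_REUSE_MATRIX,&A_loc);  /* refresh the values after A changes */
     MatDestroy(&A_loc);
.ve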
4829 
4830     Level: developer
4831 
4832 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMatCondensed()
4833 
4834 @*/
4835 PetscErrorCode  MatMPIAIJGetLocalMat(Mat A,MatReuse scall,Mat *A_loc)
4836 {
4837   PetscErrorCode  ierr;
4838   Mat_MPIAIJ      *mpimat=(Mat_MPIAIJ*)A->data;
4839   Mat_SeqAIJ      *mat,*a=(Mat_SeqAIJ*)(mpimat->A)->data,*b=(Mat_SeqAIJ*)(mpimat->B)->data;
4840   PetscInt        *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*cmap=mpimat->garray;
4841   MatScalar       *aa=a->a,*ba=b->a,*cam;
4842   PetscScalar     *ca;
4843   PetscInt        am=A->rmap->n,i,j,k,cstart=A->cmap->rstart;
4844   PetscInt        *ci,*cj,col,ncols_d,ncols_o,jo;
4845   PetscBool       match;
4846 
4847   PetscFunctionBegin;
4848   ierr = PetscTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr);
4849   if (!match) SETERRQ(((PetscObject)A)->comm, PETSC_ERR_SUP,"Requires MPIAIJ matrix as input");
4850   ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
4851   if (scall == MAT_INITIAL_MATRIX){
4852     ierr = PetscMalloc((1+am)*sizeof(PetscInt),&ci);CHKERRQ(ierr);
4853     ci[0] = 0;
4854     for (i=0; i<am; i++){
4855       ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]);
4856     }
4857     ierr = PetscMalloc((1+ci[am])*sizeof(PetscInt),&cj);CHKERRQ(ierr);
4858     ierr = PetscMalloc((1+ci[am])*sizeof(PetscScalar),&ca);CHKERRQ(ierr);
4859     k = 0;
4860     for (i=0; i<am; i++) {
4861       ncols_o = bi[i+1] - bi[i];
4862       ncols_d = ai[i+1] - ai[i];
4863       /* off-diagonal portion of A */
4864       for (jo=0; jo<ncols_o; jo++) {
4865         col = cmap[*bj];
4866         if (col >= cstart) break;
4867         cj[k]   = col; bj++;
4868         ca[k++] = *ba++;
4869       }
4870       /* diagonal portion of A */
4871       for (j=0; j<ncols_d; j++) {
4872         cj[k]   = cstart + *aj++;
4873         ca[k++] = *aa++;
4874       }
4875       /* off-diagonal portion of A */
4876       for (j=jo; j<ncols_o; j++) {
4877         cj[k]   = cmap[*bj++];
4878         ca[k++] = *ba++;
4879       }
4880     }
4881     /* put together the new matrix */
4882     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr);
4883     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
4884     /* Since these are PETSc arrays, change flags to free them as necessary. */
4885     mat          = (Mat_SeqAIJ*)(*A_loc)->data;
4886     mat->free_a  = PETSC_TRUE;
4887     mat->free_ij = PETSC_TRUE;
4888     mat->nonew   = 0;
4889   } else if (scall == MAT_REUSE_MATRIX){
4890     mat=(Mat_SeqAIJ*)(*A_loc)->data;
4891     ci = mat->i; cj = mat->j; cam = mat->a;
4892     for (i=0; i<am; i++) {
4893       /* off-diagonal portion of A */
4894       ncols_o = bi[i+1] - bi[i];
4895       for (jo=0; jo<ncols_o; jo++) {
4896         col = cmap[*bj];
4897         if (col >= cstart) break;
4898         *cam++ = *ba++; bj++;
4899       }
4900       /* diagonal portion of A */
4901       ncols_d = ai[i+1] - ai[i];
4902       for (j=0; j<ncols_d; j++) *cam++ = *aa++;
4903       /* off-diagonal portion of A */
4904       for (j=jo; j<ncols_o; j++) {
4905         *cam++ = *ba++; bj++;
4906       }
4907     }
4908   } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
4909   ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
4910   PetscFunctionReturn(0);
4911 }
4912 
4913 #undef __FUNCT__
4914 #define __FUNCT__ "MatMPIAIJGetLocalMatCondensed"
4915 /*@C
4916      MatMPIAIJGetLocalMatCondensed - Creates a SeqAIJ matrix from an MPIAIJ matrix by taking all its local rows and NON-ZERO columns
4917 
4918     Not Collective
4919 
4920    Input Parameters:
4921 +    A - the matrix
4922 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4923 -    row, col - index sets of rows and columns to extract (or PETSC_NULL)
4924 
4925    Output Parameter:
4926 .    A_loc - the local sequential matrix generated
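
   A minimal usage sketch (passing PETSC_NULL for row and col selects all local rows and all nonzero columns):
.vb
     Mat A_loc;
     MatMPIAIJGetLocalMatCondensed(A,MAT_INITIAL_MATRIX,PETSC_NULL,PETSC_NULL,&A_loc);
     /* ... */
     MatDestroy(&A_loc);
.ve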
4927 
4928     Level: developer
4929 
4930 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMat()
4931 
4932 @*/
4933 PetscErrorCode  MatMPIAIJGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc)
4934 {
4935   Mat_MPIAIJ        *a=(Mat_MPIAIJ*)A->data;
4936   PetscErrorCode    ierr;
4937   PetscInt          i,start,end,ncols,nzA,nzB,*cmap,imark,*idx;
4938   IS                isrowa,iscola;
4939   Mat               *aloc;
4940   PetscBool       match;
4941 
4942   PetscFunctionBegin;
4943   ierr = PetscTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr);
4944   if (!match) SETERRQ(((PetscObject)A)->comm, PETSC_ERR_SUP,"Requires MPIAIJ matrix as input");
4945   ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
4946   if (!row){
4947     start = A->rmap->rstart; end = A->rmap->rend;
4948     ierr = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr);
4949   } else {
4950     isrowa = *row;
4951   }
4952   if (!col){
4953     start = A->cmap->rstart;
4954     cmap  = a->garray;
4955     nzA   = a->A->cmap->n;
4956     nzB   = a->B->cmap->n;
4957     ierr  = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr);
4958     ncols = 0;
4959     for (i=0; i<nzB; i++) {
4960       if (cmap[i] < start) idx[ncols++] = cmap[i];
4961       else break;
4962     }
4963     imark = i;
4964     for (i=0; i<nzA; i++) idx[ncols++] = start + i;
4965     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i];
4966     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&iscola);CHKERRQ(ierr);
4967   } else {
4968     iscola = *col;
4969   }
4970   if (scall != MAT_INITIAL_MATRIX){
4971     ierr = PetscMalloc(sizeof(Mat),&aloc);CHKERRQ(ierr);
4972     aloc[0] = *A_loc;
4973   }
4974   ierr = MatGetSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr);
4975   *A_loc = aloc[0];
4976   ierr = PetscFree(aloc);CHKERRQ(ierr);
4977   if (!row){
4978     ierr = ISDestroy(&isrowa);CHKERRQ(ierr);
4979   }
4980   if (!col){
4981     ierr = ISDestroy(&iscola);CHKERRQ(ierr);
4982   }
4983   ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
4984   PetscFunctionReturn(0);
4985 }
4986 
4987 #undef __FUNCT__
4988 #define __FUNCT__ "MatGetBrowsOfAcols"
4989 /*@C
4990     MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns of the local portion of A
4991 
4992     Collective on Mat
4993 
4994    Input Parameters:
4995 +    A,B - the matrices in mpiaij format
4996 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4997 -    rowb, colb - index sets of rows and columns of B to extract (or PETSC_NULL)
4998 
4999    Output Parameter:
5000 +    rowb, colb - index sets of rows and columns of B to extract
5001 .    brstart - row index in B_seq at which the next B->rmap->n rows, taken from B's local rows, begin
5002 -    B_seq - the sequential matrix generated
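
   A usage sketch (the index sets created on the first call are returned so they can be reused later):
.vb
     IS       rowb,colb;
     PetscInt brstart;
     Mat      Bseq;
     MatGetBrowsOfAcols(A,B,MAT_INITIAL_MATRIX,&rowb,&colb,&brstart,&Bseq);
     /* ... later, with the same sparsity pattern ... */
     MatGetBrowsOfAcols(A,B,MAT_REUSE_MATRIX,&rowb,&colb,&brstart,&Bseq);
     ISDestroy(&rowb); ISDestroy(&colb); MatDestroy(&Bseq);
.ve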
5003 
5004     Level: developer
5005 
5006 @*/
5007 PetscErrorCode  MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,PetscInt *brstart,Mat *B_seq)
5008 {
5009   Mat_MPIAIJ        *a=(Mat_MPIAIJ*)A->data;
5010   PetscErrorCode    ierr;
5011   PetscInt          *idx,i,start,ncols,nzA,nzB,*cmap,imark;
5012   IS                isrowb,iscolb;
5013   Mat               *bseq;
5014 
5015   PetscFunctionBegin;
5016   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend){
5017     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
5018   }
5019   ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
5020 
5021   if (scall == MAT_INITIAL_MATRIX){
5022     start = A->cmap->rstart;
5023     cmap  = a->garray;
5024     nzA   = a->A->cmap->n;
5025     nzB   = a->B->cmap->n;
5026     ierr  = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr);
5027     ncols = 0;
5028     for (i=0; i<nzB; i++) {  /* row < local row index */
5029       if (cmap[i] < start) idx[ncols++] = cmap[i];
5030       else break;
5031     }
5032     imark = i;
5033     for (i=0; i<nzA; i++) idx[ncols++] = start + i;  /* local rows */
5034     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */
5035     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&isrowb);CHKERRQ(ierr);
5036     *brstart = imark;
5037     ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr);
5038   } else {
5039     if (!rowb || !colb) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX");
5040     isrowb = *rowb; iscolb = *colb;
5041     ierr = PetscMalloc(sizeof(Mat),&bseq);CHKERRQ(ierr);
5042     bseq[0] = *B_seq;
5043   }
5044   ierr = MatGetSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr);
5045   *B_seq = bseq[0];
5046   ierr = PetscFree(bseq);CHKERRQ(ierr);
5047   if (!rowb){
5048     ierr = ISDestroy(&isrowb);CHKERRQ(ierr);
5049   } else {
5050     *rowb = isrowb;
5051   }
5052   if (!colb){
5053     ierr = ISDestroy(&iscolb);CHKERRQ(ierr);
5054   } else {
5055     *colb = iscolb;
5056   }
5057   ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
5058   PetscFunctionReturn(0);
5059 }
5060 
5061 #undef __FUNCT__
5062 #define __FUNCT__ "MatGetBrowsOfAoCols"
5063 /*@C
5064     MatGetBrowsOfAoCols - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns
5065     of the OFF-DIAGONAL portion of local A
5066 
5067     Collective on Mat
5068 
5069    Input Parameters:
5070 +    A,B - the matrices in mpiaij format
5071 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5072 .    startsj - starting points of the outgoing messages in the send buffers for B's j and a arrays, saved for MAT_REUSE_MATRIX (or PETSC_NULL)
5073 .    startsj_r - similar to startsj, but for the incoming messages
5074 -    bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or PETSC_NULL)
5075 
5076    Output Parameter:
5077 .    B_oth - the sequential matrix generated
5078 
5079     Level: developer
5080 
5081 @*/
5082 PetscErrorCode  MatGetBrowsOfAoCols(Mat A,Mat B,MatReuse scall,PetscInt **startsj,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth)
5083 {
5084   VecScatter_MPI_General *gen_to,*gen_from;
5085   PetscErrorCode         ierr;
5086   Mat_MPIAIJ             *a=(Mat_MPIAIJ*)A->data;
5087   Mat_SeqAIJ             *b_oth;
5088   VecScatter             ctx=a->Mvctx;
5089   MPI_Comm               comm=((PetscObject)ctx)->comm;
5090   PetscMPIInt            *rprocs,*sprocs,tag=((PetscObject)ctx)->tag,rank;
5091   PetscInt               *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap->n,row,*b_othi,*b_othj;
5092   PetscScalar            *rvalues,*svalues;
5093   MatScalar              *b_otha,*bufa,*bufA;
5094   PetscInt               i,j,k,l,ll,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len;
5095   MPI_Request            *rwaits = PETSC_NULL,*swaits = PETSC_NULL;
5096   MPI_Status             *sstatus,rstatus;
5097   PetscMPIInt            jj;
5098   PetscInt               *cols,sbs,rbs;
5099   PetscScalar            *vals;
5100 
5101   PetscFunctionBegin;
5102   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend){
5103     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
5104   }
5105   ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
5106   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
5107 
5108   gen_to   = (VecScatter_MPI_General*)ctx->todata;
5109   gen_from = (VecScatter_MPI_General*)ctx->fromdata;
5110   rvalues  = gen_from->values; /* holds the length of receiving row */
5111   svalues  = gen_to->values;   /* holds the length of sending row */
5112   nrecvs   = gen_from->n;
5113   nsends   = gen_to->n;
5114 
5115   ierr = PetscMalloc2(nrecvs,MPI_Request,&rwaits,nsends,MPI_Request,&swaits);CHKERRQ(ierr);
5116   srow     = gen_to->indices;   /* local row index to be sent */
5117   sstarts  = gen_to->starts;
5118   sprocs   = gen_to->procs;
5119   sstatus  = gen_to->sstatus;
5120   sbs      = gen_to->bs;
5121   rstarts  = gen_from->starts;
5122   rprocs   = gen_from->procs;
5123   rbs      = gen_from->bs;
5124 
5125   if (!startsj || !bufa_ptr) scall = MAT_INITIAL_MATRIX;
5126   if (scall == MAT_INITIAL_MATRIX){
5127     /* i-array */
5128     /*---------*/
5129     /*  post receives */
5130     for (i=0; i<nrecvs; i++){
5131       rowlen = (PetscInt*)rvalues + rstarts[i]*rbs;
5132       nrows = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */
5133       ierr = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5134     }
5135 
5136     /* pack the outgoing message */
5137     ierr = PetscMalloc2(nsends+1,PetscInt,&sstartsj,nrecvs+1,PetscInt,&rstartsj);CHKERRQ(ierr);
5138     sstartsj[0] = 0;  rstartsj[0] = 0;
5139     len = 0; /* total length of j or a array to be sent */
5140     k = 0;
5141     for (i=0; i<nsends; i++){
5142       rowlen = (PetscInt*)svalues + sstarts[i]*sbs;
5143       nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5144       for (j=0; j<nrows; j++) {
5145         row = srow[k] + B->rmap->range[rank]; /* global row idx */
5146         for (l=0; l<sbs; l++){
5147           ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); /* rowlength */
5148           rowlen[j*sbs+l] = ncols;
5149           len += ncols;
5150           ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr);
5151         }
5152         k++;
5153       }
5154       ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5155       sstartsj[i+1] = len;  /* starting point of (i+1)-th outgoing msg in bufj and bufa */
5156     }
5157     /* recvs and sends of i-array are completed */
5158     i = nrecvs;
5159     while (i--) {
5160       ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5161     }
5162     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
5163 
5164     /* allocate buffers for sending j and a arrays */
5165     ierr = PetscMalloc((len+1)*sizeof(PetscInt),&bufj);CHKERRQ(ierr);
5166     ierr = PetscMalloc((len+1)*sizeof(PetscScalar),&bufa);CHKERRQ(ierr);
5167 
5168     /* create i-array of B_oth */
5169     ierr = PetscMalloc((aBn+2)*sizeof(PetscInt),&b_othi);CHKERRQ(ierr);
5170     b_othi[0] = 0;
5171     len = 0; /* total length of j or a array to be received */
5172     k = 0;
5173     for (i=0; i<nrecvs; i++){
5174       rowlen = (PetscInt*)rvalues + rstarts[i]*rbs;
5175       nrows = rbs*(rstarts[i+1]-rstarts[i]); /* num of rows to be received */
5176       for (j=0; j<nrows; j++) {
5177         b_othi[k+1] = b_othi[k] + rowlen[j];
5178         len += rowlen[j]; k++;
5179       }
5180       rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */
5181     }
5182 
5183     /* allocate space for j and a arrays of B_oth */
5184     ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(PetscInt),&b_othj);CHKERRQ(ierr);
5185     ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(MatScalar),&b_otha);CHKERRQ(ierr);
5186 
5187     /* j-array */
5188     /*---------*/
5189     /*  post receives of j-array */
5190     for (i=0; i<nrecvs; i++){
5191       nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
5192       ierr = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5193     }
5194 
5195     /* pack the outgoing message j-array */
5196     k = 0;
5197     for (i=0; i<nsends; i++){
5198       nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5199       bufJ = bufj+sstartsj[i];
5200       for (j=0; j<nrows; j++) {
5201         row  = srow[k++] + B->rmap->range[rank]; /* global row idx */
5202         for (ll=0; ll<sbs; ll++){
5203           ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr);
5204           for (l=0; l<ncols; l++){
5205             *bufJ++ = cols[l];
5206           }
5207           ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr);
5208         }
5209       }
5210       ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5211     }
5212 
5213     /* recvs and sends of j-array are completed */
5214     i = nrecvs;
5215     while (i--) {
5216       ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5217     }
5218     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
5219   } else if (scall == MAT_REUSE_MATRIX){
5220     sstartsj = *startsj;
5221     rstartsj = *startsj_r;
5222     bufa     = *bufa_ptr;
5223     b_oth    = (Mat_SeqAIJ*)(*B_oth)->data;
5224     b_otha   = b_oth->a;
5225   } else {
5226     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
5227   }
5228 
5229   /* a-array */
5230   /*---------*/
5231   /*  post receives of a-array */
5232   for (i=0; i<nrecvs; i++){
5233     nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
5234     ierr = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5235   }
5236 
5237   /* pack the outgoing message a-array */
5238   k = 0;
5239   for (i=0; i<nsends; i++){
5240     nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5241     bufA = bufa+sstartsj[i];
5242     for (j=0; j<nrows; j++) {
5243       row  = srow[k++] + B->rmap->range[rank]; /* global row idx */
5244       for (ll=0; ll<sbs; ll++){
5245         ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr);
5246         for (l=0; l<ncols; l++){
5247           *bufA++ = vals[l];
5248         }
5249         ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr);
5250       }
5251     }
5252     ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5253   }
5254   /* recvs and sends of a-array are completed */
5255   i = nrecvs;
5256   while (i--) {
5257     ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5258   }
5259   if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
5260   ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr);
5261 
5262   if (scall == MAT_INITIAL_MATRIX){
5263     /* put together the new matrix */
5264     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr);
5265 
5266     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
5267     /* Since these are PETSc arrays, change flags to free them as necessary. */
5268     b_oth          = (Mat_SeqAIJ *)(*B_oth)->data;
5269     b_oth->free_a  = PETSC_TRUE;
5270     b_oth->free_ij = PETSC_TRUE;
5271     b_oth->nonew   = 0;
5272 
5273     ierr = PetscFree(bufj);CHKERRQ(ierr);
5274     if (!startsj || !bufa_ptr){
5275       ierr = PetscFree2(sstartsj,rstartsj);CHKERRQ(ierr);
5276       ierr = PetscFree(bufa_ptr);CHKERRQ(ierr);
5277     } else {
5278       *startsj   = sstartsj;
5279       *startsj_r = rstartsj;
5280       *bufa_ptr  = bufa;
5281     }
5282   }
5283   ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
5284   PetscFunctionReturn(0);
5285 }
5286 
5287 #undef __FUNCT__
5288 #define __FUNCT__ "MatGetCommunicationStructs"
5289 /*@C
5290   MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication.
5291 
5292   Not Collective
5293 
5294   Input Parameters:
5295 . A - The matrix in mpiaij format
5296 
5297   Output Parameter:
5298 + lvec - The local vector holding off-process values from the argument to a matrix-vector product
5299 . colmap - A map from global column index to local index into lvec
5300 - multScatter - A scatter from the argument of a matrix-vector product to lvec
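
  A minimal usage sketch (the type of colmap depends on whether PETSc was configured with PETSC_USE_CTABLE):
.vb
    Vec        lvec;
    VecScatter Mvctx;
  #if defined(PETSC_USE_CTABLE)
    PetscTable colmap;
  #else
    PetscInt   *colmap;
  #endif
    MatGetCommunicationStructs(A,&lvec,&colmap,&Mvctx);
.ve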
5301 
5302   Level: developer
5303 
5304 @*/
5305 #if defined (PETSC_USE_CTABLE)
5306 PetscErrorCode  MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter)
5307 #else
5308 PetscErrorCode  MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter)
5309 #endif
5310 {
5311   Mat_MPIAIJ *a;
5312 
5313   PetscFunctionBegin;
5314   PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
5315   PetscValidPointer(lvec, 2);
5316   PetscValidPointer(colmap, 3);
5317   PetscValidPointer(multScatter, 4);
5318   a = (Mat_MPIAIJ *) A->data;
5319   if (lvec) *lvec = a->lvec;
5320   if (colmap) *colmap = a->colmap;
5321   if (multScatter) *multScatter = a->Mvctx;
5322   PetscFunctionReturn(0);
5323 }
5324 
5325 EXTERN_C_BEGIN
5326 extern PetscErrorCode  MatConvert_MPIAIJ_MPIAIJCRL(Mat,const MatType,MatReuse,Mat*);
5327 extern PetscErrorCode  MatConvert_MPIAIJ_MPIAIJPERM(Mat,const MatType,MatReuse,Mat*);
5328 extern PetscErrorCode  MatConvert_MPIAIJ_MPISBAIJ(Mat,const MatType,MatReuse,Mat*);
5329 EXTERN_C_END
5330 
5331 #undef __FUNCT__
5332 #define __FUNCT__ "MatMatMultNumeric_MPIDense_MPIAIJ"
5333 /*
5334     Computes C = A*B as (B'*A')', since forming the MPIDense*MPIAIJ product A*B directly is untenable
5335 
5336                n                       p                          p
5337         (              )       (              )         (                  )
5338       m (      A       )  *  n (       B      )   =   m (         C        )
5339         (              )       (              )         (                  )
5340 
5341 */
5342 PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C)
5343 {
5344   PetscErrorCode     ierr;
5345   Mat                At,Bt,Ct;
5346 
5347   PetscFunctionBegin;
5348   ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr);
5349   ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr);
5350   ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,1.0,&Ct);CHKERRQ(ierr);
5351   ierr = MatDestroy(&At);CHKERRQ(ierr);
5352   ierr = MatDestroy(&Bt);CHKERRQ(ierr);
5353   ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr);
5354   ierr = MatDestroy(&Ct);CHKERRQ(ierr);
5355   PetscFunctionReturn(0);
5356 }
5357 
5358 #undef __FUNCT__
5359 #define __FUNCT__ "MatMatMultSymbolic_MPIDense_MPIAIJ"
5360 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat *C)
5361 {
5362   PetscErrorCode ierr;
5363   PetscInt       m=A->rmap->n,n=B->cmap->n;
5364   Mat            Cmat;
5365 
5366   PetscFunctionBegin;
5367   if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %d != B->rmap->n %d\n",A->cmap->n,B->rmap->n);
5368   ierr = MatCreate(((PetscObject)A)->comm,&Cmat);CHKERRQ(ierr);
5369   ierr = MatSetSizes(Cmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
5370   ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr);
5371   ierr = MatMPIDenseSetPreallocation(Cmat,PETSC_NULL);CHKERRQ(ierr);
5372   ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5373   ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5374   *C   = Cmat;
5375   PetscFunctionReturn(0);
5376 }
5377 
5378 /* ----------------------------------------------------------------*/
5379 #undef __FUNCT__
5380 #define __FUNCT__ "MatMatMult_MPIDense_MPIAIJ"
5381 PetscErrorCode MatMatMult_MPIDense_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C)
5382 {
5383   PetscErrorCode ierr;
5384 
5385   PetscFunctionBegin;
5386   if (scall == MAT_INITIAL_MATRIX){
5387     ierr = MatMatMultSymbolic_MPIDense_MPIAIJ(A,B,fill,C);CHKERRQ(ierr);
5388   }
5389   ierr = MatMatMultNumeric_MPIDense_MPIAIJ(A,B,*C);CHKERRQ(ierr);
5390   PetscFunctionReturn(0);
5391 }
5392 
5393 EXTERN_C_BEGIN
5394 #if defined(PETSC_HAVE_MUMPS)
5395 extern PetscErrorCode MatGetFactor_aij_mumps(Mat,MatFactorType,Mat*);
5396 #endif
5397 #if defined(PETSC_HAVE_PASTIX)
5398 extern PetscErrorCode MatGetFactor_mpiaij_pastix(Mat,MatFactorType,Mat*);
5399 #endif
5400 #if defined(PETSC_HAVE_SUPERLU_DIST)
5401 extern PetscErrorCode MatGetFactor_mpiaij_superlu_dist(Mat,MatFactorType,Mat*);
5402 #endif
5403 #if defined(PETSC_HAVE_SPOOLES)
5404 extern PetscErrorCode MatGetFactor_mpiaij_spooles(Mat,MatFactorType,Mat*);
5405 #endif
5406 EXTERN_C_END
5407 
5408 /*MC
5409    MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.
5410 
5411    Options Database Keys:
5412 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions()
5413 
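  Example Usage:
  A minimal sketch of the usual creation sequence (comm, A, m, n, M, N, d_nz, and o_nz are assumed
  to be supplied by the user; error handling abbreviated):
.vb
   ierr = MatCreate(comm,&A);CHKERRQ(ierr);
   ierr = MatSetSizes(A,m,n,M,N);CHKERRQ(ierr);
   ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
   ierr = MatMPIAIJSetPreallocation(A,d_nz,PETSC_NULL,o_nz,PETSC_NULL);CHKERRQ(ierr);
.ve
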
5414   Level: beginner
5415 
5416 .seealso: MatCreateMPIAIJ()
5417 M*/
5418 
5419 EXTERN_C_BEGIN
5420 #undef __FUNCT__
5421 #define __FUNCT__ "MatCreate_MPIAIJ"
5422 PetscErrorCode  MatCreate_MPIAIJ(Mat B)
5423 {
5424   Mat_MPIAIJ     *b;
5425   PetscErrorCode ierr;
5426   PetscMPIInt    size;
5427 
5428   PetscFunctionBegin;
5429   ierr = MPI_Comm_size(((PetscObject)B)->comm,&size);CHKERRQ(ierr);
5430 
5431   ierr            = PetscNewLog(B,Mat_MPIAIJ,&b);CHKERRQ(ierr);
5432   B->data         = (void*)b;
5433   ierr            = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
5434   B->rmap->bs     = 1;
5435   B->assembled    = PETSC_FALSE;
5436 
5437   B->insertmode   = NOT_SET_VALUES;
5438   b->size         = size;
5439   ierr = MPI_Comm_rank(((PetscObject)B)->comm,&b->rank);CHKERRQ(ierr);
5440 
5441   /* build cache for off array entries formed */
5442   ierr = MatStashCreate_Private(((PetscObject)B)->comm,1,&B->stash);CHKERRQ(ierr);
5443   b->donotstash  = PETSC_FALSE;
5444   b->colmap      = 0;
5445   b->garray      = 0;
5446   b->roworiented = PETSC_TRUE;
5447 
5448   /* stuff used for matrix vector multiply */
5449   b->lvec      = PETSC_NULL;
5450   b->Mvctx     = PETSC_NULL;
5451 
5452   /* stuff for MatGetRow() */
5453   b->rowindices   = 0;
5454   b->rowvalues    = 0;
5455   b->getrowactive = PETSC_FALSE;
5456 
5457 #if defined(PETSC_HAVE_SPOOLES)
5458   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_spooles_C",
5459                                      "MatGetFactor_mpiaij_spooles",
5460                                      MatGetFactor_mpiaij_spooles);CHKERRQ(ierr);
5461 #endif
5462 #if defined(PETSC_HAVE_MUMPS)
5463   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_mumps_C",
5464                                      "MatGetFactor_aij_mumps",
5465                                      MatGetFactor_aij_mumps);CHKERRQ(ierr);
5466 #endif
5467 #if defined(PETSC_HAVE_PASTIX)
5468   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_pastix_C",
5469                                      "MatGetFactor_mpiaij_pastix",
5470                                      MatGetFactor_mpiaij_pastix);CHKERRQ(ierr);
5471 #endif
5472 #if defined(PETSC_HAVE_SUPERLU_DIST)
5473   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_superlu_dist_C",
5474                                      "MatGetFactor_mpiaij_superlu_dist",
5475                                      MatGetFactor_mpiaij_superlu_dist);CHKERRQ(ierr);
5476 #endif
5477   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
5478                                      "MatStoreValues_MPIAIJ",
5479                                      MatStoreValues_MPIAIJ);CHKERRQ(ierr);
5480   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
5481                                      "MatRetrieveValues_MPIAIJ",
5482                                      MatRetrieveValues_MPIAIJ);CHKERRQ(ierr);
5483   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
5484                                      "MatGetDiagonalBlock_MPIAIJ",
5485                                      MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr);
5486   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatIsTranspose_C",
5487                                      "MatIsTranspose_MPIAIJ",
5488                                      MatIsTranspose_MPIAIJ);CHKERRQ(ierr);
5489   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocation_C",
5490                                      "MatMPIAIJSetPreallocation_MPIAIJ",
5491                                      MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr);
5492   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",
5493                                      "MatMPIAIJSetPreallocationCSR_MPIAIJ",
5494                                      MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr);
5495   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C",
5496                                      "MatDiagonalScaleLocal_MPIAIJ",
5497                                      MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr);
5498   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpiaijperm_C",
5499                                      "MatConvert_MPIAIJ_MPIAIJPERM",
5500                                       MatConvert_MPIAIJ_MPIAIJPERM);CHKERRQ(ierr);
5501   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpiaijcrl_C",
5502                                      "MatConvert_MPIAIJ_MPIAIJCRL",
5503                                       MatConvert_MPIAIJ_MPIAIJCRL);CHKERRQ(ierr);
5504   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C",
5505                                      "MatConvert_MPIAIJ_MPISBAIJ",
5506                                       MatConvert_MPIAIJ_MPISBAIJ);CHKERRQ(ierr);
5507   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMult_mpidense_mpiaij_C",
5508                                      "MatMatMult_MPIDense_MPIAIJ",
5509                                       MatMatMult_MPIDense_MPIAIJ);CHKERRQ(ierr);
5510   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMultSymbolic_mpidense_mpiaij_C",
5511                                      "MatMatMultSymbolic_MPIDense_MPIAIJ",
5512                                      MatMatMultSymbolic_MPIDense_MPIAIJ);CHKERRQ(ierr);
5513   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMultNumeric_mpidense_mpiaij_C",
5514                                      "MatMatMultNumeric_MPIDense_MPIAIJ",
5515                                       MatMatMultNumeric_MPIDense_MPIAIJ);CHKERRQ(ierr);
5516   ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr);
5517   PetscFunctionReturn(0);
5518 }
5519 EXTERN_C_END
5520 
5521 #undef __FUNCT__
5522 #define __FUNCT__ "MatCreateMPIAIJWithSplitArrays"
5523 /*@
5524      MatCreateMPIAIJWithSplitArrays - creates an MPI AIJ matrix using arrays that contain the "diagonal"
5525          and "off-diagonal" part of the matrix in CSR format.
5526 
5527    Collective on MPI_Comm
5528 
5529    Input Parameters:
5530 +  comm - MPI communicator
5531 .  m - number of local rows (cannot be PETSC_DECIDE)
5532 .  n - number of local columns; this should be the same as the local size used in creating the
5533        x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have it
5534        calculated if N is given). For square matrices n is almost always m.
5535 .  M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given)
5536 .  N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given)
5537 .   i - row indices for "diagonal" portion of matrix
5538 .   j - column indices for "diagonal" portion of matrix
5539 .   a - matrix values for "diagonal" portion of matrix
5540 .   oi - row indices for "off-diagonal" portion of matrix
5541 .   oj - column indices for "off-diagonal" portion of matrix
5542 -   oa - matrix values for "off-diagonal" portion of matrix
5543 
5544    Output Parameter:
5545 .   mat - the matrix
5546 
5547    Level: advanced
5548 
5549    Notes:
5550        The i, j, a, oi, oj, and oa arrays ARE NOT copied by this routine into the internal format used by PETSc. The user
5551        must free the arrays once the matrix has been destroyed and not before.
5552 
5553        The i, j, oi, and oj indices are 0 based
5554 
5555        See MatCreateMPIAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix
5556 
5557        This sets local rows and cannot be used to set off-processor values.
5558 
5559        You cannot later use MatSetValues() to change values in this matrix.
5560 
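   Example Usage:
       A minimal sketch (the split CSR arrays i, j, a, oi, oj, and oa are assumed to have been
       built by the caller as described above; error handling abbreviated):
.vb
   ierr = MatCreateMPIAIJWithSplitArrays(comm,m,n,PETSC_DETERMINE,PETSC_DETERMINE,
                                         i,j,a,oi,oj,oa,&A);CHKERRQ(ierr);
   /* ... use A; the six arrays must remain valid ... */
   ierr = MatDestroy(&A);CHKERRQ(ierr);
   /* only now may the caller free i, j, a, oi, oj, and oa */
.ve
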
5561 .keywords: matrix, aij, compressed row, sparse, parallel
5562 
5563 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
5564           MPIAIJ, MatCreateMPIAIJ(), MatCreateMPIAIJWithArrays()
5565 @*/
5566 PetscErrorCode  MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],
5567 								PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat)
5568 {
5569   PetscErrorCode ierr;
5570   Mat_MPIAIJ     *maij;
5571 
5572   PetscFunctionBegin;
5573   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE or negative");
5574   if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
5575   if (oi[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0");
5576   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
5577   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
5578   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
5579   maij = (Mat_MPIAIJ*) (*mat)->data;
5580   maij->donotstash     = PETSC_TRUE;
5581   (*mat)->preallocated = PETSC_TRUE;
5582 
5583   ierr = PetscLayoutSetBlockSize((*mat)->rmap,1);CHKERRQ(ierr);
5584   ierr = PetscLayoutSetBlockSize((*mat)->cmap,1);CHKERRQ(ierr);
5585   ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr);
5586   ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr);
5587 
5588   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr);
5589   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr);
5590 
5591   ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5592   ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5593   ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5594   ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5595 
5596   ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5597   ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5598   PetscFunctionReturn(0);
5599 }
5600 
5601 /*
5602     Special version for direct calls from Fortran
5603 */
5604 #include <private/fortranimpl.h>
5605 
5606 #if defined(PETSC_HAVE_FORTRAN_CAPS)
5607 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ
5608 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
5609 #define matsetvaluesmpiaij_ matsetvaluesmpiaij
5610 #endif
5611 
5612 /* Change these macros so they can be used in a void function: on error, abort rather than return an error code */
5613 #undef CHKERRQ
5614 #define CHKERRQ(ierr) CHKERRABORT(PETSC_COMM_WORLD,ierr)
5615 #undef SETERRQ2
5616 #define SETERRQ2(comm,ierr,b,c,d) CHKERRABORT(comm,ierr)
5617 #undef SETERRQ
5618 #define SETERRQ(c,ierr,b) CHKERRABORT(c,ierr)
5619 
5620 EXTERN_C_BEGIN
5621 #undef __FUNCT__
5622 #define __FUNCT__ "matsetvaluesmpiaij_"
5623 void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr)
5624 {
5625   Mat             mat = *mmat;
5626   PetscInt        m = *mm, n = *mn;
5627   InsertMode      addv = *maddv;
5628   Mat_MPIAIJ      *aij = (Mat_MPIAIJ*)mat->data;
5629   PetscScalar     value;
5630   PetscErrorCode  ierr;
5631 
5632   ierr = MatPreallocated(mat);CHKERRQ(ierr);
5633   if (mat->insertmode == NOT_SET_VALUES) {
5634     mat->insertmode = addv;
5635   }
5636 #if defined(PETSC_USE_DEBUG)
5637   else if (mat->insertmode != addv) {
5638     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
5639   }
5640 #endif
5641   {
5642   PetscInt        i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
5643   PetscInt        cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
5644   PetscBool       roworiented = aij->roworiented;
5645 
5646   /* Some variables required by the MatSetValues_SeqAIJ_A/B_Private() macros */
5647   Mat             A = aij->A;
5648   Mat_SeqAIJ      *a = (Mat_SeqAIJ*)A->data;
5649   PetscInt        *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
5650   MatScalar       *aa = a->a;
5651   PetscBool       ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES))?PETSC_TRUE:PETSC_FALSE);
5652   Mat             B = aij->B;
5653   Mat_SeqAIJ      *b = (Mat_SeqAIJ*)B->data;
5654   PetscInt        *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
5655   MatScalar       *ba = b->a;
5656 
5657   PetscInt        *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
5658   PetscInt        nonew = a->nonew;
5659   MatScalar       *ap1,*ap2;
5660 
5661   PetscFunctionBegin;
5662   for (i=0; i<m; i++) {
5663     if (im[i] < 0) continue;
5664 #if defined(PETSC_USE_DEBUG)
5665     if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
5666 #endif
5667     if (im[i] >= rstart && im[i] < rend) {
5668       row      = im[i] - rstart;
5669       lastcol1 = -1;
5670       rp1      = aj + ai[row];
5671       ap1      = aa + ai[row];
5672       rmax1    = aimax[row];
5673       nrow1    = ailen[row];
5674       low1     = 0;
5675       high1    = nrow1;
5676       lastcol2 = -1;
5677       rp2      = bj + bi[row];
5678       ap2      = ba + bi[row];
5679       rmax2    = bimax[row];
5680       nrow2    = bilen[row];
5681       low2     = 0;
5682       high2    = nrow2;
5683 
5684       for (j=0; j<n; j++) {
5685         if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
5686         if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
5687         if (in[j] >= cstart && in[j] < cend){
5688           col = in[j] - cstart;
5689           MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
5690         } else if (in[j] < 0) continue;
5691 #if defined(PETSC_USE_DEBUG)
5692         else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
5693 #endif
5694         else {
5695           if (mat->was_assembled) {
5696             if (!aij->colmap) {
5697               ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
5698             }
5699 #if defined (PETSC_USE_CTABLE)
5700             ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
5701             col--;
5702 #else
5703             col = aij->colmap[in[j]] - 1;
5704 #endif
5705             if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
5706               ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
5707               col =  in[j];
5708               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
5709               B = aij->B;
5710               b = (Mat_SeqAIJ*)B->data;
5711               bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
5712               rp2      = bj + bi[row];
5713               ap2      = ba + bi[row];
5714               rmax2    = bimax[row];
5715               nrow2    = bilen[row];
5716               low2     = 0;
5717               high2    = nrow2;
5718               bm       = aij->B->rmap->n;
5719               ba = b->a;
5720             }
5721           } else col = in[j];
5722           MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
5723         }
5724       }
5725     } else {
5726       if (!aij->donotstash) {
5727         if (roworiented) {
5728           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
5729         } else {
5730           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
5731         }
5732       }
5733     }
5734   }}
5735   PetscFunctionReturnVoid();
5736 }
5737 EXTERN_C_END
5738 
5739