xref: /petsc/src/mat/impls/aij/mpi/mpiaij.c (revision 511c7730c31e50e28ad0b6cd5ea706dbebda73c8)
1 
2 #include <../src/mat/impls/aij/mpi/mpiaij.h>   /*I "petscmat.h" I*/
3 #include <petscblaslapack.h>
4 
5 /*MC
6    MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices.
7 
8    This matrix type is identical to MATSEQAIJ when constructed with a single process communicator,
9    and MATMPIAIJ otherwise.  As a result, for single process communicators,
10   MatSeqAIJSetPreallocation is supported, and similarly MatMPIAIJSetPreallocation is supported
11   for communicators controlling multiple processes.  It is recommended that you call both of
12   the above preallocation routines for simplicity.
13 
14    Options Database Keys:
15 . -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions()
16 
17   Developer Notes: Subclasses include MATAIJCUSP, MATAIJPERM, MATAIJCRL, and also automatically switches over to use inodes when
18    enough exist.
19 
20   Level: beginner
21 
22 .seealso: MatCreateMPIAIJ(), MatCreateSeqAIJ(), MATSEQAIJ,MATMPIAIJ
23 M*/
24 
25 /*MC
26    MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices.
27 
28    This matrix type is identical to MATSEQAIJCRL when constructed with a single process communicator,
29    and MATMPIAIJCRL otherwise.  As a result, for single process communicators,
30    MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
31   for communicators controlling multiple processes.  It is recommended that you call both of
32   the above preallocation routines for simplicity.
33 
34    Options Database Keys:
35 . -mat_type aijcrl - sets the matrix type to "aijcrl" during a call to MatSetFromOptions()
36 
37   Level: beginner
38 
.seealso: MatCreateMPIAIJCRL(), MATSEQAIJCRL, MATMPIAIJCRL
40 M*/
41 
42 #undef __FUNCT__
43 #define __FUNCT__ "MatFindNonZeroRows_MPIAIJ"
44 PetscErrorCode MatFindNonZeroRows_MPIAIJ(Mat M,IS *keptrows)
45 {
46   PetscErrorCode  ierr;
47   Mat_MPIAIJ      *mat = (Mat_MPIAIJ*)M->data;
48   Mat_SeqAIJ      *a = (Mat_SeqAIJ*)mat->A->data;
49   Mat_SeqAIJ      *b = (Mat_SeqAIJ*)mat->B->data;
50   const PetscInt  *ia,*ib;
51   const MatScalar *aa,*bb;
52   PetscInt        na,nb,i,j,*rows,cnt=0,n0rows;
53   PetscInt        m = M->rmap->n,rstart = M->rmap->rstart;
54 
55   PetscFunctionBegin;
56   *keptrows = 0;
57   ia = a->i;
58   ib = b->i;
59   for (i=0; i<m; i++) {
60     na = ia[i+1] - ia[i];
61     nb = ib[i+1] - ib[i];
62     if (!na && !nb) {
63       cnt++;
64       goto ok1;
65     }
66     aa = a->a + ia[i];
67     for (j=0; j<na; j++) {
68       if (aa[j] != 0.0) goto ok1;
69     }
70     bb = b->a + ib[i];
71     for (j=0; j <nb; j++) {
72       if (bb[j] != 0.0) goto ok1;
73     }
74     cnt++;
75     ok1:;
76   }
77   ierr = MPI_Allreduce(&cnt,&n0rows,1,MPIU_INT,MPI_SUM,((PetscObject)M)->comm);CHKERRQ(ierr);
78   if (!n0rows) PetscFunctionReturn(0);
79   ierr = PetscMalloc((M->rmap->n-cnt)*sizeof(PetscInt),&rows);CHKERRQ(ierr);
80   cnt = 0;
81   for (i=0; i<m; i++) {
82     na = ia[i+1] - ia[i];
83     nb = ib[i+1] - ib[i];
84     if (!na && !nb) continue;
85     aa = a->a + ia[i];
86     for(j=0; j<na;j++) {
87       if (aa[j] != 0.0) {
88         rows[cnt++] = rstart + i;
89         goto ok2;
90       }
91     }
92     bb = b->a + ib[i];
93     for (j=0; j<nb; j++) {
94       if (bb[j] != 0.0) {
95         rows[cnt++] = rstart + i;
96         goto ok2;
97       }
98     }
99     ok2:;
100   }
101   ierr = ISCreateGeneral(PETSC_COMM_WORLD,cnt,rows,PETSC_OWN_POINTER,keptrows);CHKERRQ(ierr);
102   PetscFunctionReturn(0);
103 }
104 
105 #undef __FUNCT__
106 #define __FUNCT__ "MatGetColumnNorms_MPIAIJ"
107 PetscErrorCode MatGetColumnNorms_MPIAIJ(Mat A,NormType type,PetscReal *norms)
108 {
109   PetscErrorCode ierr;
110   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)A->data;
111   PetscInt       i,n,*garray = aij->garray;
112   Mat_SeqAIJ     *a_aij = (Mat_SeqAIJ*) aij->A->data;
113   Mat_SeqAIJ     *b_aij = (Mat_SeqAIJ*) aij->B->data;
114   PetscReal      *work;
115 
116   PetscFunctionBegin;
117   ierr = MatGetSize(A,PETSC_NULL,&n);CHKERRQ(ierr);
118   ierr = PetscMalloc(n*sizeof(PetscReal),&work);CHKERRQ(ierr);
119   ierr = PetscMemzero(work,n*sizeof(PetscReal));CHKERRQ(ierr);
120   if (type == NORM_2) {
121     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
122       work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]*a_aij->a[i]);
123     }
124     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
125       work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]*b_aij->a[i]);
126     }
127   } else if (type == NORM_1) {
128     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
129       work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]);
130     }
131     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
132       work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]);
133     }
134   } else if (type == NORM_INFINITY) {
135     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
136       work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]);
137     }
138     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
139       work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]),work[garray[b_aij->j[i]]]);
140     }
141 
142   } else SETERRQ(((PetscObject)A)->comm,PETSC_ERR_ARG_WRONG,"Unknown NormType");
143   if (type == NORM_INFINITY) {
144     ierr = MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,A->hdr.comm);CHKERRQ(ierr);
145   } else {
146     ierr = MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,A->hdr.comm);CHKERRQ(ierr);
147   }
148   ierr = PetscFree(work);CHKERRQ(ierr);
149   if (type == NORM_2) {
150     for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]);
151   }
152   PetscFunctionReturn(0);
153 }
154 
#undef __FUNCT__
#define __FUNCT__ "MatDistribute_MPIAIJ"
/*
    Distributes a SeqAIJ matrix across a set of processes. Code stolen from
    MatLoad_MPIAIJ(). Horrible lack of reuse. Should be a routine for each matrix type.

    Only for square matrices

    Input:
      comm  - communicator the distributed matrix lives on
      gmat  - the whole sequential matrix; only significant on rank 0
      m     - number of rows this process is to own
      reuse - MAT_INITIAL_MATRIX creates *inmat; MAT_REUSE_MATRIX assumes the
              nonzero pattern of *inmat is unchanged and only moves over the
              numerical values from rank 0
    Output:
      inmat - the distributed MPIAIJ matrix
*/
PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat)
{
  PetscMPIInt    rank,size;
  PetscInt       *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz,*gmataj,cnt,row,*ld;
  PetscErrorCode ierr;
  Mat            mat;
  Mat_SeqAIJ     *gmata;
  PetscMPIInt    tag;
  MPI_Status     status;
  PetscBool      aij;
  MatScalar      *gmataa,*ao,*ad,*gmataarestore=0;

  PetscFunctionBegin;
  CHKMEMQ;
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  if (!rank) {
    /* only rank 0 holds gmat, so only it can (and must) verify the type */
    ierr = PetscTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);CHKERRQ(ierr);
    if (!aij) SETERRQ1(((PetscObject)gmat)->comm,PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name);
  }
  if (reuse == MAT_INITIAL_MATRIX) {
    ierr = MatCreate(comm,&mat);CHKERRQ(ierr);
    ierr = MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
    ierr = MatSetType(mat,MATAIJ);CHKERRQ(ierr);
    ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr);
    ierr = PetscMalloc2(m,PetscInt,&dlens,m,PetscInt,&olens);CHKERRQ(ierr);
    /* gather every process's local row count, then prefix-sum so that
       rowners[p]..rowners[p+1] is the row ownership range of process p */
    ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
    rowners[0] = 0;
    for (i=2; i<=size; i++) {
      rowners[i] += rowners[i-1];
    }
    rstart = rowners[rank];
    rend   = rowners[rank+1];
    ierr   = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
    if (!rank) {
      gmata = (Mat_SeqAIJ*) gmat->data;
      /* send row lengths to all processors */
      for (i=0; i<m; i++) dlens[i] = gmata->ilen[i];
      for (i=1; i<size; i++) {
        ierr = MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
      }
      /* determine number diagonal and off-diagonal counts */
      ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
      ierr = PetscMalloc(m*sizeof(PetscInt),&ld);CHKERRQ(ierr);
      ierr = PetscMemzero(ld,m*sizeof(PetscInt));CHKERRQ(ierr);
      jj = 0;
      for (i=0; i<m; i++) {
        for (j=0; j<dlens[i]; j++) {
          /* ld[i] counts entries strictly left of the diagonal block; it is
             stashed in the Mat_MPIAIJ and reused by the MAT_REUSE_MATRIX path
             below to split each row's values between B and A */
          if (gmata->j[jj] < rstart) ld[i]++;
          if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++;
          jj++;
        }
      }
      /* send column indices to other processes */
      for (i=1; i<size; i++) {
        nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
        ierr = MPI_Send(&nz,1,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
        ierr = MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
      }

      /* send numerical values to other processes */
      for (i=1; i<size; i++) {
        nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
        ierr = MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
      }
      /* rank 0 reads its own rows directly out of gmat (rstart == 0 here) */
      gmataa = gmata->a;
      gmataj = gmata->j;

    } else {
      /* receive row lengths */
      ierr = MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
      /* receive column indices */
      ierr = MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
      ierr = PetscMalloc2(nz,PetscScalar,&gmataa,nz,PetscInt,&gmataj);CHKERRQ(ierr);
      ierr = MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
      /* determine number diagonal and off-diagonal counts */
      ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
      ierr = PetscMalloc(m*sizeof(PetscInt),&ld);CHKERRQ(ierr);
      ierr = PetscMemzero(ld,m*sizeof(PetscInt));CHKERRQ(ierr);
      jj = 0;
      for (i=0; i<m; i++) {
        for (j=0; j<dlens[i]; j++) {
          if (gmataj[jj] < rstart) ld[i]++;
          if (gmataj[jj] < rstart || gmataj[jj] >= rend) olens[i]++;
          jj++;
        }
      }
      /* receive numerical values */
      ierr = PetscMemzero(gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr);
      ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
    }
    /* set preallocation: dlens[] currently holds total row lengths, convert
       to diagonal-block lengths for the preallocation calls */
    for (i=0; i<m; i++) {
      dlens[i] -= olens[i];
    }
    ierr = MatSeqAIJSetPreallocation(mat,0,dlens);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);CHKERRQ(ierr);

    /* restore dlens[] to the full row lengths for the insertion loop below */
    for (i=0; i<m; i++) {
      dlens[i] += olens[i];
    }
    cnt  = 0;
    for (i=0; i<m; i++) {
      row  = rstart + i;
      ierr = MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);CHKERRQ(ierr);
      cnt += dlens[i];
    }
    if (rank) {
      /* non-root ranks own the receive buffers allocated above; rank 0's
         gmataa/gmataj alias gmat's arrays and must not be freed */
      ierr = PetscFree2(gmataa,gmataj);CHKERRQ(ierr);
    }
    ierr = PetscFree2(dlens,olens);CHKERRQ(ierr);
    ierr = PetscFree(rowners);CHKERRQ(ierr);
    ((Mat_MPIAIJ*)(mat->data))->ld = ld;
    *inmat = mat;
  } else {   /* column indices are already set; only need to move over numerical values from process 0 */
    Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data;
    Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data;
    mat   = *inmat;
    ierr  = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
    if (!rank) {
      /* send numerical values to other processes */
      gmata = (Mat_SeqAIJ*) gmat->data;
      ierr   = MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);CHKERRQ(ierr);
      gmataa = gmata->a;
      for (i=1; i<size; i++) {
        nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
        ierr = MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
      }
      nz   = gmata->i[rowners[1]]-gmata->i[rowners[0]];
    } else {
      /* receive numerical values from process 0*/
      nz   = Ad->nz + Ao->nz;
      /* gmataa is advanced while copying below; keep the base pointer so it
         can be freed afterwards */
      ierr = PetscMalloc(nz*sizeof(PetscScalar),&gmataa);CHKERRQ(ierr); gmataarestore = gmataa;
      ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
    }
    /* transfer numerical values into the diagonal A and off diagonal B parts of mat;
       the incoming values of each row are ordered [left-of-diagonal B | A | right-of-diagonal B],
       with ld[] giving the left-of-diagonal count recorded at creation time */
    ld = ((Mat_MPIAIJ*)(mat->data))->ld;
    ad = Ad->a;
    ao = Ao->a;
    if (mat->rmap->n) {
      i  = 0;
      nz = ld[i];                                   ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
      nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
    }
    for (i=1; i<mat->rmap->n; i++) {
      /* right-of-diagonal tail of row i-1 followed by left-of-diagonal head of row i */
      nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
      nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
    }
    i--;
    if (mat->rmap->n) {
      /* right-of-diagonal tail of the last row */
      nz = Ao->i[i+1] - Ao->i[i] - ld[i];           ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
    }
    if (rank) {
      ierr = PetscFree(gmataarestore);CHKERRQ(ierr);
    }
  }
  ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  CHKMEMQ;
  PetscFunctionReturn(0);
}
324 
325 /*
326   Local utility routine that creates a mapping from the global column
327 number to the local number in the off-diagonal part of the local
328 storage of the matrix.  When PETSC_USE_CTABLE is used this is scalable at
329 a slightly higher hash table cost; without it it is not scalable (each processor
330 has an order N integer array but is fast to acess.
331 */
332 #undef __FUNCT__
333 #define __FUNCT__ "CreateColmap_MPIAIJ_Private"
334 PetscErrorCode CreateColmap_MPIAIJ_Private(Mat mat)
335 {
336   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
337   PetscErrorCode ierr;
338   PetscInt       n = aij->B->cmap->n,i;
339 
340   PetscFunctionBegin;
341 #if defined (PETSC_USE_CTABLE)
342   ierr = PetscTableCreate(n,mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr);
343   for (i=0; i<n; i++){
344     ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1,INSERT_VALUES);CHKERRQ(ierr);
345   }
346 #else
347   ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscInt),&aij->colmap);CHKERRQ(ierr);
348   ierr = PetscLogObjectMemory(mat,mat->cmap->N*sizeof(PetscInt));CHKERRQ(ierr);
349   ierr = PetscMemzero(aij->colmap,mat->cmap->N*sizeof(PetscInt));CHKERRQ(ierr);
350   for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
351 #endif
352   PetscFunctionReturn(0);
353 }
354 
/*
  MatSetValues_SeqAIJ_A_Private - inserts or adds one (row,col,value) entry into
  the diagonal (A) part of the matrix.  This macro expands inside the caller's
  scope and references caller locals by name: rp1/ap1 (row's column indices and
  values), nrow1/rmax1 (used and allocated row length), low1/high1/lastcol1
  (search window cached across calls for sorted insertion), plus a, aa, ai, aj,
  aimax, ailen, am, nonew, ignorezeroentries and scratch variables t, _i, ii, N
  (see MatSetValues_MPIAIJ()).  It does a binary search narrowed to a linear
  scan; on a structural miss it drops the entry (zero value with
  ignorezeroentries, or nonew == 1), errors (nonew == -1), or shifts the tail of
  the row up one slot (reallocating via MatSeqXAIJReallocateAIJ if full) and
  inserts.
*/
#define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \
{ \
    if (col <= lastcol1) low1 = 0; else high1 = nrow1; \
    lastcol1 = col;\
    while (high1-low1 > 5) { \
      t = (low1+high1)/2; \
      if (rp1[t] > col) high1 = t; \
      else             low1  = t; \
    } \
      for (_i=low1; _i<high1; _i++) { \
        if (rp1[_i] > col) break; \
        if (rp1[_i] == col) { \
          if (addv == ADD_VALUES) ap1[_i] += value;   \
          else                    ap1[_i] = value; \
          goto a_noinsert; \
        } \
      }  \
      if (value == 0.0 && ignorezeroentries) {low1 = 0; high1 = nrow1;goto a_noinsert;} \
      if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;}		\
      if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
      MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \
      N = nrow1++ - 1; a->nz++; high1++; \
      /* shift up all the later entries in this row */ \
      for (ii=N; ii>=_i; ii--) { \
        rp1[ii+1] = rp1[ii]; \
        ap1[ii+1] = ap1[ii]; \
      } \
      rp1[_i] = col;  \
      ap1[_i] = value;  \
      a_noinsert: ; \
      ailen[row] = nrow1; \
}
387 
388 
/*
  MatSetValues_SeqAIJ_B_Private - identical logic to
  MatSetValues_SeqAIJ_A_Private but targeting the off-diagonal (B) part; it
  uses the parallel set of caller locals rp2/ap2, nrow2/rmax2,
  low2/high2/lastcol2, b, ba, bi, bj, bimax, bilen, bm and the label
  b_noinsert.  Must be expanded in a scope that declares all of these
  (see MatSetValues_MPIAIJ()).
*/
#define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \
{ \
    if (col <= lastcol2) low2 = 0; else high2 = nrow2; \
    lastcol2 = col;\
    while (high2-low2 > 5) { \
      t = (low2+high2)/2; \
      if (rp2[t] > col) high2 = t; \
      else             low2  = t; \
    } \
    for (_i=low2; _i<high2; _i++) {		\
      if (rp2[_i] > col) break;			\
      if (rp2[_i] == col) {			      \
	if (addv == ADD_VALUES) ap2[_i] += value;     \
	else                    ap2[_i] = value;      \
	goto b_noinsert;			      \
      }						      \
    }							      \
    if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
    if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;}		\
    if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
    MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \
    N = nrow2++ - 1; b->nz++; high2++;					\
    /* shift up all the later entries in this row */			\
    for (ii=N; ii>=_i; ii--) {						\
      rp2[ii+1] = rp2[ii];						\
      ap2[ii+1] = ap2[ii];						\
    }									\
    rp2[_i] = col;							\
    ap2[_i] = value;							\
    b_noinsert: ;								\
    bilen[row] = nrow2;							\
}
421 
422 #undef __FUNCT__
423 #define __FUNCT__ "MatSetValuesRow_MPIAIJ"
424 PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[])
425 {
426   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)A->data;
427   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data;
428   PetscErrorCode ierr;
429   PetscInt       l,*garray = mat->garray,diag;
430 
431   PetscFunctionBegin;
432   /* code only works for square matrices A */
433 
434   /* find size of row to the left of the diagonal part */
435   ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr);
436   row  = row - diag;
437   for (l=0; l<b->i[row+1]-b->i[row]; l++) {
438     if (garray[b->j[b->i[row]+l]] > diag) break;
439   }
440   ierr = PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));CHKERRQ(ierr);
441 
442   /* diagonal part */
443   ierr = PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));CHKERRQ(ierr);
444 
445   /* right of diagonal part */
446   ierr = PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));CHKERRQ(ierr);
447   PetscFunctionReturn(0);
448 }
449 
#undef __FUNCT__
#define __FUNCT__ "MatSetValues_MPIAIJ"
/*
   MatSetValues_MPIAIJ - inserts or adds an m x n logically dense block of
   values.  Entries in locally owned rows go directly into the diagonal (A)
   or off-diagonal (B) sequential blocks via the
   MatSetValues_SeqAIJ_A_Private()/..._B_Private() macros; entries in
   off-process rows are stashed for communication at assembly time.

   NOTE: most of the local variables below are not used directly in this
   function body; they exist because the two macros above expand in this
   scope and reference them by name.
*/
PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscScalar    value;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
  PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
  PetscBool      roworiented = aij->roworiented;

  /* Some Variables required in the macro */
  Mat            A = aij->A;
  Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data;
  PetscInt       *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
  MatScalar      *aa = a->a;
  PetscBool      ignorezeroentries = a->ignorezeroentries;
  Mat            B = aij->B;
  Mat_SeqAIJ     *b = (Mat_SeqAIJ*)B->data;
  PetscInt       *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
  MatScalar      *ba = b->a;

  PetscInt       *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
  PetscInt       nonew = a->nonew;
  MatScalar      *ap1,*ap2;

  PetscFunctionBegin;
  if (v) PetscValidScalarPointer(v,6);
  for (i=0; i<m; i++) {
    if (im[i] < 0) continue;  /* negative rows are silently ignored */
#if defined(PETSC_USE_DEBUG)
    if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
#endif
    if (im[i] >= rstart && im[i] < rend) {
      /* locally owned row: set up the macro state for this row of A and B */
      row      = im[i] - rstart;
      lastcol1 = -1;
      rp1      = aj + ai[row];
      ap1      = aa + ai[row];
      rmax1    = aimax[row];
      nrow1    = ailen[row];
      low1     = 0;
      high1    = nrow1;
      lastcol2 = -1;
      rp2      = bj + bi[row];
      ap2      = ba + bi[row];
      rmax2    = bimax[row];
      nrow2    = bilen[row];
      low2     = 0;
      high2    = nrow2;

      for (j=0; j<n; j++) {
        /* v may be row- or column-oriented; v == NULL means all zeros */
        if (v) {if (roworiented) value = v[i*n+j]; else value = v[i+j*m];} else value = 0.0;
        if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
        if (in[j] >= cstart && in[j] < cend){
          /* column in the diagonal block */
          col = in[j] - cstart;
          MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
        } else if (in[j] < 0) continue;
#if defined(PETSC_USE_DEBUG)
        else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
#endif
        else {
          /* column in the off-diagonal block: translate the global column to
             B's local column numbering (compressed form once assembled) */
          if (mat->was_assembled) {
            if (!aij->colmap) {
              ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
            }
#if defined (PETSC_USE_CTABLE)
            ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
            col--;
#else
            col = aij->colmap[in[j]] - 1;
#endif
            if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
              /* new off-diagonal column not in the existing pattern: revert B
                 to the uncompressed (global column) form so it can grow */
              ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
              col =  in[j];
              /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
              B = aij->B;
              b = (Mat_SeqAIJ*)B->data;
              bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a;
              rp2      = bj + bi[row];
              ap2      = ba + bi[row];
              rmax2    = bimax[row];
              nrow2    = bilen[row];
              low2     = 0;
              high2    = nrow2;
              bm       = aij->B->rmap->n;
              ba = b->a;
            }
          } else col = in[j];  /* before first assembly B uses global column numbers */
          MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
        }
      }
    } else {
      /* off-process row: stash for communication during assembly */
      if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
      if (!aij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}
554 
555 #undef __FUNCT__
556 #define __FUNCT__ "MatGetValues_MPIAIJ"
557 PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
558 {
559   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
560   PetscErrorCode ierr;
561   PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
562   PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
563 
564   PetscFunctionBegin;
565   for (i=0; i<m; i++) {
566     if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/
567     if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
568     if (idxm[i] >= rstart && idxm[i] < rend) {
569       row = idxm[i] - rstart;
570       for (j=0; j<n; j++) {
571         if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */
572         if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
573         if (idxn[j] >= cstart && idxn[j] < cend){
574           col = idxn[j] - cstart;
575           ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
576         } else {
577           if (!aij->colmap) {
578             ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
579           }
580 #if defined (PETSC_USE_CTABLE)
581           ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
582           col --;
583 #else
584           col = aij->colmap[idxn[j]] - 1;
585 #endif
586           if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
587           else {
588             ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
589           }
590         }
591       }
592     } else {
593       SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
594     }
595   }
596   PetscFunctionReturn(0);
597 }
598 
599 extern PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat,Vec,Vec);
600 
601 #undef __FUNCT__
602 #define __FUNCT__ "MatAssemblyBegin_MPIAIJ"
603 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
604 {
605   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
606   PetscErrorCode ierr;
607   PetscInt       nstash,reallocs;
608   InsertMode     addv;
609 
610   PetscFunctionBegin;
611   if (aij->donotstash || mat->nooffprocentries) {
612     PetscFunctionReturn(0);
613   }
614 
615   /* make sure all processors are either in INSERTMODE or ADDMODE */
616   ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,((PetscObject)mat)->comm);CHKERRQ(ierr);
617   if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
618   mat->insertmode = addv; /* in case this processor had no cache */
619 
620   ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
621   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
622   ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
623   PetscFunctionReturn(0);
624 }
625 
#undef __FUNCT__
#define __FUNCT__ "MatAssemblyEnd_MPIAIJ"
/*
   MatAssemblyEnd_MPIAIJ - completes assembly: drains the stash of entries
   received from other ranks, assembles the diagonal (A) block, coordinates
   disassembly of the off-diagonal (B) block across all ranks if any rank
   disassembled, builds the scatter context on first final assembly, then
   assembles B.  Contains collective calls; all ranks must call it.
*/
PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  Mat_SeqAIJ     *a=(Mat_SeqAIJ *)aij->A->data;
  PetscErrorCode ierr;
  PetscMPIInt    n;
  PetscInt       i,j,rstart,ncols,flg;
  PetscInt       *row,*col;
  PetscBool      other_disassembled;
  PetscScalar    *val;
  InsertMode     addv = mat->insertmode;

  /* do not use 'b = (Mat_SeqAIJ *)aij->B->data' as B can be reset in disassembly */
  PetscFunctionBegin;
  if (!aij->donotstash && !mat->nooffprocentries) {
    /* process every incoming stash message */
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        /* Now assemble all these values with a single function call */
        ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
        i = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
  }
  ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);

  /* determine if any processor has disassembled, if so we must
     also disassemble ourselfs, in order that we may reassemble. */
  /*
     if nonzero structure of submatrix B cannot change then we know that
     no processor disassembled thus we can skip this stuff
  */
  if (!((Mat_SeqAIJ*)aij->B->data)->nonew)  {
    /* MPI_PROD of was_assembled: 0 if ANY rank disassembled (was_assembled == 0) */
    ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,((PetscObject)mat)->comm);CHKERRQ(ierr);
    if (mat->was_assembled && !other_disassembled) {
      ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
    }
  }
  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    /* first final assembly: build B's compressed column map and the scatter */
    ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
  }
  ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr);
  ierr = MatSetOption(aij->B,MAT_CHECK_COMPRESSED_ROW,PETSC_FALSE);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);

  /* row-access work space is invalidated by assembly */
  ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
  aij->rowvalues = 0;

  /* used by MatAXPY() */
  a->xtoy = 0; ((Mat_SeqAIJ *)aij->B->data)->xtoy = 0;  /* b->xtoy = 0 */
  a->XtoY = 0; ((Mat_SeqAIJ *)aij->B->data)->XtoY = 0;  /* b->XtoY = 0 */

  /* cached diagonal is stale after new values were set */
  ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
  if (a->inode.size) mat->ops->multdiagonalblock = MatMultDiagonalBlock_MPIAIJ;
  PetscFunctionReturn(0);
}
693 
694 #undef __FUNCT__
695 #define __FUNCT__ "MatZeroEntries_MPIAIJ"
696 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
697 {
698   Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
699   PetscErrorCode ierr;
700 
701   PetscFunctionBegin;
702   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
703   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
704   PetscFunctionReturn(0);
705 }
706 
#undef __FUNCT__
#define __FUNCT__ "MatZeroRows_MPIAIJ"
/*
   MatZeroRows_MPIAIJ - Zeros the listed global rows of the parallel matrix,
   optionally placing diag on the diagonal of each zeroed row and fixing the
   right hand side b so the solution in those rows equals the entries of x.

   Collective on A. Each process may list rows owned by ANY process, so the
   indices are first routed to their owning processes with a rendezvous
   exchange (counts via PetscMaxSum(), payload via Isend/Irecv), unless the
   user asserted MAT_NO_OFF_PROC_ZERO_ROWS.
*/
PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
{
  Mat_MPIAIJ        *l = (Mat_MPIAIJ*)A->data;
  PetscErrorCode    ierr;
  PetscMPIInt       size = l->size,imdex,n,rank = l->rank,tag = ((PetscObject)A)->tag,lastidx = -1;
  PetscInt          i,*owners = A->rmap->range;
  PetscInt          *nprocs,j,idx,nsends,row;
  PetscInt          nmax,*svalues,*starts,*owner,nrecvs;
  PetscInt          *rvalues,count,base,slen,*source;
  PetscInt          *lens,*lrows,*values,rstart=A->rmap->rstart;
  MPI_Comm          comm = ((PetscObject)A)->comm;
  MPI_Request       *send_waits,*recv_waits;
  MPI_Status        recv_status,*send_status;
  const PetscScalar *xx;
  PetscScalar       *bb;
#if defined(PETSC_DEBUG)
  PetscBool      found = PETSC_FALSE;
#endif

  PetscFunctionBegin;
  /*  first count number of contributors to each processor */
  /* nprocs[2*j]   = number of rows this process sends to process j
     nprocs[2*j+1] = 1 iff at least one row goes to process j (message flag) */
  ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
  ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/
  j = 0;
  for (i=0; i<N; i++) {
    /* the owner search index j only restarts when the row indices are not ascending,
       making this loop O(N + size) for sorted input */
    if (lastidx > (idx = rows[i])) j = 0;
    lastidx = idx;
    for (; j<size; j++) {
      if (idx >= owners[j] && idx < owners[j+1]) {
        nprocs[2*j]++;
        nprocs[2*j+1] = 1;
        owner[i] = j;
#if defined(PETSC_DEBUG)
        found = PETSC_TRUE;
#endif
        break;
      }
    }
#if defined(PETSC_DEBUG)
    if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
    found = PETSC_FALSE;
#endif
  }
  nsends = 0;  for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}

  if (A->nooffproczerorows) {
    /* user promised every listed row is local: at most a self-message, so the
       collective PetscMaxSum() reduction can be skipped entirely */
    if (nsends > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"You called MatSetOption(,MAT_NO_OFF_PROC_ZERO_ROWS,PETSC_TRUE) but set an off process zero row");
    nrecvs = nsends;
    nmax   = N;
  } else {
    /* inform other processors of number of messages and max length*/
    ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);
  }

  /* post receives:   */
  /* every incoming message has at most nmax row indices */
  ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr);
  ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
  for (i=0; i<nrecvs; i++) {
    ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
  }

  /* do sends:
      1) starts[i] gives the starting index in svalues for stuff going to
         the ith processor
  */
  ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr);
  ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
  ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr);
  starts[0] = 0;
  for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
  for (i=0; i<N; i++) {
    svalues[starts[owner[i]]++] = rows[i];
  }

  /* starts[] was consumed by the bucketing above; rebuild it before sending */
  starts[0] = 0;
  for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
  count = 0;
  for (i=0; i<size; i++) {
    if (nprocs[2*i+1]) {
      ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
    }
  }
  ierr = PetscFree(starts);CHKERRQ(ierr);

  /* base converts global row numbers arriving from other processes to local row numbers */
  base = owners[rank];

  /*  wait on receives */
  ierr   = PetscMalloc2(nrecvs,PetscInt,&lens,nrecvs,PetscInt,&source);CHKERRQ(ierr);
  count  = nrecvs; slen = 0;
  while (count) {
    ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
    /* unpack receives into our local space */
    ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);
    source[imdex]  = recv_status.MPI_SOURCE;
    lens[imdex]    = n;
    slen          += n;
    count--;
  }
  ierr = PetscFree(recv_waits);CHKERRQ(ierr);

  /* move the data into the send scatter */
  /* gather all received global row numbers, converted to local indices, into lrows */
  ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr);
  count = 0;
  for (i=0; i<nrecvs; i++) {
    values = rvalues + i*nmax;
    for (j=0; j<lens[i]; j++) {
      lrows[count++] = values[j] - base;
    }
  }
  ierr = PetscFree(rvalues);CHKERRQ(ierr);
  ierr = PetscFree2(lens,source);CHKERRQ(ierr);
  ierr = PetscFree(owner);CHKERRQ(ierr);
  ierr = PetscFree(nprocs);CHKERRQ(ierr);

  /* fix right hand side if needed */
  /* set b_i = diag*x_i in each zeroed row so the zeroed equations keep the solution x */
  if (x && b) {
    ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr);
    ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
    for (i=0; i<slen; i++) {
      bb[lrows[i]] = diag*xx[lrows[i]];
    }
    ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr);
    ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
  }
  /*
        Zero the required rows. If the "diagonal block" of the matrix
     is square and the user wishes to set the diagonal we use separate
     code so that MatSetValues() is not called for each diagonal allocating
     new memory, thus calling lots of mallocs and slowing things down.

  */
  /* must zero l->B before l->A because the (diag) case below may put values into l->B*/
  ierr = MatZeroRows(l->B,slen,lrows,0.0,0,0);CHKERRQ(ierr);
  if ((diag != 0.0) && (l->A->rmap->N == l->A->cmap->N)) {
    /* square diagonal block: the sequential MatZeroRows() can insert diag in place */
    ierr = MatZeroRows(l->A,slen,lrows,diag,0,0);CHKERRQ(ierr);
  } else if (diag != 0.0) {
    /* rectangular diagonal block: the diagonal entry may live in l->B, so fall back
       to MatSetValues() on the parallel matrix; this requires new-nonzero insertion */
    ierr = MatZeroRows(l->A,slen,lrows,0.0,0,0);CHKERRQ(ierr);
    if (((Mat_SeqAIJ*)l->A->data)->nonew) {
      SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\n\
MAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
    }
    for (i = 0; i < slen; i++) {
      row  = lrows[i] + rstart;
      ierr = MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr);
    }
    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  } else {
    ierr = MatZeroRows(l->A,slen,lrows,0.0,0,0);CHKERRQ(ierr);
  }
  ierr = PetscFree(lrows);CHKERRQ(ierr);

  /* wait on sends */
  /* svalues must stay alive until all sends complete, hence the wait before freeing */
  if (nsends) {
    ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
    ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
    ierr = PetscFree(send_status);CHKERRQ(ierr);
  }
  ierr = PetscFree(send_waits);CHKERRQ(ierr);
  ierr = PetscFree(svalues);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
872 
#undef __FUNCT__
#define __FUNCT__ "MatZeroRowsColumns_MPIAIJ"
/*
   MatZeroRowsColumns_MPIAIJ - Zeros both the rows AND the columns listed in
   rows[], optionally placing diag on the diagonal, and (when x and b are
   given) subtracting the eliminated column contributions from b.

   Collective on A. Like MatZeroRows_MPIAIJ() the global row indices are
   first routed to their owning processes; then the diagonal block is handled
   by the sequential MatZeroRowsColumns(), while the off-diagonal block B is
   cleaned up here using a scattered 0/1 mask to identify zeroed columns.
*/
PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
{
  Mat_MPIAIJ        *l = (Mat_MPIAIJ*)A->data;
  PetscErrorCode    ierr;
  PetscMPIInt       size = l->size,imdex,n,rank = l->rank,tag = ((PetscObject)A)->tag,lastidx = -1;
  PetscInt          i,*owners = A->rmap->range;
  PetscInt          *nprocs,j,idx,nsends;
  PetscInt          nmax,*svalues,*starts,*owner,nrecvs;
  PetscInt          *rvalues,count,base,slen,*source;
  PetscInt          *lens,*lrows,*values,m;
  MPI_Comm          comm = ((PetscObject)A)->comm;
  MPI_Request       *send_waits,*recv_waits;
  MPI_Status        recv_status,*send_status;
  const PetscScalar *xx;
  PetscScalar       *bb,*mask;
  Vec               xmask,lmask;
  Mat_SeqAIJ        *aij = (Mat_SeqAIJ*)l->B->data;
  const PetscInt    *aj, *ii,*ridx;
  PetscScalar       *aa;
#if defined(PETSC_DEBUG)
  PetscBool         found = PETSC_FALSE;
#endif

  PetscFunctionBegin;
  /*  first count number of contributors to each processor */
  /* nprocs[2*j]   = number of rows this process sends to process j
     nprocs[2*j+1] = 1 iff at least one row goes to process j (message flag) */
  ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
  ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/
  j = 0;
  for (i=0; i<N; i++) {
    /* the owner search index j only restarts when the indices are not ascending */
    if (lastidx > (idx = rows[i])) j = 0;
    lastidx = idx;
    for (; j<size; j++) {
      if (idx >= owners[j] && idx < owners[j+1]) {
        nprocs[2*j]++;
        nprocs[2*j+1] = 1;
        owner[i] = j;
#if defined(PETSC_DEBUG)
        found = PETSC_TRUE;
#endif
        break;
      }
    }
#if defined(PETSC_DEBUG)
    if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
    found = PETSC_FALSE;
#endif
  }
  nsends = 0;  for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}

  /* inform other processors of number of messages and max length*/
  ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);

  /* post receives:   */
  ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr);
  ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
  for (i=0; i<nrecvs; i++) {
    ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
  }

  /* do sends:
      1) starts[i] gives the starting index in svalues for stuff going to
         the ith processor
  */
  ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr);
  ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
  ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr);
  starts[0] = 0;
  for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
  for (i=0; i<N; i++) {
    svalues[starts[owner[i]]++] = rows[i];
  }

  /* starts[] was consumed by the bucketing above; rebuild it before sending */
  starts[0] = 0;
  for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
  count = 0;
  for (i=0; i<size; i++) {
    if (nprocs[2*i+1]) {
      ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
    }
  }
  ierr = PetscFree(starts);CHKERRQ(ierr);

  /* base converts incoming global row numbers to local row numbers */
  base = owners[rank];

  /*  wait on receives */
  ierr   = PetscMalloc2(nrecvs,PetscInt,&lens,nrecvs,PetscInt,&source);CHKERRQ(ierr);
  count  = nrecvs; slen = 0;
  while (count) {
    ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
    /* unpack receives into our local space */
    ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);
    source[imdex]  = recv_status.MPI_SOURCE;
    lens[imdex]    = n;
    slen          += n;
    count--;
  }
  ierr = PetscFree(recv_waits);CHKERRQ(ierr);

  /* move the data into the send scatter */
  ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr);
  count = 0;
  for (i=0; i<nrecvs; i++) {
    values = rvalues + i*nmax;
    for (j=0; j<lens[i]; j++) {
      lrows[count++] = values[j] - base;
    }
  }
  ierr = PetscFree(rvalues);CHKERRQ(ierr);
  ierr = PetscFree2(lens,source);CHKERRQ(ierr);
  ierr = PetscFree(owner);CHKERRQ(ierr);
  ierr = PetscFree(nprocs);CHKERRQ(ierr);
  /* lrows are the local rows to be zeroed, slen is the number of local rows */

  /* zero diagonal part of matrix */
  ierr = MatZeroRowsColumns(l->A,slen,lrows,diag,x,b);CHKERRQ(ierr);

  /* handle off diagonal part of matrix */
  /* build a 0/1 mask over the ghost columns: scatter an indicator vector (1 in
     every zeroed row) forward through Mvctx so mask[k] != 0 iff ghost column k
     corresponds to a globally zeroed row/column */
  ierr = MatGetVecs(A,&xmask,PETSC_NULL);CHKERRQ(ierr);
  ierr = VecDuplicate(l->lvec,&lmask);CHKERRQ(ierr);
  ierr = VecGetArray(xmask,&bb);CHKERRQ(ierr);
  for (i=0; i<slen; i++) {
    bb[lrows[i]] = 1;
  }
  ierr = VecRestoreArray(xmask,&bb);CHKERRQ(ierr);
  ierr = VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecDestroy(&xmask);CHKERRQ(ierr);
  if (x) {
    /* bring the ghost values of x here so eliminated columns can be folded into b */
    /* NOTE(review): assumes b is non-NULL whenever x is — confirm with callers */
    ierr = VecScatterBegin(l->Mvctx,x,l->lvec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = VecScatterEnd(l->Mvctx,x,l->lvec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = VecGetArrayRead(l->lvec,&xx);CHKERRQ(ierr);
    ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
  }
  ierr = VecGetArray(lmask,&mask);CHKERRQ(ierr);

  /* remove zeroed rows of off diagonal matrix */
  ii = aij->i;
  for (i=0; i<slen; i++) {
    ierr = PetscMemzero(aij->a + ii[lrows[i]],(ii[lrows[i]+1] - ii[lrows[i]])*sizeof(PetscScalar));CHKERRQ(ierr);
  }

  /* loop over all elements of off process part of matrix zeroing removed columns*/
  if (aij->compressedrow.use){
    /* compressed row storage: only nonempty rows are stored; ridx maps back to true row numbers */
    m    = aij->compressedrow.nrows;
    ii   = aij->compressedrow.i;
    ridx = aij->compressedrow.rindex;
    for (i=0; i<m; i++){
      n   = ii[i+1] - ii[i];
      aj  = aij->j + ii[i];
      aa  = aij->a + ii[i];

      for (j=0; j<n; j++) {
        if (PetscAbsScalar(mask[*aj])) {
          /* column is being eliminated: move its contribution to the rhs, then zero it */
          if (b) bb[*ridx] -= *aa*xx[*aj];
          *aa        = 0.0;
        }
        aa++;
        aj++;
      }
      ridx++;
    }
  } else { /* do not use compressed row format */
    m = l->B->rmap->n;
    for (i=0; i<m; i++) {
      n   = ii[i+1] - ii[i];
      aj  = aij->j + ii[i];
      aa  = aij->a + ii[i];
      for (j=0; j<n; j++) {
        if (PetscAbsScalar(mask[*aj])) {
          if (b) bb[i] -= *aa*xx[*aj];
          *aa    = 0.0;
        }
        aa++;
        aj++;
      }
    }
  }
  if (x) {
    ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
    ierr = VecRestoreArrayRead(l->lvec,&xx);CHKERRQ(ierr);
  }
  ierr = VecRestoreArray(lmask,&mask);CHKERRQ(ierr);
  ierr = VecDestroy(&lmask);CHKERRQ(ierr);
  ierr = PetscFree(lrows);CHKERRQ(ierr);

  /* wait on sends */
  /* svalues must stay alive until all sends complete, hence the wait before freeing */
  if (nsends) {
    ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
    ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
    ierr = PetscFree(send_status);CHKERRQ(ierr);
  }
  ierr = PetscFree(send_waits);CHKERRQ(ierr);
  ierr = PetscFree(svalues);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}
1072 
#undef __FUNCT__
#define __FUNCT__ "MatMult_MPIAIJ"
/*
   MatMult_MPIAIJ - yy = A*xx for the parallel AIJ matrix.

   Overlaps communication with computation: the scatter of the ghost entries
   of xx is started first, the local diagonal block A*xx is computed while the
   messages are in flight, and only then is the scatter completed and the
   off-diagonal contribution B*lvec added. The call order is essential.
*/
PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       nt;

  PetscFunctionBegin;
  ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
  if (nt != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt);
  /* start gathering off-process entries of xx into a->lvec */
  ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  /* local (diagonal block) product, overlapped with the scatter */
  ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  /* add the off-diagonal block contribution using the received ghost values */
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1090 
1091 #undef __FUNCT__
1092 #define __FUNCT__ "MatMultDiagonalBlock_MPIAIJ"
1093 PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx)
1094 {
1095   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1096   PetscErrorCode ierr;
1097 
1098   PetscFunctionBegin;
1099   ierr = MatMultDiagonalBlock(a->A,bb,xx);CHKERRQ(ierr);
1100   PetscFunctionReturn(0);
1101 }
1102 
#undef __FUNCT__
#define __FUNCT__ "MatMultAdd_MPIAIJ"
/*
   MatMultAdd_MPIAIJ - zz = A*xx + yy for the parallel AIJ matrix.

   Same communication/computation overlap as MatMult_MPIAIJ(): start the
   ghost-value scatter, do the local multadd with the diagonal block, finish
   the scatter, then fold in the off-diagonal block. The call order is essential.
*/
PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* start gathering off-process entries of xx into a->lvec */
  ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  /* zz = A_diag*xx + yy, overlapped with the scatter */
  ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  /* zz += B*lvec (off-diagonal contribution from the received ghost values) */
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1117 
#undef __FUNCT__
#define __FUNCT__ "MatMultTranspose_MPIAIJ"
/*
   MatMultTranspose_MPIAIJ - yy = A^T*xx for the parallel AIJ matrix.

   The off-diagonal block is applied first, B^T*xx -> lvec, and the result is
   scattered in REVERSE (adding into yy on the owning processes). Whether the
   local product is done before or after VecScatterBegin() depends on whether
   the scatter implementation merges Begin/End (see VecScatterGetMerged()):
   when merged, the values actually move during Begin(), so yy must already
   contain the local part at that point.
*/
PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscBool      merged;

  PetscFunctionBegin;
  ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr);
  /* do nondiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  if (!merged) {
    /* send it on its way */
    ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
    /* do local part */
    ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
    /* receive remote parts: note this assumes the values are not actually */
    /* added in yy until the next line, */
    ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  } else {
    /* do local part */
    ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
    /* send it on its way */
    ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
    /* values actually were received in the Begin() but we need to call this nop */
    ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
1148 
EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatIsTranspose_MPIAIJ"
/*
   MatIsTranspose_MPIAIJ - Tests whether Bmat equals the transpose of Amat to
   within tolerance tol; the result is returned in f.

   Strategy: first the cheap local test (is Bmat's diagonal block the
   transpose of Amat's?); only if that passes, extract the off-diagonal
   columns of each matrix with MatGetSubMatrices() and compare those.
*/
PetscErrorCode  MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscBool  *f)
{
  MPI_Comm       comm;
  Mat_MPIAIJ     *Aij = (Mat_MPIAIJ *) Amat->data, *Bij;
  Mat            Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs;
  IS             Me,Notme;
  PetscErrorCode ierr;
  PetscInt       M,N,first,last,*notme,i;
  PetscMPIInt    size;

  PetscFunctionBegin;

  /* Easy test: symmetric diagonal block */
  Bij = (Mat_MPIAIJ *) Bmat->data; Bdia = Bij->A;
  ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr);
  if (!*f) PetscFunctionReturn(0);
  ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  /* uniprocessor case: the diagonal block is the whole matrix, so we are done */
  if (size == 1) PetscFunctionReturn(0);

  /* Hard test: off-diagonal block. This takes a MatGetSubMatrix. */
  ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr);
  /* notme = all global indices outside this process's ownership range [first,last) */
  /* NOTE(review): the second loop runs to M while the array is sized with N —
     fine when the matrix is square; confirm intent for rectangular matrices */
  ierr = PetscMalloc((N-last+first)*sizeof(PetscInt),&notme);CHKERRQ(ierr);
  for (i=0; i<first; i++) notme[i] = i;
  for (i=last; i<M; i++) notme[i-last+first] = i;
  ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,PETSC_COPY_VALUES,&Notme);CHKERRQ(ierr);
  ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr);
  /* Aoff = A(Me,Notme); Boff = B(Notme,Me): these should be mutual transposes */
  ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr);
  Aoff = Aoffs[0];
  ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr);
  Boff = Boffs[0];
  ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr);
  ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr);
  ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr);
  ierr = ISDestroy(&Me);CHKERRQ(ierr);
  ierr = ISDestroy(&Notme);CHKERRQ(ierr);
  ierr = PetscFree(notme);CHKERRQ(ierr);
   PetscFunctionReturn(0);
}
EXTERN_C_END
1193 
#undef __FUNCT__
#define __FUNCT__ "MatMultTransposeAdd_MPIAIJ"
/*
   MatMultTransposeAdd_MPIAIJ - zz = A^T*xx + yy for the parallel AIJ matrix.

   Overlaps the reverse scatter of B^T*xx with the local transpose multadd;
   the call order is essential (see also MatMultTranspose_MPIAIJ()).
*/
PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* do nondiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  /* send it on its way */
  ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  /* do local part */
  ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  /* receive remote parts */
  ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1212 
1213 /*
1214   This only works correctly for square matrices where the subblock A->A is the
1215    diagonal block
1216 */
1217 #undef __FUNCT__
1218 #define __FUNCT__ "MatGetDiagonal_MPIAIJ"
1219 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v)
1220 {
1221   PetscErrorCode ierr;
1222   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1223 
1224   PetscFunctionBegin;
1225   if (A->rmap->N != A->cmap->N) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
1226   if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
1227   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
1228   PetscFunctionReturn(0);
1229 }
1230 
1231 #undef __FUNCT__
1232 #define __FUNCT__ "MatScale_MPIAIJ"
1233 PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa)
1234 {
1235   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1236   PetscErrorCode ierr;
1237 
1238   PetscFunctionBegin;
1239   ierr = MatScale(a->A,aa);CHKERRQ(ierr);
1240   ierr = MatScale(a->B,aa);CHKERRQ(ierr);
1241   PetscFunctionReturn(0);
1242 }
1243 
#undef __FUNCT__
#define __FUNCT__ "MatDestroy_MPIAIJ"
/*
   MatDestroy_MPIAIJ - Releases everything owned by the MPIAIJ implementation:
   the assembly stash, the two sequential blocks, the column map, the ghost
   vector and scatter, the MatGetRow() work arrays, and the data struct itself,
   then detaches the composed function pointers from the object.
*/
PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
#if defined(PETSC_USE_LOG)
  PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N);
#endif
  ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
  ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
  ierr = MatDestroy(&aij->A);CHKERRQ(ierr);
  ierr = MatDestroy(&aij->B);CHKERRQ(ierr);
  /* colmap is a PetscTable or a plain array depending on the build configuration */
#if defined (PETSC_USE_CTABLE)
  ierr = PetscTableDestroy(&aij->colmap);CHKERRQ(ierr);
#else
  ierr = PetscFree(aij->colmap);CHKERRQ(ierr);
#endif
  ierr = PetscFree(aij->garray);CHKERRQ(ierr);
  ierr = VecDestroy(&aij->lvec);CHKERRQ(ierr);
  ierr = VecScatterDestroy(&aij->Mvctx);CHKERRQ(ierr);
  ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
  ierr = PetscFree(aij->ld);CHKERRQ(ierr);
  ierr = PetscFree(mat->data);CHKERRQ(ierr);

  /* reset the type name and remove the composed methods installed at creation */
  ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C","",PETSC_NULL);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1282 
1283 #undef __FUNCT__
1284 #define __FUNCT__ "MatView_MPIAIJ_Binary"
1285 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
1286 {
1287   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
1288   Mat_SeqAIJ*       A = (Mat_SeqAIJ*)aij->A->data;
1289   Mat_SeqAIJ*       B = (Mat_SeqAIJ*)aij->B->data;
1290   PetscErrorCode    ierr;
1291   PetscMPIInt       rank,size,tag = ((PetscObject)viewer)->tag;
1292   int               fd;
1293   PetscInt          nz,header[4],*row_lengths,*range=0,rlen,i;
1294   PetscInt          nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap->rstart,rnz;
1295   PetscScalar       *column_values;
1296   PetscInt          message_count,flowcontrolcount;
1297 
1298   PetscFunctionBegin;
1299   ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr);
1300   ierr = MPI_Comm_size(((PetscObject)mat)->comm,&size);CHKERRQ(ierr);
1301   nz   = A->nz + B->nz;
1302   if (!rank) {
1303     header[0] = MAT_FILE_CLASSID;
1304     header[1] = mat->rmap->N;
1305     header[2] = mat->cmap->N;
1306     ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
1307     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1308     ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1309     /* get largest number of rows any processor has */
1310     rlen = mat->rmap->n;
1311     range = mat->rmap->range;
1312     for (i=1; i<size; i++) {
1313       rlen = PetscMax(rlen,range[i+1] - range[i]);
1314     }
1315   } else {
1316     ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
1317     rlen = mat->rmap->n;
1318   }
1319 
1320   /* load up the local row counts */
1321   ierr = PetscMalloc((rlen+1)*sizeof(PetscInt),&row_lengths);CHKERRQ(ierr);
1322   for (i=0; i<mat->rmap->n; i++) {
1323     row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
1324   }
1325 
1326   /* store the row lengths to the file */
1327   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1328   if (!rank) {
1329     ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap->n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1330     for (i=1; i<size; i++) {
1331       ierr = PetscViewerFlowControlStepMaster(viewer,i,message_count,flowcontrolcount);CHKERRQ(ierr);
1332       rlen = range[i+1] - range[i];
1333       ierr = MPILong_Recv(row_lengths,rlen,MPIU_INT,i,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1334       ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1335     }
1336     ierr = PetscViewerFlowControlEndMaster(viewer,message_count);CHKERRQ(ierr);
1337   } else {
1338     ierr = PetscViewerFlowControlStepWorker(viewer,rank,message_count);CHKERRQ(ierr);
1339     ierr = MPILong_Send(row_lengths,mat->rmap->n,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1340     ierr = PetscViewerFlowControlEndWorker(viewer,message_count);CHKERRQ(ierr);
1341   }
1342   ierr = PetscFree(row_lengths);CHKERRQ(ierr);
1343 
1344   /* load up the local column indices */
1345   nzmax = nz; /* )th processor needs space a largest processor needs */
1346   ierr = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
1347   ierr = PetscMalloc((nzmax+1)*sizeof(PetscInt),&column_indices);CHKERRQ(ierr);
1348   cnt  = 0;
1349   for (i=0; i<mat->rmap->n; i++) {
1350     for (j=B->i[i]; j<B->i[i+1]; j++) {
1351       if ( (col = garray[B->j[j]]) > cstart) break;
1352       column_indices[cnt++] = col;
1353     }
1354     for (k=A->i[i]; k<A->i[i+1]; k++) {
1355       column_indices[cnt++] = A->j[k] + cstart;
1356     }
1357     for (; j<B->i[i+1]; j++) {
1358       column_indices[cnt++] = garray[B->j[j]];
1359     }
1360   }
1361   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
1362 
1363   /* store the column indices to the file */
1364    ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1365   if (!rank) {
1366     MPI_Status status;
1367     ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1368     for (i=1; i<size; i++) {
1369       ierr = PetscViewerFlowControlStepMaster(viewer,i,message_count,flowcontrolcount);CHKERRQ(ierr);
1370       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
1371       if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax);
1372       ierr = MPILong_Recv(column_indices,rnz,MPIU_INT,i,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1373       ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1374     }
1375      ierr = PetscViewerFlowControlEndMaster(viewer,message_count);CHKERRQ(ierr);
1376   } else {
1377     ierr = PetscViewerFlowControlStepWorker(viewer,rank,message_count);CHKERRQ(ierr);
1378     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1379     ierr = MPILong_Send(column_indices,nz,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1380     ierr = PetscViewerFlowControlEndWorker(viewer,message_count);CHKERRQ(ierr);
1381   }
1382   ierr = PetscFree(column_indices);CHKERRQ(ierr);
1383 
1384   /* load up the local column values */
1385   ierr = PetscMalloc((nzmax+1)*sizeof(PetscScalar),&column_values);CHKERRQ(ierr);
1386   cnt  = 0;
1387   for (i=0; i<mat->rmap->n; i++) {
1388     for (j=B->i[i]; j<B->i[i+1]; j++) {
1389       if ( garray[B->j[j]] > cstart) break;
1390       column_values[cnt++] = B->a[j];
1391     }
1392     for (k=A->i[i]; k<A->i[i+1]; k++) {
1393       column_values[cnt++] = A->a[k];
1394     }
1395     for (; j<B->i[i+1]; j++) {
1396       column_values[cnt++] = B->a[j];
1397     }
1398   }
1399   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
1400 
1401   /* store the column values to the file */
1402    ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1403   if (!rank) {
1404     MPI_Status status;
1405     ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1406     for (i=1; i<size; i++) {
1407        ierr = PetscViewerFlowControlStepMaster(viewer,i,message_count,flowcontrolcount);CHKERRQ(ierr);
1408       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
1409       if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax);
1410       ierr = MPILong_Recv(column_values,rnz,MPIU_SCALAR,i,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1411       ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1412     }
1413     ierr = PetscViewerFlowControlEndMaster(viewer,message_count);CHKERRQ(ierr);
1414   } else {
1415     ierr = PetscViewerFlowControlStepWorker(viewer,rank,message_count);CHKERRQ(ierr);
1416     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1417     ierr = MPILong_Send(column_values,nz,MPIU_SCALAR,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1418     ierr = PetscViewerFlowControlEndWorker(viewer,message_count);CHKERRQ(ierr);
1419   }
1420   ierr = PetscFree(column_values);CHKERRQ(ierr);
1421   PetscFunctionReturn(0);
1422 }
1423 
1424 #undef __FUNCT__
1425 #define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket"
1426 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
1427 {
1428   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
1429   PetscErrorCode    ierr;
1430   PetscMPIInt       rank = aij->rank,size = aij->size;
1431   PetscBool         isdraw,iascii,isbinary;
1432   PetscViewer       sviewer;
1433   PetscViewerFormat format;
1434 
1435   PetscFunctionBegin;
1436   ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1437   ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1438   ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1439   if (iascii) {
1440     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1441     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1442       MatInfo    info;
1443       PetscBool  inodes;
1444 
1445       ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr);
1446       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
1447       ierr = MatInodeGetInodeSizes(aij->A,PETSC_NULL,(PetscInt **)&inodes,PETSC_NULL);CHKERRQ(ierr);
1448       ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_TRUE);CHKERRQ(ierr);
1449       if (!inodes) {
1450         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, not using I-node routines\n",
1451 					      rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
1452       } else {
1453         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, using I-node routines\n",
1454 		    rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
1455       }
1456       ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
1457       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1458       ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
1459       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1460       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1461       ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_FALSE);CHKERRQ(ierr);
1462       ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
1463       ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr);
1464       PetscFunctionReturn(0);
1465     } else if (format == PETSC_VIEWER_ASCII_INFO) {
1466       PetscInt   inodecount,inodelimit,*inodes;
1467       ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr);
1468       if (inodes) {
1469         ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr);
1470       } else {
1471         ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr);
1472       }
1473       PetscFunctionReturn(0);
1474     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1475       PetscFunctionReturn(0);
1476     }
1477   } else if (isbinary) {
1478     if (size == 1) {
1479       ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1480       ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
1481     } else {
1482       ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr);
1483     }
1484     PetscFunctionReturn(0);
1485   } else if (isdraw) {
1486     PetscDraw  draw;
1487     PetscBool  isnull;
1488     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
1489     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
1490   }
1491 
1492   if (size == 1) {
1493     ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1494     ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
1495   } else {
1496     /* assemble the entire matrix onto first processor. */
1497     Mat         A;
1498     Mat_SeqAIJ  *Aloc;
1499     PetscInt    M = mat->rmap->N,N = mat->cmap->N,m,*ai,*aj,row,*cols,i,*ct;
1500     MatScalar   *a;
1501 
1502     if (mat->rmap->N > 1024) {
1503       PetscBool  flg = PETSC_FALSE;
1504 
1505       ierr = PetscOptionsGetBool(((PetscObject) mat)->prefix, "-mat_ascii_output_large", &flg,PETSC_NULL);CHKERRQ(ierr);
1506       if (!flg) {
1507         SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_OUTOFRANGE,"ASCII matrix output not allowed for matrices with more than 1024 rows, use binary format instead.\nYou can override this restriction using -mat_ascii_output_large.");
1508       }
1509     }
1510 
1511     ierr = MatCreate(((PetscObject)mat)->comm,&A);CHKERRQ(ierr);
1512     if (!rank) {
1513       ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
1514     } else {
1515       ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
1516     }
1517     /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */
1518     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
1519     ierr = MatMPIAIJSetPreallocation(A,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
1520     ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr);
1521 
1522     /* copy over the A part */
1523     Aloc = (Mat_SeqAIJ*)aij->A->data;
1524     m = aij->A->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1525     row = mat->rmap->rstart;
1526     for (i=0; i<ai[m]; i++) {aj[i] += mat->cmap->rstart ;}
1527     for (i=0; i<m; i++) {
1528       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
1529       row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
1530     }
1531     aj = Aloc->j;
1532     for (i=0; i<ai[m]; i++) {aj[i] -= mat->cmap->rstart;}
1533 
1534     /* copy over the B part */
1535     Aloc = (Mat_SeqAIJ*)aij->B->data;
1536     m    = aij->B->rmap->n;  ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1537     row  = mat->rmap->rstart;
1538     ierr = PetscMalloc((ai[m]+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr);
1539     ct   = cols;
1540     for (i=0; i<ai[m]; i++) {cols[i] = aij->garray[aj[i]];}
1541     for (i=0; i<m; i++) {
1542       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
1543       row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
1544     }
1545     ierr = PetscFree(ct);CHKERRQ(ierr);
1546     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1547     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1548     /*
1549        Everyone has to call to draw the matrix since the graphics waits are
1550        synchronized across all processors that share the PetscDraw object
1551     */
1552     ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
1553     if (!rank) {
1554       ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1555       /* Set the type name to MATMPIAIJ so that the correct type can be printed out by PetscObjectPrintClassNamePrefixType() in MatView_SeqAIJ_ASCII()*/
1556       PetscStrcpy(((PetscObject)((Mat_MPIAIJ*)(A->data))->A)->type_name,MATMPIAIJ);
1557       ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
1558     }
1559     ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
1560     ierr = MatDestroy(&A);CHKERRQ(ierr);
1561   }
1562   PetscFunctionReturn(0);
1563 }
1564 
1565 #undef __FUNCT__
1566 #define __FUNCT__ "MatView_MPIAIJ"
1567 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer)
1568 {
1569   PetscErrorCode ierr;
1570   PetscBool      iascii,isdraw,issocket,isbinary;
1571 
1572   PetscFunctionBegin;
1573   ierr  = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1574   ierr  = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1575   ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1576   ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
1577   if (iascii || isdraw || isbinary || issocket) {
1578     ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
1579   } else {
1580     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Viewer type %s not supported by MPIAIJ matrices",((PetscObject)viewer)->type_name);
1581   }
1582   PetscFunctionReturn(0);
1583 }
1584 
1585 #undef __FUNCT__
1586 #define __FUNCT__ "MatSOR_MPIAIJ"
1587 PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
1588 {
1589   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1590   PetscErrorCode ierr;
1591   Vec            bb1 = 0;
1592   PetscBool      hasop;
1593 
1594   PetscFunctionBegin;
1595   if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) {
1596     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
1597   }
1598 
1599   if (flag == SOR_APPLY_UPPER) {
1600     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1601     PetscFunctionReturn(0);
1602   }
1603 
1604   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
1605     if (flag & SOR_ZERO_INITIAL_GUESS) {
1606       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1607       its--;
1608     }
1609 
1610     while (its--) {
1611       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1612       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1613 
1614       /* update rhs: bb1 = bb - B*x */
1615       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1616       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1617 
1618       /* local sweep */
1619       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1620     }
1621   } else if (flag & SOR_LOCAL_FORWARD_SWEEP){
1622     if (flag & SOR_ZERO_INITIAL_GUESS) {
1623       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1624       its--;
1625     }
1626     while (its--) {
1627       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1628       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1629 
1630       /* update rhs: bb1 = bb - B*x */
1631       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1632       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1633 
1634       /* local sweep */
1635       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1636     }
1637   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){
1638     if (flag & SOR_ZERO_INITIAL_GUESS) {
1639       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1640       its--;
1641     }
1642     while (its--) {
1643       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1644       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1645 
1646       /* update rhs: bb1 = bb - B*x */
1647       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1648       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1649 
1650       /* local sweep */
1651       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1652     }
1653   }  else if (flag & SOR_EISENSTAT) {
1654     Vec         xx1;
1655 
1656     ierr = VecDuplicate(bb,&xx1);CHKERRQ(ierr);
1657     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr);
1658 
1659     ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1660     ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1661     if (!mat->diag) {
1662       ierr = MatGetVecs(matin,&mat->diag,PETSC_NULL);CHKERRQ(ierr);
1663       ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr);
1664     }
1665     ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr);
1666     if (hasop) {
1667       ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr);
1668     } else {
1669       ierr = VecPointwiseMult(bb1,mat->diag,xx);CHKERRQ(ierr);
1670     }
1671     ierr = VecAYPX(bb1,(omega-2.0)/omega,bb);CHKERRQ(ierr);
1672 
1673     ierr = MatMultAdd(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr);
1674 
1675     /* local sweep */
1676     ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr);
1677     ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr);
1678     ierr = VecDestroy(&xx1);CHKERRQ(ierr);
1679   } else SETERRQ(((PetscObject)matin)->comm,PETSC_ERR_SUP,"Parallel SOR not supported");
1680 
1681   ierr = VecDestroy(&bb1);CHKERRQ(ierr);
1682   PetscFunctionReturn(0);
1683 }
1684 
1685 #undef __FUNCT__
1686 #define __FUNCT__ "MatPermute_MPIAIJ"
1687 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B)
1688 {
1689   MPI_Comm       comm,pcomm;
1690   PetscInt       first,local_size,nrows;
1691   const PetscInt *rows;
1692   PetscMPIInt    size;
1693   IS             crowp,growp,irowp,lrowp,lcolp,icolp;
1694   PetscErrorCode ierr;
1695 
1696   PetscFunctionBegin;
1697   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
1698   /* make a collective version of 'rowp' */
1699   ierr = PetscObjectGetComm((PetscObject)rowp,&pcomm);CHKERRQ(ierr);
1700   if (pcomm==comm) {
1701     crowp = rowp;
1702   } else {
1703     ierr = ISGetSize(rowp,&nrows);CHKERRQ(ierr);
1704     ierr = ISGetIndices(rowp,&rows);CHKERRQ(ierr);
1705     ierr = ISCreateGeneral(comm,nrows,rows,PETSC_COPY_VALUES,&crowp);CHKERRQ(ierr);
1706     ierr = ISRestoreIndices(rowp,&rows);CHKERRQ(ierr);
1707   }
1708   /* collect the global row permutation and invert it */
1709   ierr = ISAllGather(crowp,&growp);CHKERRQ(ierr);
1710   ierr = ISSetPermutation(growp);CHKERRQ(ierr);
1711   if (pcomm!=comm) {
1712     ierr = ISDestroy(&crowp);CHKERRQ(ierr);
1713   }
1714   ierr = ISInvertPermutation(growp,PETSC_DECIDE,&irowp);CHKERRQ(ierr);
1715   /* get the local target indices */
1716   ierr = MatGetOwnershipRange(A,&first,PETSC_NULL);CHKERRQ(ierr);
1717   ierr = MatGetLocalSize(A,&local_size,PETSC_NULL);CHKERRQ(ierr);
1718   ierr = ISGetIndices(irowp,&rows);CHKERRQ(ierr);
1719   ierr = ISCreateGeneral(MPI_COMM_SELF,local_size,rows+first,PETSC_COPY_VALUES,&lrowp);CHKERRQ(ierr);
1720   ierr = ISRestoreIndices(irowp,&rows);CHKERRQ(ierr);
1721   ierr = ISDestroy(&irowp);CHKERRQ(ierr);
1722   /* the column permutation is so much easier;
1723      make a local version of 'colp' and invert it */
1724   ierr = PetscObjectGetComm((PetscObject)colp,&pcomm);CHKERRQ(ierr);
1725   ierr = MPI_Comm_size(pcomm,&size);CHKERRQ(ierr);
1726   if (size==1) {
1727     lcolp = colp;
1728   } else {
1729     ierr = ISGetSize(colp,&nrows);CHKERRQ(ierr);
1730     ierr = ISGetIndices(colp,&rows);CHKERRQ(ierr);
1731     ierr = ISCreateGeneral(MPI_COMM_SELF,nrows,rows,PETSC_COPY_VALUES,&lcolp);CHKERRQ(ierr);
1732   }
1733   ierr = ISSetPermutation(lcolp);CHKERRQ(ierr);
1734   ierr = ISInvertPermutation(lcolp,PETSC_DECIDE,&icolp);CHKERRQ(ierr);
1735   ierr = ISSetPermutation(icolp);CHKERRQ(ierr);
1736   if (size>1) {
1737     ierr = ISRestoreIndices(colp,&rows);CHKERRQ(ierr);
1738     ierr = ISDestroy(&lcolp);CHKERRQ(ierr);
1739   }
1740   /* now we just get the submatrix */
1741   ierr = MatGetSubMatrix_MPIAIJ_Private(A,lrowp,icolp,local_size,MAT_INITIAL_MATRIX,B);CHKERRQ(ierr);
1742   /* clean up */
1743   ierr = ISDestroy(&lrowp);CHKERRQ(ierr);
1744   ierr = ISDestroy(&icolp);CHKERRQ(ierr);
1745   PetscFunctionReturn(0);
1746 }
1747 
1748 #undef __FUNCT__
1749 #define __FUNCT__ "MatGetInfo_MPIAIJ"
1750 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1751 {
1752   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1753   Mat            A = mat->A,B = mat->B;
1754   PetscErrorCode ierr;
1755   PetscReal      isend[5],irecv[5];
1756 
1757   PetscFunctionBegin;
1758   info->block_size     = 1.0;
1759   ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1760   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1761   isend[3] = info->memory;  isend[4] = info->mallocs;
1762   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1763   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1764   isend[3] += info->memory;  isend[4] += info->mallocs;
1765   if (flag == MAT_LOCAL) {
1766     info->nz_used      = isend[0];
1767     info->nz_allocated = isend[1];
1768     info->nz_unneeded  = isend[2];
1769     info->memory       = isend[3];
1770     info->mallocs      = isend[4];
1771   } else if (flag == MAT_GLOBAL_MAX) {
1772     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,((PetscObject)matin)->comm);CHKERRQ(ierr);
1773     info->nz_used      = irecv[0];
1774     info->nz_allocated = irecv[1];
1775     info->nz_unneeded  = irecv[2];
1776     info->memory       = irecv[3];
1777     info->mallocs      = irecv[4];
1778   } else if (flag == MAT_GLOBAL_SUM) {
1779     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,((PetscObject)matin)->comm);CHKERRQ(ierr);
1780     info->nz_used      = irecv[0];
1781     info->nz_allocated = irecv[1];
1782     info->nz_unneeded  = irecv[2];
1783     info->memory       = irecv[3];
1784     info->mallocs      = irecv[4];
1785   }
1786   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1787   info->fill_ratio_needed = 0;
1788   info->factor_mallocs    = 0;
1789 
1790   PetscFunctionReturn(0);
1791 }
1792 
1793 #undef __FUNCT__
1794 #define __FUNCT__ "MatSetOption_MPIAIJ"
1795 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscBool  flg)
1796 {
1797   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1798   PetscErrorCode ierr;
1799 
1800   PetscFunctionBegin;
1801   switch (op) {
1802   case MAT_NEW_NONZERO_LOCATIONS:
1803   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1804   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1805   case MAT_KEEP_NONZERO_PATTERN:
1806   case MAT_NEW_NONZERO_LOCATION_ERR:
1807   case MAT_USE_INODES:
1808   case MAT_IGNORE_ZERO_ENTRIES:
1809     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1810     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1811     break;
1812   case MAT_ROW_ORIENTED:
1813     a->roworiented = flg;
1814     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1815     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1816     break;
1817   case MAT_NEW_DIAGONALS:
1818     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1819     break;
1820   case MAT_IGNORE_OFF_PROC_ENTRIES:
1821     a->donotstash = flg;
1822     break;
1823   case MAT_SPD:
1824     A->spd_set                         = PETSC_TRUE;
1825     A->spd                             = flg;
1826     if (flg) {
1827       A->symmetric                     = PETSC_TRUE;
1828       A->structurally_symmetric        = PETSC_TRUE;
1829       A->symmetric_set                 = PETSC_TRUE;
1830       A->structurally_symmetric_set    = PETSC_TRUE;
1831     }
1832     break;
1833   case MAT_SYMMETRIC:
1834     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1835     break;
1836   case MAT_STRUCTURALLY_SYMMETRIC:
1837     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1838     break;
1839   case MAT_HERMITIAN:
1840     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1841     break;
1842   case MAT_SYMMETRY_ETERNAL:
1843     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1844     break;
1845   default:
1846     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op);
1847   }
1848   PetscFunctionReturn(0);
1849 }
1850 
#undef __FUNCT__
#define __FUNCT__ "MatGetRow_MPIAIJ"
/*
   MatGetRow_MPIAIJ - Returns the global column indices and values of one
   locally owned row, merging the diagonal (A) and off-diagonal (B) parts
   into a single list sorted by increasing global column number.

   Only rows in the local ownership range [rstart,rend) may be requested.
   The returned arrays live in per-matrix work space (mat->rowvalues /
   mat->rowindices), sized once for the longest local row and reused, so
   MatRestoreRow_MPIAIJ() must be called before the next MatGetRow().
*/
PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
{
  Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
  PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
  PetscErrorCode ierr;
  PetscInt       i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart;
  PetscInt       nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend;
  PetscInt       *cmap,*idx_p;

  PetscFunctionBegin;
  /* only one row may be "checked out" at a time since the work space is shared */
  if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
  mat->getrowactive = PETSC_TRUE;

  if (!mat->rowvalues && (idx || v)) {
    /*
        allocate enough space to hold information from the longest row.
    */
    Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data;
    PetscInt   max = 1,tmp;
    /* longest row = max over local rows of (A-part length + B-part length) */
    for (i=0; i<matin->rmap->n; i++) {
      tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
      if (max < tmp) { max = tmp; }
    }
    ierr = PetscMalloc2(max,PetscScalar,&mat->rowvalues,max,PetscInt,&mat->rowindices);CHKERRQ(ierr);
  }

  if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows");
  lrow = row - rstart;

  /* pass null pointers for whichever of values/columns the caller did not request */
  pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
  if (!v)   {pvA = 0; pvB = 0;}
  if (!idx) {pcA = 0; if (!v) pcB = 0;}
  ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
  ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
  nztot = nzA + nzB;

  /* cmap maps local column indices of the B part to global column numbers */
  cmap  = mat->garray;
  if (v  || idx) {
    if (nztot) {
      /* Sort by increasing column numbers, assuming A and B already sorted */
      PetscInt imark = -1;
      if (v) {
        *v = v_p = mat->rowvalues;
        /* copy B entries whose global column precedes the diagonal block (< cstart) */
        for (i=0; i<nzB; i++) {
          if (cmap[cworkB[i]] < cstart)   v_p[i] = vworkB[i];
          else break;
        }
        /* imark = number of leading B entries; A entries go in the middle,
           remaining B entries (columns >= cend) go after them */
        imark = i;
        for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
        for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
      }
      if (idx) {
        *idx = idx_p = mat->rowindices;
        if (imark > -1) {
          /* split point already found while copying the values above */
          for (i=0; i<imark; i++) {
            idx_p[i] = cmap[cworkB[i]];
          }
        } else {
          /* values were not requested: find the split point here instead */
          for (i=0; i<nzB; i++) {
            if (cmap[cworkB[i]] < cstart)   idx_p[i] = cmap[cworkB[i]];
            else break;
          }
          imark = i;
        }
        /* A-part columns are local; shift by cstart to make them global */
        for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart + cworkA[i];
        for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]];
      }
    } else {
      if (idx) *idx = 0;
      if (v)   *v   = 0;
    }
  }
  *nz = nztot;
  ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
  ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1930 
1931 #undef __FUNCT__
1932 #define __FUNCT__ "MatRestoreRow_MPIAIJ"
1933 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1934 {
1935   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1936 
1937   PetscFunctionBegin;
1938   if (!aij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
1939   aij->getrowactive = PETSC_FALSE;
1940   PetscFunctionReturn(0);
1941 }
1942 
#undef __FUNCT__
#define __FUNCT__ "MatNorm_MPIAIJ"
/*
   MatNorm_MPIAIJ - Computes a norm of a parallel AIJ matrix.

   Supports NORM_FROBENIUS, NORM_1 (max column sum) and NORM_INFINITY
   (max row sum); the 2-norm is not available.  Each process accumulates
   over its diagonal (A) and off-diagonal (B) blocks and the partial
   results are combined with an MPI reduction.
*/
PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  Mat_SeqAIJ     *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data;
  PetscErrorCode ierr;
  PetscInt       i,j,cstart = mat->cmap->rstart;
  PetscReal      sum = 0.0;
  MatScalar      *v;

  PetscFunctionBegin;
  if (aij->size == 1) {
    /* single process: defer entirely to the sequential block */
    ierr =  MatNorm(aij->A,type,norm);CHKERRQ(ierr);
  } else {
    if (type == NORM_FROBENIUS) {
      /* sum |a_ij|^2 over both blocks, then reduce and take the square root */
      v = amat->a;
      for (i=0; i<amat->nz; i++) {
#if defined(PETSC_USE_COMPLEX)
        sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
#else
        sum += (*v)*(*v); v++;
#endif
      }
      v = bmat->a;
      for (i=0; i<bmat->nz; i++) {
#if defined(PETSC_USE_COMPLEX)
        sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
#else
        sum += (*v)*(*v); v++;
#endif
      }
      ierr = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPIU_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr);
      *norm = PetscSqrtReal(*norm);
    } else if (type == NORM_1) { /* max column norm */
      /* tmp holds per-global-column absolute sums; tmp2 receives the reduction */
      PetscReal *tmp,*tmp2;
      PetscInt  *jj,*garray = aij->garray;
      ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr);
      ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscReal),&tmp2);CHKERRQ(ierr);
      ierr = PetscMemzero(tmp,mat->cmap->N*sizeof(PetscReal));CHKERRQ(ierr);
      *norm = 0.0;
      /* A-part columns are local: shift by cstart to address the global array */
      v = amat->a; jj = amat->j;
      for (j=0; j<amat->nz; j++) {
        tmp[cstart + *jj++ ] += PetscAbsScalar(*v);  v++;
      }
      /* B-part columns are compressed: garray maps them to global columns */
      v = bmat->a; jj = bmat->j;
      for (j=0; j<bmat->nz; j++) {
        tmp[garray[*jj++]] += PetscAbsScalar(*v); v++;
      }
      ierr = MPI_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr);
      for (j=0; j<mat->cmap->N; j++) {
        if (tmp2[j] > *norm) *norm = tmp2[j];
      }
      ierr = PetscFree(tmp);CHKERRQ(ierr);
      ierr = PetscFree(tmp2);CHKERRQ(ierr);
    } else if (type == NORM_INFINITY) { /* max row norm */
      PetscReal ntemp = 0.0;
      /* rows are distributed, so the local row-sum maximum is reduced with MAX */
      for (j=0; j<aij->A->rmap->n; j++) {
        v = amat->a + amat->i[j];
        sum = 0.0;
        for (i=0; i<amat->i[j+1]-amat->i[j]; i++) {
          sum += PetscAbsScalar(*v); v++;
        }
        v = bmat->a + bmat->i[j];
        for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) {
          sum += PetscAbsScalar(*v); v++;
        }
        if (sum > ntemp) ntemp = sum;
      }
      ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPIU_MAX,((PetscObject)mat)->comm);CHKERRQ(ierr);
    } else {
      SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_SUP,"No support for two norm");
    }
  }
  PetscFunctionReturn(0);
}
2019 
#undef __FUNCT__
#define __FUNCT__ "MatTranspose_MPIAIJ"
/*
   MatTranspose_MPIAIJ - Forms the transpose of a parallel AIJ matrix.

   Works by inserting each local entry of A into the new matrix with row and
   column swapped via MatSetValues(); the MPI stash delivers the entries to
   their transposed owners during assembly.  For MAT_INITIAL_MATRIX (or an
   in-place request) the diagonal preallocation d_nnz is computed exactly
   from the A-part column counts, and the off-diagonal preallocation is
   approximated by the same counts to avoid extra communication.

   NOTE(review): the A-part column array Aloc->j is temporarily shifted to
   global indices in place and shifted back afterwards, so the input matrix
   is momentarily modified during the call.
*/
PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  Mat_SeqAIJ     *Aloc=(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data;
  PetscErrorCode ierr;
  PetscInt       M = A->rmap->N,N = A->cmap->N,ma,na,mb,*ai,*aj,*bi,*bj,row,*cols,*cols_tmp,i,*d_nnz;
  PetscInt       cstart=A->cmap->rstart,ncol;
  Mat            B;
  MatScalar      *array;

  PetscFunctionBegin;
  /* in-place transpose only makes sense for square matrices */
  if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");

  ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n;
  ai = Aloc->i; aj = Aloc->j;
  bi = Bloc->i; bj = Bloc->j;
  if (reuse == MAT_INITIAL_MATRIX || *matout == A) {
    /* compute d_nnz for preallocation; o_nnz is approximated by d_nnz to avoid communication */
    ierr = PetscMalloc((1+na)*sizeof(PetscInt),&d_nnz);CHKERRQ(ierr);
    ierr = PetscMemzero(d_nnz,(1+na)*sizeof(PetscInt));CHKERRQ(ierr);
    for (i=0; i<ai[ma]; i++){
      d_nnz[aj[i]] ++;
      aj[i] += cstart; /* global col index to be used by MatSetValues() */
    }

    ierr = MatCreate(((PetscObject)A)->comm,&B);CHKERRQ(ierr);
    ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
    ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,d_nnz);CHKERRQ(ierr);
    ierr = PetscFree(d_nnz);CHKERRQ(ierr);
  } else {
    B = *matout;
  }

  /* copy over the A part */
  array = Aloc->a;
  row = A->rmap->rstart;
  for (i=0; i<ma; i++) {
    /* one row of A becomes one (global) column of B */
    ncol = ai[i+1]-ai[i];
    ierr = MatSetValues(B,ncol,aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
    row++; array += ncol; aj += ncol;
  }
  aj = Aloc->j;
  for (i=0; i<ai[ma]; i++) aj[i] -= cstart; /* resume local col index */

  /* copy over the B part */
  ierr = PetscMalloc(bi[mb]*sizeof(PetscInt),&cols);CHKERRQ(ierr);
  ierr = PetscMemzero(cols,bi[mb]*sizeof(PetscInt));CHKERRQ(ierr);
  array = Bloc->a;
  row = A->rmap->rstart;
  /* translate compressed B columns to global columns via garray */
  for (i=0; i<bi[mb]; i++) {cols[i] = a->garray[bj[i]];}
  cols_tmp = cols;
  for (i=0; i<mb; i++) {
    ncol = bi[i+1]-bi[i];
    ierr = MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
    row++; array += ncol; cols_tmp += ncol;
  }
  ierr = PetscFree(cols);CHKERRQ(ierr);

  ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  if (reuse == MAT_INITIAL_MATRIX || *matout != A) {
    *matout = B;
  } else {
    /* in-place: replace A's internals with B's and discard the shell */
    ierr = MatHeaderMerge(A,B);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
2090 
2091 #undef __FUNCT__
2092 #define __FUNCT__ "MatDiagonalScale_MPIAIJ"
2093 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
2094 {
2095   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2096   Mat            a = aij->A,b = aij->B;
2097   PetscErrorCode ierr;
2098   PetscInt       s1,s2,s3;
2099 
2100   PetscFunctionBegin;
2101   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
2102   if (rr) {
2103     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
2104     if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
2105     /* Overlap communication with computation. */
2106     ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2107   }
2108   if (ll) {
2109     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
2110     if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
2111     ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr);
2112   }
2113   /* scale  the diagonal block */
2114   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
2115 
2116   if (rr) {
2117     /* Do a scatter end and then right scale the off-diagonal block */
2118     ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2119     ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr);
2120   }
2121 
2122   PetscFunctionReturn(0);
2123 }
2124 
2125 #undef __FUNCT__
2126 #define __FUNCT__ "MatSetBlockSize_MPIAIJ"
2127 PetscErrorCode MatSetBlockSize_MPIAIJ(Mat A,PetscInt bs)
2128 {
2129   Mat_MPIAIJ     *a   = (Mat_MPIAIJ*)A->data;
2130   PetscErrorCode ierr;
2131 
2132   PetscFunctionBegin;
2133   ierr = MatSetBlockSize(a->A,bs);CHKERRQ(ierr);
2134   ierr = MatSetBlockSize(a->B,bs);CHKERRQ(ierr);
2135   ierr = PetscLayoutSetBlockSize(A->rmap,bs);CHKERRQ(ierr);
2136   ierr = PetscLayoutSetBlockSize(A->cmap,bs);CHKERRQ(ierr);
2137   PetscFunctionReturn(0);
2138 }
2139 #undef __FUNCT__
2140 #define __FUNCT__ "MatSetUnfactored_MPIAIJ"
2141 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A)
2142 {
2143   Mat_MPIAIJ     *a   = (Mat_MPIAIJ*)A->data;
2144   PetscErrorCode ierr;
2145 
2146   PetscFunctionBegin;
2147   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
2148   PetscFunctionReturn(0);
2149 }
2150 
2151 #undef __FUNCT__
2152 #define __FUNCT__ "MatEqual_MPIAIJ"
2153 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscBool  *flag)
2154 {
2155   Mat_MPIAIJ     *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data;
2156   Mat            a,b,c,d;
2157   PetscBool      flg;
2158   PetscErrorCode ierr;
2159 
2160   PetscFunctionBegin;
2161   a = matA->A; b = matA->B;
2162   c = matB->A; d = matB->B;
2163 
2164   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
2165   if (flg) {
2166     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
2167   }
2168   ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,((PetscObject)A)->comm);CHKERRQ(ierr);
2169   PetscFunctionReturn(0);
2170 }
2171 
2172 #undef __FUNCT__
2173 #define __FUNCT__ "MatCopy_MPIAIJ"
2174 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
2175 {
2176   PetscErrorCode ierr;
2177   Mat_MPIAIJ     *a = (Mat_MPIAIJ *)A->data;
2178   Mat_MPIAIJ     *b = (Mat_MPIAIJ *)B->data;
2179 
2180   PetscFunctionBegin;
2181   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
2182   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
2183     /* because of the column compression in the off-processor part of the matrix a->B,
2184        the number of columns in a->B and b->B may be different, hence we cannot call
2185        the MatCopy() directly on the two parts. If need be, we can provide a more
2186        efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices
2187        then copying the submatrices */
2188     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
2189   } else {
2190     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
2191     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
2192   }
2193   PetscFunctionReturn(0);
2194 }
2195 
2196 #undef __FUNCT__
2197 #define __FUNCT__ "MatSetUpPreallocation_MPIAIJ"
2198 PetscErrorCode MatSetUpPreallocation_MPIAIJ(Mat A)
2199 {
2200   PetscErrorCode ierr;
2201 
2202   PetscFunctionBegin;
2203   ierr =  MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
2204   PetscFunctionReturn(0);
2205 }
2206 
2207 #undef __FUNCT__
2208 #define __FUNCT__ "MatAXPY_MPIAIJ"
2209 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
2210 {
2211   PetscErrorCode ierr;
2212   PetscInt       i;
2213   Mat_MPIAIJ     *xx = (Mat_MPIAIJ *)X->data,*yy = (Mat_MPIAIJ *)Y->data;
2214   PetscBLASInt   bnz,one=1;
2215   Mat_SeqAIJ     *x,*y;
2216 
2217   PetscFunctionBegin;
2218   if (str == SAME_NONZERO_PATTERN) {
2219     PetscScalar alpha = a;
2220     x = (Mat_SeqAIJ *)xx->A->data;
2221     y = (Mat_SeqAIJ *)yy->A->data;
2222     bnz = PetscBLASIntCast(x->nz);
2223     BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
2224     x = (Mat_SeqAIJ *)xx->B->data;
2225     y = (Mat_SeqAIJ *)yy->B->data;
2226     bnz = PetscBLASIntCast(x->nz);
2227     BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
2228   } else if (str == SUBSET_NONZERO_PATTERN) {
2229     ierr = MatAXPY_SeqAIJ(yy->A,a,xx->A,str);CHKERRQ(ierr);
2230 
2231     x = (Mat_SeqAIJ *)xx->B->data;
2232     y = (Mat_SeqAIJ *)yy->B->data;
2233     if (y->xtoy && y->XtoY != xx->B) {
2234       ierr = PetscFree(y->xtoy);CHKERRQ(ierr);
2235       ierr = MatDestroy(&y->XtoY);CHKERRQ(ierr);
2236     }
2237     if (!y->xtoy) { /* get xtoy */
2238       ierr = MatAXPYGetxtoy_Private(xx->B->rmap->n,x->i,x->j,xx->garray,y->i,y->j,yy->garray,&y->xtoy);CHKERRQ(ierr);
2239       y->XtoY = xx->B;
2240       ierr = PetscObjectReference((PetscObject)xx->B);CHKERRQ(ierr);
2241     }
2242     for (i=0; i<x->nz; i++) y->a[y->xtoy[i]] += a*(x->a[i]);
2243   } else {
2244     Mat B;
2245     PetscInt *nnz_d,*nnz_o;
2246     ierr = PetscMalloc(yy->A->rmap->N*sizeof(PetscInt),&nnz_d);CHKERRQ(ierr);
2247     ierr = PetscMalloc(yy->B->rmap->N*sizeof(PetscInt),&nnz_o);CHKERRQ(ierr);
2248     ierr = MatCreate(((PetscObject)Y)->comm,&B);CHKERRQ(ierr);
2249     ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr);
2250     ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr);
2251     ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr);
2252     ierr = MatAXPYGetPreallocation_SeqAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr);
2253     ierr = MatAXPYGetPreallocation_SeqAIJ(yy->B,xx->B,nnz_o);CHKERRQ(ierr);
2254     ierr = MatMPIAIJSetPreallocation(B,0,nnz_d,0,nnz_o);CHKERRQ(ierr);
2255     ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr);
2256     ierr = MatHeaderReplace(Y,B);
2257     ierr = PetscFree(nnz_d);CHKERRQ(ierr);
2258     ierr = PetscFree(nnz_o);CHKERRQ(ierr);
2259   }
2260   PetscFunctionReturn(0);
2261 }
2262 
2263 extern PetscErrorCode  MatConjugate_SeqAIJ(Mat);
2264 
2265 #undef __FUNCT__
2266 #define __FUNCT__ "MatConjugate_MPIAIJ"
2267 PetscErrorCode  MatConjugate_MPIAIJ(Mat mat)
2268 {
2269 #if defined(PETSC_USE_COMPLEX)
2270   PetscErrorCode ierr;
2271   Mat_MPIAIJ     *aij = (Mat_MPIAIJ *)mat->data;
2272 
2273   PetscFunctionBegin;
2274   ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr);
2275   ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr);
2276 #else
2277   PetscFunctionBegin;
2278 #endif
2279   PetscFunctionReturn(0);
2280 }
2281 
2282 #undef __FUNCT__
2283 #define __FUNCT__ "MatRealPart_MPIAIJ"
2284 PetscErrorCode MatRealPart_MPIAIJ(Mat A)
2285 {
2286   Mat_MPIAIJ   *a = (Mat_MPIAIJ*)A->data;
2287   PetscErrorCode ierr;
2288 
2289   PetscFunctionBegin;
2290   ierr = MatRealPart(a->A);CHKERRQ(ierr);
2291   ierr = MatRealPart(a->B);CHKERRQ(ierr);
2292   PetscFunctionReturn(0);
2293 }
2294 
2295 #undef __FUNCT__
2296 #define __FUNCT__ "MatImaginaryPart_MPIAIJ"
2297 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A)
2298 {
2299   Mat_MPIAIJ   *a = (Mat_MPIAIJ*)A->data;
2300   PetscErrorCode ierr;
2301 
2302   PetscFunctionBegin;
2303   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
2304   ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
2305   PetscFunctionReturn(0);
2306 }
2307 
2308 #ifdef PETSC_HAVE_PBGL
2309 
2310 #include <boost/parallel/mpi/bsp_process_group.hpp>
2311 #include <boost/graph/distributed/ilu_default_graph.hpp>
2312 #include <boost/graph/distributed/ilu_0_block.hpp>
2313 #include <boost/graph/distributed/ilu_preconditioner.hpp>
2314 #include <boost/graph/distributed/petsc/interface.hpp>
2315 #include <boost/multi_array.hpp>
2316 #include <boost/parallel/distributed_property_map->hpp>
2317 
2318 #undef __FUNCT__
2319 #define __FUNCT__ "MatILUFactorSymbolic_MPIAIJ"
2320 /*
2321   This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu>
2322 */
2323 PetscErrorCode MatILUFactorSymbolic_MPIAIJ(Mat fact,Mat A, IS isrow, IS iscol, const MatFactorInfo *info)
2324 {
2325   namespace petsc = boost::distributed::petsc;
2326 
2327   namespace graph_dist = boost::graph::distributed;
2328   using boost::graph::distributed::ilu_default::process_group_type;
2329   using boost::graph::ilu_permuted;
2330 
2331   PetscBool       row_identity, col_identity;
2332   PetscContainer  c;
2333   PetscInt        m, n, M, N;
2334   PetscErrorCode  ierr;
2335 
2336   PetscFunctionBegin;
2337   if (info->levels != 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only levels = 0 supported for parallel ilu");
2338   ierr = ISIdentity(isrow, &row_identity);CHKERRQ(ierr);
2339   ierr = ISIdentity(iscol, &col_identity);CHKERRQ(ierr);
2340   if (!row_identity || !col_identity) {
2341     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for parallel ILU");
2342   }
2343 
2344   process_group_type pg;
2345   typedef graph_dist::ilu_default::ilu_level_graph_type  lgraph_type;
2346   lgraph_type*   lgraph_p = new lgraph_type(petsc::num_global_vertices(A), pg, petsc::matrix_distribution(A, pg));
2347   lgraph_type&   level_graph = *lgraph_p;
2348   graph_dist::ilu_default::graph_type&            graph(level_graph.graph);
2349 
2350   petsc::read_matrix(A, graph, get(boost::edge_weight, graph));
2351   ilu_permuted(level_graph);
2352 
2353   /* put together the new matrix */
2354   ierr = MatCreate(((PetscObject)A)->comm, fact);CHKERRQ(ierr);
2355   ierr = MatGetLocalSize(A, &m, &n);CHKERRQ(ierr);
2356   ierr = MatGetSize(A, &M, &N);CHKERRQ(ierr);
2357   ierr = MatSetSizes(fact, m, n, M, N);CHKERRQ(ierr);
2358   ierr = MatSetType(fact, ((PetscObject)A)->type_name);CHKERRQ(ierr);
2359   ierr = MatAssemblyBegin(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2360   ierr = MatAssemblyEnd(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2361 
2362   ierr = PetscContainerCreate(((PetscObject)A)->comm, &c);
2363   ierr = PetscContainerSetPointer(c, lgraph_p);
2364   ierr = PetscObjectCompose((PetscObject) (fact), "graph", (PetscObject) c);
2365   ierr = PetscContainerDestroy(&c);
2366   PetscFunctionReturn(0);
2367 }
2368 
#undef __FUNCT__
#define __FUNCT__ "MatLUFactorNumeric_MPIAIJ"
/* Intentionally a no-op: the PBGL ILU factorization is performed entirely in
   MatILUFactorSymbolic_MPIAIJ(), which attaches the factored level graph to the
   matrix; MatSolve_MPIAIJ() then uses that graph directly, so there is no
   separate numeric phase. */
PetscErrorCode MatLUFactorNumeric_MPIAIJ(Mat B,Mat A, const MatFactorInfo *info)
{
  PetscFunctionBegin;
  PetscFunctionReturn(0);
}
2376 
2377 #undef __FUNCT__
2378 #define __FUNCT__ "MatSolve_MPIAIJ"
2379 /*
2380   This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu>
2381 */
2382 PetscErrorCode MatSolve_MPIAIJ(Mat A, Vec b, Vec x)
2383 {
2384   namespace graph_dist = boost::graph::distributed;
2385 
2386   typedef graph_dist::ilu_default::ilu_level_graph_type  lgraph_type;
2387   lgraph_type*   lgraph_p;
2388   PetscContainer c;
2389   PetscErrorCode ierr;
2390 
2391   PetscFunctionBegin;
2392   ierr = PetscObjectQuery((PetscObject) A, "graph", (PetscObject *) &c);CHKERRQ(ierr);
2393   ierr = PetscContainerGetPointer(c, (void **) &lgraph_p);CHKERRQ(ierr);
2394   ierr = VecCopy(b, x);CHKERRQ(ierr);
2395 
2396   PetscScalar* array_x;
2397   ierr = VecGetArray(x, &array_x);CHKERRQ(ierr);
2398   PetscInt sx;
2399   ierr = VecGetSize(x, &sx);CHKERRQ(ierr);
2400 
2401   PetscScalar* array_b;
2402   ierr = VecGetArray(b, &array_b);CHKERRQ(ierr);
2403   PetscInt sb;
2404   ierr = VecGetSize(b, &sb);CHKERRQ(ierr);
2405 
2406   lgraph_type&   level_graph = *lgraph_p;
2407   graph_dist::ilu_default::graph_type&            graph(level_graph.graph);
2408 
2409   typedef boost::multi_array_ref<PetscScalar, 1> array_ref_type;
2410   array_ref_type                                 ref_b(array_b, boost::extents[num_vertices(graph)]),
2411                                                  ref_x(array_x, boost::extents[num_vertices(graph)]);
2412 
2413   typedef boost::iterator_property_map<array_ref_type::iterator,
2414                                 boost::property_map<graph_dist::ilu_default::graph_type, boost::vertex_index_t>::type>  gvector_type;
2415   gvector_type                                   vector_b(ref_b.begin(), get(boost::vertex_index, graph)),
2416                                                  vector_x(ref_x.begin(), get(boost::vertex_index, graph));
2417 
2418   ilu_set_solve(*lgraph_p, vector_b, vector_x);
2419 
2420   PetscFunctionReturn(0);
2421 }
2422 #endif
2423 
typedef struct { /* used by MatGetRedundantMatrix() for reusing matredundant */
  PetscInt       nzlocal,nsends,nrecvs;              /* local nonzero count; number of sends and receives */
  PetscMPIInt    *send_rank,*recv_rank;              /* ranks this process sends to / receives from */
  PetscInt       *sbuf_nz,*rbuf_nz,*sbuf_j,**rbuf_j; /* send/recv nonzero counts; send/recv row-pointer+column buffers */
  PetscScalar    *sbuf_a,**rbuf_a;                   /* send buffer and per-sender receive buffers of matrix values */
  PetscErrorCode (*Destroy)(Mat);                    /* original destroy routine of the redundant matrix */
} Mat_Redundant;
2431 
2432 #undef __FUNCT__
2433 #define __FUNCT__ "PetscContainerDestroy_MatRedundant"
2434 PetscErrorCode PetscContainerDestroy_MatRedundant(void *ptr)
2435 {
2436   PetscErrorCode       ierr;
2437   Mat_Redundant        *redund=(Mat_Redundant*)ptr;
2438   PetscInt             i;
2439 
2440   PetscFunctionBegin;
2441   ierr = PetscFree2(redund->send_rank,redund->recv_rank);CHKERRQ(ierr);
2442   ierr = PetscFree(redund->sbuf_j);CHKERRQ(ierr);
2443   ierr = PetscFree(redund->sbuf_a);CHKERRQ(ierr);
2444   for (i=0; i<redund->nrecvs; i++){
2445     ierr = PetscFree(redund->rbuf_j[i]);CHKERRQ(ierr);
2446     ierr = PetscFree(redund->rbuf_a[i]);CHKERRQ(ierr);
2447   }
2448   ierr = PetscFree4(redund->sbuf_nz,redund->rbuf_nz,redund->rbuf_j,redund->rbuf_a);CHKERRQ(ierr);
2449   ierr = PetscFree(redund);CHKERRQ(ierr);
2450   PetscFunctionReturn(0);
2451 }
2452 
2453 #undef __FUNCT__
2454 #define __FUNCT__ "MatDestroy_MatRedundant"
2455 PetscErrorCode MatDestroy_MatRedundant(Mat A)
2456 {
2457   PetscErrorCode  ierr;
2458   PetscContainer  container;
2459   Mat_Redundant   *redund=PETSC_NULL;
2460 
2461   PetscFunctionBegin;
2462   ierr = PetscObjectQuery((PetscObject)A,"Mat_Redundant",(PetscObject *)&container);CHKERRQ(ierr);
2463   if (!container) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Container does not exit");
2464   ierr = PetscContainerGetPointer(container,(void **)&redund);CHKERRQ(ierr);
2465   A->ops->destroy = redund->Destroy;
2466   ierr = PetscObjectCompose((PetscObject)A,"Mat_Redundant",0);CHKERRQ(ierr);
2467   if (A->ops->destroy) {
2468     ierr = (*A->ops->destroy)(A);CHKERRQ(ierr);
2469   }
2470   PetscFunctionReturn(0);
2471 }
2472 
2473 #undef __FUNCT__
2474 #define __FUNCT__ "MatGetRedundantMatrix_MPIAIJ"
2475 PetscErrorCode MatGetRedundantMatrix_MPIAIJ(Mat mat,PetscInt nsubcomm,MPI_Comm subcomm,PetscInt mlocal_sub,MatReuse reuse,Mat *matredundant)
2476 {
2477   PetscMPIInt    rank,size;
2478   MPI_Comm       comm=((PetscObject)mat)->comm;
2479   PetscErrorCode ierr;
2480   PetscInt       nsends=0,nrecvs=0,i,rownz_max=0;
2481   PetscMPIInt    *send_rank=PETSC_NULL,*recv_rank=PETSC_NULL;
2482   PetscInt       *rowrange=mat->rmap->range;
2483   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2484   Mat            A=aij->A,B=aij->B,C=*matredundant;
2485   Mat_SeqAIJ     *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data;
2486   PetscScalar    *sbuf_a;
2487   PetscInt       nzlocal=a->nz+b->nz;
2488   PetscInt       j,cstart=mat->cmap->rstart,cend=mat->cmap->rend,row,nzA,nzB,ncols,*cworkA,*cworkB;
2489   PetscInt       rstart=mat->rmap->rstart,rend=mat->rmap->rend,*bmap=aij->garray,M,N;
2490   PetscInt       *cols,ctmp,lwrite,*rptr,l,*sbuf_j;
2491   MatScalar      *aworkA,*aworkB;
2492   PetscScalar    *vals;
2493   PetscMPIInt    tag1,tag2,tag3,imdex;
2494   MPI_Request    *s_waits1=PETSC_NULL,*s_waits2=PETSC_NULL,*s_waits3=PETSC_NULL,
2495                  *r_waits1=PETSC_NULL,*r_waits2=PETSC_NULL,*r_waits3=PETSC_NULL;
2496   MPI_Status     recv_status,*send_status;
2497   PetscInt       *sbuf_nz=PETSC_NULL,*rbuf_nz=PETSC_NULL,count;
2498   PetscInt       **rbuf_j=PETSC_NULL;
2499   PetscScalar    **rbuf_a=PETSC_NULL;
2500   Mat_Redundant  *redund=PETSC_NULL;
2501   PetscContainer container;
2502 
2503   PetscFunctionBegin;
2504   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2505   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2506 
2507   if (reuse == MAT_REUSE_MATRIX) {
2508     ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr);
2509     if (M != N || M != mat->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong global size");
2510     ierr = MatGetLocalSize(C,&M,&N);CHKERRQ(ierr);
2511     if (M != N || M != mlocal_sub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong local size");
2512     ierr = PetscObjectQuery((PetscObject)C,"Mat_Redundant",(PetscObject *)&container);CHKERRQ(ierr);
2513     if (!container) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Container does not exit");
2514     ierr = PetscContainerGetPointer(container,(void **)&redund);CHKERRQ(ierr);
2515     if (nzlocal != redund->nzlocal) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong nzlocal");
2516 
2517     nsends    = redund->nsends;
2518     nrecvs    = redund->nrecvs;
2519     send_rank = redund->send_rank;
2520     recv_rank = redund->recv_rank;
2521     sbuf_nz   = redund->sbuf_nz;
2522     rbuf_nz   = redund->rbuf_nz;
2523     sbuf_j    = redund->sbuf_j;
2524     sbuf_a    = redund->sbuf_a;
2525     rbuf_j    = redund->rbuf_j;
2526     rbuf_a    = redund->rbuf_a;
2527   }
2528 
2529   if (reuse == MAT_INITIAL_MATRIX){
2530     PetscMPIInt  subrank,subsize;
2531     PetscInt     nleftover,np_subcomm;
2532     /* get the destination processors' id send_rank, nsends and nrecvs */
2533     ierr = MPI_Comm_rank(subcomm,&subrank);CHKERRQ(ierr);
2534     ierr = MPI_Comm_size(subcomm,&subsize);CHKERRQ(ierr);
2535     ierr = PetscMalloc2(size,PetscMPIInt,&send_rank,size,PetscMPIInt,&recv_rank);
2536     np_subcomm = size/nsubcomm;
2537     nleftover  = size - nsubcomm*np_subcomm;
2538     nsends = 0; nrecvs = 0;
2539     for (i=0; i<size; i++){ /* i=rank*/
2540       if (subrank == i/nsubcomm && rank != i){ /* my_subrank == other's subrank */
2541         send_rank[nsends] = i; nsends++;
2542         recv_rank[nrecvs++] = i;
2543       }
2544     }
2545     if (rank >= size - nleftover){/* this proc is a leftover processor */
2546       i = size-nleftover-1;
2547       j = 0;
2548       while (j < nsubcomm - nleftover){
2549         send_rank[nsends++] = i;
2550         i--; j++;
2551       }
2552     }
2553 
2554     if (nleftover && subsize == size/nsubcomm && subrank==subsize-1){ /* this proc recvs from leftover processors */
2555       for (i=0; i<nleftover; i++){
2556         recv_rank[nrecvs++] = size-nleftover+i;
2557       }
2558     }
2559 
2560     /* allocate sbuf_j, sbuf_a */
2561     i = nzlocal + rowrange[rank+1] - rowrange[rank] + 2;
2562     ierr = PetscMalloc(i*sizeof(PetscInt),&sbuf_j);CHKERRQ(ierr);
2563     ierr = PetscMalloc((nzlocal+1)*sizeof(PetscScalar),&sbuf_a);CHKERRQ(ierr);
2564   } /* endof if (reuse == MAT_INITIAL_MATRIX) */
2565 
2566   /* copy mat's local entries into the buffers */
2567   if (reuse == MAT_INITIAL_MATRIX){
2568     rownz_max = 0;
2569     rptr = sbuf_j;
2570     cols = sbuf_j + rend-rstart + 1;
2571     vals = sbuf_a;
2572     rptr[0] = 0;
2573     for (i=0; i<rend-rstart; i++){
2574       row = i + rstart;
2575       nzA    = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i];
2576       ncols  = nzA + nzB;
2577       cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i];
2578       aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i];
2579       /* load the column indices for this row into cols */
2580       lwrite = 0;
2581       for (l=0; l<nzB; l++) {
2582         if ((ctmp = bmap[cworkB[l]]) < cstart){
2583           vals[lwrite]   = aworkB[l];
2584           cols[lwrite++] = ctmp;
2585         }
2586       }
2587       for (l=0; l<nzA; l++){
2588         vals[lwrite]   = aworkA[l];
2589         cols[lwrite++] = cstart + cworkA[l];
2590       }
2591       for (l=0; l<nzB; l++) {
2592         if ((ctmp = bmap[cworkB[l]]) >= cend){
2593           vals[lwrite]   = aworkB[l];
2594           cols[lwrite++] = ctmp;
2595         }
2596       }
2597       vals += ncols;
2598       cols += ncols;
2599       rptr[i+1] = rptr[i] + ncols;
2600       if (rownz_max < ncols) rownz_max = ncols;
2601     }
2602     if (rptr[rend-rstart] != a->nz + b->nz) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB, "rptr[%d] %d != %d + %d",rend-rstart,rptr[rend-rstart+1],a->nz,b->nz);
2603   } else { /* only copy matrix values into sbuf_a */
2604     rptr = sbuf_j;
2605     vals = sbuf_a;
2606     rptr[0] = 0;
2607     for (i=0; i<rend-rstart; i++){
2608       row = i + rstart;
2609       nzA    = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i];
2610       ncols  = nzA + nzB;
2611       cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i];
2612       aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i];
2613       lwrite = 0;
2614       for (l=0; l<nzB; l++) {
2615         if ((ctmp = bmap[cworkB[l]]) < cstart) vals[lwrite++] = aworkB[l];
2616       }
2617       for (l=0; l<nzA; l++) vals[lwrite++] = aworkA[l];
2618       for (l=0; l<nzB; l++) {
2619         if ((ctmp = bmap[cworkB[l]]) >= cend) vals[lwrite++] = aworkB[l];
2620       }
2621       vals += ncols;
2622       rptr[i+1] = rptr[i] + ncols;
2623     }
2624   } /* endof if (reuse == MAT_INITIAL_MATRIX) */
2625 
2626   /* send nzlocal to others, and recv other's nzlocal */
2627   /*--------------------------------------------------*/
2628   if (reuse == MAT_INITIAL_MATRIX){
2629     ierr = PetscMalloc2(3*(nsends + nrecvs)+1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr);
2630     s_waits2 = s_waits3 + nsends;
2631     s_waits1 = s_waits2 + nsends;
2632     r_waits1 = s_waits1 + nsends;
2633     r_waits2 = r_waits1 + nrecvs;
2634     r_waits3 = r_waits2 + nrecvs;
2635   } else {
2636     ierr = PetscMalloc2(nsends + nrecvs +1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr);
2637     r_waits3 = s_waits3 + nsends;
2638   }
2639 
2640   ierr = PetscObjectGetNewTag((PetscObject)mat,&tag3);CHKERRQ(ierr);
2641   if (reuse == MAT_INITIAL_MATRIX){
2642     /* get new tags to keep the communication clean */
2643     ierr = PetscObjectGetNewTag((PetscObject)mat,&tag1);CHKERRQ(ierr);
2644     ierr = PetscObjectGetNewTag((PetscObject)mat,&tag2);CHKERRQ(ierr);
2645     ierr = PetscMalloc4(nsends,PetscInt,&sbuf_nz,nrecvs,PetscInt,&rbuf_nz,nrecvs,PetscInt*,&rbuf_j,nrecvs,PetscScalar*,&rbuf_a);CHKERRQ(ierr);
2646 
2647     /* post receives of other's nzlocal */
2648     for (i=0; i<nrecvs; i++){
2649       ierr = MPI_Irecv(rbuf_nz+i,1,MPIU_INT,MPI_ANY_SOURCE,tag1,comm,r_waits1+i);CHKERRQ(ierr);
2650     }
2651     /* send nzlocal to others */
2652     for (i=0; i<nsends; i++){
2653       sbuf_nz[i] = nzlocal;
2654       ierr = MPI_Isend(sbuf_nz+i,1,MPIU_INT,send_rank[i],tag1,comm,s_waits1+i);CHKERRQ(ierr);
2655     }
2656     /* wait on receives of nzlocal; allocate space for rbuf_j, rbuf_a */
2657     count = nrecvs;
2658     while (count) {
2659       ierr = MPI_Waitany(nrecvs,r_waits1,&imdex,&recv_status);CHKERRQ(ierr);
2660       recv_rank[imdex] = recv_status.MPI_SOURCE;
2661       /* allocate rbuf_a and rbuf_j; then post receives of rbuf_j */
2662       ierr = PetscMalloc((rbuf_nz[imdex]+1)*sizeof(PetscScalar),&rbuf_a[imdex]);CHKERRQ(ierr);
2663 
2664       i = rowrange[recv_status.MPI_SOURCE+1] - rowrange[recv_status.MPI_SOURCE]; /* number of expected mat->i */
2665       rbuf_nz[imdex] += i + 2;
2666       ierr = PetscMalloc(rbuf_nz[imdex]*sizeof(PetscInt),&rbuf_j[imdex]);CHKERRQ(ierr);
2667       ierr = MPI_Irecv(rbuf_j[imdex],rbuf_nz[imdex],MPIU_INT,recv_status.MPI_SOURCE,tag2,comm,r_waits2+imdex);CHKERRQ(ierr);
2668       count--;
2669     }
2670     /* wait on sends of nzlocal */
2671     if (nsends) {ierr = MPI_Waitall(nsends,s_waits1,send_status);CHKERRQ(ierr);}
2672     /* send mat->i,j to others, and recv from other's */
2673     /*------------------------------------------------*/
2674     for (i=0; i<nsends; i++){
2675       j = nzlocal + rowrange[rank+1] - rowrange[rank] + 1;
2676       ierr = MPI_Isend(sbuf_j,j,MPIU_INT,send_rank[i],tag2,comm,s_waits2+i);CHKERRQ(ierr);
2677     }
2678     /* wait on receives of mat->i,j */
2679     /*------------------------------*/
2680     count = nrecvs;
2681     while (count) {
2682       ierr = MPI_Waitany(nrecvs,r_waits2,&imdex,&recv_status);CHKERRQ(ierr);
2683       if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE);
2684       count--;
2685     }
2686     /* wait on sends of mat->i,j */
2687     /*---------------------------*/
2688     if (nsends) {
2689       ierr = MPI_Waitall(nsends,s_waits2,send_status);CHKERRQ(ierr);
2690     }
2691   } /* endof if (reuse == MAT_INITIAL_MATRIX) */
2692 
2693   /* post receives, send and receive mat->a */
2694   /*----------------------------------------*/
2695   for (imdex=0; imdex<nrecvs; imdex++) {
2696     ierr = MPI_Irecv(rbuf_a[imdex],rbuf_nz[imdex],MPIU_SCALAR,recv_rank[imdex],tag3,comm,r_waits3+imdex);CHKERRQ(ierr);
2697   }
2698   for (i=0; i<nsends; i++){
2699     ierr = MPI_Isend(sbuf_a,nzlocal,MPIU_SCALAR,send_rank[i],tag3,comm,s_waits3+i);CHKERRQ(ierr);
2700   }
2701   count = nrecvs;
2702   while (count) {
2703     ierr = MPI_Waitany(nrecvs,r_waits3,&imdex,&recv_status);CHKERRQ(ierr);
2704     if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE);
2705     count--;
2706   }
2707   if (nsends) {
2708     ierr = MPI_Waitall(nsends,s_waits3,send_status);CHKERRQ(ierr);
2709   }
2710 
2711   ierr = PetscFree2(s_waits3,send_status);CHKERRQ(ierr);
2712 
2713   /* create redundant matrix */
2714   /*-------------------------*/
2715   if (reuse == MAT_INITIAL_MATRIX){
2716     /* compute rownz_max for preallocation */
2717     for (imdex=0; imdex<nrecvs; imdex++){
2718       j = rowrange[recv_rank[imdex]+1] - rowrange[recv_rank[imdex]];
2719       rptr = rbuf_j[imdex];
2720       for (i=0; i<j; i++){
2721         ncols = rptr[i+1] - rptr[i];
2722         if (rownz_max < ncols) rownz_max = ncols;
2723       }
2724     }
2725 
2726     ierr = MatCreate(subcomm,&C);CHKERRQ(ierr);
2727     ierr = MatSetSizes(C,mlocal_sub,mlocal_sub,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr);
2728     ierr = MatSetFromOptions(C);CHKERRQ(ierr);
2729     ierr = MatSeqAIJSetPreallocation(C,rownz_max,PETSC_NULL);CHKERRQ(ierr);
2730     ierr = MatMPIAIJSetPreallocation(C,rownz_max,PETSC_NULL,rownz_max,PETSC_NULL);CHKERRQ(ierr);
2731   } else {
2732     C = *matredundant;
2733   }
2734 
2735   /* insert local matrix entries */
2736   rptr = sbuf_j;
2737   cols = sbuf_j + rend-rstart + 1;
2738   vals = sbuf_a;
2739   for (i=0; i<rend-rstart; i++){
2740     row   = i + rstart;
2741     ncols = rptr[i+1] - rptr[i];
2742     ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
2743     vals += ncols;
2744     cols += ncols;
2745   }
2746   /* insert received matrix entries */
2747   for (imdex=0; imdex<nrecvs; imdex++){
2748     rstart = rowrange[recv_rank[imdex]];
2749     rend   = rowrange[recv_rank[imdex]+1];
2750     rptr = rbuf_j[imdex];
2751     cols = rbuf_j[imdex] + rend-rstart + 1;
2752     vals = rbuf_a[imdex];
2753     for (i=0; i<rend-rstart; i++){
2754       row   = i + rstart;
2755       ncols = rptr[i+1] - rptr[i];
2756       ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
2757       vals += ncols;
2758       cols += ncols;
2759     }
2760   }
2761   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2762   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2763   ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr);
2764   if (M != mat->rmap->N || N != mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"redundant mat size %d != input mat size %d",M,mat->rmap->N);
2765   if (reuse == MAT_INITIAL_MATRIX) {
2766     PetscContainer container;
2767     *matredundant = C;
2768     /* create a supporting struct and attach it to C for reuse */
2769     ierr = PetscNewLog(C,Mat_Redundant,&redund);CHKERRQ(ierr);
2770     ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
2771     ierr = PetscContainerSetPointer(container,redund);CHKERRQ(ierr);
2772     ierr = PetscContainerSetUserDestroy(container,PetscContainerDestroy_MatRedundant);CHKERRQ(ierr);
2773     ierr = PetscObjectCompose((PetscObject)C,"Mat_Redundant",(PetscObject)container);CHKERRQ(ierr);
2774     ierr = PetscContainerDestroy(&container);CHKERRQ(ierr);
2775 
2776     redund->nzlocal = nzlocal;
2777     redund->nsends  = nsends;
2778     redund->nrecvs  = nrecvs;
2779     redund->send_rank = send_rank;
2780     redund->recv_rank = recv_rank;
2781     redund->sbuf_nz = sbuf_nz;
2782     redund->rbuf_nz = rbuf_nz;
2783     redund->sbuf_j  = sbuf_j;
2784     redund->sbuf_a  = sbuf_a;
2785     redund->rbuf_j  = rbuf_j;
2786     redund->rbuf_a  = rbuf_a;
2787 
2788     redund->Destroy = C->ops->destroy;
2789     C->ops->destroy = MatDestroy_MatRedundant;
2790   }
2791   PetscFunctionReturn(0);
2792 }
2793 
2794 #undef __FUNCT__
2795 #define __FUNCT__ "MatGetRowMaxAbs_MPIAIJ"
2796 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2797 {
2798   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2799   PetscErrorCode ierr;
2800   PetscInt       i,*idxb = 0;
2801   PetscScalar    *va,*vb;
2802   Vec            vtmp;
2803 
2804   PetscFunctionBegin;
2805   ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr);
2806   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2807   if (idx) {
2808     for (i=0; i<A->rmap->n; i++) {
2809       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2810     }
2811   }
2812 
2813   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
2814   if (idx) {
2815     ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr);
2816   }
2817   ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
2818   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
2819 
2820   for (i=0; i<A->rmap->n; i++){
2821     if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
2822       va[i] = vb[i];
2823       if (idx) idx[i] = a->garray[idxb[i]];
2824     }
2825   }
2826 
2827   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2828   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
2829   ierr = PetscFree(idxb);CHKERRQ(ierr);
2830   ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
2831   PetscFunctionReturn(0);
2832 }
2833 
2834 #undef __FUNCT__
2835 #define __FUNCT__ "MatGetRowMinAbs_MPIAIJ"
2836 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2837 {
2838   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2839   PetscErrorCode ierr;
2840   PetscInt       i,*idxb = 0;
2841   PetscScalar    *va,*vb;
2842   Vec            vtmp;
2843 
2844   PetscFunctionBegin;
2845   ierr = MatGetRowMinAbs(a->A,v,idx);CHKERRQ(ierr);
2846   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2847   if (idx) {
2848     for (i=0; i<A->cmap->n; i++) {
2849       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2850     }
2851   }
2852 
2853   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
2854   if (idx) {
2855     ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr);
2856   }
2857   ierr = MatGetRowMinAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
2858   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
2859 
2860   for (i=0; i<A->rmap->n; i++){
2861     if (PetscAbsScalar(va[i]) > PetscAbsScalar(vb[i])) {
2862       va[i] = vb[i];
2863       if (idx) idx[i] = a->garray[idxb[i]];
2864     }
2865   }
2866 
2867   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2868   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
2869   ierr = PetscFree(idxb);CHKERRQ(ierr);
2870   ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
2871   PetscFunctionReturn(0);
2872 }
2873 
2874 #undef __FUNCT__
2875 #define __FUNCT__ "MatGetRowMin_MPIAIJ"
2876 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2877 {
2878   Mat_MPIAIJ    *mat    = (Mat_MPIAIJ *) A->data;
2879   PetscInt       n      = A->rmap->n;
2880   PetscInt       cstart = A->cmap->rstart;
2881   PetscInt      *cmap   = mat->garray;
2882   PetscInt      *diagIdx, *offdiagIdx;
2883   Vec            diagV, offdiagV;
2884   PetscScalar   *a, *diagA, *offdiagA;
2885   PetscInt       r;
2886   PetscErrorCode ierr;
2887 
2888   PetscFunctionBegin;
2889   ierr = PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);CHKERRQ(ierr);
2890   ierr = VecCreateSeq(((PetscObject)A)->comm, n, &diagV);CHKERRQ(ierr);
2891   ierr = VecCreateSeq(((PetscObject)A)->comm, n, &offdiagV);CHKERRQ(ierr);
2892   ierr = MatGetRowMin(mat->A, diagV,    diagIdx);CHKERRQ(ierr);
2893   ierr = MatGetRowMin(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
2894   ierr = VecGetArray(v,        &a);CHKERRQ(ierr);
2895   ierr = VecGetArray(diagV,    &diagA);CHKERRQ(ierr);
2896   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2897   for(r = 0; r < n; ++r) {
2898     if (PetscAbsScalar(diagA[r]) <= PetscAbsScalar(offdiagA[r])) {
2899       a[r]   = diagA[r];
2900       idx[r] = cstart + diagIdx[r];
2901     } else {
2902       a[r]   = offdiagA[r];
2903       idx[r] = cmap[offdiagIdx[r]];
2904     }
2905   }
2906   ierr = VecRestoreArray(v,        &a);CHKERRQ(ierr);
2907   ierr = VecRestoreArray(diagV,    &diagA);CHKERRQ(ierr);
2908   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2909   ierr = VecDestroy(&diagV);CHKERRQ(ierr);
2910   ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
2911   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
2912   PetscFunctionReturn(0);
2913 }
2914 
2915 #undef __FUNCT__
2916 #define __FUNCT__ "MatGetRowMax_MPIAIJ"
2917 PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2918 {
2919   Mat_MPIAIJ    *mat    = (Mat_MPIAIJ *) A->data;
2920   PetscInt       n      = A->rmap->n;
2921   PetscInt       cstart = A->cmap->rstart;
2922   PetscInt      *cmap   = mat->garray;
2923   PetscInt      *diagIdx, *offdiagIdx;
2924   Vec            diagV, offdiagV;
2925   PetscScalar   *a, *diagA, *offdiagA;
2926   PetscInt       r;
2927   PetscErrorCode ierr;
2928 
2929   PetscFunctionBegin;
2930   ierr = PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);CHKERRQ(ierr);
2931   ierr = VecCreateSeq(((PetscObject)A)->comm, n, &diagV);CHKERRQ(ierr);
2932   ierr = VecCreateSeq(((PetscObject)A)->comm, n, &offdiagV);CHKERRQ(ierr);
2933   ierr = MatGetRowMax(mat->A, diagV,    diagIdx);CHKERRQ(ierr);
2934   ierr = MatGetRowMax(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
2935   ierr = VecGetArray(v,        &a);CHKERRQ(ierr);
2936   ierr = VecGetArray(diagV,    &diagA);CHKERRQ(ierr);
2937   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2938   for(r = 0; r < n; ++r) {
2939     if (PetscAbsScalar(diagA[r]) >= PetscAbsScalar(offdiagA[r])) {
2940       a[r]   = diagA[r];
2941       idx[r] = cstart + diagIdx[r];
2942     } else {
2943       a[r]   = offdiagA[r];
2944       idx[r] = cmap[offdiagIdx[r]];
2945     }
2946   }
2947   ierr = VecRestoreArray(v,        &a);CHKERRQ(ierr);
2948   ierr = VecRestoreArray(diagV,    &diagA);CHKERRQ(ierr);
2949   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2950   ierr = VecDestroy(&diagV);CHKERRQ(ierr);
2951   ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
2952   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
2953   PetscFunctionReturn(0);
2954 }
2955 
2956 #undef __FUNCT__
2957 #define __FUNCT__ "MatGetSeqNonzeroStructure_MPIAIJ"
2958 PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat,Mat *newmat)
2959 {
2960   PetscErrorCode ierr;
2961   Mat            *dummy;
2962 
2963   PetscFunctionBegin;
2964   ierr = MatGetSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy);CHKERRQ(ierr);
2965   *newmat = *dummy;
2966   ierr = PetscFree(dummy);CHKERRQ(ierr);
2967   PetscFunctionReturn(0);
2968 }
2969 
2970 extern PetscErrorCode  MatFDColoringApply_AIJ(Mat,MatFDColoring,Vec,MatStructure*,void*);
2971 
2972 #undef __FUNCT__
2973 #define __FUNCT__ "MatInvertBlockDiagonal_MPIAIJ"
2974 PetscErrorCode  MatInvertBlockDiagonal_MPIAIJ(Mat A,PetscScalar **values)
2975 {
2976   Mat_MPIAIJ    *a = (Mat_MPIAIJ*) A->data;
2977   PetscErrorCode ierr;
2978 
2979   PetscFunctionBegin;
2980   ierr = MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr);
2981   PetscFunctionReturn(0);
2982 }
2983 
2984 
/* -------------------------------------------------------------------*/
/*
   Operation table for MATMPIAIJ.  Entries are positional: slot k implements
   the MatOperation with numeric value k (the numeric comments are index
   reminders), and a 0 entry means the operation is not provided by this
   matrix type.  Do not reorder entries.
*/
static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
       MatGetRow_MPIAIJ,
       MatRestoreRow_MPIAIJ,
       MatMult_MPIAIJ,
/* 4*/ MatMultAdd_MPIAIJ,
       MatMultTranspose_MPIAIJ,
       MatMultTransposeAdd_MPIAIJ,
#ifdef PETSC_HAVE_PBGL
       MatSolve_MPIAIJ,
#else
       0,
#endif
       0,
       0,
/*10*/ 0,
       0,
       0,
       MatSOR_MPIAIJ,
       MatTranspose_MPIAIJ,
/*15*/ MatGetInfo_MPIAIJ,
       MatEqual_MPIAIJ,
       MatGetDiagonal_MPIAIJ,
       MatDiagonalScale_MPIAIJ,
       MatNorm_MPIAIJ,
/*20*/ MatAssemblyBegin_MPIAIJ,
       MatAssemblyEnd_MPIAIJ,
       MatSetOption_MPIAIJ,
       MatZeroEntries_MPIAIJ,
/*24*/ MatZeroRows_MPIAIJ,
       0,
#ifdef PETSC_HAVE_PBGL
       0,
#else
       0,
#endif
       0,
       0,
/*29*/ MatSetUpPreallocation_MPIAIJ,
#ifdef PETSC_HAVE_PBGL
       0,
#else
       0,
#endif
       0,
       0,
       0,
/*34*/ MatDuplicate_MPIAIJ,
       0,
       0,
       0,
       0,
/*39*/ MatAXPY_MPIAIJ,
       MatGetSubMatrices_MPIAIJ,
       MatIncreaseOverlap_MPIAIJ,
       MatGetValues_MPIAIJ,
       MatCopy_MPIAIJ,
/*44*/ MatGetRowMax_MPIAIJ,
       MatScale_MPIAIJ,
       0,
       0,
       MatZeroRowsColumns_MPIAIJ,
/*49*/ MatSetBlockSize_MPIAIJ,
       0,
       0,
       0,
       0,
/*54*/ MatFDColoringCreate_MPIAIJ,
       0,
       MatSetUnfactored_MPIAIJ,
       MatPermute_MPIAIJ,
       0,
/*59*/ MatGetSubMatrix_MPIAIJ,
       MatDestroy_MPIAIJ,
       MatView_MPIAIJ,
       0,
       0,
/*64*/ 0,
       0,
       0,
       0,
       0,
/*69*/ MatGetRowMaxAbs_MPIAIJ,
       MatGetRowMinAbs_MPIAIJ,
       0,
       MatSetColoring_MPIAIJ,
#if defined(PETSC_HAVE_ADIC)
       MatSetValuesAdic_MPIAIJ,
#else
       0,
#endif
       MatSetValuesAdifor_MPIAIJ,
/*75*/ MatFDColoringApply_AIJ,
       0,
       0,
       0,
       0,
/*80*/ 0,
       0,
       0,
/*83*/ MatLoad_MPIAIJ,
       0,
       0,
       0,
       0,
       0,
/*89*/ MatMatMult_MPIAIJ_MPIAIJ,
       MatMatMultSymbolic_MPIAIJ_MPIAIJ,
       MatMatMultNumeric_MPIAIJ_MPIAIJ,
       MatPtAP_Basic,
       MatPtAPSymbolic_MPIAIJ,
/*94*/ MatPtAPNumeric_MPIAIJ,
       0,
       0,
       0,
       0,
/*99*/ 0,
       MatPtAPSymbolic_MPIAIJ_MPIAIJ,
       MatPtAPNumeric_MPIAIJ_MPIAIJ,
       MatConjugate_MPIAIJ,
       0,
/*104*/MatSetValuesRow_MPIAIJ,
       MatRealPart_MPIAIJ,
       MatImaginaryPart_MPIAIJ,
       0,
       0,
/*109*/0,
       MatGetRedundantMatrix_MPIAIJ,
       MatGetRowMin_MPIAIJ,
       0,
       0,
/*114*/MatGetSeqNonzeroStructure_MPIAIJ,
       0,
       0,
       0,
       0,
/*119*/0,
       0,
       0,
       0,
       MatGetMultiProcBlock_MPIAIJ,
/*124*/MatFindNonZeroRows_MPIAIJ,
       MatGetColumnNorms_MPIAIJ,
       MatInvertBlockDiagonal_MPIAIJ,
       0,
       MatGetSubMatricesParallel_MPIAIJ,
/*129*/0
};
3133 
3134 /* ----------------------------------------------------------------------------------------*/
3135 
3136 EXTERN_C_BEGIN
3137 #undef __FUNCT__
3138 #define __FUNCT__ "MatStoreValues_MPIAIJ"
3139 PetscErrorCode  MatStoreValues_MPIAIJ(Mat mat)
3140 {
3141   Mat_MPIAIJ     *aij = (Mat_MPIAIJ *)mat->data;
3142   PetscErrorCode ierr;
3143 
3144   PetscFunctionBegin;
3145   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
3146   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
3147   PetscFunctionReturn(0);
3148 }
3149 EXTERN_C_END
3150 
3151 EXTERN_C_BEGIN
3152 #undef __FUNCT__
3153 #define __FUNCT__ "MatRetrieveValues_MPIAIJ"
3154 PetscErrorCode  MatRetrieveValues_MPIAIJ(Mat mat)
3155 {
3156   Mat_MPIAIJ     *aij = (Mat_MPIAIJ *)mat->data;
3157   PetscErrorCode ierr;
3158 
3159   PetscFunctionBegin;
3160   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
3161   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
3162   PetscFunctionReturn(0);
3163 }
3164 EXTERN_C_END
3165 
3166 EXTERN_C_BEGIN
3167 #undef __FUNCT__
3168 #define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ"
3169 PetscErrorCode  MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
3170 {
3171   Mat_MPIAIJ     *b;
3172   PetscErrorCode ierr;
3173   PetscInt       i;
3174 
3175   PetscFunctionBegin;
3176   if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5;
3177   if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2;
3178   if (d_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz);
3179   if (o_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz);
3180 
3181   ierr = PetscLayoutSetBlockSize(B->rmap,1);CHKERRQ(ierr);
3182   ierr = PetscLayoutSetBlockSize(B->cmap,1);CHKERRQ(ierr);
3183   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3184   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3185   if (d_nnz) {
3186     for (i=0; i<B->rmap->n; i++) {
3187       if (d_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %D value %D",i,d_nnz[i]);
3188     }
3189   }
3190   if (o_nnz) {
3191     for (i=0; i<B->rmap->n; i++) {
3192       if (o_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %D value %D",i,o_nnz[i]);
3193     }
3194   }
3195   b = (Mat_MPIAIJ*)B->data;
3196 
3197   if (!B->preallocated) {
3198     /* Explicitly create 2 MATSEQAIJ matrices. */
3199     ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
3200     ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr);
3201     ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr);
3202     ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr);
3203     ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
3204     ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr);
3205     ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr);
3206     ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr);
3207   }
3208 
3209   ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr);
3210   ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr);
3211   B->preallocated = PETSC_TRUE;
3212   PetscFunctionReturn(0);
3213 }
3214 EXTERN_C_END
3215 
#undef __FUNCT__
#define __FUNCT__ "MatDuplicate_MPIAIJ"
/*
   Creates a new MPIAIJ matrix with the same layout and structure as matin;
   cpvalues is passed through to MatDuplicate() on the two sequential blocks
   and controls whether numerical values are copied.
*/
PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
{
  Mat            mat;
  Mat_MPIAIJ     *a,*oldmat = (Mat_MPIAIJ*)matin->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  *newmat       = 0;
  ierr = MatCreate(((PetscObject)matin)->comm,&mat);CHKERRQ(ierr);
  ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr);
  ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr);
  /* copy the whole ops table so any type-specific overrides carry over */
  ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
  a    = (Mat_MPIAIJ*)mat->data;

  /* mark the duplicate as already assembled and preallocated */
  mat->factortype    = matin->factortype;
  mat->rmap->bs      = matin->rmap->bs;
  mat->assembled    = PETSC_TRUE;
  mat->insertmode   = NOT_SET_VALUES;
  mat->preallocated = PETSC_TRUE;

  a->size           = oldmat->size;
  a->rank           = oldmat->rank;
  a->donotstash     = oldmat->donotstash;
  a->roworiented    = oldmat->roworiented;
  /* MatGetRow() scratch space is created lazily, not copied */
  a->rowindices     = 0;
  a->rowvalues      = 0;
  a->getrowactive   = PETSC_FALSE;

  ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr);
  ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr);

  /* colmap: presumably the global-to-local column lookup used during
     off-diagonal assembly — stored as a table or a dense array depending
     on PETSC_USE_CTABLE */
  if (oldmat->colmap) {
#if defined (PETSC_USE_CTABLE)
    ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
#else
    ierr = PetscMalloc((mat->cmap->N)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr);
    ierr = PetscLogObjectMemory(mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
    ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
#endif
  } else a->colmap = 0;
  /* garray: one global column index per column of the off-diagonal block B */
  if (oldmat->garray) {
    PetscInt len;
    len  = oldmat->B->cmap->n;
    ierr = PetscMalloc((len+1)*sizeof(PetscInt),&a->garray);CHKERRQ(ierr);
    ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr);
    if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); }
  } else a->garray = 0;

  /* duplicate the MatMult communication objects and the two blocks */
  ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr);
  ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr);
  ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr);
  ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr);
  ierr = PetscFListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr);
  *newmat = mat;
  PetscFunctionReturn(0);
}
3278 
3279 
3280 
3281 #undef __FUNCT__
3282 #define __FUNCT__ "MatLoad_MPIAIJ"
3283 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer)
3284 {
3285   PetscScalar    *vals,*svals;
3286   MPI_Comm       comm = ((PetscObject)viewer)->comm;
3287   PetscErrorCode ierr;
3288   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
3289   PetscInt       i,nz,j,rstart,rend,mmax,maxnz = 0,grows,gcols;
3290   PetscInt       header[4],*rowlengths = 0,M,N,m,*cols;
3291   PetscInt       *ourlens = PETSC_NULL,*procsnz = PETSC_NULL,*offlens = PETSC_NULL,jj,*mycols,*smycols;
3292   PetscInt       cend,cstart,n,*rowners,sizesset=1;
3293   int            fd;
3294 
3295   PetscFunctionBegin;
3296   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3297   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3298   if (!rank) {
3299     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
3300     ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
3301     if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
3302   }
3303 
3304   if (newMat->rmap->n < 0 && newMat->rmap->N < 0 && newMat->cmap->n < 0 && newMat->cmap->N < 0) sizesset = 0;
3305 
3306   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
3307   M = header[1]; N = header[2];
3308   /* If global rows/cols are set to PETSC_DECIDE, set it to the sizes given in the file */
3309   if (sizesset && newMat->rmap->N < 0) newMat->rmap->N = M;
3310   if (sizesset && newMat->cmap->N < 0) newMat->cmap->N = N;
3311 
3312   /* If global sizes are set, check if they are consistent with that given in the file */
3313   if (sizesset) {
3314     ierr = MatGetSize(newMat,&grows,&gcols);CHKERRQ(ierr);
3315   }
3316   if (sizesset && newMat->rmap->N != grows) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows:Matrix in file has (%d) and input matrix has (%d)",M,grows);
3317   if (sizesset && newMat->cmap->N != gcols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols:Matrix in file has (%d) and input matrix has (%d)",N,gcols);
3318 
3319   /* determine ownership of all rows */
3320   if (newMat->rmap->n < 0 ) m    = M/size + ((M % size) > rank); /* PETSC_DECIDE */
3321   else m = newMat->rmap->n; /* Set by user */
3322 
3323   ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr);
3324   ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
3325 
3326   /* First process needs enough room for process with most rows */
3327   if (!rank) {
3328     mmax       = rowners[1];
3329     for (i=2; i<size; i++) {
3330       mmax = PetscMax(mmax,rowners[i]);
3331     }
3332   } else mmax = m;
3333 
3334   rowners[0] = 0;
3335   for (i=2; i<=size; i++) {
3336     rowners[i] += rowners[i-1];
3337   }
3338   rstart = rowners[rank];
3339   rend   = rowners[rank+1];
3340 
3341   /* distribute row lengths to all processors */
3342   ierr    = PetscMalloc2(mmax,PetscInt,&ourlens,mmax,PetscInt,&offlens);CHKERRQ(ierr);
3343   if (!rank) {
3344     ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr);
3345     ierr = PetscMalloc(m*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
3346     ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr);
3347     ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr);
3348     for (j=0; j<m; j++) {
3349       procsnz[0] += ourlens[j];
3350     }
3351     for (i=1; i<size; i++) {
3352       ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr);
3353       /* calculate the number of nonzeros on each processor */
3354       for (j=0; j<rowners[i+1]-rowners[i]; j++) {
3355         procsnz[i] += rowlengths[j];
3356       }
3357       ierr = MPILong_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
3358     }
3359     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
3360   } else {
3361     ierr = MPILong_Recv(ourlens,m,MPIU_INT,0,tag,comm);CHKERRQ(ierr);
3362   }
3363 
3364   if (!rank) {
3365     /* determine max buffer needed and allocate it */
3366     maxnz = 0;
3367     for (i=0; i<size; i++) {
3368       maxnz = PetscMax(maxnz,procsnz[i]);
3369     }
3370     ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr);
3371 
3372     /* read in my part of the matrix column indices  */
3373     nz   = procsnz[0];
3374     ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr);
3375     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
3376 
3377     /* read in every one elses and ship off */
3378     for (i=1; i<size; i++) {
3379       nz     = procsnz[i];
3380       ierr   = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
3381       ierr   = MPILong_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
3382     }
3383     ierr = PetscFree(cols);CHKERRQ(ierr);
3384   } else {
3385     /* determine buffer space needed for message */
3386     nz = 0;
3387     for (i=0; i<m; i++) {
3388       nz += ourlens[i];
3389     }
3390     ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr);
3391 
3392     /* receive message of column indices*/
3393     ierr = MPILong_Recv(mycols,nz,MPIU_INT,0,tag,comm);CHKERRQ(ierr);
3394   }
3395 
3396   /* determine column ownership if matrix is not square */
3397   if (N != M) {
3398     if (newMat->cmap->n < 0) n      = N/size + ((N % size) > rank);
3399     else n = newMat->cmap->n;
3400     ierr   = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3401     cstart = cend - n;
3402   } else {
3403     cstart = rstart;
3404     cend   = rend;
3405     n      = cend - cstart;
3406   }
3407 
3408   /* loop over local rows, determining number of off diagonal entries */
3409   ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr);
3410   jj = 0;
3411   for (i=0; i<m; i++) {
3412     for (j=0; j<ourlens[i]; j++) {
3413       if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++;
3414       jj++;
3415     }
3416   }
3417 
3418   for (i=0; i<m; i++) {
3419     ourlens[i] -= offlens[i];
3420   }
3421   if (!sizesset) {
3422     ierr = MatSetSizes(newMat,m,n,M,N);CHKERRQ(ierr);
3423   }
3424   ierr = MatMPIAIJSetPreallocation(newMat,0,ourlens,0,offlens);CHKERRQ(ierr);
3425 
3426   for (i=0; i<m; i++) {
3427     ourlens[i] += offlens[i];
3428   }
3429 
3430   if (!rank) {
3431     ierr = PetscMalloc((maxnz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
3432 
3433     /* read in my part of the matrix numerical values  */
3434     nz   = procsnz[0];
3435     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3436 
3437     /* insert into matrix */
3438     jj      = rstart;
3439     smycols = mycols;
3440     svals   = vals;
3441     for (i=0; i<m; i++) {
3442       ierr = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
3443       smycols += ourlens[i];
3444       svals   += ourlens[i];
3445       jj++;
3446     }
3447 
3448     /* read in other processors and ship out */
3449     for (i=1; i<size; i++) {
3450       nz     = procsnz[i];
3451       ierr   = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3452       ierr   = MPILong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr);
3453     }
3454     ierr = PetscFree(procsnz);CHKERRQ(ierr);
3455   } else {
3456     /* receive numeric values */
3457     ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
3458 
3459     /* receive message of values*/
3460     ierr   = MPILong_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr);
3461 
3462     /* insert into matrix */
3463     jj      = rstart;
3464     smycols = mycols;
3465     svals   = vals;
3466     for (i=0; i<m; i++) {
3467       ierr     = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
3468       smycols += ourlens[i];
3469       svals   += ourlens[i];
3470       jj++;
3471     }
3472   }
3473   ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr);
3474   ierr = PetscFree(vals);CHKERRQ(ierr);
3475   ierr = PetscFree(mycols);CHKERRQ(ierr);
3476   ierr = PetscFree(rowners);CHKERRQ(ierr);
3477 
3478   ierr = MatAssemblyBegin(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3479   ierr = MatAssemblyEnd(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3480   PetscFunctionReturn(0);
3481 }
3482 
3483 #undef __FUNCT__
3484 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ"
3485 PetscErrorCode MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat)
3486 {
3487   PetscErrorCode ierr;
3488   IS             iscol_local;
3489   PetscInt       csize;
3490 
3491   PetscFunctionBegin;
3492   ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
3493   if (call == MAT_REUSE_MATRIX) {
3494     ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr);
3495     if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3496   } else {
3497     ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
3498   }
3499   ierr = MatGetSubMatrix_MPIAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr);
3500   if (call == MAT_INITIAL_MATRIX) {
3501     ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
3502     ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
3503   }
3504   PetscFunctionReturn(0);
3505 }
3506 
#undef __FUNCT__
#define __FUNCT__ "MatGetSubMatrix_MPIAIJ_Private"
/*
    Not great since it makes two copies of the submatrix, first an SeqAIJ
  in local and then by concatenating the local matrices the end result.
  Writing it directly would be much like MatGetSubMatrices_MPIAIJ()

  Note: This requires a sequential iscol with all indices.

  csize - number of columns of the result owned locally, or PETSC_DECIDE
  call  - MAT_INITIAL_MATRIX builds a new matrix and stashes the local
          sequential submatrix on it (key "SubMatrix") for later reuse;
          MAT_REUSE_MATRIX refills a matrix from a previous call.
*/
PetscErrorCode MatGetSubMatrix_MPIAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat)
{
  PetscErrorCode ierr;
  PetscMPIInt    rank,size;
  PetscInt       i,m,n,rstart,row,rend,nz,*cwork,j;
  PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal;
  Mat            *local,M,Mreuse;
  MatScalar      *vwork,*aa;
  MPI_Comm       comm = ((PetscObject)mat)->comm;
  Mat_SeqAIJ     *aij;


  PetscFunctionBegin;
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);

  if (call ==  MAT_REUSE_MATRIX) {
    /* recover the local sequential submatrix stashed by the initial call */
    ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr);
    if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
    local = &Mreuse;
    ierr  = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);CHKERRQ(ierr);
  } else {
    ierr   = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr);
    Mreuse = *local;
    ierr   = PetscFree(local);CHKERRQ(ierr);
  }

  /*
      m - number of local rows
      n - number of columns (same on all processors)
      rstart - first row in new global matrix generated
  */
  ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
  if (call == MAT_INITIAL_MATRIX) {
    aij = (Mat_SeqAIJ*)(Mreuse)->data;
    ii  = aij->i;
    jj  = aij->j;

    /*
        Determine the number of non-zeros in the diagonal and off-diagonal
        portions of the matrix in order to do correct preallocation
    */

    /* first get start and end of "diagonal" columns */
    if (csize == PETSC_DECIDE) {
      ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
      if (mglobal == n) { /* square matrix */
        nlocal = m;
      } else {
        nlocal = n/size + ((n % size) > rank);
      }
    } else {
      nlocal = csize;
    }
    /* prefix-sum the local column counts to get this rank's column range */
    ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
    rstart = rend - nlocal;
    if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n);

    /* next, compute all the lengths */
    ierr  = PetscMalloc((2*m+1)*sizeof(PetscInt),&dlens);CHKERRQ(ierr);
    olens = dlens + m;  /* olens shares the single allocation with dlens */
    for (i=0; i<m; i++) {
      jend = ii[i+1] - ii[i];
      olen = 0;
      dlen = 0;
      for (j=0; j<jend; j++) {
        if (*jj < rstart || *jj >= rend) olen++;
        else dlen++;
        jj++;
      }
      olens[i] = olen;
      dlens[i] = dlen;
    }
    ierr = MatCreate(comm,&M);CHKERRQ(ierr);
    ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr);
    ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr);
    ierr = PetscFree(dlens);CHKERRQ(ierr);
  } else {
    PetscInt ml,nl;

    M = *newmat;
    ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
    if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
    ierr = MatZeroEntries(M);CHKERRQ(ierr);
    /*
         The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
       rather than the slower MatSetValues().
    */
    M->was_assembled = PETSC_TRUE;
    M->assembled     = PETSC_FALSE;
  }
  /* copy each row of the gathered sequential matrix into the parallel result */
  ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
  aij = (Mat_SeqAIJ*)(Mreuse)->data;
  ii  = aij->i;
  jj  = aij->j;
  aa  = aij->a;
  for (i=0; i<m; i++) {
    row   = rstart + i;
    nz    = ii[i+1] - ii[i];
    cwork = jj;     jj += nz;
    vwork = aa;     aa += nz;
    ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
  }

  ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  *newmat = M;

  /* save submatrix used in processor for next request */
  if (call ==  MAT_INITIAL_MATRIX) {
    /* NOTE(review): PetscObjectCompose() appears to take its own reference,
       which is why we can destroy our handle immediately — confirm */
    ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
    ierr = MatDestroy(&Mreuse);CHKERRQ(ierr);
  }

  PetscFunctionReturn(0);
}
3633 
3634 EXTERN_C_BEGIN
3635 #undef __FUNCT__
3636 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR_MPIAIJ"
3637 PetscErrorCode  MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[])
3638 {
3639   PetscInt       m,cstart, cend,j,nnz,i,d;
3640   PetscInt       *d_nnz,*o_nnz,nnz_max = 0,rstart,ii;
3641   const PetscInt *JJ;
3642   PetscScalar    *values;
3643   PetscErrorCode ierr;
3644 
3645   PetscFunctionBegin;
3646   if (Ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0 it is %D",Ii[0]);
3647 
3648   ierr = PetscLayoutSetBlockSize(B->rmap,1);CHKERRQ(ierr);
3649   ierr = PetscLayoutSetBlockSize(B->cmap,1);CHKERRQ(ierr);
3650   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3651   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3652   m      = B->rmap->n;
3653   cstart = B->cmap->rstart;
3654   cend   = B->cmap->rend;
3655   rstart = B->rmap->rstart;
3656 
3657   ierr  = PetscMalloc2(m,PetscInt,&d_nnz,m,PetscInt,&o_nnz);CHKERRQ(ierr);
3658 
3659 #if defined(PETSC_USE_DEBUGGING)
3660   for (i=0; i<m; i++) {
3661     nnz     = Ii[i+1]- Ii[i];
3662     JJ      = J + Ii[i];
3663     if (nnz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative %D number of columns",i,nnz);
3664     if (nnz && (JJ[0] < 0)) SETERRRQ1(PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index",i,j);
3665     if (nnz && (JJ[nnz-1] >= B->cmap->N) SETERRRQ3(PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N);
3666   }
3667 #endif
3668 
3669   for (i=0; i<m; i++) {
3670     nnz     = Ii[i+1]- Ii[i];
3671     JJ      = J + Ii[i];
3672     nnz_max = PetscMax(nnz_max,nnz);
3673     d       = 0;
3674     for (j=0; j<nnz; j++) {
3675       if (cstart <= JJ[j] && JJ[j] < cend) d++;
3676     }
3677     d_nnz[i] = d;
3678     o_nnz[i] = nnz - d;
3679   }
3680   ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
3681   ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);
3682 
3683   if (v) values = (PetscScalar*)v;
3684   else {
3685     ierr = PetscMalloc((nnz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr);
3686     ierr = PetscMemzero(values,nnz_max*sizeof(PetscScalar));CHKERRQ(ierr);
3687   }
3688 
3689   for (i=0; i<m; i++) {
3690     ii   = i + rstart;
3691     nnz  = Ii[i+1]- Ii[i];
3692     ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ? Ii[i] : 0),INSERT_VALUES);CHKERRQ(ierr);
3693   }
3694   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3695   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3696 
3697   if (!v) {
3698     ierr = PetscFree(values);CHKERRQ(ierr);
3699   }
3700   PetscFunctionReturn(0);
3701 }
3702 EXTERN_C_END
3703 
3704 #undef __FUNCT__
3705 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR"
3706 /*@
3707    MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format
3708    (the default parallel PETSc format).
3709 
3710    Collective on MPI_Comm
3711 
3712    Input Parameters:
3713 +  B - the matrix
3714 .  i - the indices into j for the start of each local row (starts with zero)
3715 .  j - the column indices for each local row (starts with zero)
3716 -  v - optional values in the matrix
3717 
3718    Level: developer
3719 
3720    Notes:
3721        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
3722      thus you CANNOT change the matrix entries by changing the values of a[] after you have
3723      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3724 
3725        The i and j indices are 0 based, and i indices are indices corresponding to the local j array.
3726 
3727        The format which is used for the sparse matrix input, is equivalent to a
3728     row-major ordering.. i.e for the following matrix, the input data expected is
3729     as shown:
3730 
3731         1 0 0
3732         2 0 3     P0
3733        -------
3734         4 5 6     P1
3735 
     Process0 [P0]: rows_owned=[0,1]
        i =  {0,1,3}  [size = nrow+1  = 2+1]
        j =  {0,0,2}  [size = nz = 3]
        v =  {1,2,3}  [size = nz = 3]

     Process1 [P1]: rows_owned=[2]
        i =  {0,3}    [size = nrow+1  = 1+1]
        j =  {0,1,2}  [size = nz = 3]
        v =  {4,5,6}  [size = nz = 3]
3745 
3746 .keywords: matrix, aij, compressed row, sparse, parallel
3747 
3748 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateMPIAIJ(), MPIAIJ,
3749           MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays()
3750 @*/
PetscErrorCode  MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* Dispatch to the implementation registered on B under
     "MatMPIAIJSetPreallocationCSR_C" (e.g. MatMPIAIJSetPreallocationCSR_MPIAIJ);
     PetscTryMethod is a no-op for matrix types that do not register one. */
  ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocationCSR_C",(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,i,j,v));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
3759 
3760 #undef __FUNCT__
3761 #define __FUNCT__ "MatMPIAIJSetPreallocation"
3762 /*@C
3763    MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format
3764    (the default parallel PETSc format).  For good matrix assembly performance
3765    the user should preallocate the matrix storage by setting the parameters
3766    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3767    performance can be increased by more than a factor of 50.
3768 
3769    Collective on MPI_Comm
3770 
3771    Input Parameters:
3772 +  A - the matrix
3773 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
3774            (same value is used for all local rows)
3775 .  d_nnz - array containing the number of nonzeros in the various rows of the
3776            DIAGONAL portion of the local submatrix (possibly different for each row)
3777            or PETSC_NULL, if d_nz is used to specify the nonzero structure.
3778            The size of this array is equal to the number of local rows, i.e 'm'.
3779            You must leave room for the diagonal entry even if it is zero.
3780 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
3781            submatrix (same value is used for all local rows).
3782 -  o_nnz - array containing the number of nonzeros in the various rows of the
3783            OFF-DIAGONAL portion of the local submatrix (possibly different for
3784            each row) or PETSC_NULL, if o_nz is used to specify the nonzero
3785            structure. The size of this array is equal to the number
3786            of local rows, i.e 'm'.
3787 
3788    If the *_nnz parameter is given then the *_nz parameter is ignored
3789 
3790    The AIJ format (also called the Yale sparse matrix format or
3791    compressed row storage (CSR)), is fully compatible with standard Fortran 77
3792    storage.  The stored row and column indices begin with zero.
3793    See the <A href="../../docs/manual.pdf#nameddest=ch_mat">Mat chapter of the users manual</A> for details.
3794 
3795    The parallel matrix is partitioned such that the first m0 rows belong to
3796    process 0, the next m1 rows belong to process 1, the next m2 rows belong
3797    to process 2 etc.. where m0,m1,m2... are the input parameter 'm'.
3798 
3799    The DIAGONAL portion of the local submatrix of a processor can be defined
3800    as the submatrix which is obtained by extraction the part corresponding to
3801    the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the
3802    first row that belongs to the processor, r2 is the last row belonging to
3803    the this processor, and c1-c2 is range of indices of the local part of a
3804    vector suitable for applying the matrix to.  This is an mxn matrix.  In the
3805    common case of a square matrix, the row and column ranges are the same and
3806    the DIAGONAL part is also square. The remaining portion of the local
3807    submatrix (mxN) constitute the OFF-DIAGONAL portion.
3808 
3809    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
3810 
3811    You can call MatGetInfo() to get information on how effective the preallocation was;
3812    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
3813    You can also run with the option -info and look for messages with the string
3814    malloc in them to see if additional memory allocation was needed.
3815 
3816    Example usage:
3817 
3818    Consider the following 8x8 matrix with 34 non-zero values, that is
3819    assembled across 3 processors. Lets assume that proc0 owns 3 rows,
3820    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
3821    as follows:
3822 
3823 .vb
3824             1  2  0  |  0  3  0  |  0  4
3825     Proc0   0  5  6  |  7  0  0  |  8  0
3826             9  0 10  | 11  0  0  | 12  0
3827     -------------------------------------
3828            13  0 14  | 15 16 17  |  0  0
3829     Proc1   0 18  0  | 19 20 21  |  0  0
3830             0  0  0  | 22 23  0  | 24  0
3831     -------------------------------------
3832     Proc2  25 26 27  |  0  0 28  | 29  0
3833            30  0  0  | 31 32 33  |  0 34
3834 .ve
3835 
3836    This can be represented as a collection of submatrices as:
3837 
3838 .vb
3839       A B C
3840       D E F
3841       G H I
3842 .ve
3843 
3844    Where the submatrices A,B,C are owned by proc0, D,E,F are
3845    owned by proc1, G,H,I are owned by proc2.
3846 
3847    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3848    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3849    The 'M','N' parameters are 8,8, and have the same values on all procs.
3850 
3851    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
3852    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
3853    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
3854    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
   part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
   matrix, and [DF] as another SeqAIJ matrix.
3857 
3858    When d_nz, o_nz parameters are specified, d_nz storage elements are
3859    allocated for every row of the local diagonal submatrix, and o_nz
3860    storage locations are allocated for every row of the OFF-DIAGONAL submat.
   One way to choose d_nz and o_nz is to use the max nonzeros per local
3862    rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
3863    In this case, the values of d_nz,o_nz are:
3864 .vb
     proc0 : d_nz = 2, o_nz = 2
     proc1 : d_nz = 3, o_nz = 2
     proc2 : d_nz = 1, o_nz = 4
3868 .ve
3869    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
3870    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
   for proc2, i.e. we are using 12+15+10=37 storage locations to store
3872    34 values.
3873 
3874    When d_nnz, o_nnz parameters are specified, the storage is specified
   for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
3876    In the above case the values for d_nnz,o_nnz are:
3877 .vb
3878      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
3879      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
3880      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
3881 .ve
3882    Here the space allocated is sum of all the above values i.e 34, and
3883    hence pre-allocation is perfect.
3884 
3885    Level: intermediate
3886 
3887 .keywords: matrix, aij, compressed row, sparse, parallel
3888 
3889 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIAIJ(), MatMPIAIJSetPreallocationCSR(),
3890           MPIAIJ, MatGetInfo()
3891 @*/
PetscErrorCode  MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(B,MAT_CLASSID,1);
  PetscValidType(B,1);
  /* Dispatch to the implementation composed on B as "MatMPIAIJSetPreallocation_C";
     being a Try (not a Use) method, this is a silent no-op when B's type does not
     provide one (e.g. for a SEQAIJ matrix on a one-process communicator). */
  ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocation_C",(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
3902 
3903 #undef __FUNCT__
3904 #define __FUNCT__ "MatCreateMPIAIJWithArrays"
3905 /*@
3906      MatCreateMPIAIJWithArrays - creates a MPI AIJ matrix using arrays that contain in standard
3907          CSR format the local rows.
3908 
3909    Collective on MPI_Comm
3910 
3911    Input Parameters:
3912 +  comm - MPI communicator
3913 .  m - number of local rows (Cannot be PETSC_DECIDE)
3914 .  n - This value should be the same as the local size used in creating the
3915        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
3916        calculated if N is given) For square matrices n is almost always m.
3917 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
3918 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
3919 .   i - row indices
3920 .   j - column indices
3921 -   a - matrix values
3922 
3923    Output Parameter:
3924 .   mat - the matrix
3925 
3926    Level: intermediate
3927 
3928    Notes:
3929        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
3930      thus you CANNOT change the matrix entries by changing the values of a[] after you have
3931      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3932 
3933        The i and j indices are 0 based, and i indices are indices corresponding to the local j array.
3934 
3935        The format which is used for the sparse matrix input, is equivalent to a
    row-major ordering, i.e. for the following matrix, the input data expected is
3937     as shown:
3938 
3939         1 0 0
3940         2 0 3     P0
3941        -------
3942         4 5 6     P1
3943 
3944      Process0 [P0]: rows_owned=[0,1]
3945         i =  {0,1,3}  [size = nrow+1  = 2+1]
        j =  {0,0,2}  [size = nz = 3]
        v =  {1,2,3}  [size = nz = 3]
3948 
3949      Process1 [P1]: rows_owned=[2]
3950         i =  {0,3}    [size = nrow+1  = 1+1]
        j =  {0,1,2}  [size = nz = 3]
        v =  {4,5,6}  [size = nz = 3]
3953 
3954 .keywords: matrix, aij, compressed row, sparse, parallel
3955 
3956 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
3957           MPIAIJ, MatCreateMPIAIJ(), MatCreateMPIAIJWithSplitArrays()
3958 @*/
3959 PetscErrorCode  MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
3960 {
3961   PetscErrorCode ierr;
3962 
3963  PetscFunctionBegin;
3964   if (i[0]) {
3965     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
3966   }
3967   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
3968   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
3969   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
3970   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
3971   ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr);
3972   PetscFunctionReturn(0);
3973 }
3974 
3975 #undef __FUNCT__
3976 #define __FUNCT__ "MatCreateMPIAIJ"
3977 /*@C
3978    MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format
3979    (the default parallel PETSc format).  For good matrix assembly performance
3980    the user should preallocate the matrix storage by setting the parameters
3981    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3982    performance can be increased by more than a factor of 50.
3983 
3984    Collective on MPI_Comm
3985 
3986    Input Parameters:
3987 +  comm - MPI communicator
3988 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
3989            This value should be the same as the local size used in creating the
3990            y vector for the matrix-vector product y = Ax.
3991 .  n - This value should be the same as the local size used in creating the
3992        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
3993        calculated if N is given) For square matrices n is almost always m.
3994 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
3995 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
3996 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
3997            (same value is used for all local rows)
3998 .  d_nnz - array containing the number of nonzeros in the various rows of the
3999            DIAGONAL portion of the local submatrix (possibly different for each row)
4000            or PETSC_NULL, if d_nz is used to specify the nonzero structure.
4001            The size of this array is equal to the number of local rows, i.e 'm'.
4002            You must leave room for the diagonal entry even if it is zero.
4003 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
4004            submatrix (same value is used for all local rows).
4005 -  o_nnz - array containing the number of nonzeros in the various rows of the
4006            OFF-DIAGONAL portion of the local submatrix (possibly different for
4007            each row) or PETSC_NULL, if o_nz is used to specify the nonzero
4008            structure. The size of this array is equal to the number
4009            of local rows, i.e 'm'.
4010 
4011    Output Parameter:
4012 .  A - the matrix
4013 
4014    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
   MatXXXXSetPreallocation() paradigm instead of this routine directly.
4016    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
4017 
4018    Notes:
4019    If the *_nnz parameter is given then the *_nz parameter is ignored
4020 
4021    m,n,M,N parameters specify the size of the matrix, and its partitioning across
4022    processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
4023    storage requirements for this matrix.
4024 
4025    If PETSC_DECIDE or  PETSC_DETERMINE is used for a particular argument on one
4026    processor than it must be used on all processors that share the object for
4027    that argument.
4028 
4029    The user MUST specify either the local or global matrix dimensions
4030    (possibly both).
4031 
4032    The parallel matrix is partitioned across processors such that the
4033    first m0 rows belong to process 0, the next m1 rows belong to
4034    process 1, the next m2 rows belong to process 2 etc.. where
4035    m0,m1,m2,.. are the input parameter 'm'. i.e each processor stores
4036    values corresponding to [m x N] submatrix.
4037 
4038    The columns are logically partitioned with the n0 columns belonging
4039    to 0th partition, the next n1 columns belonging to the next
   partition etc.. where n0,n1,n2... are the input parameter 'n'.
4041 
4042    The DIAGONAL portion of the local submatrix on any given processor
4043    is the submatrix corresponding to the rows and columns m,n
4044    corresponding to the given processor. i.e diagonal matrix on
4045    process 0 is [m0 x n0], diagonal matrix on process 1 is [m1 x n1]
4046    etc. The remaining portion of the local submatrix [m x (N-n)]
4047    constitute the OFF-DIAGONAL portion. The example below better
4048    illustrates this concept.
4049 
4050    For a square global matrix we define each processor's diagonal portion
4051    to be its local rows and the corresponding columns (a square submatrix);
4052    each processor's off-diagonal portion encompasses the remainder of the
4053    local matrix (a rectangular submatrix).
4054 
4055    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
4056 
4057    When calling this routine with a single process communicator, a matrix of
4058    type SEQAIJ is returned.  If a matrix of type MPIAIJ is desired for this
4059    type of communicator, use the construction mechanism:
4060      MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatSetSizes(A, m,n,M,N); MatMPIAIJSetPreallocation(A,...);
4061 
4062    By default, this format uses inodes (identical nodes) when possible.
4063    We search for consecutive rows with the same nonzero structure, thereby
4064    reusing matrix information to achieve increased efficiency.
4065 
4066    Options Database Keys:
4067 +  -mat_no_inode  - Do not use inodes
4068 .  -mat_inode_limit <limit> - Sets inode limit (max limit=5)
4069 -  -mat_aij_oneindex - Internally use indexing starting at 1
4070         rather than 0.  Note that when calling MatSetValues(),
4071         the user still MUST index entries starting at 0!
4072 
4073 
4074    Example usage:
4075 
4076    Consider the following 8x8 matrix with 34 non-zero values, that is
4077    assembled across 3 processors. Lets assume that proc0 owns 3 rows,
4078    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
4079    as follows:
4080 
4081 .vb
4082             1  2  0  |  0  3  0  |  0  4
4083     Proc0   0  5  6  |  7  0  0  |  8  0
4084             9  0 10  | 11  0  0  | 12  0
4085     -------------------------------------
4086            13  0 14  | 15 16 17  |  0  0
4087     Proc1   0 18  0  | 19 20 21  |  0  0
4088             0  0  0  | 22 23  0  | 24  0
4089     -------------------------------------
4090     Proc2  25 26 27  |  0  0 28  | 29  0
4091            30  0  0  | 31 32 33  |  0 34
4092 .ve
4093 
4094    This can be represented as a collection of submatrices as:
4095 
4096 .vb
4097       A B C
4098       D E F
4099       G H I
4100 .ve
4101 
4102    Where the submatrices A,B,C are owned by proc0, D,E,F are
4103    owned by proc1, G,H,I are owned by proc2.
4104 
4105    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4106    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4107    The 'M','N' parameters are 8,8, and have the same values on all procs.
4108 
4109    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
4110    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
4111    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
4112    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
   part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
   matrix, and [DF] as another SeqAIJ matrix.
4115 
4116    When d_nz, o_nz parameters are specified, d_nz storage elements are
4117    allocated for every row of the local diagonal submatrix, and o_nz
4118    storage locations are allocated for every row of the OFF-DIAGONAL submat.
   One way to choose d_nz and o_nz is to use the max nonzeros per local
4120    rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
4121    In this case, the values of d_nz,o_nz are:
4122 .vb
     proc0 : d_nz = 2, o_nz = 2
     proc1 : d_nz = 3, o_nz = 2
     proc2 : d_nz = 1, o_nz = 4
4126 .ve
4127    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
4128    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
   for proc2, i.e. we are using 12+15+10=37 storage locations to store
4130    34 values.
4131 
4132    When d_nnz, o_nnz parameters are specified, the storage is specified
4133    for every row, coresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
4134    In the above case the values for d_nnz,o_nnz are:
4135 .vb
4136      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
4137      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
4138      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
4139 .ve
4140    Here the space allocated is sum of all the above values i.e 34, and
4141    hence pre-allocation is perfect.
4142 
4143    Level: intermediate
4144 
4145 .keywords: matrix, aij, compressed row, sparse, parallel
4146 
4147 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
4148           MPIAIJ, MatCreateMPIAIJWithArrays()
4149 @*/
4150 PetscErrorCode  MatCreateMPIAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
4151 {
4152   PetscErrorCode ierr;
4153   PetscMPIInt    size;
4154 
4155   PetscFunctionBegin;
4156   ierr = MatCreate(comm,A);CHKERRQ(ierr);
4157   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
4158   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4159   if (size > 1) {
4160     ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr);
4161     ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
4162   } else {
4163     ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr);
4164     ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr);
4165   }
4166   PetscFunctionReturn(0);
4167 }
4168 
4169 #undef __FUNCT__
4170 #define __FUNCT__ "MatMPIAIJGetSeqAIJ"
4171 PetscErrorCode  MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,PetscInt *colmap[])
4172 {
4173   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
4174 
4175   PetscFunctionBegin;
4176   *Ad     = a->A;
4177   *Ao     = a->B;
4178   *colmap = a->garray;
4179   PetscFunctionReturn(0);
4180 }
4181 
#undef __FUNCT__
#define __FUNCT__ "MatSetColoring_MPIAIJ"
/* Pushes a coloring of the parallel matrix down onto its two sequential blocks
   (diagonal part a->A and off-diagonal part a->B), building for each a local
   ISColoring sized to that block's columns. */
PetscErrorCode MatSetColoring_MPIAIJ(Mat A,ISColoring coloring)
{
  PetscErrorCode ierr;
  PetscInt       i;
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;

  PetscFunctionBegin;
  if (coloring->ctype == IS_COLORING_GLOBAL) {
    ISColoringValue *allcolors,*colors;
    ISColoring      ocoloring;

    /* set coloring for diagonal portion: a global coloring can be used directly */
    ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr);

    /* set coloring for off-diagonal portion: gather the colors of every global
       column onto this process, then select those columns that actually appear
       in B via the local-to-global map a->garray */
    ierr = ISAllGatherColors(((PetscObject)A)->comm,coloring->n,coloring->colors,PETSC_NULL,&allcolors);CHKERRQ(ierr);
    ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
    for (i=0; i<a->B->cmap->n; i++) {
      colors[i] = allcolors[a->garray[i]];
    }
    ierr = PetscFree(allcolors);CHKERRQ(ierr);
    /* colors[] is not freed here; presumably ISColoringCreate() takes ownership
       of the array (consistent with the other two uses below) -- TODO confirm */
    ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
    ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
    ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr);
  } else if (coloring->ctype == IS_COLORING_GHOSTED) {
    ISColoringValue *colors;
    PetscInt        *larray;
    ISColoring      ocoloring;

    /* set coloring for diagonal portion: translate the owned global columns
       [rstart,rstart+n) through the matrix's global-to-local mapping so we can
       index into the ghosted colors[] array */
    ierr = PetscMalloc((a->A->cmap->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr);
    for (i=0; i<a->A->cmap->n; i++) {
      larray[i] = i + A->cmap->rstart;
    }
    ierr = ISGlobalToLocalMappingApply(A->cmap->mapping,IS_GTOLM_MASK,a->A->cmap->n,larray,PETSC_NULL,larray);CHKERRQ(ierr);
    ierr = PetscMalloc((a->A->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
    for (i=0; i<a->A->cmap->n; i++) {
      colors[i] = coloring->colors[larray[i]];
    }
    ierr = PetscFree(larray);CHKERRQ(ierr);
    /* NOTE(review): this call uses PETSC_COMM_SELF while the other two use
       MPI_COMM_SELF -- harmless if they alias, but inconsistent; confirm */
    ierr = ISColoringCreate(PETSC_COMM_SELF,coloring->n,a->A->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
    ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr);
    ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr);

    /* set coloring for off-diagonal portion: B's columns are already listed by
       global index in a->garray, so map those directly to local ghost indices */
    ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr);
    ierr = ISGlobalToLocalMappingApply(A->cmap->mapping,IS_GTOLM_MASK,a->B->cmap->n,a->garray,PETSC_NULL,larray);CHKERRQ(ierr);
    ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
    for (i=0; i<a->B->cmap->n; i++) {
      colors[i] = coloring->colors[larray[i]];
    }
    ierr = PetscFree(larray);CHKERRQ(ierr);
    ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
    ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
    ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr);
  } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support ISColoringType %d",(int)coloring->ctype);

  PetscFunctionReturn(0);
}
4243 
#if defined(PETSC_HAVE_ADIC)
#undef __FUNCT__
#define __FUNCT__ "MatSetValuesAdic_MPIAIJ"
/* Forwards ADIC-computed values to both sequential blocks (diagonal first,
   then off-diagonal) of the MPIAIJ matrix. */
PetscErrorCode MatSetValuesAdic_MPIAIJ(Mat A,void *advalues)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)A->data;

  PetscFunctionBegin;
  ierr = MatSetValuesAdic_SeqAIJ(aij->A,advalues);CHKERRQ(ierr);
  ierr = MatSetValuesAdic_SeqAIJ(aij->B,advalues);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#endif
4258 
4259 #undef __FUNCT__
4260 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ"
4261 PetscErrorCode MatSetValuesAdifor_MPIAIJ(Mat A,PetscInt nl,void *advalues)
4262 {
4263   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
4264   PetscErrorCode ierr;
4265 
4266   PetscFunctionBegin;
4267   ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr);
4268   ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr);
4269   PetscFunctionReturn(0);
4270 }
4271 
#undef __FUNCT__
#define __FUNCT__ "MatMergeSymbolic"
/* Symbolic phase of MatMerge(): creates the parallel MPIAIJ matrix *outmat with
   the layout and preallocation needed to concatenate the per-process sequential
   matrices; no numerical values are inserted here. */
PetscErrorCode  MatMergeSymbolic(MPI_Comm comm,Mat inmat,PetscInt n,Mat *outmat)
{
  PetscErrorCode ierr;
  PetscInt       m,N,i,rstart,nnz,*dnz,*onz; /* dnz/onz are set up by the MatPreallocate* macros below */
  PetscInt       *indx;

  PetscFunctionBegin;
  /* This routine will ONLY return MPIAIJ type matrix */
  ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
  if (n == PETSC_DECIDE){
    ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr);
  }
  /* inclusive prefix sum of the local row counts; subtracting m turns it into
     this process's first global row in the merged matrix */
  ierr = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
  rstart -= m;

  /* count diagonal/off-diagonal nonzeros of each local row for preallocation */
  ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
  for (i=0;i<m;i++) {
    ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr);
    ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr);
    ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr);
  }

  ierr = MatCreate(comm,outmat);CHKERRQ(ierr);
  ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
  ierr = MatSetType(*outmat,MATMPIAIJ);CHKERRQ(ierr);
  ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr);
  ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); /* releases dnz/onz */
  PetscFunctionReturn(0);
}
4303 
#undef __FUNCT__
#define __FUNCT__ "MatMergeNumeric"
/* Numeric phase of MatMerge(): copies the values of the per-process sequential
   matrix inmat into this process's rows of outmat and assembles outmat.
   WARNING: inmat is DESTROYED before this routine returns. */
PetscErrorCode  MatMergeNumeric(MPI_Comm comm,Mat inmat,PetscInt n,Mat outmat)
{
  PetscErrorCode ierr;
  PetscInt       m,N,i,rstart,nnz,Ii;
  PetscInt       *indx;
  PetscScalar    *values;

  PetscFunctionBegin;
  ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(outmat,&rstart,PETSC_NULL);CHKERRQ(ierr);
  for (i=0;i<m;i++) {
    ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
    /* local row i of inmat becomes global row i+rstart of outmat */
    Ii    = i + rstart;
    ierr = MatSetValues(outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
    ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
  }
  ierr = MatDestroy(&inmat);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
4327 
#undef __FUNCT__
#define __FUNCT__ "MatMerge"
/*@
      MatMerge - Creates a single large PETSc matrix by concatenating sequential
                 matrices from each processor

    Collective on MPI_Comm

   Input Parameters:
+    comm - the communicators the parallel matrix will live on
.    inmat - the input sequential matrices
.    n - number of local columns (or PETSC_DECIDE)
-    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX

   Output Parameter:
.    outmat - the parallel matrix generated

    Level: advanced

   Notes: The number of columns of the matrix in EACH processor MUST be the same.
   inmat is destroyed by this routine (inside MatMergeNumeric()).

@*/
PetscErrorCode  MatMerge(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscLogEventBegin(MAT_Merge,inmat,0,0,0);CHKERRQ(ierr);
  if (scall == MAT_INITIAL_MATRIX){
    /* first call: set up the layout and preallocation of *outmat */
    ierr = MatMergeSymbolic(comm,inmat,n,outmat);CHKERRQ(ierr);
  }
  ierr = MatMergeNumeric(comm,inmat,n,*outmat);CHKERRQ(ierr);
  /* NOTE(review): MatMergeNumeric() destroys inmat, yet the destroyed handle is
     passed to PetscLogEventEnd() below -- looks like a use-after-destroy; confirm
     whether the logging macros dereference the object */
  ierr = PetscLogEventEnd(MAT_Merge,inmat,0,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
4363 
4364 #undef __FUNCT__
4365 #define __FUNCT__ "MatFileSplit"
4366 PetscErrorCode MatFileSplit(Mat A,char *outfile)
4367 {
4368   PetscErrorCode    ierr;
4369   PetscMPIInt       rank;
4370   PetscInt          m,N,i,rstart,nnz;
4371   size_t            len;
4372   const PetscInt    *indx;
4373   PetscViewer       out;
4374   char              *name;
4375   Mat               B;
4376   const PetscScalar *values;
4377 
4378   PetscFunctionBegin;
4379   ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr);
4380   ierr = MatGetSize(A,0,&N);CHKERRQ(ierr);
4381   /* Should this be the type of the diagonal block of A? */
4382   ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr);
4383   ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr);
4384   ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr);
4385   ierr = MatSeqAIJSetPreallocation(B,0,PETSC_NULL);CHKERRQ(ierr);
4386   ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr);
4387   for (i=0;i<m;i++) {
4388     ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
4389     ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
4390     ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
4391   }
4392   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4393   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4394 
4395   ierr = MPI_Comm_rank(((PetscObject)A)->comm,&rank);CHKERRQ(ierr);
4396   ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr);
4397   ierr = PetscMalloc((len+5)*sizeof(char),&name);CHKERRQ(ierr);
4398   sprintf(name,"%s.%d",outfile,rank);
4399   ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr);
4400   ierr = PetscFree(name);
4401   ierr = MatView(B,out);CHKERRQ(ierr);
4402   ierr = PetscViewerDestroy(&out);CHKERRQ(ierr);
4403   ierr = MatDestroy(&B);CHKERRQ(ierr);
4404   PetscFunctionReturn(0);
4405 }
4406 
extern PetscErrorCode MatDestroy_MPIAIJ(Mat);
#undef __FUNCT__
#define __FUNCT__ "MatDestroy_MPIAIJ_SeqsToMPI"
/* Destructor for MPIAIJ matrices built by the seqs-to-MPI merge: frees the
   Mat_Merge_SeqsToMPI workspace composed on the matrix (if present), then
   delegates to the regular MPIAIJ destructor. */
PetscErrorCode  MatDestroy_MPIAIJ_SeqsToMPI(Mat A)
{
  PetscErrorCode       ierr;
  Mat_Merge_SeqsToMPI  *merge;
  PetscContainer       container;

  PetscFunctionBegin;
  ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr);
  if (container) {
    ierr = PetscContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr);
    ierr = PetscFree(merge->id_r);CHKERRQ(ierr);
    ierr = PetscFree(merge->len_s);CHKERRQ(ierr);
    ierr = PetscFree(merge->len_r);CHKERRQ(ierr);
    ierr = PetscFree(merge->bi);CHKERRQ(ierr);
    ierr = PetscFree(merge->bj);CHKERRQ(ierr);
    /* buf_ri[0]/buf_rj[0] presumably point at one contiguous allocation holding
       all receive buffers (freed first), then the pointer arrays themselves --
       TODO confirm against the allocation in the symbolic phase */
    ierr = PetscFree(merge->buf_ri[0]);CHKERRQ(ierr);
    ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr);
    ierr = PetscFree(merge->buf_rj[0]);CHKERRQ(ierr);
    ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr);
    ierr = PetscFree(merge->coi);CHKERRQ(ierr);
    ierr = PetscFree(merge->coj);CHKERRQ(ierr);
    ierr = PetscFree(merge->owners_co);CHKERRQ(ierr);
    ierr = PetscLayoutDestroy(&merge->rowmap);CHKERRQ(ierr);
    ierr = PetscFree(merge);CHKERRQ(ierr);
    /* detach the now-dangling composition so nothing can query it again */
    ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr);
  }
  ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
4439 
4440 #include <../src/mat/utils/freespace.h>
4441 #include <petscbt.h>
4442 
4443 #undef __FUNCT__
4444 #define __FUNCT__ "MatMerge_SeqsToMPINumeric"
4445 /*@C
4446       MatMerge_SeqsToMPI - Creates a MPIAIJ matrix by adding sequential
4447                  matrices from each processor
4448 
4449     Collective on MPI_Comm
4450 
4451    Input Parameters:
4452 +    comm - the communicators the parallel matrix will live on
4453 .    seqmat - the input sequential matrices
4454 .    m - number of local rows (or PETSC_DECIDE)
4455 .    n - number of local columns (or PETSC_DECIDE)
4456 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4457 
4458    Output Parameter:
4459 .    mpimat - the parallel matrix generated
4460 
4461     Level: advanced
4462 
4463    Notes:
4464      The dimensions of the sequential matrix in each processor MUST be the same.
4465      The input seqmat is included into the container "Mat_Merge_SeqsToMPI", and will be
4466      destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat.
4467 @*/
/*
   MatMerge_SeqsToMPINumeric - numeric phase of merging per-process SeqAIJ
   matrices into one parallel MPIAIJ matrix.  The symbolic structure and the
   communication pattern were built by MatMerge_SeqsToMPISymbolic() and are
   retrieved here from the "MatMergeSeqsToMPI" container attached to mpimat.
   Values of seqmat rows owned by other processes are sent to their owners;
   received values are added to the locally owned ones and the merged rows
   are inserted into mpimat, which is then assembled.
*/
4468 PetscErrorCode  MatMerge_SeqsToMPINumeric(Mat seqmat,Mat mpimat)
4469 {
4470   PetscErrorCode       ierr;
4471   MPI_Comm             comm=((PetscObject)mpimat)->comm;
4472   Mat_SeqAIJ           *a=(Mat_SeqAIJ*)seqmat->data;
4473   PetscMPIInt          size,rank,taga,*len_s;
4474   PetscInt             N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj=a->j;
4475   PetscInt             proc,m;
4476   PetscInt             **buf_ri,**buf_rj;
4477   PetscInt             k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj;
4478   PetscInt             nrows,**buf_ri_k,**nextrow,**nextai;
4479   MPI_Request          *s_waits,*r_waits;
4480   MPI_Status           *status;
4481   MatScalar            *aa=a->a;
4482   MatScalar            **abuf_r,*ba_i;
4483   Mat_Merge_SeqsToMPI  *merge;
4484   PetscContainer       container;
4485 
4486   PetscFunctionBegin;
4487   ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
4488 
4489   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4490   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4491 
  /* retrieve the merge context attached to mpimat by the symbolic phase */
4492   ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr);
4493   ierr  = PetscContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr);
4494 
4495   bi     = merge->bi;
4496   bj     = merge->bj;
4497   buf_ri = merge->buf_ri;
4498   buf_rj = merge->buf_rj;
4499 
4500   ierr   = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr);
4501   owners = merge->rowmap->range;
4502   len_s  = merge->len_s;
4503 
4504   /* send and recv matrix values */
4505   /*-----------------------------*/
4506   ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr);
4507   ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr);
4508 
4509   ierr = PetscMalloc((merge->nsend+1)*sizeof(MPI_Request),&s_waits);CHKERRQ(ierr);
4510   for (proc=0,k=0; proc<size; proc++){
4511     if (!len_s[proc]) continue;
    /* rows [owners[proc],owners[proc+1]) of seqmat are stored contiguously
       in CSR, so all their values go out as one message starting at aa+ai[i] */
4512     i = owners[proc];
4513     ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr);
4514     k++;
4515   }
4516 
4517   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);}
4518   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);}
4519   ierr = PetscFree(status);CHKERRQ(ierr);
4520 
4521   ierr = PetscFree(s_waits);CHKERRQ(ierr);
4522   ierr = PetscFree(r_waits);CHKERRQ(ierr);
4523 
4524   /* insert mat values of mpimat */
4525   /*----------------------------*/
  /* ba_i: workspace for one merged row; N (global column count) is an upper bound on its length */
4526   ierr = PetscMalloc(N*sizeof(PetscScalar),&ba_i);CHKERRQ(ierr);
4527   ierr = PetscMalloc3(merge->nrecv,PetscInt*,&buf_ri_k,merge->nrecv,PetscInt*,&nextrow,merge->nrecv,PetscInt*,&nextai);CHKERRQ(ierr);
4528 
4529   for (k=0; k<merge->nrecv; k++){
4530     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
4531     nrows = *(buf_ri_k[k]);
4532     nextrow[k]  = buf_ri_k[k]+1;  /* next row number of k-th recved i-structure */
4533     nextai[k]   = buf_ri_k[k] + (nrows + 1);/* points to the next i-structure of k-th recved i-structure  */
4534   }
4535 
4536   /* set values of ba */
4537   m = merge->rowmap->n;
4538   for (i=0; i<m; i++) {
4539     arow = owners[rank] + i;
4540     bj_i = bj+bi[i];  /* col indices of the i-th row of mpimat */
4541     bnzi = bi[i+1] - bi[i];
4542     ierr = PetscMemzero(ba_i,bnzi*sizeof(PetscScalar));CHKERRQ(ierr);
4543 
4544     /* add local non-zero vals of this proc's seqmat into ba */
4545     anzi = ai[arow+1] - ai[arow];
4546     aj   = a->j + ai[arow];
4547     aa   = a->a + ai[arow];
4548     nextaj = 0;
    /* bj_i is sorted and, by construction in the symbolic phase, a superset
       of aj; scan bj_i and advance nextaj whenever the columns match */
4549     for (j=0; nextaj<anzi; j++){
4550       if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */
4551         ba_i[j] += aa[nextaj++];
4552       }
4553     }
4554 
4555     /* add received vals into ba */
4556     for (k=0; k<merge->nrecv; k++){ /* k-th received message */
4557       /* i-th row */
4558       if (i == *nextrow[k]) {
4559         anzi = *(nextai[k]+1) - *nextai[k];
4560         aj   = buf_rj[k] + *(nextai[k]);
4561         aa   = abuf_r[k] + *(nextai[k]);
4562         nextaj = 0;
4563         for (j=0; nextaj<anzi; j++){
4564           if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */
4565             ba_i[j] += aa[nextaj++];
4566           }
4567         }
4568         nextrow[k]++; nextai[k]++;
4569       }
4570     }
4571     ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr);
4572   }
4573   ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4574   ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4575 
  /* abuf_r[0] is the single contiguous value buffer behind all receives
     (see the matching frees of the PetscPostIrecvScalar() allocation) */
4576   ierr = PetscFree(abuf_r[0]);CHKERRQ(ierr);
4577   ierr = PetscFree(abuf_r);CHKERRQ(ierr);
4578   ierr = PetscFree(ba_i);CHKERRQ(ierr);
4579   ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
4580   ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
4581   PetscFunctionReturn(0);
4582 }
4583 
4584 extern PetscErrorCode  MatDestroy_MPIAIJ_SeqsToMPI(Mat);
4585 
4586 #undef __FUNCT__
4587 #define __FUNCT__ "MatMerge_SeqsToMPISymbolic"
/*
   MatMerge_SeqsToMPISymbolic - symbolic phase of merging per-process
   sequential (SeqAIJ) matrices into one parallel MPIAIJ matrix.  Each
   process contributes a SeqAIJ matrix of identical dimensions; this routine
   determines row ownership, ships the ij-structure of rows owned elsewhere
   to their owners, merges the received structures with the local one into
   (bi,bj) for the locally owned rows, and creates the preallocated (but not
   yet assembled) matrix *mpimat.  Everything the numeric phase needs
   (bi,bj, receive buffers, message lengths) is stored in a
   Mat_Merge_SeqsToMPI context attached to *mpimat under the key
   "MatMergeSeqsToMPI".
*/
4588 PetscErrorCode  MatMerge_SeqsToMPISymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat)
4589 {
4590   PetscErrorCode       ierr;
4591   Mat                  B_mpi;
4592   Mat_SeqAIJ           *a=(Mat_SeqAIJ*)seqmat->data;
4593   PetscMPIInt          size,rank,tagi,tagj,*len_s,*len_si,*len_ri;
4594   PetscInt             **buf_rj,**buf_ri,**buf_ri_k;
4595   PetscInt             M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j;
4596   PetscInt             len,proc,*dnz,*onz;
4597   PetscInt             k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0;
4598   PetscInt             nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai;
4599   MPI_Request          *si_waits,*sj_waits,*ri_waits,*rj_waits;
4600   MPI_Status           *status;
4601   PetscFreeSpaceList   free_space=PETSC_NULL,current_space=PETSC_NULL;
4602   PetscBT              lnkbt;
4603   Mat_Merge_SeqsToMPI  *merge;
4604   PetscContainer       container;
4605 
4606   PetscFunctionBegin;
4607   ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4608 
4609   /* make sure it is a PETSc comm; the reference is released via PetscCommDestroy() below */
4610   ierr = PetscCommDuplicate(comm,&comm,PETSC_NULL);CHKERRQ(ierr);
4611   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4612   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4613 
4614   ierr = PetscNew(Mat_Merge_SeqsToMPI,&merge);CHKERRQ(ierr);
4615   ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr);
4616 
4617   /* determine row ownership */
4618   /*---------------------------------------------------------*/
4619   ierr = PetscLayoutCreate(comm,&merge->rowmap);CHKERRQ(ierr);
4620   ierr = PetscLayoutSetLocalSize(merge->rowmap,m);CHKERRQ(ierr);
4621   ierr = PetscLayoutSetSize(merge->rowmap,M);CHKERRQ(ierr);
4622   ierr = PetscLayoutSetBlockSize(merge->rowmap,1);CHKERRQ(ierr);
4623   ierr = PetscLayoutSetUp(merge->rowmap);CHKERRQ(ierr);
4624   ierr = PetscMalloc(size*sizeof(PetscMPIInt),&len_si);CHKERRQ(ierr);
4625   ierr = PetscMalloc(size*sizeof(PetscMPIInt),&merge->len_s);CHKERRQ(ierr);
4626 
4627   m      = merge->rowmap->n;
4628   M      = merge->rowmap->N;
4629   owners = merge->rowmap->range;
4630 
4631   /* determine the number of messages to send, their lengths */
4632   /*---------------------------------------------------------*/
4633   len_s  = merge->len_s;
4634 
4635   len = 0;  /* length of buf_si[] */
4636   merge->nsend = 0;
4637   for (proc=0; proc<size; proc++){
4638     len_si[proc] = 0;
4639     if (proc == rank){
4640       len_s[proc] = 0;
4641     } else {
4642       len_si[proc] = owners[proc+1] - owners[proc] + 1;
4643       len_s[proc] = ai[owners[proc+1]] - ai[owners[proc]]; /* num of nonzeros to be sent to [proc] */
4644     }
4645     if (len_s[proc]) {
4646       merge->nsend++;
4647       nrows = 0;
    /* count only non-empty rows; empty rows are omitted from the i-structure msg */
4648       for (i=owners[proc]; i<owners[proc+1]; i++){
4649         if (ai[i+1] > ai[i]) nrows++;
4650       }
4651       len_si[proc] = 2*(nrows+1); /* header + nrows row indices + (nrows+1) offsets */
4652       len += len_si[proc];
4653     }
4654   }
4655 
4656   /* determine the number and length of messages to receive for ij-structure */
4657   /*-------------------------------------------------------------------------*/
4658   ierr = PetscGatherNumberOfMessages(comm,PETSC_NULL,len_s,&merge->nrecv);CHKERRQ(ierr);
4659   ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr);
4660 
4661   /* post the Irecv of j-structure */
4662   /*-------------------------------*/
4663   ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr);
4664   ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr);
4665 
4666   /* post the Isend of j-structure */
4667   /*--------------------------------*/
4668   ierr = PetscMalloc2(merge->nsend,MPI_Request,&si_waits,merge->nsend,MPI_Request,&sj_waits);CHKERRQ(ierr);
4669 
4670   for (proc=0, k=0; proc<size; proc++){
4671     if (!len_s[proc]) continue;
4672     i = owners[proc];
4673     ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr);
4674     k++;
4675   }
4676 
4677   /* receives and sends of j-structure are complete */
4678   /*------------------------------------------------*/
4679   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);}
4680   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);}
4681 
4682   /* send and recv i-structure */
4683   /*---------------------------*/
4684   ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr);
4685   ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr);
4686 
4687   ierr = PetscMalloc((len+1)*sizeof(PetscInt),&buf_s);CHKERRQ(ierr);
4688   buf_si = buf_s;  /* points to the beginning of k-th msg to be sent */
4689   for (proc=0,k=0; proc<size; proc++){
4690     if (!len_s[proc]) continue;
4691     /* form outgoing message for i-structure:
4692          buf_si[0]:                 nrows to be sent
4693                [1:nrows]:           row index (global)
4694                [nrows+1:2*nrows+1]: i-structure index
4695     */
4696     /*-------------------------------------------*/
4697     nrows = len_si[proc]/2 - 1;
4698     buf_si_i    = buf_si + nrows+1;
4699     buf_si[0]   = nrows;
4700     buf_si_i[0] = 0;
4701     nrows = 0;
4702     for (i=owners[proc]; i<owners[proc+1]; i++){
4703       anzi = ai[i+1] - ai[i];
4704       if (anzi) {
4705         buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */
4706         buf_si[nrows+1] = i-owners[proc]; /* local row index */
4707         nrows++;
4708       }
4709     }
4710     ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr);
4711     k++;
4712     buf_si += len_si[proc];
4713   }
4714 
4715   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);}
4716   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);}
4717 
4718   ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr);
4719   for (i=0; i<merge->nrecv; i++){
4720     ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr);
4721   }
4722 
4723   ierr = PetscFree(len_si);CHKERRQ(ierr);
4724   ierr = PetscFree(len_ri);CHKERRQ(ierr);
4725   ierr = PetscFree(rj_waits);CHKERRQ(ierr);
4726   ierr = PetscFree2(si_waits,sj_waits);CHKERRQ(ierr);
4727   ierr = PetscFree(ri_waits);CHKERRQ(ierr);
4728   ierr = PetscFree(buf_s);CHKERRQ(ierr);
4729   ierr = PetscFree(status);CHKERRQ(ierr);
4730 
4731   /* compute a local seq matrix in each processor */
4732   /*----------------------------------------------*/
4733   /* allocate bi array and free space for accumulating nonzero column info */
4734   ierr = PetscMalloc((m+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr);
4735   bi[0] = 0;
4736 
4737   /* create and initialize a linked list for merging sorted column-index sets */
4738   nlnk = N+1;
4739   ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4740 
4741   /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */
  /* NOTE(review): the first assignment to len is redundant; it is immediately overwritten */
4742   len = 0;
4743   len  = ai[owners[rank+1]] - ai[owners[rank]];
4744   ierr = PetscFreeSpaceGet((PetscInt)(2*len+1),&free_space);CHKERRQ(ierr);
4745   current_space = free_space;
4746 
4747   /* determine symbolic info for each local row */
4748   ierr = PetscMalloc3(merge->nrecv,PetscInt*,&buf_ri_k,merge->nrecv,PetscInt*,&nextrow,merge->nrecv,PetscInt*,&nextai);CHKERRQ(ierr);
4749 
4750   for (k=0; k<merge->nrecv; k++){
4751     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
4752     nrows = *buf_ri_k[k];
4753     nextrow[k]  = buf_ri_k[k] + 1;  /* next row number of k-th recved i-structure */
4754     nextai[k]   = buf_ri_k[k] + (nrows + 1);/* points to the next i-structure of k-th recved i-structure  */
4755   }
4756 
4757   ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
4758   len = 0; /* from here on, len tracks max(bnzi) over the local rows */
4759   for (i=0;i<m;i++) {
4760     bnzi   = 0;
4761     /* add local non-zero cols of this proc's seqmat into lnk */
4762     arow   = owners[rank] + i;
4763     anzi   = ai[arow+1] - ai[arow];
4764     aj     = a->j + ai[arow];
4765     ierr = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4766     bnzi += nlnk;
4767     /* add received col data into lnk */
4768     for (k=0; k<merge->nrecv; k++){ /* k-th received message */
4769       if (i == *nextrow[k]) { /* i-th row */
4770         anzi = *(nextai[k]+1) - *nextai[k];
4771         aj   = buf_rj[k] + *nextai[k];
4772         ierr = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4773         bnzi += nlnk;
4774         nextrow[k]++; nextai[k]++;
4775       }
4776     }
4777     if (len < bnzi) len = bnzi;  /* =max(bnzi) */
4778 
4779     /* if free space is not available, make more free space */
4780     if (current_space->local_remaining<bnzi) {
4781       ierr = PetscFreeSpaceGet(bnzi+current_space->total_array_size,&current_space);CHKERRQ(ierr);
4782       nspacedouble++; /* counts how often the free space had to grow */
4783     }
4784     /* copy data into free space, then initialize lnk */
4785     ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
4786     ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr);
4787 
4788     current_space->array           += bnzi;
4789     current_space->local_used      += bnzi;
4790     current_space->local_remaining -= bnzi;
4791 
4792     bi[i+1] = bi[i] + bnzi;
4793   }
4794 
4795   ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
4796 
4797   ierr = PetscMalloc((bi[m]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr);
4798   ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr);
4799   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
4800 
4801   /* create symbolic parallel matrix B_mpi */
4802   /*---------------------------------------*/
  /* when n is PETSC_DECIDE the global column count N must be supplied;
     otherwise both global sizes are derived from the local sizes */
4803   ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr);
4804   if (n==PETSC_DECIDE) {
4805     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr);
4806   } else {
4807     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
4808   }
4809   ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr);
4810   ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr);
4811   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
4812 
4813   /* B_mpi is not ready for use - assembly will be done by MatMerge_SeqsToMPINumeric() */
4814   B_mpi->assembled     = PETSC_FALSE;
4815   B_mpi->ops->destroy  = MatDestroy_MPIAIJ_SeqsToMPI;
4816   merge->bi            = bi;
4817   merge->bj            = bj;
4818   merge->buf_ri        = buf_ri;
4819   merge->buf_rj        = buf_rj;
4820   merge->coi           = PETSC_NULL;
4821   merge->coj           = PETSC_NULL;
4822   merge->owners_co     = PETSC_NULL;
4823 
  /* drop the reference taken by PetscCommDuplicate() above */
4824   ierr = PetscCommDestroy(&comm);CHKERRQ(ierr);
4825 
4826   /* attach the supporting struct to B_mpi for reuse */
4827   ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
4828   ierr = PetscContainerSetPointer(container,merge);CHKERRQ(ierr);
4829   ierr = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr);
4830   ierr = PetscContainerDestroy(&container);CHKERRQ(ierr);
4831   *mpimat = B_mpi;
4832 
4833   ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4834   PetscFunctionReturn(0);
4835 }
4836 
4837 #undef __FUNCT__
4838 #define __FUNCT__ "MatMerge_SeqsToMPI"
4839 PetscErrorCode  MatMerge_SeqsToMPI(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat)
4840 {
4841   PetscErrorCode   ierr;
4842 
4843   PetscFunctionBegin;
4844   ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4845   if (scall == MAT_INITIAL_MATRIX){
4846     ierr = MatMerge_SeqsToMPISymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr);
4847   }
4848   ierr = MatMerge_SeqsToMPINumeric(seqmat,*mpimat);CHKERRQ(ierr);
4849   ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4850   PetscFunctionReturn(0);
4851 }
4852 
4853 #undef __FUNCT__
4854 #define __FUNCT__ "MatMPIAIJGetLocalMat"
4855 /*@
4856      MatMPIAIJGetLocalMat - Creates a SeqAIJ from a MPIAIJ matrix by taking all its local rows and putting them into a sequential matrix with
4857           mlocal rows and n columns. Where mlocal is the row count obtained with MatGetLocalSize() and n is the global column count obtained
4858           with MatGetSize()
4859 
4860     Not Collective
4861 
4862    Input Parameters:
4863 +    A - the matrix
4864 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4865 
4866    Output Parameter:
4867 .    A_loc - the local sequential matrix generated
4868 
4869     Level: developer
4870 
4871 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMatCondensed()
4872 
4873 @*/
4874 PetscErrorCode  MatMPIAIJGetLocalMat(Mat A,MatReuse scall,Mat *A_loc)
4875 {
4876   PetscErrorCode  ierr;
4877   Mat_MPIAIJ      *mpimat=(Mat_MPIAIJ*)A->data;
4878   Mat_SeqAIJ      *mat,*a=(Mat_SeqAIJ*)(mpimat->A)->data,*b=(Mat_SeqAIJ*)(mpimat->B)->data;
4879   PetscInt        *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*cmap=mpimat->garray;
4880   MatScalar       *aa=a->a,*ba=b->a,*cam;
4881   PetscScalar     *ca;
4882   PetscInt        am=A->rmap->n,i,j,k,cstart=A->cmap->rstart;
4883   PetscInt        *ci,*cj,col,ncols_d,ncols_o,jo;
4884   PetscBool       match;
4885 
4886   PetscFunctionBegin;
4887   ierr = PetscTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr);
4888   if (!match) SETERRQ(((PetscObject)A)->comm, PETSC_ERR_SUP,"Requires MPIAIJ matrix as input");
4889   ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
4890   if (scall == MAT_INITIAL_MATRIX){
    /* row i of the local matrix holds all entries of row i from both the
       diagonal block (mpimat->A) and the off-diagonal block (mpimat->B) */
4891     ierr = PetscMalloc((1+am)*sizeof(PetscInt),&ci);CHKERRQ(ierr);
4892     ci[0] = 0;
4893     for (i=0; i<am; i++){
4894       ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]);
4895     }
4896     ierr = PetscMalloc((1+ci[am])*sizeof(PetscInt),&cj);CHKERRQ(ierr);
4897     ierr = PetscMalloc((1+ci[am])*sizeof(PetscScalar),&ca);CHKERRQ(ierr);
4898     k = 0;
    /* aj,aa,bj,ba are running pointers that advance across rows as entries
       are consumed, so statement order in the loops below is significant */
4899     for (i=0; i<am; i++) {
4900       ncols_o = bi[i+1] - bi[i];
4901       ncols_d = ai[i+1] - ai[i];
4902       /* off-diagonal portion of A: global columns left of the diagonal block (col < cstart) */
4903       for (jo=0; jo<ncols_o; jo++) {
4904         col = cmap[*bj];
4905         if (col >= cstart) break;
4906         cj[k]   = col; bj++;
4907         ca[k++] = *ba++;
4908       }
4909       /* diagonal portion of A */
4910       for (j=0; j<ncols_d; j++) {
4911         cj[k]   = cstart + *aj++;
4912         ca[k++] = *aa++;
4913       }
4914       /* off-diagonal portion of A: remaining columns, right of the diagonal block */
4915       for (j=jo; j<ncols_o; j++) {
4916         cj[k]   = cmap[*bj++];
4917         ca[k++] = *ba++;
4918       }
4919     }
4920     /* put together the new matrix */
4921     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr);
4922     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
4923     /* Since these are PETSc arrays, change flags to free them as necessary. */
4924     mat          = (Mat_SeqAIJ*)(*A_loc)->data;
4925     mat->free_a  = PETSC_TRUE;
4926     mat->free_ij = PETSC_TRUE;
4927     mat->nonew   = 0;
4928   } else if (scall == MAT_REUSE_MATRIX){
    /* reuse the i,j structure of the existing *A_loc; only refresh its values */
4929     mat=(Mat_SeqAIJ*)(*A_loc)->data;
4930     ci = mat->i; cj = mat->j; cam = mat->a;
4931     for (i=0; i<am; i++) {
4932       /* off-diagonal portion of A: columns left of the diagonal block */
4933       ncols_o = bi[i+1] - bi[i];
4934       for (jo=0; jo<ncols_o; jo++) {
4935         col = cmap[*bj];
4936         if (col >= cstart) break;
4937         *cam++ = *ba++; bj++;
4938       }
4939       /* diagonal portion of A */
4940       ncols_d = ai[i+1] - ai[i];
4941       for (j=0; j<ncols_d; j++) *cam++ = *aa++;
4942       /* off-diagonal portion of A: remaining columns */
4943       for (j=jo; j<ncols_o; j++) {
4944         *cam++ = *ba++; bj++;
4945       }
4946     }
4947   } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
4948   ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
4949   PetscFunctionReturn(0);
4950 }
4951 
4952 #undef __FUNCT__
4953 #define __FUNCT__ "MatMPIAIJGetLocalMatCondensed"
4954 /*@C
4955      MatMPIAIJGetLocalMatCondensed - Creates a SeqAIJ matrix from an MPIAIJ matrix by taking all its local rows and NON-ZERO columns
4956 
4957     Not Collective
4958 
4959    Input Parameters:
4960 +    A - the matrix
4961 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4962 -    row, col - index sets of rows and columns to extract (or PETSC_NULL)
4963 
4964    Output Parameter:
4965 .    A_loc - the local sequential matrix generated
4966 
4967     Level: developer
4968 
4969 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMat()
4970 
4971 @*/
4972 PetscErrorCode  MatMPIAIJGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc)
4973 {
4974   Mat_MPIAIJ        *a=(Mat_MPIAIJ*)A->data;
4975   PetscErrorCode    ierr;
4976   PetscInt          i,start,end,ncols,nzA,nzB,*cmap,imark,*idx;
4977   IS                isrowa,iscola;
4978   Mat               *aloc;
4979   PetscBool       match;
4980 
4981   PetscFunctionBegin;
4982   ierr = PetscTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr);
4983   if (!match) SETERRQ(((PetscObject)A)->comm, PETSC_ERR_SUP,"Requires MPIAIJ matrix as input");
4984   ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
4985   if (!row){
4986     start = A->rmap->rstart; end = A->rmap->rend;
4987     ierr = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr);
4988   } else {
4989     isrowa = *row;
4990   }
4991   if (!col){
4992     start = A->cmap->rstart;
4993     cmap  = a->garray;
4994     nzA   = a->A->cmap->n;
4995     nzB   = a->B->cmap->n;
4996     ierr  = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr);
4997     ncols = 0;
4998     for (i=0; i<nzB; i++) {
4999       if (cmap[i] < start) idx[ncols++] = cmap[i];
5000       else break;
5001     }
5002     imark = i;
5003     for (i=0; i<nzA; i++) idx[ncols++] = start + i;
5004     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i];
5005     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&iscola);CHKERRQ(ierr);
5006   } else {
5007     iscola = *col;
5008   }
5009   if (scall != MAT_INITIAL_MATRIX){
5010     ierr = PetscMalloc(sizeof(Mat),&aloc);CHKERRQ(ierr);
5011     aloc[0] = *A_loc;
5012   }
5013   ierr = MatGetSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr);
5014   *A_loc = aloc[0];
5015   ierr = PetscFree(aloc);CHKERRQ(ierr);
5016   if (!row){
5017     ierr = ISDestroy(&isrowa);CHKERRQ(ierr);
5018   }
5019   if (!col){
5020     ierr = ISDestroy(&iscola);CHKERRQ(ierr);
5021   }
5022   ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
5023   PetscFunctionReturn(0);
5024 }
5025 
5026 #undef __FUNCT__
5027 #define __FUNCT__ "MatGetBrowsOfAcols"
5028 /*@C
5029     MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns of local A
5030 
5031     Collective on Mat
5032 
5033    Input Parameters:
5034 +    A,B - the matrices in mpiaij format
5035 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5036 -    rowb, colb - index sets of rows and columns of B to extract (or PETSC_NULL)
5037 
5038    Output Parameter:
5039 +    rowb, colb - index sets of rows and columns of B to extract
5040 -    B_seq - the sequential matrix generated
5041 
5042     Level: developer
5043 
5044 @*/
5045 PetscErrorCode  MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,Mat *B_seq)
5046 {
5047   Mat_MPIAIJ        *a=(Mat_MPIAIJ*)A->data;
5048   PetscErrorCode    ierr;
5049   PetscInt          *idx,i,start,ncols,nzA,nzB,*cmap,imark;
5050   IS                isrowb,iscolb;
5051   Mat               *bseq=PETSC_NULL;
5052 
5053   PetscFunctionBegin;
  /* the column layout of A must match the row layout of B */
5054   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend){
5055     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
5056   }
5057   ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
5058 
5059   if (scall == MAT_INITIAL_MATRIX){
    /* rows of B to fetch = nonzero column indices of local A, in ascending
       global order: off-diagonal columns left of the diagonal block, the
       diagonal block, then the remaining off-diagonal columns */
5060     start = A->cmap->rstart;
5061     cmap  = a->garray;
5062     nzA   = a->A->cmap->n;
5063     nzB   = a->B->cmap->n;
5064     ierr  = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr);
5065     ncols = 0;
5066     for (i=0; i<nzB; i++) {  /* row < local row index */
5067       if (cmap[i] < start) idx[ncols++] = cmap[i];
5068       else break;
5069     }
5070     imark = i;
5071     for (i=0; i<nzA; i++) idx[ncols++] = start + i;  /* local rows */
5072     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */
5073     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&isrowb);CHKERRQ(ierr);
5074     ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr);
5075   } else {
5076     if (!rowb || !colb) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX");
5077     isrowb = *rowb; iscolb = *colb;
    /* MatGetSubMatrices() expects the reused matrix in a malloc'ed array */
5078     ierr = PetscMalloc(sizeof(Mat),&bseq);CHKERRQ(ierr);
5079     bseq[0] = *B_seq;
5080   }
5081   ierr = MatGetSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr);
5082   *B_seq = bseq[0];
5083   ierr = PetscFree(bseq);CHKERRQ(ierr);
  /* return the index sets to the caller for reuse, or clean them up */
5084   if (!rowb){
5085     ierr = ISDestroy(&isrowb);CHKERRQ(ierr);
5086   } else {
5087     *rowb = isrowb;
5088   }
5089   if (!colb){
5090     ierr = ISDestroy(&iscolb);CHKERRQ(ierr);
5091   } else {
5092     *colb = iscolb;
5093   }
5094   ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
5095   PetscFunctionReturn(0);
5096 }
5097 
5098 #undef __FUNCT__
5099 #define __FUNCT__ "MatGetBrowsOfAoCols_MPIAIJ"
5100 /*
5101     MatGetBrowsOfAoCols_MPIAIJ - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns
5102     of the OFF-DIAGONAL portion of local A
5103 
5104     Collective on Mat
5105 
5106    Input Parameters:
5107 +    A,B - the matrices in mpiaij format
5108 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5109 
5110    Output Parameter:
5111 +    startsj - starting point in B's sending and receiving j-arrays, saved for MAT_REUSE (or PETSC_NULL)
5112 .    startsj_r - similar to startsj for receives
5113 .    bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or PETSC_NULL)
5114 -    B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N
5115 
5116     Level: developer
5117 
5118 */
5119 PetscErrorCode  MatGetBrowsOfAoCols_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscInt **startsj,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth)
5120 {
5121   VecScatter_MPI_General *gen_to,*gen_from;
5122   PetscErrorCode         ierr;
5123   Mat_MPIAIJ             *a=(Mat_MPIAIJ*)A->data;
5124   Mat_SeqAIJ             *b_oth;
5125   VecScatter             ctx=a->Mvctx;
5126   MPI_Comm               comm=((PetscObject)ctx)->comm;
5127   PetscMPIInt            *rprocs,*sprocs,tag=((PetscObject)ctx)->tag,rank;
5128   PetscInt               *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap->n,row,*b_othi,*b_othj;
5129   PetscScalar            *rvalues,*svalues;
5130   MatScalar              *b_otha,*bufa,*bufA;
5131   PetscInt               i,j,k,l,ll,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len;
5132   MPI_Request            *rwaits = PETSC_NULL,*swaits = PETSC_NULL;
5133   MPI_Status             *sstatus,rstatus;
5134   PetscMPIInt            jj;
5135   PetscInt               *cols,sbs,rbs;
5136   PetscScalar            *vals;
5137 
5138   PetscFunctionBegin;
5139   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend){
5140     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
5141   }
5142   ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
5143   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
5144 
5145   gen_to   = (VecScatter_MPI_General*)ctx->todata;
5146   gen_from = (VecScatter_MPI_General*)ctx->fromdata;
5147   rvalues  = gen_from->values; /* holds the length of receiving row */
5148   svalues  = gen_to->values;   /* holds the length of sending row */
5149   nrecvs   = gen_from->n;
5150   nsends   = gen_to->n;
5151 
5152   ierr = PetscMalloc2(nrecvs,MPI_Request,&rwaits,nsends,MPI_Request,&swaits);CHKERRQ(ierr);
5153   srow     = gen_to->indices;   /* local row index to be sent */
5154   sstarts  = gen_to->starts;
5155   sprocs   = gen_to->procs;
5156   sstatus  = gen_to->sstatus;
5157   sbs      = gen_to->bs;
5158   rstarts  = gen_from->starts;
5159   rprocs   = gen_from->procs;
5160   rbs      = gen_from->bs;
5161 
5162   if (!startsj || !bufa_ptr) scall = MAT_INITIAL_MATRIX;
5163   if (scall == MAT_INITIAL_MATRIX){
5164     /* i-array */
5165     /*---------*/
5166     /*  post receives */
5167     for (i=0; i<nrecvs; i++){
5168       rowlen = (PetscInt*)rvalues + rstarts[i]*rbs;
5169       nrows = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */
5170       ierr = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5171     }
5172 
5173     /* pack the outgoing message */
5174     ierr = PetscMalloc2(nsends+1,PetscInt,&sstartsj,nrecvs+1,PetscInt,&rstartsj);CHKERRQ(ierr);
5175     sstartsj[0] = 0;  rstartsj[0] = 0;
5176     len = 0; /* total length of j or a array to be sent */
5177     k = 0;
5178     for (i=0; i<nsends; i++){
5179       rowlen = (PetscInt*)svalues + sstarts[i]*sbs;
5180       nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5181       for (j=0; j<nrows; j++) {
5182         row = srow[k] + B->rmap->range[rank]; /* global row idx */
5183         for (l=0; l<sbs; l++){
5184           ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); /* rowlength */
5185           rowlen[j*sbs+l] = ncols;
5186           len += ncols;
5187           ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr);
5188         }
5189         k++;
5190       }
5191       ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5192       sstartsj[i+1] = len;  /* starting point of (i+1)-th outgoing msg in bufj and bufa */
5193     }
5194     /* recvs and sends of i-array are completed */
5195     i = nrecvs;
5196     while (i--) {
5197       ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5198     }
5199     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
5200 
5201     /* allocate buffers for sending j and a arrays */
5202     ierr = PetscMalloc((len+1)*sizeof(PetscInt),&bufj);CHKERRQ(ierr);
5203     ierr = PetscMalloc((len+1)*sizeof(PetscScalar),&bufa);CHKERRQ(ierr);
5204 
5205     /* create i-array of B_oth */
5206     ierr = PetscMalloc((aBn+2)*sizeof(PetscInt),&b_othi);CHKERRQ(ierr);
5207     b_othi[0] = 0;
5208     len = 0; /* total length of j or a array to be received */
5209     k = 0;
5210     for (i=0; i<nrecvs; i++){
5211       rowlen = (PetscInt*)rvalues + rstarts[i]*rbs;
5212       nrows = rbs*(rstarts[i+1]-rstarts[i]); /* num of rows to be recieved */
5213       for (j=0; j<nrows; j++) {
5214         b_othi[k+1] = b_othi[k] + rowlen[j];
5215         len += rowlen[j]; k++;
5216       }
5217       rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */
5218     }
5219 
5220     /* allocate space for j and a arrrays of B_oth */
5221     ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(PetscInt),&b_othj);CHKERRQ(ierr);
5222     ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(MatScalar),&b_otha);CHKERRQ(ierr);
5223 
5224     /* j-array */
5225     /*---------*/
5226     /*  post receives of j-array */
5227     for (i=0; i<nrecvs; i++){
5228       nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
5229       ierr = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5230     }
5231 
5232     /* pack the outgoing message j-array */
5233     k = 0;
5234     for (i=0; i<nsends; i++){
5235       nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5236       bufJ = bufj+sstartsj[i];
5237       for (j=0; j<nrows; j++) {
5238         row  = srow[k++] + B->rmap->range[rank]; /* global row idx */
5239         for (ll=0; ll<sbs; ll++){
5240           ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr);
5241           for (l=0; l<ncols; l++){
5242             *bufJ++ = cols[l];
5243           }
5244           ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr);
5245         }
5246       }
5247       ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5248     }
5249 
5250     /* recvs and sends of j-array are completed */
5251     i = nrecvs;
5252     while (i--) {
5253       ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5254     }
5255     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
5256   } else if (scall == MAT_REUSE_MATRIX){
5257     sstartsj = *startsj;
5258     rstartsj = *startsj_r;
5259     bufa     = *bufa_ptr;
5260     b_oth    = (Mat_SeqAIJ*)(*B_oth)->data;
5261     b_otha   = b_oth->a;
5262   } else {
5263     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not posses an object container");
5264   }
5265 
5266   /* a-array */
5267   /*---------*/
5268   /*  post receives of a-array */
5269   for (i=0; i<nrecvs; i++){
5270     nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
5271     ierr = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5272   }
5273 
5274   /* pack the outgoing message a-array */
5275   k = 0;
5276   for (i=0; i<nsends; i++){
5277     nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5278     bufA = bufa+sstartsj[i];
5279     for (j=0; j<nrows; j++) {
5280       row  = srow[k++] + B->rmap->range[rank]; /* global row idx */
5281       for (ll=0; ll<sbs; ll++){
5282         ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr);
5283         for (l=0; l<ncols; l++){
5284           *bufA++ = vals[l];
5285         }
5286         ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr);
5287       }
5288     }
5289     ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5290   }
5291   /* recvs and sends of a-array are completed */
5292   i = nrecvs;
5293   while (i--) {
5294     ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5295   }
5296   if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
5297   ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr);
5298 
5299   if (scall == MAT_INITIAL_MATRIX){
5300     /* put together the new matrix */
5301     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr);
5302 
5303     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
5304     /* Since these are PETSc arrays, change flags to free them as necessary. */
5305     b_oth          = (Mat_SeqAIJ *)(*B_oth)->data;
5306     b_oth->free_a  = PETSC_TRUE;
5307     b_oth->free_ij = PETSC_TRUE;
5308     b_oth->nonew   = 0;
5309 
5310     ierr = PetscFree(bufj);CHKERRQ(ierr);
5311     if (!startsj || !bufa_ptr){
5312       ierr = PetscFree2(sstartsj,rstartsj);CHKERRQ(ierr);
5313       ierr = PetscFree(bufa_ptr);CHKERRQ(ierr);
5314     } else {
5315       *startsj   = sstartsj;
5316       *startsj_r = rstartsj;
5317       *bufa_ptr  = bufa;
5318     }
5319   }
5320   ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
5321   PetscFunctionReturn(0);
5322 }
5323 
5324 #undef __FUNCT__
5325 #define __FUNCT__ "MatGetCommunicationStructs"
5326 /*@C
5327   MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication.
5328 
5329   Not Collective
5330 
5331   Input Parameters:
5332 . A - The matrix in mpiaij format
5333 
5334   Output Parameter:
5335 + lvec - The local vector holding off-process values from the argument to a matrix-vector product
5336 . colmap - A map from global column index to local index into lvec
5337 - multScatter - A scatter from the argument of a matrix-vector product to lvec
5338 
5339   Level: developer
5340 
5341 @*/
5342 #if defined (PETSC_USE_CTABLE)
5343 PetscErrorCode  MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter)
5344 #else
5345 PetscErrorCode  MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter)
5346 #endif
5347 {
5348   Mat_MPIAIJ *a;
5349 
5350   PetscFunctionBegin;
5351   PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
5352   PetscValidPointer(lvec, 2);
5353   PetscValidPointer(colmap, 3);
5354   PetscValidPointer(multScatter, 4);
5355   a = (Mat_MPIAIJ *) A->data;
5356   if (lvec) *lvec = a->lvec;
5357   if (colmap) *colmap = a->colmap;
5358   if (multScatter) *multScatter = a->Mvctx;
5359   PetscFunctionReturn(0);
5360 }
5361 
5362 EXTERN_C_BEGIN
5363 extern PetscErrorCode  MatConvert_MPIAIJ_MPIAIJCRL(Mat,const MatType,MatReuse,Mat*);
5364 extern PetscErrorCode  MatConvert_MPIAIJ_MPIAIJPERM(Mat,const MatType,MatReuse,Mat*);
5365 extern PetscErrorCode  MatConvert_MPIAIJ_MPISBAIJ(Mat,const MatType,MatReuse,Mat*);
5366 EXTERN_C_END
5367 
5368 #undef __FUNCT__
5369 #define __FUNCT__ "MatMatMultNumeric_MPIDense_MPIAIJ"
5370 /*
5371     Computes (B'*A')' since computing B*A directly is untenable
5372 
5373                n                       p                          p
5374         (              )       (              )         (                  )
5375       m (      A       )  *  n (       B      )   =   m (         C        )
5376         (              )       (              )         (                  )
5377 
5378 */
5379 PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C)
5380 {
5381   PetscErrorCode     ierr;
5382   Mat                At,Bt,Ct;
5383 
5384   PetscFunctionBegin;
5385   ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr);
5386   ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr);
5387   ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,1.0,&Ct);CHKERRQ(ierr);
5388   ierr = MatDestroy(&At);CHKERRQ(ierr);
5389   ierr = MatDestroy(&Bt);CHKERRQ(ierr);
5390   ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr);
5391   ierr = MatDestroy(&Ct);CHKERRQ(ierr);
5392   PetscFunctionReturn(0);
5393 }
5394 
5395 #undef __FUNCT__
5396 #define __FUNCT__ "MatMatMultSymbolic_MPIDense_MPIAIJ"
5397 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat *C)
5398 {
5399   PetscErrorCode ierr;
5400   PetscInt       m=A->rmap->n,n=B->cmap->n;
5401   Mat            Cmat;
5402 
5403   PetscFunctionBegin;
5404   if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %d != B->rmap->n %d\n",A->cmap->n,B->rmap->n);
5405   ierr = MatCreate(((PetscObject)A)->comm,&Cmat);CHKERRQ(ierr);
5406   ierr = MatSetSizes(Cmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
5407   ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr);
5408   ierr = MatMPIDenseSetPreallocation(Cmat,PETSC_NULL);CHKERRQ(ierr);
5409   ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5410   ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5411   *C   = Cmat;
5412   (*C)->ops->matmult = MatMatMult_MPIDense_MPIAIJ;
5413   PetscFunctionReturn(0);
5414 }
5415 
/* ----------------------------------------------------------------*/
#undef __FUNCT__
#define __FUNCT__ "MatMatMult_MPIDense_MPIAIJ"
/*
    Driver for C = A*B with A MPIDENSE and B MPIAIJ: on the first call
    (MAT_INITIAL_MATRIX) the symbolic stage creates C, then every call runs
    the numeric stage to compute the values.
*/
PetscErrorCode MatMatMult_MPIDense_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (scall == MAT_INITIAL_MATRIX){
    ierr = MatMatMultSymbolic_MPIDense_MPIAIJ(A,B,fill,C);CHKERRQ(ierr);
  }
  ierr = MatMatMultNumeric_MPIDense_MPIAIJ(A,B,*C);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
5430 
5431 EXTERN_C_BEGIN
5432 #if defined(PETSC_HAVE_MUMPS)
5433 extern PetscErrorCode MatGetFactor_aij_mumps(Mat,MatFactorType,Mat*);
5434 #endif
5435 #if defined(PETSC_HAVE_PASTIX)
5436 extern PetscErrorCode MatGetFactor_mpiaij_pastix(Mat,MatFactorType,Mat*);
5437 #endif
5438 #if defined(PETSC_HAVE_SUPERLU_DIST)
5439 extern PetscErrorCode MatGetFactor_mpiaij_superlu_dist(Mat,MatFactorType,Mat*);
5440 #endif
5441 #if defined(PETSC_HAVE_SPOOLES)
5442 extern PetscErrorCode MatGetFactor_mpiaij_spooles(Mat,MatFactorType,Mat*);
5443 #endif
5444 EXTERN_C_END
5445 
5446 /*MC
5447    MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.
5448 
5449    Options Database Keys:
5450 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions()
5451 
5452   Level: beginner
5453 
5454 .seealso: MatCreateMPIAIJ()
5455 M*/
5456 
5457 EXTERN_C_BEGIN
5458 #undef __FUNCT__
5459 #define __FUNCT__ "MatCreate_MPIAIJ"
5460 PetscErrorCode  MatCreate_MPIAIJ(Mat B)
5461 {
5462   Mat_MPIAIJ     *b;
5463   PetscErrorCode ierr;
5464   PetscMPIInt    size;
5465 
5466   PetscFunctionBegin;
5467   ierr = MPI_Comm_size(((PetscObject)B)->comm,&size);CHKERRQ(ierr);
5468 
5469   ierr            = PetscNewLog(B,Mat_MPIAIJ,&b);CHKERRQ(ierr);
5470   B->data         = (void*)b;
5471   ierr            = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
5472   B->rmap->bs     = 1;
5473   B->assembled    = PETSC_FALSE;
5474 
5475   B->insertmode   = NOT_SET_VALUES;
5476   b->size         = size;
5477   ierr = MPI_Comm_rank(((PetscObject)B)->comm,&b->rank);CHKERRQ(ierr);
5478 
5479   /* build cache for off array entries formed */
5480   ierr = MatStashCreate_Private(((PetscObject)B)->comm,1,&B->stash);CHKERRQ(ierr);
5481   b->donotstash  = PETSC_FALSE;
5482   b->colmap      = 0;
5483   b->garray      = 0;
5484   b->roworiented = PETSC_TRUE;
5485 
5486   /* stuff used for matrix vector multiply */
5487   b->lvec      = PETSC_NULL;
5488   b->Mvctx     = PETSC_NULL;
5489 
5490   /* stuff for MatGetRow() */
5491   b->rowindices   = 0;
5492   b->rowvalues    = 0;
5493   b->getrowactive = PETSC_FALSE;
5494 
5495 #if defined(PETSC_HAVE_SPOOLES)
5496   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_spooles_C",
5497                                      "MatGetFactor_mpiaij_spooles",
5498                                      MatGetFactor_mpiaij_spooles);CHKERRQ(ierr);
5499 #endif
5500 #if defined(PETSC_HAVE_MUMPS)
5501   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_mumps_C",
5502                                      "MatGetFactor_aij_mumps",
5503                                      MatGetFactor_aij_mumps);CHKERRQ(ierr);
5504 #endif
5505 #if defined(PETSC_HAVE_PASTIX)
5506   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_pastix_C",
5507 					   "MatGetFactor_mpiaij_pastix",
5508 					   MatGetFactor_mpiaij_pastix);CHKERRQ(ierr);
5509 #endif
5510 #if defined(PETSC_HAVE_SUPERLU_DIST)
5511   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_superlu_dist_C",
5512                                      "MatGetFactor_mpiaij_superlu_dist",
5513                                      MatGetFactor_mpiaij_superlu_dist);CHKERRQ(ierr);
5514 #endif
5515   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
5516                                      "MatStoreValues_MPIAIJ",
5517                                      MatStoreValues_MPIAIJ);CHKERRQ(ierr);
5518   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
5519                                      "MatRetrieveValues_MPIAIJ",
5520                                      MatRetrieveValues_MPIAIJ);CHKERRQ(ierr);
5521   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
5522 				     "MatGetDiagonalBlock_MPIAIJ",
5523                                      MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr);
5524   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatIsTranspose_C",
5525 				     "MatIsTranspose_MPIAIJ",
5526 				     MatIsTranspose_MPIAIJ);CHKERRQ(ierr);
5527   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocation_C",
5528 				     "MatMPIAIJSetPreallocation_MPIAIJ",
5529 				     MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr);
5530   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",
5531 				     "MatMPIAIJSetPreallocationCSR_MPIAIJ",
5532 				     MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr);
5533   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C",
5534 				     "MatDiagonalScaleLocal_MPIAIJ",
5535 				     MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr);
5536   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpiaijperm_C",
5537                                      "MatConvert_MPIAIJ_MPIAIJPERM",
5538                                       MatConvert_MPIAIJ_MPIAIJPERM);CHKERRQ(ierr);
5539   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpiaijcrl_C",
5540                                      "MatConvert_MPIAIJ_MPIAIJCRL",
5541                                       MatConvert_MPIAIJ_MPIAIJCRL);CHKERRQ(ierr);
5542   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C",
5543                                      "MatConvert_MPIAIJ_MPISBAIJ",
5544                                       MatConvert_MPIAIJ_MPISBAIJ);CHKERRQ(ierr);
5545   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMult_mpidense_mpiaij_C",
5546                                      "MatMatMult_MPIDense_MPIAIJ",
5547                                       MatMatMult_MPIDense_MPIAIJ);CHKERRQ(ierr);
5548   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMultSymbolic_mpidense_mpiaij_C",
5549                                      "MatMatMultSymbolic_MPIDense_MPIAIJ",
5550                                      MatMatMultSymbolic_MPIDense_MPIAIJ);CHKERRQ(ierr);
5551   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMultNumeric_mpidense_mpiaij_C",
5552                                      "MatMatMultNumeric_MPIDense_MPIAIJ",
5553                                       MatMatMultNumeric_MPIDense_MPIAIJ);CHKERRQ(ierr);
5554   ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr);
5555   PetscFunctionReturn(0);
5556 }
5557 EXTERN_C_END
5558 
5559 #undef __FUNCT__
5560 #define __FUNCT__ "MatCreateMPIAIJWithSplitArrays"
5561 /*@
5562      MatCreateMPIAIJWithSplitArrays - creates a MPI AIJ matrix using arrays that contain the "diagonal"
5563          and "off-diagonal" part of the matrix in CSR format.
5564 
5565    Collective on MPI_Comm
5566 
5567    Input Parameters:
5568 +  comm - MPI communicator
5569 .  m - number of local rows (Cannot be PETSC_DECIDE)
5570 .  n - This value should be the same as the local size used in creating the
5571        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
5572        calculated if N is given) For square matrices n is almost always m.
5573 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
5574 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
5575 .   i - row indices for "diagonal" portion of matrix
5576 .   j - column indices
5577 .   a - matrix values
5578 .   oi - row indices for "off-diagonal" portion of matrix
5579 .   oj - column indices
5580 -   oa - matrix values
5581 
5582    Output Parameter:
5583 .   mat - the matrix
5584 
5585    Level: advanced
5586 
5587    Notes:
5588        The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc. The user
5589        must free the arrays once the matrix has been destroyed and not before.
5590 
5591        The i and j indices are 0 based
5592 
5593        See MatCreateMPIAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix
5594 
5595        This sets local rows and cannot be used to set off-processor values.
5596 
5597        You cannot later use MatSetValues() to change values in this matrix.
5598 
5599 .keywords: matrix, aij, compressed row, sparse, parallel
5600 
5601 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
5602           MPIAIJ, MatCreateMPIAIJ(), MatCreateMPIAIJWithArrays()
5603 @*/
5604 PetscErrorCode  MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],
5605 								PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat)
5606 {
5607   PetscErrorCode ierr;
5608   Mat_MPIAIJ     *maij;
5609 
5610  PetscFunctionBegin;
5611   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
5612   if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
5613   if (oi[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0");
5614   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
5615   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
5616   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
5617   maij = (Mat_MPIAIJ*) (*mat)->data;
5618   maij->donotstash     = PETSC_TRUE;
5619   (*mat)->preallocated = PETSC_TRUE;
5620 
5621   ierr = PetscLayoutSetBlockSize((*mat)->rmap,1);CHKERRQ(ierr);
5622   ierr = PetscLayoutSetBlockSize((*mat)->cmap,1);CHKERRQ(ierr);
5623   ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr);
5624   ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr);
5625 
5626   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr);
5627   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr);
5628 
5629   ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5630   ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5631   ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5632   ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5633 
5634   ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5635   ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5636   PetscFunctionReturn(0);
5637 }
5638 
5639 /*
5640     Special version for direct calls from Fortran
5641 */
5642 #include <private/fortranimpl.h>
5643 
5644 #if defined(PETSC_HAVE_FORTRAN_CAPS)
5645 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ
5646 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
5647 #define matsetvaluesmpiaij_ matsetvaluesmpiaij
5648 #endif
5649 
5650 /* Change these macros so can be used in void function */
5651 #undef CHKERRQ
5652 #define CHKERRQ(ierr) CHKERRABORT(PETSC_COMM_WORLD,ierr)
5653 #undef SETERRQ2
5654 #define SETERRQ2(comm,ierr,b,c,d) CHKERRABORT(comm,ierr)
5655 #undef SETERRQ
5656 #define SETERRQ(c,ierr,b) CHKERRABORT(c,ierr)
5657 
5658 EXTERN_C_BEGIN
5659 #undef __FUNCT__
5660 #define __FUNCT__ "matsetvaluesmpiaij_"
5661 void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr)
5662 {
5663   Mat             mat = *mmat;
5664   PetscInt        m = *mm, n = *mn;
5665   InsertMode      addv = *maddv;
5666   Mat_MPIAIJ      *aij = (Mat_MPIAIJ*)mat->data;
5667   PetscScalar     value;
5668   PetscErrorCode  ierr;
5669 
5670   ierr = MatPreallocated(mat);CHKERRQ(ierr);
5671   if (mat->insertmode == NOT_SET_VALUES) {
5672     mat->insertmode = addv;
5673   }
5674 #if defined(PETSC_USE_DEBUG)
5675   else if (mat->insertmode != addv) {
5676     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
5677   }
5678 #endif
5679   {
5680   PetscInt        i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
5681   PetscInt        cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
5682   PetscBool       roworiented = aij->roworiented;
5683 
5684   /* Some Variables required in the macro */
5685   Mat             A = aij->A;
5686   Mat_SeqAIJ      *a = (Mat_SeqAIJ*)A->data;
5687   PetscInt        *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
5688   MatScalar       *aa = a->a;
5689   PetscBool       ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES))?PETSC_TRUE:PETSC_FALSE);
5690   Mat             B = aij->B;
5691   Mat_SeqAIJ      *b = (Mat_SeqAIJ*)B->data;
5692   PetscInt        *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
5693   MatScalar       *ba = b->a;
5694 
5695   PetscInt        *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
5696   PetscInt        nonew = a->nonew;
5697   MatScalar       *ap1,*ap2;
5698 
5699   PetscFunctionBegin;
5700   for (i=0; i<m; i++) {
5701     if (im[i] < 0) continue;
5702 #if defined(PETSC_USE_DEBUG)
5703     if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
5704 #endif
5705     if (im[i] >= rstart && im[i] < rend) {
5706       row      = im[i] - rstart;
5707       lastcol1 = -1;
5708       rp1      = aj + ai[row];
5709       ap1      = aa + ai[row];
5710       rmax1    = aimax[row];
5711       nrow1    = ailen[row];
5712       low1     = 0;
5713       high1    = nrow1;
5714       lastcol2 = -1;
5715       rp2      = bj + bi[row];
5716       ap2      = ba + bi[row];
5717       rmax2    = bimax[row];
5718       nrow2    = bilen[row];
5719       low2     = 0;
5720       high2    = nrow2;
5721 
5722       for (j=0; j<n; j++) {
5723         if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
5724         if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
5725         if (in[j] >= cstart && in[j] < cend){
5726           col = in[j] - cstart;
5727           MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
5728         } else if (in[j] < 0) continue;
5729 #if defined(PETSC_USE_DEBUG)
5730         else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
5731 #endif
5732         else {
5733           if (mat->was_assembled) {
5734             if (!aij->colmap) {
5735               ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
5736             }
5737 #if defined (PETSC_USE_CTABLE)
5738             ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
5739 	    col--;
5740 #else
5741             col = aij->colmap[in[j]] - 1;
5742 #endif
5743             if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
5744               ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
5745               col =  in[j];
5746               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
5747               B = aij->B;
5748               b = (Mat_SeqAIJ*)B->data;
5749               bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
5750               rp2      = bj + bi[row];
5751               ap2      = ba + bi[row];
5752               rmax2    = bimax[row];
5753               nrow2    = bilen[row];
5754               low2     = 0;
5755               high2    = nrow2;
5756               bm       = aij->B->rmap->n;
5757               ba = b->a;
5758             }
5759           } else col = in[j];
5760           MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
5761         }
5762       }
5763     } else {
5764       if (!aij->donotstash) {
5765         if (roworiented) {
5766           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
5767         } else {
5768           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
5769         }
5770       }
5771     }
5772   }}
5773   PetscFunctionReturnVoid();
5774 }
5775 EXTERN_C_END
5776 
5777