xref: /petsc/src/mat/impls/aij/mpi/mpiaij.c (revision 84df9cb40eca90ea9b18a456fab7a4ecc7f6c1a4)
1 
2 #include <../src/mat/impls/aij/mpi/mpiaij.h>   /*I "petscmat.h" I*/
3 #include <petscblaslapack.h>
4 
5 /*MC
6    MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices.
7 
8    This matrix type is identical to MATSEQAIJ when constructed with a single process communicator,
9    and MATMPIAIJ otherwise.  As a result, for single process communicators,
10   MatSeqAIJSetPreallocation is supported, and similarly MatMPIAIJSetPreallocation is supported
11   for communicators controlling multiple processes.  It is recommended that you call both of
12   the above preallocation routines for simplicity.
13 
14    Options Database Keys:
15 . -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions()
16 
17   Developer Notes: Subclasses include MATAIJCUSP, MATAIJPERM, MATAIJCRL, and also automatically switches over to use inodes when
18    enough exist.
19 
20   Level: beginner
21 
22 .seealso: MatCreateMPIAIJ(), MatCreateSeqAIJ(), MATSEQAIJ,MATMPIAIJ
23 M*/
24 
25 /*MC
26    MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices.
27 
28    This matrix type is identical to MATSEQAIJCRL when constructed with a single process communicator,
29    and MATMPIAIJCRL otherwise.  As a result, for single process communicators,
30    MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
31   for communicators controlling multiple processes.  It is recommended that you call both of
32   the above preallocation routines for simplicity.
33 
34    Options Database Keys:
35 . -mat_type aijcrl - sets the matrix type to "aijcrl" during a call to MatSetFromOptions()
36 
37   Level: beginner
38 
39 .seealso: MatCreateMPIAIJCRL,MATSEQAIJCRL,MATMPIAIJCRL, MATSEQAIJCRL, MATMPIAIJCRL
40 M*/
41 
42 #undef __FUNCT__
43 #define __FUNCT__ "MatFindNonZeroRows_MPIAIJ"
44 PetscErrorCode MatFindNonZeroRows_MPIAIJ(Mat M,IS *keptrows)
45 {
46   PetscErrorCode  ierr;
47   Mat_MPIAIJ      *mat = (Mat_MPIAIJ*)M->data;
48   Mat_SeqAIJ      *a = (Mat_SeqAIJ*)mat->A->data;
49   Mat_SeqAIJ      *b = (Mat_SeqAIJ*)mat->B->data;
50   const PetscInt  *ia,*ib;
51   const MatScalar *aa,*bb;
52   PetscInt        na,nb,i,j,*rows,cnt=0,n0rows;
53   PetscInt        m = M->rmap->n,rstart = M->rmap->rstart;
54 
55   PetscFunctionBegin;
56   *keptrows = 0;
57   ia = a->i;
58   ib = b->i;
59   for (i=0; i<m; i++) {
60     na = ia[i+1] - ia[i];
61     nb = ib[i+1] - ib[i];
62     if (!na && !nb) {
63       cnt++;
64       goto ok1;
65     }
66     aa = a->a + ia[i];
67     for (j=0; j<na; j++) {
68       if (aa[j] != 0.0) goto ok1;
69     }
70     bb = b->a + ib[i];
71     for (j=0; j <nb; j++) {
72       if (bb[j] != 0.0) goto ok1;
73     }
74     cnt++;
75     ok1:;
76   }
77   ierr = MPI_Allreduce(&cnt,&n0rows,1,MPIU_INT,MPI_SUM,((PetscObject)M)->comm);CHKERRQ(ierr);
78   if (!n0rows) PetscFunctionReturn(0);
79   ierr = PetscMalloc((M->rmap->n-cnt)*sizeof(PetscInt),&rows);CHKERRQ(ierr);
80   cnt = 0;
81   for (i=0; i<m; i++) {
82     na = ia[i+1] - ia[i];
83     nb = ib[i+1] - ib[i];
84     if (!na && !nb) continue;
85     aa = a->a + ia[i];
86     for(j=0; j<na;j++) {
87       if (aa[j] != 0.0) {
88         rows[cnt++] = rstart + i;
89         goto ok2;
90       }
91     }
92     bb = b->a + ib[i];
93     for (j=0; j<nb; j++) {
94       if (bb[j] != 0.0) {
95         rows[cnt++] = rstart + i;
96         goto ok2;
97       }
98     }
99     ok2:;
100   }
101   ierr = ISCreateGeneral(PETSC_COMM_WORLD,cnt,rows,PETSC_OWN_POINTER,keptrows);CHKERRQ(ierr);
102   PetscFunctionReturn(0);
103 }
104 
105 #undef __FUNCT__
106 #define __FUNCT__ "MatGetColumnNorms_MPIAIJ"
107 PetscErrorCode MatGetColumnNorms_MPIAIJ(Mat A,NormType type,PetscReal *norms)
108 {
109   PetscErrorCode ierr;
110   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)A->data;
111   PetscInt       i,n,*garray = aij->garray;
112   Mat_SeqAIJ     *a_aij = (Mat_SeqAIJ*) aij->A->data;
113   Mat_SeqAIJ     *b_aij = (Mat_SeqAIJ*) aij->B->data;
114   PetscReal      *work;
115 
116   PetscFunctionBegin;
117   ierr = MatGetSize(A,PETSC_NULL,&n);CHKERRQ(ierr);
118   ierr = PetscMalloc(n*sizeof(PetscReal),&work);CHKERRQ(ierr);
119   ierr = PetscMemzero(work,n*sizeof(PetscReal));CHKERRQ(ierr);
120   if (type == NORM_2) {
121     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
122       work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]*a_aij->a[i]);
123     }
124     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
125       work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]*b_aij->a[i]);
126     }
127   } else if (type == NORM_1) {
128     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
129       work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]);
130     }
131     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
132       work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]);
133     }
134   } else if (type == NORM_INFINITY) {
135     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
136       work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]);
137     }
138     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
139       work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]),work[garray[b_aij->j[i]]]);
140     }
141 
142   } else SETERRQ(((PetscObject)A)->comm,PETSC_ERR_ARG_WRONG,"Unknown NormType");
143   if (type == NORM_INFINITY) {
144     ierr = MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,A->hdr.comm);CHKERRQ(ierr);
145   } else {
146     ierr = MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,A->hdr.comm);CHKERRQ(ierr);
147   }
148   ierr = PetscFree(work);CHKERRQ(ierr);
149   if (type == NORM_2) {
150     for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]);
151   }
152   PetscFunctionReturn(0);
153 }
154 
155 #undef __FUNCT__
156 #define __FUNCT__ "MatDistribute_MPIAIJ"
157 /*
158     Distributes a SeqAIJ matrix across a set of processes. Code stolen from
159     MatLoad_MPIAIJ(). Horrible lack of reuse. Should be a routine for each matrix type.
160 
161     Only for square matrices
162 */
163 PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat)
164 {
165   PetscMPIInt    rank,size;
166   PetscInt       *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz,*gmataj,cnt,row,*ld;
167   PetscErrorCode ierr;
168   Mat            mat;
169   Mat_SeqAIJ     *gmata;
170   PetscMPIInt    tag;
171   MPI_Status     status;
172   PetscBool      aij;
173   MatScalar      *gmataa,*ao,*ad,*gmataarestore=0;
174 
175   PetscFunctionBegin;
176   CHKMEMQ;
177   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
178   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
179   if (!rank) {
180     ierr = PetscTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);CHKERRQ(ierr);
181     if (!aij) SETERRQ1(((PetscObject)gmat)->comm,PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name);
182   }
183   if (reuse == MAT_INITIAL_MATRIX) {
184     ierr = MatCreate(comm,&mat);CHKERRQ(ierr);
185     ierr = MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
186     ierr = MatSetType(mat,MATAIJ);CHKERRQ(ierr);
187     ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr);
188     ierr = PetscMalloc2(m,PetscInt,&dlens,m,PetscInt,&olens);CHKERRQ(ierr);
189     ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
190     rowners[0] = 0;
191     for (i=2; i<=size; i++) {
192       rowners[i] += rowners[i-1];
193     }
194     rstart = rowners[rank];
195     rend   = rowners[rank+1];
196     ierr   = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
197     if (!rank) {
198       gmata = (Mat_SeqAIJ*) gmat->data;
199       /* send row lengths to all processors */
200       for (i=0; i<m; i++) dlens[i] = gmata->ilen[i];
201       for (i=1; i<size; i++) {
202 	ierr = MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
203       }
204       /* determine number diagonal and off-diagonal counts */
205       ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
206       ierr = PetscMalloc(m*sizeof(PetscInt),&ld);CHKERRQ(ierr);
207       ierr = PetscMemzero(ld,m*sizeof(PetscInt));CHKERRQ(ierr);
208       jj = 0;
209       for (i=0; i<m; i++) {
210 	for (j=0; j<dlens[i]; j++) {
211           if (gmata->j[jj] < rstart) ld[i]++;
212 	  if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++;
213 	  jj++;
214 	}
215       }
216       /* send column indices to other processes */
217       for (i=1; i<size; i++) {
218 	nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
219 	ierr = MPI_Send(&nz,1,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
220 	ierr = MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
221       }
222 
223       /* send numerical values to other processes */
224       for (i=1; i<size; i++) {
225         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
226         ierr = MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
227       }
228       gmataa = gmata->a;
229       gmataj = gmata->j;
230 
231     } else {
232       /* receive row lengths */
233       ierr = MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
234       /* receive column indices */
235       ierr = MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
236       ierr = PetscMalloc2(nz,PetscScalar,&gmataa,nz,PetscInt,&gmataj);CHKERRQ(ierr);
237       ierr = MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
238       /* determine number diagonal and off-diagonal counts */
239       ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
240       ierr = PetscMalloc(m*sizeof(PetscInt),&ld);CHKERRQ(ierr);
241       ierr = PetscMemzero(ld,m*sizeof(PetscInt));CHKERRQ(ierr);
242       jj = 0;
243       for (i=0; i<m; i++) {
244 	for (j=0; j<dlens[i]; j++) {
245           if (gmataj[jj] < rstart) ld[i]++;
246 	  if (gmataj[jj] < rstart || gmataj[jj] >= rend) olens[i]++;
247 	  jj++;
248 	}
249       }
250       /* receive numerical values */
251       ierr = PetscMemzero(gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr);
252       ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
253     }
254     /* set preallocation */
255     for (i=0; i<m; i++) {
256       dlens[i] -= olens[i];
257     }
258     ierr = MatSeqAIJSetPreallocation(mat,0,dlens);CHKERRQ(ierr);
259     ierr = MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);CHKERRQ(ierr);
260 
261     for (i=0; i<m; i++) {
262       dlens[i] += olens[i];
263     }
264     cnt  = 0;
265     for (i=0; i<m; i++) {
266       row  = rstart + i;
267       ierr = MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);CHKERRQ(ierr);
268       cnt += dlens[i];
269     }
270     if (rank) {
271       ierr = PetscFree2(gmataa,gmataj);CHKERRQ(ierr);
272     }
273     ierr = PetscFree2(dlens,olens);CHKERRQ(ierr);
274     ierr = PetscFree(rowners);CHKERRQ(ierr);
275     ((Mat_MPIAIJ*)(mat->data))->ld = ld;
276     *inmat = mat;
277   } else {   /* column indices are already set; only need to move over numerical values from process 0 */
278     Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data;
279     Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data;
280     mat   = *inmat;
281     ierr  = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
282     if (!rank) {
283       /* send numerical values to other processes */
284       gmata = (Mat_SeqAIJ*) gmat->data;
285       ierr   = MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);CHKERRQ(ierr);
286       gmataa = gmata->a;
287       for (i=1; i<size; i++) {
288         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
289         ierr = MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
290       }
291       nz   = gmata->i[rowners[1]]-gmata->i[rowners[0]];
292     } else {
293       /* receive numerical values from process 0*/
294       nz   = Ad->nz + Ao->nz;
295       ierr = PetscMalloc(nz*sizeof(PetscScalar),&gmataa);CHKERRQ(ierr); gmataarestore = gmataa;
296       ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
297     }
298     /* transfer numerical values into the diagonal A and off diagonal B parts of mat */
299     ld = ((Mat_MPIAIJ*)(mat->data))->ld;
300     ad = Ad->a;
301     ao = Ao->a;
302     if (mat->rmap->n) {
303       i  = 0;
304       nz = ld[i];                                   ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
305       nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
306     }
307     for (i=1; i<mat->rmap->n; i++) {
308       nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
309       nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
310     }
311     i--;
312     if (mat->rmap->n) {
313       nz = Ao->i[i+1] - Ao->i[i] - ld[i];           ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
314     }
315     if (rank) {
316       ierr = PetscFree(gmataarestore);CHKERRQ(ierr);
317     }
318   }
319   ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
320   ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
321   CHKMEMQ;
322   PetscFunctionReturn(0);
323 }
324 
325 /*
326   Local utility routine that creates a mapping from the global column
327 number to the local number in the off-diagonal part of the local
328 storage of the matrix.  When PETSC_USE_CTABLE is used this is scalable at
329 a slightly higher hash table cost; without it it is not scalable (each processor
330 has an order N integer array but is fast to acess.
331 */
332 #undef __FUNCT__
333 #define __FUNCT__ "CreateColmap_MPIAIJ_Private"
334 PetscErrorCode CreateColmap_MPIAIJ_Private(Mat mat)
335 {
336   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
337   PetscErrorCode ierr;
338   PetscInt       n = aij->B->cmap->n,i;
339 
340   PetscFunctionBegin;
341 #if defined (PETSC_USE_CTABLE)
342   ierr = PetscTableCreate(n,mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr);
343   for (i=0; i<n; i++){
344     ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1);CHKERRQ(ierr);
345   }
346 #else
347   ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscInt),&aij->colmap);CHKERRQ(ierr);
348   ierr = PetscLogObjectMemory(mat,mat->cmap->N*sizeof(PetscInt));CHKERRQ(ierr);
349   ierr = PetscMemzero(aij->colmap,mat->cmap->N*sizeof(PetscInt));CHKERRQ(ierr);
350   for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
351 #endif
352   PetscFunctionReturn(0);
353 }
354 
355 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \
356 { \
357     if (col <= lastcol1) low1 = 0; else high1 = nrow1; \
358     lastcol1 = col;\
359     while (high1-low1 > 5) { \
360       t = (low1+high1)/2; \
361       if (rp1[t] > col) high1 = t; \
362       else             low1  = t; \
363     } \
364       for (_i=low1; _i<high1; _i++) { \
365         if (rp1[_i] > col) break; \
366         if (rp1[_i] == col) { \
367           if (addv == ADD_VALUES) ap1[_i] += value;   \
368           else                    ap1[_i] = value; \
369           goto a_noinsert; \
370         } \
371       }  \
372       if (value == 0.0 && ignorezeroentries) {low1 = 0; high1 = nrow1;goto a_noinsert;} \
373       if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;}		\
374       if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
375       MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \
376       N = nrow1++ - 1; a->nz++; high1++; \
377       /* shift up all the later entries in this row */ \
378       for (ii=N; ii>=_i; ii--) { \
379         rp1[ii+1] = rp1[ii]; \
380         ap1[ii+1] = ap1[ii]; \
381       } \
382       rp1[_i] = col;  \
383       ap1[_i] = value;  \
384       a_noinsert: ; \
385       ailen[row] = nrow1; \
386 }
387 
388 
389 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \
390 { \
391     if (col <= lastcol2) low2 = 0; else high2 = nrow2; \
392     lastcol2 = col;\
393     while (high2-low2 > 5) { \
394       t = (low2+high2)/2; \
395       if (rp2[t] > col) high2 = t; \
396       else             low2  = t; \
397     } \
398     for (_i=low2; _i<high2; _i++) {		\
399       if (rp2[_i] > col) break;			\
400       if (rp2[_i] == col) {			      \
401 	if (addv == ADD_VALUES) ap2[_i] += value;     \
402 	else                    ap2[_i] = value;      \
403 	goto b_noinsert;			      \
404       }						      \
405     }							      \
406     if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
407     if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;}		\
408     if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
409     MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \
410     N = nrow2++ - 1; b->nz++; high2++;					\
411     /* shift up all the later entries in this row */			\
412     for (ii=N; ii>=_i; ii--) {						\
413       rp2[ii+1] = rp2[ii];						\
414       ap2[ii+1] = ap2[ii];						\
415     }									\
416     rp2[_i] = col;							\
417     ap2[_i] = value;							\
418     b_noinsert: ;								\
419     bilen[row] = nrow2;							\
420 }
421 
422 #undef __FUNCT__
423 #define __FUNCT__ "MatSetValuesRow_MPIAIJ"
424 PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[])
425 {
426   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)A->data;
427   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data;
428   PetscErrorCode ierr;
429   PetscInt       l,*garray = mat->garray,diag;
430 
431   PetscFunctionBegin;
432   /* code only works for square matrices A */
433 
434   /* find size of row to the left of the diagonal part */
435   ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr);
436   row  = row - diag;
437   for (l=0; l<b->i[row+1]-b->i[row]; l++) {
438     if (garray[b->j[b->i[row]+l]] > diag) break;
439   }
440   ierr = PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));CHKERRQ(ierr);
441 
442   /* diagonal part */
443   ierr = PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));CHKERRQ(ierr);
444 
445   /* right of diagonal part */
446   ierr = PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));CHKERRQ(ierr);
447   PetscFunctionReturn(0);
448 }
449 
450 #undef __FUNCT__
451 #define __FUNCT__ "MatSetValues_MPIAIJ"
452 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
453 {
454   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
455   PetscScalar    value;
456   PetscErrorCode ierr;
457   PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
458   PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
459   PetscBool      roworiented = aij->roworiented;
460 
461   /* Some Variables required in the macro */
462   Mat            A = aij->A;
463   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data;
464   PetscInt       *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
465   MatScalar      *aa = a->a;
466   PetscBool      ignorezeroentries = a->ignorezeroentries;
467   Mat            B = aij->B;
468   Mat_SeqAIJ     *b = (Mat_SeqAIJ*)B->data;
469   PetscInt       *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
470   MatScalar      *ba = b->a;
471 
472   PetscInt       *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
473   PetscInt       nonew = a->nonew;
474   MatScalar      *ap1,*ap2;
475 
476   PetscFunctionBegin;
477   if (v) PetscValidScalarPointer(v,6);
478   for (i=0; i<m; i++) {
479     if (im[i] < 0) continue;
480 #if defined(PETSC_USE_DEBUG)
481     if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
482 #endif
483     if (im[i] >= rstart && im[i] < rend) {
484       row      = im[i] - rstart;
485       lastcol1 = -1;
486       rp1      = aj + ai[row];
487       ap1      = aa + ai[row];
488       rmax1    = aimax[row];
489       nrow1    = ailen[row];
490       low1     = 0;
491       high1    = nrow1;
492       lastcol2 = -1;
493       rp2      = bj + bi[row];
494       ap2      = ba + bi[row];
495       rmax2    = bimax[row];
496       nrow2    = bilen[row];
497       low2     = 0;
498       high2    = nrow2;
499 
500       for (j=0; j<n; j++) {
501         if (v) {if (roworiented) value = v[i*n+j]; else value = v[i+j*m];} else value = 0.0;
502         if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
503         if (in[j] >= cstart && in[j] < cend){
504           col = in[j] - cstart;
505           MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
506         } else if (in[j] < 0) continue;
507 #if defined(PETSC_USE_DEBUG)
508         else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
509 #endif
510         else {
511           if (mat->was_assembled) {
512             if (!aij->colmap) {
513               ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
514             }
515 #if defined (PETSC_USE_CTABLE)
516             ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
517 	    col--;
518 #else
519             col = aij->colmap[in[j]] - 1;
520 #endif
521             if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
522               ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
523               col =  in[j];
524               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
525               B = aij->B;
526               b = (Mat_SeqAIJ*)B->data;
527               bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a;
528               rp2      = bj + bi[row];
529               ap2      = ba + bi[row];
530               rmax2    = bimax[row];
531               nrow2    = bilen[row];
532               low2     = 0;
533               high2    = nrow2;
534               bm       = aij->B->rmap->n;
535               ba = b->a;
536             }
537           } else col = in[j];
538           MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
539         }
540       }
541     } else {
542       if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
543       if (!aij->donotstash) {
544         if (roworiented) {
545           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
546         } else {
547           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
548         }
549       }
550     }
551   }
552   PetscFunctionReturn(0);
553 }
554 
555 #undef __FUNCT__
556 #define __FUNCT__ "MatGetValues_MPIAIJ"
557 PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
558 {
559   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
560   PetscErrorCode ierr;
561   PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
562   PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
563 
564   PetscFunctionBegin;
565   for (i=0; i<m; i++) {
566     if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/
567     if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
568     if (idxm[i] >= rstart && idxm[i] < rend) {
569       row = idxm[i] - rstart;
570       for (j=0; j<n; j++) {
571         if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */
572         if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
573         if (idxn[j] >= cstart && idxn[j] < cend){
574           col = idxn[j] - cstart;
575           ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
576         } else {
577           if (!aij->colmap) {
578             ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
579           }
580 #if defined (PETSC_USE_CTABLE)
581           ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
582           col --;
583 #else
584           col = aij->colmap[idxn[j]] - 1;
585 #endif
586           if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
587           else {
588             ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
589           }
590         }
591       }
592     } else {
593       SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
594     }
595   }
596   PetscFunctionReturn(0);
597 }
598 
599 extern PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat,Vec,Vec);
600 
601 #undef __FUNCT__
602 #define __FUNCT__ "MatAssemblyBegin_MPIAIJ"
603 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
604 {
605   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
606   PetscErrorCode ierr;
607   PetscInt       nstash,reallocs;
608   InsertMode     addv;
609 
610   PetscFunctionBegin;
611   if (aij->donotstash || mat->nooffprocentries) {
612     PetscFunctionReturn(0);
613   }
614 
615   /* make sure all processors are either in INSERTMODE or ADDMODE */
616   ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,((PetscObject)mat)->comm);CHKERRQ(ierr);
617   if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
618   mat->insertmode = addv; /* in case this processor had no cache */
619 
620   ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
621   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
622   ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
623   PetscFunctionReturn(0);
624 }
625 
626 #undef __FUNCT__
627 #define __FUNCT__ "MatAssemblyEnd_MPIAIJ"
628 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
629 {
630   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
631   Mat_SeqAIJ     *a=(Mat_SeqAIJ *)aij->A->data;
632   PetscErrorCode ierr;
633   PetscMPIInt    n;
634   PetscInt       i,j,rstart,ncols,flg;
635   PetscInt       *row,*col;
636   PetscBool      other_disassembled;
637   PetscScalar    *val;
638   InsertMode     addv = mat->insertmode;
639 
640   /* do not use 'b = (Mat_SeqAIJ *)aij->B->data' as B can be reset in disassembly */
641   PetscFunctionBegin;
642   if (!aij->donotstash && !mat->nooffprocentries) {
643     while (1) {
644       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
645       if (!flg) break;
646 
647       for (i=0; i<n;) {
648         /* Now identify the consecutive vals belonging to the same row */
649         for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
650         if (j < n) ncols = j-i;
651         else       ncols = n-i;
652         /* Now assemble all these values with a single function call */
653         ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
654         i = j;
655       }
656     }
657     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
658   }
659   ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
660   ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);
661 
662   /* determine if any processor has disassembled, if so we must
663      also disassemble ourselfs, in order that we may reassemble. */
664   /*
665      if nonzero structure of submatrix B cannot change then we know that
666      no processor disassembled thus we can skip this stuff
667   */
668   if (!((Mat_SeqAIJ*)aij->B->data)->nonew)  {
669     ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,((PetscObject)mat)->comm);CHKERRQ(ierr);
670     if (mat->was_assembled && !other_disassembled) {
671       ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
672     }
673   }
674   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
675     ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
676   }
677   ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr);
678   ierr = MatSetOption(aij->B,MAT_CHECK_COMPRESSED_ROW,PETSC_FALSE);CHKERRQ(ierr);
679   ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
680   ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);
681 
682   ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
683   aij->rowvalues = 0;
684 
685   /* used by MatAXPY() */
686   a->xtoy = 0; ((Mat_SeqAIJ *)aij->B->data)->xtoy = 0;  /* b->xtoy = 0 */
687   a->XtoY = 0; ((Mat_SeqAIJ *)aij->B->data)->XtoY = 0;  /* b->XtoY = 0 */
688 
689   ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
690   if (a->inode.size) mat->ops->multdiagonalblock = MatMultDiagonalBlock_MPIAIJ;
691   PetscFunctionReturn(0);
692 }
693 
694 #undef __FUNCT__
695 #define __FUNCT__ "MatZeroEntries_MPIAIJ"
696 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
697 {
698   Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
699   PetscErrorCode ierr;
700 
701   PetscFunctionBegin;
702   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
703   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
704   PetscFunctionReturn(0);
705 }
706 
707 #undef __FUNCT__
708 #define __FUNCT__ "MatZeroRows_MPIAIJ"
709 PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
710 {
711   Mat_MPIAIJ        *l = (Mat_MPIAIJ*)A->data;
712   PetscErrorCode    ierr;
713   PetscMPIInt       size = l->size,imdex,n,rank = l->rank,tag = ((PetscObject)A)->tag,lastidx = -1;
714   PetscInt          i,*owners = A->rmap->range;
715   PetscInt          *nprocs,j,idx,nsends,row;
716   PetscInt          nmax,*svalues,*starts,*owner,nrecvs;
717   PetscInt          *rvalues,count,base,slen,*source;
718   PetscInt          *lens,*lrows,*values,rstart=A->rmap->rstart;
719   MPI_Comm          comm = ((PetscObject)A)->comm;
720   MPI_Request       *send_waits,*recv_waits;
721   MPI_Status        recv_status,*send_status;
722   const PetscScalar *xx;
723   PetscScalar       *bb;
724 #if defined(PETSC_DEBUG)
725   PetscBool      found = PETSC_FALSE;
726 #endif
727 
728   PetscFunctionBegin;
729   /*  first count number of contributors to each processor */
730   ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
731   ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
732   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/
733   j = 0;
734   for (i=0; i<N; i++) {
735     if (lastidx > (idx = rows[i])) j = 0;
736     lastidx = idx;
737     for (; j<size; j++) {
738       if (idx >= owners[j] && idx < owners[j+1]) {
739         nprocs[2*j]++;
740         nprocs[2*j+1] = 1;
741         owner[i] = j;
742 #if defined(PETSC_DEBUG)
743         found = PETSC_TRUE;
744 #endif
745         break;
746       }
747     }
748 #if defined(PETSC_DEBUG)
749     if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
750     found = PETSC_FALSE;
751 #endif
752   }
753   nsends = 0;  for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}
754 
755   if (A->nooffproczerorows) {
756     if (nsends > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"You called MatSetOption(,MAT_NO_OFF_PROC_ZERO_ROWS,PETSC_TRUE) but set an off process zero row");
757     nrecvs = nsends;
758     nmax   = N;
759   } else {
760     /* inform other processors of number of messages and max length*/
761     ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);
762   }
763 
764   /* post receives:   */
765   ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr);
766   ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
767   for (i=0; i<nrecvs; i++) {
768     ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
769   }
770 
771   /* do sends:
772       1) starts[i] gives the starting index in svalues for stuff going to
773          the ith processor
774   */
775   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr);
776   ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
777   ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr);
778   starts[0] = 0;
779   for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
780   for (i=0; i<N; i++) {
781     svalues[starts[owner[i]]++] = rows[i];
782   }
783 
784   starts[0] = 0;
785   for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
786   count = 0;
787   for (i=0; i<size; i++) {
788     if (nprocs[2*i+1]) {
789       ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
790     }
791   }
792   ierr = PetscFree(starts);CHKERRQ(ierr);
793 
794   base = owners[rank];
795 
796   /*  wait on receives */
797   ierr   = PetscMalloc2(nrecvs,PetscInt,&lens,nrecvs,PetscInt,&source);CHKERRQ(ierr);
798   count  = nrecvs; slen = 0;
799   while (count) {
800     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
801     /* unpack receives into our local space */
802     ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);
803     source[imdex]  = recv_status.MPI_SOURCE;
804     lens[imdex]    = n;
805     slen          += n;
806     count--;
807   }
808   ierr = PetscFree(recv_waits);CHKERRQ(ierr);
809 
810   /* move the data into the send scatter */
811   ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr);
812   count = 0;
813   for (i=0; i<nrecvs; i++) {
814     values = rvalues + i*nmax;
815     for (j=0; j<lens[i]; j++) {
816       lrows[count++] = values[j] - base;
817     }
818   }
819   ierr = PetscFree(rvalues);CHKERRQ(ierr);
820   ierr = PetscFree2(lens,source);CHKERRQ(ierr);
821   ierr = PetscFree(owner);CHKERRQ(ierr);
822   ierr = PetscFree(nprocs);CHKERRQ(ierr);
823 
824   /* fix right hand side if needed */
825   if (x && b) {
826     ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr);
827     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
828     for (i=0; i<slen; i++) {
829       bb[lrows[i]] = diag*xx[lrows[i]];
830     }
831     ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr);
832     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
833   }
834   /*
835         Zero the required rows. If the "diagonal block" of the matrix
836      is square and the user wishes to set the diagonal we use separate
837      code so that MatSetValues() is not called for each diagonal allocating
838      new memory, thus calling lots of mallocs and slowing things down.
839 
840   */
841   /* must zero l->B before l->A because the (diag) case below may put values into l->B*/
842   ierr = MatZeroRows(l->B,slen,lrows,0.0,0,0);CHKERRQ(ierr);
843   if ((diag != 0.0) && (l->A->rmap->N == l->A->cmap->N)) {
844     ierr = MatZeroRows(l->A,slen,lrows,diag,0,0);CHKERRQ(ierr);
845   } else if (diag != 0.0) {
846     ierr = MatZeroRows(l->A,slen,lrows,0.0,0,0);CHKERRQ(ierr);
847     if (((Mat_SeqAIJ*)l->A->data)->nonew) {
848       SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\n\
849 MAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
850     }
851     for (i = 0; i < slen; i++) {
852       row  = lrows[i] + rstart;
853       ierr = MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr);
854     }
855     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
856     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
857   } else {
858     ierr = MatZeroRows(l->A,slen,lrows,0.0,0,0);CHKERRQ(ierr);
859   }
860   ierr = PetscFree(lrows);CHKERRQ(ierr);
861 
862   /* wait on sends */
863   if (nsends) {
864     ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
865     ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
866     ierr = PetscFree(send_status);CHKERRQ(ierr);
867   }
868   ierr = PetscFree(send_waits);CHKERRQ(ierr);
869   ierr = PetscFree(svalues);CHKERRQ(ierr);
870   PetscFunctionReturn(0);
871 }
872 
873 #undef __FUNCT__
874 #define __FUNCT__ "MatZeroRowsColumns_MPIAIJ"
875 PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
876 {
877   Mat_MPIAIJ        *l = (Mat_MPIAIJ*)A->data;
878   PetscErrorCode    ierr;
879   PetscMPIInt       size = l->size,imdex,n,rank = l->rank,tag = ((PetscObject)A)->tag,lastidx = -1;
880   PetscInt          i,*owners = A->rmap->range;
881   PetscInt          *nprocs,j,idx,nsends;
882   PetscInt          nmax,*svalues,*starts,*owner,nrecvs;
883   PetscInt          *rvalues,count,base,slen,*source;
884   PetscInt          *lens,*lrows,*values,m;
885   MPI_Comm          comm = ((PetscObject)A)->comm;
886   MPI_Request       *send_waits,*recv_waits;
887   MPI_Status        recv_status,*send_status;
888   const PetscScalar *xx;
889   PetscScalar       *bb,*mask;
890   Vec               xmask,lmask;
891   Mat_SeqAIJ        *aij = (Mat_SeqAIJ*)l->B->data;
892   const PetscInt    *aj, *ii,*ridx;
893   PetscScalar       *aa;
894 #if defined(PETSC_DEBUG)
895   PetscBool         found = PETSC_FALSE;
896 #endif
897 
898   PetscFunctionBegin;
899   /*  first count number of contributors to each processor */
900   ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
901   ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
902   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/
903   j = 0;
904   for (i=0; i<N; i++) {
905     if (lastidx > (idx = rows[i])) j = 0;
906     lastidx = idx;
907     for (; j<size; j++) {
908       if (idx >= owners[j] && idx < owners[j+1]) {
909         nprocs[2*j]++;
910         nprocs[2*j+1] = 1;
911         owner[i] = j;
912 #if defined(PETSC_DEBUG)
913         found = PETSC_TRUE;
914 #endif
915         break;
916       }
917     }
918 #if defined(PETSC_DEBUG)
919     if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
920     found = PETSC_FALSE;
921 #endif
922   }
923   nsends = 0;  for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}
924 
925   /* inform other processors of number of messages and max length*/
926   ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);
927 
928   /* post receives:   */
929   ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr);
930   ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
931   for (i=0; i<nrecvs; i++) {
932     ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
933   }
934 
935   /* do sends:
936       1) starts[i] gives the starting index in svalues for stuff going to
937          the ith processor
938   */
939   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr);
940   ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
941   ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr);
942   starts[0] = 0;
943   for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
944   for (i=0; i<N; i++) {
945     svalues[starts[owner[i]]++] = rows[i];
946   }
947 
948   starts[0] = 0;
949   for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
950   count = 0;
951   for (i=0; i<size; i++) {
952     if (nprocs[2*i+1]) {
953       ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
954     }
955   }
956   ierr = PetscFree(starts);CHKERRQ(ierr);
957 
958   base = owners[rank];
959 
960   /*  wait on receives */
961   ierr   = PetscMalloc2(nrecvs,PetscInt,&lens,nrecvs,PetscInt,&source);CHKERRQ(ierr);
962   count  = nrecvs; slen = 0;
963   while (count) {
964     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
965     /* unpack receives into our local space */
966     ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);
967     source[imdex]  = recv_status.MPI_SOURCE;
968     lens[imdex]    = n;
969     slen          += n;
970     count--;
971   }
972   ierr = PetscFree(recv_waits);CHKERRQ(ierr);
973 
974   /* move the data into the send scatter */
975   ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr);
976   count = 0;
977   for (i=0; i<nrecvs; i++) {
978     values = rvalues + i*nmax;
979     for (j=0; j<lens[i]; j++) {
980       lrows[count++] = values[j] - base;
981     }
982   }
983   ierr = PetscFree(rvalues);CHKERRQ(ierr);
984   ierr = PetscFree2(lens,source);CHKERRQ(ierr);
985   ierr = PetscFree(owner);CHKERRQ(ierr);
986   ierr = PetscFree(nprocs);CHKERRQ(ierr);
987   /* lrows are the local rows to be zeroed, slen is the number of local rows */
988 
989   /* zero diagonal part of matrix */
990   ierr = MatZeroRowsColumns(l->A,slen,lrows,diag,x,b);CHKERRQ(ierr);
991 
992   /* handle off diagonal part of matrix */
993   ierr = MatGetVecs(A,&xmask,PETSC_NULL);CHKERRQ(ierr);
994   ierr = VecDuplicate(l->lvec,&lmask);CHKERRQ(ierr);
995   ierr = VecGetArray(xmask,&bb);CHKERRQ(ierr);
996   for (i=0; i<slen; i++) {
997     bb[lrows[i]] = 1;
998   }
999   ierr = VecRestoreArray(xmask,&bb);CHKERRQ(ierr);
1000   ierr = VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1001   ierr = VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1002   ierr = VecDestroy(&xmask);CHKERRQ(ierr);
1003   ierr = VecScatterBegin(l->Mvctx,x,l->lvec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1004   ierr = VecScatterEnd(l->Mvctx,x,l->lvec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1005   ierr = VecGetArrayRead(l->lvec,&xx);CHKERRQ(ierr);
1006   ierr = VecGetArray(lmask,&mask);CHKERRQ(ierr);
1007   ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
1008 
1009   /* remove zeroed rows of off diagonal matrix */
1010   ii = aij->i;
1011   for (i=0; i<slen; i++) {
1012     ierr = PetscMemzero(aij->a + ii[lrows[i]],(ii[lrows[i]+1] - ii[lrows[i]])*sizeof(PetscScalar));CHKERRQ(ierr);
1013   }
1014 
1015   /* loop over all elements of off process part of matrix zeroing removed columns*/
1016   if (aij->compressedrow.use){
1017     m    = aij->compressedrow.nrows;
1018     ii   = aij->compressedrow.i;
1019     ridx = aij->compressedrow.rindex;
1020     for (i=0; i<m; i++){
1021       n   = ii[i+1] - ii[i];
1022       aj  = aij->j + ii[i];
1023       aa  = aij->a + ii[i];
1024 
1025       for (j=0; j<n; j++) {
1026         if (PetscAbsScalar(mask[*aj])) {
1027           bb[*ridx] -= *aa*xx[*aj];
1028           *aa        = 0.0;
1029         }
1030         aa++;
1031         aj++;
1032       }
1033       ridx++;
1034     }
1035   } else { /* do not use compressed row format */
1036     m = l->B->rmap->n;
1037     for (i=0; i<m; i++) {
1038       n   = ii[i+1] - ii[i];
1039       aj  = aij->j + ii[i];
1040       aa  = aij->a + ii[i];
1041       for (j=0; j<n; j++) {
1042         if (PetscAbsScalar(mask[*aj])) {
1043           bb[i] -= *aa*xx[*aj];
1044           *aa    = 0.0;
1045         }
1046         aa++;
1047         aj++;
1048       }
1049     }
1050   }
1051   ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
1052   ierr = VecRestoreArray(lmask,&mask);CHKERRQ(ierr);
1053   ierr = VecRestoreArrayRead(l->lvec,&xx);CHKERRQ(ierr);
1054   ierr = VecDestroy(&lmask);CHKERRQ(ierr);
1055   ierr = PetscFree(lrows);CHKERRQ(ierr);
1056 
1057   /* wait on sends */
1058   if (nsends) {
1059     ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
1060     ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
1061     ierr = PetscFree(send_status);CHKERRQ(ierr);
1062   }
1063   ierr = PetscFree(send_waits);CHKERRQ(ierr);
1064   ierr = PetscFree(svalues);CHKERRQ(ierr);
1065 
1066   PetscFunctionReturn(0);
1067 }
1068 
1069 #undef __FUNCT__
1070 #define __FUNCT__ "MatMult_MPIAIJ"
1071 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
1072 {
1073   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1074   PetscErrorCode ierr;
1075   PetscInt       nt;
1076 
1077   PetscFunctionBegin;
1078   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
1079   if (nt != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt);
1080   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1081   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
1082   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1083   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
1084   PetscFunctionReturn(0);
1085 }
1086 
1087 #undef __FUNCT__
1088 #define __FUNCT__ "MatMultDiagonalBlock_MPIAIJ"
1089 PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx)
1090 {
1091   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1092   PetscErrorCode ierr;
1093 
1094   PetscFunctionBegin;
1095   ierr = MatMultDiagonalBlock(a->A,bb,xx);CHKERRQ(ierr);
1096   PetscFunctionReturn(0);
1097 }
1098 
1099 #undef __FUNCT__
1100 #define __FUNCT__ "MatMultAdd_MPIAIJ"
1101 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1102 {
1103   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1104   PetscErrorCode ierr;
1105 
1106   PetscFunctionBegin;
1107   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1108   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1109   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1110   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
1111   PetscFunctionReturn(0);
1112 }
1113 
1114 #undef __FUNCT__
1115 #define __FUNCT__ "MatMultTranspose_MPIAIJ"
1116 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
1117 {
1118   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1119   PetscErrorCode ierr;
1120   PetscBool      merged;
1121 
1122   PetscFunctionBegin;
1123   ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr);
1124   /* do nondiagonal part */
1125   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1126   if (!merged) {
1127     /* send it on its way */
1128     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1129     /* do local part */
1130     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
1131     /* receive remote parts: note this assumes the values are not actually */
1132     /* added in yy until the next line, */
1133     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1134   } else {
1135     /* do local part */
1136     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
1137     /* send it on its way */
1138     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1139     /* values actually were received in the Begin() but we need to call this nop */
1140     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1141   }
1142   PetscFunctionReturn(0);
1143 }
1144 
1145 EXTERN_C_BEGIN
1146 #undef __FUNCT__
1147 #define __FUNCT__ "MatIsTranspose_MPIAIJ"
1148 PetscErrorCode  MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscBool  *f)
1149 {
1150   MPI_Comm       comm;
1151   Mat_MPIAIJ     *Aij = (Mat_MPIAIJ *) Amat->data, *Bij;
1152   Mat            Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs;
1153   IS             Me,Notme;
1154   PetscErrorCode ierr;
1155   PetscInt       M,N,first,last,*notme,i;
1156   PetscMPIInt    size;
1157 
1158   PetscFunctionBegin;
1159 
1160   /* Easy test: symmetric diagonal block */
1161   Bij = (Mat_MPIAIJ *) Bmat->data; Bdia = Bij->A;
1162   ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr);
1163   if (!*f) PetscFunctionReturn(0);
1164   ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr);
1165   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1166   if (size == 1) PetscFunctionReturn(0);
1167 
1168   /* Hard test: off-diagonal block. This takes a MatGetSubMatrix. */
1169   ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr);
1170   ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr);
1171   ierr = PetscMalloc((N-last+first)*sizeof(PetscInt),&notme);CHKERRQ(ierr);
1172   for (i=0; i<first; i++) notme[i] = i;
1173   for (i=last; i<M; i++) notme[i-last+first] = i;
1174   ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,PETSC_COPY_VALUES,&Notme);CHKERRQ(ierr);
1175   ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr);
1176   ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr);
1177   Aoff = Aoffs[0];
1178   ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr);
1179   Boff = Boffs[0];
1180   ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr);
1181   ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr);
1182   ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr);
1183   ierr = ISDestroy(&Me);CHKERRQ(ierr);
1184   ierr = ISDestroy(&Notme);CHKERRQ(ierr);
1185   ierr = PetscFree(notme);CHKERRQ(ierr);
1186    PetscFunctionReturn(0);
1187 }
1188 EXTERN_C_END
1189 
1190 #undef __FUNCT__
1191 #define __FUNCT__ "MatMultTransposeAdd_MPIAIJ"
1192 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1193 {
1194   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1195   PetscErrorCode ierr;
1196 
1197   PetscFunctionBegin;
1198   /* do nondiagonal part */
1199   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1200   /* send it on its way */
1201   ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1202   /* do local part */
1203   ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1204   /* receive remote parts */
1205   ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1206   PetscFunctionReturn(0);
1207 }
1208 
1209 /*
1210   This only works correctly for square matrices where the subblock A->A is the
1211    diagonal block
1212 */
1213 #undef __FUNCT__
1214 #define __FUNCT__ "MatGetDiagonal_MPIAIJ"
1215 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v)
1216 {
1217   PetscErrorCode ierr;
1218   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1219 
1220   PetscFunctionBegin;
1221   if (A->rmap->N != A->cmap->N) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
1222   if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
1223   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
1224   PetscFunctionReturn(0);
1225 }
1226 
1227 #undef __FUNCT__
1228 #define __FUNCT__ "MatScale_MPIAIJ"
1229 PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa)
1230 {
1231   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1232   PetscErrorCode ierr;
1233 
1234   PetscFunctionBegin;
1235   ierr = MatScale(a->A,aa);CHKERRQ(ierr);
1236   ierr = MatScale(a->B,aa);CHKERRQ(ierr);
1237   PetscFunctionReturn(0);
1238 }
1239 
1240 #undef __FUNCT__
1241 #define __FUNCT__ "MatDestroy_MPIAIJ"
1242 PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
1243 {
1244   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1245   PetscErrorCode ierr;
1246 
1247   PetscFunctionBegin;
1248 #if defined(PETSC_USE_LOG)
1249   PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N);
1250 #endif
1251   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
1252   ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
1253   ierr = MatDestroy(&aij->A);CHKERRQ(ierr);
1254   ierr = MatDestroy(&aij->B);CHKERRQ(ierr);
1255 #if defined (PETSC_USE_CTABLE)
1256   ierr = PetscTableDestroy(&aij->colmap);CHKERRQ(ierr);
1257 #else
1258   ierr = PetscFree(aij->colmap);CHKERRQ(ierr);
1259 #endif
1260   ierr = PetscFree(aij->garray);CHKERRQ(ierr);
1261   ierr = VecDestroy(&aij->lvec);CHKERRQ(ierr);
1262   ierr = VecScatterDestroy(&aij->Mvctx);CHKERRQ(ierr);
1263   ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
1264   ierr = PetscFree(aij->ld);CHKERRQ(ierr);
1265   ierr = PetscFree(mat->data);CHKERRQ(ierr);
1266 
1267   ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
1268   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr);
1269   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr);
1270   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr);
1271   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C","",PETSC_NULL);CHKERRQ(ierr);
1272   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
1273   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr);
1274   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C","",PETSC_NULL);CHKERRQ(ierr);
1275   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C","",PETSC_NULL);CHKERRQ(ierr);
1276   PetscFunctionReturn(0);
1277 }
1278 
1279 #undef __FUNCT__
1280 #define __FUNCT__ "MatView_MPIAIJ_Binary"
1281 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
1282 {
1283   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
1284   Mat_SeqAIJ*       A = (Mat_SeqAIJ*)aij->A->data;
1285   Mat_SeqAIJ*       B = (Mat_SeqAIJ*)aij->B->data;
1286   PetscErrorCode    ierr;
1287   PetscMPIInt       rank,size,tag = ((PetscObject)viewer)->tag;
1288   int               fd;
1289   PetscInt          nz,header[4],*row_lengths,*range=0,rlen,i;
1290   PetscInt          nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap->rstart,rnz;
1291   PetscScalar       *column_values;
1292   PetscInt          message_count,flowcontrolcount;
1293 
1294   PetscFunctionBegin;
1295   ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr);
1296   ierr = MPI_Comm_size(((PetscObject)mat)->comm,&size);CHKERRQ(ierr);
1297   nz   = A->nz + B->nz;
1298   if (!rank) {
1299     header[0] = MAT_FILE_CLASSID;
1300     header[1] = mat->rmap->N;
1301     header[2] = mat->cmap->N;
1302     ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
1303     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1304     ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1305     /* get largest number of rows any processor has */
1306     rlen = mat->rmap->n;
1307     range = mat->rmap->range;
1308     for (i=1; i<size; i++) {
1309       rlen = PetscMax(rlen,range[i+1] - range[i]);
1310     }
1311   } else {
1312     ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
1313     rlen = mat->rmap->n;
1314   }
1315 
1316   /* load up the local row counts */
1317   ierr = PetscMalloc((rlen+1)*sizeof(PetscInt),&row_lengths);CHKERRQ(ierr);
1318   for (i=0; i<mat->rmap->n; i++) {
1319     row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
1320   }
1321 
1322   /* store the row lengths to the file */
1323   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1324   if (!rank) {
1325     ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap->n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1326     for (i=1; i<size; i++) {
1327       ierr = PetscViewerFlowControlStepMaster(viewer,i,message_count,flowcontrolcount);CHKERRQ(ierr);
1328       rlen = range[i+1] - range[i];
1329       ierr = MPILong_Recv(row_lengths,rlen,MPIU_INT,i,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1330       ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1331     }
1332     ierr = PetscViewerFlowControlEndMaster(viewer,message_count);CHKERRQ(ierr);
1333   } else {
1334     ierr = PetscViewerFlowControlStepWorker(viewer,rank,message_count);CHKERRQ(ierr);
1335     ierr = MPILong_Send(row_lengths,mat->rmap->n,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1336     ierr = PetscViewerFlowControlEndWorker(viewer,message_count);CHKERRQ(ierr);
1337   }
1338   ierr = PetscFree(row_lengths);CHKERRQ(ierr);
1339 
1340   /* load up the local column indices */
1341   nzmax = nz; /* )th processor needs space a largest processor needs */
1342   ierr = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
1343   ierr = PetscMalloc((nzmax+1)*sizeof(PetscInt),&column_indices);CHKERRQ(ierr);
1344   cnt  = 0;
1345   for (i=0; i<mat->rmap->n; i++) {
1346     for (j=B->i[i]; j<B->i[i+1]; j++) {
1347       if ( (col = garray[B->j[j]]) > cstart) break;
1348       column_indices[cnt++] = col;
1349     }
1350     for (k=A->i[i]; k<A->i[i+1]; k++) {
1351       column_indices[cnt++] = A->j[k] + cstart;
1352     }
1353     for (; j<B->i[i+1]; j++) {
1354       column_indices[cnt++] = garray[B->j[j]];
1355     }
1356   }
1357   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
1358 
1359   /* store the column indices to the file */
1360    ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1361   if (!rank) {
1362     MPI_Status status;
1363     ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1364     for (i=1; i<size; i++) {
1365       ierr = PetscViewerFlowControlStepMaster(viewer,i,message_count,flowcontrolcount);CHKERRQ(ierr);
1366       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
1367       if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax);
1368       ierr = MPILong_Recv(column_indices,rnz,MPIU_INT,i,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1369       ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1370     }
1371      ierr = PetscViewerFlowControlEndMaster(viewer,message_count);CHKERRQ(ierr);
1372   } else {
1373     ierr = PetscViewerFlowControlStepWorker(viewer,rank,message_count);CHKERRQ(ierr);
1374     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1375     ierr = MPILong_Send(column_indices,nz,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1376     ierr = PetscViewerFlowControlEndWorker(viewer,message_count);CHKERRQ(ierr);
1377   }
1378   ierr = PetscFree(column_indices);CHKERRQ(ierr);
1379 
1380   /* load up the local column values */
1381   ierr = PetscMalloc((nzmax+1)*sizeof(PetscScalar),&column_values);CHKERRQ(ierr);
1382   cnt  = 0;
1383   for (i=0; i<mat->rmap->n; i++) {
1384     for (j=B->i[i]; j<B->i[i+1]; j++) {
1385       if ( garray[B->j[j]] > cstart) break;
1386       column_values[cnt++] = B->a[j];
1387     }
1388     for (k=A->i[i]; k<A->i[i+1]; k++) {
1389       column_values[cnt++] = A->a[k];
1390     }
1391     for (; j<B->i[i+1]; j++) {
1392       column_values[cnt++] = B->a[j];
1393     }
1394   }
1395   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
1396 
1397   /* store the column values to the file */
1398    ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1399   if (!rank) {
1400     MPI_Status status;
1401     ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1402     for (i=1; i<size; i++) {
1403        ierr = PetscViewerFlowControlStepMaster(viewer,i,message_count,flowcontrolcount);CHKERRQ(ierr);
1404       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
1405       if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax);
1406       ierr = MPILong_Recv(column_values,rnz,MPIU_SCALAR,i,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1407       ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1408     }
1409     ierr = PetscViewerFlowControlEndMaster(viewer,message_count);CHKERRQ(ierr);
1410   } else {
1411     ierr = PetscViewerFlowControlStepWorker(viewer,rank,message_count);CHKERRQ(ierr);
1412     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1413     ierr = MPILong_Send(column_values,nz,MPIU_SCALAR,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1414     ierr = PetscViewerFlowControlEndWorker(viewer,message_count);CHKERRQ(ierr);
1415   }
1416   ierr = PetscFree(column_values);CHKERRQ(ierr);
1417   PetscFunctionReturn(0);
1418 }
1419 
1420 #undef __FUNCT__
1421 #define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket"
1422 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
1423 {
1424   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
1425   PetscErrorCode    ierr;
1426   PetscMPIInt       rank = aij->rank,size = aij->size;
1427   PetscBool         isdraw,iascii,isbinary;
1428   PetscViewer       sviewer;
1429   PetscViewerFormat format;
1430 
1431   PetscFunctionBegin;
1432   ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1433   ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1434   ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1435   if (iascii) {
1436     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1437     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1438       MatInfo    info;
1439       PetscBool  inodes;
1440 
1441       ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr);
1442       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
1443       ierr = MatInodeGetInodeSizes(aij->A,PETSC_NULL,(PetscInt **)&inodes,PETSC_NULL);CHKERRQ(ierr);
1444       ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_TRUE);CHKERRQ(ierr);
1445       if (!inodes) {
1446         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, not using I-node routines\n",
1447 					      rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
1448       } else {
1449         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, using I-node routines\n",
1450 		    rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
1451       }
1452       ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
1453       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1454       ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
1455       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1456       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1457       ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_FALSE);CHKERRQ(ierr);
1458       ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
1459       ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr);
1460       PetscFunctionReturn(0);
1461     } else if (format == PETSC_VIEWER_ASCII_INFO) {
1462       PetscInt   inodecount,inodelimit,*inodes;
1463       ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr);
1464       if (inodes) {
1465         ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr);
1466       } else {
1467         ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr);
1468       }
1469       PetscFunctionReturn(0);
1470     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1471       PetscFunctionReturn(0);
1472     }
1473   } else if (isbinary) {
1474     if (size == 1) {
1475       ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1476       ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
1477     } else {
1478       ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr);
1479     }
1480     PetscFunctionReturn(0);
1481   } else if (isdraw) {
1482     PetscDraw  draw;
1483     PetscBool  isnull;
1484     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
1485     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
1486   }
1487 
1488   if (size == 1) {
1489     ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1490     ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
1491   } else {
1492     /* assemble the entire matrix onto first processor. */
1493     Mat         A;
1494     Mat_SeqAIJ  *Aloc;
1495     PetscInt    M = mat->rmap->N,N = mat->cmap->N,m,*ai,*aj,row,*cols,i,*ct;
1496     MatScalar   *a;
1497 
1498     if (mat->rmap->N > 1024) {
1499       PetscBool  flg = PETSC_FALSE;
1500 
1501       ierr = PetscOptionsGetBool(((PetscObject) mat)->prefix, "-mat_ascii_output_large", &flg,PETSC_NULL);CHKERRQ(ierr);
1502       if (!flg) {
1503         SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_OUTOFRANGE,"ASCII matrix output not allowed for matrices with more than 1024 rows, use binary format instead.\nYou can override this restriction using -mat_ascii_output_large.");
1504       }
1505     }
1506 
1507     ierr = MatCreate(((PetscObject)mat)->comm,&A);CHKERRQ(ierr);
1508     if (!rank) {
1509       ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
1510     } else {
1511       ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
1512     }
1513     /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */
1514     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
1515     ierr = MatMPIAIJSetPreallocation(A,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
1516     ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr);
1517 
1518     /* copy over the A part */
1519     Aloc = (Mat_SeqAIJ*)aij->A->data;
1520     m = aij->A->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1521     row = mat->rmap->rstart;
1522     for (i=0; i<ai[m]; i++) {aj[i] += mat->cmap->rstart ;}
1523     for (i=0; i<m; i++) {
1524       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
1525       row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
1526     }
1527     aj = Aloc->j;
1528     for (i=0; i<ai[m]; i++) {aj[i] -= mat->cmap->rstart;}
1529 
1530     /* copy over the B part */
1531     Aloc = (Mat_SeqAIJ*)aij->B->data;
1532     m    = aij->B->rmap->n;  ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1533     row  = mat->rmap->rstart;
1534     ierr = PetscMalloc((ai[m]+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr);
1535     ct   = cols;
1536     for (i=0; i<ai[m]; i++) {cols[i] = aij->garray[aj[i]];}
1537     for (i=0; i<m; i++) {
1538       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
1539       row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
1540     }
1541     ierr = PetscFree(ct);CHKERRQ(ierr);
1542     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1543     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1544     /*
1545        Everyone has to call to draw the matrix since the graphics waits are
1546        synchronized across all processors that share the PetscDraw object
1547     */
1548     ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
1549     if (!rank) {
1550       ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1551       /* Set the type name to MATMPIAIJ so that the correct type can be printed out by PetscObjectPrintClassNamePrefixType() in MatView_SeqAIJ_ASCII()*/
1552       PetscStrcpy(((PetscObject)((Mat_MPIAIJ*)(A->data))->A)->type_name,MATMPIAIJ);
1553       ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
1554     }
1555     ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
1556     ierr = MatDestroy(&A);CHKERRQ(ierr);
1557   }
1558   PetscFunctionReturn(0);
1559 }
1560 
1561 #undef __FUNCT__
1562 #define __FUNCT__ "MatView_MPIAIJ"
1563 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer)
1564 {
1565   PetscErrorCode ierr;
1566   PetscBool      iascii,isdraw,issocket,isbinary;
1567 
1568   PetscFunctionBegin;
1569   ierr  = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1570   ierr  = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1571   ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1572   ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
1573   if (iascii || isdraw || isbinary || issocket) {
1574     ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
1575   } else {
1576     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Viewer type %s not supported by MPIAIJ matrices",((PetscObject)viewer)->type_name);
1577   }
1578   PetscFunctionReturn(0);
1579 }
1580 
1581 #undef __FUNCT__
1582 #define __FUNCT__ "MatSOR_MPIAIJ"
1583 PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
1584 {
1585   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1586   PetscErrorCode ierr;
1587   Vec            bb1 = 0;
1588   PetscBool      hasop;
1589 
1590   PetscFunctionBegin;
1591   if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) {
1592     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
1593   }
1594 
1595   if (flag == SOR_APPLY_UPPER) {
1596     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1597     PetscFunctionReturn(0);
1598   }
1599 
1600   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
1601     if (flag & SOR_ZERO_INITIAL_GUESS) {
1602       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1603       its--;
1604     }
1605 
1606     while (its--) {
1607       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1608       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1609 
1610       /* update rhs: bb1 = bb - B*x */
1611       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1612       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1613 
1614       /* local sweep */
1615       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1616     }
1617   } else if (flag & SOR_LOCAL_FORWARD_SWEEP){
1618     if (flag & SOR_ZERO_INITIAL_GUESS) {
1619       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1620       its--;
1621     }
1622     while (its--) {
1623       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1624       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1625 
1626       /* update rhs: bb1 = bb - B*x */
1627       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1628       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1629 
1630       /* local sweep */
1631       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1632     }
1633   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){
1634     if (flag & SOR_ZERO_INITIAL_GUESS) {
1635       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1636       its--;
1637     }
1638     while (its--) {
1639       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1640       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1641 
1642       /* update rhs: bb1 = bb - B*x */
1643       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1644       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1645 
1646       /* local sweep */
1647       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1648     }
1649   }  else if (flag & SOR_EISENSTAT) {
1650     Vec         xx1;
1651 
1652     ierr = VecDuplicate(bb,&xx1);CHKERRQ(ierr);
1653     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr);
1654 
1655     ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1656     ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1657     if (!mat->diag) {
1658       ierr = MatGetVecs(matin,&mat->diag,PETSC_NULL);CHKERRQ(ierr);
1659       ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr);
1660     }
1661     ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr);
1662     if (hasop) {
1663       ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr);
1664     } else {
1665       ierr = VecPointwiseMult(bb1,mat->diag,xx);CHKERRQ(ierr);
1666     }
1667     ierr = VecAYPX(bb1,(omega-2.0)/omega,bb);CHKERRQ(ierr);
1668 
1669     ierr = MatMultAdd(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr);
1670 
1671     /* local sweep */
1672     ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr);
1673     ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr);
1674     ierr = VecDestroy(&xx1);CHKERRQ(ierr);
1675   } else SETERRQ(((PetscObject)matin)->comm,PETSC_ERR_SUP,"Parallel SOR not supported");
1676 
1677   ierr = VecDestroy(&bb1);CHKERRQ(ierr);
1678   PetscFunctionReturn(0);
1679 }
1680 
1681 #undef __FUNCT__
1682 #define __FUNCT__ "MatPermute_MPIAIJ"
1683 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B)
1684 {
1685   MPI_Comm       comm,pcomm;
1686   PetscInt       first,local_size,nrows;
1687   const PetscInt *rows;
1688   PetscMPIInt    size;
1689   IS             crowp,growp,irowp,lrowp,lcolp,icolp;
1690   PetscErrorCode ierr;
1691 
1692   PetscFunctionBegin;
1693   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
1694   /* make a collective version of 'rowp' */
1695   ierr = PetscObjectGetComm((PetscObject)rowp,&pcomm);CHKERRQ(ierr);
1696   if (pcomm==comm) {
1697     crowp = rowp;
1698   } else {
1699     ierr = ISGetSize(rowp,&nrows);CHKERRQ(ierr);
1700     ierr = ISGetIndices(rowp,&rows);CHKERRQ(ierr);
1701     ierr = ISCreateGeneral(comm,nrows,rows,PETSC_COPY_VALUES,&crowp);CHKERRQ(ierr);
1702     ierr = ISRestoreIndices(rowp,&rows);CHKERRQ(ierr);
1703   }
1704   /* collect the global row permutation and invert it */
1705   ierr = ISAllGather(crowp,&growp);CHKERRQ(ierr);
1706   ierr = ISSetPermutation(growp);CHKERRQ(ierr);
1707   if (pcomm!=comm) {
1708     ierr = ISDestroy(&crowp);CHKERRQ(ierr);
1709   }
1710   ierr = ISInvertPermutation(growp,PETSC_DECIDE,&irowp);CHKERRQ(ierr);
1711   /* get the local target indices */
1712   ierr = MatGetOwnershipRange(A,&first,PETSC_NULL);CHKERRQ(ierr);
1713   ierr = MatGetLocalSize(A,&local_size,PETSC_NULL);CHKERRQ(ierr);
1714   ierr = ISGetIndices(irowp,&rows);CHKERRQ(ierr);
1715   ierr = ISCreateGeneral(MPI_COMM_SELF,local_size,rows+first,PETSC_COPY_VALUES,&lrowp);CHKERRQ(ierr);
1716   ierr = ISRestoreIndices(irowp,&rows);CHKERRQ(ierr);
1717   ierr = ISDestroy(&irowp);CHKERRQ(ierr);
1718   /* the column permutation is so much easier;
1719      make a local version of 'colp' and invert it */
1720   ierr = PetscObjectGetComm((PetscObject)colp,&pcomm);CHKERRQ(ierr);
1721   ierr = MPI_Comm_size(pcomm,&size);CHKERRQ(ierr);
1722   if (size==1) {
1723     lcolp = colp;
1724   } else {
1725     ierr = ISGetSize(colp,&nrows);CHKERRQ(ierr);
1726     ierr = ISGetIndices(colp,&rows);CHKERRQ(ierr);
1727     ierr = ISCreateGeneral(MPI_COMM_SELF,nrows,rows,PETSC_COPY_VALUES,&lcolp);CHKERRQ(ierr);
1728   }
1729   ierr = ISSetPermutation(lcolp);CHKERRQ(ierr);
1730   ierr = ISInvertPermutation(lcolp,PETSC_DECIDE,&icolp);CHKERRQ(ierr);
1731   ierr = ISSetPermutation(icolp);CHKERRQ(ierr);
1732   if (size>1) {
1733     ierr = ISRestoreIndices(colp,&rows);CHKERRQ(ierr);
1734     ierr = ISDestroy(&lcolp);CHKERRQ(ierr);
1735   }
1736   /* now we just get the submatrix */
1737   ierr = MatGetSubMatrix_MPIAIJ_Private(A,lrowp,icolp,local_size,MAT_INITIAL_MATRIX,B);CHKERRQ(ierr);
1738   /* clean up */
1739   ierr = ISDestroy(&lrowp);CHKERRQ(ierr);
1740   ierr = ISDestroy(&icolp);CHKERRQ(ierr);
1741   PetscFunctionReturn(0);
1742 }
1743 
1744 #undef __FUNCT__
1745 #define __FUNCT__ "MatGetInfo_MPIAIJ"
1746 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1747 {
1748   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1749   Mat            A = mat->A,B = mat->B;
1750   PetscErrorCode ierr;
1751   PetscReal      isend[5],irecv[5];
1752 
1753   PetscFunctionBegin;
1754   info->block_size     = 1.0;
1755   ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1756   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1757   isend[3] = info->memory;  isend[4] = info->mallocs;
1758   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1759   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1760   isend[3] += info->memory;  isend[4] += info->mallocs;
1761   if (flag == MAT_LOCAL) {
1762     info->nz_used      = isend[0];
1763     info->nz_allocated = isend[1];
1764     info->nz_unneeded  = isend[2];
1765     info->memory       = isend[3];
1766     info->mallocs      = isend[4];
1767   } else if (flag == MAT_GLOBAL_MAX) {
1768     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,((PetscObject)matin)->comm);CHKERRQ(ierr);
1769     info->nz_used      = irecv[0];
1770     info->nz_allocated = irecv[1];
1771     info->nz_unneeded  = irecv[2];
1772     info->memory       = irecv[3];
1773     info->mallocs      = irecv[4];
1774   } else if (flag == MAT_GLOBAL_SUM) {
1775     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,((PetscObject)matin)->comm);CHKERRQ(ierr);
1776     info->nz_used      = irecv[0];
1777     info->nz_allocated = irecv[1];
1778     info->nz_unneeded  = irecv[2];
1779     info->memory       = irecv[3];
1780     info->mallocs      = irecv[4];
1781   }
1782   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1783   info->fill_ratio_needed = 0;
1784   info->factor_mallocs    = 0;
1785 
1786   PetscFunctionReturn(0);
1787 }
1788 
1789 #undef __FUNCT__
1790 #define __FUNCT__ "MatSetOption_MPIAIJ"
1791 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscBool  flg)
1792 {
1793   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1794   PetscErrorCode ierr;
1795 
1796   PetscFunctionBegin;
1797   switch (op) {
1798   case MAT_NEW_NONZERO_LOCATIONS:
1799   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1800   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1801   case MAT_KEEP_NONZERO_PATTERN:
1802   case MAT_NEW_NONZERO_LOCATION_ERR:
1803   case MAT_USE_INODES:
1804   case MAT_IGNORE_ZERO_ENTRIES:
1805     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1806     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1807     break;
1808   case MAT_ROW_ORIENTED:
1809     a->roworiented = flg;
1810     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1811     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1812     break;
1813   case MAT_NEW_DIAGONALS:
1814     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1815     break;
1816   case MAT_IGNORE_OFF_PROC_ENTRIES:
1817     a->donotstash = flg;
1818     break;
1819   case MAT_SPD:
1820     A->spd_set                         = PETSC_TRUE;
1821     A->spd                             = flg;
1822     if (flg) {
1823       A->symmetric                     = PETSC_TRUE;
1824       A->structurally_symmetric        = PETSC_TRUE;
1825       A->symmetric_set                 = PETSC_TRUE;
1826       A->structurally_symmetric_set    = PETSC_TRUE;
1827     }
1828     break;
1829   case MAT_SYMMETRIC:
1830     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1831     break;
1832   case MAT_STRUCTURALLY_SYMMETRIC:
1833     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1834     break;
1835   case MAT_HERMITIAN:
1836     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1837     break;
1838   case MAT_SYMMETRY_ETERNAL:
1839     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1840     break;
1841   default:
1842     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op);
1843   }
1844   PetscFunctionReturn(0);
1845 }
1846 
1847 #undef __FUNCT__
1848 #define __FUNCT__ "MatGetRow_MPIAIJ"
1849 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1850 {
1851   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1852   PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
1853   PetscErrorCode ierr;
1854   PetscInt       i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart;
1855   PetscInt       nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend;
1856   PetscInt       *cmap,*idx_p;
1857 
1858   PetscFunctionBegin;
1859   if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
1860   mat->getrowactive = PETSC_TRUE;
1861 
1862   if (!mat->rowvalues && (idx || v)) {
1863     /*
1864         allocate enough space to hold information from the longest row.
1865     */
1866     Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data;
1867     PetscInt   max = 1,tmp;
1868     for (i=0; i<matin->rmap->n; i++) {
1869       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1870       if (max < tmp) { max = tmp; }
1871     }
1872     ierr = PetscMalloc2(max,PetscScalar,&mat->rowvalues,max,PetscInt,&mat->rowindices);CHKERRQ(ierr);
1873   }
1874 
1875   if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows");
1876   lrow = row - rstart;
1877 
1878   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1879   if (!v)   {pvA = 0; pvB = 0;}
1880   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1881   ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1882   ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1883   nztot = nzA + nzB;
1884 
1885   cmap  = mat->garray;
1886   if (v  || idx) {
1887     if (nztot) {
1888       /* Sort by increasing column numbers, assuming A and B already sorted */
1889       PetscInt imark = -1;
1890       if (v) {
1891         *v = v_p = mat->rowvalues;
1892         for (i=0; i<nzB; i++) {
1893           if (cmap[cworkB[i]] < cstart)   v_p[i] = vworkB[i];
1894           else break;
1895         }
1896         imark = i;
1897         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1898         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1899       }
1900       if (idx) {
1901         *idx = idx_p = mat->rowindices;
1902         if (imark > -1) {
1903           for (i=0; i<imark; i++) {
1904             idx_p[i] = cmap[cworkB[i]];
1905           }
1906         } else {
1907           for (i=0; i<nzB; i++) {
1908             if (cmap[cworkB[i]] < cstart)   idx_p[i] = cmap[cworkB[i]];
1909             else break;
1910           }
1911           imark = i;
1912         }
1913         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart + cworkA[i];
1914         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]];
1915       }
1916     } else {
1917       if (idx) *idx = 0;
1918       if (v)   *v   = 0;
1919     }
1920   }
1921   *nz = nztot;
1922   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1923   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1924   PetscFunctionReturn(0);
1925 }
1926 
1927 #undef __FUNCT__
1928 #define __FUNCT__ "MatRestoreRow_MPIAIJ"
1929 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1930 {
1931   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1932 
1933   PetscFunctionBegin;
1934   if (!aij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
1935   aij->getrowactive = PETSC_FALSE;
1936   PetscFunctionReturn(0);
1937 }
1938 
1939 #undef __FUNCT__
1940 #define __FUNCT__ "MatNorm_MPIAIJ"
1941 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm)
1942 {
1943   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1944   Mat_SeqAIJ     *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data;
1945   PetscErrorCode ierr;
1946   PetscInt       i,j,cstart = mat->cmap->rstart;
1947   PetscReal      sum = 0.0;
1948   MatScalar      *v;
1949 
1950   PetscFunctionBegin;
1951   if (aij->size == 1) {
1952     ierr =  MatNorm(aij->A,type,norm);CHKERRQ(ierr);
1953   } else {
1954     if (type == NORM_FROBENIUS) {
1955       v = amat->a;
1956       for (i=0; i<amat->nz; i++) {
1957 #if defined(PETSC_USE_COMPLEX)
1958         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1959 #else
1960         sum += (*v)*(*v); v++;
1961 #endif
1962       }
1963       v = bmat->a;
1964       for (i=0; i<bmat->nz; i++) {
1965 #if defined(PETSC_USE_COMPLEX)
1966         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1967 #else
1968         sum += (*v)*(*v); v++;
1969 #endif
1970       }
1971       ierr = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPIU_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr);
1972       *norm = PetscSqrtReal(*norm);
1973     } else if (type == NORM_1) { /* max column norm */
1974       PetscReal *tmp,*tmp2;
1975       PetscInt  *jj,*garray = aij->garray;
1976       ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr);
1977       ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscReal),&tmp2);CHKERRQ(ierr);
1978       ierr = PetscMemzero(tmp,mat->cmap->N*sizeof(PetscReal));CHKERRQ(ierr);
1979       *norm = 0.0;
1980       v = amat->a; jj = amat->j;
1981       for (j=0; j<amat->nz; j++) {
1982         tmp[cstart + *jj++ ] += PetscAbsScalar(*v);  v++;
1983       }
1984       v = bmat->a; jj = bmat->j;
1985       for (j=0; j<bmat->nz; j++) {
1986         tmp[garray[*jj++]] += PetscAbsScalar(*v); v++;
1987       }
1988       ierr = MPI_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr);
1989       for (j=0; j<mat->cmap->N; j++) {
1990         if (tmp2[j] > *norm) *norm = tmp2[j];
1991       }
1992       ierr = PetscFree(tmp);CHKERRQ(ierr);
1993       ierr = PetscFree(tmp2);CHKERRQ(ierr);
1994     } else if (type == NORM_INFINITY) { /* max row norm */
1995       PetscReal ntemp = 0.0;
1996       for (j=0; j<aij->A->rmap->n; j++) {
1997         v = amat->a + amat->i[j];
1998         sum = 0.0;
1999         for (i=0; i<amat->i[j+1]-amat->i[j]; i++) {
2000           sum += PetscAbsScalar(*v); v++;
2001         }
2002         v = bmat->a + bmat->i[j];
2003         for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) {
2004           sum += PetscAbsScalar(*v); v++;
2005         }
2006         if (sum > ntemp) ntemp = sum;
2007       }
2008       ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPIU_MAX,((PetscObject)mat)->comm);CHKERRQ(ierr);
2009     } else {
2010       SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_SUP,"No support for two norm");
2011     }
2012   }
2013   PetscFunctionReturn(0);
2014 }
2015 
2016 #undef __FUNCT__
2017 #define __FUNCT__ "MatTranspose_MPIAIJ"
2018 PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout)
2019 {
2020   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2021   Mat_SeqAIJ     *Aloc=(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data;
2022   PetscErrorCode ierr;
2023   PetscInt       M = A->rmap->N,N = A->cmap->N,ma,na,mb,*ai,*aj,*bi,*bj,row,*cols,*cols_tmp,i,*d_nnz;
2024   PetscInt       cstart=A->cmap->rstart,ncol;
2025   Mat            B;
2026   MatScalar      *array;
2027 
2028   PetscFunctionBegin;
2029   if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
2030 
2031   ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n;
2032   ai = Aloc->i; aj = Aloc->j;
2033   bi = Bloc->i; bj = Bloc->j;
2034   if (reuse == MAT_INITIAL_MATRIX || *matout == A) {
2035     /* compute d_nnz for preallocation; o_nnz is approximated by d_nnz to avoid communication */
2036     ierr = PetscMalloc((1+na)*sizeof(PetscInt),&d_nnz);CHKERRQ(ierr);
2037     ierr = PetscMemzero(d_nnz,(1+na)*sizeof(PetscInt));CHKERRQ(ierr);
2038     for (i=0; i<ai[ma]; i++){
2039       d_nnz[aj[i]] ++;
2040       aj[i] += cstart; /* global col index to be used by MatSetValues() */
2041     }
2042 
2043     ierr = MatCreate(((PetscObject)A)->comm,&B);CHKERRQ(ierr);
2044     ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
2045     ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
2046     ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,d_nnz);CHKERRQ(ierr);
2047     ierr = PetscFree(d_nnz);CHKERRQ(ierr);
2048   } else {
2049     B = *matout;
2050   }
2051 
2052   /* copy over the A part */
2053   array = Aloc->a;
2054   row = A->rmap->rstart;
2055   for (i=0; i<ma; i++) {
2056     ncol = ai[i+1]-ai[i];
2057     ierr = MatSetValues(B,ncol,aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
2058     row++; array += ncol; aj += ncol;
2059   }
2060   aj = Aloc->j;
2061   for (i=0; i<ai[ma]; i++) aj[i] -= cstart; /* resume local col index */
2062 
2063   /* copy over the B part */
2064   ierr = PetscMalloc(bi[mb]*sizeof(PetscInt),&cols);CHKERRQ(ierr);
2065   ierr = PetscMemzero(cols,bi[mb]*sizeof(PetscInt));CHKERRQ(ierr);
2066   array = Bloc->a;
2067   row = A->rmap->rstart;
2068   for (i=0; i<bi[mb]; i++) {cols[i] = a->garray[bj[i]];}
2069   cols_tmp = cols;
2070   for (i=0; i<mb; i++) {
2071     ncol = bi[i+1]-bi[i];
2072     ierr = MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
2073     row++; array += ncol; cols_tmp += ncol;
2074   }
2075   ierr = PetscFree(cols);CHKERRQ(ierr);
2076 
2077   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2078   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2079   if (reuse == MAT_INITIAL_MATRIX || *matout != A) {
2080     *matout = B;
2081   } else {
2082     ierr = MatHeaderMerge(A,B);CHKERRQ(ierr);
2083   }
2084   PetscFunctionReturn(0);
2085 }
2086 
2087 #undef __FUNCT__
2088 #define __FUNCT__ "MatDiagonalScale_MPIAIJ"
2089 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
2090 {
2091   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2092   Mat            a = aij->A,b = aij->B;
2093   PetscErrorCode ierr;
2094   PetscInt       s1,s2,s3;
2095 
2096   PetscFunctionBegin;
2097   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
2098   if (rr) {
2099     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
2100     if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
2101     /* Overlap communication with computation. */
2102     ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2103   }
2104   if (ll) {
2105     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
2106     if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
2107     ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr);
2108   }
2109   /* scale  the diagonal block */
2110   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
2111 
2112   if (rr) {
2113     /* Do a scatter end and then right scale the off-diagonal block */
2114     ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2115     ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr);
2116   }
2117 
2118   PetscFunctionReturn(0);
2119 }
2120 
2121 #undef __FUNCT__
2122 #define __FUNCT__ "MatSetBlockSize_MPIAIJ"
2123 PetscErrorCode MatSetBlockSize_MPIAIJ(Mat A,PetscInt bs)
2124 {
2125   Mat_MPIAIJ     *a   = (Mat_MPIAIJ*)A->data;
2126   PetscErrorCode ierr;
2127 
2128   PetscFunctionBegin;
2129   ierr = MatSetBlockSize(a->A,bs);CHKERRQ(ierr);
2130   ierr = MatSetBlockSize(a->B,bs);CHKERRQ(ierr);
2131   ierr = PetscLayoutSetBlockSize(A->rmap,bs);CHKERRQ(ierr);
2132   ierr = PetscLayoutSetBlockSize(A->cmap,bs);CHKERRQ(ierr);
2133   PetscFunctionReturn(0);
2134 }
2135 #undef __FUNCT__
2136 #define __FUNCT__ "MatSetUnfactored_MPIAIJ"
2137 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A)
2138 {
2139   Mat_MPIAIJ     *a   = (Mat_MPIAIJ*)A->data;
2140   PetscErrorCode ierr;
2141 
2142   PetscFunctionBegin;
2143   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
2144   PetscFunctionReturn(0);
2145 }
2146 
2147 #undef __FUNCT__
2148 #define __FUNCT__ "MatEqual_MPIAIJ"
2149 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscBool  *flag)
2150 {
2151   Mat_MPIAIJ     *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data;
2152   Mat            a,b,c,d;
2153   PetscBool      flg;
2154   PetscErrorCode ierr;
2155 
2156   PetscFunctionBegin;
2157   a = matA->A; b = matA->B;
2158   c = matB->A; d = matB->B;
2159 
2160   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
2161   if (flg) {
2162     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
2163   }
2164   ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,((PetscObject)A)->comm);CHKERRQ(ierr);
2165   PetscFunctionReturn(0);
2166 }
2167 
2168 #undef __FUNCT__
2169 #define __FUNCT__ "MatCopy_MPIAIJ"
2170 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
2171 {
2172   PetscErrorCode ierr;
2173   Mat_MPIAIJ     *a = (Mat_MPIAIJ *)A->data;
2174   Mat_MPIAIJ     *b = (Mat_MPIAIJ *)B->data;
2175 
2176   PetscFunctionBegin;
2177   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
2178   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
2179     /* because of the column compression in the off-processor part of the matrix a->B,
2180        the number of columns in a->B and b->B may be different, hence we cannot call
2181        the MatCopy() directly on the two parts. If need be, we can provide a more
2182        efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices
2183        then copying the submatrices */
2184     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
2185   } else {
2186     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
2187     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
2188   }
2189   PetscFunctionReturn(0);
2190 }
2191 
2192 #undef __FUNCT__
2193 #define __FUNCT__ "MatSetUpPreallocation_MPIAIJ"
2194 PetscErrorCode MatSetUpPreallocation_MPIAIJ(Mat A)
2195 {
2196   PetscErrorCode ierr;
2197 
2198   PetscFunctionBegin;
2199   ierr =  MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
2200   PetscFunctionReturn(0);
2201 }
2202 
2203 #undef __FUNCT__
2204 #define __FUNCT__ "MatAXPY_MPIAIJ"
2205 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
2206 {
2207   PetscErrorCode ierr;
2208   PetscInt       i;
2209   Mat_MPIAIJ     *xx = (Mat_MPIAIJ *)X->data,*yy = (Mat_MPIAIJ *)Y->data;
2210   PetscBLASInt   bnz,one=1;
2211   Mat_SeqAIJ     *x,*y;
2212 
2213   PetscFunctionBegin;
2214   if (str == SAME_NONZERO_PATTERN) {
2215     PetscScalar alpha = a;
2216     x = (Mat_SeqAIJ *)xx->A->data;
2217     y = (Mat_SeqAIJ *)yy->A->data;
2218     bnz = PetscBLASIntCast(x->nz);
2219     BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
2220     x = (Mat_SeqAIJ *)xx->B->data;
2221     y = (Mat_SeqAIJ *)yy->B->data;
2222     bnz = PetscBLASIntCast(x->nz);
2223     BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
2224   } else if (str == SUBSET_NONZERO_PATTERN) {
2225     ierr = MatAXPY_SeqAIJ(yy->A,a,xx->A,str);CHKERRQ(ierr);
2226 
2227     x = (Mat_SeqAIJ *)xx->B->data;
2228     y = (Mat_SeqAIJ *)yy->B->data;
2229     if (y->xtoy && y->XtoY != xx->B) {
2230       ierr = PetscFree(y->xtoy);CHKERRQ(ierr);
2231       ierr = MatDestroy(&y->XtoY);CHKERRQ(ierr);
2232     }
2233     if (!y->xtoy) { /* get xtoy */
2234       ierr = MatAXPYGetxtoy_Private(xx->B->rmap->n,x->i,x->j,xx->garray,y->i,y->j,yy->garray,&y->xtoy);CHKERRQ(ierr);
2235       y->XtoY = xx->B;
2236       ierr = PetscObjectReference((PetscObject)xx->B);CHKERRQ(ierr);
2237     }
2238     for (i=0; i<x->nz; i++) y->a[y->xtoy[i]] += a*(x->a[i]);
2239   } else {
2240     Mat B;
2241     PetscInt *nnz_d,*nnz_o;
2242     ierr = PetscMalloc(yy->A->rmap->N*sizeof(PetscInt),&nnz_d);CHKERRQ(ierr);
2243     ierr = PetscMalloc(yy->B->rmap->N*sizeof(PetscInt),&nnz_o);CHKERRQ(ierr);
2244     ierr = MatCreate(((PetscObject)Y)->comm,&B);CHKERRQ(ierr);
2245     ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr);
2246     ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr);
2247     ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr);
2248     ierr = MatAXPYGetPreallocation_SeqAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr);
2249     ierr = MatAXPYGetPreallocation_SeqAIJ(yy->B,xx->B,nnz_o);CHKERRQ(ierr);
2250     ierr = MatMPIAIJSetPreallocation(B,PETSC_NULL,nnz_d,PETSC_NULL,nnz_o);CHKERRQ(ierr);
2251     ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr);
2252     ierr = MatHeaderReplace(Y,B);
2253     ierr = PetscFree(nnz_d);CHKERRQ(ierr);
2254     ierr = PetscFree(nnz_o);CHKERRQ(ierr);
2255   }
2256   PetscFunctionReturn(0);
2257 }
2258 
2259 extern PetscErrorCode  MatConjugate_SeqAIJ(Mat);
2260 
2261 #undef __FUNCT__
2262 #define __FUNCT__ "MatConjugate_MPIAIJ"
2263 PetscErrorCode  MatConjugate_MPIAIJ(Mat mat)
2264 {
2265 #if defined(PETSC_USE_COMPLEX)
2266   PetscErrorCode ierr;
2267   Mat_MPIAIJ     *aij = (Mat_MPIAIJ *)mat->data;
2268 
2269   PetscFunctionBegin;
2270   ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr);
2271   ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr);
2272 #else
2273   PetscFunctionBegin;
2274 #endif
2275   PetscFunctionReturn(0);
2276 }
2277 
2278 #undef __FUNCT__
2279 #define __FUNCT__ "MatRealPart_MPIAIJ"
2280 PetscErrorCode MatRealPart_MPIAIJ(Mat A)
2281 {
2282   Mat_MPIAIJ   *a = (Mat_MPIAIJ*)A->data;
2283   PetscErrorCode ierr;
2284 
2285   PetscFunctionBegin;
2286   ierr = MatRealPart(a->A);CHKERRQ(ierr);
2287   ierr = MatRealPart(a->B);CHKERRQ(ierr);
2288   PetscFunctionReturn(0);
2289 }
2290 
2291 #undef __FUNCT__
2292 #define __FUNCT__ "MatImaginaryPart_MPIAIJ"
2293 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A)
2294 {
2295   Mat_MPIAIJ   *a = (Mat_MPIAIJ*)A->data;
2296   PetscErrorCode ierr;
2297 
2298   PetscFunctionBegin;
2299   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
2300   ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
2301   PetscFunctionReturn(0);
2302 }
2303 
2304 #ifdef PETSC_HAVE_PBGL
2305 
2306 #include <boost/parallel/mpi/bsp_process_group.hpp>
2307 #include <boost/graph/distributed/ilu_default_graph.hpp>
2308 #include <boost/graph/distributed/ilu_0_block.hpp>
2309 #include <boost/graph/distributed/ilu_preconditioner.hpp>
2310 #include <boost/graph/distributed/petsc/interface.hpp>
2311 #include <boost/multi_array.hpp>
2312 #include <boost/parallel/distributed_property_map->hpp>
2313 
2314 #undef __FUNCT__
2315 #define __FUNCT__ "MatILUFactorSymbolic_MPIAIJ"
2316 /*
2317   This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu>
2318 */
2319 PetscErrorCode MatILUFactorSymbolic_MPIAIJ(Mat fact,Mat A, IS isrow, IS iscol, const MatFactorInfo *info)
2320 {
2321   namespace petsc = boost::distributed::petsc;
2322 
2323   namespace graph_dist = boost::graph::distributed;
2324   using boost::graph::distributed::ilu_default::process_group_type;
2325   using boost::graph::ilu_permuted;
2326 
2327   PetscBool       row_identity, col_identity;
2328   PetscContainer  c;
2329   PetscInt        m, n, M, N;
2330   PetscErrorCode  ierr;
2331 
2332   PetscFunctionBegin;
2333   if (info->levels != 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only levels = 0 supported for parallel ilu");
2334   ierr = ISIdentity(isrow, &row_identity);CHKERRQ(ierr);
2335   ierr = ISIdentity(iscol, &col_identity);CHKERRQ(ierr);
2336   if (!row_identity || !col_identity) {
2337     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for parallel ILU");
2338   }
2339 
2340   process_group_type pg;
2341   typedef graph_dist::ilu_default::ilu_level_graph_type  lgraph_type;
2342   lgraph_type*   lgraph_p = new lgraph_type(petsc::num_global_vertices(A), pg, petsc::matrix_distribution(A, pg));
2343   lgraph_type&   level_graph = *lgraph_p;
2344   graph_dist::ilu_default::graph_type&            graph(level_graph.graph);
2345 
2346   petsc::read_matrix(A, graph, get(boost::edge_weight, graph));
2347   ilu_permuted(level_graph);
2348 
2349   /* put together the new matrix */
2350   ierr = MatCreate(((PetscObject)A)->comm, fact);CHKERRQ(ierr);
2351   ierr = MatGetLocalSize(A, &m, &n);CHKERRQ(ierr);
2352   ierr = MatGetSize(A, &M, &N);CHKERRQ(ierr);
2353   ierr = MatSetSizes(fact, m, n, M, N);CHKERRQ(ierr);
2354   ierr = MatSetType(fact, ((PetscObject)A)->type_name);CHKERRQ(ierr);
2355   ierr = MatAssemblyBegin(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2356   ierr = MatAssemblyEnd(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2357 
2358   ierr = PetscContainerCreate(((PetscObject)A)->comm, &c);
2359   ierr = PetscContainerSetPointer(c, lgraph_p);
2360   ierr = PetscObjectCompose((PetscObject) (fact), "graph", (PetscObject) c);
2361   ierr = PetscContainerDestroy(&c);
2362   PetscFunctionReturn(0);
2363 }
2364 
2365 #undef __FUNCT__
2366 #define __FUNCT__ "MatLUFactorNumeric_MPIAIJ"
2367 PetscErrorCode MatLUFactorNumeric_MPIAIJ(Mat B,Mat A, const MatFactorInfo *info)
2368 {
2369   PetscFunctionBegin;
2370   PetscFunctionReturn(0);
2371 }
2372 
2373 #undef __FUNCT__
2374 #define __FUNCT__ "MatSolve_MPIAIJ"
2375 /*
2376   This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu>
2377 */
2378 PetscErrorCode MatSolve_MPIAIJ(Mat A, Vec b, Vec x)
2379 {
2380   namespace graph_dist = boost::graph::distributed;
2381 
2382   typedef graph_dist::ilu_default::ilu_level_graph_type  lgraph_type;
2383   lgraph_type*   lgraph_p;
2384   PetscContainer c;
2385   PetscErrorCode ierr;
2386 
2387   PetscFunctionBegin;
2388   ierr = PetscObjectQuery((PetscObject) A, "graph", (PetscObject *) &c);CHKERRQ(ierr);
2389   ierr = PetscContainerGetPointer(c, (void **) &lgraph_p);CHKERRQ(ierr);
2390   ierr = VecCopy(b, x);CHKERRQ(ierr);
2391 
2392   PetscScalar* array_x;
2393   ierr = VecGetArray(x, &array_x);CHKERRQ(ierr);
2394   PetscInt sx;
2395   ierr = VecGetSize(x, &sx);CHKERRQ(ierr);
2396 
2397   PetscScalar* array_b;
2398   ierr = VecGetArray(b, &array_b);CHKERRQ(ierr);
2399   PetscInt sb;
2400   ierr = VecGetSize(b, &sb);CHKERRQ(ierr);
2401 
2402   lgraph_type&   level_graph = *lgraph_p;
2403   graph_dist::ilu_default::graph_type&            graph(level_graph.graph);
2404 
2405   typedef boost::multi_array_ref<PetscScalar, 1> array_ref_type;
2406   array_ref_type                                 ref_b(array_b, boost::extents[num_vertices(graph)]),
2407                                                  ref_x(array_x, boost::extents[num_vertices(graph)]);
2408 
2409   typedef boost::iterator_property_map<array_ref_type::iterator,
2410                                 boost::property_map<graph_dist::ilu_default::graph_type, boost::vertex_index_t>::type>  gvector_type;
2411   gvector_type                                   vector_b(ref_b.begin(), get(boost::vertex_index, graph)),
2412                                                  vector_x(ref_x.begin(), get(boost::vertex_index, graph));
2413 
2414   ilu_set_solve(*lgraph_p, vector_b, vector_x);
2415 
2416   PetscFunctionReturn(0);
2417 }
2418 #endif
2419 
2420 typedef struct { /* used by MatGetRedundantMatrix() for reusing matredundant */
2421   PetscInt       nzlocal,nsends,nrecvs;
2422   PetscMPIInt    *send_rank,*recv_rank;
2423   PetscInt       *sbuf_nz,*rbuf_nz,*sbuf_j,**rbuf_j;
2424   PetscScalar    *sbuf_a,**rbuf_a;
2425   PetscErrorCode (*Destroy)(Mat);
2426 } Mat_Redundant;
2427 
2428 #undef __FUNCT__
2429 #define __FUNCT__ "PetscContainerDestroy_MatRedundant"
2430 PetscErrorCode PetscContainerDestroy_MatRedundant(void *ptr)
2431 {
2432   PetscErrorCode       ierr;
2433   Mat_Redundant        *redund=(Mat_Redundant*)ptr;
2434   PetscInt             i;
2435 
2436   PetscFunctionBegin;
2437   ierr = PetscFree2(redund->send_rank,redund->recv_rank);CHKERRQ(ierr);
2438   ierr = PetscFree(redund->sbuf_j);CHKERRQ(ierr);
2439   ierr = PetscFree(redund->sbuf_a);CHKERRQ(ierr);
2440   for (i=0; i<redund->nrecvs; i++){
2441     ierr = PetscFree(redund->rbuf_j[i]);CHKERRQ(ierr);
2442     ierr = PetscFree(redund->rbuf_a[i]);CHKERRQ(ierr);
2443   }
2444   ierr = PetscFree4(redund->sbuf_nz,redund->rbuf_nz,redund->rbuf_j,redund->rbuf_a);CHKERRQ(ierr);
2445   ierr = PetscFree(redund);CHKERRQ(ierr);
2446   PetscFunctionReturn(0);
2447 }
2448 
2449 #undef __FUNCT__
2450 #define __FUNCT__ "MatDestroy_MatRedundant"
2451 PetscErrorCode MatDestroy_MatRedundant(Mat A)
2452 {
2453   PetscErrorCode  ierr;
2454   PetscContainer  container;
2455   Mat_Redundant   *redund=PETSC_NULL;
2456 
2457   PetscFunctionBegin;
2458   ierr = PetscObjectQuery((PetscObject)A,"Mat_Redundant",(PetscObject *)&container);CHKERRQ(ierr);
2459   if (!container) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Container does not exit");
2460   ierr = PetscContainerGetPointer(container,(void **)&redund);CHKERRQ(ierr);
2461   A->ops->destroy = redund->Destroy;
2462   ierr = PetscObjectCompose((PetscObject)A,"Mat_Redundant",0);CHKERRQ(ierr);
2463   if (A->ops->destroy) {
2464     ierr = (*A->ops->destroy)(A);CHKERRQ(ierr);
2465   }
2466   PetscFunctionReturn(0);
2467 }
2468 
2469 #undef __FUNCT__
2470 #define __FUNCT__ "MatGetRedundantMatrix_MPIAIJ"
2471 PetscErrorCode MatGetRedundantMatrix_MPIAIJ(Mat mat,PetscInt nsubcomm,MPI_Comm subcomm,PetscInt mlocal_sub,MatReuse reuse,Mat *matredundant)
2472 {
2473   PetscMPIInt    rank,size;
2474   MPI_Comm       comm=((PetscObject)mat)->comm;
2475   PetscErrorCode ierr;
2476   PetscInt       nsends=0,nrecvs=0,i,rownz_max=0;
2477   PetscMPIInt    *send_rank=PETSC_NULL,*recv_rank=PETSC_NULL;
2478   PetscInt       *rowrange=mat->rmap->range;
2479   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2480   Mat            A=aij->A,B=aij->B,C=*matredundant;
2481   Mat_SeqAIJ     *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data;
2482   PetscScalar    *sbuf_a;
2483   PetscInt       nzlocal=a->nz+b->nz;
2484   PetscInt       j,cstart=mat->cmap->rstart,cend=mat->cmap->rend,row,nzA,nzB,ncols,*cworkA,*cworkB;
2485   PetscInt       rstart=mat->rmap->rstart,rend=mat->rmap->rend,*bmap=aij->garray,M,N;
2486   PetscInt       *cols,ctmp,lwrite,*rptr,l,*sbuf_j;
2487   MatScalar      *aworkA,*aworkB;
2488   PetscScalar    *vals;
2489   PetscMPIInt    tag1,tag2,tag3,imdex;
2490   MPI_Request    *s_waits1=PETSC_NULL,*s_waits2=PETSC_NULL,*s_waits3=PETSC_NULL,
2491                  *r_waits1=PETSC_NULL,*r_waits2=PETSC_NULL,*r_waits3=PETSC_NULL;
2492   MPI_Status     recv_status,*send_status;
2493   PetscInt       *sbuf_nz=PETSC_NULL,*rbuf_nz=PETSC_NULL,count;
2494   PetscInt       **rbuf_j=PETSC_NULL;
2495   PetscScalar    **rbuf_a=PETSC_NULL;
2496   Mat_Redundant  *redund=PETSC_NULL;
2497   PetscContainer container;
2498 
2499   PetscFunctionBegin;
2500   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2501   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2502 
2503   if (reuse == MAT_REUSE_MATRIX) {
2504     ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr);
2505     if (M != N || M != mat->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong global size");
2506     ierr = MatGetLocalSize(C,&M,&N);CHKERRQ(ierr);
2507     if (M != N || M != mlocal_sub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong local size");
2508     ierr = PetscObjectQuery((PetscObject)C,"Mat_Redundant",(PetscObject *)&container);CHKERRQ(ierr);
2509     if (!container) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Container does not exit");
2510     ierr = PetscContainerGetPointer(container,(void **)&redund);CHKERRQ(ierr);
2511     if (nzlocal != redund->nzlocal) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong nzlocal");
2512 
2513     nsends    = redund->nsends;
2514     nrecvs    = redund->nrecvs;
2515     send_rank = redund->send_rank;
2516     recv_rank = redund->recv_rank;
2517     sbuf_nz   = redund->sbuf_nz;
2518     rbuf_nz   = redund->rbuf_nz;
2519     sbuf_j    = redund->sbuf_j;
2520     sbuf_a    = redund->sbuf_a;
2521     rbuf_j    = redund->rbuf_j;
2522     rbuf_a    = redund->rbuf_a;
2523   }
2524 
2525   if (reuse == MAT_INITIAL_MATRIX){
2526     PetscMPIInt  subrank,subsize;
2527     PetscInt     nleftover,np_subcomm;
2528     /* get the destination processors' id send_rank, nsends and nrecvs */
2529     ierr = MPI_Comm_rank(subcomm,&subrank);CHKERRQ(ierr);
2530     ierr = MPI_Comm_size(subcomm,&subsize);CHKERRQ(ierr);
2531     ierr = PetscMalloc2(size,PetscMPIInt,&send_rank,size,PetscMPIInt,&recv_rank);
2532     np_subcomm = size/nsubcomm;
2533     nleftover  = size - nsubcomm*np_subcomm;
2534     nsends = 0; nrecvs = 0;
2535     for (i=0; i<size; i++){ /* i=rank*/
2536       if (subrank == i/nsubcomm && rank != i){ /* my_subrank == other's subrank */
2537         send_rank[nsends] = i; nsends++;
2538         recv_rank[nrecvs++] = i;
2539       }
2540     }
2541     if (rank >= size - nleftover){/* this proc is a leftover processor */
2542       i = size-nleftover-1;
2543       j = 0;
2544       while (j < nsubcomm - nleftover){
2545         send_rank[nsends++] = i;
2546         i--; j++;
2547       }
2548     }
2549 
2550     if (nleftover && subsize == size/nsubcomm && subrank==subsize-1){ /* this proc recvs from leftover processors */
2551       for (i=0; i<nleftover; i++){
2552         recv_rank[nrecvs++] = size-nleftover+i;
2553       }
2554     }
2555 
2556     /* allocate sbuf_j, sbuf_a */
2557     i = nzlocal + rowrange[rank+1] - rowrange[rank] + 2;
2558     ierr = PetscMalloc(i*sizeof(PetscInt),&sbuf_j);CHKERRQ(ierr);
2559     ierr = PetscMalloc((nzlocal+1)*sizeof(PetscScalar),&sbuf_a);CHKERRQ(ierr);
2560   } /* endof if (reuse == MAT_INITIAL_MATRIX) */
2561 
2562   /* copy mat's local entries into the buffers */
2563   if (reuse == MAT_INITIAL_MATRIX){
2564     rownz_max = 0;
2565     rptr = sbuf_j;
2566     cols = sbuf_j + rend-rstart + 1;
2567     vals = sbuf_a;
2568     rptr[0] = 0;
2569     for (i=0; i<rend-rstart; i++){
2570       row = i + rstart;
2571       nzA    = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i];
2572       ncols  = nzA + nzB;
2573       cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i];
2574       aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i];
2575       /* load the column indices for this row into cols */
2576       lwrite = 0;
2577       for (l=0; l<nzB; l++) {
2578         if ((ctmp = bmap[cworkB[l]]) < cstart){
2579           vals[lwrite]   = aworkB[l];
2580           cols[lwrite++] = ctmp;
2581         }
2582       }
2583       for (l=0; l<nzA; l++){
2584         vals[lwrite]   = aworkA[l];
2585         cols[lwrite++] = cstart + cworkA[l];
2586       }
2587       for (l=0; l<nzB; l++) {
2588         if ((ctmp = bmap[cworkB[l]]) >= cend){
2589           vals[lwrite]   = aworkB[l];
2590           cols[lwrite++] = ctmp;
2591         }
2592       }
2593       vals += ncols;
2594       cols += ncols;
2595       rptr[i+1] = rptr[i] + ncols;
2596       if (rownz_max < ncols) rownz_max = ncols;
2597     }
2598     if (rptr[rend-rstart] != a->nz + b->nz) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB, "rptr[%d] %d != %d + %d",rend-rstart,rptr[rend-rstart+1],a->nz,b->nz);
2599   } else { /* only copy matrix values into sbuf_a */
2600     rptr = sbuf_j;
2601     vals = sbuf_a;
2602     rptr[0] = 0;
2603     for (i=0; i<rend-rstart; i++){
2604       row = i + rstart;
2605       nzA    = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i];
2606       ncols  = nzA + nzB;
2607       cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i];
2608       aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i];
2609       lwrite = 0;
2610       for (l=0; l<nzB; l++) {
2611         if ((ctmp = bmap[cworkB[l]]) < cstart) vals[lwrite++] = aworkB[l];
2612       }
2613       for (l=0; l<nzA; l++) vals[lwrite++] = aworkA[l];
2614       for (l=0; l<nzB; l++) {
2615         if ((ctmp = bmap[cworkB[l]]) >= cend) vals[lwrite++] = aworkB[l];
2616       }
2617       vals += ncols;
2618       rptr[i+1] = rptr[i] + ncols;
2619     }
2620   } /* endof if (reuse == MAT_INITIAL_MATRIX) */
2621 
2622   /* send nzlocal to others, and recv other's nzlocal */
2623   /*--------------------------------------------------*/
2624   if (reuse == MAT_INITIAL_MATRIX){
2625     ierr = PetscMalloc2(3*(nsends + nrecvs)+1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr);
2626     s_waits2 = s_waits3 + nsends;
2627     s_waits1 = s_waits2 + nsends;
2628     r_waits1 = s_waits1 + nsends;
2629     r_waits2 = r_waits1 + nrecvs;
2630     r_waits3 = r_waits2 + nrecvs;
2631   } else {
2632     ierr = PetscMalloc2(nsends + nrecvs +1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr);
2633     r_waits3 = s_waits3 + nsends;
2634   }
2635 
2636   ierr = PetscObjectGetNewTag((PetscObject)mat,&tag3);CHKERRQ(ierr);
2637   if (reuse == MAT_INITIAL_MATRIX){
2638     /* get new tags to keep the communication clean */
2639     ierr = PetscObjectGetNewTag((PetscObject)mat,&tag1);CHKERRQ(ierr);
2640     ierr = PetscObjectGetNewTag((PetscObject)mat,&tag2);CHKERRQ(ierr);
2641     ierr = PetscMalloc4(nsends,PetscInt,&sbuf_nz,nrecvs,PetscInt,&rbuf_nz,nrecvs,PetscInt*,&rbuf_j,nrecvs,PetscScalar*,&rbuf_a);CHKERRQ(ierr);
2642 
2643     /* post receives of other's nzlocal */
2644     for (i=0; i<nrecvs; i++){
2645       ierr = MPI_Irecv(rbuf_nz+i,1,MPIU_INT,MPI_ANY_SOURCE,tag1,comm,r_waits1+i);CHKERRQ(ierr);
2646     }
2647     /* send nzlocal to others */
2648     for (i=0; i<nsends; i++){
2649       sbuf_nz[i] = nzlocal;
2650       ierr = MPI_Isend(sbuf_nz+i,1,MPIU_INT,send_rank[i],tag1,comm,s_waits1+i);CHKERRQ(ierr);
2651     }
2652     /* wait on receives of nzlocal; allocate space for rbuf_j, rbuf_a */
2653     count = nrecvs;
2654     while (count) {
2655       ierr = MPI_Waitany(nrecvs,r_waits1,&imdex,&recv_status);CHKERRQ(ierr);
2656       recv_rank[imdex] = recv_status.MPI_SOURCE;
2657       /* allocate rbuf_a and rbuf_j; then post receives of rbuf_j */
2658       ierr = PetscMalloc((rbuf_nz[imdex]+1)*sizeof(PetscScalar),&rbuf_a[imdex]);CHKERRQ(ierr);
2659 
2660       i = rowrange[recv_status.MPI_SOURCE+1] - rowrange[recv_status.MPI_SOURCE]; /* number of expected mat->i */
2661       rbuf_nz[imdex] += i + 2;
2662       ierr = PetscMalloc(rbuf_nz[imdex]*sizeof(PetscInt),&rbuf_j[imdex]);CHKERRQ(ierr);
2663       ierr = MPI_Irecv(rbuf_j[imdex],rbuf_nz[imdex],MPIU_INT,recv_status.MPI_SOURCE,tag2,comm,r_waits2+imdex);CHKERRQ(ierr);
2664       count--;
2665     }
2666     /* wait on sends of nzlocal */
2667     if (nsends) {ierr = MPI_Waitall(nsends,s_waits1,send_status);CHKERRQ(ierr);}
2668     /* send mat->i,j to others, and recv from other's */
2669     /*------------------------------------------------*/
2670     for (i=0; i<nsends; i++){
2671       j = nzlocal + rowrange[rank+1] - rowrange[rank] + 1;
2672       ierr = MPI_Isend(sbuf_j,j,MPIU_INT,send_rank[i],tag2,comm,s_waits2+i);CHKERRQ(ierr);
2673     }
2674     /* wait on receives of mat->i,j */
2675     /*------------------------------*/
2676     count = nrecvs;
2677     while (count) {
2678       ierr = MPI_Waitany(nrecvs,r_waits2,&imdex,&recv_status);CHKERRQ(ierr);
2679       if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE);
2680       count--;
2681     }
2682     /* wait on sends of mat->i,j */
2683     /*---------------------------*/
2684     if (nsends) {
2685       ierr = MPI_Waitall(nsends,s_waits2,send_status);CHKERRQ(ierr);
2686     }
2687   } /* endof if (reuse == MAT_INITIAL_MATRIX) */
2688 
2689   /* post receives, send and receive mat->a */
2690   /*----------------------------------------*/
2691   for (imdex=0; imdex<nrecvs; imdex++) {
2692     ierr = MPI_Irecv(rbuf_a[imdex],rbuf_nz[imdex],MPIU_SCALAR,recv_rank[imdex],tag3,comm,r_waits3+imdex);CHKERRQ(ierr);
2693   }
2694   for (i=0; i<nsends; i++){
2695     ierr = MPI_Isend(sbuf_a,nzlocal,MPIU_SCALAR,send_rank[i],tag3,comm,s_waits3+i);CHKERRQ(ierr);
2696   }
2697   count = nrecvs;
2698   while (count) {
2699     ierr = MPI_Waitany(nrecvs,r_waits3,&imdex,&recv_status);CHKERRQ(ierr);
2700     if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE);
2701     count--;
2702   }
2703   if (nsends) {
2704     ierr = MPI_Waitall(nsends,s_waits3,send_status);CHKERRQ(ierr);
2705   }
2706 
2707   ierr = PetscFree2(s_waits3,send_status);CHKERRQ(ierr);
2708 
2709   /* create redundant matrix */
2710   /*-------------------------*/
2711   if (reuse == MAT_INITIAL_MATRIX){
2712     /* compute rownz_max for preallocation */
2713     for (imdex=0; imdex<nrecvs; imdex++){
2714       j = rowrange[recv_rank[imdex]+1] - rowrange[recv_rank[imdex]];
2715       rptr = rbuf_j[imdex];
2716       for (i=0; i<j; i++){
2717         ncols = rptr[i+1] - rptr[i];
2718         if (rownz_max < ncols) rownz_max = ncols;
2719       }
2720     }
2721 
2722     ierr = MatCreate(subcomm,&C);CHKERRQ(ierr);
2723     ierr = MatSetSizes(C,mlocal_sub,mlocal_sub,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr);
2724     ierr = MatSetFromOptions(C);CHKERRQ(ierr);
2725     ierr = MatSeqAIJSetPreallocation(C,rownz_max,PETSC_NULL);CHKERRQ(ierr);
2726     ierr = MatMPIAIJSetPreallocation(C,rownz_max,PETSC_NULL,rownz_max,PETSC_NULL);CHKERRQ(ierr);
2727   } else {
2728     C = *matredundant;
2729   }
2730 
2731   /* insert local matrix entries */
2732   rptr = sbuf_j;
2733   cols = sbuf_j + rend-rstart + 1;
2734   vals = sbuf_a;
2735   for (i=0; i<rend-rstart; i++){
2736     row   = i + rstart;
2737     ncols = rptr[i+1] - rptr[i];
2738     ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
2739     vals += ncols;
2740     cols += ncols;
2741   }
2742   /* insert received matrix entries */
2743   for (imdex=0; imdex<nrecvs; imdex++){
2744     rstart = rowrange[recv_rank[imdex]];
2745     rend   = rowrange[recv_rank[imdex]+1];
2746     rptr = rbuf_j[imdex];
2747     cols = rbuf_j[imdex] + rend-rstart + 1;
2748     vals = rbuf_a[imdex];
2749     for (i=0; i<rend-rstart; i++){
2750       row   = i + rstart;
2751       ncols = rptr[i+1] - rptr[i];
2752       ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
2753       vals += ncols;
2754       cols += ncols;
2755     }
2756   }
2757   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2758   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2759   ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr);
2760   if (M != mat->rmap->N || N != mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"redundant mat size %d != input mat size %d",M,mat->rmap->N);
2761   if (reuse == MAT_INITIAL_MATRIX) {
2762     PetscContainer container;
2763     *matredundant = C;
2764     /* create a supporting struct and attach it to C for reuse */
2765     ierr = PetscNewLog(C,Mat_Redundant,&redund);CHKERRQ(ierr);
2766     ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
2767     ierr = PetscContainerSetPointer(container,redund);CHKERRQ(ierr);
2768     ierr = PetscContainerSetUserDestroy(container,PetscContainerDestroy_MatRedundant);CHKERRQ(ierr);
2769     ierr = PetscObjectCompose((PetscObject)C,"Mat_Redundant",(PetscObject)container);CHKERRQ(ierr);
2770     ierr = PetscContainerDestroy(&container);CHKERRQ(ierr);
2771 
2772     redund->nzlocal = nzlocal;
2773     redund->nsends  = nsends;
2774     redund->nrecvs  = nrecvs;
2775     redund->send_rank = send_rank;
2776     redund->recv_rank = recv_rank;
2777     redund->sbuf_nz = sbuf_nz;
2778     redund->rbuf_nz = rbuf_nz;
2779     redund->sbuf_j  = sbuf_j;
2780     redund->sbuf_a  = sbuf_a;
2781     redund->rbuf_j  = rbuf_j;
2782     redund->rbuf_a  = rbuf_a;
2783 
2784     redund->Destroy = C->ops->destroy;
2785     C->ops->destroy = MatDestroy_MatRedundant;
2786   }
2787   PetscFunctionReturn(0);
2788 }
2789 
2790 #undef __FUNCT__
2791 #define __FUNCT__ "MatGetRowMaxAbs_MPIAIJ"
2792 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2793 {
2794   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2795   PetscErrorCode ierr;
2796   PetscInt       i,*idxb = 0;
2797   PetscScalar    *va,*vb;
2798   Vec            vtmp;
2799 
2800   PetscFunctionBegin;
2801   ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr);
2802   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2803   if (idx) {
2804     for (i=0; i<A->rmap->n; i++) {
2805       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2806     }
2807   }
2808 
2809   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
2810   if (idx) {
2811     ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr);
2812   }
2813   ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
2814   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
2815 
2816   for (i=0; i<A->rmap->n; i++){
2817     if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
2818       va[i] = vb[i];
2819       if (idx) idx[i] = a->garray[idxb[i]];
2820     }
2821   }
2822 
2823   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2824   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
2825   ierr = PetscFree(idxb);CHKERRQ(ierr);
2826   ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
2827   PetscFunctionReturn(0);
2828 }
2829 
2830 #undef __FUNCT__
2831 #define __FUNCT__ "MatGetRowMinAbs_MPIAIJ"
2832 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2833 {
2834   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2835   PetscErrorCode ierr;
2836   PetscInt       i,*idxb = 0;
2837   PetscScalar    *va,*vb;
2838   Vec            vtmp;
2839 
2840   PetscFunctionBegin;
2841   ierr = MatGetRowMinAbs(a->A,v,idx);CHKERRQ(ierr);
2842   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2843   if (idx) {
2844     for (i=0; i<A->cmap->n; i++) {
2845       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2846     }
2847   }
2848 
2849   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
2850   if (idx) {
2851     ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr);
2852   }
2853   ierr = MatGetRowMinAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
2854   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
2855 
2856   for (i=0; i<A->rmap->n; i++){
2857     if (PetscAbsScalar(va[i]) > PetscAbsScalar(vb[i])) {
2858       va[i] = vb[i];
2859       if (idx) idx[i] = a->garray[idxb[i]];
2860     }
2861   }
2862 
2863   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2864   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
2865   ierr = PetscFree(idxb);CHKERRQ(ierr);
2866   ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
2867   PetscFunctionReturn(0);
2868 }
2869 
2870 #undef __FUNCT__
2871 #define __FUNCT__ "MatGetRowMin_MPIAIJ"
2872 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2873 {
2874   Mat_MPIAIJ    *mat    = (Mat_MPIAIJ *) A->data;
2875   PetscInt       n      = A->rmap->n;
2876   PetscInt       cstart = A->cmap->rstart;
2877   PetscInt      *cmap   = mat->garray;
2878   PetscInt      *diagIdx, *offdiagIdx;
2879   Vec            diagV, offdiagV;
2880   PetscScalar   *a, *diagA, *offdiagA;
2881   PetscInt       r;
2882   PetscErrorCode ierr;
2883 
2884   PetscFunctionBegin;
2885   ierr = PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);CHKERRQ(ierr);
2886   ierr = VecCreateSeq(((PetscObject)A)->comm, n, &diagV);CHKERRQ(ierr);
2887   ierr = VecCreateSeq(((PetscObject)A)->comm, n, &offdiagV);CHKERRQ(ierr);
2888   ierr = MatGetRowMin(mat->A, diagV,    diagIdx);CHKERRQ(ierr);
2889   ierr = MatGetRowMin(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
2890   ierr = VecGetArray(v,        &a);CHKERRQ(ierr);
2891   ierr = VecGetArray(diagV,    &diagA);CHKERRQ(ierr);
2892   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2893   for(r = 0; r < n; ++r) {
2894     if (PetscAbsScalar(diagA[r]) <= PetscAbsScalar(offdiagA[r])) {
2895       a[r]   = diagA[r];
2896       idx[r] = cstart + diagIdx[r];
2897     } else {
2898       a[r]   = offdiagA[r];
2899       idx[r] = cmap[offdiagIdx[r]];
2900     }
2901   }
2902   ierr = VecRestoreArray(v,        &a);CHKERRQ(ierr);
2903   ierr = VecRestoreArray(diagV,    &diagA);CHKERRQ(ierr);
2904   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2905   ierr = VecDestroy(&diagV);CHKERRQ(ierr);
2906   ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
2907   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
2908   PetscFunctionReturn(0);
2909 }
2910 
2911 #undef __FUNCT__
2912 #define __FUNCT__ "MatGetRowMax_MPIAIJ"
2913 PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2914 {
2915   Mat_MPIAIJ    *mat    = (Mat_MPIAIJ *) A->data;
2916   PetscInt       n      = A->rmap->n;
2917   PetscInt       cstart = A->cmap->rstart;
2918   PetscInt      *cmap   = mat->garray;
2919   PetscInt      *diagIdx, *offdiagIdx;
2920   Vec            diagV, offdiagV;
2921   PetscScalar   *a, *diagA, *offdiagA;
2922   PetscInt       r;
2923   PetscErrorCode ierr;
2924 
2925   PetscFunctionBegin;
2926   ierr = PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);CHKERRQ(ierr);
2927   ierr = VecCreateSeq(((PetscObject)A)->comm, n, &diagV);CHKERRQ(ierr);
2928   ierr = VecCreateSeq(((PetscObject)A)->comm, n, &offdiagV);CHKERRQ(ierr);
2929   ierr = MatGetRowMax(mat->A, diagV,    diagIdx);CHKERRQ(ierr);
2930   ierr = MatGetRowMax(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
2931   ierr = VecGetArray(v,        &a);CHKERRQ(ierr);
2932   ierr = VecGetArray(diagV,    &diagA);CHKERRQ(ierr);
2933   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2934   for(r = 0; r < n; ++r) {
2935     if (PetscAbsScalar(diagA[r]) >= PetscAbsScalar(offdiagA[r])) {
2936       a[r]   = diagA[r];
2937       idx[r] = cstart + diagIdx[r];
2938     } else {
2939       a[r]   = offdiagA[r];
2940       idx[r] = cmap[offdiagIdx[r]];
2941     }
2942   }
2943   ierr = VecRestoreArray(v,        &a);CHKERRQ(ierr);
2944   ierr = VecRestoreArray(diagV,    &diagA);CHKERRQ(ierr);
2945   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2946   ierr = VecDestroy(&diagV);CHKERRQ(ierr);
2947   ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
2948   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
2949   PetscFunctionReturn(0);
2950 }
2951 
2952 #undef __FUNCT__
2953 #define __FUNCT__ "MatGetSeqNonzeroStructure_MPIAIJ"
2954 PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat,Mat *newmat)
2955 {
2956   PetscErrorCode ierr;
2957   Mat            *dummy;
2958 
2959   PetscFunctionBegin;
2960   ierr = MatGetSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy);CHKERRQ(ierr);
2961   *newmat = *dummy;
2962   ierr = PetscFree(dummy);CHKERRQ(ierr);
2963   PetscFunctionReturn(0);
2964 }
2965 
2966 extern PetscErrorCode  MatFDColoringApply_AIJ(Mat,MatFDColoring,Vec,MatStructure*,void*);
2967 
2968 #undef __FUNCT__
2969 #define __FUNCT__ "MatInvertBlockDiagonal_MPIAIJ"
2970 PetscErrorCode  MatInvertBlockDiagonal_MPIAIJ(Mat A,PetscScalar **values)
2971 {
2972   Mat_MPIAIJ    *a = (Mat_MPIAIJ*) A->data;
2973   PetscErrorCode ierr;
2974 
2975   PetscFunctionBegin;
2976   ierr = MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr);
2977   PetscFunctionReturn(0);
2978 }
2979 
2980 
2981 /* -------------------------------------------------------------------*/
2982 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
2983        MatGetRow_MPIAIJ,
2984        MatRestoreRow_MPIAIJ,
2985        MatMult_MPIAIJ,
2986 /* 4*/ MatMultAdd_MPIAIJ,
2987        MatMultTranspose_MPIAIJ,
2988        MatMultTransposeAdd_MPIAIJ,
2989 #ifdef PETSC_HAVE_PBGL
2990        MatSolve_MPIAIJ,
2991 #else
2992        0,
2993 #endif
2994        0,
2995        0,
2996 /*10*/ 0,
2997        0,
2998        0,
2999        MatSOR_MPIAIJ,
3000        MatTranspose_MPIAIJ,
3001 /*15*/ MatGetInfo_MPIAIJ,
3002        MatEqual_MPIAIJ,
3003        MatGetDiagonal_MPIAIJ,
3004        MatDiagonalScale_MPIAIJ,
3005        MatNorm_MPIAIJ,
3006 /*20*/ MatAssemblyBegin_MPIAIJ,
3007        MatAssemblyEnd_MPIAIJ,
3008        MatSetOption_MPIAIJ,
3009        MatZeroEntries_MPIAIJ,
3010 /*24*/ MatZeroRows_MPIAIJ,
3011        0,
3012 #ifdef PETSC_HAVE_PBGL
3013        0,
3014 #else
3015        0,
3016 #endif
3017        0,
3018        0,
3019 /*29*/ MatSetUpPreallocation_MPIAIJ,
3020 #ifdef PETSC_HAVE_PBGL
3021        0,
3022 #else
3023        0,
3024 #endif
3025        0,
3026        0,
3027        0,
3028 /*34*/ MatDuplicate_MPIAIJ,
3029        0,
3030        0,
3031        0,
3032        0,
3033 /*39*/ MatAXPY_MPIAIJ,
3034        MatGetSubMatrices_MPIAIJ,
3035        MatIncreaseOverlap_MPIAIJ,
3036        MatGetValues_MPIAIJ,
3037        MatCopy_MPIAIJ,
3038 /*44*/ MatGetRowMax_MPIAIJ,
3039        MatScale_MPIAIJ,
3040        0,
3041        0,
3042        MatZeroRowsColumns_MPIAIJ,
3043 /*49*/ MatSetBlockSize_MPIAIJ,
3044        0,
3045        0,
3046        0,
3047        0,
3048 /*54*/ MatFDColoringCreate_MPIAIJ,
3049        0,
3050        MatSetUnfactored_MPIAIJ,
3051        MatPermute_MPIAIJ,
3052        0,
3053 /*59*/ MatGetSubMatrix_MPIAIJ,
3054        MatDestroy_MPIAIJ,
3055        MatView_MPIAIJ,
3056        0,
3057        0,
3058 /*64*/ 0,
3059        0,
3060        0,
3061        0,
3062        0,
3063 /*69*/ MatGetRowMaxAbs_MPIAIJ,
3064        MatGetRowMinAbs_MPIAIJ,
3065        0,
3066        MatSetColoring_MPIAIJ,
3067 #if defined(PETSC_HAVE_ADIC)
3068        MatSetValuesAdic_MPIAIJ,
3069 #else
3070        0,
3071 #endif
3072        MatSetValuesAdifor_MPIAIJ,
3073 /*75*/ MatFDColoringApply_AIJ,
3074        0,
3075        0,
3076        0,
3077        0,
3078 /*80*/ 0,
3079        0,
3080        0,
3081 /*83*/ MatLoad_MPIAIJ,
3082        0,
3083        0,
3084        0,
3085        0,
3086        0,
3087 /*89*/ MatMatMult_MPIAIJ_MPIAIJ,
3088        MatMatMultSymbolic_MPIAIJ_MPIAIJ,
3089        MatMatMultNumeric_MPIAIJ_MPIAIJ,
3090        MatPtAP_Basic,
3091        MatPtAPSymbolic_MPIAIJ,
3092 /*94*/ MatPtAPNumeric_MPIAIJ,
3093        0,
3094        0,
3095        0,
3096        0,
3097 /*99*/ 0,
3098        MatPtAPSymbolic_MPIAIJ_MPIAIJ,
3099        MatPtAPNumeric_MPIAIJ_MPIAIJ,
3100        MatConjugate_MPIAIJ,
3101        0,
3102 /*104*/MatSetValuesRow_MPIAIJ,
3103        MatRealPart_MPIAIJ,
3104        MatImaginaryPart_MPIAIJ,
3105        0,
3106        0,
3107 /*109*/0,
3108        MatGetRedundantMatrix_MPIAIJ,
3109        MatGetRowMin_MPIAIJ,
3110        0,
3111        0,
3112 /*114*/MatGetSeqNonzeroStructure_MPIAIJ,
3113        0,
3114        0,
3115        0,
3116        0,
3117 /*119*/0,
3118        0,
3119        0,
3120        0,
3121        MatGetMultiProcBlock_MPIAIJ,
3122 /*124*/MatFindNonZeroRows_MPIAIJ,
3123        MatGetColumnNorms_MPIAIJ,
3124        MatInvertBlockDiagonal_MPIAIJ,
3125        0,
3126        MatGetSubMatricesParallel_MPIAIJ,
3127 /*129*/0
3128 };
3129 
3130 /* ----------------------------------------------------------------------------------------*/
3131 
3132 EXTERN_C_BEGIN
3133 #undef __FUNCT__
3134 #define __FUNCT__ "MatStoreValues_MPIAIJ"
3135 PetscErrorCode  MatStoreValues_MPIAIJ(Mat mat)
3136 {
3137   Mat_MPIAIJ     *aij = (Mat_MPIAIJ *)mat->data;
3138   PetscErrorCode ierr;
3139 
3140   PetscFunctionBegin;
3141   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
3142   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
3143   PetscFunctionReturn(0);
3144 }
3145 EXTERN_C_END
3146 
3147 EXTERN_C_BEGIN
3148 #undef __FUNCT__
3149 #define __FUNCT__ "MatRetrieveValues_MPIAIJ"
3150 PetscErrorCode  MatRetrieveValues_MPIAIJ(Mat mat)
3151 {
3152   Mat_MPIAIJ     *aij = (Mat_MPIAIJ *)mat->data;
3153   PetscErrorCode ierr;
3154 
3155   PetscFunctionBegin;
3156   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
3157   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
3158   PetscFunctionReturn(0);
3159 }
3160 EXTERN_C_END
3161 
3162 EXTERN_C_BEGIN
3163 #undef __FUNCT__
3164 #define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ"
3165 PetscErrorCode  MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
3166 {
3167   Mat_MPIAIJ     *b;
3168   PetscErrorCode ierr;
3169   PetscInt       i;
3170 
3171   PetscFunctionBegin;
3172   if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5;
3173   if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2;
3174   if (d_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz);
3175   if (o_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz);
3176 
3177   ierr = PetscLayoutSetBlockSize(B->rmap,1);CHKERRQ(ierr);
3178   ierr = PetscLayoutSetBlockSize(B->cmap,1);CHKERRQ(ierr);
3179   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3180   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3181   if (d_nnz) {
3182     for (i=0; i<B->rmap->n; i++) {
3183       if (d_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %D value %D",i,d_nnz[i]);
3184     }
3185   }
3186   if (o_nnz) {
3187     for (i=0; i<B->rmap->n; i++) {
3188       if (o_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %D value %D",i,o_nnz[i]);
3189     }
3190   }
3191   b = (Mat_MPIAIJ*)B->data;
3192 
3193   if (!B->preallocated) {
3194     /* Explicitly create 2 MATSEQAIJ matrices. */
3195     ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
3196     ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr);
3197     ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr);
3198     ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr);
3199     ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
3200     ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr);
3201     ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr);
3202     ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr);
3203   }
3204 
3205   ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr);
3206   ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr);
3207   B->preallocated = PETSC_TRUE;
3208   PetscFunctionReturn(0);
3209 }
3210 EXTERN_C_END
3211 
3212 #undef __FUNCT__
3213 #define __FUNCT__ "MatDuplicate_MPIAIJ"
3214 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
3215 {
3216   Mat            mat;
3217   Mat_MPIAIJ     *a,*oldmat = (Mat_MPIAIJ*)matin->data;
3218   PetscErrorCode ierr;
3219 
3220   PetscFunctionBegin;
3221   *newmat       = 0;
3222   ierr = MatCreate(((PetscObject)matin)->comm,&mat);CHKERRQ(ierr);
3223   ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr);
3224   ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr);
3225   ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
3226   a    = (Mat_MPIAIJ*)mat->data;
3227 
3228   mat->factortype    = matin->factortype;
3229   mat->rmap->bs      = matin->rmap->bs;
3230   mat->assembled    = PETSC_TRUE;
3231   mat->insertmode   = NOT_SET_VALUES;
3232   mat->preallocated = PETSC_TRUE;
3233 
3234   a->size           = oldmat->size;
3235   a->rank           = oldmat->rank;
3236   a->donotstash     = oldmat->donotstash;
3237   a->roworiented    = oldmat->roworiented;
3238   a->rowindices     = 0;
3239   a->rowvalues      = 0;
3240   a->getrowactive   = PETSC_FALSE;
3241 
3242   ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr);
3243   ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr);
3244 
3245   if (oldmat->colmap) {
3246 #if defined (PETSC_USE_CTABLE)
3247     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
3248 #else
3249     ierr = PetscMalloc((mat->cmap->N)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr);
3250     ierr = PetscLogObjectMemory(mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
3251     ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
3252 #endif
3253   } else a->colmap = 0;
3254   if (oldmat->garray) {
3255     PetscInt len;
3256     len  = oldmat->B->cmap->n;
3257     ierr = PetscMalloc((len+1)*sizeof(PetscInt),&a->garray);CHKERRQ(ierr);
3258     ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr);
3259     if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); }
3260   } else a->garray = 0;
3261 
3262   ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
3263   ierr = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr);
3264   ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
3265   ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr);
3266   ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
3267   ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr);
3268   ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
3269   ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr);
3270   ierr = PetscFListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr);
3271   *newmat = mat;
3272   PetscFunctionReturn(0);
3273 }
3274 
3275 
3276 
3277 #undef __FUNCT__
3278 #define __FUNCT__ "MatLoad_MPIAIJ"
3279 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer)
3280 {
3281   PetscScalar    *vals,*svals;
3282   MPI_Comm       comm = ((PetscObject)viewer)->comm;
3283   PetscErrorCode ierr;
3284   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
3285   PetscInt       i,nz,j,rstart,rend,mmax,maxnz = 0,grows,gcols;
3286   PetscInt       header[4],*rowlengths = 0,M,N,m,*cols;
3287   PetscInt       *ourlens = PETSC_NULL,*procsnz = PETSC_NULL,*offlens = PETSC_NULL,jj,*mycols,*smycols;
3288   PetscInt       cend,cstart,n,*rowners,sizesset=1;
3289   int            fd;
3290 
3291   PetscFunctionBegin;
3292   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3293   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3294   if (!rank) {
3295     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
3296     ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
3297     if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
3298   }
3299 
3300   if (newMat->rmap->n < 0 && newMat->rmap->N < 0 && newMat->cmap->n < 0 && newMat->cmap->N < 0) sizesset = 0;
3301 
3302   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
3303   M = header[1]; N = header[2];
3304   /* If global rows/cols are set to PETSC_DECIDE, set it to the sizes given in the file */
3305   if (sizesset && newMat->rmap->N < 0) newMat->rmap->N = M;
3306   if (sizesset && newMat->cmap->N < 0) newMat->cmap->N = N;
3307 
3308   /* If global sizes are set, check if they are consistent with that given in the file */
3309   if (sizesset) {
3310     ierr = MatGetSize(newMat,&grows,&gcols);CHKERRQ(ierr);
3311   }
3312   if (sizesset && newMat->rmap->N != grows) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows:Matrix in file has (%d) and input matrix has (%d)",M,grows);
3313   if (sizesset && newMat->cmap->N != gcols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols:Matrix in file has (%d) and input matrix has (%d)",N,gcols);
3314 
3315   /* determine ownership of all rows */
3316   if (newMat->rmap->n < 0 ) m    = M/size + ((M % size) > rank); /* PETSC_DECIDE */
3317   else m = newMat->rmap->n; /* Set by user */
3318 
3319   ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr);
3320   ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
3321 
3322   /* First process needs enough room for process with most rows */
3323   if (!rank) {
3324     mmax       = rowners[1];
3325     for (i=2; i<size; i++) {
3326       mmax = PetscMax(mmax,rowners[i]);
3327     }
3328   } else mmax = m;
3329 
3330   rowners[0] = 0;
3331   for (i=2; i<=size; i++) {
3332     rowners[i] += rowners[i-1];
3333   }
3334   rstart = rowners[rank];
3335   rend   = rowners[rank+1];
3336 
3337   /* distribute row lengths to all processors */
3338   ierr    = PetscMalloc2(mmax,PetscInt,&ourlens,mmax,PetscInt,&offlens);CHKERRQ(ierr);
3339   if (!rank) {
3340     ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr);
3341     ierr = PetscMalloc(m*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
3342     ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr);
3343     ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr);
3344     for (j=0; j<m; j++) {
3345       procsnz[0] += ourlens[j];
3346     }
3347     for (i=1; i<size; i++) {
3348       ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr);
3349       /* calculate the number of nonzeros on each processor */
3350       for (j=0; j<rowners[i+1]-rowners[i]; j++) {
3351         procsnz[i] += rowlengths[j];
3352       }
3353       ierr = MPILong_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
3354     }
3355     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
3356   } else {
3357     ierr = MPILong_Recv(ourlens,m,MPIU_INT,0,tag,comm);CHKERRQ(ierr);
3358   }
3359 
3360   if (!rank) {
3361     /* determine max buffer needed and allocate it */
3362     maxnz = 0;
3363     for (i=0; i<size; i++) {
3364       maxnz = PetscMax(maxnz,procsnz[i]);
3365     }
3366     ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr);
3367 
3368     /* read in my part of the matrix column indices  */
3369     nz   = procsnz[0];
3370     ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr);
3371     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
3372 
3373     /* read in every one elses and ship off */
3374     for (i=1; i<size; i++) {
3375       nz     = procsnz[i];
3376       ierr   = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
3377       ierr   = MPILong_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
3378     }
3379     ierr = PetscFree(cols);CHKERRQ(ierr);
3380   } else {
3381     /* determine buffer space needed for message */
3382     nz = 0;
3383     for (i=0; i<m; i++) {
3384       nz += ourlens[i];
3385     }
3386     ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr);
3387 
3388     /* receive message of column indices*/
3389     ierr = MPILong_Recv(mycols,nz,MPIU_INT,0,tag,comm);CHKERRQ(ierr);
3390   }
3391 
3392   /* determine column ownership if matrix is not square */
3393   if (N != M) {
3394     if (newMat->cmap->n < 0) n      = N/size + ((N % size) > rank);
3395     else n = newMat->cmap->n;
3396     ierr   = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3397     cstart = cend - n;
3398   } else {
3399     cstart = rstart;
3400     cend   = rend;
3401     n      = cend - cstart;
3402   }
3403 
3404   /* loop over local rows, determining number of off diagonal entries */
3405   ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr);
3406   jj = 0;
3407   for (i=0; i<m; i++) {
3408     for (j=0; j<ourlens[i]; j++) {
3409       if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++;
3410       jj++;
3411     }
3412   }
3413 
3414   for (i=0; i<m; i++) {
3415     ourlens[i] -= offlens[i];
3416   }
3417   if (!sizesset) {
3418     ierr = MatSetSizes(newMat,m,n,M,N);CHKERRQ(ierr);
3419   }
3420   ierr = MatMPIAIJSetPreallocation(newMat,0,ourlens,0,offlens);CHKERRQ(ierr);
3421 
3422   for (i=0; i<m; i++) {
3423     ourlens[i] += offlens[i];
3424   }
3425 
3426   if (!rank) {
3427     ierr = PetscMalloc((maxnz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
3428 
3429     /* read in my part of the matrix numerical values  */
3430     nz   = procsnz[0];
3431     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3432 
3433     /* insert into matrix */
3434     jj      = rstart;
3435     smycols = mycols;
3436     svals   = vals;
3437     for (i=0; i<m; i++) {
3438       ierr = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
3439       smycols += ourlens[i];
3440       svals   += ourlens[i];
3441       jj++;
3442     }
3443 
3444     /* read in other processors and ship out */
3445     for (i=1; i<size; i++) {
3446       nz     = procsnz[i];
3447       ierr   = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3448       ierr   = MPILong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr);
3449     }
3450     ierr = PetscFree(procsnz);CHKERRQ(ierr);
3451   } else {
3452     /* receive numeric values */
3453     ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
3454 
3455     /* receive message of values*/
3456     ierr   = MPILong_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr);
3457 
3458     /* insert into matrix */
3459     jj      = rstart;
3460     smycols = mycols;
3461     svals   = vals;
3462     for (i=0; i<m; i++) {
3463       ierr     = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
3464       smycols += ourlens[i];
3465       svals   += ourlens[i];
3466       jj++;
3467     }
3468   }
3469   ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr);
3470   ierr = PetscFree(vals);CHKERRQ(ierr);
3471   ierr = PetscFree(mycols);CHKERRQ(ierr);
3472   ierr = PetscFree(rowners);CHKERRQ(ierr);
3473 
3474   ierr = MatAssemblyBegin(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3475   ierr = MatAssemblyEnd(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3476   PetscFunctionReturn(0);
3477 }
3478 
3479 #undef __FUNCT__
3480 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ"
3481 PetscErrorCode MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat)
3482 {
3483   PetscErrorCode ierr;
3484   IS             iscol_local;
3485   PetscInt       csize;
3486 
3487   PetscFunctionBegin;
3488   ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
3489   if (call == MAT_REUSE_MATRIX) {
3490     ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr);
3491     if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3492   } else {
3493     ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
3494   }
3495   ierr = MatGetSubMatrix_MPIAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr);
3496   if (call == MAT_INITIAL_MATRIX) {
3497     ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
3498     ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
3499   }
3500   PetscFunctionReturn(0);
3501 }
3502 
3503 #undef __FUNCT__
3504 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ_Private"
3505 /*
3506     Not great since it makes two copies of the submatrix, first an SeqAIJ
3507   in local and then by concatenating the local matrices the end result.
3508   Writing it directly would be much like MatGetSubMatrices_MPIAIJ()
3509 
3510   Note: This requires a sequential iscol with all indices.
3511 */
3512 PetscErrorCode MatGetSubMatrix_MPIAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat)
3513 {
3514   PetscErrorCode ierr;
3515   PetscMPIInt    rank,size;
3516   PetscInt       i,m,n,rstart,row,rend,nz,*cwork,j;
3517   PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal;
3518   Mat            *local,M,Mreuse;
3519   MatScalar      *vwork,*aa;
3520   MPI_Comm       comm = ((PetscObject)mat)->comm;
3521   Mat_SeqAIJ     *aij;
3522 
3523 
3524   PetscFunctionBegin;
3525   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3526   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3527 
3528   if (call ==  MAT_REUSE_MATRIX) {
3529     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr);
3530     if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3531     local = &Mreuse;
3532     ierr  = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);CHKERRQ(ierr);
3533   } else {
3534     ierr   = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr);
3535     Mreuse = *local;
3536     ierr   = PetscFree(local);CHKERRQ(ierr);
3537   }
3538 
3539   /*
3540       m - number of local rows
3541       n - number of columns (same on all processors)
3542       rstart - first row in new global matrix generated
3543   */
3544   ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
3545   if (call == MAT_INITIAL_MATRIX) {
3546     aij = (Mat_SeqAIJ*)(Mreuse)->data;
3547     ii  = aij->i;
3548     jj  = aij->j;
3549 
3550     /*
3551         Determine the number of non-zeros in the diagonal and off-diagonal
3552         portions of the matrix in order to do correct preallocation
3553     */
3554 
3555     /* first get start and end of "diagonal" columns */
3556     if (csize == PETSC_DECIDE) {
3557       ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
3558       if (mglobal == n) { /* square matrix */
3559 	nlocal = m;
3560       } else {
3561         nlocal = n/size + ((n % size) > rank);
3562       }
3563     } else {
3564       nlocal = csize;
3565     }
3566     ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3567     rstart = rend - nlocal;
3568     if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n);
3569 
3570     /* next, compute all the lengths */
3571     ierr  = PetscMalloc((2*m+1)*sizeof(PetscInt),&dlens);CHKERRQ(ierr);
3572     olens = dlens + m;
3573     for (i=0; i<m; i++) {
3574       jend = ii[i+1] - ii[i];
3575       olen = 0;
3576       dlen = 0;
3577       for (j=0; j<jend; j++) {
3578         if (*jj < rstart || *jj >= rend) olen++;
3579         else dlen++;
3580         jj++;
3581       }
3582       olens[i] = olen;
3583       dlens[i] = dlen;
3584     }
3585     ierr = MatCreate(comm,&M);CHKERRQ(ierr);
3586     ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr);
3587     ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr);
3588     ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr);
3589     ierr = PetscFree(dlens);CHKERRQ(ierr);
3590   } else {
3591     PetscInt ml,nl;
3592 
3593     M = *newmat;
3594     ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
3595     if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
3596     ierr = MatZeroEntries(M);CHKERRQ(ierr);
3597     /*
3598          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
3599        rather than the slower MatSetValues().
3600     */
3601     M->was_assembled = PETSC_TRUE;
3602     M->assembled     = PETSC_FALSE;
3603   }
3604   ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
3605   aij = (Mat_SeqAIJ*)(Mreuse)->data;
3606   ii  = aij->i;
3607   jj  = aij->j;
3608   aa  = aij->a;
3609   for (i=0; i<m; i++) {
3610     row   = rstart + i;
3611     nz    = ii[i+1] - ii[i];
3612     cwork = jj;     jj += nz;
3613     vwork = aa;     aa += nz;
3614     ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
3615   }
3616 
3617   ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3618   ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3619   *newmat = M;
3620 
3621   /* save submatrix used in processor for next request */
3622   if (call ==  MAT_INITIAL_MATRIX) {
3623     ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
3624     ierr = MatDestroy(&Mreuse);CHKERRQ(ierr);
3625   }
3626 
3627   PetscFunctionReturn(0);
3628 }
3629 
3630 EXTERN_C_BEGIN
3631 #undef __FUNCT__
3632 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR_MPIAIJ"
3633 PetscErrorCode  MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[])
3634 {
3635   PetscInt       m,cstart, cend,j,nnz,i,d;
3636   PetscInt       *d_nnz,*o_nnz,nnz_max = 0,rstart,ii;
3637   const PetscInt *JJ;
3638   PetscScalar    *values;
3639   PetscErrorCode ierr;
3640 
3641   PetscFunctionBegin;
3642   if (Ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0 it is %D",Ii[0]);
3643 
3644   ierr = PetscLayoutSetBlockSize(B->rmap,1);CHKERRQ(ierr);
3645   ierr = PetscLayoutSetBlockSize(B->cmap,1);CHKERRQ(ierr);
3646   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3647   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3648   m      = B->rmap->n;
3649   cstart = B->cmap->rstart;
3650   cend   = B->cmap->rend;
3651   rstart = B->rmap->rstart;
3652 
3653   ierr  = PetscMalloc2(m,PetscInt,&d_nnz,m,PetscInt,&o_nnz);CHKERRQ(ierr);
3654 
3655 #if defined(PETSC_USE_DEBUGGING)
3656   for (i=0; i<m; i++) {
3657     nnz     = Ii[i+1]- Ii[i];
3658     JJ      = J + Ii[i];
3659     if (nnz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative %D number of columns",i,nnz);
3660     if (nnz && (JJ[0] < 0)) SETERRRQ1(PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index",i,j);
3661     if (nnz && (JJ[nnz-1] >= B->cmap->N) SETERRRQ3(PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N);
3662   }
3663 #endif
3664 
3665   for (i=0; i<m; i++) {
3666     nnz     = Ii[i+1]- Ii[i];
3667     JJ      = J + Ii[i];
3668     nnz_max = PetscMax(nnz_max,nnz);
3669     d       = 0;
3670     for (j=0; j<nnz; j++) {
3671       if (cstart <= JJ[j] && JJ[j] < cend) d++;
3672     }
3673     d_nnz[i] = d;
3674     o_nnz[i] = nnz - d;
3675   }
3676   ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
3677   ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);
3678 
3679   if (v) values = (PetscScalar*)v;
3680   else {
3681     ierr = PetscMalloc((nnz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr);
3682     ierr = PetscMemzero(values,nnz_max*sizeof(PetscScalar));CHKERRQ(ierr);
3683   }
3684 
3685   for (i=0; i<m; i++) {
3686     ii   = i + rstart;
3687     nnz  = Ii[i+1]- Ii[i];
3688     ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ? Ii[i] : 0),INSERT_VALUES);CHKERRQ(ierr);
3689   }
3690   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3691   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3692 
3693   if (!v) {
3694     ierr = PetscFree(values);CHKERRQ(ierr);
3695   }
3696   PetscFunctionReturn(0);
3697 }
3698 EXTERN_C_END
3699 
3700 #undef __FUNCT__
3701 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR"
3702 /*@
3703    MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format
3704    (the default parallel PETSc format).
3705 
3706    Collective on MPI_Comm
3707 
3708    Input Parameters:
3709 +  B - the matrix
3710 .  i - the indices into j for the start of each local row (starts with zero)
3711 .  j - the column indices for each local row (starts with zero)
3712 -  v - optional values in the matrix
3713 
3714    Level: developer
3715 
3716    Notes:
3717        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
3718      thus you CANNOT change the matrix entries by changing the values of a[] after you have
3719      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3720 
3721        The i and j indices are 0 based, and i indices are indices corresponding to the local j array.
3722 
3723        The format which is used for the sparse matrix input, is equivalent to a
3724     row-major ordering.. i.e for the following matrix, the input data expected is
3725     as shown:
3726 
3727         1 0 0
3728         2 0 3     P0
3729        -------
3730         4 5 6     P1
3731 
3732      Process0 [P0]: rows_owned=[0,1]
3733         i =  {0,1,3}  [size = nrow+1  = 2+1]
3734         j =  {0,0,2}  [size = nz = 6]
3735         v =  {1,2,3}  [size = nz = 6]
3736 
3737      Process1 [P1]: rows_owned=[2]
3738         i =  {0,3}    [size = nrow+1  = 1+1]
3739         j =  {0,1,2}  [size = nz = 6]
3740         v =  {4,5,6}  [size = nz = 6]
3741 
3742 .keywords: matrix, aij, compressed row, sparse, parallel
3743 
3744 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateMPIAIJ(), MPIAIJ,
3745           MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays()
3746 @*/
3747 PetscErrorCode  MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
3748 {
3749   PetscErrorCode ierr;
3750 
3751   PetscFunctionBegin;
3752   ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocationCSR_C",(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,i,j,v));CHKERRQ(ierr);
3753   PetscFunctionReturn(0);
3754 }
3755 
3756 #undef __FUNCT__
3757 #define __FUNCT__ "MatMPIAIJSetPreallocation"
3758 /*@C
3759    MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format
3760    (the default parallel PETSc format).  For good matrix assembly performance
3761    the user should preallocate the matrix storage by setting the parameters
3762    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3763    performance can be increased by more than a factor of 50.
3764 
3765    Collective on MPI_Comm
3766 
3767    Input Parameters:
3768 +  A - the matrix
3769 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
3770            (same value is used for all local rows)
3771 .  d_nnz - array containing the number of nonzeros in the various rows of the
3772            DIAGONAL portion of the local submatrix (possibly different for each row)
3773            or PETSC_NULL, if d_nz is used to specify the nonzero structure.
3774            The size of this array is equal to the number of local rows, i.e 'm'.
3775            You must leave room for the diagonal entry even if it is zero.
3776 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
3777            submatrix (same value is used for all local rows).
3778 -  o_nnz - array containing the number of nonzeros in the various rows of the
3779            OFF-DIAGONAL portion of the local submatrix (possibly different for
3780            each row) or PETSC_NULL, if o_nz is used to specify the nonzero
3781            structure. The size of this array is equal to the number
3782            of local rows, i.e 'm'.
3783 
3784    If the *_nnz parameter is given then the *_nz parameter is ignored
3785 
3786    The AIJ format (also called the Yale sparse matrix format or
3787    compressed row storage (CSR)), is fully compatible with standard Fortran 77
3788    storage.  The stored row and column indices begin with zero.
3789    See the <A href="../../docs/manual.pdf#nameddest=ch_mat">Mat chapter of the users manual</A> for details.
3790 
3791    The parallel matrix is partitioned such that the first m0 rows belong to
3792    process 0, the next m1 rows belong to process 1, the next m2 rows belong
3793    to process 2 etc.. where m0,m1,m2... are the input parameter 'm'.
3794 
3795    The DIAGONAL portion of the local submatrix of a processor can be defined
3796    as the submatrix which is obtained by extraction the part corresponding to
3797    the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the
3798    first row that belongs to the processor, r2 is the last row belonging to
3799    the this processor, and c1-c2 is range of indices of the local part of a
3800    vector suitable for applying the matrix to.  This is an mxn matrix.  In the
3801    common case of a square matrix, the row and column ranges are the same and
3802    the DIAGONAL part is also square. The remaining portion of the local
3803    submatrix (mxN) constitute the OFF-DIAGONAL portion.
3804 
3805    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
3806 
3807    You can call MatGetInfo() to get information on how effective the preallocation was;
3808    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
3809    You can also run with the option -info and look for messages with the string
3810    malloc in them to see if additional memory allocation was needed.
3811 
3812    Example usage:
3813 
3814    Consider the following 8x8 matrix with 34 non-zero values, that is
3815    assembled across 3 processors. Lets assume that proc0 owns 3 rows,
3816    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
3817    as follows:
3818 
3819 .vb
3820             1  2  0  |  0  3  0  |  0  4
3821     Proc0   0  5  6  |  7  0  0  |  8  0
3822             9  0 10  | 11  0  0  | 12  0
3823     -------------------------------------
3824            13  0 14  | 15 16 17  |  0  0
3825     Proc1   0 18  0  | 19 20 21  |  0  0
3826             0  0  0  | 22 23  0  | 24  0
3827     -------------------------------------
3828     Proc2  25 26 27  |  0  0 28  | 29  0
3829            30  0  0  | 31 32 33  |  0 34
3830 .ve
3831 
3832    This can be represented as a collection of submatrices as:
3833 
3834 .vb
3835       A B C
3836       D E F
3837       G H I
3838 .ve
3839 
3840    Where the submatrices A,B,C are owned by proc0, D,E,F are
3841    owned by proc1, G,H,I are owned by proc2.
3842 
3843    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3844    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3845    The 'M','N' parameters are 8,8, and have the same values on all procs.
3846 
3847    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
3848    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
3849    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
3850    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
3851    part as SeqAIJ matrices. for eg: proc1 will store [E] as a SeqAIJ
3852    matrix, ans [DF] as another SeqAIJ matrix.
3853 
3854    When d_nz, o_nz parameters are specified, d_nz storage elements are
3855    allocated for every row of the local diagonal submatrix, and o_nz
3856    storage locations are allocated for every row of the OFF-DIAGONAL submat.
3857    One way to choose d_nz and o_nz is to use the max nonzerors per local
3858    rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
3859    In this case, the values of d_nz,o_nz are:
3860 .vb
3861      proc0 : dnz = 2, o_nz = 2
3862      proc1 : dnz = 3, o_nz = 2
3863      proc2 : dnz = 1, o_nz = 4
3864 .ve
3865    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
3866    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
3867    for proc3. i.e we are using 12+15+10=37 storage locations to store
3868    34 values.
3869 
3870    When d_nnz, o_nnz parameters are specified, the storage is specified
3871    for every row, coresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
3872    In the above case the values for d_nnz,o_nnz are:
3873 .vb
3874      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
3875      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
3876      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
3877 .ve
3878    Here the space allocated is sum of all the above values i.e 34, and
3879    hence pre-allocation is perfect.
3880 
3881    Level: intermediate
3882 
3883 .keywords: matrix, aij, compressed row, sparse, parallel
3884 
3885 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIAIJ(), MatMPIAIJSetPreallocationCSR(),
3886           MPIAIJ, MatGetInfo()
3887 @*/
3888 PetscErrorCode  MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
3889 {
3890   PetscErrorCode ierr;
3891 
3892   PetscFunctionBegin;
3893   ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocation_C",(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr);
3894   PetscFunctionReturn(0);
3895 }
3896 
3897 #undef __FUNCT__
3898 #define __FUNCT__ "MatCreateMPIAIJWithArrays"
3899 /*@
3900      MatCreateMPIAIJWithArrays - creates a MPI AIJ matrix using arrays that contain in standard
3901          CSR format the local rows.
3902 
3903    Collective on MPI_Comm
3904 
3905    Input Parameters:
3906 +  comm - MPI communicator
3907 .  m - number of local rows (Cannot be PETSC_DECIDE)
3908 .  n - This value should be the same as the local size used in creating the
3909        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
3910        calculated if N is given) For square matrices n is almost always m.
3911 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
3912 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
3913 .   i - row indices
3914 .   j - column indices
3915 -   a - matrix values
3916 
3917    Output Parameter:
3918 .   mat - the matrix
3919 
3920    Level: intermediate
3921 
3922    Notes:
3923        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
3924      thus you CANNOT change the matrix entries by changing the values of a[] after you have
3925      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3926 
3927        The i and j indices are 0 based, and i indices are indices corresponding to the local j array.
3928 
3929        The format which is used for the sparse matrix input, is equivalent to a
3930     row-major ordering.. i.e for the following matrix, the input data expected is
3931     as shown:
3932 
3933         1 0 0
3934         2 0 3     P0
3935        -------
3936         4 5 6     P1
3937 
3938      Process0 [P0]: rows_owned=[0,1]
3939         i =  {0,1,3}  [size = nrow+1  = 2+1]
3940         j =  {0,0,2}  [size = nz = 6]
3941         v =  {1,2,3}  [size = nz = 6]
3942 
3943      Process1 [P1]: rows_owned=[2]
3944         i =  {0,3}    [size = nrow+1  = 1+1]
3945         j =  {0,1,2}  [size = nz = 6]
3946         v =  {4,5,6}  [size = nz = 6]
3947 
3948 .keywords: matrix, aij, compressed row, sparse, parallel
3949 
3950 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
3951           MPIAIJ, MatCreateMPIAIJ(), MatCreateMPIAIJWithSplitArrays()
3952 @*/
3953 PetscErrorCode  MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
3954 {
3955   PetscErrorCode ierr;
3956 
3957  PetscFunctionBegin;
3958   if (i[0]) {
3959     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
3960   }
3961   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
3962   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
3963   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
3964   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
3965   ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr);
3966   PetscFunctionReturn(0);
3967 }
3968 
3969 #undef __FUNCT__
3970 #define __FUNCT__ "MatCreateMPIAIJ"
3971 /*@C
3972    MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format
3973    (the default parallel PETSc format).  For good matrix assembly performance
3974    the user should preallocate the matrix storage by setting the parameters
3975    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3976    performance can be increased by more than a factor of 50.
3977 
3978    Collective on MPI_Comm
3979 
3980    Input Parameters:
3981 +  comm - MPI communicator
3982 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
3983            This value should be the same as the local size used in creating the
3984            y vector for the matrix-vector product y = Ax.
3985 .  n - This value should be the same as the local size used in creating the
3986        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
3987        calculated if N is given) For square matrices n is almost always m.
3988 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
3989 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
3990 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
3991            (same value is used for all local rows)
3992 .  d_nnz - array containing the number of nonzeros in the various rows of the
3993            DIAGONAL portion of the local submatrix (possibly different for each row)
3994            or PETSC_NULL, if d_nz is used to specify the nonzero structure.
3995            The size of this array is equal to the number of local rows, i.e 'm'.
3996            You must leave room for the diagonal entry even if it is zero.
3997 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
3998            submatrix (same value is used for all local rows).
3999 -  o_nnz - array containing the number of nonzeros in the various rows of the
4000            OFF-DIAGONAL portion of the local submatrix (possibly different for
4001            each row) or PETSC_NULL, if o_nz is used to specify the nonzero
4002            structure. The size of this array is equal to the number
4003            of local rows, i.e 'm'.
4004 
4005    Output Parameter:
4006 .  A - the matrix
4007 
4008    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
4009    MatXXXXSetPreallocation() paradgm instead of this routine directly.
4010    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
4011 
4012    Notes:
4013    If the *_nnz parameter is given then the *_nz parameter is ignored
4014 
4015    m,n,M,N parameters specify the size of the matrix, and its partitioning across
4016    processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
4017    storage requirements for this matrix.
4018 
4019    If PETSC_DECIDE or  PETSC_DETERMINE is used for a particular argument on one
4020    processor than it must be used on all processors that share the object for
4021    that argument.
4022 
4023    The user MUST specify either the local or global matrix dimensions
4024    (possibly both).
4025 
4026    The parallel matrix is partitioned across processors such that the
4027    first m0 rows belong to process 0, the next m1 rows belong to
4028    process 1, the next m2 rows belong to process 2 etc.. where
4029    m0,m1,m2,.. are the input parameter 'm'. i.e each processor stores
4030    values corresponding to [m x N] submatrix.
4031 
4032    The columns are logically partitioned with the n0 columns belonging
4033    to 0th partition, the next n1 columns belonging to the next
4034    partition etc.. where n0,n1,n2... are the the input parameter 'n'.
4035 
4036    The DIAGONAL portion of the local submatrix on any given processor
4037    is the submatrix corresponding to the rows and columns m,n
4038    corresponding to the given processor. i.e diagonal matrix on
4039    process 0 is [m0 x n0], diagonal matrix on process 1 is [m1 x n1]
4040    etc. The remaining portion of the local submatrix [m x (N-n)]
4041    constitute the OFF-DIAGONAL portion. The example below better
4042    illustrates this concept.
4043 
4044    For a square global matrix we define each processor's diagonal portion
4045    to be its local rows and the corresponding columns (a square submatrix);
4046    each processor's off-diagonal portion encompasses the remainder of the
4047    local matrix (a rectangular submatrix).
4048 
4049    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
4050 
4051    When calling this routine with a single process communicator, a matrix of
4052    type SEQAIJ is returned.  If a matrix of type MPIAIJ is desired for this
4053    type of communicator, use the construction mechanism:
4054      MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatSetSizes(A, m,n,M,N); MatMPIAIJSetPreallocation(A,...);
4055 
4056    By default, this format uses inodes (identical nodes) when possible.
4057    We search for consecutive rows with the same nonzero structure, thereby
4058    reusing matrix information to achieve increased efficiency.
4059 
4060    Options Database Keys:
4061 +  -mat_no_inode  - Do not use inodes
4062 .  -mat_inode_limit <limit> - Sets inode limit (max limit=5)
4063 -  -mat_aij_oneindex - Internally use indexing starting at 1
4064         rather than 0.  Note that when calling MatSetValues(),
4065         the user still MUST index entries starting at 0!
4066 
4067 
4068    Example usage:
4069 
4070    Consider the following 8x8 matrix with 34 non-zero values, that is
4071    assembled across 3 processors. Lets assume that proc0 owns 3 rows,
4072    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
4073    as follows:
4074 
4075 .vb
4076             1  2  0  |  0  3  0  |  0  4
4077     Proc0   0  5  6  |  7  0  0  |  8  0
4078             9  0 10  | 11  0  0  | 12  0
4079     -------------------------------------
4080            13  0 14  | 15 16 17  |  0  0
4081     Proc1   0 18  0  | 19 20 21  |  0  0
4082             0  0  0  | 22 23  0  | 24  0
4083     -------------------------------------
4084     Proc2  25 26 27  |  0  0 28  | 29  0
4085            30  0  0  | 31 32 33  |  0 34
4086 .ve
4087 
4088    This can be represented as a collection of submatrices as:
4089 
4090 .vb
4091       A B C
4092       D E F
4093       G H I
4094 .ve
4095 
4096    Where the submatrices A,B,C are owned by proc0, D,E,F are
4097    owned by proc1, G,H,I are owned by proc2.
4098 
4099    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4100    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4101    The 'M','N' parameters are 8,8, and have the same values on all procs.
4102 
4103    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
4104    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
4105    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
4106    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
4107    part as SeqAIJ matrices. for eg: proc1 will store [E] as a SeqAIJ
4108    matrix, ans [DF] as another SeqAIJ matrix.
4109 
4110    When d_nz, o_nz parameters are specified, d_nz storage elements are
4111    allocated for every row of the local diagonal submatrix, and o_nz
4112    storage locations are allocated for every row of the OFF-DIAGONAL submat.
4113    One way to choose d_nz and o_nz is to use the max nonzerors per local
4114    rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
4115    In this case, the values of d_nz,o_nz are:
4116 .vb
4117      proc0 : dnz = 2, o_nz = 2
4118      proc1 : dnz = 3, o_nz = 2
4119      proc2 : dnz = 1, o_nz = 4
4120 .ve
4121    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
4122    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
4123    for proc3. i.e we are using 12+15+10=37 storage locations to store
4124    34 values.
4125 
4126    When d_nnz, o_nnz parameters are specified, the storage is specified
4127    for every row, coresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
4128    In the above case the values for d_nnz,o_nnz are:
4129 .vb
4130      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
4131      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
4132      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
4133 .ve
4134    Here the space allocated is sum of all the above values i.e 34, and
4135    hence pre-allocation is perfect.
4136 
4137    Level: intermediate
4138 
4139 .keywords: matrix, aij, compressed row, sparse, parallel
4140 
4141 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
4142           MPIAIJ, MatCreateMPIAIJWithArrays()
4143 @*/
4144 PetscErrorCode  MatCreateMPIAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
4145 {
4146   PetscErrorCode ierr;
4147   PetscMPIInt    size;
4148 
4149   PetscFunctionBegin;
4150   ierr = MatCreate(comm,A);CHKERRQ(ierr);
4151   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
4152   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4153   if (size > 1) {
4154     ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr);
4155     ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
4156   } else {
4157     ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr);
4158     ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr);
4159   }
4160   PetscFunctionReturn(0);
4161 }
4162 
4163 #undef __FUNCT__
4164 #define __FUNCT__ "MatMPIAIJGetSeqAIJ"
4165 PetscErrorCode  MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,PetscInt *colmap[])
4166 {
4167   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
4168 
4169   PetscFunctionBegin;
4170   *Ad     = a->A;
4171   *Ao     = a->B;
4172   *colmap = a->garray;
4173   PetscFunctionReturn(0);
4174 }
4175 
4176 #undef __FUNCT__
4177 #define __FUNCT__ "MatSetColoring_MPIAIJ"
4178 PetscErrorCode MatSetColoring_MPIAIJ(Mat A,ISColoring coloring)
4179 {
4180   PetscErrorCode ierr;
4181   PetscInt       i;
4182   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
4183 
4184   PetscFunctionBegin;
4185   if (coloring->ctype == IS_COLORING_GLOBAL) {
4186     ISColoringValue *allcolors,*colors;
4187     ISColoring      ocoloring;
4188 
4189     /* set coloring for diagonal portion */
4190     ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr);
4191 
4192     /* set coloring for off-diagonal portion */
4193     ierr = ISAllGatherColors(((PetscObject)A)->comm,coloring->n,coloring->colors,PETSC_NULL,&allcolors);CHKERRQ(ierr);
4194     ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
4195     for (i=0; i<a->B->cmap->n; i++) {
4196       colors[i] = allcolors[a->garray[i]];
4197     }
4198     ierr = PetscFree(allcolors);CHKERRQ(ierr);
4199     ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
4200     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
4201     ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr);
4202   } else if (coloring->ctype == IS_COLORING_GHOSTED) {
4203     ISColoringValue *colors;
4204     PetscInt        *larray;
4205     ISColoring      ocoloring;
4206 
4207     /* set coloring for diagonal portion */
4208     ierr = PetscMalloc((a->A->cmap->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr);
4209     for (i=0; i<a->A->cmap->n; i++) {
4210       larray[i] = i + A->cmap->rstart;
4211     }
4212     ierr = ISGlobalToLocalMappingApply(A->cmap->mapping,IS_GTOLM_MASK,a->A->cmap->n,larray,PETSC_NULL,larray);CHKERRQ(ierr);
4213     ierr = PetscMalloc((a->A->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
4214     for (i=0; i<a->A->cmap->n; i++) {
4215       colors[i] = coloring->colors[larray[i]];
4216     }
4217     ierr = PetscFree(larray);CHKERRQ(ierr);
4218     ierr = ISColoringCreate(PETSC_COMM_SELF,coloring->n,a->A->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
4219     ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr);
4220     ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr);
4221 
4222     /* set coloring for off-diagonal portion */
4223     ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr);
4224     ierr = ISGlobalToLocalMappingApply(A->cmap->mapping,IS_GTOLM_MASK,a->B->cmap->n,a->garray,PETSC_NULL,larray);CHKERRQ(ierr);
4225     ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
4226     for (i=0; i<a->B->cmap->n; i++) {
4227       colors[i] = coloring->colors[larray[i]];
4228     }
4229     ierr = PetscFree(larray);CHKERRQ(ierr);
4230     ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
4231     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
4232     ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr);
4233   } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support ISColoringType %d",(int)coloring->ctype);
4234 
4235   PetscFunctionReturn(0);
4236 }
4237 
4238 #if defined(PETSC_HAVE_ADIC)
4239 #undef __FUNCT__
4240 #define __FUNCT__ "MatSetValuesAdic_MPIAIJ"
4241 PetscErrorCode MatSetValuesAdic_MPIAIJ(Mat A,void *advalues)
4242 {
4243   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
4244   PetscErrorCode ierr;
4245 
4246   PetscFunctionBegin;
4247   ierr = MatSetValuesAdic_SeqAIJ(a->A,advalues);CHKERRQ(ierr);
4248   ierr = MatSetValuesAdic_SeqAIJ(a->B,advalues);CHKERRQ(ierr);
4249   PetscFunctionReturn(0);
4250 }
4251 #endif
4252 
4253 #undef __FUNCT__
4254 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ"
4255 PetscErrorCode MatSetValuesAdifor_MPIAIJ(Mat A,PetscInt nl,void *advalues)
4256 {
4257   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
4258   PetscErrorCode ierr;
4259 
4260   PetscFunctionBegin;
4261   ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr);
4262   ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr);
4263   PetscFunctionReturn(0);
4264 }
4265 
4266 #undef __FUNCT__
4267 #define __FUNCT__ "MatMerge"
4268 /*@
4269       MatMerge - Creates a single large PETSc matrix by concatinating sequential
4270                  matrices from each processor
4271 
4272     Collective on MPI_Comm
4273 
4274    Input Parameters:
4275 +    comm - the communicators the parallel matrix will live on
4276 .    inmat - the input sequential matrices
4277 .    n - number of local columns (or PETSC_DECIDE)
4278 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4279 
4280    Output Parameter:
4281 .    outmat - the parallel matrix generated
4282 
4283     Level: advanced
4284 
4285    Notes: The number of columns of the matrix in EACH processor MUST be the same.
4286 
4287 @*/
4288 PetscErrorCode  MatMerge(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
4289 {
4290   PetscErrorCode ierr;
4291   PetscInt       m,N,i,rstart,nnz,Ii,*dnz,*onz;
4292   PetscInt       *indx;
4293   PetscScalar    *values;
4294 
4295   PetscFunctionBegin;
4296   ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
4297   if (scall == MAT_INITIAL_MATRIX){
4298     /* count nonzeros in each row, for diagonal and off diagonal portion of matrix */
4299     if (n == PETSC_DECIDE){
4300       ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr);
4301     }
4302     ierr = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
4303     rstart -= m;
4304 
4305     ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
4306     for (i=0;i<m;i++) {
4307       ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr);
4308       ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr);
4309       ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr);
4310     }
4311     /* This routine will ONLY return MPIAIJ type matrix */
4312     ierr = MatCreate(comm,outmat);CHKERRQ(ierr);
4313     ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
4314     ierr = MatSetType(*outmat,MATMPIAIJ);CHKERRQ(ierr);
4315     ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr);
4316     ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
4317 
4318   } else if (scall == MAT_REUSE_MATRIX){
4319     ierr = MatGetOwnershipRange(*outmat,&rstart,PETSC_NULL);CHKERRQ(ierr);
4320   } else {
4321     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
4322   }
4323 
4324   for (i=0;i<m;i++) {
4325     ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
4326     Ii    = i + rstart;
4327     ierr = MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
4328     ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
4329   }
4330   ierr = MatDestroy(&inmat);CHKERRQ(ierr);
4331   ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4332   ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4333 
4334   PetscFunctionReturn(0);
4335 }
4336 
4337 #undef __FUNCT__
4338 #define __FUNCT__ "MatFileSplit"
4339 PetscErrorCode MatFileSplit(Mat A,char *outfile)
4340 {
4341   PetscErrorCode    ierr;
4342   PetscMPIInt       rank;
4343   PetscInt          m,N,i,rstart,nnz;
4344   size_t            len;
4345   const PetscInt    *indx;
4346   PetscViewer       out;
4347   char              *name;
4348   Mat               B;
4349   const PetscScalar *values;
4350 
4351   PetscFunctionBegin;
4352   ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr);
4353   ierr = MatGetSize(A,0,&N);CHKERRQ(ierr);
4354   /* Should this be the type of the diagonal block of A? */
4355   ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr);
4356   ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr);
4357   ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr);
4358   ierr = MatSeqAIJSetPreallocation(B,0,PETSC_NULL);CHKERRQ(ierr);
4359   ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr);
4360   for (i=0;i<m;i++) {
4361     ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
4362     ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
4363     ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
4364   }
4365   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4366   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4367 
4368   ierr = MPI_Comm_rank(((PetscObject)A)->comm,&rank);CHKERRQ(ierr);
4369   ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr);
4370   ierr = PetscMalloc((len+5)*sizeof(char),&name);CHKERRQ(ierr);
4371   sprintf(name,"%s.%d",outfile,rank);
4372   ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr);
4373   ierr = PetscFree(name);
4374   ierr = MatView(B,out);CHKERRQ(ierr);
4375   ierr = PetscViewerDestroy(&out);CHKERRQ(ierr);
4376   ierr = MatDestroy(&B);CHKERRQ(ierr);
4377   PetscFunctionReturn(0);
4378 }
4379 
4380 extern PetscErrorCode MatDestroy_MPIAIJ(Mat);
4381 #undef __FUNCT__
4382 #define __FUNCT__ "MatDestroy_MPIAIJ_SeqsToMPI"
4383 PetscErrorCode  MatDestroy_MPIAIJ_SeqsToMPI(Mat A)
4384 {
4385   PetscErrorCode       ierr;
4386   Mat_Merge_SeqsToMPI  *merge;
4387   PetscContainer       container;
4388 
4389   PetscFunctionBegin;
4390   ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr);
4391   if (container) {
4392     ierr = PetscContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr);
4393     ierr = PetscFree(merge->id_r);CHKERRQ(ierr);
4394     ierr = PetscFree(merge->len_s);CHKERRQ(ierr);
4395     ierr = PetscFree(merge->len_r);CHKERRQ(ierr);
4396     ierr = PetscFree(merge->bi);CHKERRQ(ierr);
4397     ierr = PetscFree(merge->bj);CHKERRQ(ierr);
4398     ierr = PetscFree(merge->buf_ri[0]);CHKERRQ(ierr);
4399     ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr);
4400     ierr = PetscFree(merge->buf_rj[0]);CHKERRQ(ierr);
4401     ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr);
4402     ierr = PetscFree(merge->coi);CHKERRQ(ierr);
4403     ierr = PetscFree(merge->coj);CHKERRQ(ierr);
4404     ierr = PetscFree(merge->owners_co);CHKERRQ(ierr);
4405     ierr = PetscLayoutDestroy(&merge->rowmap);CHKERRQ(ierr);
4406     ierr = PetscFree(merge);CHKERRQ(ierr);
4407     ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr);
4408   }
4409   ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr);
4410   PetscFunctionReturn(0);
4411 }
4412 
4413 #include <../src/mat/utils/freespace.h>
4414 #include <petscbt.h>
4415 
4416 #undef __FUNCT__
4417 #define __FUNCT__ "MatMerge_SeqsToMPINumeric"
4418 /*@C
4419       MatMerge_SeqsToMPI - Creates a MPIAIJ matrix by adding sequential
4420                  matrices from each processor
4421 
4422     Collective on MPI_Comm
4423 
4424    Input Parameters:
4425 +    comm - the communicators the parallel matrix will live on
4426 .    seqmat - the input sequential matrices
4427 .    m - number of local rows (or PETSC_DECIDE)
4428 .    n - number of local columns (or PETSC_DECIDE)
4429 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4430 
4431    Output Parameter:
4432 .    mpimat - the parallel matrix generated
4433 
4434     Level: advanced
4435 
4436    Notes:
4437      The dimensions of the sequential matrix in each processor MUST be the same.
4438      The input seqmat is included into the container "Mat_Merge_SeqsToMPI", and will be
4439      destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat.
4440 @*/
4441 PetscErrorCode  MatMerge_SeqsToMPINumeric(Mat seqmat,Mat mpimat)
4442 {
4443   PetscErrorCode       ierr;
4444   MPI_Comm             comm=((PetscObject)mpimat)->comm;
4445   Mat_SeqAIJ           *a=(Mat_SeqAIJ*)seqmat->data;
4446   PetscMPIInt          size,rank,taga,*len_s;
4447   PetscInt             N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj=a->j;
4448   PetscInt             proc,m;
4449   PetscInt             **buf_ri,**buf_rj;
4450   PetscInt             k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj;
4451   PetscInt             nrows,**buf_ri_k,**nextrow,**nextai;
4452   MPI_Request          *s_waits,*r_waits;
4453   MPI_Status           *status;
4454   MatScalar            *aa=a->a;
4455   MatScalar            **abuf_r,*ba_i;
4456   Mat_Merge_SeqsToMPI  *merge;
4457   PetscContainer       container;
4458 
4459   PetscFunctionBegin;
4460   ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
4461 
4462   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4463   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4464 
4465   ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr);
4466   ierr  = PetscContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr);
4467 
4468   bi     = merge->bi;
4469   bj     = merge->bj;
4470   buf_ri = merge->buf_ri;
4471   buf_rj = merge->buf_rj;
4472 
4473   ierr   = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr);
4474   owners = merge->rowmap->range;
4475   len_s  = merge->len_s;
4476 
4477   /* send and recv matrix values */
4478   /*-----------------------------*/
4479   ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr);
4480   ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr);
4481 
4482   ierr = PetscMalloc((merge->nsend+1)*sizeof(MPI_Request),&s_waits);CHKERRQ(ierr);
4483   for (proc=0,k=0; proc<size; proc++){
4484     if (!len_s[proc]) continue;
4485     i = owners[proc];
4486     ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr);
4487     k++;
4488   }
4489 
4490   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);}
4491   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);}
4492   ierr = PetscFree(status);CHKERRQ(ierr);
4493 
4494   ierr = PetscFree(s_waits);CHKERRQ(ierr);
4495   ierr = PetscFree(r_waits);CHKERRQ(ierr);
4496 
4497   /* insert mat values of mpimat */
4498   /*----------------------------*/
4499   ierr = PetscMalloc(N*sizeof(PetscScalar),&ba_i);CHKERRQ(ierr);
4500   ierr = PetscMalloc3(merge->nrecv,PetscInt*,&buf_ri_k,merge->nrecv,PetscInt*,&nextrow,merge->nrecv,PetscInt*,&nextai);CHKERRQ(ierr);
4501 
4502   for (k=0; k<merge->nrecv; k++){
4503     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
4504     nrows = *(buf_ri_k[k]);
4505     nextrow[k]  = buf_ri_k[k]+1;  /* next row number of k-th recved i-structure */
4506     nextai[k]   = buf_ri_k[k] + (nrows + 1);/* poins to the next i-structure of k-th recved i-structure  */
4507   }
4508 
4509   /* set values of ba */
4510   m = merge->rowmap->n;
4511   for (i=0; i<m; i++) {
4512     arow = owners[rank] + i;
4513     bj_i = bj+bi[i];  /* col indices of the i-th row of mpimat */
4514     bnzi = bi[i+1] - bi[i];
4515     ierr = PetscMemzero(ba_i,bnzi*sizeof(PetscScalar));CHKERRQ(ierr);
4516 
4517     /* add local non-zero vals of this proc's seqmat into ba */
4518     anzi = ai[arow+1] - ai[arow];
4519     aj   = a->j + ai[arow];
4520     aa   = a->a + ai[arow];
4521     nextaj = 0;
4522     for (j=0; nextaj<anzi; j++){
4523       if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */
4524         ba_i[j] += aa[nextaj++];
4525       }
4526     }
4527 
4528     /* add received vals into ba */
4529     for (k=0; k<merge->nrecv; k++){ /* k-th received message */
4530       /* i-th row */
4531       if (i == *nextrow[k]) {
4532         anzi = *(nextai[k]+1) - *nextai[k];
4533         aj   = buf_rj[k] + *(nextai[k]);
4534         aa   = abuf_r[k] + *(nextai[k]);
4535         nextaj = 0;
4536         for (j=0; nextaj<anzi; j++){
4537           if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */
4538             ba_i[j] += aa[nextaj++];
4539           }
4540         }
4541         nextrow[k]++; nextai[k]++;
4542       }
4543     }
4544     ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr);
4545   }
4546   ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4547   ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4548 
4549   ierr = PetscFree(abuf_r[0]);CHKERRQ(ierr);
4550   ierr = PetscFree(abuf_r);CHKERRQ(ierr);
4551   ierr = PetscFree(ba_i);CHKERRQ(ierr);
4552   ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
4553   ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
4554   PetscFunctionReturn(0);
4555 }
4556 
4557 extern PetscErrorCode  MatDestroy_MPIAIJ_SeqsToMPI(Mat);
4558 
4559 #undef __FUNCT__
4560 #define __FUNCT__ "MatMerge_SeqsToMPISymbolic"
4561 PetscErrorCode  MatMerge_SeqsToMPISymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat)
4562 {
4563   PetscErrorCode       ierr;
4564   Mat                  B_mpi;
4565   Mat_SeqAIJ           *a=(Mat_SeqAIJ*)seqmat->data;
4566   PetscMPIInt          size,rank,tagi,tagj,*len_s,*len_si,*len_ri;
4567   PetscInt             **buf_rj,**buf_ri,**buf_ri_k;
4568   PetscInt             M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j;
4569   PetscInt             len,proc,*dnz,*onz;
4570   PetscInt             k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0;
4571   PetscInt             nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai;
4572   MPI_Request          *si_waits,*sj_waits,*ri_waits,*rj_waits;
4573   MPI_Status           *status;
4574   PetscFreeSpaceList   free_space=PETSC_NULL,current_space=PETSC_NULL;
4575   PetscBT              lnkbt;
4576   Mat_Merge_SeqsToMPI  *merge;
4577   PetscContainer       container;
4578 
4579   PetscFunctionBegin;
4580   ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4581 
4582   /* make sure it is a PETSc comm */
4583   ierr = PetscCommDuplicate(comm,&comm,PETSC_NULL);CHKERRQ(ierr);
4584   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4585   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4586 
4587   ierr = PetscNew(Mat_Merge_SeqsToMPI,&merge);CHKERRQ(ierr);
4588   ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr);
4589 
4590   /* determine row ownership */
4591   /*---------------------------------------------------------*/
4592   ierr = PetscLayoutCreate(comm,&merge->rowmap);CHKERRQ(ierr);
4593   ierr = PetscLayoutSetLocalSize(merge->rowmap,m);CHKERRQ(ierr);
4594   ierr = PetscLayoutSetSize(merge->rowmap,M);CHKERRQ(ierr);
4595   ierr = PetscLayoutSetBlockSize(merge->rowmap,1);CHKERRQ(ierr);
4596   ierr = PetscLayoutSetUp(merge->rowmap);CHKERRQ(ierr);
4597   ierr = PetscMalloc(size*sizeof(PetscMPIInt),&len_si);CHKERRQ(ierr);
4598   ierr = PetscMalloc(size*sizeof(PetscMPIInt),&merge->len_s);CHKERRQ(ierr);
4599 
4600   m      = merge->rowmap->n;
4601   M      = merge->rowmap->N;
4602   owners = merge->rowmap->range;
4603 
4604   /* determine the number of messages to send, their lengths */
4605   /*---------------------------------------------------------*/
4606   len_s  = merge->len_s;
4607 
4608   len = 0;  /* length of buf_si[] */
4609   merge->nsend = 0;
4610   for (proc=0; proc<size; proc++){
4611     len_si[proc] = 0;
4612     if (proc == rank){
4613       len_s[proc] = 0;
4614     } else {
4615       len_si[proc] = owners[proc+1] - owners[proc] + 1;
4616       len_s[proc] = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */
4617     }
4618     if (len_s[proc]) {
4619       merge->nsend++;
4620       nrows = 0;
4621       for (i=owners[proc]; i<owners[proc+1]; i++){
4622         if (ai[i+1] > ai[i]) nrows++;
4623       }
4624       len_si[proc] = 2*(nrows+1);
4625       len += len_si[proc];
4626     }
4627   }
4628 
4629   /* determine the number and length of messages to receive for ij-structure */
4630   /*-------------------------------------------------------------------------*/
4631   ierr = PetscGatherNumberOfMessages(comm,PETSC_NULL,len_s,&merge->nrecv);CHKERRQ(ierr);
4632   ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr);
4633 
4634   /* post the Irecv of j-structure */
4635   /*-------------------------------*/
4636   ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr);
4637   ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr);
4638 
4639   /* post the Isend of j-structure */
4640   /*--------------------------------*/
4641   ierr = PetscMalloc2(merge->nsend,MPI_Request,&si_waits,merge->nsend,MPI_Request,&sj_waits);CHKERRQ(ierr);
4642 
4643   for (proc=0, k=0; proc<size; proc++){
4644     if (!len_s[proc]) continue;
4645     i = owners[proc];
4646     ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr);
4647     k++;
4648   }
4649 
4650   /* receives and sends of j-structure are complete */
4651   /*------------------------------------------------*/
4652   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);}
4653   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);}
4654 
4655   /* send and recv i-structure */
4656   /*---------------------------*/
4657   ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr);
4658   ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr);
4659 
4660   ierr = PetscMalloc((len+1)*sizeof(PetscInt),&buf_s);CHKERRQ(ierr);
4661   buf_si = buf_s;  /* points to the beginning of k-th msg to be sent */
4662   for (proc=0,k=0; proc<size; proc++){
4663     if (!len_s[proc]) continue;
4664     /* form outgoing message for i-structure:
4665          buf_si[0]:                 nrows to be sent
4666                [1:nrows]:           row index (global)
4667                [nrows+1:2*nrows+1]: i-structure index
4668     */
4669     /*-------------------------------------------*/
4670     nrows = len_si[proc]/2 - 1;
4671     buf_si_i    = buf_si + nrows+1;
4672     buf_si[0]   = nrows;
4673     buf_si_i[0] = 0;
4674     nrows = 0;
4675     for (i=owners[proc]; i<owners[proc+1]; i++){
4676       anzi = ai[i+1] - ai[i];
4677       if (anzi) {
4678         buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */
4679         buf_si[nrows+1] = i-owners[proc]; /* local row index */
4680         nrows++;
4681       }
4682     }
4683     ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr);
4684     k++;
4685     buf_si += len_si[proc];
4686   }
4687 
4688   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);}
4689   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);}
4690 
4691   ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr);
4692   for (i=0; i<merge->nrecv; i++){
4693     ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr);
4694   }
4695 
4696   ierr = PetscFree(len_si);CHKERRQ(ierr);
4697   ierr = PetscFree(len_ri);CHKERRQ(ierr);
4698   ierr = PetscFree(rj_waits);CHKERRQ(ierr);
4699   ierr = PetscFree2(si_waits,sj_waits);CHKERRQ(ierr);
4700   ierr = PetscFree(ri_waits);CHKERRQ(ierr);
4701   ierr = PetscFree(buf_s);CHKERRQ(ierr);
4702   ierr = PetscFree(status);CHKERRQ(ierr);
4703 
4704   /* compute a local seq matrix in each processor */
4705   /*----------------------------------------------*/
4706   /* allocate bi array and free space for accumulating nonzero column info */
4707   ierr = PetscMalloc((m+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr);
4708   bi[0] = 0;
4709 
4710   /* create and initialize a linked list */
4711   nlnk = N+1;
4712   ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4713 
4714   /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */
4715   len = 0;
4716   len  = ai[owners[rank+1]] - ai[owners[rank]];
4717   ierr = PetscFreeSpaceGet((PetscInt)(2*len+1),&free_space);CHKERRQ(ierr);
4718   current_space = free_space;
4719 
4720   /* determine symbolic info for each local row */
4721   ierr = PetscMalloc3(merge->nrecv,PetscInt*,&buf_ri_k,merge->nrecv,PetscInt*,&nextrow,merge->nrecv,PetscInt*,&nextai);CHKERRQ(ierr);
4722 
4723   for (k=0; k<merge->nrecv; k++){
4724     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
4725     nrows = *buf_ri_k[k];
4726     nextrow[k]  = buf_ri_k[k] + 1;  /* next row number of k-th recved i-structure */
4727     nextai[k]   = buf_ri_k[k] + (nrows + 1);/* poins to the next i-structure of k-th recved i-structure  */
4728   }
4729 
4730   ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
4731   len = 0;
4732   for (i=0;i<m;i++) {
4733     bnzi   = 0;
4734     /* add local non-zero cols of this proc's seqmat into lnk */
4735     arow   = owners[rank] + i;
4736     anzi   = ai[arow+1] - ai[arow];
4737     aj     = a->j + ai[arow];
4738     ierr = PetscLLAdd(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4739     bnzi += nlnk;
4740     /* add received col data into lnk */
4741     for (k=0; k<merge->nrecv; k++){ /* k-th received message */
4742       if (i == *nextrow[k]) { /* i-th row */
4743         anzi = *(nextai[k]+1) - *nextai[k];
4744         aj   = buf_rj[k] + *nextai[k];
4745         ierr = PetscLLAdd(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4746         bnzi += nlnk;
4747         nextrow[k]++; nextai[k]++;
4748       }
4749     }
4750     if (len < bnzi) len = bnzi;  /* =max(bnzi) */
4751 
4752     /* if free space is not available, make more free space */
4753     if (current_space->local_remaining<bnzi) {
4754       ierr = PetscFreeSpaceGet(bnzi+current_space->total_array_size,&current_space);CHKERRQ(ierr);
4755       nspacedouble++;
4756     }
4757     /* copy data into free space, then initialize lnk */
4758     ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
4759     ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr);
4760 
4761     current_space->array           += bnzi;
4762     current_space->local_used      += bnzi;
4763     current_space->local_remaining -= bnzi;
4764 
4765     bi[i+1] = bi[i] + bnzi;
4766   }
4767 
4768   ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
4769 
4770   ierr = PetscMalloc((bi[m]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr);
4771   ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr);
4772   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
4773 
4774   /* create symbolic parallel matrix B_mpi */
4775   /*---------------------------------------*/
4776   ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr);
4777   if (n==PETSC_DECIDE) {
4778     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr);
4779   } else {
4780     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
4781   }
4782   ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr);
4783   ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr);
4784   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
4785 
4786   /* B_mpi is not ready for use - assembly will be done by MatMerge_SeqsToMPINumeric() */
4787   B_mpi->assembled     = PETSC_FALSE;
4788   B_mpi->ops->destroy  = MatDestroy_MPIAIJ_SeqsToMPI;
4789   merge->bi            = bi;
4790   merge->bj            = bj;
4791   merge->buf_ri        = buf_ri;
4792   merge->buf_rj        = buf_rj;
4793   merge->coi           = PETSC_NULL;
4794   merge->coj           = PETSC_NULL;
4795   merge->owners_co     = PETSC_NULL;
4796 
4797   ierr = PetscCommDestroy(&comm);CHKERRQ(ierr);
4798 
4799   /* attach the supporting struct to B_mpi for reuse */
4800   ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
4801   ierr = PetscContainerSetPointer(container,merge);CHKERRQ(ierr);
4802   ierr = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr);
4803   ierr = PetscContainerDestroy(&container);CHKERRQ(ierr);
4804   *mpimat = B_mpi;
4805 
4806   ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4807   PetscFunctionReturn(0);
4808 }
4809 
4810 #undef __FUNCT__
4811 #define __FUNCT__ "MatMerge_SeqsToMPI"
4812 PetscErrorCode  MatMerge_SeqsToMPI(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat)
4813 {
4814   PetscErrorCode   ierr;
4815 
4816   PetscFunctionBegin;
4817   ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4818   if (scall == MAT_INITIAL_MATRIX){
4819     ierr = MatMerge_SeqsToMPISymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr);
4820   }
4821   ierr = MatMerge_SeqsToMPINumeric(seqmat,*mpimat);CHKERRQ(ierr);
4822   ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4823   PetscFunctionReturn(0);
4824 }
4825 
4826 #undef __FUNCT__
4827 #define __FUNCT__ "MatMPIAIJGetLocalMat"
4828 /*@
4829      MatMPIAIJGetLocalMat - Creates a SeqAIJ from a MPIAIJ matrix by taking all its local rows and putting them into a sequential vector with
4830           mlocal rows and n columns. Where mlocal is the row count obtained with MatGetLocalSize() and n is the global column count obtained
4831           with MatGetSize()
4832 
4833     Not Collective
4834 
4835    Input Parameters:
4836 +    A - the matrix
4837 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4838 
4839    Output Parameter:
4840 .    A_loc - the local sequential matrix generated
4841 
4842     Level: developer
4843 
4844 .seealso: MatGetOwnerShipRange(), MatMPIAIJGetLocalMatCondensed()
4845 
4846 @*/
4847 PetscErrorCode  MatMPIAIJGetLocalMat(Mat A,MatReuse scall,Mat *A_loc)
4848 {
4849   PetscErrorCode  ierr;
4850   Mat_MPIAIJ      *mpimat=(Mat_MPIAIJ*)A->data;
4851   Mat_SeqAIJ      *mat,*a=(Mat_SeqAIJ*)(mpimat->A)->data,*b=(Mat_SeqAIJ*)(mpimat->B)->data;
4852   PetscInt        *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*cmap=mpimat->garray;
4853   MatScalar       *aa=a->a,*ba=b->a,*cam;
4854   PetscScalar     *ca;
4855   PetscInt        am=A->rmap->n,i,j,k,cstart=A->cmap->rstart;
4856   PetscInt        *ci,*cj,col,ncols_d,ncols_o,jo;
4857   PetscBool       match;
4858 
4859   PetscFunctionBegin;
4860   ierr = PetscTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr);
4861   if (!match) SETERRQ(((PetscObject)A)->comm, PETSC_ERR_SUP,"Requires MPIAIJ matrix as input");
4862   ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
4863   if (scall == MAT_INITIAL_MATRIX){
4864     ierr = PetscMalloc((1+am)*sizeof(PetscInt),&ci);CHKERRQ(ierr);
4865     ci[0] = 0;
4866     for (i=0; i<am; i++){
4867       ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]);
4868     }
4869     ierr = PetscMalloc((1+ci[am])*sizeof(PetscInt),&cj);CHKERRQ(ierr);
4870     ierr = PetscMalloc((1+ci[am])*sizeof(PetscScalar),&ca);CHKERRQ(ierr);
4871     k = 0;
4872     for (i=0; i<am; i++) {
4873       ncols_o = bi[i+1] - bi[i];
4874       ncols_d = ai[i+1] - ai[i];
4875       /* off-diagonal portion of A */
4876       for (jo=0; jo<ncols_o; jo++) {
4877         col = cmap[*bj];
4878         if (col >= cstart) break;
4879         cj[k]   = col; bj++;
4880         ca[k++] = *ba++;
4881       }
4882       /* diagonal portion of A */
4883       for (j=0; j<ncols_d; j++) {
4884         cj[k]   = cstart + *aj++;
4885         ca[k++] = *aa++;
4886       }
4887       /* off-diagonal portion of A */
4888       for (j=jo; j<ncols_o; j++) {
4889         cj[k]   = cmap[*bj++];
4890         ca[k++] = *ba++;
4891       }
4892     }
4893     /* put together the new matrix */
4894     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr);
4895     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
4896     /* Since these are PETSc arrays, change flags to free them as necessary. */
4897     mat          = (Mat_SeqAIJ*)(*A_loc)->data;
4898     mat->free_a  = PETSC_TRUE;
4899     mat->free_ij = PETSC_TRUE;
4900     mat->nonew   = 0;
4901   } else if (scall == MAT_REUSE_MATRIX){
4902     mat=(Mat_SeqAIJ*)(*A_loc)->data;
4903     ci = mat->i; cj = mat->j; cam = mat->a;
4904     for (i=0; i<am; i++) {
4905       /* off-diagonal portion of A */
4906       ncols_o = bi[i+1] - bi[i];
4907       for (jo=0; jo<ncols_o; jo++) {
4908         col = cmap[*bj];
4909         if (col >= cstart) break;
4910         *cam++ = *ba++; bj++;
4911       }
4912       /* diagonal portion of A */
4913       ncols_d = ai[i+1] - ai[i];
4914       for (j=0; j<ncols_d; j++) *cam++ = *aa++;
4915       /* off-diagonal portion of A */
4916       for (j=jo; j<ncols_o; j++) {
4917         *cam++ = *ba++; bj++;
4918       }
4919     }
4920   } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
4921   ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
4922   PetscFunctionReturn(0);
4923 }
4924 
4925 #undef __FUNCT__
4926 #define __FUNCT__ "MatMPIAIJGetLocalMatCondensed"
4927 /*@C
4928      MatMPIAIJGetLocalMatCondensed - Creates a SeqAIJ matrix from an MPIAIJ matrix by taking all its local rows and NON-ZERO columns
4929 
4930     Not Collective
4931 
4932    Input Parameters:
4933 +    A - the matrix
4934 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4935 -    row, col - index sets of rows and columns to extract (or PETSC_NULL)
4936 
4937    Output Parameter:
4938 .    A_loc - the local sequential matrix generated
4939 
4940     Level: developer
4941 
4942 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMat()
4943 
4944 @*/
4945 PetscErrorCode  MatMPIAIJGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc)
4946 {
4947   Mat_MPIAIJ        *a=(Mat_MPIAIJ*)A->data;
4948   PetscErrorCode    ierr;
4949   PetscInt          i,start,end,ncols,nzA,nzB,*cmap,imark,*idx;
4950   IS                isrowa,iscola;
4951   Mat               *aloc;
4952   PetscBool       match;
4953 
4954   PetscFunctionBegin;
4955   ierr = PetscTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr);
4956   if (!match) SETERRQ(((PetscObject)A)->comm, PETSC_ERR_SUP,"Requires MPIAIJ matrix as input");
4957   ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
4958   if (!row){
4959     start = A->rmap->rstart; end = A->rmap->rend;
4960     ierr = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr);
4961   } else {
4962     isrowa = *row;
4963   }
4964   if (!col){
4965     start = A->cmap->rstart;
4966     cmap  = a->garray;
4967     nzA   = a->A->cmap->n;
4968     nzB   = a->B->cmap->n;
4969     ierr  = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr);
4970     ncols = 0;
4971     for (i=0; i<nzB; i++) {
4972       if (cmap[i] < start) idx[ncols++] = cmap[i];
4973       else break;
4974     }
4975     imark = i;
4976     for (i=0; i<nzA; i++) idx[ncols++] = start + i;
4977     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i];
4978     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&iscola);CHKERRQ(ierr);
4979   } else {
4980     iscola = *col;
4981   }
4982   if (scall != MAT_INITIAL_MATRIX){
4983     ierr = PetscMalloc(sizeof(Mat),&aloc);CHKERRQ(ierr);
4984     aloc[0] = *A_loc;
4985   }
4986   ierr = MatGetSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr);
4987   *A_loc = aloc[0];
4988   ierr = PetscFree(aloc);CHKERRQ(ierr);
4989   if (!row){
4990     ierr = ISDestroy(&isrowa);CHKERRQ(ierr);
4991   }
4992   if (!col){
4993     ierr = ISDestroy(&iscola);CHKERRQ(ierr);
4994   }
4995   ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
4996   PetscFunctionReturn(0);
4997 }
4998 
4999 #undef __FUNCT__
5000 #define __FUNCT__ "MatGetBrowsOfAcols"
5001 /*@C
5002     MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns of local A
5003 
5004     Collective on Mat
5005 
5006    Input Parameters:
5007 +    A,B - the matrices in mpiaij format
5008 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5009 -    rowb, colb - index sets of rows and columns of B to extract (or PETSC_NULL)
5010 
5011    Output Parameter:
5012 +    rowb, colb - index sets of rows and columns of B to extract
5013 .    brstart - row index of B_seq from which next B->rmap->n rows are taken from B's local rows
5014 -    B_seq - the sequential matrix generated
5015 
5016     Level: developer
5017 
5018 @*/
5019 PetscErrorCode  MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,PetscInt *brstart,Mat *B_seq)
5020 {
5021   Mat_MPIAIJ        *a=(Mat_MPIAIJ*)A->data;
5022   PetscErrorCode    ierr;
5023   PetscInt          *idx,i,start,ncols,nzA,nzB,*cmap,imark;
5024   IS                isrowb,iscolb;
5025   Mat               *bseq;
5026 
5027   PetscFunctionBegin;
5028   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend){
5029     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
5030   }
5031   ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
5032 
5033   if (scall == MAT_INITIAL_MATRIX){
5034     start = A->cmap->rstart;
5035     cmap  = a->garray;
5036     nzA   = a->A->cmap->n;
5037     nzB   = a->B->cmap->n;
5038     ierr  = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr);
5039     ncols = 0;
5040     for (i=0; i<nzB; i++) {  /* row < local row index */
5041       if (cmap[i] < start) idx[ncols++] = cmap[i];
5042       else break;
5043     }
5044     imark = i;
5045     for (i=0; i<nzA; i++) idx[ncols++] = start + i;  /* local rows */
5046     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */
5047     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&isrowb);CHKERRQ(ierr);
5048     *brstart = imark;
5049     ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr);
5050   } else {
5051     if (!rowb || !colb) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX");
5052     isrowb = *rowb; iscolb = *colb;
5053     ierr = PetscMalloc(sizeof(Mat),&bseq);CHKERRQ(ierr);
5054     bseq[0] = *B_seq;
5055   }
5056   ierr = MatGetSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr);
5057   *B_seq = bseq[0];
5058   ierr = PetscFree(bseq);CHKERRQ(ierr);
5059   if (!rowb){
5060     ierr = ISDestroy(&isrowb);CHKERRQ(ierr);
5061   } else {
5062     *rowb = isrowb;
5063   }
5064   if (!colb){
5065     ierr = ISDestroy(&iscolb);CHKERRQ(ierr);
5066   } else {
5067     *colb = iscolb;
5068   }
5069   ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
5070   PetscFunctionReturn(0);
5071 }
5072 
5073 #undef __FUNCT__
5074 #define __FUNCT__ "MatGetBrowsOfAoCols"
5075 /*@C
5076     MatGetBrowsOfAoCols - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns
5077     of the OFF-DIAGONAL portion of local A
5078 
5079     Collective on Mat
5080 
5081    Input Parameters:
5082 +    A,B - the matrices in mpiaij format
5083 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5084 .    startsj - starting point in B's sending and receiving j-arrays, saved for MAT_REUSE (or PETSC_NULL)
5085 .    startsj_r - similar to startsj for receives
5086 -    bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or PETSC_NULL)
5087 
5088    Output Parameter:
5089 +    B_oth - the sequential matrix generated
5090 
5091     Level: developer
5092 
5093 @*/
5094 PetscErrorCode  MatGetBrowsOfAoCols(Mat A,Mat B,MatReuse scall,PetscInt **startsj,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth)
5095 {
5096   VecScatter_MPI_General *gen_to,*gen_from;
5097   PetscErrorCode         ierr;
5098   Mat_MPIAIJ             *a=(Mat_MPIAIJ*)A->data;
5099   Mat_SeqAIJ             *b_oth;
5100   VecScatter             ctx=a->Mvctx;
5101   MPI_Comm               comm=((PetscObject)ctx)->comm;
5102   PetscMPIInt            *rprocs,*sprocs,tag=((PetscObject)ctx)->tag,rank;
5103   PetscInt               *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap->n,row,*b_othi,*b_othj;
5104   PetscScalar            *rvalues,*svalues;
5105   MatScalar              *b_otha,*bufa,*bufA;
5106   PetscInt               i,j,k,l,ll,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len;
5107   MPI_Request            *rwaits = PETSC_NULL,*swaits = PETSC_NULL;
5108   MPI_Status             *sstatus,rstatus;
5109   PetscMPIInt            jj;
5110   PetscInt               *cols,sbs,rbs;
5111   PetscScalar            *vals;
5112 
5113   PetscFunctionBegin;
5114   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend){
5115     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
5116   }
5117   ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
5118   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
5119 
5120   gen_to   = (VecScatter_MPI_General*)ctx->todata;
5121   gen_from = (VecScatter_MPI_General*)ctx->fromdata;
5122   rvalues  = gen_from->values; /* holds the length of receiving row */
5123   svalues  = gen_to->values;   /* holds the length of sending row */
5124   nrecvs   = gen_from->n;
5125   nsends   = gen_to->n;
5126 
5127   ierr = PetscMalloc2(nrecvs,MPI_Request,&rwaits,nsends,MPI_Request,&swaits);CHKERRQ(ierr);
5128   srow     = gen_to->indices;   /* local row index to be sent */
5129   sstarts  = gen_to->starts;
5130   sprocs   = gen_to->procs;
5131   sstatus  = gen_to->sstatus;
5132   sbs      = gen_to->bs;
5133   rstarts  = gen_from->starts;
5134   rprocs   = gen_from->procs;
5135   rbs      = gen_from->bs;
5136 
5137   if (!startsj || !bufa_ptr) scall = MAT_INITIAL_MATRIX;
5138   if (scall == MAT_INITIAL_MATRIX){
5139     /* i-array */
5140     /*---------*/
5141     /*  post receives */
5142     for (i=0; i<nrecvs; i++){
5143       rowlen = (PetscInt*)rvalues + rstarts[i]*rbs;
5144       nrows = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */
5145       ierr = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5146     }
5147 
5148     /* pack the outgoing message */
5149     ierr = PetscMalloc2(nsends+1,PetscInt,&sstartsj,nrecvs+1,PetscInt,&rstartsj);CHKERRQ(ierr);
5150     sstartsj[0] = 0;  rstartsj[0] = 0;
5151     len = 0; /* total length of j or a array to be sent */
5152     k = 0;
5153     for (i=0; i<nsends; i++){
5154       rowlen = (PetscInt*)svalues + sstarts[i]*sbs;
5155       nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5156       for (j=0; j<nrows; j++) {
5157         row = srow[k] + B->rmap->range[rank]; /* global row idx */
5158         for (l=0; l<sbs; l++){
5159           ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); /* rowlength */
5160           rowlen[j*sbs+l] = ncols;
5161           len += ncols;
5162           ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr);
5163         }
5164         k++;
5165       }
5166       ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5167       sstartsj[i+1] = len;  /* starting point of (i+1)-th outgoing msg in bufj and bufa */
5168     }
5169     /* recvs and sends of i-array are completed */
5170     i = nrecvs;
5171     while (i--) {
5172       ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5173     }
5174     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
5175 
5176     /* allocate buffers for sending j and a arrays */
5177     ierr = PetscMalloc((len+1)*sizeof(PetscInt),&bufj);CHKERRQ(ierr);
5178     ierr = PetscMalloc((len+1)*sizeof(PetscScalar),&bufa);CHKERRQ(ierr);
5179 
5180     /* create i-array of B_oth */
5181     ierr = PetscMalloc((aBn+2)*sizeof(PetscInt),&b_othi);CHKERRQ(ierr);
5182     b_othi[0] = 0;
5183     len = 0; /* total length of j or a array to be received */
5184     k = 0;
5185     for (i=0; i<nrecvs; i++){
5186       rowlen = (PetscInt*)rvalues + rstarts[i]*rbs;
5187       nrows = rbs*(rstarts[i+1]-rstarts[i]); /* num of rows to be recieved */
5188       for (j=0; j<nrows; j++) {
5189         b_othi[k+1] = b_othi[k] + rowlen[j];
5190         len += rowlen[j]; k++;
5191       }
5192       rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */
5193     }
5194 
5195     /* allocate space for j and a arrrays of B_oth */
5196     ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(PetscInt),&b_othj);CHKERRQ(ierr);
5197     ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(MatScalar),&b_otha);CHKERRQ(ierr);
5198 
5199     /* j-array */
5200     /*---------*/
5201     /*  post receives of j-array */
5202     for (i=0; i<nrecvs; i++){
5203       nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
5204       ierr = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5205     }
5206 
5207     /* pack the outgoing message j-array */
5208     k = 0;
5209     for (i=0; i<nsends; i++){
5210       nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5211       bufJ = bufj+sstartsj[i];
5212       for (j=0; j<nrows; j++) {
5213         row  = srow[k++] + B->rmap->range[rank]; /* global row idx */
5214         for (ll=0; ll<sbs; ll++){
5215           ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr);
5216           for (l=0; l<ncols; l++){
5217             *bufJ++ = cols[l];
5218           }
5219           ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr);
5220         }
5221       }
5222       ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5223     }
5224 
5225     /* recvs and sends of j-array are completed */
5226     i = nrecvs;
5227     while (i--) {
5228       ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5229     }
5230     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
5231   } else if (scall == MAT_REUSE_MATRIX){
5232     sstartsj = *startsj;
5233     rstartsj = *startsj_r;
5234     bufa     = *bufa_ptr;
5235     b_oth    = (Mat_SeqAIJ*)(*B_oth)->data;
5236     b_otha   = b_oth->a;
5237   } else {
5238     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not posses an object container");
5239   }
5240 
5241   /* a-array */
5242   /*---------*/
5243   /*  post receives of a-array */
5244   for (i=0; i<nrecvs; i++){
5245     nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
5246     ierr = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5247   }
5248 
5249   /* pack the outgoing message a-array */
5250   k = 0;
5251   for (i=0; i<nsends; i++){
5252     nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5253     bufA = bufa+sstartsj[i];
5254     for (j=0; j<nrows; j++) {
5255       row  = srow[k++] + B->rmap->range[rank]; /* global row idx */
5256       for (ll=0; ll<sbs; ll++){
5257         ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr);
5258         for (l=0; l<ncols; l++){
5259           *bufA++ = vals[l];
5260         }
5261         ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr);
5262       }
5263     }
5264     ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5265   }
5266   /* recvs and sends of a-array are completed */
5267   i = nrecvs;
5268   while (i--) {
5269     ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5270   }
5271   if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
5272   ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr);
5273 
5274   if (scall == MAT_INITIAL_MATRIX){
5275     /* put together the new matrix */
5276     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr);
5277 
5278     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
5279     /* Since these are PETSc arrays, change flags to free them as necessary. */
5280     b_oth          = (Mat_SeqAIJ *)(*B_oth)->data;
5281     b_oth->free_a  = PETSC_TRUE;
5282     b_oth->free_ij = PETSC_TRUE;
5283     b_oth->nonew   = 0;
5284 
5285     ierr = PetscFree(bufj);CHKERRQ(ierr);
5286     if (!startsj || !bufa_ptr){
5287       ierr = PetscFree2(sstartsj,rstartsj);CHKERRQ(ierr);
5288       ierr = PetscFree(bufa_ptr);CHKERRQ(ierr);
5289     } else {
5290       *startsj   = sstartsj;
5291       *startsj_r = rstartsj;
5292       *bufa_ptr  = bufa;
5293     }
5294   }
5295   ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
5296   PetscFunctionReturn(0);
5297 }
5298 
5299 #undef __FUNCT__
5300 #define __FUNCT__ "MatGetCommunicationStructs"
5301 /*@C
5302   MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication.
5303 
5304   Not Collective
5305 
5306   Input Parameters:
5307 . A - The matrix in mpiaij format
5308 
5309   Output Parameter:
5310 + lvec - The local vector holding off-process values from the argument to a matrix-vector product
5311 . colmap - A map from global column index to local index into lvec
5312 - multScatter - A scatter from the argument of a matrix-vector product to lvec
5313 
5314   Level: developer
5315 
5316 @*/
5317 #if defined (PETSC_USE_CTABLE)
5318 PetscErrorCode  MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter)
5319 #else
5320 PetscErrorCode  MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter)
5321 #endif
5322 {
5323   Mat_MPIAIJ *a;
5324 
5325   PetscFunctionBegin;
5326   PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
5327   PetscValidPointer(lvec, 2);
5328   PetscValidPointer(colmap, 3);
5329   PetscValidPointer(multScatter, 4);
5330   a = (Mat_MPIAIJ *) A->data;
5331   if (lvec) *lvec = a->lvec;
5332   if (colmap) *colmap = a->colmap;
5333   if (multScatter) *multScatter = a->Mvctx;
5334   PetscFunctionReturn(0);
5335 }
5336 
5337 EXTERN_C_BEGIN
5338 extern PetscErrorCode  MatConvert_MPIAIJ_MPIAIJCRL(Mat,const MatType,MatReuse,Mat*);
5339 extern PetscErrorCode  MatConvert_MPIAIJ_MPIAIJPERM(Mat,const MatType,MatReuse,Mat*);
5340 extern PetscErrorCode  MatConvert_MPIAIJ_MPISBAIJ(Mat,const MatType,MatReuse,Mat*);
5341 EXTERN_C_END
5342 
5343 #undef __FUNCT__
5344 #define __FUNCT__ "MatMatMultNumeric_MPIDense_MPIAIJ"
5345 /*
5346     Computes (B'*A')' since computing B*A directly is untenable
5347 
5348                n                       p                          p
5349         (              )       (              )         (                  )
5350       m (      A       )  *  n (       B      )   =   m (         C        )
5351         (              )       (              )         (                  )
5352 
5353 */
5354 PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C)
5355 {
5356   PetscErrorCode     ierr;
5357   Mat                At,Bt,Ct;
5358 
5359   PetscFunctionBegin;
5360   ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr);
5361   ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr);
5362   ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,1.0,&Ct);CHKERRQ(ierr);
5363   ierr = MatDestroy(&At);CHKERRQ(ierr);
5364   ierr = MatDestroy(&Bt);CHKERRQ(ierr);
5365   ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr);
5366   ierr = MatDestroy(&Ct);CHKERRQ(ierr);
5367   PetscFunctionReturn(0);
5368 }
5369 
5370 #undef __FUNCT__
5371 #define __FUNCT__ "MatMatMultSymbolic_MPIDense_MPIAIJ"
5372 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat *C)
5373 {
5374   PetscErrorCode ierr;
5375   PetscInt       m=A->rmap->n,n=B->cmap->n;
5376   Mat            Cmat;
5377 
5378   PetscFunctionBegin;
5379   if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %d != B->rmap->n %d\n",A->cmap->n,B->rmap->n);
5380   ierr = MatCreate(((PetscObject)A)->comm,&Cmat);CHKERRQ(ierr);
5381   ierr = MatSetSizes(Cmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
5382   ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr);
5383   ierr = MatMPIDenseSetPreallocation(Cmat,PETSC_NULL);CHKERRQ(ierr);
5384   ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5385   ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5386   *C   = Cmat;
5387   PetscFunctionReturn(0);
5388 }
5389 
5390 /* ----------------------------------------------------------------*/
5391 #undef __FUNCT__
5392 #define __FUNCT__ "MatMatMult_MPIDense_MPIAIJ"
5393 PetscErrorCode MatMatMult_MPIDense_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C)
5394 {
5395   PetscErrorCode ierr;
5396 
5397   PetscFunctionBegin;
5398   if (scall == MAT_INITIAL_MATRIX){
5399     ierr = MatMatMultSymbolic_MPIDense_MPIAIJ(A,B,fill,C);CHKERRQ(ierr);
5400   }
5401   ierr = MatMatMultNumeric_MPIDense_MPIAIJ(A,B,*C);CHKERRQ(ierr);
5402   PetscFunctionReturn(0);
5403 }
5404 
5405 EXTERN_C_BEGIN
5406 #if defined(PETSC_HAVE_MUMPS)
5407 extern PetscErrorCode MatGetFactor_aij_mumps(Mat,MatFactorType,Mat*);
5408 #endif
5409 #if defined(PETSC_HAVE_PASTIX)
5410 extern PetscErrorCode MatGetFactor_mpiaij_pastix(Mat,MatFactorType,Mat*);
5411 #endif
5412 #if defined(PETSC_HAVE_SUPERLU_DIST)
5413 extern PetscErrorCode MatGetFactor_mpiaij_superlu_dist(Mat,MatFactorType,Mat*);
5414 #endif
5415 #if defined(PETSC_HAVE_SPOOLES)
5416 extern PetscErrorCode MatGetFactor_mpiaij_spooles(Mat,MatFactorType,Mat*);
5417 #endif
5418 EXTERN_C_END
5419 
5420 /*MC
5421    MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.
5422 
5423    Options Database Keys:
5424 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions()
5425 
5426   Level: beginner
5427 
5428 .seealso: MatCreateMPIAIJ()
5429 M*/
5430 
5431 EXTERN_C_BEGIN
5432 #undef __FUNCT__
5433 #define __FUNCT__ "MatCreate_MPIAIJ"
5434 PetscErrorCode  MatCreate_MPIAIJ(Mat B)
5435 {
5436   Mat_MPIAIJ     *b;
5437   PetscErrorCode ierr;
5438   PetscMPIInt    size;
5439 
5440   PetscFunctionBegin;
5441   ierr = MPI_Comm_size(((PetscObject)B)->comm,&size);CHKERRQ(ierr);
5442 
5443   ierr            = PetscNewLog(B,Mat_MPIAIJ,&b);CHKERRQ(ierr);
5444   B->data         = (void*)b;
5445   ierr            = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
5446   B->rmap->bs     = 1;
5447   B->assembled    = PETSC_FALSE;
5448 
5449   B->insertmode   = NOT_SET_VALUES;
5450   b->size         = size;
5451   ierr = MPI_Comm_rank(((PetscObject)B)->comm,&b->rank);CHKERRQ(ierr);
5452 
5453   /* build cache for off array entries formed */
5454   ierr = MatStashCreate_Private(((PetscObject)B)->comm,1,&B->stash);CHKERRQ(ierr);
5455   b->donotstash  = PETSC_FALSE;
5456   b->colmap      = 0;
5457   b->garray      = 0;
5458   b->roworiented = PETSC_TRUE;
5459 
5460   /* stuff used for matrix vector multiply */
5461   b->lvec      = PETSC_NULL;
5462   b->Mvctx     = PETSC_NULL;
5463 
5464   /* stuff for MatGetRow() */
5465   b->rowindices   = 0;
5466   b->rowvalues    = 0;
5467   b->getrowactive = PETSC_FALSE;
5468 
5469 #if defined(PETSC_HAVE_SPOOLES)
5470   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_spooles_C",
5471                                      "MatGetFactor_mpiaij_spooles",
5472                                      MatGetFactor_mpiaij_spooles);CHKERRQ(ierr);
5473 #endif
5474 #if defined(PETSC_HAVE_MUMPS)
5475   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_mumps_C",
5476                                      "MatGetFactor_aij_mumps",
5477                                      MatGetFactor_aij_mumps);CHKERRQ(ierr);
5478 #endif
5479 #if defined(PETSC_HAVE_PASTIX)
5480   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_pastix_C",
5481 					   "MatGetFactor_mpiaij_pastix",
5482 					   MatGetFactor_mpiaij_pastix);CHKERRQ(ierr);
5483 #endif
5484 #if defined(PETSC_HAVE_SUPERLU_DIST)
5485   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_superlu_dist_C",
5486                                      "MatGetFactor_mpiaij_superlu_dist",
5487                                      MatGetFactor_mpiaij_superlu_dist);CHKERRQ(ierr);
5488 #endif
5489   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
5490                                      "MatStoreValues_MPIAIJ",
5491                                      MatStoreValues_MPIAIJ);CHKERRQ(ierr);
5492   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
5493                                      "MatRetrieveValues_MPIAIJ",
5494                                      MatRetrieveValues_MPIAIJ);CHKERRQ(ierr);
5495   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
5496 				     "MatGetDiagonalBlock_MPIAIJ",
5497                                      MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr);
5498   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatIsTranspose_C",
5499 				     "MatIsTranspose_MPIAIJ",
5500 				     MatIsTranspose_MPIAIJ);CHKERRQ(ierr);
5501   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocation_C",
5502 				     "MatMPIAIJSetPreallocation_MPIAIJ",
5503 				     MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr);
5504   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",
5505 				     "MatMPIAIJSetPreallocationCSR_MPIAIJ",
5506 				     MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr);
5507   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C",
5508 				     "MatDiagonalScaleLocal_MPIAIJ",
5509 				     MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr);
5510   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpiaijperm_C",
5511                                      "MatConvert_MPIAIJ_MPIAIJPERM",
5512                                       MatConvert_MPIAIJ_MPIAIJPERM);CHKERRQ(ierr);
5513   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpiaijcrl_C",
5514                                      "MatConvert_MPIAIJ_MPIAIJCRL",
5515                                       MatConvert_MPIAIJ_MPIAIJCRL);CHKERRQ(ierr);
5516   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C",
5517                                      "MatConvert_MPIAIJ_MPISBAIJ",
5518                                       MatConvert_MPIAIJ_MPISBAIJ);CHKERRQ(ierr);
5519   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMult_mpidense_mpiaij_C",
5520                                      "MatMatMult_MPIDense_MPIAIJ",
5521                                       MatMatMult_MPIDense_MPIAIJ);CHKERRQ(ierr);
5522   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMultSymbolic_mpidense_mpiaij_C",
5523                                      "MatMatMultSymbolic_MPIDense_MPIAIJ",
5524                                      MatMatMultSymbolic_MPIDense_MPIAIJ);CHKERRQ(ierr);
5525   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMultNumeric_mpidense_mpiaij_C",
5526                                      "MatMatMultNumeric_MPIDense_MPIAIJ",
5527                                       MatMatMultNumeric_MPIDense_MPIAIJ);CHKERRQ(ierr);
5528   ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr);
5529   PetscFunctionReturn(0);
5530 }
5531 EXTERN_C_END
5532 
5533 #undef __FUNCT__
5534 #define __FUNCT__ "MatCreateMPIAIJWithSplitArrays"
5535 /*@
5536      MatCreateMPIAIJWithSplitArrays - creates a MPI AIJ matrix using arrays that contain the "diagonal"
5537          and "off-diagonal" part of the matrix in CSR format.
5538 
5539    Collective on MPI_Comm
5540 
5541    Input Parameters:
5542 +  comm - MPI communicator
5543 .  m - number of local rows (Cannot be PETSC_DECIDE)
5544 .  n - This value should be the same as the local size used in creating the
5545        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
5546        calculated if N is given) For square matrices n is almost always m.
5547 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
5548 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
5549 .   i - row indices for "diagonal" portion of matrix
5550 .   j - column indices
5551 .   a - matrix values
5552 .   oi - row indices for "off-diagonal" portion of matrix
5553 .   oj - column indices
5554 -   oa - matrix values
5555 
5556    Output Parameter:
5557 .   mat - the matrix
5558 
5559    Level: advanced
5560 
5561    Notes:
5562        The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc. The user
5563        must free the arrays once the matrix has been destroyed and not before.
5564 
5565        The i and j indices are 0 based
5566 
5567        See MatCreateMPIAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix
5568 
5569        This sets local rows and cannot be used to set off-processor values.
5570 
5571        You cannot later use MatSetValues() to change values in this matrix.
5572 
5573 .keywords: matrix, aij, compressed row, sparse, parallel
5574 
5575 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
5576           MPIAIJ, MatCreateMPIAIJ(), MatCreateMPIAIJWithArrays()
5577 @*/
5578 PetscErrorCode  MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],
5579 								PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat)
5580 {
5581   PetscErrorCode ierr;
5582   Mat_MPIAIJ     *maij;
5583 
5584  PetscFunctionBegin;
5585   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
5586   if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
5587   if (oi[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0");
5588   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
5589   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
5590   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
5591   maij = (Mat_MPIAIJ*) (*mat)->data;
5592   maij->donotstash     = PETSC_TRUE;
5593   (*mat)->preallocated = PETSC_TRUE;
5594 
5595   ierr = PetscLayoutSetBlockSize((*mat)->rmap,1);CHKERRQ(ierr);
5596   ierr = PetscLayoutSetBlockSize((*mat)->cmap,1);CHKERRQ(ierr);
5597   ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr);
5598   ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr);
5599 
5600   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr);
5601   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr);
5602 
5603   ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5604   ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5605   ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5606   ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5607 
5608   ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5609   ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5610   PetscFunctionReturn(0);
5611 }
5612 
5613 /*
5614     Special version for direct calls from Fortran
5615 */
5616 #include <private/fortranimpl.h>
5617 
5618 #if defined(PETSC_HAVE_FORTRAN_CAPS)
5619 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ
5620 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
5621 #define matsetvaluesmpiaij_ matsetvaluesmpiaij
5622 #endif
5623 
5624 /* Change these macros so can be used in void function */
5625 #undef CHKERRQ
5626 #define CHKERRQ(ierr) CHKERRABORT(PETSC_COMM_WORLD,ierr)
5627 #undef SETERRQ2
5628 #define SETERRQ2(comm,ierr,b,c,d) CHKERRABORT(comm,ierr)
5629 #undef SETERRQ
5630 #define SETERRQ(c,ierr,b) CHKERRABORT(c,ierr)
5631 
5632 EXTERN_C_BEGIN
5633 #undef __FUNCT__
5634 #define __FUNCT__ "matsetvaluesmpiaij_"
5635 void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr)
5636 {
5637   Mat             mat = *mmat;
5638   PetscInt        m = *mm, n = *mn;
5639   InsertMode      addv = *maddv;
5640   Mat_MPIAIJ      *aij = (Mat_MPIAIJ*)mat->data;
5641   PetscScalar     value;
5642   PetscErrorCode  ierr;
5643 
5644   ierr = MatPreallocated(mat);CHKERRQ(ierr);
5645   if (mat->insertmode == NOT_SET_VALUES) {
5646     mat->insertmode = addv;
5647   }
5648 #if defined(PETSC_USE_DEBUG)
5649   else if (mat->insertmode != addv) {
5650     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
5651   }
5652 #endif
5653   {
5654   PetscInt        i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
5655   PetscInt        cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
5656   PetscBool       roworiented = aij->roworiented;
5657 
5658   /* Some Variables required in the macro */
5659   Mat             A = aij->A;
5660   Mat_SeqAIJ      *a = (Mat_SeqAIJ*)A->data;
5661   PetscInt        *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
5662   MatScalar       *aa = a->a;
5663   PetscBool       ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES))?PETSC_TRUE:PETSC_FALSE);
5664   Mat             B = aij->B;
5665   Mat_SeqAIJ      *b = (Mat_SeqAIJ*)B->data;
5666   PetscInt        *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
5667   MatScalar       *ba = b->a;
5668 
5669   PetscInt        *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
5670   PetscInt        nonew = a->nonew;
5671   MatScalar       *ap1,*ap2;
5672 
5673   PetscFunctionBegin;
5674   for (i=0; i<m; i++) {
5675     if (im[i] < 0) continue;
5676 #if defined(PETSC_USE_DEBUG)
5677     if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
5678 #endif
5679     if (im[i] >= rstart && im[i] < rend) {
5680       row      = im[i] - rstart;
5681       lastcol1 = -1;
5682       rp1      = aj + ai[row];
5683       ap1      = aa + ai[row];
5684       rmax1    = aimax[row];
5685       nrow1    = ailen[row];
5686       low1     = 0;
5687       high1    = nrow1;
5688       lastcol2 = -1;
5689       rp2      = bj + bi[row];
5690       ap2      = ba + bi[row];
5691       rmax2    = bimax[row];
5692       nrow2    = bilen[row];
5693       low2     = 0;
5694       high2    = nrow2;
5695 
5696       for (j=0; j<n; j++) {
5697         if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
5698         if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
5699         if (in[j] >= cstart && in[j] < cend){
5700           col = in[j] - cstart;
5701           MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
5702         } else if (in[j] < 0) continue;
5703 #if defined(PETSC_USE_DEBUG)
5704         else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
5705 #endif
5706         else {
5707           if (mat->was_assembled) {
5708             if (!aij->colmap) {
5709               ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
5710             }
5711 #if defined (PETSC_USE_CTABLE)
5712             ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
5713 	    col--;
5714 #else
5715             col = aij->colmap[in[j]] - 1;
5716 #endif
5717             if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
5718               ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
5719               col =  in[j];
5720               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
5721               B = aij->B;
5722               b = (Mat_SeqAIJ*)B->data;
5723               bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
5724               rp2      = bj + bi[row];
5725               ap2      = ba + bi[row];
5726               rmax2    = bimax[row];
5727               nrow2    = bilen[row];
5728               low2     = 0;
5729               high2    = nrow2;
5730               bm       = aij->B->rmap->n;
5731               ba = b->a;
5732             }
5733           } else col = in[j];
5734           MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
5735         }
5736       }
5737     } else {
5738       if (!aij->donotstash) {
5739         if (roworiented) {
5740           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
5741         } else {
5742           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
5743         }
5744       }
5745     }
5746   }}
5747   PetscFunctionReturnVoid();
5748 }
5749 EXTERN_C_END
5750 
5751