xref: /petsc/src/mat/impls/aij/mpi/mpiaij.c (revision 490ca623e2ad2986c65f7c02db83f9fc53aebec2)
1 
2 #include <../src/mat/impls/aij/mpi/mpiaij.h>   /*I "petscmat.h" I*/
3 #include <petscblaslapack.h>
4 
5 /*MC
6    MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices.
7 
8    This matrix type is identical to MATSEQAIJ when constructed with a single process communicator,
9    and MATMPIAIJ otherwise.  As a result, for single process communicators,
10   MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
11   for communicators controlling multiple processes.  It is recommended that you call both of
12   the above preallocation routines for simplicity.
13 
14    Options Database Keys:
15 . -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions()
16 
17  Developer Notes: Subclasses include MATAIJCUSP, MATAIJPERM, and MATAIJCRL; the type also automatically switches over to using inodes when
18   enough of them exist.
19 
20   Level: beginner
21 
22 .seealso: MatCreateMPIAIJ(), MatCreateSeqAIJ(), MATSEQAIJ, MATMPIAIJ
23 M*/
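/*
   Example usage (a minimal sketch; error checking with CHKERRQ() is omitted and the sizes
   M, N and the preallocation counts are placeholders).  Calling both preallocation routines
   is safe because the call that does not apply to the communicator size is ignored.

      Mat A;
      MatCreate(comm,&A);
      MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,M,N);
      MatSetType(A,MATAIJ);
      MatSeqAIJSetPreallocation(A,nz,PETSC_NULL);
      MatMPIAIJSetPreallocation(A,d_nz,PETSC_NULL,o_nz,PETSC_NULL);
*/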
24 
25 /*MC
26    MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices.
27 
28    This matrix type is identical to MATSEQAIJCRL when constructed with a single process communicator,
29    and MATMPIAIJCRL otherwise.  As a result, for single process communicators,
30    MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
31   for communicators controlling multiple processes.  It is recommended that you call both of
32   the above preallocation routines for simplicity.
33 
34    Options Database Keys:
35 . -mat_type aijcrl - sets the matrix type to "aijcrl" during a call to MatSetFromOptions()
36 
37   Level: beginner
38 
39 .seealso: MatCreateMPIAIJCRL(), MATSEQAIJCRL, MATMPIAIJCRL
40 M*/
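/*
   Example usage (sketch): the type is normally selected at runtime from the options database,
   e.g. run with -mat_type aijcrl after

      MatCreate(comm,&A);
      MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,M,N);
      MatSetFromOptions(A);
      MatSeqAIJSetPreallocation(A,nz,PETSC_NULL);
      MatMPIAIJSetPreallocation(A,d_nz,PETSC_NULL,o_nz,PETSC_NULL);
*/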
41 
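/*
   MatFindNonZeroRows_MPIAIJ - builds an index set (in keptrows) holding the global indices of
   the locally owned rows that contain at least one nonzero value; keptrows is left NULL when
   no row on any process is entirely zero.
*/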
42 #undef __FUNCT__
43 #define __FUNCT__ "MatFindNonZeroRows_MPIAIJ"
44 PetscErrorCode MatFindNonZeroRows_MPIAIJ(Mat M,IS *keptrows)
45 {
46   PetscErrorCode  ierr;
47   Mat_MPIAIJ      *mat = (Mat_MPIAIJ*)M->data;
48   Mat_SeqAIJ      *a = (Mat_SeqAIJ*)mat->A->data;
49   Mat_SeqAIJ      *b = (Mat_SeqAIJ*)mat->B->data;
50   const PetscInt  *ia,*ib;
51   const MatScalar *aa,*bb;
52   PetscInt        na,nb,i,j,*rows,cnt=0,n0rows;
53   PetscInt        m = M->rmap->n,rstart = M->rmap->rstart;
54 
55   PetscFunctionBegin;
56   *keptrows = 0;
57   ia = a->i;
58   ib = b->i;
59   for (i=0; i<m; i++) {
60     na = ia[i+1] - ia[i];
61     nb = ib[i+1] - ib[i];
62     if (!na && !nb) {
63       cnt++;
64       goto ok1;
65     }
66     aa = a->a + ia[i];
67     for (j=0; j<na; j++) {
68       if (aa[j] != 0.0) goto ok1;
69     }
70     bb = b->a + ib[i];
71     for (j=0; j <nb; j++) {
72       if (bb[j] != 0.0) goto ok1;
73     }
74     cnt++;
75     ok1:;
76   }
77   ierr = MPI_Allreduce(&cnt,&n0rows,1,MPIU_INT,MPI_SUM,((PetscObject)M)->comm);CHKERRQ(ierr);
78   if (!n0rows) PetscFunctionReturn(0);
79   ierr = PetscMalloc((M->rmap->n-cnt)*sizeof(PetscInt),&rows);CHKERRQ(ierr);
80   cnt = 0;
81   for (i=0; i<m; i++) {
82     na = ia[i+1] - ia[i];
83     nb = ib[i+1] - ib[i];
84     if (!na && !nb) continue;
85     aa = a->a + ia[i];
86     for(j=0; j<na;j++) {
87       if (aa[j] != 0.0) {
88         rows[cnt++] = rstart + i;
89         goto ok2;
90       }
91     }
92     bb = b->a + ib[i];
93     for (j=0; j<nb; j++) {
94       if (bb[j] != 0.0) {
95         rows[cnt++] = rstart + i;
96         goto ok2;
97       }
98     }
99     ok2:;
100   }
101   ierr = ISCreateGeneral(((PetscObject)M)->comm,cnt,rows,PETSC_OWN_POINTER,keptrows);CHKERRQ(ierr);
102   PetscFunctionReturn(0);
103 }
104 
105 #undef __FUNCT__
106 #define __FUNCT__ "MatGetColumnNorms_MPIAIJ"
107 PetscErrorCode MatGetColumnNorms_MPIAIJ(Mat A,NormType type,PetscReal *norms)
108 {
109   PetscErrorCode ierr;
110   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)A->data;
111   PetscInt       i,n,*garray = aij->garray;
112   Mat_SeqAIJ     *a_aij = (Mat_SeqAIJ*) aij->A->data;
113   Mat_SeqAIJ     *b_aij = (Mat_SeqAIJ*) aij->B->data;
114   PetscReal      *work;
115 
116   PetscFunctionBegin;
117   ierr = MatGetSize(A,PETSC_NULL,&n);CHKERRQ(ierr);
118   ierr = PetscMalloc(n*sizeof(PetscReal),&work);CHKERRQ(ierr);
119   ierr = PetscMemzero(work,n*sizeof(PetscReal));CHKERRQ(ierr);
120   if (type == NORM_2) {
121     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
122       work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]*a_aij->a[i]);
123     }
124     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
125       work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]*b_aij->a[i]);
126     }
127   } else if (type == NORM_1) {
128     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
129       work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]);
130     }
131     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
132       work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]);
133     }
134   } else if (type == NORM_INFINITY) {
135     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
136       work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]);
137     }
138     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
139       work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]),work[garray[b_aij->j[i]]]);
140     }
141 
142   } else SETERRQ(((PetscObject)A)->comm,PETSC_ERR_ARG_WRONG,"Unknown NormType");
143   if (type == NORM_INFINITY) {
144     ierr = MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,A->hdr.comm);CHKERRQ(ierr);
145   } else {
146     ierr = MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,A->hdr.comm);CHKERRQ(ierr);
147   }
148   ierr = PetscFree(work);CHKERRQ(ierr);
149   if (type == NORM_2) {
150     for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]);
151   }
152   PetscFunctionReturn(0);
153 }
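/*
   Example usage (sketch, via the public MatGetColumnNorms() interface, which dispatches here
   for MPIAIJ matrices): norms[] must provide one entry per global column.

      PetscReal *norms;
      PetscMalloc(N*sizeof(PetscReal),&norms);    // N = global number of columns
      MatGetColumnNorms(A,NORM_2,norms);
      ...
      PetscFree(norms);
*/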
154 
155 #undef __FUNCT__
156 #define __FUNCT__ "MatDistribute_MPIAIJ"
157 /*
158     Distributes a SeqAIJ matrix across a set of processes. Code stolen from
159     MatLoad_MPIAIJ(). Horrible lack of reuse. Should be a routine for each matrix type.
160 
161     Only for square matrices
162 */
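/*
   Calling sketch (hypothetical driver): gmat needs to be a valid SeqAIJ matrix only on rank 0,
   and every rank passes its local row count m.

      Mat dmat;
      MatDistribute_MPIAIJ(comm,gmat,m,MAT_INITIAL_MATRIX,&dmat);  // builds layout and values
      ...                                                          // values of gmat change on rank 0
      MatDistribute_MPIAIJ(comm,gmat,m,MAT_REUSE_MATRIX,&dmat);    // ships only the new values
*/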
163 PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat)
164 {
165   PetscMPIInt    rank,size;
166   PetscInt       *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz,*gmataj,cnt,row,*ld;
167   PetscErrorCode ierr;
168   Mat            mat;
169   Mat_SeqAIJ     *gmata;
170   PetscMPIInt    tag;
171   MPI_Status     status;
172   PetscBool      aij;
173   MatScalar      *gmataa,*ao,*ad,*gmataarestore=0;
174 
175   PetscFunctionBegin;
176   CHKMEMQ;
177   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
178   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
179   if (!rank) {
180     ierr = PetscTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);CHKERRQ(ierr);
181     if (!aij) SETERRQ1(((PetscObject)gmat)->comm,PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name);
182   }
183   if (reuse == MAT_INITIAL_MATRIX) {
184     ierr = MatCreate(comm,&mat);CHKERRQ(ierr);
185     ierr = MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
186     ierr = MatSetType(mat,MATAIJ);CHKERRQ(ierr);
187     ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr);
188     ierr = PetscMalloc2(m,PetscInt,&dlens,m,PetscInt,&olens);CHKERRQ(ierr);
189     ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
190     rowners[0] = 0;
191     for (i=2; i<=size; i++) {
192       rowners[i] += rowners[i-1];
193     }
194     rstart = rowners[rank];
195     rend   = rowners[rank+1];
196     ierr   = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
197     if (!rank) {
198       gmata = (Mat_SeqAIJ*) gmat->data;
199       /* send row lengths to all processors */
200       for (i=0; i<m; i++) dlens[i] = gmata->ilen[i];
201       for (i=1; i<size; i++) {
202 	ierr = MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
203       }
204       /* determine the number of diagonal and off-diagonal nonzeros in each row */
205       ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
206       ierr = PetscMalloc(m*sizeof(PetscInt),&ld);CHKERRQ(ierr);
207       ierr = PetscMemzero(ld,m*sizeof(PetscInt));CHKERRQ(ierr);
208       jj = 0;
209       for (i=0; i<m; i++) {
210 	for (j=0; j<dlens[i]; j++) {
211           if (gmata->j[jj] < rstart) ld[i]++;
212 	  if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++;
213 	  jj++;
214 	}
215       }
216       /* send column indices to other processes */
217       for (i=1; i<size; i++) {
218 	nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
219 	ierr = MPI_Send(&nz,1,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
220 	ierr = MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
221       }
222 
223       /* send numerical values to other processes */
224       for (i=1; i<size; i++) {
225         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
226         ierr = MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
227       }
228       gmataa = gmata->a;
229       gmataj = gmata->j;
230 
231     } else {
232       /* receive row lengths */
233       ierr = MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
234       /* receive column indices */
235       ierr = MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
236       ierr = PetscMalloc2(nz,PetscScalar,&gmataa,nz,PetscInt,&gmataj);CHKERRQ(ierr);
237       ierr = MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
238       /* determine the number of diagonal and off-diagonal nonzeros in each row */
239       ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
240       ierr = PetscMalloc(m*sizeof(PetscInt),&ld);CHKERRQ(ierr);
241       ierr = PetscMemzero(ld,m*sizeof(PetscInt));CHKERRQ(ierr);
242       jj = 0;
243       for (i=0; i<m; i++) {
244 	for (j=0; j<dlens[i]; j++) {
245           if (gmataj[jj] < rstart) ld[i]++;
246 	  if (gmataj[jj] < rstart || gmataj[jj] >= rend) olens[i]++;
247 	  jj++;
248 	}
249       }
250       /* receive numerical values */
251       ierr = PetscMemzero(gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr);
252       ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
253     }
254     /* set preallocation */
255     for (i=0; i<m; i++) {
256       dlens[i] -= olens[i];
257     }
258     ierr = MatSeqAIJSetPreallocation(mat,0,dlens);CHKERRQ(ierr);
259     ierr = MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);CHKERRQ(ierr);
260 
261     for (i=0; i<m; i++) {
262       dlens[i] += olens[i];
263     }
264     cnt  = 0;
265     for (i=0; i<m; i++) {
266       row  = rstart + i;
267       ierr = MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);CHKERRQ(ierr);
268       cnt += dlens[i];
269     }
270     if (rank) {
271       ierr = PetscFree2(gmataa,gmataj);CHKERRQ(ierr);
272     }
273     ierr = PetscFree2(dlens,olens);CHKERRQ(ierr);
274     ierr = PetscFree(rowners);CHKERRQ(ierr);
275     ((Mat_MPIAIJ*)(mat->data))->ld = ld;
276     *inmat = mat;
277   } else {   /* column indices are already set; only need to move over numerical values from process 0 */
278     Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data;
279     Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data;
280     mat   = *inmat;
281     ierr  = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
282     if (!rank) {
283       /* send numerical values to other processes */
284       gmata = (Mat_SeqAIJ*) gmat->data;
285       ierr   = MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);CHKERRQ(ierr);
286       gmataa = gmata->a;
287       for (i=1; i<size; i++) {
288         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
289         ierr = MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
290       }
291       nz   = gmata->i[rowners[1]]-gmata->i[rowners[0]];
292     } else {
293       /* receive numerical values from process 0*/
294       nz   = Ad->nz + Ao->nz;
295       ierr = PetscMalloc(nz*sizeof(PetscScalar),&gmataa);CHKERRQ(ierr); gmataarestore = gmataa;
296       ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
297     }
298     /* transfer numerical values into the diagonal A and off diagonal B parts of mat */
299     ld = ((Mat_MPIAIJ*)(mat->data))->ld;
300     ad = Ad->a;
301     ao = Ao->a;
302     if (mat->rmap->n) {
303       i  = 0;
304       nz = ld[i];                                   ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
305       nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
306     }
307     for (i=1; i<mat->rmap->n; i++) {
308       nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
309       nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
310     }
311     i--;
312     if (mat->rmap->n) {
313       nz = Ao->i[i+1] - Ao->i[i] - ld[i];           ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
314     }
315     if (rank) {
316       ierr = PetscFree(gmataarestore);CHKERRQ(ierr);
317     }
318   }
319   ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
320   ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
321   CHKMEMQ;
322   PetscFunctionReturn(0);
323 }
324 
325 /*
326   Local utility routine that creates a mapping from the global column
327 number to the local number in the off-diagonal part of the local
328 storage of the matrix.  When PETSC_USE_CTABLE is defined this is scalable, at
329 a slightly higher hash-table lookup cost; without it, it is not scalable (each process
330 has an order-N integer array) but is fast to access.
331 */
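/*
   Lookup sketch: once the colmap exists, a global column gcol belonging to the off-diagonal
   part is converted to a local column index lcol as follows (this is the pattern used in
   MatSetValues_MPIAIJ() and MatGetValues_MPIAIJ() below):

      #if defined(PETSC_USE_CTABLE)
        PetscTableFind(aij->colmap,gcol+1,&lcol); lcol--;
      #else
        lcol = aij->colmap[gcol] - 1;
      #endif

   In both cases lcol == -1 signals that gcol is not (yet) present in the off-diagonal block.
*/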
332 #undef __FUNCT__
333 #define __FUNCT__ "CreateColmap_MPIAIJ_Private"
334 PetscErrorCode CreateColmap_MPIAIJ_Private(Mat mat)
335 {
336   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
337   PetscErrorCode ierr;
338   PetscInt       n = aij->B->cmap->n,i;
339 
340   PetscFunctionBegin;
341 #if defined (PETSC_USE_CTABLE)
342   ierr = PetscTableCreate(n,&aij->colmap);CHKERRQ(ierr);
343   for (i=0; i<n; i++){
344     ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1);CHKERRQ(ierr);
345   }
346 #else
347   ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscInt),&aij->colmap);CHKERRQ(ierr);
348   ierr = PetscLogObjectMemory(mat,mat->cmap->N*sizeof(PetscInt));CHKERRQ(ierr);
349   ierr = PetscMemzero(aij->colmap,mat->cmap->N*sizeof(PetscInt));CHKERRQ(ierr);
350   for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
351 #endif
352   PetscFunctionReturn(0);
353 }
354 
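/*
   The two macros below insert (or add to) a single entry of one row of the diagonal (A) or
   off-diagonal (B) sequential block: a short binary search narrows the interval of the sorted
   column indices rp1/rp2, a linear scan finds the column, and when the column is new and new
   nonzeros are allowed the row is grown with MatSeqXAIJReallocateAIJ() and the trailing
   entries are shifted up to make room.
*/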
355 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \
356 { \
357     if (col <= lastcol1) low1 = 0; else high1 = nrow1; \
358     lastcol1 = col;\
359     while (high1-low1 > 5) { \
360       t = (low1+high1)/2; \
361       if (rp1[t] > col) high1 = t; \
362       else             low1  = t; \
363     } \
364       for (_i=low1; _i<high1; _i++) { \
365         if (rp1[_i] > col) break; \
366         if (rp1[_i] == col) { \
367           if (addv == ADD_VALUES) ap1[_i] += value;   \
368           else                    ap1[_i] = value; \
369           goto a_noinsert; \
370         } \
371       }  \
372       if (value == 0.0 && ignorezeroentries) {low1 = 0; high1 = nrow1;goto a_noinsert;} \
373       if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;}		\
374       if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
375       MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \
376       N = nrow1++ - 1; a->nz++; high1++; \
377       /* shift up all the later entries in this row */ \
378       for (ii=N; ii>=_i; ii--) { \
379         rp1[ii+1] = rp1[ii]; \
380         ap1[ii+1] = ap1[ii]; \
381       } \
382       rp1[_i] = col;  \
383       ap1[_i] = value;  \
384       a_noinsert: ; \
385       ailen[row] = nrow1; \
386 }
387 
388 
389 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \
390 { \
391     if (col <= lastcol2) low2 = 0; else high2 = nrow2; \
392     lastcol2 = col;\
393     while (high2-low2 > 5) { \
394       t = (low2+high2)/2; \
395       if (rp2[t] > col) high2 = t; \
396       else             low2  = t; \
397     } \
398     for (_i=low2; _i<high2; _i++) {		\
399       if (rp2[_i] > col) break;			\
400       if (rp2[_i] == col) {			      \
401 	if (addv == ADD_VALUES) ap2[_i] += value;     \
402 	else                    ap2[_i] = value;      \
403 	goto b_noinsert;			      \
404       }						      \
405     }							      \
406     if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
407     if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;}		\
408     if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
409     MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \
410     N = nrow2++ - 1; b->nz++; high2++;					\
411     /* shift up all the later entries in this row */			\
412     for (ii=N; ii>=_i; ii--) {						\
413       rp2[ii+1] = rp2[ii];						\
414       ap2[ii+1] = ap2[ii];						\
415     }									\
416     rp2[_i] = col;							\
417     ap2[_i] = value;							\
418     b_noinsert: ;								\
419     bilen[row] = nrow2;							\
420 }
421 
422 #undef __FUNCT__
423 #define __FUNCT__ "MatSetValuesRow_MPIAIJ"
424 PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[])
425 {
426   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)A->data;
427   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data;
428   PetscErrorCode ierr;
429   PetscInt       l,*garray = mat->garray,diag;
430 
431   PetscFunctionBegin;
432   /* code only works for square matrices A */
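  /* v[] is assumed to contain the entire row in global column order: the off-diagonal entries
     to the left of the diagonal block, then the diagonal-block entries, then the off-diagonal
     entries to the right; the three copies below peel it apart in that order */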
433 
434   /* find the number of entries in this row that lie to the left of the diagonal block */
435   ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr);
436   row  = row - diag;
437   for (l=0; l<b->i[row+1]-b->i[row]; l++) {
438     if (garray[b->j[b->i[row]+l]] > diag) break;
439   }
440   ierr = PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));CHKERRQ(ierr);
441 
442   /* diagonal part */
443   ierr = PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));CHKERRQ(ierr);
444 
445   /* right of diagonal part */
446   ierr = PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));CHKERRQ(ierr);
447   PetscFunctionReturn(0);
448 }
449 
450 #undef __FUNCT__
451 #define __FUNCT__ "MatSetValues_MPIAIJ"
452 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
453 {
454   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
455   PetscScalar    value;
456   PetscErrorCode ierr;
457   PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
458   PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
459   PetscBool      roworiented = aij->roworiented;
460 
461   /* Some Variables required in the macro */
462   Mat            A = aij->A;
463   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data;
464   PetscInt       *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
465   MatScalar      *aa = a->a;
466   PetscBool      ignorezeroentries = a->ignorezeroentries;
467   Mat            B = aij->B;
468   Mat_SeqAIJ     *b = (Mat_SeqAIJ*)B->data;
469   PetscInt       *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
470   MatScalar      *ba = b->a;
471 
472   PetscInt       *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
473   PetscInt       nonew = a->nonew;
474   MatScalar      *ap1,*ap2;
475 
476   PetscFunctionBegin;
477   if (v) PetscValidScalarPointer(v,6);
478   for (i=0; i<m; i++) {
479     if (im[i] < 0) continue;
480 #if defined(PETSC_USE_DEBUG)
481     if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
482 #endif
483     if (im[i] >= rstart && im[i] < rend) {
484       row      = im[i] - rstart;
485       lastcol1 = -1;
486       rp1      = aj + ai[row];
487       ap1      = aa + ai[row];
488       rmax1    = aimax[row];
489       nrow1    = ailen[row];
490       low1     = 0;
491       high1    = nrow1;
492       lastcol2 = -1;
493       rp2      = bj + bi[row];
494       ap2      = ba + bi[row];
495       rmax2    = bimax[row];
496       nrow2    = bilen[row];
497       low2     = 0;
498       high2    = nrow2;
499 
500       for (j=0; j<n; j++) {
501         if (v) {if (roworiented) value = v[i*n+j]; else value = v[i+j*m];} else value = 0.0;
502         if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
503         if (in[j] >= cstart && in[j] < cend){
504           col = in[j] - cstart;
505           MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
506         } else if (in[j] < 0) continue;
507 #if defined(PETSC_USE_DEBUG)
508         else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
509 #endif
510         else {
511           if (mat->was_assembled) {
512             if (!aij->colmap) {
513               ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
514             }
515 #if defined (PETSC_USE_CTABLE)
516             ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
517 	    col--;
518 #else
519             col = aij->colmap[in[j]] - 1;
520 #endif
521             if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
522               ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
523               col =  in[j];
524               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
525               B = aij->B;
526               b = (Mat_SeqAIJ*)B->data;
527               bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a;
528               rp2      = bj + bi[row];
529               ap2      = ba + bi[row];
530               rmax2    = bimax[row];
531               nrow2    = bilen[row];
532               low2     = 0;
533               high2    = nrow2;
534               bm       = aij->B->rmap->n;
535               ba = b->a;
536             }
537           } else col = in[j];
538           MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
539         }
540       }
541     } else {
542       if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
543       if (!aij->donotstash) {
544         if (roworiented) {
545           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
546         } else {
547           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
548         }
549       }
550     }
551   }
552   PetscFunctionReturn(0);
553 }
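/*
   Usage note (sketch): rows owned by another process are not inserted here but stashed and
   communicated during assembly, so a typical calling sequence is

      MatSetValues(mat,1,&grow,ncols,gcols,vals,ADD_VALUES);  // grow may be off-process
      ...
      MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);
      MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);
*/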
554 
555 #undef __FUNCT__
556 #define __FUNCT__ "MatGetValues_MPIAIJ"
557 PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
558 {
559   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
560   PetscErrorCode ierr;
561   PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
562   PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
563 
564   PetscFunctionBegin;
565   for (i=0; i<m; i++) {
566     if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/
567     if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
568     if (idxm[i] >= rstart && idxm[i] < rend) {
569       row = idxm[i] - rstart;
570       for (j=0; j<n; j++) {
571         if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */
572         if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
573         if (idxn[j] >= cstart && idxn[j] < cend){
574           col = idxn[j] - cstart;
575           ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
576         } else {
577           if (!aij->colmap) {
578             ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
579           }
580 #if defined (PETSC_USE_CTABLE)
581           ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
582           col --;
583 #else
584           col = aij->colmap[idxn[j]] - 1;
585 #endif
586           if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
587           else {
588             ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
589           }
590         }
591       }
592     } else {
593       SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
594     }
595   }
596   PetscFunctionReturn(0);
597 }
598 
599 extern PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat,Vec,Vec);
600 
601 #undef __FUNCT__
602 #define __FUNCT__ "MatAssemblyBegin_MPIAIJ"
603 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
604 {
605   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
606   PetscErrorCode ierr;
607   PetscInt       nstash,reallocs;
608   InsertMode     addv;
609 
610   PetscFunctionBegin;
611   if (aij->donotstash || mat->nooffprocentries) {
612     PetscFunctionReturn(0);
613   }
614 
615   /* make sure all processors are using either INSERT_VALUES or ADD_VALUES */
616   ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,((PetscObject)mat)->comm);CHKERRQ(ierr);
617   if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
618   mat->insertmode = addv; /* in case this processor had no cache */
619 
620   ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
621   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
622   ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
623   PetscFunctionReturn(0);
624 }
625 
626 #undef __FUNCT__
627 #define __FUNCT__ "MatAssemblyEnd_MPIAIJ"
628 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
629 {
630   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
631   Mat_SeqAIJ     *a=(Mat_SeqAIJ *)aij->A->data;
632   PetscErrorCode ierr;
633   PetscMPIInt    n;
634   PetscInt       i,j,rstart,ncols,flg;
635   PetscInt       *row,*col;
636   PetscBool      other_disassembled;
637   PetscScalar    *val;
638   InsertMode     addv = mat->insertmode;
639 
640   /* do not use 'b = (Mat_SeqAIJ *)aij->B->data' as B can be reset in disassembly */
641   PetscFunctionBegin;
642   if (!aij->donotstash && !mat->nooffprocentries) {
643     while (1) {
644       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
645       if (!flg) break;
646 
647       for (i=0; i<n;) {
648         /* Now identify the consecutive vals belonging to the same row */
649         for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
650         if (j < n) ncols = j-i;
651         else       ncols = n-i;
652         /* Now assemble all these values with a single function call */
653         ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
654         i = j;
655       }
656     }
657     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
658   }
659   ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
660   ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);
661 
662   /* determine if any processor has disassembled, if so we must
663      also disassemble ourselves, in order that we may reassemble. */
664   /*
665      if nonzero structure of submatrix B cannot change then we know that
666      no processor disassembled thus we can skip this stuff
667   */
668   if (!((Mat_SeqAIJ*)aij->B->data)->nonew)  {
669     ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,((PetscObject)mat)->comm);CHKERRQ(ierr);
670     if (mat->was_assembled && !other_disassembled) {
671       ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
672     }
673   }
674   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
675     ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
676   }
677   ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr);
678   ierr = MatSetOption(aij->B,MAT_CHECK_COMPRESSED_ROW,PETSC_FALSE);CHKERRQ(ierr);
679   ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
680   ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);
681 
682   ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
683   aij->rowvalues = 0;
684 
685   /* used by MatAXPY() */
686   a->xtoy = 0; ((Mat_SeqAIJ *)aij->B->data)->xtoy = 0;  /* b->xtoy = 0 */
687   a->XtoY = 0; ((Mat_SeqAIJ *)aij->B->data)->XtoY = 0;  /* b->XtoY = 0 */
688 
689   ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
690   if (a->inode.size) mat->ops->multdiagonalblock = MatMultDiagonalBlock_MPIAIJ;
691   PetscFunctionReturn(0);
692 }
693 
694 #undef __FUNCT__
695 #define __FUNCT__ "MatZeroEntries_MPIAIJ"
696 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
697 {
698   Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
699   PetscErrorCode ierr;
700 
701   PetscFunctionBegin;
702   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
703   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
704   PetscFunctionReturn(0);
705 }
706 
707 #undef __FUNCT__
708 #define __FUNCT__ "MatZeroRows_MPIAIJ"
709 PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
710 {
711   Mat_MPIAIJ        *l = (Mat_MPIAIJ*)A->data;
712   PetscErrorCode    ierr;
713   PetscMPIInt       size = l->size,imdex,n,rank = l->rank,tag = ((PetscObject)A)->tag,lastidx = -1;
714   PetscInt          i,*owners = A->rmap->range;
715   PetscInt          *nprocs,j,idx,nsends,row;
716   PetscInt          nmax,*svalues,*starts,*owner,nrecvs;
717   PetscInt          *rvalues,count,base,slen,*source;
718   PetscInt          *lens,*lrows,*values,rstart=A->rmap->rstart;
719   MPI_Comm          comm = ((PetscObject)A)->comm;
720   MPI_Request       *send_waits,*recv_waits;
721   MPI_Status        recv_status,*send_status;
722   const PetscScalar *xx;
723   PetscScalar       *bb;
724 #if defined(PETSC_DEBUG)
725   PetscBool      found = PETSC_FALSE;
726 #endif
727 
728   PetscFunctionBegin;
729   /*  first count number of contributors to each processor */
730   ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
731   ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
732   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/
733   j = 0;
734   for (i=0; i<N; i++) {
735     if (lastidx > (idx = rows[i])) j = 0;
736     lastidx = idx;
737     for (; j<size; j++) {
738       if (idx >= owners[j] && idx < owners[j+1]) {
739         nprocs[2*j]++;
740         nprocs[2*j+1] = 1;
741         owner[i] = j;
742 #if defined(PETSC_DEBUG)
743         found = PETSC_TRUE;
744 #endif
745         break;
746       }
747     }
748 #if defined(PETSC_DEBUG)
749     if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
750     found = PETSC_FALSE;
751 #endif
752   }
753   nsends = 0;  for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}
754 
755   if (A->nooffproczerorows) {
756     if (nsends > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"You called MatSetOption(,MAT_NO_OFF_PROC_ZERO_ROWS,PETSC_TRUE) but set an off process zero row");
757     nrecvs = nsends;
758     nmax   = N;
759   } else {
760     /* inform other processors of number of messages and max length*/
761     ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);
762   }
763 
764   /* post receives:   */
765   ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr);
766   ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
767   for (i=0; i<nrecvs; i++) {
768     ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
769   }
770 
771   /* do sends:
772       1) starts[i] gives the starting index in svalues for stuff going to
773          the ith processor
774   */
775   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr);
776   ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
777   ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr);
778   starts[0] = 0;
779   for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
780   for (i=0; i<N; i++) {
781     svalues[starts[owner[i]]++] = rows[i];
782   }
783 
784   starts[0] = 0;
785   for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
786   count = 0;
787   for (i=0; i<size; i++) {
788     if (nprocs[2*i+1]) {
789       ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
790     }
791   }
792   ierr = PetscFree(starts);CHKERRQ(ierr);
793 
794   base = owners[rank];
795 
796   /*  wait on receives */
797   ierr   = PetscMalloc2(nrecvs,PetscInt,&lens,nrecvs,PetscInt,&source);CHKERRQ(ierr);
798   count  = nrecvs; slen = 0;
799   while (count) {
800     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
801     /* unpack receives into our local space */
802     ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);
803     source[imdex]  = recv_status.MPI_SOURCE;
804     lens[imdex]    = n;
805     slen          += n;
806     count--;
807   }
808   ierr = PetscFree(recv_waits);CHKERRQ(ierr);
809 
810   /* move the data into the send scatter */
811   ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr);
812   count = 0;
813   for (i=0; i<nrecvs; i++) {
814     values = rvalues + i*nmax;
815     for (j=0; j<lens[i]; j++) {
816       lrows[count++] = values[j] - base;
817     }
818   }
819   ierr = PetscFree(rvalues);CHKERRQ(ierr);
820   ierr = PetscFree2(lens,source);CHKERRQ(ierr);
821   ierr = PetscFree(owner);CHKERRQ(ierr);
822   ierr = PetscFree(nprocs);CHKERRQ(ierr);
823 
824   /* fix right hand side if needed */
825   if (x && b) {
826     ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr);
827     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
828     for (i=0; i<slen; i++) {
829       bb[lrows[i]] = diag*xx[lrows[i]];
830     }
831     ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr);
832     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
833   }
834   /*
835         Zero the required rows. If the "diagonal block" of the matrix
836      is square and the user wishes to set the diagonal, we use separate
837      code so that MatSetValues() is not called for each diagonal entry, which
838      would allocate new memory with many mallocs and slow things down.
839 
840   */
841   /* must zero l->B before l->A because the (diag) case below may put values into l->B*/
842   ierr = MatZeroRows(l->B,slen,lrows,0.0,0,0);CHKERRQ(ierr);
843   if ((diag != 0.0) && (l->A->rmap->N == l->A->cmap->N)) {
844     ierr = MatZeroRows(l->A,slen,lrows,diag,0,0);CHKERRQ(ierr);
845   } else if (diag != 0.0) {
846     ierr = MatZeroRows(l->A,slen,lrows,0.0,0,0);CHKERRQ(ierr);
847     if (((Mat_SeqAIJ*)l->A->data)->nonew) {
848       SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\n\
849 MAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
850     }
851     for (i = 0; i < slen; i++) {
852       row  = lrows[i] + rstart;
853       ierr = MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr);
854     }
855     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
856     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
857   } else {
858     ierr = MatZeroRows(l->A,slen,lrows,0.0,0,0);CHKERRQ(ierr);
859   }
860   ierr = PetscFree(lrows);CHKERRQ(ierr);
861 
862   /* wait on sends */
863   if (nsends) {
864     ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
865     ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
866     ierr = PetscFree(send_status);CHKERRQ(ierr);
867   }
868   ierr = PetscFree(send_waits);CHKERRQ(ierr);
869   ierr = PetscFree(svalues);CHKERRQ(ierr);
870   PetscFunctionReturn(0);
871 }
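/*
   Example usage (sketch, via the public MatZeroRows() interface): zero a set of global rows
   (possibly owned by other processes), put diag on their diagonal, and, when x and b are
   supplied, adjust b so that b_row = diag*x_row for each zeroed row.

      MatZeroRows(A,nrows,rows,1.0,x,b);   // rows[] holds global row indices
*/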
872 
873 #undef __FUNCT__
874 #define __FUNCT__ "MatZeroRowsColumns_MPIAIJ"
875 PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
876 {
877   Mat_MPIAIJ        *l = (Mat_MPIAIJ*)A->data;
878   PetscErrorCode    ierr;
879   PetscMPIInt       size = l->size,imdex,n,rank = l->rank,tag = ((PetscObject)A)->tag,lastidx = -1;
880   PetscInt          i,*owners = A->rmap->range;
881   PetscInt          *nprocs,j,idx,nsends;
882   PetscInt          nmax,*svalues,*starts,*owner,nrecvs;
883   PetscInt          *rvalues,count,base,slen,*source;
884   PetscInt          *lens,*lrows,*values,m;
885   MPI_Comm          comm = ((PetscObject)A)->comm;
886   MPI_Request       *send_waits,*recv_waits;
887   MPI_Status        recv_status,*send_status;
888   const PetscScalar *xx;
889   PetscScalar       *bb,*mask;
890   Vec               xmask,lmask;
891   Mat_SeqAIJ        *aij = (Mat_SeqAIJ*)l->B->data;
892   const PetscInt    *aj, *ii,*ridx;
893   PetscScalar       *aa;
894 #if defined(PETSC_DEBUG)
895   PetscBool         found = PETSC_FALSE;
896 #endif
897 
898   PetscFunctionBegin;
899   /*  first count number of contributors to each processor */
900   ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
901   ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
902   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/
903   j = 0;
904   for (i=0; i<N; i++) {
905     if (lastidx > (idx = rows[i])) j = 0;
906     lastidx = idx;
907     for (; j<size; j++) {
908       if (idx >= owners[j] && idx < owners[j+1]) {
909         nprocs[2*j]++;
910         nprocs[2*j+1] = 1;
911         owner[i] = j;
912 #if defined(PETSC_DEBUG)
913         found = PETSC_TRUE;
914 #endif
915         break;
916       }
917     }
918 #if defined(PETSC_DEBUG)
919     if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
920     found = PETSC_FALSE;
921 #endif
922   }
923   nsends = 0;  for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}
924 
925   /* inform other processors of number of messages and max length*/
926   ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);
927 
928   /* post receives:   */
929   ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr);
930   ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
931   for (i=0; i<nrecvs; i++) {
932     ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
933   }
934 
935   /* do sends:
936       1) starts[i] gives the starting index in svalues for stuff going to
937          the ith processor
938   */
939   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr);
940   ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
941   ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr);
942   starts[0] = 0;
943   for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
944   for (i=0; i<N; i++) {
945     svalues[starts[owner[i]]++] = rows[i];
946   }
947 
948   starts[0] = 0;
949   for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
950   count = 0;
951   for (i=0; i<size; i++) {
952     if (nprocs[2*i+1]) {
953       ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
954     }
955   }
956   ierr = PetscFree(starts);CHKERRQ(ierr);
957 
958   base = owners[rank];
959 
960   /*  wait on receives */
961   ierr   = PetscMalloc2(nrecvs,PetscInt,&lens,nrecvs,PetscInt,&source);CHKERRQ(ierr);
962   count  = nrecvs; slen = 0;
963   while (count) {
964     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
965     /* unpack receives into our local space */
966     ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);
967     source[imdex]  = recv_status.MPI_SOURCE;
968     lens[imdex]    = n;
969     slen          += n;
970     count--;
971   }
972   ierr = PetscFree(recv_waits);CHKERRQ(ierr);
973 
974   /* move the data into the send scatter */
975   ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr);
976   count = 0;
977   for (i=0; i<nrecvs; i++) {
978     values = rvalues + i*nmax;
979     for (j=0; j<lens[i]; j++) {
980       lrows[count++] = values[j] - base;
981     }
982   }
983   ierr = PetscFree(rvalues);CHKERRQ(ierr);
984   ierr = PetscFree2(lens,source);CHKERRQ(ierr);
985   ierr = PetscFree(owner);CHKERRQ(ierr);
986   ierr = PetscFree(nprocs);CHKERRQ(ierr);
987   /* lrows are the local rows to be zeroed, slen is the number of local rows */
988 
989   /* zero diagonal part of matrix */
990   ierr = MatZeroRowsColumns(l->A,slen,lrows,diag,x,b);CHKERRQ(ierr);
991 
992   /* handle off diagonal part of matrix */
993   ierr = MatGetVecs(A,&xmask,PETSC_NULL);CHKERRQ(ierr);
994   ierr = VecDuplicate(l->lvec,&lmask);CHKERRQ(ierr);
995   ierr = VecGetArray(xmask,&bb);CHKERRQ(ierr);
996   for (i=0; i<slen; i++) {
997     bb[lrows[i]] = 1;
998   }
999   ierr = VecRestoreArray(xmask,&bb);CHKERRQ(ierr);
1000   ierr = VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1001   ierr = VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1002   ierr = VecDestroy(&xmask);CHKERRQ(ierr);
1003   ierr = VecScatterBegin(l->Mvctx,x,l->lvec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1004   ierr = VecScatterEnd(l->Mvctx,x,l->lvec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1005   ierr = VecGetArrayRead(l->lvec,&xx);CHKERRQ(ierr);
1006   ierr = VecGetArray(lmask,&mask);CHKERRQ(ierr);
1007   ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
1008 
1009   /* remove zeroed rows of off diagonal matrix */
1010   ii = aij->i;
1011   for (i=0; i<slen; i++) {
1012     ierr = PetscMemzero(aij->a + ii[lrows[i]],(ii[lrows[i]+1] - ii[lrows[i]])*sizeof(PetscScalar));CHKERRQ(ierr);
1013   }
1014 
1015   /* loop over all elements of the off-process part of the matrix, zeroing removed columns */
1016   if (aij->compressedrow.use){
1017     m    = aij->compressedrow.nrows;
1018     ii   = aij->compressedrow.i;
1019     ridx = aij->compressedrow.rindex;
1020     for (i=0; i<m; i++){
1021       n   = ii[i+1] - ii[i];
1022       aj  = aij->j + ii[i];
1023       aa  = aij->a + ii[i];
1024 
1025       for (j=0; j<n; j++) {
1026         if (PetscAbsScalar(mask[*aj])) {
1027           bb[*ridx] -= *aa*xx[*aj];
1028           *aa        = 0.0;
1029         }
1030         aa++;
1031         aj++;
1032       }
1033       ridx++;
1034     }
1035   } else { /* do not use compressed row format */
1036     m = l->B->rmap->n;
1037     for (i=0; i<m; i++) {
1038       n   = ii[i+1] - ii[i];
1039       aj  = aij->j + ii[i];
1040       aa  = aij->a + ii[i];
1041       for (j=0; j<n; j++) {
1042         if (PetscAbsScalar(mask[*aj])) {
1043           bb[i] -= *aa*xx[*aj];
1044           *aa    = 0.0;
1045         }
1046         aa++;
1047         aj++;
1048       }
1049     }
1050   }
1051   ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
1052   ierr = VecRestoreArray(lmask,&mask);CHKERRQ(ierr);
1053   ierr = VecRestoreArrayRead(l->lvec,&xx);CHKERRQ(ierr);
1054   ierr = VecDestroy(&lmask);CHKERRQ(ierr);
1055   ierr = PetscFree(lrows);CHKERRQ(ierr);
1056 
1057   /* wait on sends */
1058   if (nsends) {
1059     ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
1060     ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
1061     ierr = PetscFree(send_status);CHKERRQ(ierr);
1062   }
1063   ierr = PetscFree(send_waits);CHKERRQ(ierr);
1064   ierr = PetscFree(svalues);CHKERRQ(ierr);
1065 
1066   PetscFunctionReturn(0);
1067 }
1068 
1069 #undef __FUNCT__
1070 #define __FUNCT__ "MatMult_MPIAIJ"
1071 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
1072 {
1073   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1074   PetscErrorCode ierr;
1075   PetscInt       nt;
1076 
1077   PetscFunctionBegin;
1078   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
1079   if (nt != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt);
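  /* overlap communication with computation: start scattering the off-process entries of xx into
     a->lvec, multiply by the local diagonal block while the messages are in flight, then finish
     the scatter and add the off-diagonal block contribution */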
1080   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1081   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
1082   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1083   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
1084   PetscFunctionReturn(0);
1085 }
1086 
1087 #undef __FUNCT__
1088 #define __FUNCT__ "MatMultDiagonalBlock_MPIAIJ"
1089 PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx)
1090 {
1091   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1092   PetscErrorCode ierr;
1093 
1094   PetscFunctionBegin;
1095   ierr = MatMultDiagonalBlock(a->A,bb,xx);CHKERRQ(ierr);
1096   PetscFunctionReturn(0);
1097 }
1098 
1099 #undef __FUNCT__
1100 #define __FUNCT__ "MatMultAdd_MPIAIJ"
1101 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1102 {
1103   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1104   PetscErrorCode ierr;
1105 
1106   PetscFunctionBegin;
1107   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1108   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1109   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1110   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
1111   PetscFunctionReturn(0);
1112 }
1113 
1114 #undef __FUNCT__
1115 #define __FUNCT__ "MatMultTranspose_MPIAIJ"
1116 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
1117 {
1118   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1119   PetscErrorCode ierr;
1120   PetscBool      merged;
1121 
1122   PetscFunctionBegin;
1123   ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr);
1124   /* do nondiagonal part */
1125   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1126   if (!merged) {
1127     /* send it on its way */
1128     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1129     /* do local part */
1130     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
1131     /* receive remote parts: note this assumes the values are not actually */
1132   /* added in yy until the next line */
1133     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1134   } else {
1135     /* do local part */
1136     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
1137     /* send it on its way */
1138     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1139     /* values were actually received in the Begin() but we still need to call this no-op */
1140     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1141   }
1142   PetscFunctionReturn(0);
1143 }
1144 
1145 EXTERN_C_BEGIN
1146 #undef __FUNCT__
1147 #define __FUNCT__ "MatIsTranspose_MPIAIJ"
1148 PetscErrorCode  MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscBool  *f)
1149 {
1150   MPI_Comm       comm;
1151   Mat_MPIAIJ     *Aij = (Mat_MPIAIJ *) Amat->data, *Bij;
1152   Mat            Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs;
1153   IS             Me,Notme;
1154   PetscErrorCode ierr;
1155   PetscInt       M,N,first,last,*notme,i;
1156   PetscMPIInt    size;
1157 
1158   PetscFunctionBegin;
1159 
1160   /* Easy test: the diagonal blocks must be transposes of each other */
1161   Bij = (Mat_MPIAIJ *) Bmat->data; Bdia = Bij->A;
1162   ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr);
1163   if (!*f) PetscFunctionReturn(0);
1164   ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr);
1165   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1166   if (size == 1) PetscFunctionReturn(0);
1167 
1168   /* Hard test: off-diagonal block. This takes a MatGetSubMatrices() call. */
1169   ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr);
1170   ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr);
1171   ierr = PetscMalloc((N-last+first)*sizeof(PetscInt),&notme);CHKERRQ(ierr);
1172   for (i=0; i<first; i++) notme[i] = i;
1173   for (i=last; i<M; i++) notme[i-last+first] = i;
1174   ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,PETSC_COPY_VALUES,&Notme);CHKERRQ(ierr);
1175   ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr);
1176   ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr);
1177   Aoff = Aoffs[0];
1178   ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr);
1179   Boff = Boffs[0];
1180   ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr);
1181   ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr);
1182   ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr);
1183   ierr = ISDestroy(&Me);CHKERRQ(ierr);
1184   ierr = ISDestroy(&Notme);CHKERRQ(ierr);
1185   ierr = PetscFree(notme);CHKERRQ(ierr);
1186    PetscFunctionReturn(0);
1187 }
1188 EXTERN_C_END
1189 
1190 #undef __FUNCT__
1191 #define __FUNCT__ "MatMultTransposeAdd_MPIAIJ"
1192 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1193 {
1194   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1195   PetscErrorCode ierr;
1196 
1197   PetscFunctionBegin;
1198   /* do nondiagonal part */
1199   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1200   /* send it on its way */
1201   ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1202   /* do local part */
1203   ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1204   /* receive remote parts */
1205   ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1206   PetscFunctionReturn(0);
1207 }
1208 
1209 /*
1210   This only works correctly for square matrices where the subblock A->A is the
1211    diagonal block
1212 */
1213 #undef __FUNCT__
1214 #define __FUNCT__ "MatGetDiagonal_MPIAIJ"
1215 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v)
1216 {
1217   PetscErrorCode ierr;
1218   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1219 
1220   PetscFunctionBegin;
1221   if (A->rmap->N != A->cmap->N) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
1222   if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
1223   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
1224   PetscFunctionReturn(0);
1225 }
1226 
1227 #undef __FUNCT__
1228 #define __FUNCT__ "MatScale_MPIAIJ"
1229 PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa)
1230 {
1231   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1232   PetscErrorCode ierr;
1233 
1234   PetscFunctionBegin;
1235   ierr = MatScale(a->A,aa);CHKERRQ(ierr);
1236   ierr = MatScale(a->B,aa);CHKERRQ(ierr);
1237   PetscFunctionReturn(0);
1238 }
1239 
1240 #undef __FUNCT__
1241 #define __FUNCT__ "MatDestroy_MPIAIJ"
1242 PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
1243 {
1244   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1245   PetscErrorCode ierr;
1246 
1247   PetscFunctionBegin;
1248 #if defined(PETSC_USE_LOG)
1249   PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N);
1250 #endif
1251   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
1252   ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
1253   ierr = MatDestroy(&aij->A);CHKERRQ(ierr);
1254   ierr = MatDestroy(&aij->B);CHKERRQ(ierr);
1255 #if defined (PETSC_USE_CTABLE)
1256   ierr = PetscTableDestroy(&aij->colmap);CHKERRQ(ierr);
1257 #else
1258   ierr = PetscFree(aij->colmap);CHKERRQ(ierr);
1259 #endif
1260   ierr = PetscFree(aij->garray);CHKERRQ(ierr);
1261   ierr = VecDestroy(&aij->lvec);CHKERRQ(ierr);
1262   ierr = VecScatterDestroy(&aij->Mvctx);CHKERRQ(ierr);
1263   ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
1264   ierr = PetscFree(aij->ld);CHKERRQ(ierr);
1265   ierr = PetscFree(mat->data);CHKERRQ(ierr);
1266 
1267   ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
1268   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr);
1269   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr);
1270   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr);
1271   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C","",PETSC_NULL);CHKERRQ(ierr);
1272   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
1273   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr);
1274   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C","",PETSC_NULL);CHKERRQ(ierr);
1275   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C","",PETSC_NULL);CHKERRQ(ierr);
1276   PetscFunctionReturn(0);
1277 }
1278 
1279 #undef __FUNCT__
1280 #define __FUNCT__ "MatView_MPIAIJ_Binary"
1281 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
1282 {
1283   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
1284   Mat_SeqAIJ*       A = (Mat_SeqAIJ*)aij->A->data;
1285   Mat_SeqAIJ*       B = (Mat_SeqAIJ*)aij->B->data;
1286   PetscErrorCode    ierr;
1287   PetscMPIInt       rank,size,tag = ((PetscObject)viewer)->tag;
1288   int               fd;
1289   PetscInt          nz,header[4],*row_lengths,*range=0,rlen,i;
1290   PetscInt          nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap->rstart,rnz;
1291   PetscScalar       *column_values;
1292   PetscInt          message_count,flowcontrolcount;
1293 
1294   PetscFunctionBegin;
1295   ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr);
1296   ierr = MPI_Comm_size(((PetscObject)mat)->comm,&size);CHKERRQ(ierr);
1297   nz   = A->nz + B->nz;
1298   if (!rank) {
1299     header[0] = MAT_FILE_CLASSID;
1300     header[1] = mat->rmap->N;
1301     header[2] = mat->cmap->N;
1302     ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
1303     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1304     ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1305     /* get largest number of rows any processor has */
1306     rlen = mat->rmap->n;
1307     range = mat->rmap->range;
1308     for (i=1; i<size; i++) {
1309       rlen = PetscMax(rlen,range[i+1] - range[i]);
1310     }
1311   } else {
1312     ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
1313     rlen = mat->rmap->n;
1314   }
1315 
1316   /* load up the local row counts */
1317   ierr = PetscMalloc((rlen+1)*sizeof(PetscInt),&row_lengths);CHKERRQ(ierr);
1318   for (i=0; i<mat->rmap->n; i++) {
1319     row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
1320   }
1321 
1322   /* store the row lengths to the file */
1323   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1324   if (!rank) {
1325     MPI_Status status;
1326     ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap->n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1327     for (i=1; i<size; i++) {
1328       ierr = PetscViewerFlowControlStepMaster(viewer,i,message_count,flowcontrolcount);CHKERRQ(ierr);
1329       rlen = range[i+1] - range[i];
1330       ierr = MPI_Recv(row_lengths,rlen,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
1331       ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1332     }
1333     ierr = PetscViewerFlowControlEndMaster(viewer,message_count);CHKERRQ(ierr);
1334   } else {
1335     ierr = PetscViewerFlowControlStepWorker(viewer,rank,message_count);CHKERRQ(ierr);
1336     ierr = MPI_Send(row_lengths,mat->rmap->n,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1337     ierr = PetscViewerFlowControlEndWorker(viewer,message_count);CHKERRQ(ierr);
1338   }
1339   ierr = PetscFree(row_lengths);CHKERRQ(ierr);
1340 
1341   /* load up the local column indices */
1342   nzmax = nz; /* 0th processor needs as much space as the largest processor needs */
1343   ierr = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
1344   ierr = PetscMalloc((nzmax+1)*sizeof(PetscInt),&column_indices);CHKERRQ(ierr);
1345   cnt  = 0;
1346   for (i=0; i<mat->rmap->n; i++) {
1347     for (j=B->i[i]; j<B->i[i+1]; j++) {
1348       if ( (col = garray[B->j[j]]) > cstart) break;
1349       column_indices[cnt++] = col;
1350     }
1351     for (k=A->i[i]; k<A->i[i+1]; k++) {
1352       column_indices[cnt++] = A->j[k] + cstart;
1353     }
1354     for (; j<B->i[i+1]; j++) {
1355       column_indices[cnt++] = garray[B->j[j]];
1356     }
1357   }
1358   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
1359 
1360   /* store the column indices to the file */
1361   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1362   if (!rank) {
1363     MPI_Status status;
1364     ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1365     for (i=1; i<size; i++) {
1366       ierr = PetscViewerFlowControlStepMaster(viewer,i,message_count,flowcontrolcount);CHKERRQ(ierr);
1367       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
1368       if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: rnz = %D nzmax = %D",rnz,nzmax);
1369       ierr = MPI_Recv(column_indices,rnz,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
1370       ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1371     }
1372     ierr = PetscViewerFlowControlEndMaster(viewer,message_count);CHKERRQ(ierr);
1373   } else {
1374     ierr = PetscViewerFlowControlStepWorker(viewer,rank,message_count);CHKERRQ(ierr);
1375     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1376     ierr = MPI_Send(column_indices,nz,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1377     ierr = PetscViewerFlowControlEndWorker(viewer,message_count);CHKERRQ(ierr);
1378   }
1379   ierr = PetscFree(column_indices);CHKERRQ(ierr);
1380 
1381   /* load up the local column values */
1382   ierr = PetscMalloc((nzmax+1)*sizeof(PetscScalar),&column_values);CHKERRQ(ierr);
1383   cnt  = 0;
1384   for (i=0; i<mat->rmap->n; i++) {
1385     for (j=B->i[i]; j<B->i[i+1]; j++) {
1386       if ( garray[B->j[j]] > cstart) break;
1387       column_values[cnt++] = B->a[j];
1388     }
1389     for (k=A->i[i]; k<A->i[i+1]; k++) {
1390       column_values[cnt++] = A->a[k];
1391     }
1392     for (; j<B->i[i+1]; j++) {
1393       column_values[cnt++] = B->a[j];
1394     }
1395   }
1396   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
1397 
1398   /* store the column values to the file */
1399   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1400   if (!rank) {
1401     MPI_Status status;
1402     ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1403     for (i=1; i<size; i++) {
1404       ierr = PetscViewerFlowControlStepMaster(viewer,i,message_count,flowcontrolcount);CHKERRQ(ierr);
1405       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
1406       if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: rnz = %D nzmax = %D",rnz,nzmax);
1407       ierr = MPI_Recv(column_values,rnz,MPIU_SCALAR,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
1408       ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1409     }
1410     ierr = PetscViewerFlowControlEndMaster(viewer,message_count);CHKERRQ(ierr);
1411   } else {
1412     ierr = PetscViewerFlowControlStepWorker(viewer,rank,message_count);CHKERRQ(ierr);
1413     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1414     ierr = MPI_Send(column_values,nz,MPIU_SCALAR,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1415     ierr = PetscViewerFlowControlEndWorker(viewer,message_count);CHKERRQ(ierr);
1416   }
1417   ierr = PetscFree(column_values);CHKERRQ(ierr);
1418   PetscFunctionReturn(0);
1419 }
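/*
   A minimal usage sketch (illustrative only; the file name "matrix.dat" and the variable names are
   arbitrary): the binary path above is normally reached through MatView() with a binary viewer.

     PetscViewer viewer;
     ierr = PetscViewerBinaryOpen(PETSC_COMM_WORLD,"matrix.dat",FILE_MODE_WRITE,&viewer);CHKERRQ(ierr);
     ierr = MatView(mat,viewer);CHKERRQ(ierr);
     ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr);
*/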
1420 
1421 #undef __FUNCT__
1422 #define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket"
1423 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
1424 {
1425   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
1426   PetscErrorCode    ierr;
1427   PetscMPIInt       rank = aij->rank,size = aij->size;
1428   PetscBool         isdraw,iascii,isbinary;
1429   PetscViewer       sviewer;
1430   PetscViewerFormat format;
1431 
1432   PetscFunctionBegin;
1433   ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1434   ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1435   ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1436   if (iascii) {
1437     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1438     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1439       MatInfo    info;
1440       PetscBool  inodes;
1441 
1442       ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr);
1443       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
1444       ierr = MatInodeGetInodeSizes(aij->A,PETSC_NULL,(PetscInt **)&inodes,PETSC_NULL);CHKERRQ(ierr);
1445       ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_TRUE);CHKERRQ(ierr);
1446       if (!inodes) {
1447         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, not using I-node routines\n",
1448 					      rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
1449       } else {
1450         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, using I-node routines\n",
1451 		    rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
1452       }
1453       ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
1454       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1455       ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
1456       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1457       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1458       ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_FALSE);CHKERRQ(ierr);
1459       ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
1460       ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr);
1461       PetscFunctionReturn(0);
1462     } else if (format == PETSC_VIEWER_ASCII_INFO) {
1463       PetscInt   inodecount,inodelimit,*inodes;
1464       ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr);
1465       if (inodes) {
1466         ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr);
1467       } else {
1468         ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr);
1469       }
1470       PetscFunctionReturn(0);
1471     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1472       PetscFunctionReturn(0);
1473     }
1474   } else if (isbinary) {
1475     if (size == 1) {
1476       ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1477       ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
1478     } else {
1479       ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr);
1480     }
1481     PetscFunctionReturn(0);
1482   } else if (isdraw) {
1483     PetscDraw  draw;
1484     PetscBool  isnull;
1485     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
1486     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
1487   }
1488 
1489   if (size == 1) {
1490     ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1491     ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
1492   } else {
1493     /* assemble the entire matrix onto first processor. */
1494     Mat         A;
1495     Mat_SeqAIJ  *Aloc;
1496     PetscInt    M = mat->rmap->N,N = mat->cmap->N,m,*ai,*aj,row,*cols,i,*ct;
1497     MatScalar   *a;
1498 
1499     if (mat->rmap->N > 1024) {
1500       PetscBool  flg = PETSC_FALSE;
1501 
1502       ierr = PetscOptionsGetBool(((PetscObject) mat)->prefix, "-mat_ascii_output_large", &flg,PETSC_NULL);CHKERRQ(ierr);
1503       if (!flg) {
1504         SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_OUTOFRANGE,"ASCII matrix output not allowed for matrices with more than 1024 rows, use binary format instead.\nYou can override this restriction using -mat_ascii_output_large.");
1505       }
1506     }
1507 
1508     ierr = MatCreate(((PetscObject)mat)->comm,&A);CHKERRQ(ierr);
1509     if (!rank) {
1510       ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
1511     } else {
1512       ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
1513     }
1514     /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */
1515     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
1516     ierr = MatMPIAIJSetPreallocation(A,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
1517     ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr);
1518 
1519     /* copy over the A part */
1520     Aloc = (Mat_SeqAIJ*)aij->A->data;
1521     m = aij->A->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1522     row = mat->rmap->rstart;
1523     for (i=0; i<ai[m]; i++) {aj[i] += mat->cmap->rstart;}
1524     for (i=0; i<m; i++) {
1525       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
1526       row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
1527     }
1528     aj = Aloc->j;
1529     for (i=0; i<ai[m]; i++) {aj[i] -= mat->cmap->rstart;}
1530 
1531     /* copy over the B part */
1532     Aloc = (Mat_SeqAIJ*)aij->B->data;
1533     m    = aij->B->rmap->n;  ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1534     row  = mat->rmap->rstart;
1535     ierr = PetscMalloc((ai[m]+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr);
1536     ct   = cols;
1537     for (i=0; i<ai[m]; i++) {cols[i] = aij->garray[aj[i]];}
1538     for (i=0; i<m; i++) {
1539       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
1540       row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
1541     }
1542     ierr = PetscFree(ct);CHKERRQ(ierr);
1543     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1544     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1545     /*
1546        Every process has to make the MatView() call below since the graphics waits are
1547        synchronized across all processes that share the PetscDraw object
1548     */
1549     ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
1550     if (!rank) {
1551       ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1552       /* Set the type name to MATMPIAIJ so that the correct type can be printed out by PetscObjectPrintClassNamePrefixType() in MatView_SeqAIJ_ASCII()*/
1553       ierr = PetscStrcpy(((PetscObject)((Mat_MPIAIJ*)(A->data))->A)->type_name,MATMPIAIJ);CHKERRQ(ierr);
1554       ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
1555     }
1556     ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
1557     ierr = MatDestroy(&A);CHKERRQ(ierr);
1558   }
1559   PetscFunctionReturn(0);
1560 }
1561 
1562 #undef __FUNCT__
1563 #define __FUNCT__ "MatView_MPIAIJ"
1564 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer)
1565 {
1566   PetscErrorCode ierr;
1567   PetscBool      iascii,isdraw,issocket,isbinary;
1568 
1569   PetscFunctionBegin;
1570   ierr  = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1571   ierr  = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1572   ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1573   ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
1574   if (iascii || isdraw || isbinary || issocket) {
1575     ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
1576   } else {
1577     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Viewer type %s not supported by MPIAIJ matrices",((PetscObject)viewer)->type_name);
1578   }
1579   PetscFunctionReturn(0);
1580 }
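/*
   A minimal usage sketch (illustrative only; mat is assumed to be an assembled MPIAIJ matrix):
   the ASCII branches above are typically reached as

     ierr = MatView(mat,PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr);                                   full listing
     ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_WORLD,PETSC_VIEWER_ASCII_INFO);CHKERRQ(ierr);
     ierr = MatView(mat,PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr);                                   summary only
*/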
1581 
1582 #undef __FUNCT__
1583 #define __FUNCT__ "MatSOR_MPIAIJ"
1584 PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
1585 {
1586   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1587   PetscErrorCode ierr;
1588   Vec            bb1 = 0;
1589   PetscBool      hasop;
1590 
1591   PetscFunctionBegin;
1592   if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) {
1593     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
1594   }
1595 
1596   if (flag == SOR_APPLY_UPPER) {
1597     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1598     PetscFunctionReturn(0);
1599   }
1600 
1601   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
1602     if (flag & SOR_ZERO_INITIAL_GUESS) {
1603       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1604       its--;
1605     }
1606 
1607     while (its--) {
1608       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1609       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1610 
1611       /* update rhs: bb1 = bb - B*x */
1612       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1613       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1614 
1615       /* local sweep */
1616       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1617     }
1618   } else if (flag & SOR_LOCAL_FORWARD_SWEEP){
1619     if (flag & SOR_ZERO_INITIAL_GUESS) {
1620       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1621       its--;
1622     }
1623     while (its--) {
1624       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1625       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1626 
1627       /* update rhs: bb1 = bb - B*x */
1628       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1629       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1630 
1631       /* local sweep */
1632       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1633     }
1634   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){
1635     if (flag & SOR_ZERO_INITIAL_GUESS) {
1636       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1637       its--;
1638     }
1639     while (its--) {
1640       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1641       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1642 
1643       /* update rhs: bb1 = bb - B*x */
1644       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1645       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1646 
1647       /* local sweep */
1648       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1649     }
1650   }  else if (flag & SOR_EISENSTAT) {
1651     Vec         xx1;
1652 
1653     ierr = VecDuplicate(bb,&xx1);CHKERRQ(ierr);
1654     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr);
1655 
1656     ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1657     ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1658     if (!mat->diag) {
1659       ierr = MatGetVecs(matin,&mat->diag,PETSC_NULL);CHKERRQ(ierr);
1660       ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr);
1661     }
1662     ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr);
1663     if (hasop) {
1664       ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr);
1665     } else {
1666       ierr = VecPointwiseMult(bb1,mat->diag,xx);CHKERRQ(ierr);
1667     }
1668     ierr = VecAYPX(bb1,(omega-2.0)/omega,bb);CHKERRQ(ierr);
1669 
1670     ierr = MatMultAdd(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr);
1671 
1672     /* local sweep */
1673     ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr);
1674     ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr);
1675     ierr = VecDestroy(&xx1);CHKERRQ(ierr);
1676   } else SETERRQ(((PetscObject)matin)->comm,PETSC_ERR_SUP,"Parallel SOR not supported");
1677 
1678   ierr = VecDestroy(&bb1);CHKERRQ(ierr);
1679   PetscFunctionReturn(0);
1680 }
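/*
   Sketch of the update performed by the local sweeps above: with the parallel matrix split as
   M = A + B (A the diagonal block, B the off-diagonal block acting on the scattered ghost values),
   each outer iteration forms

       bb1 = bb - B*x_ghost

   and then applies the requested SOR sweep of A to bb1, i.e. the parallel iteration is block
   Jacobi between processes with SOR used within each process.
*/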
1681 
1682 #undef __FUNCT__
1683 #define __FUNCT__ "MatPermute_MPIAIJ"
1684 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B)
1685 {
1686   MPI_Comm       comm,pcomm;
1687   PetscInt       first,local_size,nrows;
1688   const PetscInt *rows;
1689   PetscMPIInt    size;
1690   IS             crowp,growp,irowp,lrowp,lcolp,icolp;
1691   PetscErrorCode ierr;
1692 
1693   PetscFunctionBegin;
1694   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
1695   /* make a collective version of 'rowp' */
1696   ierr = PetscObjectGetComm((PetscObject)rowp,&pcomm);CHKERRQ(ierr);
1697   if (pcomm==comm) {
1698     crowp = rowp;
1699   } else {
1700     ierr = ISGetSize(rowp,&nrows);CHKERRQ(ierr);
1701     ierr = ISGetIndices(rowp,&rows);CHKERRQ(ierr);
1702     ierr = ISCreateGeneral(comm,nrows,rows,PETSC_COPY_VALUES,&crowp);CHKERRQ(ierr);
1703     ierr = ISRestoreIndices(rowp,&rows);CHKERRQ(ierr);
1704   }
1705   /* collect the global row permutation and invert it */
1706   ierr = ISAllGather(crowp,&growp);CHKERRQ(ierr);
1707   ierr = ISSetPermutation(growp);CHKERRQ(ierr);
1708   if (pcomm!=comm) {
1709     ierr = ISDestroy(&crowp);CHKERRQ(ierr);
1710   }
1711   ierr = ISInvertPermutation(growp,PETSC_DECIDE,&irowp);CHKERRQ(ierr);
1712   /* get the local target indices */
1713   ierr = MatGetOwnershipRange(A,&first,PETSC_NULL);CHKERRQ(ierr);
1714   ierr = MatGetLocalSize(A,&local_size,PETSC_NULL);CHKERRQ(ierr);
1715   ierr = ISGetIndices(irowp,&rows);CHKERRQ(ierr);
1716   ierr = ISCreateGeneral(MPI_COMM_SELF,local_size,rows+first,PETSC_COPY_VALUES,&lrowp);CHKERRQ(ierr);
1717   ierr = ISRestoreIndices(irowp,&rows);CHKERRQ(ierr);
1718   ierr = ISDestroy(&irowp);CHKERRQ(ierr);
1719   /* the column permutation is so much easier;
1720      make a local version of 'colp' and invert it */
1721   ierr = PetscObjectGetComm((PetscObject)colp,&pcomm);CHKERRQ(ierr);
1722   ierr = MPI_Comm_size(pcomm,&size);CHKERRQ(ierr);
1723   if (size==1) {
1724     lcolp = colp;
1725   } else {
1726     ierr = ISGetSize(colp,&nrows);CHKERRQ(ierr);
1727     ierr = ISGetIndices(colp,&rows);CHKERRQ(ierr);
1728     ierr = ISCreateGeneral(MPI_COMM_SELF,nrows,rows,PETSC_COPY_VALUES,&lcolp);CHKERRQ(ierr);
1729   }
1730   ierr = ISSetPermutation(lcolp);CHKERRQ(ierr);
1731   ierr = ISInvertPermutation(lcolp,PETSC_DECIDE,&icolp);CHKERRQ(ierr);
1732   ierr = ISSetPermutation(icolp);CHKERRQ(ierr);
1733   if (size>1) {
1734     ierr = ISRestoreIndices(colp,&rows);CHKERRQ(ierr);
1735     ierr = ISDestroy(&lcolp);CHKERRQ(ierr);
1736   }
1737   /* now we just get the submatrix */
1738   ierr = MatGetSubMatrix_MPIAIJ_Private(A,lrowp,icolp,local_size,MAT_INITIAL_MATRIX,B);CHKERRQ(ierr);
1739   /* clean up */
1740   ierr = ISDestroy(&lrowp);CHKERRQ(ierr);
1741   ierr = ISDestroy(&icolp);CHKERRQ(ierr);
1742   PetscFunctionReturn(0);
1743 }
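/*
   A minimal usage sketch (illustrative only; rperm and cperm are assumed to be index sets holding
   row and column permutations, e.g. built with ISCreateGeneral() and marked with ISSetPermutation()):

     Mat Aperm;
     ierr = MatPermute(A,rperm,cperm,&Aperm);CHKERRQ(ierr);
*/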
1744 
1745 #undef __FUNCT__
1746 #define __FUNCT__ "MatGetInfo_MPIAIJ"
1747 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1748 {
1749   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1750   Mat            A = mat->A,B = mat->B;
1751   PetscErrorCode ierr;
1752   PetscReal      isend[5],irecv[5];
1753 
1754   PetscFunctionBegin;
1755   info->block_size     = 1.0;
1756   ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1757   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1758   isend[3] = info->memory;  isend[4] = info->mallocs;
1759   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1760   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1761   isend[3] += info->memory;  isend[4] += info->mallocs;
1762   if (flag == MAT_LOCAL) {
1763     info->nz_used      = isend[0];
1764     info->nz_allocated = isend[1];
1765     info->nz_unneeded  = isend[2];
1766     info->memory       = isend[3];
1767     info->mallocs      = isend[4];
1768   } else if (flag == MAT_GLOBAL_MAX) {
1769     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,((PetscObject)matin)->comm);CHKERRQ(ierr);
1770     info->nz_used      = irecv[0];
1771     info->nz_allocated = irecv[1];
1772     info->nz_unneeded  = irecv[2];
1773     info->memory       = irecv[3];
1774     info->mallocs      = irecv[4];
1775   } else if (flag == MAT_GLOBAL_SUM) {
1776     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,((PetscObject)matin)->comm);CHKERRQ(ierr);
1777     info->nz_used      = irecv[0];
1778     info->nz_allocated = irecv[1];
1779     info->nz_unneeded  = irecv[2];
1780     info->memory       = irecv[3];
1781     info->mallocs      = irecv[4];
1782   }
1783   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1784   info->fill_ratio_needed = 0;
1785   info->factor_mallocs    = 0;
1786 
1787   PetscFunctionReturn(0);
1788 }
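/*
   A minimal usage sketch (illustrative only): global nonzero counts for the whole parallel matrix
   are obtained with the MAT_GLOBAL_SUM flag handled above.

     MatInfo info;
     ierr = MatGetInfo(mat,MAT_GLOBAL_SUM,&info);CHKERRQ(ierr);
     ierr = PetscPrintf(((PetscObject)mat)->comm,"nz used %g, nz allocated %g\n",(double)info.nz_used,(double)info.nz_allocated);CHKERRQ(ierr);
*/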
1789 
1790 #undef __FUNCT__
1791 #define __FUNCT__ "MatSetOption_MPIAIJ"
1792 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscBool  flg)
1793 {
1794   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1795   PetscErrorCode ierr;
1796 
1797   PetscFunctionBegin;
1798   switch (op) {
1799   case MAT_NEW_NONZERO_LOCATIONS:
1800   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1801   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1802   case MAT_KEEP_NONZERO_PATTERN:
1803   case MAT_NEW_NONZERO_LOCATION_ERR:
1804   case MAT_USE_INODES:
1805   case MAT_IGNORE_ZERO_ENTRIES:
1806     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1807     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1808     break;
1809   case MAT_ROW_ORIENTED:
1810     a->roworiented = flg;
1811     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1812     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1813     break;
1814   case MAT_NEW_DIAGONALS:
1815     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1816     break;
1817   case MAT_IGNORE_OFF_PROC_ENTRIES:
1818     a->donotstash = flg;
1819     break;
1820   case MAT_SPD:
1821     A->spd_set                         = PETSC_TRUE;
1822     A->spd                             = flg;
1823     if (flg) {
1824       A->symmetric                     = PETSC_TRUE;
1825       A->structurally_symmetric        = PETSC_TRUE;
1826       A->symmetric_set                 = PETSC_TRUE;
1827       A->structurally_symmetric_set    = PETSC_TRUE;
1828     }
1829     break;
1830   case MAT_SYMMETRIC:
1831     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1832     break;
1833   case MAT_STRUCTURALLY_SYMMETRIC:
1834     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1835     break;
1836   case MAT_HERMITIAN:
1837     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1838     break;
1839   case MAT_SYMMETRY_ETERNAL:
1840     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1841     break;
1842   default:
1843     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op);
1844   }
1845   PetscFunctionReturn(0);
1846 }
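/*
   A minimal usage sketch (illustrative only) of options dispatched above:

     ierr = MatSetOption(A,MAT_IGNORE_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr);   off-process entries are not stashed during assembly
     ierr = MatSetOption(A,MAT_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr);                 forwarded to the diagonal block
*/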
1847 
1848 #undef __FUNCT__
1849 #define __FUNCT__ "MatGetRow_MPIAIJ"
1850 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1851 {
1852   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1853   PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
1854   PetscErrorCode ierr;
1855   PetscInt       i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart;
1856   PetscInt       nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend;
1857   PetscInt       *cmap,*idx_p;
1858 
1859   PetscFunctionBegin;
1860   if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
1861   mat->getrowactive = PETSC_TRUE;
1862 
1863   if (!mat->rowvalues && (idx || v)) {
1864     /*
1865         allocate enough space to hold information from the longest row.
1866     */
1867     Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data;
1868     PetscInt   max = 1,tmp;
1869     for (i=0; i<matin->rmap->n; i++) {
1870       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1871       if (max < tmp) { max = tmp; }
1872     }
1873     ierr = PetscMalloc2(max,PetscScalar,&mat->rowvalues,max,PetscInt,&mat->rowindices);CHKERRQ(ierr);
1874   }
1875 
1876   if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows");
1877   lrow = row - rstart;
1878 
1879   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1880   if (!v)   {pvA = 0; pvB = 0;}
1881   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1882   ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1883   ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1884   nztot = nzA + nzB;
1885 
1886   cmap  = mat->garray;
1887   if (v  || idx) {
1888     if (nztot) {
1889       /* Sort by increasing column numbers, assuming A and B already sorted */
1890       PetscInt imark = -1;
1891       if (v) {
1892         *v = v_p = mat->rowvalues;
1893         for (i=0; i<nzB; i++) {
1894           if (cmap[cworkB[i]] < cstart)   v_p[i] = vworkB[i];
1895           else break;
1896         }
1897         imark = i;
1898         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1899         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1900       }
1901       if (idx) {
1902         *idx = idx_p = mat->rowindices;
1903         if (imark > -1) {
1904           for (i=0; i<imark; i++) {
1905             idx_p[i] = cmap[cworkB[i]];
1906           }
1907         } else {
1908           for (i=0; i<nzB; i++) {
1909             if (cmap[cworkB[i]] < cstart)   idx_p[i] = cmap[cworkB[i]];
1910             else break;
1911           }
1912           imark = i;
1913         }
1914         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart + cworkA[i];
1915         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]];
1916       }
1917     } else {
1918       if (idx) *idx = 0;
1919       if (v)   *v   = 0;
1920     }
1921   }
1922   *nz = nztot;
1923   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1924   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1925   PetscFunctionReturn(0);
1926 }
1927 
1928 #undef __FUNCT__
1929 #define __FUNCT__ "MatRestoreRow_MPIAIJ"
1930 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1931 {
1932   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1933 
1934   PetscFunctionBegin;
1935   if (!aij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
1936   aij->getrowactive = PETSC_FALSE;
1937   PetscFunctionReturn(0);
1938 }
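/*
   A minimal usage sketch (illustrative only; only locally owned rows may be queried, as enforced
   in MatGetRow_MPIAIJ() above):

     const PetscInt    *cols;
     const PetscScalar *vals;
     PetscInt          row,ncols,rstart,rend;
     ierr = MatGetOwnershipRange(mat,&rstart,&rend);CHKERRQ(ierr);
     for (row=rstart; row<rend; row++) {
       ierr = MatGetRow(mat,row,&ncols,&cols,&vals);CHKERRQ(ierr);
       ...                                            use ncols, cols[], vals[] for this global row
       ierr = MatRestoreRow(mat,row,&ncols,&cols,&vals);CHKERRQ(ierr);
     }
*/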
1939 
1940 #undef __FUNCT__
1941 #define __FUNCT__ "MatNorm_MPIAIJ"
1942 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm)
1943 {
1944   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1945   Mat_SeqAIJ     *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data;
1946   PetscErrorCode ierr;
1947   PetscInt       i,j,cstart = mat->cmap->rstart;
1948   PetscReal      sum = 0.0;
1949   MatScalar      *v;
1950 
1951   PetscFunctionBegin;
1952   if (aij->size == 1) {
1953     ierr =  MatNorm(aij->A,type,norm);CHKERRQ(ierr);
1954   } else {
1955     if (type == NORM_FROBENIUS) {
1956       v = amat->a;
1957       for (i=0; i<amat->nz; i++) {
1958 #if defined(PETSC_USE_COMPLEX)
1959         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1960 #else
1961         sum += (*v)*(*v); v++;
1962 #endif
1963       }
1964       v = bmat->a;
1965       for (i=0; i<bmat->nz; i++) {
1966 #if defined(PETSC_USE_COMPLEX)
1967         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1968 #else
1969         sum += (*v)*(*v); v++;
1970 #endif
1971       }
1972       ierr = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPIU_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr);
1973       *norm = PetscSqrtReal(*norm);
1974     } else if (type == NORM_1) { /* max column norm */
1975       PetscReal *tmp,*tmp2;
1976       PetscInt  *jj,*garray = aij->garray;
1977       ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr);
1978       ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscReal),&tmp2);CHKERRQ(ierr);
1979       ierr = PetscMemzero(tmp,mat->cmap->N*sizeof(PetscReal));CHKERRQ(ierr);
1980       *norm = 0.0;
1981       v = amat->a; jj = amat->j;
1982       for (j=0; j<amat->nz; j++) {
1983         tmp[cstart + *jj++ ] += PetscAbsScalar(*v);  v++;
1984       }
1985       v = bmat->a; jj = bmat->j;
1986       for (j=0; j<bmat->nz; j++) {
1987         tmp[garray[*jj++]] += PetscAbsScalar(*v); v++;
1988       }
1989       ierr = MPI_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr);
1990       for (j=0; j<mat->cmap->N; j++) {
1991         if (tmp2[j] > *norm) *norm = tmp2[j];
1992       }
1993       ierr = PetscFree(tmp);CHKERRQ(ierr);
1994       ierr = PetscFree(tmp2);CHKERRQ(ierr);
1995     } else if (type == NORM_INFINITY) { /* max row norm */
1996       PetscReal ntemp = 0.0;
1997       for (j=0; j<aij->A->rmap->n; j++) {
1998         v = amat->a + amat->i[j];
1999         sum = 0.0;
2000         for (i=0; i<amat->i[j+1]-amat->i[j]; i++) {
2001           sum += PetscAbsScalar(*v); v++;
2002         }
2003         v = bmat->a + bmat->i[j];
2004         for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) {
2005           sum += PetscAbsScalar(*v); v++;
2006         }
2007         if (sum > ntemp) ntemp = sum;
2008       }
2009       ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPIU_MAX,((PetscObject)mat)->comm);CHKERRQ(ierr);
2010     } else {
2011       SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_SUP,"No support for two norm");
2012     }
2013   }
2014   PetscFunctionReturn(0);
2015 }
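/*
   A minimal usage sketch (illustrative only) of the three norms supported above:

     PetscReal nrm;
     ierr = MatNorm(mat,NORM_FROBENIUS,&nrm);CHKERRQ(ierr);    sqrt of the global sum of squares
     ierr = MatNorm(mat,NORM_1,&nrm);CHKERRQ(ierr);            maximum column sum
     ierr = MatNorm(mat,NORM_INFINITY,&nrm);CHKERRQ(ierr);     maximum row sum
*/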
2016 
2017 #undef __FUNCT__
2018 #define __FUNCT__ "MatTranspose_MPIAIJ"
2019 PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout)
2020 {
2021   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2022   Mat_SeqAIJ     *Aloc=(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data;
2023   PetscErrorCode ierr;
2024   PetscInt       M = A->rmap->N,N = A->cmap->N,ma,na,mb,*ai,*aj,*bi,*bj,row,*cols,*cols_tmp,i,*d_nnz;
2025   PetscInt       cstart=A->cmap->rstart,ncol;
2026   Mat            B;
2027   MatScalar      *array;
2028 
2029   PetscFunctionBegin;
2030   if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
2031 
2032   ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n;
2033   ai = Aloc->i; aj = Aloc->j;
2034   bi = Bloc->i; bj = Bloc->j;
2035   if (reuse == MAT_INITIAL_MATRIX || *matout == A) {
2036     /* compute d_nnz for preallocation; o_nnz is approximated by d_nnz to avoid communication */
2037     ierr = PetscMalloc((1+na)*sizeof(PetscInt),&d_nnz);CHKERRQ(ierr);
2038     ierr = PetscMemzero(d_nnz,(1+na)*sizeof(PetscInt));CHKERRQ(ierr);
2039     for (i=0; i<ai[ma]; i++){
2040       d_nnz[aj[i]] ++;
2041       aj[i] += cstart; /* global col index to be used by MatSetValues() */
2042     }
2043 
2044     ierr = MatCreate(((PetscObject)A)->comm,&B);CHKERRQ(ierr);
2045     ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
2046     ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
2047     ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,d_nnz);CHKERRQ(ierr);
2048     ierr = PetscFree(d_nnz);CHKERRQ(ierr);
2049   } else {
2050     B = *matout;
2051   }
2052 
2053   /* copy over the A part */
2054   array = Aloc->a;
2055   row = A->rmap->rstart;
2056   for (i=0; i<ma; i++) {
2057     ncol = ai[i+1]-ai[i];
2058     ierr = MatSetValues(B,ncol,aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
2059     row++; array += ncol; aj += ncol;
2060   }
2061   aj = Aloc->j;
2062   for (i=0; i<ai[ma]; i++) aj[i] -= cstart; /* restore the local col index */
2063 
2064   /* copy over the B part */
2065   ierr = PetscMalloc(bi[mb]*sizeof(PetscInt),&cols);CHKERRQ(ierr);
2066   ierr = PetscMemzero(cols,bi[mb]*sizeof(PetscInt));CHKERRQ(ierr);
2067   array = Bloc->a;
2068   row = A->rmap->rstart;
2069   for (i=0; i<bi[mb]; i++) {cols[i] = a->garray[bj[i]];}
2070   cols_tmp = cols;
2071   for (i=0; i<mb; i++) {
2072     ncol = bi[i+1]-bi[i];
2073     ierr = MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
2074     row++; array += ncol; cols_tmp += ncol;
2075   }
2076   ierr = PetscFree(cols);CHKERRQ(ierr);
2077 
2078   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2079   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2080   if (reuse == MAT_INITIAL_MATRIX || *matout != A) {
2081     *matout = B;
2082   } else {
2083     ierr = MatHeaderMerge(A,B);CHKERRQ(ierr);
2084   }
2085   PetscFunctionReturn(0);
2086 }
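/*
   A minimal usage sketch (illustrative only) of the transpose implemented above:

     Mat At;
     ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr);   new matrix At = A^T
     ierr = MatTranspose(A,MAT_REUSE_MATRIX,&A);CHKERRQ(ierr);      in place, square matrices only
*/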
2087 
2088 #undef __FUNCT__
2089 #define __FUNCT__ "MatDiagonalScale_MPIAIJ"
2090 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
2091 {
2092   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2093   Mat            a = aij->A,b = aij->B;
2094   PetscErrorCode ierr;
2095   PetscInt       s1,s2,s3;
2096 
2097   PetscFunctionBegin;
2098   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
2099   if (rr) {
2100     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
2101     if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
2102     /* Overlap communication with computation. */
2103     ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2104   }
2105   if (ll) {
2106     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
2107     if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
2108     ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr);
2109   }
2110   /* scale the diagonal block */
2111   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
2112 
2113   if (rr) {
2114     /* Do a scatter end and then right scale the off-diagonal block */
2115     ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2116     ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr);
2117   }
2118 
2119   PetscFunctionReturn(0);
2120 }
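/*
   A minimal usage sketch (illustrative only; l and r are assumed to be vectors with the matrix's
   row and column layout): the routine above computes mat <- diag(l) * mat * diag(r).

     ierr = MatDiagonalScale(mat,l,r);CHKERRQ(ierr);
     ierr = MatDiagonalScale(mat,PETSC_NULL,r);CHKERRQ(ierr);    right scaling only
*/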
2121 
2122 #undef __FUNCT__
2123 #define __FUNCT__ "MatSetBlockSize_MPIAIJ"
2124 PetscErrorCode MatSetBlockSize_MPIAIJ(Mat A,PetscInt bs)
2125 {
2126   Mat_MPIAIJ     *a   = (Mat_MPIAIJ*)A->data;
2127   PetscErrorCode ierr;
2128 
2129   PetscFunctionBegin;
2130   ierr = MatSetBlockSize(a->A,bs);CHKERRQ(ierr);
2131   ierr = MatSetBlockSize(a->B,bs);CHKERRQ(ierr);
2132   ierr = PetscLayoutSetBlockSize(A->rmap,bs);CHKERRQ(ierr);
2133   ierr = PetscLayoutSetBlockSize(A->cmap,bs);CHKERRQ(ierr);
2134   PetscFunctionReturn(0);
2135 }
2136 #undef __FUNCT__
2137 #define __FUNCT__ "MatSetUnfactored_MPIAIJ"
2138 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A)
2139 {
2140   Mat_MPIAIJ     *a   = (Mat_MPIAIJ*)A->data;
2141   PetscErrorCode ierr;
2142 
2143   PetscFunctionBegin;
2144   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
2145   PetscFunctionReturn(0);
2146 }
2147 
2148 #undef __FUNCT__
2149 #define __FUNCT__ "MatEqual_MPIAIJ"
2150 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscBool  *flag)
2151 {
2152   Mat_MPIAIJ     *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data;
2153   Mat            a,b,c,d;
2154   PetscBool      flg;
2155   PetscErrorCode ierr;
2156 
2157   PetscFunctionBegin;
2158   a = matA->A; b = matA->B;
2159   c = matB->A; d = matB->B;
2160 
2161   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
2162   if (flg) {
2163     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
2164   }
2165   ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,((PetscObject)A)->comm);CHKERRQ(ierr);
2166   PetscFunctionReturn(0);
2167 }
2168 
2169 #undef __FUNCT__
2170 #define __FUNCT__ "MatCopy_MPIAIJ"
2171 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
2172 {
2173   PetscErrorCode ierr;
2174   Mat_MPIAIJ     *a = (Mat_MPIAIJ *)A->data;
2175   Mat_MPIAIJ     *b = (Mat_MPIAIJ *)B->data;
2176 
2177   PetscFunctionBegin;
2178   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
2179   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
2180     /* because of the column compression in the off-processor part of the matrix a->B,
2181        the number of columns in a->B and b->B may be different, hence we cannot call
2182        MatCopy() directly on the two parts. If need be, a copy more efficient than
2183        MatCopy_Basic() could be provided by first uncompressing the a->B matrices and
2184        then copying the submatrices */
2185     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
2186   } else {
2187     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
2188     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
2189   }
2190   PetscFunctionReturn(0);
2191 }
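/*
   A minimal usage sketch (illustrative only): the fast path above requires B to already have A's
   nonzero pattern, which is most easily arranged with MatDuplicate().

     Mat B;
     ierr = MatDuplicate(A,MAT_DO_NOT_COPY_VALUES,&B);CHKERRQ(ierr);
     ierr = MatCopy(A,B,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
*/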
2192 
2193 #undef __FUNCT__
2194 #define __FUNCT__ "MatSetUpPreallocation_MPIAIJ"
2195 PetscErrorCode MatSetUpPreallocation_MPIAIJ(Mat A)
2196 {
2197   PetscErrorCode ierr;
2198 
2199   PetscFunctionBegin;
2200   ierr =  MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
2201   PetscFunctionReturn(0);
2202 }
2203 
2204 #undef __FUNCT__
2205 #define __FUNCT__ "MatAXPY_MPIAIJ"
2206 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
2207 {
2208   PetscErrorCode ierr;
2209   PetscInt       i;
2210   Mat_MPIAIJ     *xx = (Mat_MPIAIJ *)X->data,*yy = (Mat_MPIAIJ *)Y->data;
2211   PetscBLASInt   bnz,one=1;
2212   Mat_SeqAIJ     *x,*y;
2213 
2214   PetscFunctionBegin;
2215   if (str == SAME_NONZERO_PATTERN) {
2216     PetscScalar alpha = a;
2217     x = (Mat_SeqAIJ *)xx->A->data;
2218     y = (Mat_SeqAIJ *)yy->A->data;
2219     bnz = PetscBLASIntCast(x->nz);
2220     BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
2221     x = (Mat_SeqAIJ *)xx->B->data;
2222     y = (Mat_SeqAIJ *)yy->B->data;
2223     bnz = PetscBLASIntCast(x->nz);
2224     BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
2225   } else if (str == SUBSET_NONZERO_PATTERN) {
2226     ierr = MatAXPY_SeqAIJ(yy->A,a,xx->A,str);CHKERRQ(ierr);
2227 
2228     x = (Mat_SeqAIJ *)xx->B->data;
2229     y = (Mat_SeqAIJ *)yy->B->data;
2230     if (y->xtoy && y->XtoY != xx->B) {
2231       ierr = PetscFree(y->xtoy);CHKERRQ(ierr);
2232       ierr = MatDestroy(&y->XtoY);CHKERRQ(ierr);
2233     }
2234     if (!y->xtoy) { /* get xtoy */
2235       ierr = MatAXPYGetxtoy_Private(xx->B->rmap->n,x->i,x->j,xx->garray,y->i,y->j,yy->garray,&y->xtoy);CHKERRQ(ierr);
2236       y->XtoY = xx->B;
2237       ierr = PetscObjectReference((PetscObject)xx->B);CHKERRQ(ierr);
2238     }
2239     for (i=0; i<x->nz; i++) y->a[y->xtoy[i]] += a*(x->a[i]);
2240   } else {
2241     Mat B;
2242     PetscInt *nnz_d,*nnz_o;
2243     ierr = PetscMalloc(yy->A->rmap->N*sizeof(PetscInt),&nnz_d);CHKERRQ(ierr);
2244     ierr = PetscMalloc(yy->B->rmap->N*sizeof(PetscInt),&nnz_o);CHKERRQ(ierr);
2245     ierr = MatCreate(((PetscObject)Y)->comm,&B);CHKERRQ(ierr);
2246     ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr);
2247     ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr);
2248     ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr);
2249     ierr = MatAXPYGetPreallocation_SeqAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr);
2250     ierr = MatAXPYGetPreallocation_SeqAIJ(yy->B,xx->B,nnz_o);CHKERRQ(ierr);
2251     ierr = MatMPIAIJSetPreallocation(B,PETSC_NULL,nnz_d,PETSC_NULL,nnz_o);CHKERRQ(ierr);
2252     ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr);
2253     ierr = MatHeaderReplace(Y,B);CHKERRQ(ierr);
2254     ierr = PetscFree(nnz_d);CHKERRQ(ierr);
2255     ierr = PetscFree(nnz_o);CHKERRQ(ierr);
2256   }
2257   PetscFunctionReturn(0);
2258 }
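/*
   A minimal usage sketch (illustrative only) of Y <- a*X + Y with the three pattern hints handled
   above:

     ierr = MatAXPY(Y,2.0,X,SAME_NONZERO_PATTERN);CHKERRQ(ierr);        patterns identical, pure BLAS axpy
     ierr = MatAXPY(Y,2.0,X,SUBSET_NONZERO_PATTERN);CHKERRQ(ierr);      X's pattern contained in Y's
     ierr = MatAXPY(Y,2.0,X,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);   Y is rebuilt with merged preallocation
*/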
2259 
2260 extern PetscErrorCode  MatConjugate_SeqAIJ(Mat);
2261 
2262 #undef __FUNCT__
2263 #define __FUNCT__ "MatConjugate_MPIAIJ"
2264 PetscErrorCode  MatConjugate_MPIAIJ(Mat mat)
2265 {
2266 #if defined(PETSC_USE_COMPLEX)
2267   PetscErrorCode ierr;
2268   Mat_MPIAIJ     *aij = (Mat_MPIAIJ *)mat->data;
2269 
2270   PetscFunctionBegin;
2271   ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr);
2272   ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr);
2273 #else
2274   PetscFunctionBegin;
2275 #endif
2276   PetscFunctionReturn(0);
2277 }
2278 
2279 #undef __FUNCT__
2280 #define __FUNCT__ "MatRealPart_MPIAIJ"
2281 PetscErrorCode MatRealPart_MPIAIJ(Mat A)
2282 {
2283   Mat_MPIAIJ   *a = (Mat_MPIAIJ*)A->data;
2284   PetscErrorCode ierr;
2285 
2286   PetscFunctionBegin;
2287   ierr = MatRealPart(a->A);CHKERRQ(ierr);
2288   ierr = MatRealPart(a->B);CHKERRQ(ierr);
2289   PetscFunctionReturn(0);
2290 }
2291 
2292 #undef __FUNCT__
2293 #define __FUNCT__ "MatImaginaryPart_MPIAIJ"
2294 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A)
2295 {
2296   Mat_MPIAIJ   *a = (Mat_MPIAIJ*)A->data;
2297   PetscErrorCode ierr;
2298 
2299   PetscFunctionBegin;
2300   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
2301   ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
2302   PetscFunctionReturn(0);
2303 }
2304 
2305 #ifdef PETSC_HAVE_PBGL
2306 
2307 #include <boost/parallel/mpi/bsp_process_group.hpp>
2308 #include <boost/graph/distributed/ilu_default_graph.hpp>
2309 #include <boost/graph/distributed/ilu_0_block.hpp>
2310 #include <boost/graph/distributed/ilu_preconditioner.hpp>
2311 #include <boost/graph/distributed/petsc/interface.hpp>
2312 #include <boost/multi_array.hpp>
2313 #include <boost/parallel/distributed_property_map.hpp>
2314 
2315 #undef __FUNCT__
2316 #define __FUNCT__ "MatILUFactorSymbolic_MPIAIJ"
2317 /*
2318   This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu>
2319 */
2320 PetscErrorCode MatILUFactorSymbolic_MPIAIJ(Mat fact,Mat A, IS isrow, IS iscol, const MatFactorInfo *info)
2321 {
2322   namespace petsc = boost::distributed::petsc;
2323 
2324   namespace graph_dist = boost::graph::distributed;
2325   using boost::graph::distributed::ilu_default::process_group_type;
2326   using boost::graph::ilu_permuted;
2327 
2328   PetscBool       row_identity, col_identity;
2329   PetscContainer  c;
2330   PetscInt        m, n, M, N;
2331   PetscErrorCode  ierr;
2332 
2333   PetscFunctionBegin;
2334   if (info->levels != 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only levels = 0 supported for parallel ilu");
2335   ierr = ISIdentity(isrow, &row_identity);CHKERRQ(ierr);
2336   ierr = ISIdentity(iscol, &col_identity);CHKERRQ(ierr);
2337   if (!row_identity || !col_identity) {
2338     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for parallel ILU");
2339   }
2340 
2341   process_group_type pg;
2342   typedef graph_dist::ilu_default::ilu_level_graph_type  lgraph_type;
2343   lgraph_type*   lgraph_p = new lgraph_type(petsc::num_global_vertices(A), pg, petsc::matrix_distribution(A, pg));
2344   lgraph_type&   level_graph = *lgraph_p;
2345   graph_dist::ilu_default::graph_type&            graph(level_graph.graph);
2346 
2347   petsc::read_matrix(A, graph, get(boost::edge_weight, graph));
2348   ilu_permuted(level_graph);
2349 
2350   /* put together the new matrix */
2351   ierr = MatCreate(((PetscObject)A)->comm, fact);CHKERRQ(ierr);
2352   ierr = MatGetLocalSize(A, &m, &n);CHKERRQ(ierr);
2353   ierr = MatGetSize(A, &M, &N);CHKERRQ(ierr);
2354   ierr = MatSetSizes(fact, m, n, M, N);CHKERRQ(ierr);
2355   ierr = MatSetType(fact, ((PetscObject)A)->type_name);CHKERRQ(ierr);
2356   ierr = MatAssemblyBegin(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2357   ierr = MatAssemblyEnd(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2358 
2359   ierr = PetscContainerCreate(((PetscObject)A)->comm, &c);CHKERRQ(ierr);
2360   ierr = PetscContainerSetPointer(c, lgraph_p);CHKERRQ(ierr);
2361   ierr = PetscObjectCompose((PetscObject) (fact), "graph", (PetscObject) c);CHKERRQ(ierr);
2362   ierr = PetscContainerDestroy(&c);CHKERRQ(ierr);
2363   PetscFunctionReturn(0);
2364 }
2365 
2366 #undef __FUNCT__
2367 #define __FUNCT__ "MatLUFactorNumeric_MPIAIJ"
2368 PetscErrorCode MatLUFactorNumeric_MPIAIJ(Mat B,Mat A, const MatFactorInfo *info)
2369 {
2370   PetscFunctionBegin;
2371   PetscFunctionReturn(0);
2372 }
2373 
2374 #undef __FUNCT__
2375 #define __FUNCT__ "MatSolve_MPIAIJ"
2376 /*
2377   This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu>
2378 */
2379 PetscErrorCode MatSolve_MPIAIJ(Mat A, Vec b, Vec x)
2380 {
2381   namespace graph_dist = boost::graph::distributed;
2382 
2383   typedef graph_dist::ilu_default::ilu_level_graph_type  lgraph_type;
2384   lgraph_type*   lgraph_p;
2385   PetscContainer c;
2386   PetscErrorCode ierr;
2387 
2388   PetscFunctionBegin;
2389   ierr = PetscObjectQuery((PetscObject) A, "graph", (PetscObject *) &c);CHKERRQ(ierr);
2390   ierr = PetscContainerGetPointer(c, (void **) &lgraph_p);CHKERRQ(ierr);
2391   ierr = VecCopy(b, x);CHKERRQ(ierr);
2392 
2393   PetscScalar* array_x;
2394   ierr = VecGetArray(x, &array_x);CHKERRQ(ierr);
2395   PetscInt sx;
2396   ierr = VecGetSize(x, &sx);CHKERRQ(ierr);
2397 
2398   PetscScalar* array_b;
2399   ierr = VecGetArray(b, &array_b);CHKERRQ(ierr);
2400   PetscInt sb;
2401   ierr = VecGetSize(b, &sb);CHKERRQ(ierr);
2402 
2403   lgraph_type&   level_graph = *lgraph_p;
2404   graph_dist::ilu_default::graph_type&            graph(level_graph.graph);
2405 
2406   typedef boost::multi_array_ref<PetscScalar, 1> array_ref_type;
2407   array_ref_type                                 ref_b(array_b, boost::extents[num_vertices(graph)]),
2408                                                  ref_x(array_x, boost::extents[num_vertices(graph)]);
2409 
2410   typedef boost::iterator_property_map<array_ref_type::iterator,
2411                                 boost::property_map<graph_dist::ilu_default::graph_type, boost::vertex_index_t>::type>  gvector_type;
2412   gvector_type                                   vector_b(ref_b.begin(), get(boost::vertex_index, graph)),
2413                                                  vector_x(ref_x.begin(), get(boost::vertex_index, graph));
2414 
2415   ilu_set_solve(*lgraph_p, vector_b, vector_x);
2416 
2417   PetscFunctionReturn(0);
2418 }
2419 #endif
2420 
2421 typedef struct { /* used by MatGetRedundantMatrix() for reusing matredundant */
2422   PetscInt       nzlocal,nsends,nrecvs;
2423   PetscMPIInt    *send_rank,*recv_rank;
2424   PetscInt       *sbuf_nz,*rbuf_nz,*sbuf_j,**rbuf_j;
2425   PetscScalar    *sbuf_a,**rbuf_a;
2426   PetscErrorCode (*Destroy)(Mat);
2427 } Mat_Redundant;
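/*
   A minimal usage sketch (illustrative only; nsubcomm, subcomm and mlocal are assumed to come from
   the caller, e.g. a redundant preconditioner) of the routine implemented below:

     Mat red;
     ierr = MatGetRedundantMatrix(mat,nsubcomm,subcomm,mlocal,MAT_INITIAL_MATRIX,&red);CHKERRQ(ierr);
*/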
2428 
2429 #undef __FUNCT__
2430 #define __FUNCT__ "PetscContainerDestroy_MatRedundant"
2431 PetscErrorCode PetscContainerDestroy_MatRedundant(void *ptr)
2432 {
2433   PetscErrorCode       ierr;
2434   Mat_Redundant        *redund=(Mat_Redundant*)ptr;
2435   PetscInt             i;
2436 
2437   PetscFunctionBegin;
2438   ierr = PetscFree2(redund->send_rank,redund->recv_rank);CHKERRQ(ierr);
2439   ierr = PetscFree(redund->sbuf_j);CHKERRQ(ierr);
2440   ierr = PetscFree(redund->sbuf_a);CHKERRQ(ierr);
2441   for (i=0; i<redund->nrecvs; i++){
2442     ierr = PetscFree(redund->rbuf_j[i]);CHKERRQ(ierr);
2443     ierr = PetscFree(redund->rbuf_a[i]);CHKERRQ(ierr);
2444   }
2445   ierr = PetscFree4(redund->sbuf_nz,redund->rbuf_nz,redund->rbuf_j,redund->rbuf_a);CHKERRQ(ierr);
2446   ierr = PetscFree(redund);CHKERRQ(ierr);
2447   PetscFunctionReturn(0);
2448 }
2449 
2450 #undef __FUNCT__
2451 #define __FUNCT__ "MatDestroy_MatRedundant"
2452 PetscErrorCode MatDestroy_MatRedundant(Mat A)
2453 {
2454   PetscErrorCode  ierr;
2455   PetscContainer  container;
2456   Mat_Redundant   *redund=PETSC_NULL;
2457 
2458   PetscFunctionBegin;
2459   ierr = PetscObjectQuery((PetscObject)A,"Mat_Redundant",(PetscObject *)&container);CHKERRQ(ierr);
2460   if (!container) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Container does not exist");
2461   ierr = PetscContainerGetPointer(container,(void **)&redund);CHKERRQ(ierr);
2462   A->ops->destroy = redund->Destroy;
2463   ierr = PetscObjectCompose((PetscObject)A,"Mat_Redundant",0);CHKERRQ(ierr);
2464   if (A->ops->destroy) {
2465     ierr = (*A->ops->destroy)(A);CHKERRQ(ierr);
2466   }
2467   PetscFunctionReturn(0);
2468 }
2469 
2470 #undef __FUNCT__
2471 #define __FUNCT__ "MatGetRedundantMatrix_MPIAIJ"
2472 PetscErrorCode MatGetRedundantMatrix_MPIAIJ(Mat mat,PetscInt nsubcomm,MPI_Comm subcomm,PetscInt mlocal_sub,MatReuse reuse,Mat *matredundant)
2473 {
2474   PetscMPIInt    rank,size;
2475   MPI_Comm       comm=((PetscObject)mat)->comm;
2476   PetscErrorCode ierr;
2477   PetscInt       nsends=0,nrecvs=0,i,rownz_max=0;
2478   PetscMPIInt    *send_rank=PETSC_NULL,*recv_rank=PETSC_NULL;
2479   PetscInt       *rowrange=mat->rmap->range;
2480   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2481   Mat            A=aij->A,B=aij->B,C=*matredundant;
2482   Mat_SeqAIJ     *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data;
2483   PetscScalar    *sbuf_a;
2484   PetscInt       nzlocal=a->nz+b->nz;
2485   PetscInt       j,cstart=mat->cmap->rstart,cend=mat->cmap->rend,row,nzA,nzB,ncols,*cworkA,*cworkB;
2486   PetscInt       rstart=mat->rmap->rstart,rend=mat->rmap->rend,*bmap=aij->garray,M,N;
2487   PetscInt       *cols,ctmp,lwrite,*rptr,l,*sbuf_j;
2488   MatScalar      *aworkA,*aworkB;
2489   PetscScalar    *vals;
2490   PetscMPIInt    tag1,tag2,tag3,imdex;
2491   MPI_Request    *s_waits1=PETSC_NULL,*s_waits2=PETSC_NULL,*s_waits3=PETSC_NULL,
2492                  *r_waits1=PETSC_NULL,*r_waits2=PETSC_NULL,*r_waits3=PETSC_NULL;
2493   MPI_Status     recv_status,*send_status;
2494   PetscInt       *sbuf_nz=PETSC_NULL,*rbuf_nz=PETSC_NULL,count;
2495   PetscInt       **rbuf_j=PETSC_NULL;
2496   PetscScalar    **rbuf_a=PETSC_NULL;
2497   Mat_Redundant  *redund=PETSC_NULL;
2498   PetscContainer container;
2499 
2500   PetscFunctionBegin;
2501   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2502   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2503 
2504   if (reuse == MAT_REUSE_MATRIX) {
2505     ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr);
2506     if (M != N || M != mat->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong global size");
2507     ierr = MatGetLocalSize(C,&M,&N);CHKERRQ(ierr);
2508     if (M != N || M != mlocal_sub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong local size");
2509     ierr = PetscObjectQuery((PetscObject)C,"Mat_Redundant",(PetscObject *)&container);CHKERRQ(ierr);
2510     if (!container) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Container does not exist");
2511     ierr = PetscContainerGetPointer(container,(void **)&redund);CHKERRQ(ierr);
2512     if (nzlocal != redund->nzlocal) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong nzlocal");
2513 
2514     nsends    = redund->nsends;
2515     nrecvs    = redund->nrecvs;
2516     send_rank = redund->send_rank;
2517     recv_rank = redund->recv_rank;
2518     sbuf_nz   = redund->sbuf_nz;
2519     rbuf_nz   = redund->rbuf_nz;
2520     sbuf_j    = redund->sbuf_j;
2521     sbuf_a    = redund->sbuf_a;
2522     rbuf_j    = redund->rbuf_j;
2523     rbuf_a    = redund->rbuf_a;
2524   }
2525 
2526   if (reuse == MAT_INITIAL_MATRIX){
2527     PetscMPIInt  subrank,subsize;
2528     PetscInt     nleftover,np_subcomm;
2529     /* determine the destination/source ranks (send_rank/recv_rank) and the counts nsends and nrecvs */
2530     ierr = MPI_Comm_rank(subcomm,&subrank);CHKERRQ(ierr);
2531     ierr = MPI_Comm_size(subcomm,&subsize);CHKERRQ(ierr);
2532     ierr = PetscMalloc2(size,PetscMPIInt,&send_rank,size,PetscMPIInt,&recv_rank);CHKERRQ(ierr);
2533     np_subcomm = size/nsubcomm;
2534     nleftover  = size - nsubcomm*np_subcomm;
2535     nsends = 0; nrecvs = 0;
2536     for (i=0; i<size; i++){ /* i=rank*/
2537       if (subrank == i/nsubcomm && rank != i){ /* my_subrank == other's subrank */
2538         send_rank[nsends] = i; nsends++;
2539         recv_rank[nrecvs++] = i;
2540       }
2541     }
2542     if (rank >= size - nleftover){/* this proc is a leftover processor */
2543       i = size-nleftover-1;
2544       j = 0;
2545       while (j < nsubcomm - nleftover){
2546         send_rank[nsends++] = i;
2547         i--; j++;
2548       }
2549     }
2550 
2551     if (nleftover && subsize == size/nsubcomm && subrank==subsize-1){ /* this proc recvs from leftover processors */
2552       for (i=0; i<nleftover; i++){
2553         recv_rank[nrecvs++] = size-nleftover+i;
2554       }
2555     }
2556 
2557     /* allocate sbuf_j, sbuf_a */
2558     i = nzlocal + rowrange[rank+1] - rowrange[rank] + 2;
2559     ierr = PetscMalloc(i*sizeof(PetscInt),&sbuf_j);CHKERRQ(ierr);
2560     ierr = PetscMalloc((nzlocal+1)*sizeof(PetscScalar),&sbuf_a);CHKERRQ(ierr);
2561   } /* end of if (reuse == MAT_INITIAL_MATRIX) */
2562 
2563   /* copy mat's local entries into the buffers */
2564   if (reuse == MAT_INITIAL_MATRIX){
2565     rownz_max = 0;
2566     rptr = sbuf_j;
2567     cols = sbuf_j + rend-rstart + 1;
2568     vals = sbuf_a;
2569     rptr[0] = 0;
2570     for (i=0; i<rend-rstart; i++){
2571       row = i + rstart;
2572       nzA    = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i];
2573       ncols  = nzA + nzB;
2574       cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i];
2575       aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i];
2576       /* load the column indices for this row into cols */
2577       lwrite = 0;
2578       for (l=0; l<nzB; l++) {
2579         if ((ctmp = bmap[cworkB[l]]) < cstart){
2580           vals[lwrite]   = aworkB[l];
2581           cols[lwrite++] = ctmp;
2582         }
2583       }
2584       for (l=0; l<nzA; l++){
2585         vals[lwrite]   = aworkA[l];
2586         cols[lwrite++] = cstart + cworkA[l];
2587       }
2588       for (l=0; l<nzB; l++) {
2589         if ((ctmp = bmap[cworkB[l]]) >= cend){
2590           vals[lwrite]   = aworkB[l];
2591           cols[lwrite++] = ctmp;
2592         }
2593       }
2594       vals += ncols;
2595       cols += ncols;
2596       rptr[i+1] = rptr[i] + ncols;
2597       if (rownz_max < ncols) rownz_max = ncols;
2598     }
2599     if (rptr[rend-rstart] != a->nz + b->nz) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB, "rptr[%D] %D != %D + %D",rend-rstart,rptr[rend-rstart],a->nz,b->nz);
2600   } else { /* only copy matrix values into sbuf_a */
2601     rptr = sbuf_j;
2602     vals = sbuf_a;
2603     rptr[0] = 0;
2604     for (i=0; i<rend-rstart; i++){
2605       row = i + rstart;
2606       nzA    = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i];
2607       ncols  = nzA + nzB;
2608       cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i];
2609       aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i];
2610       lwrite = 0;
2611       for (l=0; l<nzB; l++) {
2612         if ((ctmp = bmap[cworkB[l]]) < cstart) vals[lwrite++] = aworkB[l];
2613       }
2614       for (l=0; l<nzA; l++) vals[lwrite++] = aworkA[l];
2615       for (l=0; l<nzB; l++) {
2616         if ((ctmp = bmap[cworkB[l]]) >= cend) vals[lwrite++] = aworkB[l];
2617       }
2618       vals += ncols;
2619       rptr[i+1] = rptr[i] + ncols;
2620     }
2621   } /* end of if (reuse == MAT_INITIAL_MATRIX) */
2622 
2623   /* send nzlocal to others, and recv other's nzlocal */
2624   /*--------------------------------------------------*/
2625   if (reuse == MAT_INITIAL_MATRIX){
2626     ierr = PetscMalloc2(3*(nsends + nrecvs)+1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr);
2627     s_waits2 = s_waits3 + nsends;
2628     s_waits1 = s_waits2 + nsends;
2629     r_waits1 = s_waits1 + nsends;
2630     r_waits2 = r_waits1 + nrecvs;
2631     r_waits3 = r_waits2 + nrecvs;
2632   } else {
2633     ierr = PetscMalloc2(nsends + nrecvs +1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr);
2634     r_waits3 = s_waits3 + nsends;
2635   }
2636 
2637   ierr = PetscObjectGetNewTag((PetscObject)mat,&tag3);CHKERRQ(ierr);
2638   if (reuse == MAT_INITIAL_MATRIX){
2639     /* get new tags to keep the communication clean */
2640     ierr = PetscObjectGetNewTag((PetscObject)mat,&tag1);CHKERRQ(ierr);
2641     ierr = PetscObjectGetNewTag((PetscObject)mat,&tag2);CHKERRQ(ierr);
2642     ierr = PetscMalloc4(nsends,PetscInt,&sbuf_nz,nrecvs,PetscInt,&rbuf_nz,nrecvs,PetscInt*,&rbuf_j,nrecvs,PetscScalar*,&rbuf_a);CHKERRQ(ierr);
2643 
2644     /* post receives of other's nzlocal */
2645     for (i=0; i<nrecvs; i++){
2646       ierr = MPI_Irecv(rbuf_nz+i,1,MPIU_INT,MPI_ANY_SOURCE,tag1,comm,r_waits1+i);CHKERRQ(ierr);
2647     }
2648     /* send nzlocal to others */
2649     for (i=0; i<nsends; i++){
2650       sbuf_nz[i] = nzlocal;
2651       ierr = MPI_Isend(sbuf_nz+i,1,MPIU_INT,send_rank[i],tag1,comm,s_waits1+i);CHKERRQ(ierr);
2652     }
2653     /* wait on receives of nzlocal; allocate space for rbuf_j, rbuf_a */
2654     count = nrecvs;
2655     while (count) {
2656       ierr = MPI_Waitany(nrecvs,r_waits1,&imdex,&recv_status);CHKERRQ(ierr);
2657       recv_rank[imdex] = recv_status.MPI_SOURCE;
2658       /* allocate rbuf_a and rbuf_j; then post receives of rbuf_j */
2659       ierr = PetscMalloc((rbuf_nz[imdex]+1)*sizeof(PetscScalar),&rbuf_a[imdex]);CHKERRQ(ierr);
2660 
2661       i = rowrange[recv_status.MPI_SOURCE+1] - rowrange[recv_status.MPI_SOURCE]; /* number of expected mat->i */
2662       rbuf_nz[imdex] += i + 2;
2663       ierr = PetscMalloc(rbuf_nz[imdex]*sizeof(PetscInt),&rbuf_j[imdex]);CHKERRQ(ierr);
2664       ierr = MPI_Irecv(rbuf_j[imdex],rbuf_nz[imdex],MPIU_INT,recv_status.MPI_SOURCE,tag2,comm,r_waits2+imdex);CHKERRQ(ierr);
2665       count--;
2666     }
2667     /* wait on sends of nzlocal */
2668     if (nsends) {ierr = MPI_Waitall(nsends,s_waits1,send_status);CHKERRQ(ierr);}
2669     /* send mat->i,j to others, and recv from others */
2670     /*------------------------------------------------*/
2671     for (i=0; i<nsends; i++){
2672       j = nzlocal + rowrange[rank+1] - rowrange[rank] + 1;
2673       ierr = MPI_Isend(sbuf_j,j,MPIU_INT,send_rank[i],tag2,comm,s_waits2+i);CHKERRQ(ierr);
2674     }
2675     /* wait on receives of mat->i,j */
2676     /*------------------------------*/
2677     count = nrecvs;
2678     while (count) {
2679       ierr = MPI_Waitany(nrecvs,r_waits2,&imdex,&recv_status);CHKERRQ(ierr);
2680       if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE);
2681       count--;
2682     }
2683     /* wait on sends of mat->i,j */
2684     /*---------------------------*/
2685     if (nsends) {
2686       ierr = MPI_Waitall(nsends,s_waits2,send_status);CHKERRQ(ierr);
2687     }
2688   } /* end of if (reuse == MAT_INITIAL_MATRIX) */
2689 
2690   /* post receives, send and receive mat->a */
2691   /*----------------------------------------*/
2692   for (imdex=0; imdex<nrecvs; imdex++) {
2693     ierr = MPI_Irecv(rbuf_a[imdex],rbuf_nz[imdex],MPIU_SCALAR,recv_rank[imdex],tag3,comm,r_waits3+imdex);CHKERRQ(ierr);
2694   }
2695   for (i=0; i<nsends; i++){
2696     ierr = MPI_Isend(sbuf_a,nzlocal,MPIU_SCALAR,send_rank[i],tag3,comm,s_waits3+i);CHKERRQ(ierr);
2697   }
2698   count = nrecvs;
2699   while (count) {
2700     ierr = MPI_Waitany(nrecvs,r_waits3,&imdex,&recv_status);CHKERRQ(ierr);
2701     if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE);
2702     count--;
2703   }
2704   if (nsends) {
2705     ierr = MPI_Waitall(nsends,s_waits3,send_status);CHKERRQ(ierr);
2706   }
2707 
2708   ierr = PetscFree2(s_waits3,send_status);CHKERRQ(ierr);
2709 
2710   /* create redundant matrix */
2711   /*-------------------------*/
2712   if (reuse == MAT_INITIAL_MATRIX){
2713     /* compute rownz_max for preallocation */
2714     for (imdex=0; imdex<nrecvs; imdex++){
2715       j = rowrange[recv_rank[imdex]+1] - rowrange[recv_rank[imdex]];
2716       rptr = rbuf_j[imdex];
2717       for (i=0; i<j; i++){
2718         ncols = rptr[i+1] - rptr[i];
2719         if (rownz_max < ncols) rownz_max = ncols;
2720       }
2721     }
2722 
2723     ierr = MatCreate(subcomm,&C);CHKERRQ(ierr);
2724     ierr = MatSetSizes(C,mlocal_sub,mlocal_sub,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr);
2725     ierr = MatSetFromOptions(C);CHKERRQ(ierr);
2726     ierr = MatSeqAIJSetPreallocation(C,rownz_max,PETSC_NULL);CHKERRQ(ierr);
2727     ierr = MatMPIAIJSetPreallocation(C,rownz_max,PETSC_NULL,rownz_max,PETSC_NULL);CHKERRQ(ierr);
2728   } else {
2729     C = *matredundant;
2730   }
2731 
2732   /* insert local matrix entries */
2733   rptr = sbuf_j;
2734   cols = sbuf_j + rend-rstart + 1;
2735   vals = sbuf_a;
2736   for (i=0; i<rend-rstart; i++){
2737     row   = i + rstart;
2738     ncols = rptr[i+1] - rptr[i];
2739     ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
2740     vals += ncols;
2741     cols += ncols;
2742   }
2743   /* insert received matrix entries */
2744   for (imdex=0; imdex<nrecvs; imdex++){
2745     rstart = rowrange[recv_rank[imdex]];
2746     rend   = rowrange[recv_rank[imdex]+1];
2747     rptr = rbuf_j[imdex];
2748     cols = rbuf_j[imdex] + rend-rstart + 1;
2749     vals = rbuf_a[imdex];
2750     for (i=0; i<rend-rstart; i++){
2751       row   = i + rstart;
2752       ncols = rptr[i+1] - rptr[i];
2753       ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
2754       vals += ncols;
2755       cols += ncols;
2756     }
2757   }
2758   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2759   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2760   ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr);
2761   if (M != mat->rmap->N || N != mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"redundant mat size %d != input mat size %d",M,mat->rmap->N);
2762   if (reuse == MAT_INITIAL_MATRIX) {
2763     PetscContainer container;
2764     *matredundant = C;
2765     /* create a supporting struct and attach it to C for reuse */
2766     ierr = PetscNewLog(C,Mat_Redundant,&redund);CHKERRQ(ierr);
2767     ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
2768     ierr = PetscContainerSetPointer(container,redund);CHKERRQ(ierr);
2769     ierr = PetscContainerSetUserDestroy(container,PetscContainerDestroy_MatRedundant);CHKERRQ(ierr);
2770     ierr = PetscObjectCompose((PetscObject)C,"Mat_Redundant",(PetscObject)container);CHKERRQ(ierr);
2771     ierr = PetscContainerDestroy(&container);CHKERRQ(ierr);
2772 
2773     redund->nzlocal = nzlocal;
2774     redund->nsends  = nsends;
2775     redund->nrecvs  = nrecvs;
2776     redund->send_rank = send_rank;
2777     redund->recv_rank = recv_rank;
2778     redund->sbuf_nz = sbuf_nz;
2779     redund->rbuf_nz = rbuf_nz;
2780     redund->sbuf_j  = sbuf_j;
2781     redund->sbuf_a  = sbuf_a;
2782     redund->rbuf_j  = rbuf_j;
2783     redund->rbuf_a  = rbuf_a;
2784 
2785     redund->Destroy = C->ops->destroy;
2786     C->ops->destroy = MatDestroy_MatRedundant;
2787   }
2788   PetscFunctionReturn(0);
2789 }
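/*
   Usage sketch (illustrative only, not part of the original source).  The routine above is reached
   through the public MatGetRedundantMatrix() interface; nsubcomm and subcomm below are assumed to
   have been set up by the caller, and the argument list should be checked against the
   MatGetRedundantMatrix() manual page for this release.

     Mat redundant;
     ierr = MatGetRedundantMatrix(A,nsubcomm,subcomm,PETSC_DECIDE,MAT_INITIAL_MATRIX,&redundant);CHKERRQ(ierr);
     ...   use the redundant copy, e.g. for a local solve ...
     ierr = MatGetRedundantMatrix(A,nsubcomm,subcomm,PETSC_DECIDE,MAT_REUSE_MATRIX,&redundant);CHKERRQ(ierr);
*/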
2790 
2791 #undef __FUNCT__
2792 #define __FUNCT__ "MatGetRowMaxAbs_MPIAIJ"
2793 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2794 {
2795   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2796   PetscErrorCode ierr;
2797   PetscInt       i,*idxb = 0;
2798   PetscScalar    *va,*vb;
2799   Vec            vtmp;
2800 
2801   PetscFunctionBegin;
2802   ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr);
2803   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2804   if (idx) {
2805     for (i=0; i<A->rmap->n; i++) {
2806       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2807     }
2808   }
2809 
2810   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
2811   if (idx) {
2812     ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr);
2813   }
2814   ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
2815   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
2816 
2817   for (i=0; i<A->rmap->n; i++){
2818     if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
2819       va[i] = vb[i];
2820       if (idx) idx[i] = a->garray[idxb[i]];
2821     }
2822   }
2823 
2824   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2825   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
2826   ierr = PetscFree(idxb);CHKERRQ(ierr);
2827   ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
2828   PetscFunctionReturn(0);
2829 }
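/*
   Usage sketch (illustrative only): MatGetRowMaxAbs_MPIAIJ() above takes the per-row maxima of the
   diagonal block a->A and the off-diagonal block a->B and maps off-diagonal winners back to global
   column numbers through a->garray.  From the caller's side:

     Vec      rowmax;
     PetscInt m,*cols;
     ierr = MatGetLocalSize(A,&m,PETSC_NULL);CHKERRQ(ierr);
     ierr = VecCreateMPI(PETSC_COMM_WORLD,m,PETSC_DETERMINE,&rowmax);CHKERRQ(ierr);
     ierr = PetscMalloc(m*sizeof(PetscInt),&cols);CHKERRQ(ierr);
     ierr = MatGetRowMaxAbs(A,rowmax,cols);CHKERRQ(ierr);
     cols[i] then holds the global column index of the largest entry in local row i.
*/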
2830 
2831 #undef __FUNCT__
2832 #define __FUNCT__ "MatGetRowMinAbs_MPIAIJ"
2833 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2834 {
2835   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2836   PetscErrorCode ierr;
2837   PetscInt       i,*idxb = 0;
2838   PetscScalar    *va,*vb;
2839   Vec            vtmp;
2840 
2841   PetscFunctionBegin;
2842   ierr = MatGetRowMinAbs(a->A,v,idx);CHKERRQ(ierr);
2843   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2844   if (idx) {
2845     for (i=0; i<A->rmap->n; i++) { /* loop over local rows, as in MatGetRowMaxAbs_MPIAIJ() */
2846       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2847     }
2848   }
2849 
2850   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
2851   if (idx) {
2852     ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr);
2853   }
2854   ierr = MatGetRowMinAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
2855   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
2856 
2857   for (i=0; i<A->rmap->n; i++){
2858     if (PetscAbsScalar(va[i]) > PetscAbsScalar(vb[i])) {
2859       va[i] = vb[i];
2860       if (idx) idx[i] = a->garray[idxb[i]];
2861     }
2862   }
2863 
2864   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2865   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
2866   ierr = PetscFree(idxb);CHKERRQ(ierr);
2867   ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
2868   PetscFunctionReturn(0);
2869 }
2870 
2871 #undef __FUNCT__
2872 #define __FUNCT__ "MatGetRowMin_MPIAIJ"
2873 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2874 {
2875   Mat_MPIAIJ    *mat    = (Mat_MPIAIJ *) A->data;
2876   PetscInt       n      = A->rmap->n;
2877   PetscInt       cstart = A->cmap->rstart;
2878   PetscInt      *cmap   = mat->garray;
2879   PetscInt      *diagIdx, *offdiagIdx;
2880   Vec            diagV, offdiagV;
2881   PetscScalar   *a, *diagA, *offdiagA;
2882   PetscInt       r;
2883   PetscErrorCode ierr;
2884 
2885   PetscFunctionBegin;
2886   ierr = PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);CHKERRQ(ierr);
2887   ierr = VecCreateSeq(((PetscObject)A)->comm, n, &diagV);CHKERRQ(ierr);
2888   ierr = VecCreateSeq(((PetscObject)A)->comm, n, &offdiagV);CHKERRQ(ierr);
2889   ierr = MatGetRowMin(mat->A, diagV,    diagIdx);CHKERRQ(ierr);
2890   ierr = MatGetRowMin(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
2891   ierr = VecGetArray(v,        &a);CHKERRQ(ierr);
2892   ierr = VecGetArray(diagV,    &diagA);CHKERRQ(ierr);
2893   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2894   for(r = 0; r < n; ++r) {
2895     if (PetscAbsScalar(diagA[r]) <= PetscAbsScalar(offdiagA[r])) {
2896       a[r]   = diagA[r];
2897       idx[r] = cstart + diagIdx[r];
2898     } else {
2899       a[r]   = offdiagA[r];
2900       idx[r] = cmap[offdiagIdx[r]];
2901     }
2902   }
2903   ierr = VecRestoreArray(v,        &a);CHKERRQ(ierr);
2904   ierr = VecRestoreArray(diagV,    &diagA);CHKERRQ(ierr);
2905   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2906   ierr = VecDestroy(&diagV);CHKERRQ(ierr);
2907   ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
2908   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
2909   PetscFunctionReturn(0);
2910 }
2911 
2912 #undef __FUNCT__
2913 #define __FUNCT__ "MatGetRowMax_MPIAIJ"
2914 PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2915 {
2916   Mat_MPIAIJ    *mat    = (Mat_MPIAIJ *) A->data;
2917   PetscInt       n      = A->rmap->n;
2918   PetscInt       cstart = A->cmap->rstart;
2919   PetscInt      *cmap   = mat->garray;
2920   PetscInt      *diagIdx, *offdiagIdx;
2921   Vec            diagV, offdiagV;
2922   PetscScalar   *a, *diagA, *offdiagA;
2923   PetscInt       r;
2924   PetscErrorCode ierr;
2925 
2926   PetscFunctionBegin;
2927   ierr = PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);CHKERRQ(ierr);
2928   ierr = VecCreateSeq(((PetscObject)A)->comm, n, &diagV);CHKERRQ(ierr);
2929   ierr = VecCreateSeq(((PetscObject)A)->comm, n, &offdiagV);CHKERRQ(ierr);
2930   ierr = MatGetRowMax(mat->A, diagV,    diagIdx);CHKERRQ(ierr);
2931   ierr = MatGetRowMax(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
2932   ierr = VecGetArray(v,        &a);CHKERRQ(ierr);
2933   ierr = VecGetArray(diagV,    &diagA);CHKERRQ(ierr);
2934   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2935   for(r = 0; r < n; ++r) {
2936     if (PetscAbsScalar(diagA[r]) >= PetscAbsScalar(offdiagA[r])) {
2937       a[r]   = diagA[r];
2938       idx[r] = cstart + diagIdx[r];
2939     } else {
2940       a[r]   = offdiagA[r];
2941       idx[r] = cmap[offdiagIdx[r]];
2942     }
2943   }
2944   ierr = VecRestoreArray(v,        &a);CHKERRQ(ierr);
2945   ierr = VecRestoreArray(diagV,    &diagA);CHKERRQ(ierr);
2946   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2947   ierr = VecDestroy(&diagV);CHKERRQ(ierr);
2948   ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
2949   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
2950   PetscFunctionReturn(0);
2951 }
2952 
2953 #undef __FUNCT__
2954 #define __FUNCT__ "MatGetSeqNonzerostructure_MPIAIJ"
2955 PetscErrorCode MatGetSeqNonzerostructure_MPIAIJ(Mat mat,Mat *newmat)
2956 {
2957   PetscErrorCode ierr;
2958   Mat            *dummy;
2959 
2960   PetscFunctionBegin;
2961   ierr = MatGetSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy);CHKERRQ(ierr);
2962   *newmat = *dummy;
2963   ierr = PetscFree(dummy);CHKERRQ(ierr);
2964   PetscFunctionReturn(0);
2965 }
2966 
2967 extern PetscErrorCode  MatFDColoringApply_AIJ(Mat,MatFDColoring,Vec,MatStructure*,void*);
2968 
2969 #undef __FUNCT__
2970 #define __FUNCT__ "MatInvertBlockDiagonal_MPIAIJ"
2971 PetscErrorCode  MatInvertBlockDiagonal_MPIAIJ(Mat A,PetscScalar **values)
2972 {
2973   Mat_MPIAIJ    *a = (Mat_MPIAIJ*) A->data;
2974   PetscErrorCode ierr;
2975 
2976   PetscFunctionBegin;
2977   ierr = MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr);
2978   PetscFunctionReturn(0);
2979 }
2980 
2981 
2982 /* -------------------------------------------------------------------*/
2983 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
2984        MatGetRow_MPIAIJ,
2985        MatRestoreRow_MPIAIJ,
2986        MatMult_MPIAIJ,
2987 /* 4*/ MatMultAdd_MPIAIJ,
2988        MatMultTranspose_MPIAIJ,
2989        MatMultTransposeAdd_MPIAIJ,
2990 #ifdef PETSC_HAVE_PBGL
2991        MatSolve_MPIAIJ,
2992 #else
2993        0,
2994 #endif
2995        0,
2996        0,
2997 /*10*/ 0,
2998        0,
2999        0,
3000        MatSOR_MPIAIJ,
3001        MatTranspose_MPIAIJ,
3002 /*15*/ MatGetInfo_MPIAIJ,
3003        MatEqual_MPIAIJ,
3004        MatGetDiagonal_MPIAIJ,
3005        MatDiagonalScale_MPIAIJ,
3006        MatNorm_MPIAIJ,
3007 /*20*/ MatAssemblyBegin_MPIAIJ,
3008        MatAssemblyEnd_MPIAIJ,
3009        MatSetOption_MPIAIJ,
3010        MatZeroEntries_MPIAIJ,
3011 /*24*/ MatZeroRows_MPIAIJ,
3012        0,
3013 #ifdef PETSC_HAVE_PBGL
3014        0,
3015 #else
3016        0,
3017 #endif
3018        0,
3019        0,
3020 /*29*/ MatSetUpPreallocation_MPIAIJ,
3021 #ifdef PETSC_HAVE_PBGL
3022        0,
3023 #else
3024        0,
3025 #endif
3026        0,
3027        0,
3028        0,
3029 /*34*/ MatDuplicate_MPIAIJ,
3030        0,
3031        0,
3032        0,
3033        0,
3034 /*39*/ MatAXPY_MPIAIJ,
3035        MatGetSubMatrices_MPIAIJ,
3036        MatIncreaseOverlap_MPIAIJ,
3037        MatGetValues_MPIAIJ,
3038        MatCopy_MPIAIJ,
3039 /*44*/ MatGetRowMax_MPIAIJ,
3040        MatScale_MPIAIJ,
3041        0,
3042        0,
3043        MatZeroRowsColumns_MPIAIJ,
3044 /*49*/ MatSetBlockSize_MPIAIJ,
3045        0,
3046        0,
3047        0,
3048        0,
3049 /*54*/ MatFDColoringCreate_MPIAIJ,
3050        0,
3051        MatSetUnfactored_MPIAIJ,
3052        MatPermute_MPIAIJ,
3053        0,
3054 /*59*/ MatGetSubMatrix_MPIAIJ,
3055        MatDestroy_MPIAIJ,
3056        MatView_MPIAIJ,
3057        0,
3058        0,
3059 /*64*/ 0,
3060        0,
3061        0,
3062        0,
3063        0,
3064 /*69*/ MatGetRowMaxAbs_MPIAIJ,
3065        MatGetRowMinAbs_MPIAIJ,
3066        0,
3067        MatSetColoring_MPIAIJ,
3068 #if defined(PETSC_HAVE_ADIC)
3069        MatSetValuesAdic_MPIAIJ,
3070 #else
3071        0,
3072 #endif
3073        MatSetValuesAdifor_MPIAIJ,
3074 /*75*/ MatFDColoringApply_AIJ,
3075        0,
3076        0,
3077        0,
3078        0,
3079 /*80*/ 0,
3080        0,
3081        0,
3082 /*83*/ MatLoad_MPIAIJ,
3083        0,
3084        0,
3085        0,
3086        0,
3087        0,
3088 /*89*/ MatMatMult_MPIAIJ_MPIAIJ,
3089        MatMatMultSymbolic_MPIAIJ_MPIAIJ,
3090        MatMatMultNumeric_MPIAIJ_MPIAIJ,
3091        MatPtAP_Basic,
3092        MatPtAPSymbolic_MPIAIJ,
3093 /*94*/ MatPtAPNumeric_MPIAIJ,
3094        0,
3095        0,
3096        0,
3097        0,
3098 /*99*/ 0,
3099        MatPtAPSymbolic_MPIAIJ_MPIAIJ,
3100        MatPtAPNumeric_MPIAIJ_MPIAIJ,
3101        MatConjugate_MPIAIJ,
3102        0,
3103 /*104*/MatSetValuesRow_MPIAIJ,
3104        MatRealPart_MPIAIJ,
3105        MatImaginaryPart_MPIAIJ,
3106        0,
3107        0,
3108 /*109*/0,
3109        MatGetRedundantMatrix_MPIAIJ,
3110        MatGetRowMin_MPIAIJ,
3111        0,
3112        0,
3113 /*114*/MatGetSeqNonzerostructure_MPIAIJ,
3114        0,
3115        0,
3116        0,
3117        0,
3118 /*119*/0,
3119        0,
3120        0,
3121        0,
3122        MatGetMultiProcBlock_MPIAIJ,
3123 /*124*/MatFindNonZeroRows_MPIAIJ,
3124        MatGetColumnNorms_MPIAIJ,
3125        MatInvertBlockDiagonal_MPIAIJ,
3126        0,
3127        MatGetSubMatricesParallel_MPIAIJ,
3128 /*129*/0
3129 };
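/*
   Note (illustrative): MatOps_Values above is the function table installed on every MATMPIAIJ
   matrix; the type-independent Mat interface dispatches through it, roughly as

     ierr = (*mat->ops->mult)(mat,x,y);CHKERRQ(ierr);          what MatMult(mat,x,y) ends up calling

   A zero slot means the operation is not implemented for this matrix type, so the generic routine
   raises an error (or falls back to a default) when that operation is requested.
*/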
3130 
3131 /* ----------------------------------------------------------------------------------------*/
3132 
3133 EXTERN_C_BEGIN
3134 #undef __FUNCT__
3135 #define __FUNCT__ "MatStoreValues_MPIAIJ"
3136 PetscErrorCode  MatStoreValues_MPIAIJ(Mat mat)
3137 {
3138   Mat_MPIAIJ     *aij = (Mat_MPIAIJ *)mat->data;
3139   PetscErrorCode ierr;
3140 
3141   PetscFunctionBegin;
3142   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
3143   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
3144   PetscFunctionReturn(0);
3145 }
3146 EXTERN_C_END
3147 
3148 EXTERN_C_BEGIN
3149 #undef __FUNCT__
3150 #define __FUNCT__ "MatRetrieveValues_MPIAIJ"
3151 PetscErrorCode  MatRetrieveValues_MPIAIJ(Mat mat)
3152 {
3153   Mat_MPIAIJ     *aij = (Mat_MPIAIJ *)mat->data;
3154   PetscErrorCode ierr;
3155 
3156   PetscFunctionBegin;
3157   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
3158   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
3159   PetscFunctionReturn(0);
3160 }
3161 EXTERN_C_END
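/*
   Usage sketch (illustrative only): the two routines above forward MatStoreValues()/MatRetrieveValues()
   to the diagonal and off-diagonal SEQAIJ blocks.  The standard pattern, which requires that the
   nonzero pattern be frozen first, is

     ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);CHKERRQ(ierr);
     ierr = MatStoreValues(A);CHKERRQ(ierr);        snapshot the current numerical values
     ...                                            overwrite values, e.g. shift or factor in place
     ierr = MatRetrieveValues(A);CHKERRQ(ierr);     restore the snapshot
*/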
3162 
3163 EXTERN_C_BEGIN
3164 #undef __FUNCT__
3165 #define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ"
3166 PetscErrorCode  MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
3167 {
3168   Mat_MPIAIJ     *b;
3169   PetscErrorCode ierr;
3170   PetscInt       i;
3171 
3172   PetscFunctionBegin;
3173   if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5;
3174   if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2;
3175   if (d_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz);
3176   if (o_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz);
3177 
3178   ierr = PetscLayoutSetBlockSize(B->rmap,1);CHKERRQ(ierr);
3179   ierr = PetscLayoutSetBlockSize(B->cmap,1);CHKERRQ(ierr);
3180   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3181   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3182   if (d_nnz) {
3183     for (i=0; i<B->rmap->n; i++) {
3184       if (d_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %D value %D",i,d_nnz[i]);
3185     }
3186   }
3187   if (o_nnz) {
3188     for (i=0; i<B->rmap->n; i++) {
3189       if (o_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %D value %D",i,o_nnz[i]);
3190     }
3191   }
3192   b = (Mat_MPIAIJ*)B->data;
3193 
3194   if (!B->preallocated) {
3195     /* Explicitly create 2 MATSEQAIJ matrices. */
3196     ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
3197     ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr);
3198     ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr);
3199     ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr);
3200     ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
3201     ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr);
3202     ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr);
3203     ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr);
3204   }
3205 
3206   ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr);
3207   ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr);
3208   B->preallocated = PETSC_TRUE;
3209   PetscFunctionReturn(0);
3210 }
3211 EXTERN_C_END
3212 
3213 #undef __FUNCT__
3214 #define __FUNCT__ "MatDuplicate_MPIAIJ"
3215 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
3216 {
3217   Mat            mat;
3218   Mat_MPIAIJ     *a,*oldmat = (Mat_MPIAIJ*)matin->data;
3219   PetscErrorCode ierr;
3220 
3221   PetscFunctionBegin;
3222   *newmat       = 0;
3223   ierr = MatCreate(((PetscObject)matin)->comm,&mat);CHKERRQ(ierr);
3224   ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr);
3225   ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr);
3226   ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
3227   a    = (Mat_MPIAIJ*)mat->data;
3228 
3229   mat->factortype    = matin->factortype;
3230   mat->rmap->bs      = matin->rmap->bs;
3231   mat->assembled    = PETSC_TRUE;
3232   mat->insertmode   = NOT_SET_VALUES;
3233   mat->preallocated = PETSC_TRUE;
3234 
3235   a->size           = oldmat->size;
3236   a->rank           = oldmat->rank;
3237   a->donotstash     = oldmat->donotstash;
3238   a->roworiented    = oldmat->roworiented;
3239   a->rowindices     = 0;
3240   a->rowvalues      = 0;
3241   a->getrowactive   = PETSC_FALSE;
3242 
3243   ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr);
3244   ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr);
3245 
3246   if (oldmat->colmap) {
3247 #if defined (PETSC_USE_CTABLE)
3248     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
3249 #else
3250     ierr = PetscMalloc((mat->cmap->N)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr);
3251     ierr = PetscLogObjectMemory(mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
3252     ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
3253 #endif
3254   } else a->colmap = 0;
3255   if (oldmat->garray) {
3256     PetscInt len;
3257     len  = oldmat->B->cmap->n;
3258     ierr = PetscMalloc((len+1)*sizeof(PetscInt),&a->garray);CHKERRQ(ierr);
3259     ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr);
3260     if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); }
3261   } else a->garray = 0;
3262 
3263   ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
3264   ierr = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr);
3265   ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
3266   ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr);
3267   ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
3268   ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr);
3269   ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
3270   ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr);
3271   ierr = PetscFListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr);
3272   *newmat = mat;
3273   PetscFunctionReturn(0);
3274 }
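/*
   Usage sketch (illustrative only): duplicating a MATMPIAIJ matrix with the routine above via the
   generic interface; MAT_COPY_VALUES also copies the numerical values, while MAT_DO_NOT_COPY_VALUES
   duplicates only the nonzero structure.

     Mat B;
     ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr);
     ierr = MatDestroy(&B);CHKERRQ(ierr);
*/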
3275 
3276 
3277 
3278 #undef __FUNCT__
3279 #define __FUNCT__ "MatLoad_MPIAIJ"
3280 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer)
3281 {
3282   PetscScalar    *vals,*svals;
3283   MPI_Comm       comm = ((PetscObject)viewer)->comm;
3284   PetscErrorCode ierr;
3285   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
3286   PetscInt       i,nz,j,rstart,rend,mmax,maxnz = 0,grows,gcols;
3287   PetscInt       header[4],*rowlengths = 0,M,N,m,*cols;
3288   PetscInt       *ourlens = PETSC_NULL,*procsnz = PETSC_NULL,*offlens = PETSC_NULL,jj,*mycols,*smycols;
3289   PetscInt       cend,cstart,n,*rowners,sizesset=1;
3290   int            fd;
3291 
3292   PetscFunctionBegin;
3293   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3294   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3295   if (!rank) {
3296     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
3297     ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
3298     if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
3299   }
3300 
3301   if (newMat->rmap->n < 0 && newMat->rmap->N < 0 && newMat->cmap->n < 0 && newMat->cmap->N < 0) sizesset = 0;
3302 
3303   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
3304   M = header[1]; N = header[2];
3305   /* If global rows/cols are set to PETSC_DECIDE, set it to the sizes given in the file */
3306   if (sizesset && newMat->rmap->N < 0) newMat->rmap->N = M;
3307   if (sizesset && newMat->cmap->N < 0) newMat->cmap->N = N;
3308 
3309   /* If global sizes are set, check if they are consistent with that given in the file */
3310   if (sizesset) {
3311     ierr = MatGetSize(newMat,&grows,&gcols);CHKERRQ(ierr);
3312   }
3313   if (sizesset && newMat->rmap->N != grows) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows:Matrix in file has (%d) and input matrix has (%d)",M,grows);
3314   if (sizesset && newMat->cmap->N != gcols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols:Matrix in file has (%d) and input matrix has (%d)",N,gcols);
3315 
3316   /* determine ownership of all rows */
3317   if (newMat->rmap->n < 0 ) m    = M/size + ((M % size) > rank); /* PETSC_DECIDE */
3318   else m = newMat->rmap->n; /* Set by user */
3319 
3320   ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr);
3321   ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
3322 
3323   /* First process needs enough room for process with most rows */
3324   if (!rank) {
3325     mmax       = rowners[1];
3326     for (i=2; i<=size; i++) {  /* rowners[1..size] hold the local row count of each process */
3327       mmax = PetscMax(mmax,rowners[i]);
3328     }
3329   } else mmax = m;
3330 
3331   rowners[0] = 0;
3332   for (i=2; i<=size; i++) {
3333     rowners[i] += rowners[i-1];
3334   }
3335   rstart = rowners[rank];
3336   rend   = rowners[rank+1];
3337 
3338   /* distribute row lengths to all processors */
3339   ierr    = PetscMalloc2(mmax,PetscInt,&ourlens,mmax,PetscInt,&offlens);CHKERRQ(ierr);
3340   if (!rank) {
3341     ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr);
3342     ierr = PetscMalloc(mmax*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr); /* must hold the largest local row count */
3343     ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr);
3344     ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr);
3345     for (j=0; j<m; j++) {
3346       procsnz[0] += ourlens[j];
3347     }
3348     for (i=1; i<size; i++) {
3349       ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr);
3350       /* calculate the number of nonzeros on each processor */
3351       for (j=0; j<rowners[i+1]-rowners[i]; j++) {
3352         procsnz[i] += rowlengths[j];
3353       }
3354       ierr = MPILong_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
3355     }
3356     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
3357   } else {
3358     ierr = MPILong_Recv(ourlens,m,MPIU_INT,0,tag,comm);CHKERRQ(ierr);
3359   }
3360 
3361   if (!rank) {
3362     /* determine max buffer needed and allocate it */
3363     maxnz = 0;
3364     for (i=0; i<size; i++) {
3365       maxnz = PetscMax(maxnz,procsnz[i]);
3366     }
3367     ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr);
3368 
3369     /* read in my part of the matrix column indices  */
3370     nz   = procsnz[0];
3371     ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr);
3372     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
3373 
3374     /* read in everyone else's column indices and ship them off */
3375     for (i=1; i<size; i++) {
3376       nz     = procsnz[i];
3377       ierr   = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
3378       ierr   = MPILong_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
3379     }
3380     ierr = PetscFree(cols);CHKERRQ(ierr);
3381   } else {
3382     /* determine buffer space needed for message */
3383     nz = 0;
3384     for (i=0; i<m; i++) {
3385       nz += ourlens[i];
3386     }
3387     ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr);
3388 
3389     /* receive message of column indices*/
3390     ierr = MPILong_Recv(mycols,nz,MPIU_INT,0,tag,comm);CHKERRQ(ierr);
3391   }
3392 
3393   /* determine column ownership if matrix is not square */
3394   if (N != M) {
3395     if (newMat->cmap->n < 0) n      = N/size + ((N % size) > rank);
3396     else n = newMat->cmap->n;
3397     ierr   = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3398     cstart = cend - n;
3399   } else {
3400     cstart = rstart;
3401     cend   = rend;
3402     n      = cend - cstart;
3403   }
3404 
3405   /* loop over local rows, determining number of off diagonal entries */
3406   ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr);
3407   jj = 0;
3408   for (i=0; i<m; i++) {
3409     for (j=0; j<ourlens[i]; j++) {
3410       if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++;
3411       jj++;
3412     }
3413   }
3414 
3415   for (i=0; i<m; i++) {
3416     ourlens[i] -= offlens[i];
3417   }
3418   if (!sizesset) {
3419     ierr = MatSetSizes(newMat,m,n,M,N);CHKERRQ(ierr);
3420   }
3421   ierr = MatMPIAIJSetPreallocation(newMat,0,ourlens,0,offlens);CHKERRQ(ierr);
3422 
3423   for (i=0; i<m; i++) {
3424     ourlens[i] += offlens[i];
3425   }
3426 
3427   if (!rank) {
3428     ierr = PetscMalloc((maxnz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
3429 
3430     /* read in my part of the matrix numerical values  */
3431     nz   = procsnz[0];
3432     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3433 
3434     /* insert into matrix */
3435     jj      = rstart;
3436     smycols = mycols;
3437     svals   = vals;
3438     for (i=0; i<m; i++) {
3439       ierr = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
3440       smycols += ourlens[i];
3441       svals   += ourlens[i];
3442       jj++;
3443     }
3444 
3445     /* read in other processors and ship out */
3446     for (i=1; i<size; i++) {
3447       nz     = procsnz[i];
3448       ierr   = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3449       ierr   = MPILong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr);
3450     }
3451     ierr = PetscFree(procsnz);CHKERRQ(ierr);
3452   } else {
3453     /* receive numeric values */
3454     ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
3455 
3456     /* receive message of values*/
3457     ierr   = MPILong_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr);
3458 
3459     /* insert into matrix */
3460     jj      = rstart;
3461     smycols = mycols;
3462     svals   = vals;
3463     for (i=0; i<m; i++) {
3464       ierr     = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
3465       smycols += ourlens[i];
3466       svals   += ourlens[i];
3467       jj++;
3468     }
3469   }
3470   ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr);
3471   ierr = PetscFree(vals);CHKERRQ(ierr);
3472   ierr = PetscFree(mycols);CHKERRQ(ierr);
3473   ierr = PetscFree(rowners);CHKERRQ(ierr);
3474 
3475   ierr = MatAssemblyBegin(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3476   ierr = MatAssemblyEnd(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3477   PetscFunctionReturn(0);
3478 }
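/*
   Usage sketch (illustrative only; the file name is an assumption): loading a MATMPIAIJ matrix from
   a PETSc binary file through the generic MatLoad(), which dispatches to the routine above.

     PetscViewer viewer;
     Mat         A;
     ierr = PetscViewerBinaryOpen(PETSC_COMM_WORLD,"matrix.dat",FILE_MODE_READ,&viewer);CHKERRQ(ierr);
     ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
     ierr = MatLoad(A,viewer);CHKERRQ(ierr);
     ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr);
*/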
3479 
3480 #undef __FUNCT__
3481 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ"
3482 PetscErrorCode MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat)
3483 {
3484   PetscErrorCode ierr;
3485   IS             iscol_local;
3486   PetscInt       csize;
3487 
3488   PetscFunctionBegin;
3489   ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
3490   if (call == MAT_REUSE_MATRIX) {
3491     ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr);
3492     if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3493   } else {
3494     ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
3495   }
3496   ierr = MatGetSubMatrix_MPIAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr);
3497   if (call == MAT_INITIAL_MATRIX) {
3498     ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
3499     ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
3500   }
3501   PetscFunctionReturn(0);
3502 }
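/*
   Usage sketch (illustrative only; nloc and first are assumed caller-chosen values): extracting a
   parallel submatrix through the generic MatGetSubMatrix(), which calls the routine above.

     IS  isrow,iscol;
     Mat sub;
     ierr = ISCreateStride(PETSC_COMM_WORLD,nloc,first,1,&isrow);CHKERRQ(ierr);
     ierr = ISCreateStride(PETSC_COMM_WORLD,nloc,first,1,&iscol);CHKERRQ(ierr);
     ierr = MatGetSubMatrix(A,isrow,iscol,MAT_INITIAL_MATRIX,&sub);CHKERRQ(ierr);
     With MAT_REUSE_MATRIX the same index sets must be passed again and sub is refilled in place.
*/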
3503 
3504 #undef __FUNCT__
3505 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ_Private"
3506 /*
3507     Not great since it makes two copies of the submatrix: first a SeqAIJ copy
3508   on each process, then the final result built by concatenating the local matrices.
3509   Writing it directly would be much like MatGetSubMatrices_MPIAIJ()
3510 
3511   Note: This requires a sequential iscol with all indices.
3512 */
3513 PetscErrorCode MatGetSubMatrix_MPIAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat)
3514 {
3515   PetscErrorCode ierr;
3516   PetscMPIInt    rank,size;
3517   PetscInt       i,m,n,rstart,row,rend,nz,*cwork,j;
3518   PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal;
3519   Mat            *local,M,Mreuse;
3520   MatScalar      *vwork,*aa;
3521   MPI_Comm       comm = ((PetscObject)mat)->comm;
3522   Mat_SeqAIJ     *aij;
3523 
3524 
3525   PetscFunctionBegin;
3526   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3527   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3528 
3529   if (call ==  MAT_REUSE_MATRIX) {
3530     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr);
3531     if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3532     local = &Mreuse;
3533     ierr  = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);CHKERRQ(ierr);
3534   } else {
3535     ierr   = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr);
3536     Mreuse = *local;
3537     ierr   = PetscFree(local);CHKERRQ(ierr);
3538   }
3539 
3540   /*
3541       m - number of local rows
3542       n - number of columns (same on all processors)
3543       rstart - first row in new global matrix generated
3544   */
3545   ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
3546   if (call == MAT_INITIAL_MATRIX) {
3547     aij = (Mat_SeqAIJ*)(Mreuse)->data;
3548     ii  = aij->i;
3549     jj  = aij->j;
3550 
3551     /*
3552         Determine the number of non-zeros in the diagonal and off-diagonal
3553         portions of the matrix in order to do correct preallocation
3554     */
3555 
3556     /* first get start and end of "diagonal" columns */
3557     if (csize == PETSC_DECIDE) {
3558       ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
3559       if (mglobal == n) { /* square matrix */
3560         nlocal = m;
3561       } else {
3562         nlocal = n/size + ((n % size) > rank);
3563       }
3564     } else {
3565       nlocal = csize;
3566     }
3567     ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3568     rstart = rend - nlocal;
3569     if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n);
3570 
3571     /* next, compute all the lengths */
3572     ierr  = PetscMalloc((2*m+1)*sizeof(PetscInt),&dlens);CHKERRQ(ierr);
3573     olens = dlens + m;
3574     for (i=0; i<m; i++) {
3575       jend = ii[i+1] - ii[i];
3576       olen = 0;
3577       dlen = 0;
3578       for (j=0; j<jend; j++) {
3579         if (*jj < rstart || *jj >= rend) olen++;
3580         else dlen++;
3581         jj++;
3582       }
3583       olens[i] = olen;
3584       dlens[i] = dlen;
3585     }
3586     ierr = MatCreate(comm,&M);CHKERRQ(ierr);
3587     ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr);
3588     ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr);
3589     ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr);
3590     ierr = PetscFree(dlens);CHKERRQ(ierr);
3591   } else {
3592     PetscInt ml,nl;
3593 
3594     M = *newmat;
3595     ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
3596     if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
3597     ierr = MatZeroEntries(M);CHKERRQ(ierr);
3598     /*
3599          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
3600        rather than the slower MatSetValues().
3601     */
3602     M->was_assembled = PETSC_TRUE;
3603     M->assembled     = PETSC_FALSE;
3604   }
3605   ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
3606   aij = (Mat_SeqAIJ*)(Mreuse)->data;
3607   ii  = aij->i;
3608   jj  = aij->j;
3609   aa  = aij->a;
3610   for (i=0; i<m; i++) {
3611     row   = rstart + i;
3612     nz    = ii[i+1] - ii[i];
3613     cwork = jj;     jj += nz;
3614     vwork = aa;     aa += nz;
3615     ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
3616   }
3617 
3618   ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3619   ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3620   *newmat = M;
3621 
3622   /* save submatrix used in processor for next request */
3623   if (call ==  MAT_INITIAL_MATRIX) {
3624     ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
3625     ierr = MatDestroy(&Mreuse);CHKERRQ(ierr);
3626   }
3627 
3628   PetscFunctionReturn(0);
3629 }
3630 
3631 EXTERN_C_BEGIN
3632 #undef __FUNCT__
3633 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR_MPIAIJ"
3634 PetscErrorCode  MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[])
3635 {
3636   PetscInt       m,cstart, cend,j,nnz,i,d;
3637   PetscInt       *d_nnz,*o_nnz,nnz_max = 0,rstart,ii;
3638   const PetscInt *JJ;
3639   PetscScalar    *values;
3640   PetscErrorCode ierr;
3641 
3642   PetscFunctionBegin;
3643   if (Ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0 it is %D",Ii[0]);
3644 
3645   ierr = PetscLayoutSetBlockSize(B->rmap,1);CHKERRQ(ierr);
3646   ierr = PetscLayoutSetBlockSize(B->cmap,1);CHKERRQ(ierr);
3647   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3648   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3649   m      = B->rmap->n;
3650   cstart = B->cmap->rstart;
3651   cend   = B->cmap->rend;
3652   rstart = B->rmap->rstart;
3653 
3654   ierr  = PetscMalloc2(m,PetscInt,&d_nnz,m,PetscInt,&o_nnz);CHKERRQ(ierr);
3655 
3656 #if defined(PETSC_USE_DEBUG)
3657   for (i=0; i<m; i++) {
3658     nnz     = Ii[i+1]- Ii[i];
3659     JJ      = J + Ii[i];
3660     if (nnz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative %D number of columns",i,nnz);
3661     if (nnz && (JJ[0] < 0)) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index",i);
3662     if (nnz && (JJ[nnz-1] >= B->cmap->N)) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N);
3663   }
3664 #endif
3665 
3666   for (i=0; i<m; i++) {
3667     nnz     = Ii[i+1]- Ii[i];
3668     JJ      = J + Ii[i];
3669     nnz_max = PetscMax(nnz_max,nnz);
3670     d       = 0;
3671     for (j=0; j<nnz; j++) {
3672       if (cstart <= JJ[j] && JJ[j] < cend) d++;
3673     }
3674     d_nnz[i] = d;
3675     o_nnz[i] = nnz - d;
3676   }
3677   ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
3678   ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);
3679 
3680   if (v) values = (PetscScalar*)v;
3681   else {
3682     ierr = PetscMalloc((nnz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr);
3683     ierr = PetscMemzero(values,nnz_max*sizeof(PetscScalar));CHKERRQ(ierr);
3684   }
3685 
3686   for (i=0; i<m; i++) {
3687     ii   = i + rstart;
3688     nnz  = Ii[i+1]- Ii[i];
3689     ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ? Ii[i] : 0),INSERT_VALUES);CHKERRQ(ierr);
3690   }
3691   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3692   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3693 
3694   if (!v) {
3695     ierr = PetscFree(values);CHKERRQ(ierr);
3696   }
3697   PetscFunctionReturn(0);
3698 }
3699 EXTERN_C_END
3700 
3701 #undef __FUNCT__
3702 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR"
3703 /*@
3704    MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format
3705    (the default parallel PETSc format).
3706 
3707    Collective on MPI_Comm
3708 
3709    Input Parameters:
3710 +  B - the matrix
3711 .  i - the indices into j for the start of each local row (starts with zero)
3712 .  j - the column indices for each local row (starts with zero)
3713 -  v - optional values in the matrix
3714 
3715    Level: developer
3716 
3717    Notes:
3718        The i, j, and v arrays ARE copied by this routine into the internal format used by PETSc;
3719      thus you CANNOT change the matrix entries by changing the values of v[] after you have
3720      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3721 
3722        The i and j indices are 0 based, and the values in i are offsets into the local j array.
3723 
3724        The format which is used for the sparse matrix input is equivalent to a
3725     row-major ordering, i.e., for the following matrix, the input data expected is
3726     as shown:
3727 
3728         1 0 0
3729         2 0 3     P0
3730        -------
3731         4 5 6     P1
3732 
3733      Process0 [P0]: rows_owned=[0,1]
3734         i =  {0,1,3}  [size = nrow+1  = 2+1]
3735         j =  {0,0,2}  [size = nz = 3]
3736         v =  {1,2,3}  [size = nz = 3]
3737 
3738      Process1 [P1]: rows_owned=[2]
3739         i =  {0,3}    [size = nrow+1  = 1+1]
3740         j =  {0,1,2}  [size = nz = 3]
3741         v =  {4,5,6}  [size = nz = 3]
3742 
3743 .keywords: matrix, aij, compressed row, sparse, parallel
3744 
3745 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateMPIAIJ(), MPIAIJ,
3746           MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays()
3747 @*/
3748 PetscErrorCode  MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
3749 {
3750   PetscErrorCode ierr;
3751 
3752   PetscFunctionBegin;
3753   ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocationCSR_C",(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,i,j,v));CHKERRQ(ierr);
3754   PetscFunctionReturn(0);
3755 }
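/*
   Usage sketch (illustrative only), wiring up the two-process example from the manual page above;
   process 0 owns rows 0 and 1 of the 3x3 matrix, and process 1 would pass its own arrays.

     PetscInt    ia[] = {0,1,3};            row 0 has 1 nonzero, row 1 has 2
     PetscInt    ja[] = {0,0,2};
     PetscScalar va[] = {1.0,2.0,3.0};
     ierr = MatCreate(PETSC_COMM_WORLD,&B);CHKERRQ(ierr);
     ierr = MatSetSizes(B,2,PETSC_DECIDE,3,3);CHKERRQ(ierr);
     ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr);
     ierr = MatMPIAIJSetPreallocationCSR(B,ia,ja,va);CHKERRQ(ierr);
*/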
3756 
3757 #undef __FUNCT__
3758 #define __FUNCT__ "MatMPIAIJSetPreallocation"
3759 /*@C
3760    MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format
3761    (the default parallel PETSc format).  For good matrix assembly performance
3762    the user should preallocate the matrix storage by setting the parameters
3763    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3764    performance can be increased by more than a factor of 50.
3765 
3766    Collective on MPI_Comm
3767 
3768    Input Parameters:
3769 +  A - the matrix
3770 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
3771            (same value is used for all local rows)
3772 .  d_nnz - array containing the number of nonzeros in the various rows of the
3773            DIAGONAL portion of the local submatrix (possibly different for each row)
3774            or PETSC_NULL, if d_nz is used to specify the nonzero structure.
3775            The size of this array is equal to the number of local rows, i.e 'm'.
3776            You must leave room for the diagonal entry even if it is zero.
3777 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
3778            submatrix (same value is used for all local rows).
3779 -  o_nnz - array containing the number of nonzeros in the various rows of the
3780            OFF-DIAGONAL portion of the local submatrix (possibly different for
3781            each row) or PETSC_NULL, if o_nz is used to specify the nonzero
3782            structure. The size of this array is equal to the number
3783            of local rows, i.e 'm'.
3784 
3785    If the *_nnz parameter is given then the *_nz parameter is ignored
3786 
3787    The AIJ format (also called the Yale sparse matrix format or
3788    compressed row storage (CSR)), is fully compatible with standard Fortran 77
3789    storage.  The stored row and column indices begin with zero.
3790    See the <A href="../../docs/manual.pdf#nameddest=ch_mat">Mat chapter of the users manual</A> for details.
3791 
3792    The parallel matrix is partitioned such that the first m0 rows belong to
3793    process 0, the next m1 rows belong to process 1, the next m2 rows belong
3794    to process 2 etc.. where m0,m1,m2... are the input parameter 'm'.
3795 
3796    The DIAGONAL portion of the local submatrix of a processor can be defined
3797    as the submatrix which is obtained by extracting the part corresponding to
3798    the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the
3799    first row that belongs to the processor, r2 is the last row belonging to
3800    this processor, and c1-c2 is the range of indices of the local part of a
3801    vector suitable for applying the matrix to.  This is an mxn matrix.  In the
3802    common case of a square matrix, the row and column ranges are the same and
3803    the DIAGONAL part is also square. The remaining portion of the local
3804    submatrix (mxN) constitute the OFF-DIAGONAL portion.
3805 
3806    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
3807 
3808    You can call MatGetInfo() to get information on how effective the preallocation was;
3809    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
3810    You can also run with the option -info and look for messages with the string
3811    malloc in them to see if additional memory allocation was needed.
3812 
3813    Example usage:
3814 
3815    Consider the following 8x8 matrix with 34 non-zero values, that is
3816    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
3817    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
3818    as follows:
3819 
3820 .vb
3821             1  2  0  |  0  3  0  |  0  4
3822     Proc0   0  5  6  |  7  0  0  |  8  0
3823             9  0 10  | 11  0  0  | 12  0
3824     -------------------------------------
3825            13  0 14  | 15 16 17  |  0  0
3826     Proc1   0 18  0  | 19 20 21  |  0  0
3827             0  0  0  | 22 23  0  | 24  0
3828     -------------------------------------
3829     Proc2  25 26 27  |  0  0 28  | 29  0
3830            30  0  0  | 31 32 33  |  0 34
3831 .ve
3832 
3833    This can be represented as a collection of submatrices as:
3834 
3835 .vb
3836       A B C
3837       D E F
3838       G H I
3839 .ve
3840 
3841    Where the submatrices A,B,C are owned by proc0, D,E,F are
3842    owned by proc1, G,H,I are owned by proc2.
3843 
3844    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3845    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3846    The 'M','N' parameters are 8,8, and have the same values on all procs.
3847 
3848    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
3849    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
3850    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
3851    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
3852    part as SeqAIJ matrices. for eg: proc1 will store [E] as a SeqAIJ
3853    part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
3854    matrix, and [DF] as another SeqAIJ matrix.
3855    When d_nz, o_nz parameters are specified, d_nz storage elements are
3856    allocated for every row of the local diagonal submatrix, and o_nz
3857    storage locations are allocated for every row of the OFF-DIAGONAL submat.
3858    One way to choose d_nz and o_nz is to use the max nonzeros per local
3859    rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
3860    In this case, the values of d_nz,o_nz are:
3861 .vb
3862      proc0 : d_nz = 2, o_nz = 2
3863      proc1 : d_nz = 3, o_nz = 2
3864      proc2 : d_nz = 1, o_nz = 4
3865 .ve
3866    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
3867    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
3868    for proc2, i.e. we are using 12+15+10=37 storage locations to store
3869    34 values.
3870 
3871    When d_nnz, o_nnz parameters are specified, the storage is specified
3872    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
3873    In the above case the values for d_nnz,o_nnz are:
3874 .vb
3875      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
3876      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
3877      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
3878 .ve
3879    Here the space allocated is the sum of all the above values, i.e. 34, and
3880    hence pre-allocation is perfect.
3881 
3882    Level: intermediate
3883 
3884 .keywords: matrix, aij, compressed row, sparse, parallel
3885 
3886 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIAIJ(), MatMPIAIJSetPreallocationCSR(),
3887           MPIAIJ, MatGetInfo()
3888 @*/
3889 PetscErrorCode  MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
3890 {
3891   PetscErrorCode ierr;
3892 
3893   PetscFunctionBegin;
3894   ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocation_C",(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr);
3895   PetscFunctionReturn(0);
3896 }
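/*
   Usage sketch (illustrative only), taken from the 8x8 example in the manual page above; shown for
   process 0, which owns 3 rows.  Processes 1 and 2 make the same calls with their own local sizes
   and d_nnz/o_nnz arrays.

     PetscInt d_nnz[] = {2,2,2},o_nnz[] = {2,2,2};
     ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
     ierr = MatSetSizes(A,3,3,8,8);CHKERRQ(ierr);
     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
     ierr = MatMPIAIJSetPreallocation(A,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
*/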
3897 
3898 #undef __FUNCT__
3899 #define __FUNCT__ "MatCreateMPIAIJWithArrays"
3900 /*@
3901      MatCreateMPIAIJWithArrays - creates a MPI AIJ matrix using arrays that contain in standard
3902          CSR format the local rows.
3903 
3904    Collective on MPI_Comm
3905 
3906    Input Parameters:
3907 +  comm - MPI communicator
3908 .  m - number of local rows (Cannot be PETSC_DECIDE)
3909 .  n - This value should be the same as the local size used in creating the
3910        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
3911        calculated if N is given) For square matrices n is almost always m.
3912 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
3913 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
3914 .   i - row indices
3915 .   j - column indices
3916 -   a - matrix values
3917 
3918    Output Parameter:
3919 .   mat - the matrix
3920 
3921    Level: intermediate
3922 
3923    Notes:
3924        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
3925      thus you CANNOT change the matrix entries by changing the values of a[] after you have
3926      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3927 
3928        The i and j indices are 0 based, and the values in i are offsets into the local j array.
3929 
3930        The format which is used for the sparse matrix input is equivalent to a
3931     row-major ordering, i.e., for the following matrix, the input data expected is
3932     as shown:
3933 
3934         1 0 0
3935         2 0 3     P0
3936        -------
3937         4 5 6     P1
3938 
3939      Process0 [P0]: rows_owned=[0,1]
3940         i =  {0,1,3}  [size = nrow+1  = 2+1]
3941         j =  {0,0,2}  [size = nz = 3]
3942         v =  {1,2,3}  [size = nz = 3]
3943 
3944      Process1 [P1]: rows_owned=[2]
3945         i =  {0,3}    [size = nrow+1  = 1+1]
3946         j =  {0,1,2}  [size = nz = 3]
3947         v =  {4,5,6}  [size = nz = 3]
3948 
3949 .keywords: matrix, aij, compressed row, sparse, parallel
3950 
3951 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
3952           MPIAIJ, MatCreateMPIAIJ(), MatCreateMPIAIJWithSplitArrays()
3953 @*/
3954 PetscErrorCode  MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
3955 {
3956   PetscErrorCode ierr;
3957 
3958   PetscFunctionBegin;
3959   if (i[0]) {
3960     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
3961   }
3962   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
3963   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
3964   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
3965   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
3966   ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr);
3967   PetscFunctionReturn(0);
3968 }
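/*
   Usage sketch (illustrative only), using the same CSR data as in the manual page above; process 1
   would pass m = 1 and its own ia/ja/va arrays.

     PetscInt    ia[] = {0,1,3};
     PetscInt    ja[] = {0,0,2};
     PetscScalar va[] = {1.0,2.0,3.0};
     Mat         A;
     ierr = MatCreateMPIAIJWithArrays(PETSC_COMM_WORLD,2,PETSC_DECIDE,3,3,ia,ja,va,&A);CHKERRQ(ierr);
*/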
3969 
3970 #undef __FUNCT__
3971 #define __FUNCT__ "MatCreateMPIAIJ"
3972 /*@C
3973    MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format
3974    (the default parallel PETSc format).  For good matrix assembly performance
3975    the user should preallocate the matrix storage by setting the parameters
3976    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3977    performance can be increased by more than a factor of 50.
3978 
3979    Collective on MPI_Comm
3980 
3981    Input Parameters:
3982 +  comm - MPI communicator
3983 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
3984            This value should be the same as the local size used in creating the
3985            y vector for the matrix-vector product y = Ax.
3986 .  n - This value should be the same as the local size used in creating the
3987        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
3988        calculated if N is given) For square matrices n is almost always m.
3989 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
3990 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
3991 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
3992            (same value is used for all local rows)
3993 .  d_nnz - array containing the number of nonzeros in the various rows of the
3994            DIAGONAL portion of the local submatrix (possibly different for each row)
3995            or PETSC_NULL, if d_nz is used to specify the nonzero structure.
3996            The size of this array is equal to the number of local rows, i.e 'm'.
3997            You must leave room for the diagonal entry even if it is zero.
3998 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
3999            submatrix (same value is used for all local rows).
4000 -  o_nnz - array containing the number of nonzeros in the various rows of the
4001            OFF-DIAGONAL portion of the local submatrix (possibly different for
4002            each row) or PETSC_NULL, if o_nz is used to specify the nonzero
4003            structure. The size of this array is equal to the number
4004            of local rows, i.e 'm'.
4005 
4006    Output Parameter:
4007 .  A - the matrix
4008 
4009    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
4010    MatXXXXSetPreallocation() paradigm instead of calling this routine directly, as sketched below.
4011    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation().]
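
   For example, a minimal sketch of this recommended sequence (the names comm, A, and the size and
   preallocation arguments are placeholders) is
.vb
     MatCreate(comm,&A);
     MatSetSizes(A,m,n,M,N);
     MatSetType(A,MATMPIAIJ);
     MatSetFromOptions(A);
     MatMPIAIJSetPreallocation(A,d_nz,d_nnz,o_nz,o_nnz);
.ve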
4012 
4013    Notes:
4014    If the *_nnz parameter is given then the *_nz parameter is ignored
4015 
4016    m,n,M,N parameters specify the size of the matrix, and its partitioning across
4017    processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
4018    storage requirements for this matrix.
4019 
4020    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one
4021    processor then it must be used on all processors that share the object for
4022    that argument.
4023 
4024    The user MUST specify either the local or global matrix dimensions
4025    (possibly both).
4026 
4027    The parallel matrix is partitioned across processors such that the
4028    first m0 rows belong to process 0, the next m1 rows belong to
4029    process 1, the next m2 rows belong to process 2, etc., where
4030    m0,m1,m2,... are the values of the input parameter 'm' on each process;
4031    i.e. each processor stores values corresponding to an [m x N] submatrix.
4032 
4033    The columns are logically partitioned with the n0 columns belonging
4034    to the 0th partition, the next n1 columns belonging to the next
4035    partition, etc., where n0,n1,n2,... are the values of the input parameter 'n'.
4036 
4037    The DIAGONAL portion of the local submatrix on any given processor
4038    is the submatrix corresponding to the rows m and columns n owned
4039    by that processor; i.e. the diagonal block on process 0 is
4040    [m0 x n0], the diagonal block on process 1 is [m1 x n1], etc.
4041    The remaining [m x (N-n)] portion of the local submatrix
4042    constitutes the OFF-DIAGONAL portion. The example below
4043    illustrates this concept.
4044 
4045    For a square global matrix we define each processor's diagonal portion
4046    to be its local rows and the corresponding columns (a square submatrix);
4047    each processor's off-diagonal portion encompasses the remainder of the
4048    local matrix (a rectangular submatrix).
4049 
4050    If o_nnz, d_nnz are specified, then o_nz and d_nz are ignored.
4051 
4052    When calling this routine with a single process communicator, a matrix of
4053    type SEQAIJ is returned.  If a matrix of type MPIAIJ is desired for this
4054    type of communicator, use the construction mechanism:
4055      MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatSetSizes(A, m,n,M,N); MatMPIAIJSetPreallocation(A,...);
4056 
4057    By default, this format uses inodes (identical nodes) when possible.
4058    We search for consecutive rows with the same nonzero structure, thereby
4059    reusing matrix information to achieve increased efficiency.
4060 
4061    Options Database Keys:
4062 +  -mat_no_inode  - Do not use inodes
4063 .  -mat_inode_limit <limit> - Sets inode limit (max limit=5)
4064 -  -mat_aij_oneindex - Internally use indexing starting at 1
4065         rather than 0.  Note that when calling MatSetValues(),
4066         the user still MUST index entries starting at 0!
4067 
4068 
4069    Example usage:
4070 
4071    Consider the following 8x8 matrix with 34 nonzero values that is
4072    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
4073    proc1 owns 3 rows, and proc2 owns 2 rows. This division can be shown
4074    as follows:
4075 
4076 .vb
4077             1  2  0  |  0  3  0  |  0  4
4078     Proc0   0  5  6  |  7  0  0  |  8  0
4079             9  0 10  | 11  0  0  | 12  0
4080     -------------------------------------
4081            13  0 14  | 15 16 17  |  0  0
4082     Proc1   0 18  0  | 19 20 21  |  0  0
4083             0  0  0  | 22 23  0  | 24  0
4084     -------------------------------------
4085     Proc2  25 26 27  |  0  0 28  | 29  0
4086            30  0  0  | 31 32 33  |  0 34
4087 .ve
4088 
4089    This can be represented as a collection of submatrices as:
4090 
4091 .vb
4092       A B C
4093       D E F
4094       G H I
4095 .ve
4096 
4097    Where the submatrices A,B,C are owned by proc0, D,E,F are
4098    owned by proc1, G,H,I are owned by proc2.
4099 
4100    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4101    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4102    The 'M','N' parameters are 8,8, and have the same values on all procs.
4103 
4104    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
4105    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
4106    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
4107    Internally, each processor stores the DIAGONAL part and the OFF-DIAGONAL
4108    part as separate SeqAIJ matrices; e.g. proc1 stores [E] as one SeqAIJ
4109    matrix and [DF] as another SeqAIJ matrix.
4110 
4111    When the d_nz, o_nz parameters are specified, d_nz storage elements are
4112    allocated for every row of the local DIAGONAL submatrix, and o_nz
4113    storage locations are allocated for every row of the OFF-DIAGONAL submatrix.
4114    One way to choose d_nz and o_nz is to use the maximum number of nonzeros
4115    over the local rows of the DIAGONAL and the OFF-DIAGONAL submatrices, respectively.
4116    In this case, the values of d_nz,o_nz are:
4117 .vb
4118      proc0 : d_nz = 2, o_nz = 2
4119      proc1 : d_nz = 3, o_nz = 2
4120      proc2 : d_nz = 1, o_nz = 4
4121 .ve
4122    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
4123    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
4124    for proc2; i.e. we are using 12+15+10=37 storage locations to store
4125    34 values.
4126 
4127    When the d_nnz, o_nnz parameters are specified, the storage is specified
4128    for every row, corresponding to both the DIAGONAL and OFF-DIAGONAL submatrices.
4129    In the above case the values for d_nnz,o_nnz are:
4130 .vb
4131      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
4132      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
4133      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
4134 .ve
4135    Here the space allocated is the sum of all the above values, i.e. 34, and
4136    hence the preallocation is exact.
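
   For instance, a hypothetical sketch of the corresponding call on proc1 of the example
   above (comm and A are placeholders; the nnz arrays are those listed for proc1) is
.vb
     PetscInt d_nnz[3] = {3,3,2}, o_nnz[3] = {2,1,1};
     ierr = MatCreateMPIAIJ(comm,3,3,8,8,0,d_nnz,0,o_nnz,&A);CHKERRQ(ierr);
.ve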
4137 
4138    Level: intermediate
4139 
4140 .keywords: matrix, aij, compressed row, sparse, parallel
4141 
4142 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
4143           MPIAIJ, MatCreateMPIAIJWithArrays()
4144 @*/
4145 PetscErrorCode  MatCreateMPIAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
4146 {
4147   PetscErrorCode ierr;
4148   PetscMPIInt    size;
4149 
4150   PetscFunctionBegin;
4151   ierr = MatCreate(comm,A);CHKERRQ(ierr);
4152   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
4153   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4154   if (size > 1) {
4155     ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr);
4156     ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
4157   } else {
4158     ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr);
4159     ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr);
4160   }
4161   PetscFunctionReturn(0);
4162 }
4163 
4164 #undef __FUNCT__
4165 #define __FUNCT__ "MatMPIAIJGetSeqAIJ"
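/*
   MatMPIAIJGetSeqAIJ - Provides access to the internal pieces of an MPIAIJ matrix: Ad is the
   SeqAIJ matrix holding the diagonal block, Ao is the SeqAIJ matrix holding the off-diagonal
   block, and colmap is the garray that maps local columns of Ao to global column indices.
   The returned objects are internal to the matrix and should not be destroyed by the caller.
*/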
4166 PetscErrorCode  MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,PetscInt *colmap[])
4167 {
4168   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
4169 
4170   PetscFunctionBegin;
4171   *Ad     = a->A;
4172   *Ao     = a->B;
4173   *colmap = a->garray;
4174   PetscFunctionReturn(0);
4175 }
4176 
4177 #undef __FUNCT__
4178 #define __FUNCT__ "MatSetColoring_MPIAIJ"
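/*
   MatSetColoring_MPIAIJ - Sets the coloring on both the diagonal (a->A) and off-diagonal (a->B)
   SeqAIJ blocks. For IS_COLORING_GLOBAL the colors are gathered onto every process and the
   off-diagonal colors are looked up through garray; for IS_COLORING_GHOSTED the global column
   indices are first mapped to local indices with ISGlobalToLocalMappingApply().
*/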
4179 PetscErrorCode MatSetColoring_MPIAIJ(Mat A,ISColoring coloring)
4180 {
4181   PetscErrorCode ierr;
4182   PetscInt       i;
4183   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
4184 
4185   PetscFunctionBegin;
4186   if (coloring->ctype == IS_COLORING_GLOBAL) {
4187     ISColoringValue *allcolors,*colors;
4188     ISColoring      ocoloring;
4189 
4190     /* set coloring for diagonal portion */
4191     ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr);
4192 
4193     /* set coloring for off-diagonal portion */
4194     ierr = ISAllGatherColors(((PetscObject)A)->comm,coloring->n,coloring->colors,PETSC_NULL,&allcolors);CHKERRQ(ierr);
4195     ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
4196     for (i=0; i<a->B->cmap->n; i++) {
4197       colors[i] = allcolors[a->garray[i]];
4198     }
4199     ierr = PetscFree(allcolors);CHKERRQ(ierr);
4200     ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
4201     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
4202     ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr);
4203   } else if (coloring->ctype == IS_COLORING_GHOSTED) {
4204     ISColoringValue *colors;
4205     PetscInt        *larray;
4206     ISColoring      ocoloring;
4207 
4208     /* set coloring for diagonal portion */
4209     ierr = PetscMalloc((a->A->cmap->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr);
4210     for (i=0; i<a->A->cmap->n; i++) {
4211       larray[i] = i + A->cmap->rstart;
4212     }
4213     ierr = ISGlobalToLocalMappingApply(A->cmap->mapping,IS_GTOLM_MASK,a->A->cmap->n,larray,PETSC_NULL,larray);CHKERRQ(ierr);
4214     ierr = PetscMalloc((a->A->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
4215     for (i=0; i<a->A->cmap->n; i++) {
4216       colors[i] = coloring->colors[larray[i]];
4217     }
4218     ierr = PetscFree(larray);CHKERRQ(ierr);
4219     ierr = ISColoringCreate(PETSC_COMM_SELF,coloring->n,a->A->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
4220     ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr);
4221     ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr);
4222 
4223     /* set coloring for off-diagonal portion */
4224     ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr);
4225     ierr = ISGlobalToLocalMappingApply(A->cmap->mapping,IS_GTOLM_MASK,a->B->cmap->n,a->garray,PETSC_NULL,larray);CHKERRQ(ierr);
4226     ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
4227     for (i=0; i<a->B->cmap->n; i++) {
4228       colors[i] = coloring->colors[larray[i]];
4229     }
4230     ierr = PetscFree(larray);CHKERRQ(ierr);
4231     ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
4232     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
4233     ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr);
4234   } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support ISColoringType %d",(int)coloring->ctype);
4235 
4236   PetscFunctionReturn(0);
4237 }
4238 
4239 #if defined(PETSC_HAVE_ADIC)
4240 #undef __FUNCT__
4241 #define __FUNCT__ "MatSetValuesAdic_MPIAIJ"
4242 PetscErrorCode MatSetValuesAdic_MPIAIJ(Mat A,void *advalues)
4243 {
4244   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
4245   PetscErrorCode ierr;
4246 
4247   PetscFunctionBegin;
4248   ierr = MatSetValuesAdic_SeqAIJ(a->A,advalues);CHKERRQ(ierr);
4249   ierr = MatSetValuesAdic_SeqAIJ(a->B,advalues);CHKERRQ(ierr);
4250   PetscFunctionReturn(0);
4251 }
4252 #endif
4253 
4254 #undef __FUNCT__
4255 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ"
4256 PetscErrorCode MatSetValuesAdifor_MPIAIJ(Mat A,PetscInt nl,void *advalues)
4257 {
4258   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
4259   PetscErrorCode ierr;
4260 
4261   PetscFunctionBegin;
4262   ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr);
4263   ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr);
4264   PetscFunctionReturn(0);
4265 }
4266 
4267 #undef __FUNCT__
4268 #define __FUNCT__ "MatMerge"
4269 /*@
4270       MatMerge - Creates a single large PETSc matrix by concatenating sequential
4271                  matrices from each processor
4272 
4273     Collective on MPI_Comm
4274 
4275    Input Parameters:
4276 +    comm - the communicator the parallel matrix will live on
4277 .    inmat - the input sequential matrix on each process
4278 .    n - number of local columns (or PETSC_DECIDE)
4279 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4280 
4281    Output Parameter:
4282 .    outmat - the parallel matrix generated
4283 
4284     Level: advanced
4285 
4286    Notes: The number of columns of the matrix in EACH processor MUST be the same.
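
   For example, a minimal sketch of a typical call (the matrix names are placeholders) is
.vb
     ierr = MatMerge(comm,Aseq,PETSC_DECIDE,MAT_INITIAL_MATRIX,&Ampi);CHKERRQ(ierr);
.ve
   Note that this routine calls MatDestroy() on inmat, so the caller should not destroy the
   input matrix again unless it holds an additional reference.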
4287 
4288 @*/
4289 PetscErrorCode  MatMerge(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
4290 {
4291   PetscErrorCode ierr;
4292   PetscInt       m,N,i,rstart,nnz,Ii,*dnz,*onz;
4293   PetscInt       *indx;
4294   PetscScalar    *values;
4295 
4296   PetscFunctionBegin;
4297   ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
4298   if (scall == MAT_INITIAL_MATRIX){
4299     /* count nonzeros in each row, for diagonal and off diagonal portion of matrix */
4300     if (n == PETSC_DECIDE){
4301       ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr);
4302     }
4303     ierr = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
4304     rstart -= m;
4305 
4306     ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
4307     for (i=0;i<m;i++) {
4308       ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr);
4309       ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr);
4310       ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr);
4311     }
4312     /* This routine will ONLY return MPIAIJ type matrix */
4313     ierr = MatCreate(comm,outmat);CHKERRQ(ierr);
4314     ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
4315     ierr = MatSetType(*outmat,MATMPIAIJ);CHKERRQ(ierr);
4316     ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr);
4317     ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
4318 
4319   } else if (scall == MAT_REUSE_MATRIX){
4320     ierr = MatGetOwnershipRange(*outmat,&rstart,PETSC_NULL);CHKERRQ(ierr);
4321   } else {
4322     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
4323   }
4324 
4325   for (i=0;i<m;i++) {
4326     ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
4327     Ii    = i + rstart;
4328     ierr = MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
4329     ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
4330   }
4331   ierr = MatDestroy(&inmat);CHKERRQ(ierr);
4332   ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4333   ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4334 
4335   PetscFunctionReturn(0);
4336 }
4337 
4338 #undef __FUNCT__
4339 #define __FUNCT__ "MatFileSplit"
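/*
   MatFileSplit - Writes each process's local rows of A, as a sequential matrix with the full
   global column space, to its own binary file named <outfile>.<rank> (opened with FILE_MODE_APPEND).
*/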
4340 PetscErrorCode MatFileSplit(Mat A,char *outfile)
4341 {
4342   PetscErrorCode    ierr;
4343   PetscMPIInt       rank;
4344   PetscInt          m,N,i,rstart,nnz;
4345   size_t            len;
4346   const PetscInt    *indx;
4347   PetscViewer       out;
4348   char              *name;
4349   Mat               B;
4350   const PetscScalar *values;
4351 
4352   PetscFunctionBegin;
4353   ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr);
4354   ierr = MatGetSize(A,0,&N);CHKERRQ(ierr);
4355   /* Should this be the type of the diagonal block of A? */
4356   ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr);
4357   ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr);
4358   ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr);
4359   ierr = MatSeqAIJSetPreallocation(B,0,PETSC_NULL);CHKERRQ(ierr);
4360   ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr);
4361   for (i=0;i<m;i++) {
4362     ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
4363     ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
4364     ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
4365   }
4366   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4367   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4368 
4369   ierr = MPI_Comm_rank(((PetscObject)A)->comm,&rank);CHKERRQ(ierr);
4370   ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr);
4371   ierr = PetscMalloc((len+5)*sizeof(char),&name);CHKERRQ(ierr);
4372   sprintf(name,"%s.%d",outfile,rank);
4373   ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr);
4374   ierr = PetscFree(name);CHKERRQ(ierr);
4375   ierr = MatView(B,out);CHKERRQ(ierr);
4376   ierr = PetscViewerDestroy(&out);CHKERRQ(ierr);
4377   ierr = MatDestroy(&B);CHKERRQ(ierr);
4378   PetscFunctionReturn(0);
4379 }
4380 
4381 extern PetscErrorCode MatDestroy_MPIAIJ(Mat);
4382 #undef __FUNCT__
4383 #define __FUNCT__ "MatDestroy_MPIAIJ_SeqsToMPI"
4384 PetscErrorCode  MatDestroy_MPIAIJ_SeqsToMPI(Mat A)
4385 {
4386   PetscErrorCode       ierr;
4387   Mat_Merge_SeqsToMPI  *merge;
4388   PetscContainer       container;
4389 
4390   PetscFunctionBegin;
4391   ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr);
4392   if (container) {
4393     ierr = PetscContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr);
4394     ierr = PetscFree(merge->id_r);CHKERRQ(ierr);
4395     ierr = PetscFree(merge->len_s);CHKERRQ(ierr);
4396     ierr = PetscFree(merge->len_r);CHKERRQ(ierr);
4397     ierr = PetscFree(merge->bi);CHKERRQ(ierr);
4398     ierr = PetscFree(merge->bj);CHKERRQ(ierr);
4399     ierr = PetscFree(merge->buf_ri[0]);CHKERRQ(ierr);
4400     ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr);
4401     ierr = PetscFree(merge->buf_rj[0]);CHKERRQ(ierr);
4402     ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr);
4403     ierr = PetscFree(merge->coi);CHKERRQ(ierr);
4404     ierr = PetscFree(merge->coj);CHKERRQ(ierr);
4405     ierr = PetscFree(merge->owners_co);CHKERRQ(ierr);
4406     ierr = PetscLayoutDestroy(&merge->rowmap);CHKERRQ(ierr);
4407     ierr = PetscFree(merge);CHKERRQ(ierr);
4408     ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr);
4409   }
4410   ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr);
4411   PetscFunctionReturn(0);
4412 }
4413 
4414 #include <../src/mat/utils/freespace.h>
4415 #include <petscbt.h>
4416 
4417 #undef __FUNCT__
4418 #define __FUNCT__ "MatMerge_SeqsToMPINumeric"
4419 /*@C
4420       MatMerge_SeqsToMPI - Creates a MPIAIJ matrix by adding sequential
4421                  matrices from each processor
4422 
4423     Collective on MPI_Comm
4424 
4425    Input Parameters:
4426 +    comm - the communicator the parallel matrix will live on
4427 .    seqmat - the input sequential matrix on each process
4428 .    m - number of local rows (or PETSC_DECIDE)
4429 .    n - number of local columns (or PETSC_DECIDE)
4430 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4431 
4432    Output Parameter:
4433 .    mpimat - the parallel matrix generated
4434 
4435     Level: advanced
4436 
4437    Notes:
4438      The dimensions of the sequential matrix in each processor MUST be the same.
4439      The input seqmat is included in the container "Mat_Merge_SeqsToMPI", and will be
4440      destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat.
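
   For example, a minimal sketch of a typical call to the driver routine (names are placeholders) is
.vb
     ierr = MatMerge_SeqsToMPI(comm,Aseq,PETSC_DECIDE,PETSC_DECIDE,MAT_INITIAL_MATRIX,&Ampi);CHKERRQ(ierr);
.ve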
4441 @*/
4442 PetscErrorCode  MatMerge_SeqsToMPINumeric(Mat seqmat,Mat mpimat)
4443 {
4444   PetscErrorCode       ierr;
4445   MPI_Comm             comm=((PetscObject)mpimat)->comm;
4446   Mat_SeqAIJ           *a=(Mat_SeqAIJ*)seqmat->data;
4447   PetscMPIInt          size,rank,taga,*len_s;
4448   PetscInt             N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj=a->j;
4449   PetscInt             proc,m;
4450   PetscInt             **buf_ri,**buf_rj;
4451   PetscInt             k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj;
4452   PetscInt             nrows,**buf_ri_k,**nextrow,**nextai;
4453   MPI_Request          *s_waits,*r_waits;
4454   MPI_Status           *status;
4455   MatScalar            *aa=a->a;
4456   MatScalar            **abuf_r,*ba_i;
4457   Mat_Merge_SeqsToMPI  *merge;
4458   PetscContainer       container;
4459 
4460   PetscFunctionBegin;
4461   ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
4462 
4463   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4464   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4465 
4466   ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr);
4467   ierr  = PetscContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr);
4468 
4469   bi     = merge->bi;
4470   bj     = merge->bj;
4471   buf_ri = merge->buf_ri;
4472   buf_rj = merge->buf_rj;
4473 
4474   ierr   = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr);
4475   owners = merge->rowmap->range;
4476   len_s  = merge->len_s;
4477 
4478   /* send and recv matrix values */
4479   /*-----------------------------*/
4480   ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr);
4481   ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr);
4482 
4483   ierr = PetscMalloc((merge->nsend+1)*sizeof(MPI_Request),&s_waits);CHKERRQ(ierr);
4484   for (proc=0,k=0; proc<size; proc++){
4485     if (!len_s[proc]) continue;
4486     i = owners[proc];
4487     ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr);
4488     k++;
4489   }
4490 
4491   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);}
4492   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);}
4493   ierr = PetscFree(status);CHKERRQ(ierr);
4494 
4495   ierr = PetscFree(s_waits);CHKERRQ(ierr);
4496   ierr = PetscFree(r_waits);CHKERRQ(ierr);
4497 
4498   /* insert mat values of mpimat */
4499   /*----------------------------*/
4500   ierr = PetscMalloc(N*sizeof(PetscScalar),&ba_i);CHKERRQ(ierr);
4501   ierr = PetscMalloc3(merge->nrecv,PetscInt*,&buf_ri_k,merge->nrecv,PetscInt*,&nextrow,merge->nrecv,PetscInt*,&nextai);CHKERRQ(ierr);
4502 
4503   for (k=0; k<merge->nrecv; k++){
4504     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th received i-structure */
4505     nrows = *(buf_ri_k[k]);
4506     nextrow[k]  = buf_ri_k[k]+1;  /* next row number of k-th received i-structure */
4507     nextai[k]   = buf_ri_k[k] + (nrows + 1);/* points to the next i-structure of k-th received i-structure */
4508   }
4509 
4510   /* set values of ba */
4511   m = merge->rowmap->n;
4512   for (i=0; i<m; i++) {
4513     arow = owners[rank] + i;
4514     bj_i = bj+bi[i];  /* col indices of the i-th row of mpimat */
4515     bnzi = bi[i+1] - bi[i];
4516     ierr = PetscMemzero(ba_i,bnzi*sizeof(PetscScalar));CHKERRQ(ierr);
4517 
4518     /* add local non-zero vals of this proc's seqmat into ba */
4519     anzi = ai[arow+1] - ai[arow];
4520     aj   = a->j + ai[arow];
4521     aa   = a->a + ai[arow];
4522     nextaj = 0;
4523     for (j=0; nextaj<anzi; j++){
4524       if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */
4525         ba_i[j] += aa[nextaj++];
4526       }
4527     }
4528 
4529     /* add received vals into ba */
4530     for (k=0; k<merge->nrecv; k++){ /* k-th received message */
4531       /* i-th row */
4532       if (i == *nextrow[k]) {
4533         anzi = *(nextai[k]+1) - *nextai[k];
4534         aj   = buf_rj[k] + *(nextai[k]);
4535         aa   = abuf_r[k] + *(nextai[k]);
4536         nextaj = 0;
4537         for (j=0; nextaj<anzi; j++){
4538           if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */
4539             ba_i[j] += aa[nextaj++];
4540           }
4541         }
4542         nextrow[k]++; nextai[k]++;
4543       }
4544     }
4545     ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr);
4546   }
4547   ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4548   ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4549 
4550   ierr = PetscFree(abuf_r[0]);CHKERRQ(ierr);
4551   ierr = PetscFree(abuf_r);CHKERRQ(ierr);
4552   ierr = PetscFree(ba_i);CHKERRQ(ierr);
4553   ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
4554   ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
4555   PetscFunctionReturn(0);
4556 }
4557 
4558 extern PetscErrorCode  MatDestroy_MPIAIJ_SeqsToMPI(Mat);
4559 
4560 #undef __FUNCT__
4561 #define __FUNCT__ "MatMerge_SeqsToMPISymbolic"
4562 PetscErrorCode  MatMerge_SeqsToMPISymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat)
4563 {
4564   PetscErrorCode       ierr;
4565   Mat                  B_mpi;
4566   Mat_SeqAIJ           *a=(Mat_SeqAIJ*)seqmat->data;
4567   PetscMPIInt          size,rank,tagi,tagj,*len_s,*len_si,*len_ri;
4568   PetscInt             **buf_rj,**buf_ri,**buf_ri_k;
4569   PetscInt             M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j;
4570   PetscInt             len,proc,*dnz,*onz;
4571   PetscInt             k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0;
4572   PetscInt             nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai;
4573   MPI_Request          *si_waits,*sj_waits,*ri_waits,*rj_waits;
4574   MPI_Status           *status;
4575   PetscFreeSpaceList   free_space=PETSC_NULL,current_space=PETSC_NULL;
4576   PetscBT              lnkbt;
4577   Mat_Merge_SeqsToMPI  *merge;
4578   PetscContainer       container;
4579 
4580   PetscFunctionBegin;
4581   ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4582 
4583   /* make sure it is a PETSc comm */
4584   ierr = PetscCommDuplicate(comm,&comm,PETSC_NULL);CHKERRQ(ierr);
4585   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4586   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4587 
4588   ierr = PetscNew(Mat_Merge_SeqsToMPI,&merge);CHKERRQ(ierr);
4589   ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr);
4590 
4591   /* determine row ownership */
4592   /*---------------------------------------------------------*/
4593   ierr = PetscLayoutCreate(comm,&merge->rowmap);CHKERRQ(ierr);
4594   ierr = PetscLayoutSetLocalSize(merge->rowmap,m);CHKERRQ(ierr);
4595   ierr = PetscLayoutSetSize(merge->rowmap,M);CHKERRQ(ierr);
4596   ierr = PetscLayoutSetBlockSize(merge->rowmap,1);CHKERRQ(ierr);
4597   ierr = PetscLayoutSetUp(merge->rowmap);CHKERRQ(ierr);
4598   ierr = PetscMalloc(size*sizeof(PetscMPIInt),&len_si);CHKERRQ(ierr);
4599   ierr = PetscMalloc(size*sizeof(PetscMPIInt),&merge->len_s);CHKERRQ(ierr);
4600 
4601   m      = merge->rowmap->n;
4602   M      = merge->rowmap->N;
4603   owners = merge->rowmap->range;
4604 
4605   /* determine the number of messages to send, their lengths */
4606   /*---------------------------------------------------------*/
4607   len_s  = merge->len_s;
4608 
4609   len = 0;  /* length of buf_si[] */
4610   merge->nsend = 0;
4611   for (proc=0; proc<size; proc++){
4612     len_si[proc] = 0;
4613     if (proc == rank){
4614       len_s[proc] = 0;
4615     } else {
4616       len_si[proc] = owners[proc+1] - owners[proc] + 1;
4617       len_s[proc] = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */
4618     }
4619     if (len_s[proc]) {
4620       merge->nsend++;
4621       nrows = 0;
4622       for (i=owners[proc]; i<owners[proc+1]; i++){
4623         if (ai[i+1] > ai[i]) nrows++;
4624       }
4625       len_si[proc] = 2*(nrows+1);
4626       len += len_si[proc];
4627     }
4628   }
4629 
4630   /* determine the number and length of messages to receive for ij-structure */
4631   /*-------------------------------------------------------------------------*/
4632   ierr = PetscGatherNumberOfMessages(comm,PETSC_NULL,len_s,&merge->nrecv);CHKERRQ(ierr);
4633   ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr);
4634 
4635   /* post the Irecv of j-structure */
4636   /*-------------------------------*/
4637   ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr);
4638   ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr);
4639 
4640   /* post the Isend of j-structure */
4641   /*--------------------------------*/
4642   ierr = PetscMalloc2(merge->nsend,MPI_Request,&si_waits,merge->nsend,MPI_Request,&sj_waits);CHKERRQ(ierr);
4643 
4644   for (proc=0, k=0; proc<size; proc++){
4645     if (!len_s[proc]) continue;
4646     i = owners[proc];
4647     ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr);
4648     k++;
4649   }
4650 
4651   /* receives and sends of j-structure are complete */
4652   /*------------------------------------------------*/
4653   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);}
4654   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);}
4655 
4656   /* send and recv i-structure */
4657   /*---------------------------*/
4658   ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr);
4659   ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr);
4660 
4661   ierr = PetscMalloc((len+1)*sizeof(PetscInt),&buf_s);CHKERRQ(ierr);
4662   buf_si = buf_s;  /* points to the beginning of k-th msg to be sent */
4663   for (proc=0,k=0; proc<size; proc++){
4664     if (!len_s[proc]) continue;
4665     /* form outgoing message for i-structure:
4666          buf_si[0]:                 nrows to be sent
4667                [1:nrows]:           row index (global)
4668                [nrows+1:2*nrows+1]: i-structure index
4669     */
4670     /*-------------------------------------------*/
4671     nrows = len_si[proc]/2 - 1;
4672     buf_si_i    = buf_si + nrows+1;
4673     buf_si[0]   = nrows;
4674     buf_si_i[0] = 0;
4675     nrows = 0;
4676     for (i=owners[proc]; i<owners[proc+1]; i++){
4677       anzi = ai[i+1] - ai[i];
4678       if (anzi) {
4679         buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */
4680         buf_si[nrows+1] = i-owners[proc]; /* local row index */
4681         nrows++;
4682       }
4683     }
4684     ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr);
4685     k++;
4686     buf_si += len_si[proc];
4687   }
4688 
4689   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);}
4690   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);}
4691 
4692   ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr);
4693   for (i=0; i<merge->nrecv; i++){
4694     ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr);
4695   }
4696 
4697   ierr = PetscFree(len_si);CHKERRQ(ierr);
4698   ierr = PetscFree(len_ri);CHKERRQ(ierr);
4699   ierr = PetscFree(rj_waits);CHKERRQ(ierr);
4700   ierr = PetscFree2(si_waits,sj_waits);CHKERRQ(ierr);
4701   ierr = PetscFree(ri_waits);CHKERRQ(ierr);
4702   ierr = PetscFree(buf_s);CHKERRQ(ierr);
4703   ierr = PetscFree(status);CHKERRQ(ierr);
4704 
4705   /* compute a local seq matrix in each processor */
4706   /*----------------------------------------------*/
4707   /* allocate bi array and free space for accumulating nonzero column info */
4708   ierr = PetscMalloc((m+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr);
4709   bi[0] = 0;
4710 
4711   /* create and initialize a linked list */
4712   nlnk = N+1;
4713   ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4714 
4715   /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */
4716   len = 0;
4717   len  = ai[owners[rank+1]] - ai[owners[rank]];
4718   ierr = PetscFreeSpaceGet((PetscInt)(2*len+1),&free_space);CHKERRQ(ierr);
4719   current_space = free_space;
4720 
4721   /* determine symbolic info for each local row */
4722   ierr = PetscMalloc3(merge->nrecv,PetscInt*,&buf_ri_k,merge->nrecv,PetscInt*,&nextrow,merge->nrecv,PetscInt*,&nextai);CHKERRQ(ierr);
4723 
4724   for (k=0; k<merge->nrecv; k++){
4725     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th received i-structure */
4726     nrows = *buf_ri_k[k];
4727     nextrow[k]  = buf_ri_k[k] + 1;  /* next row number of k-th received i-structure */
4728     nextai[k]   = buf_ri_k[k] + (nrows + 1);/* points to the next i-structure of k-th received i-structure */
4729   }
4730 
4731   ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
4732   len = 0;
4733   for (i=0;i<m;i++) {
4734     bnzi   = 0;
4735     /* add local non-zero cols of this proc's seqmat into lnk */
4736     arow   = owners[rank] + i;
4737     anzi   = ai[arow+1] - ai[arow];
4738     aj     = a->j + ai[arow];
4739     ierr = PetscLLAdd(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4740     bnzi += nlnk;
4741     /* add received col data into lnk */
4742     for (k=0; k<merge->nrecv; k++){ /* k-th received message */
4743       if (i == *nextrow[k]) { /* i-th row */
4744         anzi = *(nextai[k]+1) - *nextai[k];
4745         aj   = buf_rj[k] + *nextai[k];
4746         ierr = PetscLLAdd(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4747         bnzi += nlnk;
4748         nextrow[k]++; nextai[k]++;
4749       }
4750     }
4751     if (len < bnzi) len = bnzi;  /* =max(bnzi) */
4752 
4753     /* if free space is not available, make more free space */
4754     if (current_space->local_remaining<bnzi) {
4755       ierr = PetscFreeSpaceGet(bnzi+current_space->total_array_size,&current_space);CHKERRQ(ierr);
4756       nspacedouble++;
4757     }
4758     /* copy data into free space, then initialize lnk */
4759     ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
4760     ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr);
4761 
4762     current_space->array           += bnzi;
4763     current_space->local_used      += bnzi;
4764     current_space->local_remaining -= bnzi;
4765 
4766     bi[i+1] = bi[i] + bnzi;
4767   }
4768 
4769   ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
4770 
4771   ierr = PetscMalloc((bi[m]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr);
4772   ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr);
4773   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
4774 
4775   /* create symbolic parallel matrix B_mpi */
4776   /*---------------------------------------*/
4777   ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr);
4778   if (n==PETSC_DECIDE) {
4779     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr);
4780   } else {
4781     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
4782   }
4783   ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr);
4784   ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr);
4785   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
4786 
4787   /* B_mpi is not ready for use - assembly will be done by MatMerge_SeqsToMPINumeric() */
4788   B_mpi->assembled     = PETSC_FALSE;
4789   B_mpi->ops->destroy  = MatDestroy_MPIAIJ_SeqsToMPI;
4790   merge->bi            = bi;
4791   merge->bj            = bj;
4792   merge->buf_ri        = buf_ri;
4793   merge->buf_rj        = buf_rj;
4794   merge->coi           = PETSC_NULL;
4795   merge->coj           = PETSC_NULL;
4796   merge->owners_co     = PETSC_NULL;
4797 
4798   ierr = PetscCommDestroy(&comm);CHKERRQ(ierr);
4799 
4800   /* attach the supporting struct to B_mpi for reuse */
4801   ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
4802   ierr = PetscContainerSetPointer(container,merge);CHKERRQ(ierr);
4803   ierr = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr);
4804   ierr = PetscContainerDestroy(&container);CHKERRQ(ierr);
4805   *mpimat = B_mpi;
4806 
4807   ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4808   PetscFunctionReturn(0);
4809 }
4810 
4811 #undef __FUNCT__
4812 #define __FUNCT__ "MatMerge_SeqsToMPI"
4813 PetscErrorCode  MatMerge_SeqsToMPI(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat)
4814 {
4815   PetscErrorCode   ierr;
4816 
4817   PetscFunctionBegin;
4818   ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4819   if (scall == MAT_INITIAL_MATRIX){
4820     ierr = MatMerge_SeqsToMPISymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr);
4821   }
4822   ierr = MatMerge_SeqsToMPINumeric(seqmat,*mpimat);CHKERRQ(ierr);
4823   ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4824   PetscFunctionReturn(0);
4825 }
4826 
4827 #undef __FUNCT__
4828 #define __FUNCT__ "MatMPIAIJGetLocalMat"
4829 /*@
4830      MatMPIAIJGetLocalMat - Creates a SeqAIJ matrix from an MPIAIJ matrix by taking all its local rows and putting them into a sequential matrix with
4831           mlocal rows and n columns, where mlocal is the row count obtained with MatGetLocalSize() and n is the global column count obtained
4832           with MatGetSize()
4833 
4834     Not Collective
4835 
4836    Input Parameters:
4837 +    A - the matrix
4838 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4839 
4840    Output Parameter:
4841 .    A_loc - the local sequential matrix generated
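
    For example, a minimal sketch of typical usage (A is assumed to be an assembled MPIAIJ matrix) is
.vb
     Mat A_loc;
     ierr = MatMPIAIJGetLocalMat(A,MAT_INITIAL_MATRIX,&A_loc);CHKERRQ(ierr);
     /* ... use A_loc ... */
     ierr = MatDestroy(&A_loc);CHKERRQ(ierr);
.ve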
4842 
4843     Level: developer
4844 
4845 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMatCondensed()
4846 
4847 @*/
4848 PetscErrorCode  MatMPIAIJGetLocalMat(Mat A,MatReuse scall,Mat *A_loc)
4849 {
4850   PetscErrorCode  ierr;
4851   Mat_MPIAIJ      *mpimat=(Mat_MPIAIJ*)A->data;
4852   Mat_SeqAIJ      *mat,*a=(Mat_SeqAIJ*)(mpimat->A)->data,*b=(Mat_SeqAIJ*)(mpimat->B)->data;
4853   PetscInt        *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*cmap=mpimat->garray;
4854   MatScalar       *aa=a->a,*ba=b->a,*cam;
4855   PetscScalar     *ca;
4856   PetscInt        am=A->rmap->n,i,j,k,cstart=A->cmap->rstart;
4857   PetscInt        *ci,*cj,col,ncols_d,ncols_o,jo;
4858   PetscBool       match;
4859 
4860   PetscFunctionBegin;
4861   ierr = PetscTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr);
4862   if (!match) SETERRQ(((PetscObject)A)->comm, PETSC_ERR_SUP,"Requires MPIAIJ matrix as input");
4863   ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
4864   if (scall == MAT_INITIAL_MATRIX){
4865     ierr = PetscMalloc((1+am)*sizeof(PetscInt),&ci);CHKERRQ(ierr);
4866     ci[0] = 0;
4867     for (i=0; i<am; i++){
4868       ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]);
4869     }
4870     ierr = PetscMalloc((1+ci[am])*sizeof(PetscInt),&cj);CHKERRQ(ierr);
4871     ierr = PetscMalloc((1+ci[am])*sizeof(PetscScalar),&ca);CHKERRQ(ierr);
4872     k = 0;
4873     for (i=0; i<am; i++) {
4874       ncols_o = bi[i+1] - bi[i];
4875       ncols_d = ai[i+1] - ai[i];
4876       /* off-diagonal portion of A */
4877       for (jo=0; jo<ncols_o; jo++) {
4878         col = cmap[*bj];
4879         if (col >= cstart) break;
4880         cj[k]   = col; bj++;
4881         ca[k++] = *ba++;
4882       }
4883       /* diagonal portion of A */
4884       for (j=0; j<ncols_d; j++) {
4885         cj[k]   = cstart + *aj++;
4886         ca[k++] = *aa++;
4887       }
4888       /* off-diagonal portion of A */
4889       for (j=jo; j<ncols_o; j++) {
4890         cj[k]   = cmap[*bj++];
4891         ca[k++] = *ba++;
4892       }
4893     }
4894     /* put together the new matrix */
4895     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr);
4896     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
4897     /* Since these are PETSc arrays, change flags to free them as necessary. */
4898     mat          = (Mat_SeqAIJ*)(*A_loc)->data;
4899     mat->free_a  = PETSC_TRUE;
4900     mat->free_ij = PETSC_TRUE;
4901     mat->nonew   = 0;
4902   } else if (scall == MAT_REUSE_MATRIX){
4903     mat=(Mat_SeqAIJ*)(*A_loc)->data;
4904     ci = mat->i; cj = mat->j; cam = mat->a;
4905     for (i=0; i<am; i++) {
4906       /* off-diagonal portion of A */
4907       ncols_o = bi[i+1] - bi[i];
4908       for (jo=0; jo<ncols_o; jo++) {
4909         col = cmap[*bj];
4910         if (col >= cstart) break;
4911         *cam++ = *ba++; bj++;
4912       }
4913       /* diagonal portion of A */
4914       ncols_d = ai[i+1] - ai[i];
4915       for (j=0; j<ncols_d; j++) *cam++ = *aa++;
4916       /* off-diagonal portion of A */
4917       for (j=jo; j<ncols_o; j++) {
4918         *cam++ = *ba++; bj++;
4919       }
4920     }
4921   } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
4922   ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
4923   PetscFunctionReturn(0);
4924 }
4925 
4926 #undef __FUNCT__
4927 #define __FUNCT__ "MatMPIAIJGetLocalMatCondensed"
4928 /*@C
4929      MatMPIAIJGetLocalMatCondensed - Creates a SeqAIJ matrix from an MPIAIJ matrix by taking all its local rows and NON-ZERO columns
4930 
4931     Not Collective
4932 
4933    Input Parameters:
4934 +    A - the matrix
4935 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4936 -    row, col - index sets of rows and columns to extract (or PETSC_NULL)
4937 
4938    Output Parameter:
4939 .    A_loc - the local sequential matrix generated
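
    For example, a minimal sketch of typical usage (A is assumed to be an assembled MPIAIJ matrix;
    passing PETSC_NULL for row and col lets the routine build the index sets itself) is
.vb
     Mat A_loc;
     ierr = MatMPIAIJGetLocalMatCondensed(A,MAT_INITIAL_MATRIX,PETSC_NULL,PETSC_NULL,&A_loc);CHKERRQ(ierr);
.ve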
4940 
4941     Level: developer
4942 
4943 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMat()
4944 
4945 @*/
4946 PetscErrorCode  MatMPIAIJGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc)
4947 {
4948   Mat_MPIAIJ        *a=(Mat_MPIAIJ*)A->data;
4949   PetscErrorCode    ierr;
4950   PetscInt          i,start,end,ncols,nzA,nzB,*cmap,imark,*idx;
4951   IS                isrowa,iscola;
4952   Mat               *aloc;
4953   PetscBool       match;
4954 
4955   PetscFunctionBegin;
4956   ierr = PetscTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr);
4957   if (!match) SETERRQ(((PetscObject)A)->comm, PETSC_ERR_SUP,"Requires MPIAIJ matrix as input");
4958   ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
4959   if (!row){
4960     start = A->rmap->rstart; end = A->rmap->rend;
4961     ierr = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr);
4962   } else {
4963     isrowa = *row;
4964   }
4965   if (!col){
4966     start = A->cmap->rstart;
4967     cmap  = a->garray;
4968     nzA   = a->A->cmap->n;
4969     nzB   = a->B->cmap->n;
4970     ierr  = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr);
4971     ncols = 0;
4972     for (i=0; i<nzB; i++) {
4973       if (cmap[i] < start) idx[ncols++] = cmap[i];
4974       else break;
4975     }
4976     imark = i;
4977     for (i=0; i<nzA; i++) idx[ncols++] = start + i;
4978     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i];
4979     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&iscola);CHKERRQ(ierr);
4980   } else {
4981     iscola = *col;
4982   }
4983   if (scall != MAT_INITIAL_MATRIX){
4984     ierr = PetscMalloc(sizeof(Mat),&aloc);CHKERRQ(ierr);
4985     aloc[0] = *A_loc;
4986   }
4987   ierr = MatGetSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr);
4988   *A_loc = aloc[0];
4989   ierr = PetscFree(aloc);CHKERRQ(ierr);
4990   if (!row){
4991     ierr = ISDestroy(&isrowa);CHKERRQ(ierr);
4992   }
4993   if (!col){
4994     ierr = ISDestroy(&iscola);CHKERRQ(ierr);
4995   }
4996   ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
4997   PetscFunctionReturn(0);
4998 }
4999 
5000 #undef __FUNCT__
5001 #define __FUNCT__ "MatGetBrowsOfAcols"
5002 /*@C
5003     MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns of local A
5004 
5005     Collective on Mat
5006 
5007    Input Parameters:
5008 +    A,B - the matrices in mpiaij format
5009 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5010 -    rowb, colb - index sets of rows and columns of B to extract (or PETSC_NULL)
5011 
5012    Output Parameter:
5013 +    rowb, colb - index sets of rows and columns of B to extract
5014 .    brstart - row index of B_seq from which the next B->rmap->n rows are taken from B's local rows
5015 -    B_seq - the sequential matrix generated
5016 
5017     Level: developer
5018 
5019 @*/
5020 PetscErrorCode  MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,PetscInt *brstart,Mat *B_seq)
5021 {
5022   Mat_MPIAIJ        *a=(Mat_MPIAIJ*)A->data;
5023   PetscErrorCode    ierr;
5024   PetscInt          *idx,i,start,ncols,nzA,nzB,*cmap,imark;
5025   IS                isrowb,iscolb;
5026   Mat               *bseq;
5027 
5028   PetscFunctionBegin;
5029   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend){
5030     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
5031   }
5032   ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
5033 
5034   if (scall == MAT_INITIAL_MATRIX){
5035     start = A->cmap->rstart;
5036     cmap  = a->garray;
5037     nzA   = a->A->cmap->n;
5038     nzB   = a->B->cmap->n;
5039     ierr  = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr);
5040     ncols = 0;
5041     for (i=0; i<nzB; i++) {  /* row < local row index */
5042       if (cmap[i] < start) idx[ncols++] = cmap[i];
5043       else break;
5044     }
5045     imark = i;
5046     for (i=0; i<nzA; i++) idx[ncols++] = start + i;  /* local rows */
5047     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */
5048     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&isrowb);CHKERRQ(ierr);
5049     *brstart = imark;
5050     ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr);
5051   } else {
5052     if (!rowb || !colb) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX");
5053     isrowb = *rowb; iscolb = *colb;
5054     ierr = PetscMalloc(sizeof(Mat),&bseq);CHKERRQ(ierr);
5055     bseq[0] = *B_seq;
5056   }
5057   ierr = MatGetSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr);
5058   *B_seq = bseq[0];
5059   ierr = PetscFree(bseq);CHKERRQ(ierr);
5060   if (!rowb){
5061     ierr = ISDestroy(&isrowb);CHKERRQ(ierr);
5062   } else {
5063     *rowb = isrowb;
5064   }
5065   if (!colb){
5066     ierr = ISDestroy(&iscolb);CHKERRQ(ierr);
5067   } else {
5068     *colb = iscolb;
5069   }
5070   ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
5071   PetscFunctionReturn(0);
5072 }
5073 
5074 #undef __FUNCT__
5075 #define __FUNCT__ "MatGetBrowsOfAoCols"
5076 /*@C
5077     MatGetBrowsOfAoCols - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns
5078     of the OFF-DIAGONAL portion of local A
5079 
5080     Collective on Mat
5081 
5082    Input Parameters:
5083 +    A,B - the matrices in mpiaij format
5084 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5085 .    startsj - starting point in B's sending and receiving j-arrays, saved for MAT_REUSE (or PETSC_NULL)
5086 .    startsj_r - similar to startsj for receives
5087 -    bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or PETSC_NULL)
5088 
5089    Output Parameter:
5090 .    B_oth - the sequential matrix generated
5091 
5092     Level: developer
5093 
5094 @*/
5095 PetscErrorCode  MatGetBrowsOfAoCols(Mat A,Mat B,MatReuse scall,PetscInt **startsj,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth)
5096 {
5097   VecScatter_MPI_General *gen_to,*gen_from;
5098   PetscErrorCode         ierr;
5099   Mat_MPIAIJ             *a=(Mat_MPIAIJ*)A->data;
5100   Mat_SeqAIJ             *b_oth;
5101   VecScatter             ctx=a->Mvctx;
5102   MPI_Comm               comm=((PetscObject)ctx)->comm;
5103   PetscMPIInt            *rprocs,*sprocs,tag=((PetscObject)ctx)->tag,rank;
5104   PetscInt               *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap->n,row,*b_othi,*b_othj;
5105   PetscScalar            *rvalues,*svalues;
5106   MatScalar              *b_otha,*bufa,*bufA;
5107   PetscInt               i,j,k,l,ll,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len;
5108   MPI_Request            *rwaits = PETSC_NULL,*swaits = PETSC_NULL;
5109   MPI_Status             *sstatus,rstatus;
5110   PetscMPIInt            jj;
5111   PetscInt               *cols,sbs,rbs;
5112   PetscScalar            *vals;
5113 
5114   PetscFunctionBegin;
5115   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend){
5116     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
5117   }
5118   ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
5119   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
5120 
5121   gen_to   = (VecScatter_MPI_General*)ctx->todata;
5122   gen_from = (VecScatter_MPI_General*)ctx->fromdata;
5123   rvalues  = gen_from->values; /* holds the length of receiving row */
5124   svalues  = gen_to->values;   /* holds the length of sending row */
5125   nrecvs   = gen_from->n;
5126   nsends   = gen_to->n;
5127 
5128   ierr = PetscMalloc2(nrecvs,MPI_Request,&rwaits,nsends,MPI_Request,&swaits);CHKERRQ(ierr);
5129   srow     = gen_to->indices;   /* local row index to be sent */
5130   sstarts  = gen_to->starts;
5131   sprocs   = gen_to->procs;
5132   sstatus  = gen_to->sstatus;
5133   sbs      = gen_to->bs;
5134   rstarts  = gen_from->starts;
5135   rprocs   = gen_from->procs;
5136   rbs      = gen_from->bs;
5137 
5138   if (!startsj || !bufa_ptr) scall = MAT_INITIAL_MATRIX;
5139   if (scall == MAT_INITIAL_MATRIX){
5140     /* i-array */
5141     /*---------*/
5142     /*  post receives */
5143     for (i=0; i<nrecvs; i++){
5144       rowlen = (PetscInt*)rvalues + rstarts[i]*rbs;
5145       nrows = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */
5146       ierr = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5147     }
5148 
5149     /* pack the outgoing message */
5150     ierr = PetscMalloc2(nsends+1,PetscInt,&sstartsj,nrecvs+1,PetscInt,&rstartsj);CHKERRQ(ierr);
5151     sstartsj[0] = 0;  rstartsj[0] = 0;
5152     len = 0; /* total length of j or a array to be sent */
5153     k = 0;
5154     for (i=0; i<nsends; i++){
5155       rowlen = (PetscInt*)svalues + sstarts[i]*sbs;
5156       nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5157       for (j=0; j<nrows; j++) {
5158         row = srow[k] + B->rmap->range[rank]; /* global row idx */
5159         for (l=0; l<sbs; l++){
5160           ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); /* rowlength */
5161           rowlen[j*sbs+l] = ncols;
5162           len += ncols;
5163           ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr);
5164         }
5165         k++;
5166       }
5167       ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5168       sstartsj[i+1] = len;  /* starting point of (i+1)-th outgoing msg in bufj and bufa */
5169     }
5170     /* recvs and sends of i-array are completed */
5171     i = nrecvs;
5172     while (i--) {
5173       ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5174     }
5175     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
5176 
5177     /* allocate buffers for sending j and a arrays */
5178     ierr = PetscMalloc((len+1)*sizeof(PetscInt),&bufj);CHKERRQ(ierr);
5179     ierr = PetscMalloc((len+1)*sizeof(PetscScalar),&bufa);CHKERRQ(ierr);
5180 
5181     /* create i-array of B_oth */
5182     ierr = PetscMalloc((aBn+2)*sizeof(PetscInt),&b_othi);CHKERRQ(ierr);
5183     b_othi[0] = 0;
5184     len = 0; /* total length of j or a array to be received */
5185     k = 0;
5186     for (i=0; i<nrecvs; i++){
5187       rowlen = (PetscInt*)rvalues + rstarts[i]*rbs;
5188       nrows = rbs*(rstarts[i+1]-rstarts[i]); /* num of rows to be received */
5189       for (j=0; j<nrows; j++) {
5190         b_othi[k+1] = b_othi[k] + rowlen[j];
5191         len += rowlen[j]; k++;
5192       }
5193       rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */
5194     }
5195 
5196     /* allocate space for j and a arrays of B_oth */
5197     ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(PetscInt),&b_othj);CHKERRQ(ierr);
5198     ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(MatScalar),&b_otha);CHKERRQ(ierr);
5199 
5200     /* j-array */
5201     /*---------*/
5202     /*  post receives of j-array */
5203     for (i=0; i<nrecvs; i++){
5204       nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
5205       ierr = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5206     }
5207 
5208     /* pack the outgoing message j-array */
5209     k = 0;
5210     for (i=0; i<nsends; i++){
5211       nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5212       bufJ = bufj+sstartsj[i];
5213       for (j=0; j<nrows; j++) {
5214         row  = srow[k++] + B->rmap->range[rank]; /* global row idx */
5215         for (ll=0; ll<sbs; ll++){
5216           ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr);
5217           for (l=0; l<ncols; l++){
5218             *bufJ++ = cols[l];
5219           }
5220           ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr);
5221         }
5222       }
5223       ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5224     }
5225 
5226     /* recvs and sends of j-array are completed */
5227     i = nrecvs;
5228     while (i--) {
5229       ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5230     }
5231     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
5232   } else if (scall == MAT_REUSE_MATRIX){
5233     sstartsj = *startsj;
5234     rstartsj = *startsj_r;
5235     bufa     = *bufa_ptr;
5236     b_oth    = (Mat_SeqAIJ*)(*B_oth)->data;
5237     b_otha   = b_oth->a;
5238   } else {
5239     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not possess an object container");
5240   }
5241 
5242   /* a-array */
5243   /*---------*/
5244   /*  post receives of a-array */
5245   for (i=0; i<nrecvs; i++){
5246     nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
5247     ierr = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5248   }
5249 
5250   /* pack the outgoing message a-array */
5251   k = 0;
5252   for (i=0; i<nsends; i++){
5253     nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5254     bufA = bufa+sstartsj[i];
5255     for (j=0; j<nrows; j++) {
5256       row  = srow[k++] + B->rmap->range[rank]; /* global row idx */
5257       for (ll=0; ll<sbs; ll++){
5258         ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr);
5259         for (l=0; l<ncols; l++){
5260           *bufA++ = vals[l];
5261         }
5262         ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr);
5263       }
5264     }
5265     ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5266   }
5267   /* recvs and sends of a-array are completed */
5268   i = nrecvs;
5269   while (i--) {
5270     ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5271   }
5272   if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
5273   ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr);
5274 
5275   if (scall == MAT_INITIAL_MATRIX){
5276     /* put together the new matrix */
5277     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr);
5278 
5279     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
5280     /* Since these are PETSc arrays, change flags to free them as necessary. */
5281     b_oth          = (Mat_SeqAIJ *)(*B_oth)->data;
5282     b_oth->free_a  = PETSC_TRUE;
5283     b_oth->free_ij = PETSC_TRUE;
5284     b_oth->nonew   = 0;
5285 
5286     ierr = PetscFree(bufj);CHKERRQ(ierr);
5287     if (!startsj || !bufa_ptr){
5288       ierr = PetscFree2(sstartsj,rstartsj);CHKERRQ(ierr);
5289       ierr = PetscFree(bufa_ptr);CHKERRQ(ierr);
5290     } else {
5291       *startsj   = sstartsj;
5292       *startsj_r = rstartsj;
5293       *bufa_ptr  = bufa;
5294     }
5295   }
5296   ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
5297   PetscFunctionReturn(0);
5298 }
5299 
5300 #undef __FUNCT__
5301 #define __FUNCT__ "MatGetCommunicationStructs"
5302 /*@C
5303   MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication.
5304 
5305   Not Collective
5306 
5307   Input Parameters:
5308 . A - The matrix in mpiaij format
5309 
5310   Output Parameter:
5311 + lvec - The local vector holding off-process values from the argument to a matrix-vector product
5312 . colmap - A map from global column index to local index into lvec
5313 - multScatter - A scatter from the argument of a matrix-vector product to lvec
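
  For example, a minimal sketch of typical usage (shown for a build without PETSC_USE_CTABLE,
  so colmap is a PetscInt array; A is assumed to be an MPIAIJ matrix) is
.vb
    Vec        lvec;
    PetscInt   *colmap;
    VecScatter Mvctx;
    ierr = MatGetCommunicationStructs(A,&lvec,&colmap,&Mvctx);CHKERRQ(ierr);
.ve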
5314 
5315   Level: developer
5316 
5317 @*/
5318 #if defined (PETSC_USE_CTABLE)
5319 PetscErrorCode  MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter)
5320 #else
5321 PetscErrorCode  MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter)
5322 #endif
5323 {
5324   Mat_MPIAIJ *a;
5325 
5326   PetscFunctionBegin;
5327   PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
5328   PetscValidPointer(lvec, 2);
5329   PetscValidPointer(colmap, 3);
5330   PetscValidPointer(multScatter, 4);
5331   a = (Mat_MPIAIJ *) A->data;
5332   if (lvec) *lvec = a->lvec;
5333   if (colmap) *colmap = a->colmap;
5334   if (multScatter) *multScatter = a->Mvctx;
5335   PetscFunctionReturn(0);
5336 }
5337 
5338 EXTERN_C_BEGIN
5339 extern PetscErrorCode  MatConvert_MPIAIJ_MPIAIJCRL(Mat,const MatType,MatReuse,Mat*);
5340 extern PetscErrorCode  MatConvert_MPIAIJ_MPIAIJPERM(Mat,const MatType,MatReuse,Mat*);
5341 extern PetscErrorCode  MatConvert_MPIAIJ_MPISBAIJ(Mat,const MatType,MatReuse,Mat*);
5342 EXTERN_C_END
5343 
5344 #undef __FUNCT__
5345 #define __FUNCT__ "MatMatMultNumeric_MPIDense_MPIAIJ"
5346 /*
5347     Computes C = A*B as (B'*A')' since computing the MPIDense*MPIAIJ product A*B directly is untenable
5348 
5349                n                       p                          p
5350         (              )       (              )         (                  )
5351       m (      A       )  *  n (       B      )   =   m (         C        )
5352         (              )       (              )         (                  )
5353 
5354 */
5355 PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C)
5356 {
5357   PetscErrorCode     ierr;
5358   Mat                At,Bt,Ct;
5359 
5360   PetscFunctionBegin;
5361   ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr);
5362   ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr);
5363   ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,1.0,&Ct);CHKERRQ(ierr);
5364   ierr = MatDestroy(&At);CHKERRQ(ierr);
5365   ierr = MatDestroy(&Bt);CHKERRQ(ierr);
5366   ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr);
5367   ierr = MatDestroy(&Ct);CHKERRQ(ierr);
5368   PetscFunctionReturn(0);
5369 }
5370 
5371 #undef __FUNCT__
5372 #define __FUNCT__ "MatMatMultSymbolic_MPIDense_MPIAIJ"
5373 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat *C)
5374 {
5375   PetscErrorCode ierr;
5376   PetscInt       m=A->rmap->n,n=B->cmap->n;
5377   Mat            Cmat;
5378 
5379   PetscFunctionBegin;
5380   if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %d != B->rmap->n %d",A->cmap->n,B->rmap->n);
5381   ierr = MatCreate(((PetscObject)A)->comm,&Cmat);CHKERRQ(ierr);
5382   ierr = MatSetSizes(Cmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
5383   ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr);
5384   ierr = MatMPIDenseSetPreallocation(Cmat,PETSC_NULL);CHKERRQ(ierr);
5385   ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5386   ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5387   *C   = Cmat;
5388   PetscFunctionReturn(0);
5389 }
5390 
5391 /* ----------------------------------------------------------------*/
5392 #undef __FUNCT__
5393 #define __FUNCT__ "MatMatMult_MPIDense_MPIAIJ"
5394 PetscErrorCode MatMatMult_MPIDense_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C)
5395 {
5396   PetscErrorCode ierr;
5397 
5398   PetscFunctionBegin;
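  /* For MAT_INITIAL_MATRIX the symbolic phase creates the MPIDENSE product matrix C; the numeric phase then fills it */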
5399   if (scall == MAT_INITIAL_MATRIX){
5400     ierr = MatMatMultSymbolic_MPIDense_MPIAIJ(A,B,fill,C);CHKERRQ(ierr);
5401   }
5402   ierr = MatMatMultNumeric_MPIDense_MPIAIJ(A,B,*C);CHKERRQ(ierr);
5403   PetscFunctionReturn(0);
5404 }
5405 
5406 EXTERN_C_BEGIN
5407 #if defined(PETSC_HAVE_MUMPS)
5408 extern PetscErrorCode MatGetFactor_aij_mumps(Mat,MatFactorType,Mat*);
5409 #endif
5410 #if defined(PETSC_HAVE_PASTIX)
5411 extern PetscErrorCode MatGetFactor_mpiaij_pastix(Mat,MatFactorType,Mat*);
5412 #endif
5413 #if defined(PETSC_HAVE_SUPERLU_DIST)
5414 extern PetscErrorCode MatGetFactor_mpiaij_superlu_dist(Mat,MatFactorType,Mat*);
5415 #endif
5416 #if defined(PETSC_HAVE_SPOOLES)
5417 extern PetscErrorCode MatGetFactor_mpiaij_spooles(Mat,MatFactorType,Mat*);
5418 #endif
5419 EXTERN_C_END
5420 
5421 /*MC
5422    MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.
5423 
5424    Options Database Keys:
5425 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions()
5426 
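  Example usage (a minimal sketch; comm, M, and N are assumed to be defined, and the preallocation values 5 and 2 are placeholders):
.vb
     Mat A;
     ierr = MatCreate(comm,&A);CHKERRQ(ierr);
     ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,M,N);CHKERRQ(ierr);
     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
     ierr = MatMPIAIJSetPreallocation(A,5,PETSC_NULL,2,PETSC_NULL);CHKERRQ(ierr);
.ve
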
5427   Level: beginner
5428 
5429 .seealso: MatCreateMPIAIJ()
5430 M*/
5431 
5432 EXTERN_C_BEGIN
5433 #undef __FUNCT__
5434 #define __FUNCT__ "MatCreate_MPIAIJ"
5435 PetscErrorCode  MatCreate_MPIAIJ(Mat B)
5436 {
5437   Mat_MPIAIJ     *b;
5438   PetscErrorCode ierr;
5439   PetscMPIInt    size;
5440 
5441   PetscFunctionBegin;
5442   ierr = MPI_Comm_size(((PetscObject)B)->comm,&size);CHKERRQ(ierr);
5443 
5444   ierr            = PetscNewLog(B,Mat_MPIAIJ,&b);CHKERRQ(ierr);
5445   B->data         = (void*)b;
5446   ierr            = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
5447   B->rmap->bs     = 1;
5448   B->assembled    = PETSC_FALSE;
5449 
5450   B->insertmode   = NOT_SET_VALUES;
5451   b->size         = size;
5452   ierr = MPI_Comm_rank(((PetscObject)B)->comm,&b->rank);CHKERRQ(ierr);
5453 
5454   /* build cache for off array entries formed */
5455   ierr = MatStashCreate_Private(((PetscObject)B)->comm,1,&B->stash);CHKERRQ(ierr);
5456   b->donotstash  = PETSC_FALSE;
5457   b->colmap      = 0;
5458   b->garray      = 0;
5459   b->roworiented = PETSC_TRUE;
5460 
5461   /* stuff used for matrix vector multiply */
5462   b->lvec      = PETSC_NULL;
5463   b->Mvctx     = PETSC_NULL;
5464 
5465   /* stuff for MatGetRow() */
5466   b->rowindices   = 0;
5467   b->rowvalues    = 0;
5468   b->getrowactive = PETSC_FALSE;
5469 
5470 #if defined(PETSC_HAVE_SPOOLES)
5471   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_spooles_C",
5472                                      "MatGetFactor_mpiaij_spooles",
5473                                      MatGetFactor_mpiaij_spooles);CHKERRQ(ierr);
5474 #endif
5475 #if defined(PETSC_HAVE_MUMPS)
5476   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_mumps_C",
5477                                      "MatGetFactor_aij_mumps",
5478                                      MatGetFactor_aij_mumps);CHKERRQ(ierr);
5479 #endif
5480 #if defined(PETSC_HAVE_PASTIX)
5481   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_pastix_C",
5482                                      "MatGetFactor_mpiaij_pastix",
5483                                      MatGetFactor_mpiaij_pastix);CHKERRQ(ierr);
5484 #endif
5485 #if defined(PETSC_HAVE_SUPERLU_DIST)
5486   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_superlu_dist_C",
5487                                      "MatGetFactor_mpiaij_superlu_dist",
5488                                      MatGetFactor_mpiaij_superlu_dist);CHKERRQ(ierr);
5489 #endif
5490   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
5491                                      "MatStoreValues_MPIAIJ",
5492                                      MatStoreValues_MPIAIJ);CHKERRQ(ierr);
5493   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
5494                                      "MatRetrieveValues_MPIAIJ",
5495                                      MatRetrieveValues_MPIAIJ);CHKERRQ(ierr);
5496   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
5497                                      "MatGetDiagonalBlock_MPIAIJ",
5498                                      MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr);
5499   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatIsTranspose_C",
5500                                      "MatIsTranspose_MPIAIJ",
5501                                      MatIsTranspose_MPIAIJ);CHKERRQ(ierr);
5502   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocation_C",
5503                                      "MatMPIAIJSetPreallocation_MPIAIJ",
5504                                      MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr);
5505   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",
5506                                      "MatMPIAIJSetPreallocationCSR_MPIAIJ",
5507                                      MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr);
5508   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C",
5509                                      "MatDiagonalScaleLocal_MPIAIJ",
5510                                      MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr);
5511   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpiaijperm_C",
5512                                      "MatConvert_MPIAIJ_MPIAIJPERM",
5513                                       MatConvert_MPIAIJ_MPIAIJPERM);CHKERRQ(ierr);
5514   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpiaijcrl_C",
5515                                      "MatConvert_MPIAIJ_MPIAIJCRL",
5516                                       MatConvert_MPIAIJ_MPIAIJCRL);CHKERRQ(ierr);
5517   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C",
5518                                      "MatConvert_MPIAIJ_MPISBAIJ",
5519                                       MatConvert_MPIAIJ_MPISBAIJ);CHKERRQ(ierr);
5520   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMult_mpidense_mpiaij_C",
5521                                      "MatMatMult_MPIDense_MPIAIJ",
5522                                       MatMatMult_MPIDense_MPIAIJ);CHKERRQ(ierr);
5523   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMultSymbolic_mpidense_mpiaij_C",
5524                                      "MatMatMultSymbolic_MPIDense_MPIAIJ",
5525                                      MatMatMultSymbolic_MPIDense_MPIAIJ);CHKERRQ(ierr);
5526   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMultNumeric_mpidense_mpiaij_C",
5527                                      "MatMatMultNumeric_MPIDense_MPIAIJ",
5528                                       MatMatMultNumeric_MPIDense_MPIAIJ);CHKERRQ(ierr);
5529   ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr);
5530   PetscFunctionReturn(0);
5531 }
5532 EXTERN_C_END
5533 
5534 #undef __FUNCT__
5535 #define __FUNCT__ "MatCreateMPIAIJWithSplitArrays"
5536 /*@
5537      MatCreateMPIAIJWithSplitArrays - creates an MPI AIJ matrix using arrays that contain the "diagonal"
5538          and "off-diagonal" part of the matrix in CSR format.
5539 
5540    Collective on MPI_Comm
5541 
5542    Input Parameters:
5543 +  comm - MPI communicator
5544 .  m - number of local rows (Cannot be PETSC_DECIDE)
5545 .  n - This value should be the same as the local size used in creating the
5546        x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have it
5547        calculated if N is given). For square matrices n is almost always m.
5548 .  M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given)
5549 .  N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given)
5550 .   i - row indices for "diagonal" portion of matrix
5551 .   j - column indices for "diagonal" portion of matrix
5552 .   a - matrix values for "diagonal" portion of matrix
5553 .   oi - row indices for "off-diagonal" portion of matrix
5554 .   oj - column indices for "off-diagonal" portion of matrix
5555 -   oa - matrix values for "off-diagonal" portion of matrix
5556 
5557    Output Parameter:
5558 .   mat - the matrix
5559 
5560    Level: advanced
5561 
5562    Notes:
5563        The i, j, a, oi, oj, and oa arrays ARE NOT copied by this routine into the internal format used by PETSc. The user
5564        must free the arrays once the matrix has been destroyed and not before.
5565 
5566        The i and j indices are 0 based
5567        The i, j, oi, and oj indices are zero based
5568        See MatCreateMPIAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix
5569 
5570        This sets local rows and cannot be used to set off-processor values.
5571 
5572        You cannot later use MatSetValues() to change values in this matrix.
5573 
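   Example usage (a hypothetical sketch for rank 0 of a two-process run with a 4x4 matrix and 2 rows per process;
   the arrays must remain valid until the matrix is destroyed):
.vb
     PetscInt    i[]  = {0,1,2}, j[]  = {0,1};   /* "diagonal" block, one entry per row, local column indices */
     PetscScalar a[]  = {1.0,2.0};
     PetscInt    oi[] = {0,1,2}, oj[] = {2,3};   /* "off-diagonal" block, global column indices               */
     PetscScalar oa[] = {3.0,4.0};
     Mat         A;

     ierr = MatCreateMPIAIJWithSplitArrays(PETSC_COMM_WORLD,2,2,4,4,i,j,a,oi,oj,oa,&A);CHKERRQ(ierr);
.ve
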
5574 .keywords: matrix, aij, compressed row, sparse, parallel
5575 
5576 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
5577           MPIAIJ, MatCreateMPIAIJ(), MatCreateMPIAIJWithArrays()
5578 @*/
5579 PetscErrorCode  MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],
5580 								PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat)
5581 {
5582   PetscErrorCode ierr;
5583   Mat_MPIAIJ     *maij;
5584 
5585   PetscFunctionBegin;
5586   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
5587   if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
5588   if (oi[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0");
5589   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
5590   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
5591   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
5592   maij = (Mat_MPIAIJ*) (*mat)->data;
5593   maij->donotstash     = PETSC_TRUE;
5594   (*mat)->preallocated = PETSC_TRUE;
5595 
5596   ierr = PetscLayoutSetBlockSize((*mat)->rmap,1);CHKERRQ(ierr);
5597   ierr = PetscLayoutSetBlockSize((*mat)->cmap,1);CHKERRQ(ierr);
5598   ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr);
5599   ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr);
5600 
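  /* Wrap the user's CSR arrays directly as the sequential "diagonal" (A) and "off-diagonal" (B) blocks; nothing is copied */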
5601   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr);
5602   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr);
5603 
5604   ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5605   ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5606   ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5607   ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5608 
5609   ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5610   ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5611   PetscFunctionReturn(0);
5612 }
5613 
5614 /*
5615     Special version for direct calls from Fortran
5616 */
5617 #include <private/fortranimpl.h>
5618 
5619 #if defined(PETSC_HAVE_FORTRAN_CAPS)
5620 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ
5621 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
5622 #define matsetvaluesmpiaij_ matsetvaluesmpiaij
5623 #endif
5624 
5625 /* Change these macros so they can be used in a void function */
5626 #undef CHKERRQ
5627 #define CHKERRQ(ierr) CHKERRABORT(PETSC_COMM_WORLD,ierr)
5628 #undef SETERRQ2
5629 #define SETERRQ2(comm,ierr,b,c,d) CHKERRABORT(comm,ierr)
5630 #undef SETERRQ
5631 #define SETERRQ(c,ierr,b) CHKERRABORT(c,ierr)
5632 
5633 EXTERN_C_BEGIN
5634 #undef __FUNCT__
5635 #define __FUNCT__ "matsetvaluesmpiaij_"
5636 void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr)
5637 {
5638   Mat             mat = *mmat;
5639   PetscInt        m = *mm, n = *mn;
5640   InsertMode      addv = *maddv;
5641   Mat_MPIAIJ      *aij = (Mat_MPIAIJ*)mat->data;
5642   PetscScalar     value;
5643   PetscErrorCode  ierr;
5644 
5645   ierr = MatPreallocated(mat);CHKERRQ(ierr);
5646   if (mat->insertmode == NOT_SET_VALUES) {
5647     mat->insertmode = addv;
5648   }
5649 #if defined(PETSC_USE_DEBUG)
5650   else if (mat->insertmode != addv) {
5651     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
5652   }
5653 #endif
5654   {
5655   PetscInt        i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
5656   PetscInt        cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
5657   PetscBool       roworiented = aij->roworiented;
5658 
5659   /* Some Variables required in the macro */
5660   Mat             A = aij->A;
5661   Mat_SeqAIJ      *a = (Mat_SeqAIJ*)A->data;
5662   PetscInt        *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
5663   MatScalar       *aa = a->a;
5664   PetscBool       ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES))?PETSC_TRUE:PETSC_FALSE);
5665   Mat             B = aij->B;
5666   Mat_SeqAIJ      *b = (Mat_SeqAIJ*)B->data;
5667   PetscInt        *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
5668   MatScalar       *ba = b->a;
5669 
5670   PetscInt        *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
5671   PetscInt        nonew = a->nonew;
5672   MatScalar       *ap1,*ap2;
5673 
5674   PetscFunctionBegin;
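  /* Rows owned by this process are inserted directly into the diagonal (A) or off-diagonal (B) block;
     rows owned by other processes are stashed (unless donotstash is set) and communicated at assembly time */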
5675   for (i=0; i<m; i++) {
5676     if (im[i] < 0) continue;
5677 #if defined(PETSC_USE_DEBUG)
5678     if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
5679 #endif
5680     if (im[i] >= rstart && im[i] < rend) {
5681       row      = im[i] - rstart;
5682       lastcol1 = -1;
5683       rp1      = aj + ai[row];
5684       ap1      = aa + ai[row];
5685       rmax1    = aimax[row];
5686       nrow1    = ailen[row];
5687       low1     = 0;
5688       high1    = nrow1;
5689       lastcol2 = -1;
5690       rp2      = bj + bi[row];
5691       ap2      = ba + bi[row];
5692       rmax2    = bimax[row];
5693       nrow2    = bilen[row];
5694       low2     = 0;
5695       high2    = nrow2;
5696 
5697       for (j=0; j<n; j++) {
5698         if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
5699         if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
5700         if (in[j] >= cstart && in[j] < cend){
5701           col = in[j] - cstart;
5702           MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
5703         } else if (in[j] < 0) continue;
5704 #if defined(PETSC_USE_DEBUG)
5705         else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
5706 #endif
5707         else {
5708           if (mat->was_assembled) {
5709             if (!aij->colmap) {
5710               ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
5711             }
5712 #if defined (PETSC_USE_CTABLE)
5713             ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
5714             col--;
5715 #else
5716             col = aij->colmap[in[j]] - 1;
5717 #endif
5718             if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
5719               ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
5720               col =  in[j];
5721               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
5722               B = aij->B;
5723               b = (Mat_SeqAIJ*)B->data;
5724               bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
5725               rp2      = bj + bi[row];
5726               ap2      = ba + bi[row];
5727               rmax2    = bimax[row];
5728               nrow2    = bilen[row];
5729               low2     = 0;
5730               high2    = nrow2;
5731               bm       = aij->B->rmap->n;
5732               ba = b->a;
5733             }
5734           } else col = in[j];
5735           MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
5736         }
5737       }
5738     } else {
5739       if (!aij->donotstash) {
5740         if (roworiented) {
5741           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
5742         } else {
5743           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
5744         }
5745       }
5746     }
5747   }}
5748   PetscFunctionReturnVoid();
5749 }
5750 EXTERN_C_END
5751 
5752