xref: /petsc/src/mat/impls/aij/mpi/mpiaij.c (revision f692024ea6ceda132bc9ff30ca7a31e85cfbbcb2)
1 
2 #include <../src/mat/impls/aij/mpi/mpiaij.h>   /*I "petscmat.h" I*/
3 #include <petscblaslapack.h>
4 
5 /*MC
6    MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices.
7 
8    This matrix type is identical to MATSEQAIJ when constructed with a single process communicator,
9    and MATMPIAIJ otherwise.  As a result, for single process communicators,
10   MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
11   for communicators controlling multiple processes.  It is recommended that you call both of
12   the above preallocation routines for simplicity.
13 
14    Options Database Keys:
15 . -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions()
16 
17   Developer Notes: Subclasses include MATAIJCUSP, MATAIJPERM, and MATAIJCRL; the AIJ format also automatically switches over to using inodes when
18    enough of them exist.
19 
20   Level: beginner
21 
22 .seealso: MatCreateMPIAIJ(), MatCreateSeqAIJ(), MATSEQAIJ, MATMPIAIJ
23 M*/
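/*
   Example (a minimal sketch for illustration only, not part of the library source):
   creating an AIJ matrix that works on both one-process and multi-process
   communicators by calling both preallocation routines, as recommended above.
   Here comm, M, and N stand for the application's communicator and global sizes,
   and the per-row counts 5 (diagonal block) and 2 (off-diagonal block) are
   placeholder values chosen only for this example.

     Mat A;
     ierr = MatCreate(comm,&A);CHKERRQ(ierr);
     ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,M,N);CHKERRQ(ierr);
     ierr = MatSetType(A,MATAIJ);CHKERRQ(ierr);
     ierr = MatSeqAIJSetPreallocation(A,5,PETSC_NULL);CHKERRQ(ierr);
     ierr = MatMPIAIJSetPreallocation(A,5,PETSC_NULL,2,PETSC_NULL);CHKERRQ(ierr);
*/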
24 
25 /*MC
26    MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices.
27 
28    This matrix type is identical to MATSEQAIJCRL when constructed with a single process communicator,
29    and MATMPIAIJCRL otherwise.  As a result, for single process communicators,
30    MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
31   for communicators controlling multiple processes.  It is recommended that you call both of
32   the above preallocation routines for simplicity.
33 
34    Options Database Keys:
35 . -mat_type aijcrl - sets the matrix type to "aijcrl" during a call to MatSetFromOptions()
36 
37   Level: beginner
38 
39 .seealso: MatCreateMPIAIJCRL(), MATSEQAIJCRL, MATMPIAIJCRL
40 M*/
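/*
   Example (illustrative only): the "aijcrl" format can be selected at run time,
   assuming the application calls MatSetFromOptions() on the matrix before assembly:

     ierr = MatSetType(A,MATAIJ);CHKERRQ(ierr);
     ierr = MatSetFromOptions(A);CHKERRQ(ierr);

   and the program is then run with the option  -mat_type aijcrl
*/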
41 
42 #undef __FUNCT__
43 #define __FUNCT__ "MatFindNonZeroRows_MPIAIJ"
44 PetscErrorCode MatFindNonZeroRows_MPIAIJ(Mat M,IS *keptrows)
45 {
46   PetscErrorCode  ierr;
47   Mat_MPIAIJ      *mat = (Mat_MPIAIJ*)M->data;
48   Mat_SeqAIJ      *a = (Mat_SeqAIJ*)mat->A->data;
49   Mat_SeqAIJ      *b = (Mat_SeqAIJ*)mat->B->data;
50   const PetscInt  *ia,*ib;
51   const MatScalar *aa,*bb;
52   PetscInt        na,nb,i,j,*rows,cnt=0,n0rows;
53   PetscInt        m = M->rmap->n,rstart = M->rmap->rstart;
54 
55   PetscFunctionBegin;
56   *keptrows = 0;
57   ia = a->i;
58   ib = b->i;
59   for (i=0; i<m; i++) {
60     na = ia[i+1] - ia[i];
61     nb = ib[i+1] - ib[i];
62     if (!na && !nb) {
63       cnt++;
64       goto ok1;
65     }
66     aa = a->a + ia[i];
67     for (j=0; j<na; j++) {
68       if (aa[j] != 0.0) goto ok1;
69     }
70     bb = b->a + ib[i];
71     for (j=0; j <nb; j++) {
72       if (bb[j] != 0.0) goto ok1;
73     }
74     cnt++;
75     ok1:;
76   }
77   ierr = MPI_Allreduce(&cnt,&n0rows,1,MPIU_INT,MPI_SUM,((PetscObject)M)->comm);CHKERRQ(ierr);
78   if (!n0rows) PetscFunctionReturn(0);
79   ierr = PetscMalloc((M->rmap->n-cnt)*sizeof(PetscInt),&rows);CHKERRQ(ierr);
80   cnt = 0;
81   for (i=0; i<m; i++) {
82     na = ia[i+1] - ia[i];
83     nb = ib[i+1] - ib[i];
84     if (!na && !nb) continue;
85     aa = a->a + ia[i];
86     for(j=0; j<na;j++) {
87       if (aa[j] != 0.0) {
88         rows[cnt++] = rstart + i;
89         goto ok2;
90       }
91     }
92     bb = b->a + ib[i];
93     for (j=0; j<nb; j++) {
94       if (bb[j] != 0.0) {
95         rows[cnt++] = rstart + i;
96         goto ok2;
97       }
98     }
99     ok2:;
100   }
101   ierr = ISCreateGeneral(((PetscObject)M)->comm,cnt,rows,PETSC_OWN_POINTER,keptrows);CHKERRQ(ierr);
102   PetscFunctionReturn(0);
103 }
104 
105 #undef __FUNCT__
106 #define __FUNCT__ "MatGetColumnNorms_MPIAIJ"
107 PetscErrorCode MatGetColumnNorms_MPIAIJ(Mat A,NormType type,PetscReal *norms)
108 {
109   PetscErrorCode ierr;
110   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)A->data;
111   PetscInt       i,n,*garray = aij->garray;
112   Mat_SeqAIJ     *a_aij = (Mat_SeqAIJ*) aij->A->data;
113   Mat_SeqAIJ     *b_aij = (Mat_SeqAIJ*) aij->B->data;
114   PetscReal      *work;
115 
116   PetscFunctionBegin;
117   ierr = MatGetSize(A,PETSC_NULL,&n);CHKERRQ(ierr);
118   ierr = PetscMalloc(n*sizeof(PetscReal),&work);CHKERRQ(ierr);
119   ierr = PetscMemzero(work,n*sizeof(PetscReal));CHKERRQ(ierr);
120   if (type == NORM_2) {
121     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
122       work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]*a_aij->a[i]);
123     }
124     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
125       work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]*b_aij->a[i]);
126     }
127   } else if (type == NORM_1) {
128     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
129       work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]);
130     }
131     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
132       work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]);
133     }
134   } else if (type == NORM_INFINITY) {
135     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
136       work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]);
137     }
138     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
139       work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]),work[garray[b_aij->j[i]]]);
140     }
141 
142   } else SETERRQ(((PetscObject)A)->comm,PETSC_ERR_ARG_WRONG,"Unknown NormType");
143   if (type == NORM_INFINITY) {
144     ierr = MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,A->hdr.comm);CHKERRQ(ierr);
145   } else {
146     ierr = MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,A->hdr.comm);CHKERRQ(ierr);
147   }
148   ierr = PetscFree(work);CHKERRQ(ierr);
149   if (type == NORM_2) {
150     for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]);
151   }
152   PetscFunctionReturn(0);
153 }
154 
155 #undef __FUNCT__
156 #define __FUNCT__ "MatDistribute_MPIAIJ"
157 /*
158     Distributes a SeqAIJ matrix across a set of processes. Code stolen from
159     MatLoad_MPIAIJ(). Horrible lack of reuse. Should be a routine for each matrix type.
160 
161     Only for square matrices
162 */
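/*
   Illustrative sketch (hypothetical caller, not code from this file): distributing a
   square sequential matrix "gseq" held on process 0 across "comm", where "mloc" is the
   number of rows this process is to own, and later reusing the same layout to push
   fresh numerical values from process 0:

     Mat Adist;
     ierr = MatDistribute_MPIAIJ(comm,gseq,mloc,MAT_INITIAL_MATRIX,&Adist);CHKERRQ(ierr);
     ...
     ierr = MatDistribute_MPIAIJ(comm,gseq,mloc,MAT_REUSE_MATRIX,&Adist);CHKERRQ(ierr);
*/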
163 PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat)
164 {
165   PetscMPIInt    rank,size;
166   PetscInt       *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz,*gmataj,cnt,row,*ld;
167   PetscErrorCode ierr;
168   Mat            mat;
169   Mat_SeqAIJ     *gmata;
170   PetscMPIInt    tag;
171   MPI_Status     status;
172   PetscBool      aij;
173   MatScalar      *gmataa,*ao,*ad,*gmataarestore=0;
174 
175   PetscFunctionBegin;
176   CHKMEMQ;
177   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
178   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
179   if (!rank) {
180     ierr = PetscTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);CHKERRQ(ierr);
181     if (!aij) SETERRQ1(((PetscObject)gmat)->comm,PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name);
182   }
183   if (reuse == MAT_INITIAL_MATRIX) {
184     ierr = MatCreate(comm,&mat);CHKERRQ(ierr);
185     ierr = MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
186     ierr = MatSetType(mat,MATAIJ);CHKERRQ(ierr);
187     ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr);
188     ierr = PetscMalloc2(m,PetscInt,&dlens,m,PetscInt,&olens);CHKERRQ(ierr);
189     ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
190     rowners[0] = 0;
191     for (i=2; i<=size; i++) {
192       rowners[i] += rowners[i-1];
193     }
194     rstart = rowners[rank];
195     rend   = rowners[rank+1];
196     ierr   = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
197     if (!rank) {
198       gmata = (Mat_SeqAIJ*) gmat->data;
199       /* send row lengths to all processors */
200       for (i=0; i<m; i++) dlens[i] = gmata->ilen[i];
201       for (i=1; i<size; i++) {
202 	ierr = MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
203       }
204       /* determine the diagonal and off-diagonal nonzero counts for each row */
205       ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
206       ierr = PetscMalloc(m*sizeof(PetscInt),&ld);CHKERRQ(ierr);
207       ierr = PetscMemzero(ld,m*sizeof(PetscInt));CHKERRQ(ierr);
208       jj = 0;
209       for (i=0; i<m; i++) {
210 	for (j=0; j<dlens[i]; j++) {
211           if (gmata->j[jj] < rstart) ld[i]++;
212 	  if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++;
213 	  jj++;
214 	}
215       }
216       /* send column indices to other processes */
217       for (i=1; i<size; i++) {
218 	nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
219 	ierr = MPI_Send(&nz,1,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
220 	ierr = MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
221       }
222 
223       /* send numerical values to other processes */
224       for (i=1; i<size; i++) {
225         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
226         ierr = MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
227       }
228       gmataa = gmata->a;
229       gmataj = gmata->j;
230 
231     } else {
232       /* receive row lengths */
233       ierr = MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
234       /* receive column indices */
235       ierr = MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
236       ierr = PetscMalloc2(nz,PetscScalar,&gmataa,nz,PetscInt,&gmataj);CHKERRQ(ierr);
237       ierr = MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
238       /* determine the diagonal and off-diagonal nonzero counts for each row */
239       ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
240       ierr = PetscMalloc(m*sizeof(PetscInt),&ld);CHKERRQ(ierr);
241       ierr = PetscMemzero(ld,m*sizeof(PetscInt));CHKERRQ(ierr);
242       jj = 0;
243       for (i=0; i<m; i++) {
244 	for (j=0; j<dlens[i]; j++) {
245           if (gmataj[jj] < rstart) ld[i]++;
246 	  if (gmataj[jj] < rstart || gmataj[jj] >= rend) olens[i]++;
247 	  jj++;
248 	}
249       }
250       /* receive numerical values */
251       ierr = PetscMemzero(gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr);
252       ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
253     }
254     /* set preallocation */
255     for (i=0; i<m; i++) {
256       dlens[i] -= olens[i];
257     }
258     ierr = MatSeqAIJSetPreallocation(mat,0,dlens);CHKERRQ(ierr);
259     ierr = MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);CHKERRQ(ierr);
260 
261     for (i=0; i<m; i++) {
262       dlens[i] += olens[i];
263     }
264     cnt  = 0;
265     for (i=0; i<m; i++) {
266       row  = rstart + i;
267       ierr = MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);CHKERRQ(ierr);
268       cnt += dlens[i];
269     }
270     if (rank) {
271       ierr = PetscFree2(gmataa,gmataj);CHKERRQ(ierr);
272     }
273     ierr = PetscFree2(dlens,olens);CHKERRQ(ierr);
274     ierr = PetscFree(rowners);CHKERRQ(ierr);
275     ((Mat_MPIAIJ*)(mat->data))->ld = ld;
276     *inmat = mat;
277   } else {   /* column indices are already set; only need to move over numerical values from process 0 */
278     Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data;
279     Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data;
280     mat   = *inmat;
281     ierr  = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
282     if (!rank) {
283       /* send numerical values to other processes */
284       gmata = (Mat_SeqAIJ*) gmat->data;
285       ierr   = MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);CHKERRQ(ierr);
286       gmataa = gmata->a;
287       for (i=1; i<size; i++) {
288         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
289         ierr = MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
290       }
291       nz   = gmata->i[rowners[1]]-gmata->i[rowners[0]];
292     } else {
293       /* receive numerical values from process 0 */
294       nz   = Ad->nz + Ao->nz;
295       ierr = PetscMalloc(nz*sizeof(PetscScalar),&gmataa);CHKERRQ(ierr); gmataarestore = gmataa;
296       ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
297     }
298     /* transfer numerical values into the diagonal A and off diagonal B parts of mat */
299     ld = ((Mat_MPIAIJ*)(mat->data))->ld;
300     ad = Ad->a;
301     ao = Ao->a;
302     if (mat->rmap->n) {
303       i  = 0;
304       nz = ld[i];                                   ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
305       nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
306     }
307     for (i=1; i<mat->rmap->n; i++) {
308       nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
309       nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
310     }
311     i--;
312     if (mat->rmap->n) {
313       nz = Ao->i[i+1] - Ao->i[i] - ld[i];           ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
314     }
315     if (rank) {
316       ierr = PetscFree(gmataarestore);CHKERRQ(ierr);
317     }
318   }
319   ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
320   ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
321   CHKMEMQ;
322   PetscFunctionReturn(0);
323 }
324 
325 /*
326   Local utility routine that creates a mapping from the global column
327 number to the local number in the off-diagonal part of the local
328 storage of the matrix.  When PETSC_USE_CTABLE is defined this is scalable at
329 a slightly higher hash-table lookup cost; without it, it is not scalable (each
330 process keeps an order-N integer array) but access is fast.
331 */
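/*
   Sketch of how the resulting colmap is consulted later in this file (see
   MatSetValues_MPIAIJ() and MatGetValues_MPIAIJ()); "gcol" is a global column index
   and "lcol" comes back as the local off-diagonal column, or negative if the column
   is absent ("gcol" and "lcol" are illustrative names only):

     PetscInt lcol;
   #if defined (PETSC_USE_CTABLE)
     ierr = PetscTableFind(aij->colmap,gcol+1,&lcol);CHKERRQ(ierr);
     lcol--;
   #else
     lcol = aij->colmap[gcol] - 1;
   #endif
*/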
332 #undef __FUNCT__
333 #define __FUNCT__ "CreateColmap_MPIAIJ_Private"
334 PetscErrorCode CreateColmap_MPIAIJ_Private(Mat mat)
335 {
336   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
337   PetscErrorCode ierr;
338   PetscInt       n = aij->B->cmap->n,i;
339 
340   PetscFunctionBegin;
341 #if defined (PETSC_USE_CTABLE)
342   ierr = PetscTableCreate(n,mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr);
343   for (i=0; i<n; i++){
344     ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1,INSERT_VALUES);CHKERRQ(ierr);
345   }
346 #else
347   ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscInt),&aij->colmap);CHKERRQ(ierr);
348   ierr = PetscLogObjectMemory(mat,mat->cmap->N*sizeof(PetscInt));CHKERRQ(ierr);
349   ierr = PetscMemzero(aij->colmap,mat->cmap->N*sizeof(PetscInt));CHKERRQ(ierr);
350   for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
351 #endif
352   PetscFunctionReturn(0);
353 }
354 
355 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \
356 { \
357     if (col <= lastcol1) low1 = 0; else high1 = nrow1; \
358     lastcol1 = col;\
359     while (high1-low1 > 5) { \
360       t = (low1+high1)/2; \
361       if (rp1[t] > col) high1 = t; \
362       else             low1  = t; \
363     } \
364       for (_i=low1; _i<high1; _i++) { \
365         if (rp1[_i] > col) break; \
366         if (rp1[_i] == col) { \
367           if (addv == ADD_VALUES) ap1[_i] += value;   \
368           else                    ap1[_i] = value; \
369           goto a_noinsert; \
370         } \
371       }  \
372       if (value == 0.0 && ignorezeroentries) {low1 = 0; high1 = nrow1;goto a_noinsert;} \
373       if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;}		\
374       if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
375       MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \
376       N = nrow1++ - 1; a->nz++; high1++; \
377       /* shift up all the later entries in this row */ \
378       for (ii=N; ii>=_i; ii--) { \
379         rp1[ii+1] = rp1[ii]; \
380         ap1[ii+1] = ap1[ii]; \
381       } \
382       rp1[_i] = col;  \
383       ap1[_i] = value;  \
384       a_noinsert: ; \
385       ailen[row] = nrow1; \
386 }
387 
388 
389 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \
390 { \
391     if (col <= lastcol2) low2 = 0; else high2 = nrow2; \
392     lastcol2 = col;\
393     while (high2-low2 > 5) { \
394       t = (low2+high2)/2; \
395       if (rp2[t] > col) high2 = t; \
396       else             low2  = t; \
397     } \
398     for (_i=low2; _i<high2; _i++) {		\
399       if (rp2[_i] > col) break;			\
400       if (rp2[_i] == col) {			      \
401 	if (addv == ADD_VALUES) ap2[_i] += value;     \
402 	else                    ap2[_i] = value;      \
403 	goto b_noinsert;			      \
404       }						      \
405     }							      \
406     if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
407     if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;}		\
408     if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
409     MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \
410     N = nrow2++ - 1; b->nz++; high2++;					\
411     /* shift up all the later entries in this row */			\
412     for (ii=N; ii>=_i; ii--) {						\
413       rp2[ii+1] = rp2[ii];						\
414       ap2[ii+1] = ap2[ii];						\
415     }									\
416     rp2[_i] = col;							\
417     ap2[_i] = value;							\
418     b_noinsert: ;								\
419     bilen[row] = nrow2;							\
420 }
421 
422 #undef __FUNCT__
423 #define __FUNCT__ "MatSetValuesRow_MPIAIJ"
424 PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[])
425 {
426   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)A->data;
427   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data;
428   PetscErrorCode ierr;
429   PetscInt       l,*garray = mat->garray,diag;
430 
431   PetscFunctionBegin;
432   /* code only works for square matrices A */
433 
434   /* find size of row to the left of the diagonal part */
435   ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr);
436   row  = row - diag;
437   for (l=0; l<b->i[row+1]-b->i[row]; l++) {
438     if (garray[b->j[b->i[row]+l]] > diag) break;
439   }
440   ierr = PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));CHKERRQ(ierr);
441 
442   /* diagonal part */
443   ierr = PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));CHKERRQ(ierr);
444 
445   /* right of diagonal part */
446   ierr = PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));CHKERRQ(ierr);
447   PetscFunctionReturn(0);
448 }
449 
450 #undef __FUNCT__
451 #define __FUNCT__ "MatSetValues_MPIAIJ"
452 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
453 {
454   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
455   PetscScalar    value;
456   PetscErrorCode ierr;
457   PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
458   PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
459   PetscBool      roworiented = aij->roworiented;
460 
461   /* Some Variables required in the macro */
462   Mat            A = aij->A;
463   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data;
464   PetscInt       *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
465   MatScalar      *aa = a->a;
466   PetscBool      ignorezeroentries = a->ignorezeroentries;
467   Mat            B = aij->B;
468   Mat_SeqAIJ     *b = (Mat_SeqAIJ*)B->data;
469   PetscInt       *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
470   MatScalar      *ba = b->a;
471 
472   PetscInt       *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
473   PetscInt       nonew = a->nonew;
474   MatScalar      *ap1,*ap2;
475 
476   PetscFunctionBegin;
477   if (v) PetscValidScalarPointer(v,6);
478   for (i=0; i<m; i++) {
479     if (im[i] < 0) continue;
480 #if defined(PETSC_USE_DEBUG)
481     if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
482 #endif
483     if (im[i] >= rstart && im[i] < rend) {
484       row      = im[i] - rstart;
485       lastcol1 = -1;
486       rp1      = aj + ai[row];
487       ap1      = aa + ai[row];
488       rmax1    = aimax[row];
489       nrow1    = ailen[row];
490       low1     = 0;
491       high1    = nrow1;
492       lastcol2 = -1;
493       rp2      = bj + bi[row];
494       ap2      = ba + bi[row];
495       rmax2    = bimax[row];
496       nrow2    = bilen[row];
497       low2     = 0;
498       high2    = nrow2;
499 
500       for (j=0; j<n; j++) {
501         if (v) {if (roworiented) value = v[i*n+j]; else value = v[i+j*m];} else value = 0.0;
502         if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
503         if (in[j] >= cstart && in[j] < cend){
504           col = in[j] - cstart;
505           MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
506         } else if (in[j] < 0) continue;
507 #if defined(PETSC_USE_DEBUG)
508         else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
509 #endif
510         else {
511           if (mat->was_assembled) {
512             if (!aij->colmap) {
513               ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
514             }
515 #if defined (PETSC_USE_CTABLE)
516             ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
517 	    col--;
518 #else
519             col = aij->colmap[in[j]] - 1;
520 #endif
521             if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
522               ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
523               col =  in[j];
524               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
525               B = aij->B;
526               b = (Mat_SeqAIJ*)B->data;
527               bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a;
528               rp2      = bj + bi[row];
529               ap2      = ba + bi[row];
530               rmax2    = bimax[row];
531               nrow2    = bilen[row];
532               low2     = 0;
533               high2    = nrow2;
534               bm       = aij->B->rmap->n;
535               ba = b->a;
536             }
537           } else col = in[j];
538           MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
539         }
540       }
541     } else {
542       if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
543       if (!aij->donotstash) {
544         if (roworiented) {
545           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
546         } else {
547           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
548         }
549       }
550     }
551   }
552   PetscFunctionReturn(0);
553 }
554 
555 #undef __FUNCT__
556 #define __FUNCT__ "MatGetValues_MPIAIJ"
557 PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
558 {
559   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
560   PetscErrorCode ierr;
561   PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
562   PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
563 
564   PetscFunctionBegin;
565   for (i=0; i<m; i++) {
566     if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/
567     if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
568     if (idxm[i] >= rstart && idxm[i] < rend) {
569       row = idxm[i] - rstart;
570       for (j=0; j<n; j++) {
571         if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */
572         if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
573         if (idxn[j] >= cstart && idxn[j] < cend){
574           col = idxn[j] - cstart;
575           ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
576         } else {
577           if (!aij->colmap) {
578             ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
579           }
580 #if defined (PETSC_USE_CTABLE)
581           ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
582           col --;
583 #else
584           col = aij->colmap[idxn[j]] - 1;
585 #endif
586           if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
587           else {
588             ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
589           }
590         }
591       }
592     } else {
593       SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
594     }
595   }
596   PetscFunctionReturn(0);
597 }
598 
599 extern PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat,Vec,Vec);
600 
601 #undef __FUNCT__
602 #define __FUNCT__ "MatAssemblyBegin_MPIAIJ"
603 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
604 {
605   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
606   PetscErrorCode ierr;
607   PetscInt       nstash,reallocs;
608   InsertMode     addv;
609 
610   PetscFunctionBegin;
611   if (aij->donotstash || mat->nooffprocentries) {
612     PetscFunctionReturn(0);
613   }
614 
615   /* make sure all processors are either in INSERTMODE or ADDMODE */
616   ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,((PetscObject)mat)->comm);CHKERRQ(ierr);
617   if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
618   mat->insertmode = addv; /* in case this processor had no cache */
619 
620   ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
621   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
622   ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
623   PetscFunctionReturn(0);
624 }
625 
626 #undef __FUNCT__
627 #define __FUNCT__ "MatAssemblyEnd_MPIAIJ"
628 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
629 {
630   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
631   Mat_SeqAIJ     *a=(Mat_SeqAIJ *)aij->A->data;
632   PetscErrorCode ierr;
633   PetscMPIInt    n;
634   PetscInt       i,j,rstart,ncols,flg;
635   PetscInt       *row,*col;
636   PetscBool      other_disassembled;
637   PetscScalar    *val;
638   InsertMode     addv = mat->insertmode;
639 
640   /* do not use 'b = (Mat_SeqAIJ *)aij->B->data' as B can be reset in disassembly */
641   PetscFunctionBegin;
642   if (!aij->donotstash && !mat->nooffprocentries) {
643     while (1) {
644       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
645       if (!flg) break;
646 
647       for (i=0; i<n;) {
648         /* Now identify the consecutive vals belonging to the same row */
649         for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
650         if (j < n) ncols = j-i;
651         else       ncols = n-i;
652         /* Now assemble all these values with a single function call */
653         ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
654         i = j;
655       }
656     }
657     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
658   }
659   ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
660   ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);
661 
662   /* determine if any processor has disassembled, if so we must
663      also disassemble ourselves, in order that we may reassemble. */
664   /*
665      if nonzero structure of submatrix B cannot change then we know that
666      no processor disassembled thus we can skip this stuff
667   */
668   if (!((Mat_SeqAIJ*)aij->B->data)->nonew)  {
669     ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,((PetscObject)mat)->comm);CHKERRQ(ierr);
670     if (mat->was_assembled && !other_disassembled) {
671       ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
672     }
673   }
674   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
675     ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
676   }
677   ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr);
678   ierr = MatSetOption(aij->B,MAT_CHECK_COMPRESSED_ROW,PETSC_FALSE);CHKERRQ(ierr);
679   ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
680   ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);
681 
682   ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
683   aij->rowvalues = 0;
684 
685   /* used by MatAXPY() */
686   a->xtoy = 0; ((Mat_SeqAIJ *)aij->B->data)->xtoy = 0;  /* b->xtoy = 0 */
687   a->XtoY = 0; ((Mat_SeqAIJ *)aij->B->data)->XtoY = 0;  /* b->XtoY = 0 */
688 
689   ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
690   if (a->inode.size) mat->ops->multdiagonalblock = MatMultDiagonalBlock_MPIAIJ;
691   PetscFunctionReturn(0);
692 }
693 
694 #undef __FUNCT__
695 #define __FUNCT__ "MatZeroEntries_MPIAIJ"
696 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
697 {
698   Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
699   PetscErrorCode ierr;
700 
701   PetscFunctionBegin;
702   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
703   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
704   PetscFunctionReturn(0);
705 }
706 
707 #undef __FUNCT__
708 #define __FUNCT__ "MatZeroRows_MPIAIJ"
709 PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
710 {
711   Mat_MPIAIJ        *l = (Mat_MPIAIJ*)A->data;
712   PetscErrorCode    ierr;
713   PetscMPIInt       size = l->size,imdex,n,rank = l->rank,tag = ((PetscObject)A)->tag,lastidx = -1;
714   PetscInt          i,*owners = A->rmap->range;
715   PetscInt          *nprocs,j,idx,nsends,row;
716   PetscInt          nmax,*svalues,*starts,*owner,nrecvs;
717   PetscInt          *rvalues,count,base,slen,*source;
718   PetscInt          *lens,*lrows,*values,rstart=A->rmap->rstart;
719   MPI_Comm          comm = ((PetscObject)A)->comm;
720   MPI_Request       *send_waits,*recv_waits;
721   MPI_Status        recv_status,*send_status;
722   const PetscScalar *xx;
723   PetscScalar       *bb;
724 #if defined(PETSC_DEBUG)
725   PetscBool      found = PETSC_FALSE;
726 #endif
727 
728   PetscFunctionBegin;
729   /*  first count number of contributors to each processor */
730   ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
731   ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
732   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/
733   j = 0;
734   for (i=0; i<N; i++) {
735     if (lastidx > (idx = rows[i])) j = 0;
736     lastidx = idx;
737     for (; j<size; j++) {
738       if (idx >= owners[j] && idx < owners[j+1]) {
739         nprocs[2*j]++;
740         nprocs[2*j+1] = 1;
741         owner[i] = j;
742 #if defined(PETSC_DEBUG)
743         found = PETSC_TRUE;
744 #endif
745         break;
746       }
747     }
748 #if defined(PETSC_DEBUG)
749     if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
750     found = PETSC_FALSE;
751 #endif
752   }
753   nsends = 0;  for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}
754 
755   if (A->nooffproczerorows) {
756     if (nsends > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"You called MatSetOption(,MAT_NO_OFF_PROC_ZERO_ROWS,PETSC_TRUE) but set an off process zero row");
757     nrecvs = nsends;
758     nmax   = N;
759   } else {
760     /* inform other processors of number of messages and max length*/
761     ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);
762   }
763 
764   /* post receives:   */
765   ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr);
766   ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
767   for (i=0; i<nrecvs; i++) {
768     ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
769   }
770 
771   /* do sends:
772       1) starts[i] gives the starting index in svalues for stuff going to
773          the ith processor
774   */
775   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr);
776   ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
777   ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr);
778   starts[0] = 0;
779   for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
780   for (i=0; i<N; i++) {
781     svalues[starts[owner[i]]++] = rows[i];
782   }
783 
784   starts[0] = 0;
785   for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
786   count = 0;
787   for (i=0; i<size; i++) {
788     if (nprocs[2*i+1]) {
789       ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
790     }
791   }
792   ierr = PetscFree(starts);CHKERRQ(ierr);
793 
794   base = owners[rank];
795 
796   /*  wait on receives */
797   ierr   = PetscMalloc2(nrecvs,PetscInt,&lens,nrecvs,PetscInt,&source);CHKERRQ(ierr);
798   count  = nrecvs; slen = 0;
799   while (count) {
800     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
801     /* unpack receives into our local space */
802     ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);
803     source[imdex]  = recv_status.MPI_SOURCE;
804     lens[imdex]    = n;
805     slen          += n;
806     count--;
807   }
808   ierr = PetscFree(recv_waits);CHKERRQ(ierr);
809 
810   /* move the data into the send scatter */
811   ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr);
812   count = 0;
813   for (i=0; i<nrecvs; i++) {
814     values = rvalues + i*nmax;
815     for (j=0; j<lens[i]; j++) {
816       lrows[count++] = values[j] - base;
817     }
818   }
819   ierr = PetscFree(rvalues);CHKERRQ(ierr);
820   ierr = PetscFree2(lens,source);CHKERRQ(ierr);
821   ierr = PetscFree(owner);CHKERRQ(ierr);
822   ierr = PetscFree(nprocs);CHKERRQ(ierr);
823 
824   /* fix right hand side if needed */
825   if (x && b) {
826     ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr);
827     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
828     for (i=0; i<slen; i++) {
829       bb[lrows[i]] = diag*xx[lrows[i]];
830     }
831     ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr);
832     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
833   }
834   /*
835         Zero the required rows. If the "diagonal block" of the matrix
836      is square and the user wishes to set the diagonal we use separate
837      code so that MatSetValues() is not called for each diagonal entry (which
838      would allocate new memory, triggering many mallocs and slowing things down).
839 
840   */
841   /* must zero l->B before l->A because the (diag) case below may put values into l->B*/
842   ierr = MatZeroRows(l->B,slen,lrows,0.0,0,0);CHKERRQ(ierr);
843   if ((diag != 0.0) && (l->A->rmap->N == l->A->cmap->N)) {
844     ierr = MatZeroRows(l->A,slen,lrows,diag,0,0);CHKERRQ(ierr);
845   } else if (diag != 0.0) {
846     ierr = MatZeroRows(l->A,slen,lrows,0.0,0,0);CHKERRQ(ierr);
847     if (((Mat_SeqAIJ*)l->A->data)->nonew) {
848       SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\n\
849 MAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
850     }
851     for (i = 0; i < slen; i++) {
852       row  = lrows[i] + rstart;
853       ierr = MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr);
854     }
855     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
856     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
857   } else {
858     ierr = MatZeroRows(l->A,slen,lrows,0.0,0,0);CHKERRQ(ierr);
859   }
860   ierr = PetscFree(lrows);CHKERRQ(ierr);
861 
862   /* wait on sends */
863   if (nsends) {
864     ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
865     ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
866     ierr = PetscFree(send_status);CHKERRQ(ierr);
867   }
868   ierr = PetscFree(send_waits);CHKERRQ(ierr);
869   ierr = PetscFree(svalues);CHKERRQ(ierr);
870   PetscFunctionReturn(0);
871 }
872 
873 #undef __FUNCT__
874 #define __FUNCT__ "MatZeroRowsColumns_MPIAIJ"
875 PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
876 {
877   Mat_MPIAIJ        *l = (Mat_MPIAIJ*)A->data;
878   PetscErrorCode    ierr;
879   PetscMPIInt       size = l->size,imdex,n,rank = l->rank,tag = ((PetscObject)A)->tag,lastidx = -1;
880   PetscInt          i,*owners = A->rmap->range;
881   PetscInt          *nprocs,j,idx,nsends;
882   PetscInt          nmax,*svalues,*starts,*owner,nrecvs;
883   PetscInt          *rvalues,count,base,slen,*source;
884   PetscInt          *lens,*lrows,*values,m;
885   MPI_Comm          comm = ((PetscObject)A)->comm;
886   MPI_Request       *send_waits,*recv_waits;
887   MPI_Status        recv_status,*send_status;
888   const PetscScalar *xx;
889   PetscScalar       *bb,*mask;
890   Vec               xmask,lmask;
891   Mat_SeqAIJ        *aij = (Mat_SeqAIJ*)l->B->data;
892   const PetscInt    *aj, *ii,*ridx;
893   PetscScalar       *aa;
894 #if defined(PETSC_DEBUG)
895   PetscBool         found = PETSC_FALSE;
896 #endif
897 
898   PetscFunctionBegin;
899   /*  first count number of contributors to each processor */
900   ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
901   ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
902   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/
903   j = 0;
904   for (i=0; i<N; i++) {
905     if (lastidx > (idx = rows[i])) j = 0;
906     lastidx = idx;
907     for (; j<size; j++) {
908       if (idx >= owners[j] && idx < owners[j+1]) {
909         nprocs[2*j]++;
910         nprocs[2*j+1] = 1;
911         owner[i] = j;
912 #if defined(PETSC_DEBUG)
913         found = PETSC_TRUE;
914 #endif
915         break;
916       }
917     }
918 #if defined(PETSC_DEBUG)
919     if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
920     found = PETSC_FALSE;
921 #endif
922   }
923   nsends = 0;  for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}
924 
925   /* inform other processors of number of messages and max length*/
926   ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);
927 
928   /* post receives:   */
929   ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr);
930   ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
931   for (i=0; i<nrecvs; i++) {
932     ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
933   }
934 
935   /* do sends:
936       1) starts[i] gives the starting index in svalues for stuff going to
937          the ith processor
938   */
939   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr);
940   ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
941   ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr);
942   starts[0] = 0;
943   for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
944   for (i=0; i<N; i++) {
945     svalues[starts[owner[i]]++] = rows[i];
946   }
947 
948   starts[0] = 0;
949   for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
950   count = 0;
951   for (i=0; i<size; i++) {
952     if (nprocs[2*i+1]) {
953       ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
954     }
955   }
956   ierr = PetscFree(starts);CHKERRQ(ierr);
957 
958   base = owners[rank];
959 
960   /*  wait on receives */
961   ierr   = PetscMalloc2(nrecvs,PetscInt,&lens,nrecvs,PetscInt,&source);CHKERRQ(ierr);
962   count  = nrecvs; slen = 0;
963   while (count) {
964     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
965     /* unpack receives into our local space */
966     ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);
967     source[imdex]  = recv_status.MPI_SOURCE;
968     lens[imdex]    = n;
969     slen          += n;
970     count--;
971   }
972   ierr = PetscFree(recv_waits);CHKERRQ(ierr);
973 
974   /* move the data into the send scatter */
975   ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr);
976   count = 0;
977   for (i=0; i<nrecvs; i++) {
978     values = rvalues + i*nmax;
979     for (j=0; j<lens[i]; j++) {
980       lrows[count++] = values[j] - base;
981     }
982   }
983   ierr = PetscFree(rvalues);CHKERRQ(ierr);
984   ierr = PetscFree2(lens,source);CHKERRQ(ierr);
985   ierr = PetscFree(owner);CHKERRQ(ierr);
986   ierr = PetscFree(nprocs);CHKERRQ(ierr);
987   /* lrows are the local rows to be zeroed, slen is the number of local rows */
988 
989   /* zero diagonal part of matrix */
990   ierr = MatZeroRowsColumns(l->A,slen,lrows,diag,x,b);CHKERRQ(ierr);
991 
992   /* handle off diagonal part of matrix */
993   ierr = MatGetVecs(A,&xmask,PETSC_NULL);CHKERRQ(ierr);
994   ierr = VecDuplicate(l->lvec,&lmask);CHKERRQ(ierr);
995   ierr = VecGetArray(xmask,&bb);CHKERRQ(ierr);
996   for (i=0; i<slen; i++) {
997     bb[lrows[i]] = 1;
998   }
999   ierr = VecRestoreArray(xmask,&bb);CHKERRQ(ierr);
1000   ierr = VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1001   ierr = VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1002   ierr = VecDestroy(&xmask);CHKERRQ(ierr);
1003   if (x) {
1004     ierr = VecScatterBegin(l->Mvctx,x,l->lvec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1005     ierr = VecScatterEnd(l->Mvctx,x,l->lvec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1006     ierr = VecGetArrayRead(l->lvec,&xx);CHKERRQ(ierr);
1007     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
1008   }
1009   ierr = VecGetArray(lmask,&mask);CHKERRQ(ierr);
1010 
1011   /* remove zeroed rows of off diagonal matrix */
1012   ii = aij->i;
1013   for (i=0; i<slen; i++) {
1014     ierr = PetscMemzero(aij->a + ii[lrows[i]],(ii[lrows[i]+1] - ii[lrows[i]])*sizeof(PetscScalar));CHKERRQ(ierr);
1015   }
1016 
1017   /* loop over all elements of the off-process part of the matrix, zeroing removed columns */
1018   if (aij->compressedrow.use){
1019     m    = aij->compressedrow.nrows;
1020     ii   = aij->compressedrow.i;
1021     ridx = aij->compressedrow.rindex;
1022     for (i=0; i<m; i++){
1023       n   = ii[i+1] - ii[i];
1024       aj  = aij->j + ii[i];
1025       aa  = aij->a + ii[i];
1026 
1027       for (j=0; j<n; j++) {
1028         if (PetscAbsScalar(mask[*aj])) {
1029           if (b) bb[*ridx] -= *aa*xx[*aj];
1030           *aa        = 0.0;
1031         }
1032         aa++;
1033         aj++;
1034       }
1035       ridx++;
1036     }
1037   } else { /* do not use compressed row format */
1038     m = l->B->rmap->n;
1039     for (i=0; i<m; i++) {
1040       n   = ii[i+1] - ii[i];
1041       aj  = aij->j + ii[i];
1042       aa  = aij->a + ii[i];
1043       for (j=0; j<n; j++) {
1044         if (PetscAbsScalar(mask[*aj])) {
1045           if (b) bb[i] -= *aa*xx[*aj];
1046           *aa    = 0.0;
1047         }
1048         aa++;
1049         aj++;
1050       }
1051     }
1052   }
1053   if (x) {
1054     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
1055     ierr = VecRestoreArrayRead(l->lvec,&xx);CHKERRQ(ierr);
1056   }
1057   ierr = VecRestoreArray(lmask,&mask);CHKERRQ(ierr);
1058   ierr = VecDestroy(&lmask);CHKERRQ(ierr);
1059   ierr = PetscFree(lrows);CHKERRQ(ierr);
1060 
1061   /* wait on sends */
1062   if (nsends) {
1063     ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
1064     ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
1065     ierr = PetscFree(send_status);CHKERRQ(ierr);
1066   }
1067   ierr = PetscFree(send_waits);CHKERRQ(ierr);
1068   ierr = PetscFree(svalues);CHKERRQ(ierr);
1069 
1070   PetscFunctionReturn(0);
1071 }
1072 
1073 #undef __FUNCT__
1074 #define __FUNCT__ "MatMult_MPIAIJ"
1075 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
1076 {
1077   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1078   PetscErrorCode ierr;
1079   PetscInt       nt;
1080 
1081   PetscFunctionBegin;
1082   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
1083   if (nt != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt);
1084   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1085   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
1086   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1087   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
1088   PetscFunctionReturn(0);
1089 }
1090 
1091 #undef __FUNCT__
1092 #define __FUNCT__ "MatMultDiagonalBlock_MPIAIJ"
1093 PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx)
1094 {
1095   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1096   PetscErrorCode ierr;
1097 
1098   PetscFunctionBegin;
1099   ierr = MatMultDiagonalBlock(a->A,bb,xx);CHKERRQ(ierr);
1100   PetscFunctionReturn(0);
1101 }
1102 
1103 #undef __FUNCT__
1104 #define __FUNCT__ "MatMultAdd_MPIAIJ"
1105 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1106 {
1107   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1108   PetscErrorCode ierr;
1109 
1110   PetscFunctionBegin;
1111   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1112   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1113   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1114   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
1115   PetscFunctionReturn(0);
1116 }
1117 
1118 #undef __FUNCT__
1119 #define __FUNCT__ "MatMultTranspose_MPIAIJ"
1120 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
1121 {
1122   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1123   PetscErrorCode ierr;
1124   PetscBool      merged;
1125 
1126   PetscFunctionBegin;
1127   ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr);
1128   /* do nondiagonal part */
1129   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1130   if (!merged) {
1131     /* send it on its way */
1132     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1133     /* do local part */
1134     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
1135     /* receive remote parts: note this assumes the values are not actually */
1136     /* added into yy until the next line. */
1137     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1138   } else {
1139     /* do local part */
1140     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
1141     /* send it on its way */
1142     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1143     /* values actually were received in the Begin() but we need to call this nop */
1144     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1145   }
1146   PetscFunctionReturn(0);
1147 }
1148 
1149 EXTERN_C_BEGIN
1150 #undef __FUNCT__
1151 #define __FUNCT__ "MatIsTranspose_MPIAIJ"
1152 PetscErrorCode  MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscBool  *f)
1153 {
1154   MPI_Comm       comm;
1155   Mat_MPIAIJ     *Aij = (Mat_MPIAIJ *) Amat->data, *Bij;
1156   Mat            Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs;
1157   IS             Me,Notme;
1158   PetscErrorCode ierr;
1159   PetscInt       M,N,first,last,*notme,i;
1160   PetscMPIInt    size;
1161 
1162   PetscFunctionBegin;
1163 
1164   /* Easy test: symmetric diagonal block */
1165   Bij = (Mat_MPIAIJ *) Bmat->data; Bdia = Bij->A;
1166   ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr);
1167   if (!*f) PetscFunctionReturn(0);
1168   ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr);
1169   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1170   if (size == 1) PetscFunctionReturn(0);
1171 
1172   /* Hard test: off-diagonal block. This takes a MatGetSubMatrix. */
1173   ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr);
1174   ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr);
1175   ierr = PetscMalloc((N-last+first)*sizeof(PetscInt),&notme);CHKERRQ(ierr);
1176   for (i=0; i<first; i++) notme[i] = i;
1177   for (i=last; i<M; i++) notme[i-last+first] = i;
1178   ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,PETSC_COPY_VALUES,&Notme);CHKERRQ(ierr);
1179   ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr);
1180   ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr);
1181   Aoff = Aoffs[0];
1182   ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr);
1183   Boff = Boffs[0];
1184   ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr);
1185   ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr);
1186   ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr);
1187   ierr = ISDestroy(&Me);CHKERRQ(ierr);
1188   ierr = ISDestroy(&Notme);CHKERRQ(ierr);
1189   ierr = PetscFree(notme);CHKERRQ(ierr);
1190    PetscFunctionReturn(0);
1191 }
1192 EXTERN_C_END
1193 
1194 #undef __FUNCT__
1195 #define __FUNCT__ "MatMultTransposeAdd_MPIAIJ"
1196 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1197 {
1198   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1199   PetscErrorCode ierr;
1200 
1201   PetscFunctionBegin;
1202   /* do nondiagonal part */
1203   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1204   /* send it on its way */
1205   ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1206   /* do local part */
1207   ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1208   /* receive remote parts */
1209   ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1210   PetscFunctionReturn(0);
1211 }
1212 
1213 /*
1214   This only works correctly for square matrices where the subblock A->A is the
1215    diagonal block
1216 */
1217 #undef __FUNCT__
1218 #define __FUNCT__ "MatGetDiagonal_MPIAIJ"
1219 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v)
1220 {
1221   PetscErrorCode ierr;
1222   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1223 
1224   PetscFunctionBegin;
1225   if (A->rmap->N != A->cmap->N) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
1226   if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
1227   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
1228   PetscFunctionReturn(0);
1229 }
1230 
1231 #undef __FUNCT__
1232 #define __FUNCT__ "MatScale_MPIAIJ"
1233 PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa)
1234 {
1235   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1236   PetscErrorCode ierr;
1237 
1238   PetscFunctionBegin;
1239   ierr = MatScale(a->A,aa);CHKERRQ(ierr);
1240   ierr = MatScale(a->B,aa);CHKERRQ(ierr);
1241   PetscFunctionReturn(0);
1242 }
1243 
1244 #undef __FUNCT__
1245 #define __FUNCT__ "MatDestroy_MPIAIJ"
1246 PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
1247 {
1248   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1249   PetscErrorCode ierr;
1250 
1251   PetscFunctionBegin;
1252 #if defined(PETSC_USE_LOG)
1253   PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N);
1254 #endif
1255   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
1256   ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
1257   ierr = MatDestroy(&aij->A);CHKERRQ(ierr);
1258   ierr = MatDestroy(&aij->B);CHKERRQ(ierr);
1259 #if defined (PETSC_USE_CTABLE)
1260   ierr = PetscTableDestroy(&aij->colmap);CHKERRQ(ierr);
1261 #else
1262   ierr = PetscFree(aij->colmap);CHKERRQ(ierr);
1263 #endif
1264   ierr = PetscFree(aij->garray);CHKERRQ(ierr);
1265   ierr = VecDestroy(&aij->lvec);CHKERRQ(ierr);
1266   ierr = VecScatterDestroy(&aij->Mvctx);CHKERRQ(ierr);
1267   ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
1268   ierr = PetscFree(aij->ld);CHKERRQ(ierr);
1269   ierr = PetscFree(mat->data);CHKERRQ(ierr);
1270 
1271   ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
1272   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr);
1273   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr);
1274   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr);
1275   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C","",PETSC_NULL);CHKERRQ(ierr);
1276   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
1277   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr);
1278   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C","",PETSC_NULL);CHKERRQ(ierr);
1279   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C","",PETSC_NULL);CHKERRQ(ierr);
1280   PetscFunctionReturn(0);
1281 }
1282 
1283 #undef __FUNCT__
1284 #define __FUNCT__ "MatView_MPIAIJ_Binary"
1285 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
1286 {
1287   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
1288   Mat_SeqAIJ*       A = (Mat_SeqAIJ*)aij->A->data;
1289   Mat_SeqAIJ*       B = (Mat_SeqAIJ*)aij->B->data;
1290   PetscErrorCode    ierr;
1291   PetscMPIInt       rank,size,tag = ((PetscObject)viewer)->tag;
1292   int               fd;
1293   PetscInt          nz,header[4],*row_lengths,*range=0,rlen,i;
1294   PetscInt          nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap->rstart,rnz;
1295   PetscScalar       *column_values;
1296   PetscInt          message_count,flowcontrolcount;
1297 
1298   PetscFunctionBegin;
1299   ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr);
1300   ierr = MPI_Comm_size(((PetscObject)mat)->comm,&size);CHKERRQ(ierr);
1301   nz   = A->nz + B->nz;
1302   if (!rank) {
1303     header[0] = MAT_FILE_CLASSID;
1304     header[1] = mat->rmap->N;
1305     header[2] = mat->cmap->N;
1306     ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
1307     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1308     ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1309     /* get largest number of rows any processor has */
1310     rlen = mat->rmap->n;
1311     range = mat->rmap->range;
1312     for (i=1; i<size; i++) {
1313       rlen = PetscMax(rlen,range[i+1] - range[i]);
1314     }
1315   } else {
1316     ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
1317     rlen = mat->rmap->n;
1318   }
1319 
1320   /* load up the local row counts */
1321   ierr = PetscMalloc((rlen+1)*sizeof(PetscInt),&row_lengths);CHKERRQ(ierr);
1322   for (i=0; i<mat->rmap->n; i++) {
1323     row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
1324   }
1325 
1326   /* store the row lengths to the file */
1327   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1328   if (!rank) {
1329     ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap->n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1330     for (i=1; i<size; i++) {
1331       ierr = PetscViewerFlowControlStepMaster(viewer,i,message_count,flowcontrolcount);CHKERRQ(ierr);
1332       rlen = range[i+1] - range[i];
1333       ierr = MPILong_Recv(row_lengths,rlen,MPIU_INT,i,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1334       ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1335     }
1336     ierr = PetscViewerFlowControlEndMaster(viewer,message_count);CHKERRQ(ierr);
1337   } else {
1338     ierr = PetscViewerFlowControlStepWorker(viewer,rank,message_count);CHKERRQ(ierr);
1339     ierr = MPILong_Send(row_lengths,mat->rmap->n,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1340     ierr = PetscViewerFlowControlEndWorker(viewer,message_count);CHKERRQ(ierr);
1341   }
1342   ierr = PetscFree(row_lengths);CHKERRQ(ierr);
1343 
1344   /* load up the local column indices */
1345   nzmax = nz; /* process 0 needs enough space for the largest number of nonzeros on any process */
1346   ierr = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
1347   ierr = PetscMalloc((nzmax+1)*sizeof(PetscInt),&column_indices);CHKERRQ(ierr);
1348   cnt  = 0;
1349   for (i=0; i<mat->rmap->n; i++) {
1350     for (j=B->i[i]; j<B->i[i+1]; j++) {
1351       if ( (col = garray[B->j[j]]) > cstart) break;
1352       column_indices[cnt++] = col;
1353     }
1354     for (k=A->i[i]; k<A->i[i+1]; k++) {
1355       column_indices[cnt++] = A->j[k] + cstart;
1356     }
1357     for (; j<B->i[i+1]; j++) {
1358       column_indices[cnt++] = garray[B->j[j]];
1359     }
1360   }
1361   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
1362 
1363   /* store the column indices to the file */
1364   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1365   if (!rank) {
1366     MPI_Status status;
1367     ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1368     for (i=1; i<size; i++) {
1369       ierr = PetscViewerFlowControlStepMaster(viewer,i,message_count,flowcontrolcount);CHKERRQ(ierr);
1370       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
1371       if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: rnz = %D nzmax = %D",rnz,nzmax);
1372       ierr = MPILong_Recv(column_indices,rnz,MPIU_INT,i,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1373       ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1374     }
1375     ierr = PetscViewerFlowControlEndMaster(viewer,message_count);CHKERRQ(ierr);
1376   } else {
1377     ierr = PetscViewerFlowControlStepWorker(viewer,rank,message_count);CHKERRQ(ierr);
1378     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1379     ierr = MPILong_Send(column_indices,nz,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1380     ierr = PetscViewerFlowControlEndWorker(viewer,message_count);CHKERRQ(ierr);
1381   }
1382   ierr = PetscFree(column_indices);CHKERRQ(ierr);
1383 
1384   /* load up the local column values */
1385   ierr = PetscMalloc((nzmax+1)*sizeof(PetscScalar),&column_values);CHKERRQ(ierr);
1386   cnt  = 0;
1387   for (i=0; i<mat->rmap->n; i++) {
1388     for (j=B->i[i]; j<B->i[i+1]; j++) {
1389       if ( garray[B->j[j]] > cstart) break;
1390       column_values[cnt++] = B->a[j];
1391     }
1392     for (k=A->i[i]; k<A->i[i+1]; k++) {
1393       column_values[cnt++] = A->a[k];
1394     }
1395     for (; j<B->i[i+1]; j++) {
1396       column_values[cnt++] = B->a[j];
1397     }
1398   }
1399   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
1400 
1401   /* store the column values to the file */
1402   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1403   if (!rank) {
1404     MPI_Status status;
1405     ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1406     for (i=1; i<size; i++) {
1407       ierr = PetscViewerFlowControlStepMaster(viewer,i,message_count,flowcontrolcount);CHKERRQ(ierr);
1408       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
1409       if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: rnz = %D nzmax = %D",rnz,nzmax);
1410       ierr = MPILong_Recv(column_values,rnz,MPIU_SCALAR,i,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1411       ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1412     }
1413     ierr = PetscViewerFlowControlEndMaster(viewer,message_count);CHKERRQ(ierr);
1414   } else {
1415     ierr = PetscViewerFlowControlStepWorker(viewer,rank,message_count);CHKERRQ(ierr);
1416     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1417     ierr = MPILong_Send(column_values,nz,MPIU_SCALAR,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1418     ierr = PetscViewerFlowControlEndWorker(viewer,message_count);CHKERRQ(ierr);
1419   }
1420   ierr = PetscFree(column_values);CHKERRQ(ierr);
1421   PetscFunctionReturn(0);
1422 }
1423 
1424 #undef __FUNCT__
1425 #define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket"
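/*
   MatView_MPIAIJ_ASCIIorDraworSocket - common viewer body; despite the name it also
   handles binary viewers.  The ASCII info formats are printed directly from the local
   blocks; parallel binary output is delegated to MatView_MPIAIJ_Binary().  For the
   remaining cases the whole matrix is assembled onto process 0 (guarded by the
   1024-row limit for ASCII output) and viewed through the singleton viewer.
*/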
1426 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
1427 {
1428   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
1429   PetscErrorCode    ierr;
1430   PetscMPIInt       rank = aij->rank,size = aij->size;
1431   PetscBool         isdraw,iascii,isbinary;
1432   PetscViewer       sviewer;
1433   PetscViewerFormat format;
1434 
1435   PetscFunctionBegin;
1436   ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1437   ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1438   ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1439   if (iascii) {
1440     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1441     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1442       MatInfo    info;
1443       PetscInt   *inodes;
1444 
1445       ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr);
1446       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
1447       ierr = MatInodeGetInodeSizes(aij->A,PETSC_NULL,&inodes,PETSC_NULL);CHKERRQ(ierr);
1448       ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_TRUE);CHKERRQ(ierr);
1449       if (!inodes) {
1450         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, not using I-node routines\n",
1451 					      rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
1452       } else {
1453         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, using I-node routines\n",
1454 		    rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
1455       }
1456       ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
1457       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1458       ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
1459       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1460       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1461       ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_FALSE);CHKERRQ(ierr);
1462       ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
1463       ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr);
1464       PetscFunctionReturn(0);
1465     } else if (format == PETSC_VIEWER_ASCII_INFO) {
1466       PetscInt   inodecount,inodelimit,*inodes;
1467       ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr);
1468       if (inodes) {
1469         ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr);
1470       } else {
1471         ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr);
1472       }
1473       PetscFunctionReturn(0);
1474     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1475       PetscFunctionReturn(0);
1476     }
1477   } else if (isbinary) {
1478     if (size == 1) {
1479       ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1480       ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
1481     } else {
1482       ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr);
1483     }
1484     PetscFunctionReturn(0);
1485   } else if (isdraw) {
1486     PetscDraw  draw;
1487     PetscBool  isnull;
1488     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
1489     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
1490   }
1491 
1492   if (size == 1) {
1493     ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1494     ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
1495   } else {
1496     /* assemble the entire matrix onto first processor. */
1497     Mat         A;
1498     Mat_SeqAIJ  *Aloc;
1499     PetscInt    M = mat->rmap->N,N = mat->cmap->N,m,*ai,*aj,row,*cols,i,*ct;
1500     MatScalar   *a;
1501 
1502     if (mat->rmap->N > 1024) {
1503       PetscBool  flg = PETSC_FALSE;
1504 
1505       ierr = PetscOptionsGetBool(((PetscObject) mat)->prefix, "-mat_ascii_output_large", &flg,PETSC_NULL);CHKERRQ(ierr);
1506       if (!flg) {
1507         SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_OUTOFRANGE,"ASCII matrix output not allowed for matrices with more than 1024 rows, use binary format instead.\nYou can override this restriction using -mat_ascii_output_large.");
1508       }
1509     }
1510 
1511     ierr = MatCreate(((PetscObject)mat)->comm,&A);CHKERRQ(ierr);
1512     if (!rank) {
1513       ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
1514     } else {
1515       ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
1516     }
1517     /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */
1518     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
1519     ierr = MatMPIAIJSetPreallocation(A,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
1520     ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
1521     ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr);
1522 
1523     /* copy over the A part */
1524     Aloc = (Mat_SeqAIJ*)aij->A->data;
1525     m = aij->A->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1526     row = mat->rmap->rstart;
1527     for (i=0; i<ai[m]; i++) {aj[i] += mat->cmap->rstart;}
1528     for (i=0; i<m; i++) {
1529       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
1530       row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
1531     }
1532     aj = Aloc->j;
1533     for (i=0; i<ai[m]; i++) {aj[i] -= mat->cmap->rstart;}
1534 
1535     /* copy over the B part */
1536     Aloc = (Mat_SeqAIJ*)aij->B->data;
1537     m    = aij->B->rmap->n;  ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1538     row  = mat->rmap->rstart;
1539     ierr = PetscMalloc((ai[m]+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr);
1540     ct   = cols;
1541     for (i=0; i<ai[m]; i++) {cols[i] = aij->garray[aj[i]];}
1542     for (i=0; i<m; i++) {
1543       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
1544       row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
1545     }
1546     ierr = PetscFree(ct);CHKERRQ(ierr);
1547     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1548     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1549     /*
1550        Everyone has to call to draw the matrix since the graphics waits are
1551        synchronized across all processors that share the PetscDraw object
1552     */
1553     ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
1554     if (!rank) {
1555       ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1556       /* Set the type name to MATMPIAIJ so that the correct type can be printed out by PetscObjectPrintClassNamePrefixType() in MatView_SeqAIJ_ASCII()*/
1557       ierr = PetscStrcpy(((PetscObject)((Mat_MPIAIJ*)(A->data))->A)->type_name,MATMPIAIJ);CHKERRQ(ierr);
1558       ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
1559     }
1560     ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
1561     ierr = MatDestroy(&A);CHKERRQ(ierr);
1562   }
1563   PetscFunctionReturn(0);
1564 }
1565 
1566 #undef __FUNCT__
1567 #define __FUNCT__ "MatView_MPIAIJ"
1568 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer)
1569 {
1570   PetscErrorCode ierr;
1571   PetscBool      iascii,isdraw,issocket,isbinary;
1572 
1573   PetscFunctionBegin;
1574   ierr  = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1575   ierr  = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1576   ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1577   ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
1578   if (iascii || isdraw || isbinary || issocket) {
1579     ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
1580   } else {
1581     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Viewer type %s not supported by MPIAIJ matrices",((PetscObject)viewer)->type_name);
1582   }
1583   PetscFunctionReturn(0);
1584 }
1585 
1586 #undef __FUNCT__
1587 #define __FUNCT__ "MatSOR_MPIAIJ"
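/*
   MatSOR_MPIAIJ - relaxation using only the processor-local part of the matrix.

   True parallel SOR is not supported.  For the SOR_LOCAL_* variants each outer
   iteration scatters the current solution into the ghosted vector lvec, forms
   bb1 = bb - B*x (so the off-diagonal block enters only through the right-hand side),
   and then applies the corresponding sequential sweep to the diagonal block A.
   SOR_APPLY_UPPER is forwarded directly to the diagonal block, and SOR_EISENSTAT is
   implemented with local backward/forward sweeps and the usual Eisenstat splitting of
   the right-hand side.
*/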
1588 PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
1589 {
1590   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1591   PetscErrorCode ierr;
1592   Vec            bb1 = 0;
1593   PetscBool      hasop;
1594 
1595   PetscFunctionBegin;
1596   if (its > 1 || !(flag & SOR_ZERO_INITIAL_GUESS) || flag & SOR_EISENSTAT) {
1597     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
1598   }
1599 
1600   if (flag == SOR_APPLY_UPPER) {
1601     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1602     PetscFunctionReturn(0);
1603   }
1604 
1605   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
1606     if (flag & SOR_ZERO_INITIAL_GUESS) {
1607       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1608       its--;
1609     }
1610 
1611     while (its--) {
1612       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1613       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1614 
1615       /* update rhs: bb1 = bb - B*x */
1616       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1617       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1618 
1619       /* local sweep */
1620       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1621     }
1622   } else if (flag & SOR_LOCAL_FORWARD_SWEEP){
1623     if (flag & SOR_ZERO_INITIAL_GUESS) {
1624       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1625       its--;
1626     }
1627     while (its--) {
1628       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1629       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1630 
1631       /* update rhs: bb1 = bb - B*x */
1632       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1633       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1634 
1635       /* local sweep */
1636       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1637     }
1638   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){
1639     if (flag & SOR_ZERO_INITIAL_GUESS) {
1640       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1641       its--;
1642     }
1643     while (its--) {
1644       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1645       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1646 
1647       /* update rhs: bb1 = bb - B*x */
1648       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1649       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1650 
1651       /* local sweep */
1652       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1653     }
1654   }  else if (flag & SOR_EISENSTAT) {
1655     Vec         xx1;
1656 
1657     ierr = VecDuplicate(bb,&xx1);CHKERRQ(ierr);
1658     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr);
1659 
1660     ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1661     ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1662     if (!mat->diag) {
1663       ierr = MatGetVecs(matin,&mat->diag,PETSC_NULL);CHKERRQ(ierr);
1664       ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr);
1665     }
1666     ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr);
1667     if (hasop) {
1668       ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr);
1669     } else {
1670       ierr = VecPointwiseMult(bb1,mat->diag,xx);CHKERRQ(ierr);
1671     }
1672     ierr = VecAYPX(bb1,(omega-2.0)/omega,bb);CHKERRQ(ierr);
1673 
1674     ierr = MatMultAdd(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr);
1675 
1676     /* local sweep */
1677     ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr);
1678     ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr);
1679     ierr = VecDestroy(&xx1);CHKERRQ(ierr);
1680   } else SETERRQ(((PetscObject)matin)->comm,PETSC_ERR_SUP,"Parallel SOR not supported");
1681 
1682   ierr = VecDestroy(&bb1);CHKERRQ(ierr);
1683   PetscFunctionReturn(0);
1684 }
1685 
1686 #undef __FUNCT__
1687 #define __FUNCT__ "MatPermute_MPIAIJ"
1688 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B)
1689 {
1690   MPI_Comm       comm;
1691   PetscInt       first,local_rowsize,local_colsize;
1692   const PetscInt *rows;
1693   IS             crowp,growp,irowp,lrowp,lcolp,icolp;
1694   PetscErrorCode ierr;
1695 
1696   PetscFunctionBegin;
1697   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
1698   /* make a collective version of 'rowp', this is to be tolerant of users who pass serial index sets */
1699   ierr = ISOnComm(rowp,comm,PETSC_USE_POINTER,&crowp);CHKERRQ(ierr);
1700   /* collect the global row permutation and invert it */
1701   ierr = ISAllGather(crowp,&growp);CHKERRQ(ierr);
1702   ierr = ISSetPermutation(growp);CHKERRQ(ierr);
1703   ierr = ISDestroy(&crowp);CHKERRQ(ierr);
1704   ierr = ISInvertPermutation(growp,PETSC_DECIDE,&irowp);CHKERRQ(ierr);
1705   ierr = ISDestroy(&growp);CHKERRQ(ierr);
1706   /* get the local target indices */
1707   ierr = MatGetOwnershipRange(A,&first,PETSC_NULL);CHKERRQ(ierr);
1708   ierr = MatGetLocalSize(A,&local_rowsize,&local_colsize);CHKERRQ(ierr);
1709   ierr = ISGetIndices(irowp,&rows);CHKERRQ(ierr);
1710   ierr = ISCreateGeneral(PETSC_COMM_SELF,local_rowsize,rows+first,PETSC_COPY_VALUES,&lrowp);CHKERRQ(ierr);
1711   ierr = ISRestoreIndices(irowp,&rows);CHKERRQ(ierr);
1712   ierr = ISDestroy(&irowp);CHKERRQ(ierr);
1713   /* the column permutation is so much easier;
1714      make a local version of 'colp' and invert it */
1715   ierr = ISOnComm(colp,PETSC_COMM_SELF,PETSC_USE_POINTER,&lcolp);CHKERRQ(ierr);
1716   ierr = ISSetPermutation(lcolp);CHKERRQ(ierr);
1717   ierr = ISInvertPermutation(lcolp,PETSC_DECIDE,&icolp);CHKERRQ(ierr);
1718   ierr = ISDestroy(&lcolp);CHKERRQ(ierr);
1719   /* now we just get the submatrix */
1720   ierr = MatGetSubMatrix_MPIAIJ_Private(A,lrowp,icolp,local_colsize,MAT_INITIAL_MATRIX,B);CHKERRQ(ierr);
1721   /* clean up */
1722   ierr = ISDestroy(&lrowp);CHKERRQ(ierr);
1723   ierr = ISDestroy(&icolp);CHKERRQ(ierr);
1724   PetscFunctionReturn(0);
1725 }
1726 
1727 #undef __FUNCT__
1728 #define __FUNCT__ "MatGetInfo_MPIAIJ"
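/*
   MatGetInfo_MPIAIJ - sums the statistics of the diagonal (A) and off-diagonal (B)
   blocks and, for MAT_GLOBAL_MAX / MAT_GLOBAL_SUM, reduces them over the communicator.
   The fill-ratio and factorization fields are zeroed because no parallel LU/ILU/Cholesky
   is provided by this type.
*/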
1729 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1730 {
1731   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1732   Mat            A = mat->A,B = mat->B;
1733   PetscErrorCode ierr;
1734   PetscReal      isend[5],irecv[5];
1735 
1736   PetscFunctionBegin;
1737   info->block_size     = 1.0;
1738   ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1739   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1740   isend[3] = info->memory;  isend[4] = info->mallocs;
1741   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1742   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1743   isend[3] += info->memory;  isend[4] += info->mallocs;
1744   if (flag == MAT_LOCAL) {
1745     info->nz_used      = isend[0];
1746     info->nz_allocated = isend[1];
1747     info->nz_unneeded  = isend[2];
1748     info->memory       = isend[3];
1749     info->mallocs      = isend[4];
1750   } else if (flag == MAT_GLOBAL_MAX) {
1751     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,((PetscObject)matin)->comm);CHKERRQ(ierr);
1752     info->nz_used      = irecv[0];
1753     info->nz_allocated = irecv[1];
1754     info->nz_unneeded  = irecv[2];
1755     info->memory       = irecv[3];
1756     info->mallocs      = irecv[4];
1757   } else if (flag == MAT_GLOBAL_SUM) {
1758     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,((PetscObject)matin)->comm);CHKERRQ(ierr);
1759     info->nz_used      = irecv[0];
1760     info->nz_allocated = irecv[1];
1761     info->nz_unneeded  = irecv[2];
1762     info->memory       = irecv[3];
1763     info->mallocs      = irecv[4];
1764   }
1765   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1766   info->fill_ratio_needed = 0;
1767   info->factor_mallocs    = 0;
1768 
1769   PetscFunctionReturn(0);
1770 }
1771 
1772 #undef __FUNCT__
1773 #define __FUNCT__ "MatSetOption_MPIAIJ"
1774 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscBool  flg)
1775 {
1776   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1777   PetscErrorCode ierr;
1778 
1779   PetscFunctionBegin;
1780   switch (op) {
1781   case MAT_NEW_NONZERO_LOCATIONS:
1782   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1783   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1784   case MAT_KEEP_NONZERO_PATTERN:
1785   case MAT_NEW_NONZERO_LOCATION_ERR:
1786   case MAT_USE_INODES:
1787   case MAT_IGNORE_ZERO_ENTRIES:
1788     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1789     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1790     break;
1791   case MAT_ROW_ORIENTED:
1792     a->roworiented = flg;
1793     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1794     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1795     break;
1796   case MAT_NEW_DIAGONALS:
1797     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1798     break;
1799   case MAT_IGNORE_OFF_PROC_ENTRIES:
1800     a->donotstash = flg;
1801     break;
1802   case MAT_SPD:
1803     A->spd_set                         = PETSC_TRUE;
1804     A->spd                             = flg;
1805     if (flg) {
1806       A->symmetric                     = PETSC_TRUE;
1807       A->structurally_symmetric        = PETSC_TRUE;
1808       A->symmetric_set                 = PETSC_TRUE;
1809       A->structurally_symmetric_set    = PETSC_TRUE;
1810     }
1811     break;
1812   case MAT_SYMMETRIC:
1813     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1814     break;
1815   case MAT_STRUCTURALLY_SYMMETRIC:
1816     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1817     break;
1818   case MAT_HERMITIAN:
1819     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1820     break;
1821   case MAT_SYMMETRY_ETERNAL:
1822     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1823     break;
1824   default:
1825     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op);
1826   }
1827   PetscFunctionReturn(0);
1828 }
1829 
1830 #undef __FUNCT__
1831 #define __FUNCT__ "MatGetRow_MPIAIJ"
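/*
   MatGetRow_MPIAIJ - returns a locally owned row with global column indices.

   The entries of the requested row are gathered from the diagonal block (local columns
   shifted by cstart) and the off-diagonal block (columns mapped through garray) and
   merged into a single work buffer in increasing global column order; the work buffers
   are allocated once, sized for the longest local row.
*/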
1832 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1833 {
1834   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1835   PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
1836   PetscErrorCode ierr;
1837   PetscInt       i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart;
1838   PetscInt       nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend;
1839   PetscInt       *cmap,*idx_p;
1840 
1841   PetscFunctionBegin;
1842   if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
1843   mat->getrowactive = PETSC_TRUE;
1844 
1845   if (!mat->rowvalues && (idx || v)) {
1846     /*
1847         allocate enough space to hold information from the longest row.
1848     */
1849     Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data;
1850     PetscInt   max = 1,tmp;
1851     for (i=0; i<matin->rmap->n; i++) {
1852       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1853       if (max < tmp) { max = tmp; }
1854     }
1855     ierr = PetscMalloc2(max,PetscScalar,&mat->rowvalues,max,PetscInt,&mat->rowindices);CHKERRQ(ierr);
1856   }
1857 
1858   if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows");
1859   lrow = row - rstart;
1860 
1861   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1862   if (!v)   {pvA = 0; pvB = 0;}
1863   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1864   ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1865   ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1866   nztot = nzA + nzB;
1867 
1868   cmap  = mat->garray;
1869   if (v  || idx) {
1870     if (nztot) {
1871       /* Sort by increasing column numbers, assuming A and B already sorted */
1872       PetscInt imark = -1;
1873       if (v) {
1874         *v = v_p = mat->rowvalues;
1875         for (i=0; i<nzB; i++) {
1876           if (cmap[cworkB[i]] < cstart)   v_p[i] = vworkB[i];
1877           else break;
1878         }
1879         imark = i;
1880         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1881         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1882       }
1883       if (idx) {
1884         *idx = idx_p = mat->rowindices;
1885         if (imark > -1) {
1886           for (i=0; i<imark; i++) {
1887             idx_p[i] = cmap[cworkB[i]];
1888           }
1889         } else {
1890           for (i=0; i<nzB; i++) {
1891             if (cmap[cworkB[i]] < cstart)   idx_p[i] = cmap[cworkB[i]];
1892             else break;
1893           }
1894           imark = i;
1895         }
1896         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart + cworkA[i];
1897         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]];
1898       }
1899     } else {
1900       if (idx) *idx = 0;
1901       if (v)   *v   = 0;
1902     }
1903   }
1904   *nz = nztot;
1905   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1906   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1907   PetscFunctionReturn(0);
1908 }
1909 
1910 #undef __FUNCT__
1911 #define __FUNCT__ "MatRestoreRow_MPIAIJ"
1912 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1913 {
1914   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1915 
1916   PetscFunctionBegin;
1917   if (!aij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
1918   aij->getrowactive = PETSC_FALSE;
1919   PetscFunctionReturn(0);
1920 }
1921 
1922 #undef __FUNCT__
1923 #define __FUNCT__ "MatNorm_MPIAIJ"
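/*
   MatNorm_MPIAIJ - Frobenius norm: global sum of |a_ij|^2 followed by a square root.
   1-norm: the absolute column sums are accumulated in an array of length cmap->N,
   reduced with MPIU_SUM, and the maximum entry is taken.  Infinity norm: the local
   maximum absolute row sum is reduced with MPIU_MAX.  The 2-norm is not supported.
*/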
1924 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm)
1925 {
1926   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1927   Mat_SeqAIJ     *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data;
1928   PetscErrorCode ierr;
1929   PetscInt       i,j,cstart = mat->cmap->rstart;
1930   PetscReal      sum = 0.0;
1931   MatScalar      *v;
1932 
1933   PetscFunctionBegin;
1934   if (aij->size == 1) {
1935     ierr =  MatNorm(aij->A,type,norm);CHKERRQ(ierr);
1936   } else {
1937     if (type == NORM_FROBENIUS) {
1938       v = amat->a;
1939       for (i=0; i<amat->nz; i++) {
1940 #if defined(PETSC_USE_COMPLEX)
1941         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1942 #else
1943         sum += (*v)*(*v); v++;
1944 #endif
1945       }
1946       v = bmat->a;
1947       for (i=0; i<bmat->nz; i++) {
1948 #if defined(PETSC_USE_COMPLEX)
1949         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1950 #else
1951         sum += (*v)*(*v); v++;
1952 #endif
1953       }
1954       ierr = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPIU_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr);
1955       *norm = PetscSqrtReal(*norm);
1956     } else if (type == NORM_1) { /* max column norm */
1957       PetscReal *tmp,*tmp2;
1958       PetscInt  *jj,*garray = aij->garray;
1959       ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr);
1960       ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscReal),&tmp2);CHKERRQ(ierr);
1961       ierr = PetscMemzero(tmp,mat->cmap->N*sizeof(PetscReal));CHKERRQ(ierr);
1962       *norm = 0.0;
1963       v = amat->a; jj = amat->j;
1964       for (j=0; j<amat->nz; j++) {
1965         tmp[cstart + *jj++ ] += PetscAbsScalar(*v);  v++;
1966       }
1967       v = bmat->a; jj = bmat->j;
1968       for (j=0; j<bmat->nz; j++) {
1969         tmp[garray[*jj++]] += PetscAbsScalar(*v); v++;
1970       }
1971       ierr = MPI_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr);
1972       for (j=0; j<mat->cmap->N; j++) {
1973         if (tmp2[j] > *norm) *norm = tmp2[j];
1974       }
1975       ierr = PetscFree(tmp);CHKERRQ(ierr);
1976       ierr = PetscFree(tmp2);CHKERRQ(ierr);
1977     } else if (type == NORM_INFINITY) { /* max row norm */
1978       PetscReal ntemp = 0.0;
1979       for (j=0; j<aij->A->rmap->n; j++) {
1980         v = amat->a + amat->i[j];
1981         sum = 0.0;
1982         for (i=0; i<amat->i[j+1]-amat->i[j]; i++) {
1983           sum += PetscAbsScalar(*v); v++;
1984         }
1985         v = bmat->a + bmat->i[j];
1986         for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) {
1987           sum += PetscAbsScalar(*v); v++;
1988         }
1989         if (sum > ntemp) ntemp = sum;
1990       }
1991       ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPIU_MAX,((PetscObject)mat)->comm);CHKERRQ(ierr);
1992     } else {
1993       SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_SUP,"No support for two norm");
1994     }
1995   }
1996   PetscFunctionReturn(0);
1997 }
1998 
1999 #undef __FUNCT__
2000 #define __FUNCT__ "MatTranspose_MPIAIJ"
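/*
   MatTranspose_MPIAIJ - forms B = A^T by calling MatSetValues() with the row and
   column arguments swapped, once for the diagonal block and once for the off-diagonal
   block (whose column indices are first mapped to global numbering through garray).
   Preallocation uses d_nnz counted from the diagonal block; the same counts are reused
   as an estimate for o_nnz to avoid extra communication.
*/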
2001 PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout)
2002 {
2003   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2004   Mat_SeqAIJ     *Aloc=(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data;
2005   PetscErrorCode ierr;
2006   PetscInt       M = A->rmap->N,N = A->cmap->N,ma,na,mb,*ai,*aj,*bi,*bj,row,*cols,*cols_tmp,i,*d_nnz;
2007   PetscInt       cstart=A->cmap->rstart,ncol;
2008   Mat            B;
2009   MatScalar      *array;
2010 
2011   PetscFunctionBegin;
2012   if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
2013 
2014   ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n;
2015   ai = Aloc->i; aj = Aloc->j;
2016   bi = Bloc->i; bj = Bloc->j;
2017   if (reuse == MAT_INITIAL_MATRIX || *matout == A) {
2018     /* compute d_nnz for preallocation; o_nnz is approximated by d_nnz to avoid communication */
2019     ierr = PetscMalloc((1+na)*sizeof(PetscInt),&d_nnz);CHKERRQ(ierr);
2020     ierr = PetscMemzero(d_nnz,(1+na)*sizeof(PetscInt));CHKERRQ(ierr);
2021     for (i=0; i<ai[ma]; i++){
2022       d_nnz[aj[i]] ++;
2023       aj[i] += cstart; /* global col index to be used by MatSetValues() */
2024     }
2025 
2026     ierr = MatCreate(((PetscObject)A)->comm,&B);CHKERRQ(ierr);
2027     ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
2028     ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
2029     ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,d_nnz);CHKERRQ(ierr);
2030     ierr = PetscFree(d_nnz);CHKERRQ(ierr);
2031   } else {
2032     B = *matout;
2033   }
2034 
2035   /* copy over the A part */
2036   array = Aloc->a;
2037   row = A->rmap->rstart;
2038   for (i=0; i<ma; i++) {
2039     ncol = ai[i+1]-ai[i];
2040     ierr = MatSetValues(B,ncol,aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
2041     row++; array += ncol; aj += ncol;
2042   }
2043   aj = Aloc->j;
2044   for (i=0; i<ai[ma]; i++) aj[i] -= cstart; /* restore the local column indices */
2045 
2046   /* copy over the B part */
2047   ierr = PetscMalloc(bi[mb]*sizeof(PetscInt),&cols);CHKERRQ(ierr);
2048   ierr = PetscMemzero(cols,bi[mb]*sizeof(PetscInt));CHKERRQ(ierr);
2049   array = Bloc->a;
2050   row = A->rmap->rstart;
2051   for (i=0; i<bi[mb]; i++) {cols[i] = a->garray[bj[i]];}
2052   cols_tmp = cols;
2053   for (i=0; i<mb; i++) {
2054     ncol = bi[i+1]-bi[i];
2055     ierr = MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
2056     row++; array += ncol; cols_tmp += ncol;
2057   }
2058   ierr = PetscFree(cols);CHKERRQ(ierr);
2059 
2060   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2061   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2062   if (reuse == MAT_INITIAL_MATRIX || *matout != A) {
2063     *matout = B;
2064   } else {
2065     ierr = MatHeaderMerge(A,B);CHKERRQ(ierr);
2066   }
2067   PetscFunctionReturn(0);
2068 }
2069 
2070 #undef __FUNCT__
2071 #define __FUNCT__ "MatDiagonalScale_MPIAIJ"
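/*
   MatDiagonalScale_MPIAIJ - the left scaling ll is applied directly to both local
   blocks; the right scaling rr must first be scattered into the ghosted vector lvec
   before it can be applied to the off-diagonal block, and that communication is
   overlapped with scaling the diagonal block.
*/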
2072 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
2073 {
2074   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2075   Mat            a = aij->A,b = aij->B;
2076   PetscErrorCode ierr;
2077   PetscInt       s1,s2,s3;
2078 
2079   PetscFunctionBegin;
2080   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
2081   if (rr) {
2082     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
2083     if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
2084     /* Overlap communication with computation. */
2085     ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2086   }
2087   if (ll) {
2088     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
2089     if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
2090     ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr);
2091   }
2092   /* scale the diagonal block */
2093   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
2094 
2095   if (rr) {
2096     /* Do a scatter end and then right scale the off-diagonal block */
2097     ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2098     ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr);
2099   }
2100 
2101   PetscFunctionReturn(0);
2102 }
2103 
2104 #undef __FUNCT__
2105 #define __FUNCT__ "MatSetBlockSize_MPIAIJ"
2106 PetscErrorCode MatSetBlockSize_MPIAIJ(Mat A,PetscInt bs)
2107 {
2108   Mat_MPIAIJ     *a   = (Mat_MPIAIJ*)A->data;
2109   PetscErrorCode ierr;
2110 
2111   PetscFunctionBegin;
2112   ierr = MatSetBlockSize(a->A,bs);CHKERRQ(ierr);
2113   ierr = MatSetBlockSize(a->B,bs);CHKERRQ(ierr);
2114   ierr = PetscLayoutSetBlockSize(A->rmap,bs);CHKERRQ(ierr);
2115   ierr = PetscLayoutSetBlockSize(A->cmap,bs);CHKERRQ(ierr);
2116   PetscFunctionReturn(0);
2117 }
2118 #undef __FUNCT__
2119 #define __FUNCT__ "MatSetUnfactored_MPIAIJ"
2120 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A)
2121 {
2122   Mat_MPIAIJ     *a   = (Mat_MPIAIJ*)A->data;
2123   PetscErrorCode ierr;
2124 
2125   PetscFunctionBegin;
2126   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
2127   PetscFunctionReturn(0);
2128 }
2129 
2130 #undef __FUNCT__
2131 #define __FUNCT__ "MatEqual_MPIAIJ"
2132 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscBool  *flag)
2133 {
2134   Mat_MPIAIJ     *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data;
2135   Mat            a,b,c,d;
2136   PetscBool      flg;
2137   PetscErrorCode ierr;
2138 
2139   PetscFunctionBegin;
2140   a = matA->A; b = matA->B;
2141   c = matB->A; d = matB->B;
2142 
2143   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
2144   if (flg) {
2145     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
2146   }
2147   ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,((PetscObject)A)->comm);CHKERRQ(ierr);
2148   PetscFunctionReturn(0);
2149 }
2150 
2151 #undef __FUNCT__
2152 #define __FUNCT__ "MatCopy_MPIAIJ"
2153 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
2154 {
2155   PetscErrorCode ierr;
2156   Mat_MPIAIJ     *a = (Mat_MPIAIJ *)A->data;
2157   Mat_MPIAIJ     *b = (Mat_MPIAIJ *)B->data;
2158 
2159   PetscFunctionBegin;
2160   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
2161   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
2162     /* Because of the column compression in the off-process part of the matrix a->B,
2163        the number of columns in a->B and b->B may differ, so MatCopy() cannot be called
2164        directly on the two parts. If need be, a copy more efficient than MatCopy_Basic()
2165        could be provided by first uncompressing the a->B matrices and then copying the
2166        submatrices. */
2167     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
2168   } else {
2169     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
2170     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
2171   }
2172   PetscFunctionReturn(0);
2173 }
2174 
2175 #undef __FUNCT__
2176 #define __FUNCT__ "MatSetUpPreallocation_MPIAIJ"
2177 PetscErrorCode MatSetUpPreallocation_MPIAIJ(Mat A)
2178 {
2179   PetscErrorCode ierr;
2180 
2181   PetscFunctionBegin;
2182   ierr =  MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
2183   PetscFunctionReturn(0);
2184 }
2185 
2186 #undef __FUNCT__
2187 #define __FUNCT__ "MatAXPY_MPIAIJ"
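/*
   MatAXPY_MPIAIJ - computes Y = a*X + Y.  With SAME_NONZERO_PATTERN the stored values
   of both blocks are combined directly with BLASaxpy_(); with SUBSET_NONZERO_PATTERN
   the diagonal blocks are handled by MatAXPY_SeqAIJ() and the off-diagonal blocks
   through a cached x-to-y index map kept on Y's off-diagonal block; otherwise a new
   matrix with the union nonzero pattern is preallocated, filled by
   MatAXPY_BasicWithPreallocation(), and header-replaced into Y.
*/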
2188 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
2189 {
2190   PetscErrorCode ierr;
2191   PetscInt       i;
2192   Mat_MPIAIJ     *xx = (Mat_MPIAIJ *)X->data,*yy = (Mat_MPIAIJ *)Y->data;
2193   PetscBLASInt   bnz,one=1;
2194   Mat_SeqAIJ     *x,*y;
2195 
2196   PetscFunctionBegin;
2197   if (str == SAME_NONZERO_PATTERN) {
2198     PetscScalar alpha = a;
2199     x = (Mat_SeqAIJ *)xx->A->data;
2200     y = (Mat_SeqAIJ *)yy->A->data;
2201     bnz = PetscBLASIntCast(x->nz);
2202     BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
2203     x = (Mat_SeqAIJ *)xx->B->data;
2204     y = (Mat_SeqAIJ *)yy->B->data;
2205     bnz = PetscBLASIntCast(x->nz);
2206     BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
2207   } else if (str == SUBSET_NONZERO_PATTERN) {
2208     ierr = MatAXPY_SeqAIJ(yy->A,a,xx->A,str);CHKERRQ(ierr);
2209 
2210     x = (Mat_SeqAIJ *)xx->B->data;
2211     y = (Mat_SeqAIJ *)yy->B->data;
2212     if (y->xtoy && y->XtoY != xx->B) {
2213       ierr = PetscFree(y->xtoy);CHKERRQ(ierr);
2214       ierr = MatDestroy(&y->XtoY);CHKERRQ(ierr);
2215     }
2216     if (!y->xtoy) { /* get xtoy */
2217       ierr = MatAXPYGetxtoy_Private(xx->B->rmap->n,x->i,x->j,xx->garray,y->i,y->j,yy->garray,&y->xtoy);CHKERRQ(ierr);
2218       y->XtoY = xx->B;
2219       ierr = PetscObjectReference((PetscObject)xx->B);CHKERRQ(ierr);
2220     }
2221     for (i=0; i<x->nz; i++) y->a[y->xtoy[i]] += a*(x->a[i]);
2222   } else {
2223     Mat B;
2224     PetscInt *nnz_d,*nnz_o;
2225     ierr = PetscMalloc(yy->A->rmap->N*sizeof(PetscInt),&nnz_d);CHKERRQ(ierr);
2226     ierr = PetscMalloc(yy->B->rmap->N*sizeof(PetscInt),&nnz_o);CHKERRQ(ierr);
2227     ierr = MatCreate(((PetscObject)Y)->comm,&B);CHKERRQ(ierr);
2228     ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr);
2229     ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr);
2230     ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr);
2231     ierr = MatAXPYGetPreallocation_SeqAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr);
2232     ierr = MatAXPYGetPreallocation_SeqAIJ(yy->B,xx->B,nnz_o);CHKERRQ(ierr);
2233     ierr = MatMPIAIJSetPreallocation(B,0,nnz_d,0,nnz_o);CHKERRQ(ierr);
2234     ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr);
2235     ierr = MatHeaderReplace(Y,B);CHKERRQ(ierr);
2236     ierr = PetscFree(nnz_d);CHKERRQ(ierr);
2237     ierr = PetscFree(nnz_o);CHKERRQ(ierr);
2238   }
2239   PetscFunctionReturn(0);
2240 }
2241 
2242 extern PetscErrorCode  MatConjugate_SeqAIJ(Mat);
2243 
2244 #undef __FUNCT__
2245 #define __FUNCT__ "MatConjugate_MPIAIJ"
2246 PetscErrorCode  MatConjugate_MPIAIJ(Mat mat)
2247 {
2248 #if defined(PETSC_USE_COMPLEX)
2249   PetscErrorCode ierr;
2250   Mat_MPIAIJ     *aij = (Mat_MPIAIJ *)mat->data;
2251 
2252   PetscFunctionBegin;
2253   ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr);
2254   ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr);
2255 #else
2256   PetscFunctionBegin;
2257 #endif
2258   PetscFunctionReturn(0);
2259 }
2260 
2261 #undef __FUNCT__
2262 #define __FUNCT__ "MatRealPart_MPIAIJ"
2263 PetscErrorCode MatRealPart_MPIAIJ(Mat A)
2264 {
2265   Mat_MPIAIJ   *a = (Mat_MPIAIJ*)A->data;
2266   PetscErrorCode ierr;
2267 
2268   PetscFunctionBegin;
2269   ierr = MatRealPart(a->A);CHKERRQ(ierr);
2270   ierr = MatRealPart(a->B);CHKERRQ(ierr);
2271   PetscFunctionReturn(0);
2272 }
2273 
2274 #undef __FUNCT__
2275 #define __FUNCT__ "MatImaginaryPart_MPIAIJ"
2276 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A)
2277 {
2278   Mat_MPIAIJ   *a = (Mat_MPIAIJ*)A->data;
2279   PetscErrorCode ierr;
2280 
2281   PetscFunctionBegin;
2282   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
2283   ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
2284   PetscFunctionReturn(0);
2285 }
2286 
2287 #if defined(PETSC_HAVE_PBGL)
2288 
2289 #include <boost/parallel/mpi/bsp_process_group.hpp>
2290 #include <boost/graph/distributed/ilu_default_graph.hpp>
2291 #include <boost/graph/distributed/ilu_0_block.hpp>
2292 #include <boost/graph/distributed/ilu_preconditioner.hpp>
2293 #include <boost/graph/distributed/petsc/interface.hpp>
2294 #include <boost/multi_array.hpp>
2295 #include <boost/parallel/distributed_property_map.hpp>
2296 
2297 #undef __FUNCT__
2298 #define __FUNCT__ "MatILUFactorSymbolic_MPIAIJ"
2299 /*
2300   This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu>
2301 */
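/*
   The symbolic factorization builds a distributed level graph with the Parallel Boost
   Graph Library and stores it in a PetscContainer composed on the factor as "graph";
   MatSolve_MPIAIJ() below queries that container to perform the triangular solves.
*/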
2302 PetscErrorCode MatILUFactorSymbolic_MPIAIJ(Mat fact,Mat A, IS isrow, IS iscol, const MatFactorInfo *info)
2303 {
2304   namespace petsc = boost::distributed::petsc;
2305 
2306   namespace graph_dist = boost::graph::distributed;
2307   using boost::graph::distributed::ilu_default::process_group_type;
2308   using boost::graph::ilu_permuted;
2309 
2310   PetscBool       row_identity, col_identity;
2311   PetscContainer  c;
2312   PetscInt        m, n, M, N;
2313   PetscErrorCode  ierr;
2314 
2315   PetscFunctionBegin;
2316   if (info->levels != 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only levels = 0 supported for parallel ilu");
2317   ierr = ISIdentity(isrow, &row_identity);CHKERRQ(ierr);
2318   ierr = ISIdentity(iscol, &col_identity);CHKERRQ(ierr);
2319   if (!row_identity || !col_identity) {
2320     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for parallel ILU");
2321   }
2322 
2323   process_group_type pg;
2324   typedef graph_dist::ilu_default::ilu_level_graph_type  lgraph_type;
2325   lgraph_type*   lgraph_p = new lgraph_type(petsc::num_global_vertices(A), pg, petsc::matrix_distribution(A, pg));
2326   lgraph_type&   level_graph = *lgraph_p;
2327   graph_dist::ilu_default::graph_type&            graph(level_graph.graph);
2328 
2329   petsc::read_matrix(A, graph, get(boost::edge_weight, graph));
2330   ilu_permuted(level_graph);
2331 
2332   /* put together the new matrix */
2333   ierr = MatCreate(((PetscObject)A)->comm, fact);CHKERRQ(ierr);
2334   ierr = MatGetLocalSize(A, &m, &n);CHKERRQ(ierr);
2335   ierr = MatGetSize(A, &M, &N);CHKERRQ(ierr);
2336   ierr = MatSetSizes(fact, m, n, M, N);CHKERRQ(ierr);
2337   ierr = MatSetType(fact, ((PetscObject)A)->type_name);CHKERRQ(ierr);
2338   ierr = MatAssemblyBegin(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2339   ierr = MatAssemblyEnd(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2340 
2341   ierr = PetscContainerCreate(((PetscObject)A)->comm, &c);CHKERRQ(ierr);
2342   ierr = PetscContainerSetPointer(c, lgraph_p);CHKERRQ(ierr);
2343   ierr = PetscObjectCompose((PetscObject) (fact), "graph", (PetscObject) c);CHKERRQ(ierr);
2344   ierr = PetscContainerDestroy(&c);CHKERRQ(ierr);
2345   PetscFunctionReturn(0);
2346 }
2347 
2348 #undef __FUNCT__
2349 #define __FUNCT__ "MatLUFactorNumeric_MPIAIJ"
2350 PetscErrorCode MatLUFactorNumeric_MPIAIJ(Mat B,Mat A, const MatFactorInfo *info)
2351 {
2352   PetscFunctionBegin;
2353   PetscFunctionReturn(0);
2354 }
2355 
2356 #undef __FUNCT__
2357 #define __FUNCT__ "MatSolve_MPIAIJ"
2358 /*
2359   This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu>
2360 */
2361 PetscErrorCode MatSolve_MPIAIJ(Mat A, Vec b, Vec x)
2362 {
2363   namespace graph_dist = boost::graph::distributed;
2364 
2365   typedef graph_dist::ilu_default::ilu_level_graph_type  lgraph_type;
2366   lgraph_type*   lgraph_p;
2367   PetscContainer c;
2368   PetscErrorCode ierr;
2369 
2370   PetscFunctionBegin;
2371   ierr = PetscObjectQuery((PetscObject) A, "graph", (PetscObject *) &c);CHKERRQ(ierr);
2372   ierr = PetscContainerGetPointer(c, (void **) &lgraph_p);CHKERRQ(ierr);
2373   ierr = VecCopy(b, x);CHKERRQ(ierr);
2374 
2375   PetscScalar* array_x;
2376   ierr = VecGetArray(x, &array_x);CHKERRQ(ierr);
2377   PetscInt sx;
2378   ierr = VecGetSize(x, &sx);CHKERRQ(ierr);
2379 
2380   PetscScalar* array_b;
2381   ierr = VecGetArray(b, &array_b);CHKERRQ(ierr);
2382   PetscInt sb;
2383   ierr = VecGetSize(b, &sb);CHKERRQ(ierr);
2384 
2385   lgraph_type&   level_graph = *lgraph_p;
2386   graph_dist::ilu_default::graph_type&            graph(level_graph.graph);
2387 
2388   typedef boost::multi_array_ref<PetscScalar, 1> array_ref_type;
2389   array_ref_type                                 ref_b(array_b, boost::extents[num_vertices(graph)]),
2390                                                  ref_x(array_x, boost::extents[num_vertices(graph)]);
2391 
2392   typedef boost::iterator_property_map<array_ref_type::iterator,
2393                                 boost::property_map<graph_dist::ilu_default::graph_type, boost::vertex_index_t>::type>  gvector_type;
2394   gvector_type                                   vector_b(ref_b.begin(), get(boost::vertex_index, graph)),
2395                                                  vector_x(ref_x.begin(), get(boost::vertex_index, graph));
2396 
2397   ilu_set_solve(*lgraph_p, vector_b, vector_x);
2398 
2399   PetscFunctionReturn(0);
2400 }
2401 #endif
2402 
2403 typedef struct { /* used by MatGetRedundantMatrix() for reusing matredundant */
2404   PetscInt       nzlocal,nsends,nrecvs;
2405   PetscMPIInt    *send_rank,*recv_rank;
2406   PetscInt       *sbuf_nz,*rbuf_nz,*sbuf_j,**rbuf_j;
2407   PetscScalar    *sbuf_a,**rbuf_a;
2408   PetscErrorCode (*Destroy)(Mat);
2409 } Mat_Redundant;
2410 
2411 #undef __FUNCT__
2412 #define __FUNCT__ "PetscContainerDestroy_MatRedundant"
2413 PetscErrorCode PetscContainerDestroy_MatRedundant(void *ptr)
2414 {
2415   PetscErrorCode       ierr;
2416   Mat_Redundant        *redund=(Mat_Redundant*)ptr;
2417   PetscInt             i;
2418 
2419   PetscFunctionBegin;
2420   ierr = PetscFree2(redund->send_rank,redund->recv_rank);CHKERRQ(ierr);
2421   ierr = PetscFree(redund->sbuf_j);CHKERRQ(ierr);
2422   ierr = PetscFree(redund->sbuf_a);CHKERRQ(ierr);
2423   for (i=0; i<redund->nrecvs; i++){
2424     ierr = PetscFree(redund->rbuf_j[i]);CHKERRQ(ierr);
2425     ierr = PetscFree(redund->rbuf_a[i]);CHKERRQ(ierr);
2426   }
2427   ierr = PetscFree4(redund->sbuf_nz,redund->rbuf_nz,redund->rbuf_j,redund->rbuf_a);CHKERRQ(ierr);
2428   ierr = PetscFree(redund);CHKERRQ(ierr);
2429   PetscFunctionReturn(0);
2430 }
2431 
2432 #undef __FUNCT__
2433 #define __FUNCT__ "MatDestroy_MatRedundant"
2434 PetscErrorCode MatDestroy_MatRedundant(Mat A)
2435 {
2436   PetscErrorCode  ierr;
2437   PetscContainer  container;
2438   Mat_Redundant   *redund=PETSC_NULL;
2439 
2440   PetscFunctionBegin;
2441   ierr = PetscObjectQuery((PetscObject)A,"Mat_Redundant",(PetscObject *)&container);CHKERRQ(ierr);
2442   if (!container) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Container does not exist");
2443   ierr = PetscContainerGetPointer(container,(void **)&redund);CHKERRQ(ierr);
2444   A->ops->destroy = redund->Destroy;
2445   ierr = PetscObjectCompose((PetscObject)A,"Mat_Redundant",0);CHKERRQ(ierr);
2446   if (A->ops->destroy) {
2447     ierr = (*A->ops->destroy)(A);CHKERRQ(ierr);
2448   }
2449   PetscFunctionReturn(0);
2450 }
2451 
2452 #undef __FUNCT__
2453 #define __FUNCT__ "MatGetRedundantMatrix_MPIAIJ"
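/*
   MatGetRedundantMatrix_MPIAIJ - each process packs its local rows into CSR-style
   buffers sbuf_j/sbuf_a (row pointers first, then global column indices, with the
   diagonal-block entries spliced between the lower and upper off-diagonal entries)
   and exchanges them across the subcommunicators so that every subcommunicator can
   assemble a complete copy of the matrix.  For MAT_REUSE_MATRIX the communication
   pattern and buffers are taken from the Mat_Redundant container attached to the
   previously created matrix and only the numerical values are resent.
*/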
2454 PetscErrorCode MatGetRedundantMatrix_MPIAIJ(Mat mat,PetscInt nsubcomm,MPI_Comm subcomm,PetscInt mlocal_sub,MatReuse reuse,Mat *matredundant)
2455 {
2456   PetscMPIInt    rank,size;
2457   MPI_Comm       comm=((PetscObject)mat)->comm;
2458   PetscErrorCode ierr;
2459   PetscInt       nsends=0,nrecvs=0,i,rownz_max=0;
2460   PetscMPIInt    *send_rank=PETSC_NULL,*recv_rank=PETSC_NULL;
2461   PetscInt       *rowrange=mat->rmap->range;
2462   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2463   Mat            A=aij->A,B=aij->B,C=*matredundant;
2464   Mat_SeqAIJ     *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data;
2465   PetscScalar    *sbuf_a;
2466   PetscInt       nzlocal=a->nz+b->nz;
2467   PetscInt       j,cstart=mat->cmap->rstart,cend=mat->cmap->rend,row,nzA,nzB,ncols,*cworkA,*cworkB;
2468   PetscInt       rstart=mat->rmap->rstart,rend=mat->rmap->rend,*bmap=aij->garray,M,N;
2469   PetscInt       *cols,ctmp,lwrite,*rptr,l,*sbuf_j;
2470   MatScalar      *aworkA,*aworkB;
2471   PetscScalar    *vals;
2472   PetscMPIInt    tag1,tag2,tag3,imdex;
2473   MPI_Request    *s_waits1=PETSC_NULL,*s_waits2=PETSC_NULL,*s_waits3=PETSC_NULL,
2474                  *r_waits1=PETSC_NULL,*r_waits2=PETSC_NULL,*r_waits3=PETSC_NULL;
2475   MPI_Status     recv_status,*send_status;
2476   PetscInt       *sbuf_nz=PETSC_NULL,*rbuf_nz=PETSC_NULL,count;
2477   PetscInt       **rbuf_j=PETSC_NULL;
2478   PetscScalar    **rbuf_a=PETSC_NULL;
2479   Mat_Redundant  *redund=PETSC_NULL;
2480   PetscContainer container;
2481 
2482   PetscFunctionBegin;
2483   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2484   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2485 
2486   if (reuse == MAT_REUSE_MATRIX) {
2487     ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr);
2488     if (M != N || M != mat->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong global size");
2489     ierr = MatGetLocalSize(C,&M,&N);CHKERRQ(ierr);
2490     if (M != N || M != mlocal_sub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong local size");
2491     ierr = PetscObjectQuery((PetscObject)C,"Mat_Redundant",(PetscObject *)&container);CHKERRQ(ierr);
2492     if (!container) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Container does not exist");
2493     ierr = PetscContainerGetPointer(container,(void **)&redund);CHKERRQ(ierr);
2494     if (nzlocal != redund->nzlocal) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong nzlocal");
2495 
2496     nsends    = redund->nsends;
2497     nrecvs    = redund->nrecvs;
2498     send_rank = redund->send_rank;
2499     recv_rank = redund->recv_rank;
2500     sbuf_nz   = redund->sbuf_nz;
2501     rbuf_nz   = redund->rbuf_nz;
2502     sbuf_j    = redund->sbuf_j;
2503     sbuf_a    = redund->sbuf_a;
2504     rbuf_j    = redund->rbuf_j;
2505     rbuf_a    = redund->rbuf_a;
2506   }
2507 
2508   if (reuse == MAT_INITIAL_MATRIX){
2509     PetscMPIInt  subrank,subsize;
2510     PetscInt     nleftover,np_subcomm;
2511     /* get the destination processors' id send_rank, nsends and nrecvs */
2512     ierr = MPI_Comm_rank(subcomm,&subrank);CHKERRQ(ierr);
2513     ierr = MPI_Comm_size(subcomm,&subsize);CHKERRQ(ierr);
2514     ierr = PetscMalloc2(size,PetscMPIInt,&send_rank,size,PetscMPIInt,&recv_rank);CHKERRQ(ierr);
2515     np_subcomm = size/nsubcomm;
2516     nleftover  = size - nsubcomm*np_subcomm;
2517     nsends = 0; nrecvs = 0;
2518     for (i=0; i<size; i++){ /* i=rank*/
2519       if (subrank == i/nsubcomm && rank != i){ /* my_subrank == other's subrank */
2520         send_rank[nsends] = i; nsends++;
2521         recv_rank[nrecvs++] = i;
2522       }
2523     }
2524     if (rank >= size - nleftover){/* this proc is a leftover processor */
2525       i = size-nleftover-1;
2526       j = 0;
2527       while (j < nsubcomm - nleftover){
2528         send_rank[nsends++] = i;
2529         i--; j++;
2530       }
2531     }
2532 
2533     if (nleftover && subsize == size/nsubcomm && subrank==subsize-1){ /* this proc recvs from leftover processors */
2534       for (i=0; i<nleftover; i++){
2535         recv_rank[nrecvs++] = size-nleftover+i;
2536       }
2537     }
2538 
2539     /* allocate sbuf_j, sbuf_a */
2540     i = nzlocal + rowrange[rank+1] - rowrange[rank] + 2;
2541     ierr = PetscMalloc(i*sizeof(PetscInt),&sbuf_j);CHKERRQ(ierr);
2542     ierr = PetscMalloc((nzlocal+1)*sizeof(PetscScalar),&sbuf_a);CHKERRQ(ierr);
2543   } /* end of if (reuse == MAT_INITIAL_MATRIX) */
2544 
2545   /* copy mat's local entries into the buffers */
2546   if (reuse == MAT_INITIAL_MATRIX){
2547     rownz_max = 0;
2548     rptr = sbuf_j;
2549     cols = sbuf_j + rend-rstart + 1;
2550     vals = sbuf_a;
2551     rptr[0] = 0;
2552     for (i=0; i<rend-rstart; i++){
2553       row = i + rstart;
2554       nzA    = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i];
2555       ncols  = nzA + nzB;
2556       cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i];
2557       aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i];
2558       /* load the column indices for this row into cols */
2559       lwrite = 0;
2560       for (l=0; l<nzB; l++) {
2561         if ((ctmp = bmap[cworkB[l]]) < cstart){
2562           vals[lwrite]   = aworkB[l];
2563           cols[lwrite++] = ctmp;
2564         }
2565       }
2566       for (l=0; l<nzA; l++){
2567         vals[lwrite]   = aworkA[l];
2568         cols[lwrite++] = cstart + cworkA[l];
2569       }
2570       for (l=0; l<nzB; l++) {
2571         if ((ctmp = bmap[cworkB[l]]) >= cend){
2572           vals[lwrite]   = aworkB[l];
2573           cols[lwrite++] = ctmp;
2574         }
2575       }
2576       vals += ncols;
2577       cols += ncols;
2578       rptr[i+1] = rptr[i] + ncols;
2579       if (rownz_max < ncols) rownz_max = ncols;
2580     }
2581     if (rptr[rend-rstart] != a->nz + b->nz) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB, "rptr[%d] %d != %d + %d",rend-rstart,rptr[rend-rstart],a->nz,b->nz);
2582   } else { /* only copy matrix values into sbuf_a */
2583     rptr = sbuf_j;
2584     vals = sbuf_a;
2585     rptr[0] = 0;
2586     for (i=0; i<rend-rstart; i++){
2587       row = i + rstart;
2588       nzA    = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i];
2589       ncols  = nzA + nzB;
2590       cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i];
2591       aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i];
2592       lwrite = 0;
2593       for (l=0; l<nzB; l++) {
2594         if ((ctmp = bmap[cworkB[l]]) < cstart) vals[lwrite++] = aworkB[l];
2595       }
2596       for (l=0; l<nzA; l++) vals[lwrite++] = aworkA[l];
2597       for (l=0; l<nzB; l++) {
2598         if ((ctmp = bmap[cworkB[l]]) >= cend) vals[lwrite++] = aworkB[l];
2599       }
2600       vals += ncols;
2601       rptr[i+1] = rptr[i] + ncols;
2602     }
2603   } /* end of if (reuse == MAT_INITIAL_MATRIX) */
2604 
2605   /* send nzlocal to others, and recv others' nzlocal */
2606   /*--------------------------------------------------*/
2607   if (reuse == MAT_INITIAL_MATRIX){
2608     ierr = PetscMalloc2(3*(nsends + nrecvs)+1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr);
2609     s_waits2 = s_waits3 + nsends;
2610     s_waits1 = s_waits2 + nsends;
2611     r_waits1 = s_waits1 + nsends;
2612     r_waits2 = r_waits1 + nrecvs;
2613     r_waits3 = r_waits2 + nrecvs;
2614   } else {
2615     ierr = PetscMalloc2(nsends + nrecvs +1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr);
2616     r_waits3 = s_waits3 + nsends;
2617   }
2618 
2619   ierr = PetscObjectGetNewTag((PetscObject)mat,&tag3);CHKERRQ(ierr);
2620   if (reuse == MAT_INITIAL_MATRIX){
2621     /* get new tags to keep the communication clean */
2622     ierr = PetscObjectGetNewTag((PetscObject)mat,&tag1);CHKERRQ(ierr);
2623     ierr = PetscObjectGetNewTag((PetscObject)mat,&tag2);CHKERRQ(ierr);
2624     ierr = PetscMalloc4(nsends,PetscInt,&sbuf_nz,nrecvs,PetscInt,&rbuf_nz,nrecvs,PetscInt*,&rbuf_j,nrecvs,PetscScalar*,&rbuf_a);CHKERRQ(ierr);
2625 
2626     /* post receives of other's nzlocal */
2627     for (i=0; i<nrecvs; i++){
2628       ierr = MPI_Irecv(rbuf_nz+i,1,MPIU_INT,MPI_ANY_SOURCE,tag1,comm,r_waits1+i);CHKERRQ(ierr);
2629     }
2630     /* send nzlocal to others */
2631     for (i=0; i<nsends; i++){
2632       sbuf_nz[i] = nzlocal;
2633       ierr = MPI_Isend(sbuf_nz+i,1,MPIU_INT,send_rank[i],tag1,comm,s_waits1+i);CHKERRQ(ierr);
2634     }
2635     /* wait on receives of nzlocal; allocate space for rbuf_j, rbuf_a */
2636     count = nrecvs;
2637     while (count) {
2638       ierr = MPI_Waitany(nrecvs,r_waits1,&imdex,&recv_status);CHKERRQ(ierr);
2639       recv_rank[imdex] = recv_status.MPI_SOURCE;
2640       /* allocate rbuf_a and rbuf_j; then post receives of rbuf_j */
2641       ierr = PetscMalloc((rbuf_nz[imdex]+1)*sizeof(PetscScalar),&rbuf_a[imdex]);CHKERRQ(ierr);
2642 
2643       i = rowrange[recv_status.MPI_SOURCE+1] - rowrange[recv_status.MPI_SOURCE]; /* number of expected mat->i */
2644       rbuf_nz[imdex] += i + 2;
2645       ierr = PetscMalloc(rbuf_nz[imdex]*sizeof(PetscInt),&rbuf_j[imdex]);CHKERRQ(ierr);
2646       ierr = MPI_Irecv(rbuf_j[imdex],rbuf_nz[imdex],MPIU_INT,recv_status.MPI_SOURCE,tag2,comm,r_waits2+imdex);CHKERRQ(ierr);
2647       count--;
2648     }
2649     /* wait on sends of nzlocal */
2650     if (nsends) {ierr = MPI_Waitall(nsends,s_waits1,send_status);CHKERRQ(ierr);}
2651     /* send mat->i,j to others, and recv from others */
2652     /*------------------------------------------------*/
2653     for (i=0; i<nsends; i++){
2654       j = nzlocal + rowrange[rank+1] - rowrange[rank] + 1;
2655       ierr = MPI_Isend(sbuf_j,j,MPIU_INT,send_rank[i],tag2,comm,s_waits2+i);CHKERRQ(ierr);
2656     }
2657     /* wait on receives of mat->i,j */
2658     /*------------------------------*/
2659     count = nrecvs;
2660     while (count) {
2661       ierr = MPI_Waitany(nrecvs,r_waits2,&imdex,&recv_status);CHKERRQ(ierr);
2662       if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE);
2663       count--;
2664     }
2665     /* wait on sends of mat->i,j */
2666     /*---------------------------*/
2667     if (nsends) {
2668       ierr = MPI_Waitall(nsends,s_waits2,send_status);CHKERRQ(ierr);
2669     }
2670   } /* end of if (reuse == MAT_INITIAL_MATRIX) */
2671 
2672   /* post receives, send and receive mat->a */
2673   /*----------------------------------------*/
2674   for (imdex=0; imdex<nrecvs; imdex++) {
2675     ierr = MPI_Irecv(rbuf_a[imdex],rbuf_nz[imdex],MPIU_SCALAR,recv_rank[imdex],tag3,comm,r_waits3+imdex);CHKERRQ(ierr);
2676   }
2677   for (i=0; i<nsends; i++){
2678     ierr = MPI_Isend(sbuf_a,nzlocal,MPIU_SCALAR,send_rank[i],tag3,comm,s_waits3+i);CHKERRQ(ierr);
2679   }
2680   count = nrecvs;
2681   while (count) {
2682     ierr = MPI_Waitany(nrecvs,r_waits3,&imdex,&recv_status);CHKERRQ(ierr);
2683     if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE);
2684     count--;
2685   }
2686   if (nsends) {
2687     ierr = MPI_Waitall(nsends,s_waits3,send_status);CHKERRQ(ierr);
2688   }
2689 
2690   ierr = PetscFree2(s_waits3,send_status);CHKERRQ(ierr);
2691 
2692   /* create redundant matrix */
2693   /*-------------------------*/
2694   if (reuse == MAT_INITIAL_MATRIX){
2695     /* compute rownz_max for preallocation */
2696     for (imdex=0; imdex<nrecvs; imdex++){
2697       j = rowrange[recv_rank[imdex]+1] - rowrange[recv_rank[imdex]];
2698       rptr = rbuf_j[imdex];
2699       for (i=0; i<j; i++){
2700         ncols = rptr[i+1] - rptr[i];
2701         if (rownz_max < ncols) rownz_max = ncols;
2702       }
2703     }
2704 
2705     ierr = MatCreate(subcomm,&C);CHKERRQ(ierr);
2706     ierr = MatSetSizes(C,mlocal_sub,mlocal_sub,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr);
2707     ierr = MatSetFromOptions(C);CHKERRQ(ierr);
2708     ierr = MatSeqAIJSetPreallocation(C,rownz_max,PETSC_NULL);CHKERRQ(ierr);
2709     ierr = MatMPIAIJSetPreallocation(C,rownz_max,PETSC_NULL,rownz_max,PETSC_NULL);CHKERRQ(ierr);
2710   } else {
2711     C = *matredundant;
2712   }
2713 
2714   /* insert local matrix entries */
2715   rptr = sbuf_j;
2716   cols = sbuf_j + rend-rstart + 1;
2717   vals = sbuf_a;
2718   for (i=0; i<rend-rstart; i++){
2719     row   = i + rstart;
2720     ncols = rptr[i+1] - rptr[i];
2721     ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
2722     vals += ncols;
2723     cols += ncols;
2724   }
2725   /* insert received matrix entries */
2726   for (imdex=0; imdex<nrecvs; imdex++){
2727     rstart = rowrange[recv_rank[imdex]];
2728     rend   = rowrange[recv_rank[imdex]+1];
2729     rptr = rbuf_j[imdex];
2730     cols = rbuf_j[imdex] + rend-rstart + 1;
2731     vals = rbuf_a[imdex];
2732     for (i=0; i<rend-rstart; i++){
2733       row   = i + rstart;
2734       ncols = rptr[i+1] - rptr[i];
2735       ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
2736       vals += ncols;
2737       cols += ncols;
2738     }
2739   }
2740   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2741   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2742   ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr);
2743   if (M != mat->rmap->N || N != mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"redundant mat size %d != input mat size %d",M,mat->rmap->N);
2744   if (reuse == MAT_INITIAL_MATRIX) {
2745     PetscContainer container;
2746     *matredundant = C;
2747     /* create a supporting struct and attach it to C for reuse */
2748     ierr = PetscNewLog(C,Mat_Redundant,&redund);CHKERRQ(ierr);
2749     ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
2750     ierr = PetscContainerSetPointer(container,redund);CHKERRQ(ierr);
2751     ierr = PetscContainerSetUserDestroy(container,PetscContainerDestroy_MatRedundant);CHKERRQ(ierr);
2752     ierr = PetscObjectCompose((PetscObject)C,"Mat_Redundant",(PetscObject)container);CHKERRQ(ierr);
2753     ierr = PetscContainerDestroy(&container);CHKERRQ(ierr);
2754 
2755     redund->nzlocal = nzlocal;
2756     redund->nsends  = nsends;
2757     redund->nrecvs  = nrecvs;
2758     redund->send_rank = send_rank;
2759     redund->recv_rank = recv_rank;
2760     redund->sbuf_nz = sbuf_nz;
2761     redund->rbuf_nz = rbuf_nz;
2762     redund->sbuf_j  = sbuf_j;
2763     redund->sbuf_a  = sbuf_a;
2764     redund->rbuf_j  = rbuf_j;
2765     redund->rbuf_a  = rbuf_a;
2766 
2767     redund->Destroy = C->ops->destroy;
2768     C->ops->destroy = MatDestroy_MatRedundant;
2769   }
2770   PetscFunctionReturn(0);
2771 }
2772 
2773 #undef __FUNCT__
2774 #define __FUNCT__ "MatGetRowMaxAbs_MPIAIJ"
2775 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2776 {
2777   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2778   PetscErrorCode ierr;
2779   PetscInt       i,*idxb = 0;
2780   PetscScalar    *va,*vb;
2781   Vec            vtmp;
2782 
2783   PetscFunctionBegin;
2784   ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr);
2785   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2786   if (idx) {
2787     for (i=0; i<A->rmap->n; i++) {
2788       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2789     }
2790   }
2791 
2792   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
2793   if (idx) {
2794     ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr);
2795   }
2796   ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
2797   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
2798 
2799   for (i=0; i<A->rmap->n; i++){
2800     if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
2801       va[i] = vb[i];
2802       if (idx) idx[i] = a->garray[idxb[i]];
2803     }
2804   }
2805 
2806   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2807   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
2808   ierr = PetscFree(idxb);CHKERRQ(ierr);
2809   ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
2810   PetscFunctionReturn(0);
2811 }
2812 
2813 #undef __FUNCT__
2814 #define __FUNCT__ "MatGetRowMinAbs_MPIAIJ"
2815 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2816 {
2817   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2818   PetscErrorCode ierr;
2819   PetscInt       i,*idxb = 0;
2820   PetscScalar    *va,*vb;
2821   Vec            vtmp;
2822 
2823   PetscFunctionBegin;
2824   ierr = MatGetRowMinAbs(a->A,v,idx);CHKERRQ(ierr);
2825   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2826   if (idx) {
2827     for (i=0; i<A->rmap->n; i++) {
2828       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2829     }
2830   }
2831 
2832   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
2833   if (idx) {
2834     ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr);
2835   }
2836   ierr = MatGetRowMinAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
2837   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
2838 
2839   for (i=0; i<A->rmap->n; i++){
2840     if (PetscAbsScalar(va[i]) > PetscAbsScalar(vb[i])) {
2841       va[i] = vb[i];
2842       if (idx) idx[i] = a->garray[idxb[i]];
2843     }
2844   }
2845 
2846   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2847   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
2848   ierr = PetscFree(idxb);CHKERRQ(ierr);
2849   ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
2850   PetscFunctionReturn(0);
2851 }
2852 
2853 #undef __FUNCT__
2854 #define __FUNCT__ "MatGetRowMin_MPIAIJ"
2855 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2856 {
2857   Mat_MPIAIJ    *mat    = (Mat_MPIAIJ *) A->data;
2858   PetscInt       n      = A->rmap->n;
2859   PetscInt       cstart = A->cmap->rstart;
2860   PetscInt      *cmap   = mat->garray;
2861   PetscInt      *diagIdx, *offdiagIdx;
2862   Vec            diagV, offdiagV;
2863   PetscScalar   *a, *diagA, *offdiagA;
2864   PetscInt       r;
2865   PetscErrorCode ierr;
2866 
2867   PetscFunctionBegin;
2868   ierr = PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);CHKERRQ(ierr);
2869   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr);
2870   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr);
2871   ierr = MatGetRowMin(mat->A, diagV,    diagIdx);CHKERRQ(ierr);
2872   ierr = MatGetRowMin(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
2873   ierr = VecGetArray(v,        &a);CHKERRQ(ierr);
2874   ierr = VecGetArray(diagV,    &diagA);CHKERRQ(ierr);
2875   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2876   for(r = 0; r < n; ++r) {
2877     if (PetscAbsScalar(diagA[r]) <= PetscAbsScalar(offdiagA[r])) {
2878       a[r]   = diagA[r];
2879       idx[r] = cstart + diagIdx[r];
2880     } else {
2881       a[r]   = offdiagA[r];
2882       idx[r] = cmap[offdiagIdx[r]];
2883     }
2884   }
2885   ierr = VecRestoreArray(v,        &a);CHKERRQ(ierr);
2886   ierr = VecRestoreArray(diagV,    &diagA);CHKERRQ(ierr);
2887   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2888   ierr = VecDestroy(&diagV);CHKERRQ(ierr);
2889   ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
2890   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
2891   PetscFunctionReturn(0);
2892 }
2893 
2894 #undef __FUNCT__
2895 #define __FUNCT__ "MatGetRowMax_MPIAIJ"
2896 PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2897 {
2898   Mat_MPIAIJ    *mat    = (Mat_MPIAIJ *) A->data;
2899   PetscInt       n      = A->rmap->n;
2900   PetscInt       cstart = A->cmap->rstart;
2901   PetscInt      *cmap   = mat->garray;
2902   PetscInt      *diagIdx, *offdiagIdx;
2903   Vec            diagV, offdiagV;
2904   PetscScalar   *a, *diagA, *offdiagA;
2905   PetscInt       r;
2906   PetscErrorCode ierr;
2907 
2908   PetscFunctionBegin;
2909   ierr = PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);CHKERRQ(ierr);
2910   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr);
2911   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr);
2912   ierr = MatGetRowMax(mat->A, diagV,    diagIdx);CHKERRQ(ierr);
2913   ierr = MatGetRowMax(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
2914   ierr = VecGetArray(v,        &a);CHKERRQ(ierr);
2915   ierr = VecGetArray(diagV,    &diagA);CHKERRQ(ierr);
2916   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2917   for(r = 0; r < n; ++r) {
2918     if (PetscAbsScalar(diagA[r]) >= PetscAbsScalar(offdiagA[r])) {
2919       a[r]   = diagA[r];
2920       idx[r] = cstart + diagIdx[r];
2921     } else {
2922       a[r]   = offdiagA[r];
2923       idx[r] = cmap[offdiagIdx[r]];
2924     }
2925   }
2926   ierr = VecRestoreArray(v,        &a);CHKERRQ(ierr);
2927   ierr = VecRestoreArray(diagV,    &diagA);CHKERRQ(ierr);
2928   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2929   ierr = VecDestroy(&diagV);CHKERRQ(ierr);
2930   ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
2931   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
2932   PetscFunctionReturn(0);
2933 }
2934 
2935 #undef __FUNCT__
2936 #define __FUNCT__ "MatGetSeqNonzeroStructure_MPIAIJ"
2937 PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat,Mat *newmat)
2938 {
2939   PetscErrorCode ierr;
2940   Mat            *dummy;
2941 
2942   PetscFunctionBegin;
2943   ierr = MatGetSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy);CHKERRQ(ierr);
2944   *newmat = *dummy;
2945   ierr = PetscFree(dummy);CHKERRQ(ierr);
2946   PetscFunctionReturn(0);
2947 }
2948 
2949 extern PetscErrorCode  MatFDColoringApply_AIJ(Mat,MatFDColoring,Vec,MatStructure*,void*);
2950 
2951 #undef __FUNCT__
2952 #define __FUNCT__ "MatInvertBlockDiagonal_MPIAIJ"
2953 PetscErrorCode  MatInvertBlockDiagonal_MPIAIJ(Mat A,PetscScalar **values)
2954 {
2955   Mat_MPIAIJ    *a = (Mat_MPIAIJ*) A->data;
2956   PetscErrorCode ierr;
2957 
2958   PetscFunctionBegin;
2959   ierr = MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr);
2960   PetscFunctionReturn(0);
2961 }
2962 
2963 
2964 /* -------------------------------------------------------------------*/
2965 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
2966        MatGetRow_MPIAIJ,
2967        MatRestoreRow_MPIAIJ,
2968        MatMult_MPIAIJ,
2969 /* 4*/ MatMultAdd_MPIAIJ,
2970        MatMultTranspose_MPIAIJ,
2971        MatMultTransposeAdd_MPIAIJ,
2972 #ifdef PETSC_HAVE_PBGL
2973        MatSolve_MPIAIJ,
2974 #else
2975        0,
2976 #endif
2977        0,
2978        0,
2979 /*10*/ 0,
2980        0,
2981        0,
2982        MatSOR_MPIAIJ,
2983        MatTranspose_MPIAIJ,
2984 /*15*/ MatGetInfo_MPIAIJ,
2985        MatEqual_MPIAIJ,
2986        MatGetDiagonal_MPIAIJ,
2987        MatDiagonalScale_MPIAIJ,
2988        MatNorm_MPIAIJ,
2989 /*20*/ MatAssemblyBegin_MPIAIJ,
2990        MatAssemblyEnd_MPIAIJ,
2991        MatSetOption_MPIAIJ,
2992        MatZeroEntries_MPIAIJ,
2993 /*24*/ MatZeroRows_MPIAIJ,
2994        0,
2995 #ifdef PETSC_HAVE_PBGL
2996        0,
2997 #else
2998        0,
2999 #endif
3000        0,
3001        0,
3002 /*29*/ MatSetUpPreallocation_MPIAIJ,
3003 #ifdef PETSC_HAVE_PBGL
3004        0,
3005 #else
3006        0,
3007 #endif
3008        0,
3009        0,
3010        0,
3011 /*34*/ MatDuplicate_MPIAIJ,
3012        0,
3013        0,
3014        0,
3015        0,
3016 /*39*/ MatAXPY_MPIAIJ,
3017        MatGetSubMatrices_MPIAIJ,
3018        MatIncreaseOverlap_MPIAIJ,
3019        MatGetValues_MPIAIJ,
3020        MatCopy_MPIAIJ,
3021 /*44*/ MatGetRowMax_MPIAIJ,
3022        MatScale_MPIAIJ,
3023        0,
3024        0,
3025        MatZeroRowsColumns_MPIAIJ,
3026 /*49*/ MatSetBlockSize_MPIAIJ,
3027        0,
3028        0,
3029        0,
3030        0,
3031 /*54*/ MatFDColoringCreate_MPIAIJ,
3032        0,
3033        MatSetUnfactored_MPIAIJ,
3034        MatPermute_MPIAIJ,
3035        0,
3036 /*59*/ MatGetSubMatrix_MPIAIJ,
3037        MatDestroy_MPIAIJ,
3038        MatView_MPIAIJ,
3039        0,
3040        0,
3041 /*64*/ 0,
3042        0,
3043        0,
3044        0,
3045        0,
3046 /*69*/ MatGetRowMaxAbs_MPIAIJ,
3047        MatGetRowMinAbs_MPIAIJ,
3048        0,
3049        MatSetColoring_MPIAIJ,
3050 #if defined(PETSC_HAVE_ADIC)
3051        MatSetValuesAdic_MPIAIJ,
3052 #else
3053        0,
3054 #endif
3055        MatSetValuesAdifor_MPIAIJ,
3056 /*75*/ MatFDColoringApply_AIJ,
3057        0,
3058        0,
3059        0,
3060        0,
3061 /*80*/ 0,
3062        0,
3063        0,
3064 /*83*/ MatLoad_MPIAIJ,
3065        0,
3066        0,
3067        0,
3068        0,
3069        0,
3070 /*89*/ MatMatMult_MPIAIJ_MPIAIJ,
3071        MatMatMultSymbolic_MPIAIJ_MPIAIJ,
3072        MatMatMultNumeric_MPIAIJ_MPIAIJ,
3073        MatPtAP_Basic,
3074        MatPtAPSymbolic_MPIAIJ,
3075 /*94*/ MatPtAPNumeric_MPIAIJ,
3076        0,
3077        0,
3078        0,
3079        0,
3080 /*99*/ 0,
3081        MatPtAPSymbolic_MPIAIJ_MPIAIJ,
3082        MatPtAPNumeric_MPIAIJ_MPIAIJ,
3083        MatConjugate_MPIAIJ,
3084        0,
3085 /*104*/MatSetValuesRow_MPIAIJ,
3086        MatRealPart_MPIAIJ,
3087        MatImaginaryPart_MPIAIJ,
3088        0,
3089        0,
3090 /*109*/0,
3091        MatGetRedundantMatrix_MPIAIJ,
3092        MatGetRowMin_MPIAIJ,
3093        0,
3094        0,
3095 /*114*/MatGetSeqNonzeroStructure_MPIAIJ,
3096        0,
3097        0,
3098        0,
3099        0,
3100 /*119*/0,
3101        0,
3102        0,
3103        0,
3104        MatGetMultiProcBlock_MPIAIJ,
3105 /*124*/MatFindNonZeroRows_MPIAIJ,
3106        MatGetColumnNorms_MPIAIJ,
3107        MatInvertBlockDiagonal_MPIAIJ,
3108        0,
3109        MatGetSubMatricesParallel_MPIAIJ,
3110 /*129*/0,
3111        MatTransposeMatMult_MPIAIJ_MPIAIJ,
3112        MatTransposeMatMultSymbolic_MPIAIJ_MPIAIJ,
3113        MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ,
3114        0,
3115 /*134*/0,
3116        0,
3117        0,
3118        0,
3119        0
3120 };
3121 
3122 /* ----------------------------------------------------------------------------------------*/
3123 
3124 EXTERN_C_BEGIN
3125 #undef __FUNCT__
3126 #define __FUNCT__ "MatStoreValues_MPIAIJ"
3127 PetscErrorCode  MatStoreValues_MPIAIJ(Mat mat)
3128 {
3129   Mat_MPIAIJ     *aij = (Mat_MPIAIJ *)mat->data;
3130   PetscErrorCode ierr;
3131 
3132   PetscFunctionBegin;
3133   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
3134   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
3135   PetscFunctionReturn(0);
3136 }
3137 EXTERN_C_END
3138 
3139 EXTERN_C_BEGIN
3140 #undef __FUNCT__
3141 #define __FUNCT__ "MatRetrieveValues_MPIAIJ"
3142 PetscErrorCode  MatRetrieveValues_MPIAIJ(Mat mat)
3143 {
3144   Mat_MPIAIJ     *aij = (Mat_MPIAIJ *)mat->data;
3145   PetscErrorCode ierr;
3146 
3147   PetscFunctionBegin;
3148   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
3149   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
3150   PetscFunctionReturn(0);
3151 }
3152 EXTERN_C_END
3153 
3154 EXTERN_C_BEGIN
3155 #undef __FUNCT__
3156 #define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ"
3157 PetscErrorCode  MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
3158 {
3159   Mat_MPIAIJ     *b;
3160   PetscErrorCode ierr;
3161   PetscInt       i;
3162 
3163   PetscFunctionBegin;
3164   if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5;
3165   if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2;
3166   if (d_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz);
3167   if (o_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz);
3168 
3169   ierr = PetscLayoutSetBlockSize(B->rmap,1);CHKERRQ(ierr);
3170   ierr = PetscLayoutSetBlockSize(B->cmap,1);CHKERRQ(ierr);
3171   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3172   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3173   if (d_nnz) {
3174     for (i=0; i<B->rmap->n; i++) {
3175       if (d_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %D value %D",i,d_nnz[i]);
3176     }
3177   }
3178   if (o_nnz) {
3179     for (i=0; i<B->rmap->n; i++) {
3180       if (o_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %D value %D",i,o_nnz[i]);
3181     }
3182   }
3183   b = (Mat_MPIAIJ*)B->data;
3184 
3185   if (!B->preallocated) {
3186     /* Explicitly create 2 MATSEQAIJ matrices. */
3187     ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
3188     ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr);
3189     ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr);
3190     ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr);
3191     ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
3192     ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr);
3193     ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr);
3194     ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr);
3195   }
3196 
3197   ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr);
3198   ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr);
3199   B->preallocated = PETSC_TRUE;
3200   ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3201   PetscFunctionReturn(0);
3202 }
3203 EXTERN_C_END
3204 
3205 #undef __FUNCT__
3206 #define __FUNCT__ "MatDuplicate_MPIAIJ"
3207 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
3208 {
3209   Mat            mat;
3210   Mat_MPIAIJ     *a,*oldmat = (Mat_MPIAIJ*)matin->data;
3211   PetscErrorCode ierr;
3212 
3213   PetscFunctionBegin;
3214   *newmat       = 0;
3215   ierr = MatCreate(((PetscObject)matin)->comm,&mat);CHKERRQ(ierr);
3216   ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr);
3217   ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr);
3218   ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
3219   a    = (Mat_MPIAIJ*)mat->data;
3220 
3221   mat->factortype    = matin->factortype;
3222   mat->rmap->bs      = matin->rmap->bs;
3223   mat->assembled    = PETSC_TRUE;
3224   mat->insertmode   = NOT_SET_VALUES;
3225   mat->preallocated = PETSC_TRUE;
3226 
3227   a->size           = oldmat->size;
3228   a->rank           = oldmat->rank;
3229   a->donotstash     = oldmat->donotstash;
3230   a->roworiented    = oldmat->roworiented;
3231   a->rowindices     = 0;
3232   a->rowvalues      = 0;
3233   a->getrowactive   = PETSC_FALSE;
3234 
3235   ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr);
3236   ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr);
3237 
3238   if (oldmat->colmap) {
3239 #if defined (PETSC_USE_CTABLE)
3240     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
3241 #else
3242     ierr = PetscMalloc((mat->cmap->N)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr);
3243     ierr = PetscLogObjectMemory(mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
3244     ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
3245 #endif
3246   } else a->colmap = 0;
3247   if (oldmat->garray) {
3248     PetscInt len;
3249     len  = oldmat->B->cmap->n;
3250     ierr = PetscMalloc((len+1)*sizeof(PetscInt),&a->garray);CHKERRQ(ierr);
3251     ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr);
3252     if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); }
3253   } else a->garray = 0;
3254 
3255   ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
3256   ierr = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr);
3257   ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
3258   ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr);
3259   ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
3260   ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr);
3261   ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
3262   ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr);
3263   ierr = PetscFListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr);
3264   *newmat = mat;
3265   PetscFunctionReturn(0);
3266 }
3267 
3268 
3269 
3270 #undef __FUNCT__
3271 #define __FUNCT__ "MatLoad_MPIAIJ"
3272 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer)
3273 {
3274   PetscScalar    *vals,*svals;
3275   MPI_Comm       comm = ((PetscObject)viewer)->comm;
3276   PetscErrorCode ierr;
3277   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
3278   PetscInt       i,nz,j,rstart,rend,mmax,maxnz = 0,grows,gcols;
3279   PetscInt       header[4],*rowlengths = 0,M,N,m,*cols;
3280   PetscInt       *ourlens = PETSC_NULL,*procsnz = PETSC_NULL,*offlens = PETSC_NULL,jj,*mycols,*smycols;
3281   PetscInt       cend,cstart,n,*rowners,sizesset=1;
3282   int            fd;
3283 
3284   PetscFunctionBegin;
3285   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3286   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3287   if (!rank) {
3288     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
3289     ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
3290     if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
3291   }
3292 
3293   if (newMat->rmap->n < 0 && newMat->rmap->N < 0 && newMat->cmap->n < 0 && newMat->cmap->N < 0) sizesset = 0;
3294 
3295   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
3296   M = header[1]; N = header[2];
3297   /* If global rows/cols are set to PETSC_DECIDE, set it to the sizes given in the file */
3298   if (sizesset && newMat->rmap->N < 0) newMat->rmap->N = M;
3299   if (sizesset && newMat->cmap->N < 0) newMat->cmap->N = N;
3300 
3301   /* If global sizes are set, check if they are consistent with that given in the file */
3302   if (sizesset) {
3303     ierr = MatGetSize(newMat,&grows,&gcols);CHKERRQ(ierr);
3304   }
3305   if (sizesset && newMat->rmap->N != grows) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows:Matrix in file has (%d) and input matrix has (%d)",M,grows);
3306   if (sizesset && newMat->cmap->N != gcols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols:Matrix in file has (%d) and input matrix has (%d)",N,gcols);
3307 
3308   /* determine ownership of all rows */
3309   if (newMat->rmap->n < 0 ) m    = M/size + ((M % size) > rank); /* PETSC_DECIDE */
3310   else m = newMat->rmap->n; /* Set by user */
3311 
3312   ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr);
3313   ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
3314 
3315   /* First process needs enough room for process with most rows */
3316   if (!rank) {
3317     mmax       = rowners[1];
3318     for (i=2; i<=size; i++) {
3319       mmax = PetscMax(mmax,rowners[i]);
3320     }
3321   } else mmax = m;
3322 
3323   rowners[0] = 0;
3324   for (i=2; i<=size; i++) {
3325     rowners[i] += rowners[i-1];
3326   }
3327   rstart = rowners[rank];
3328   rend   = rowners[rank+1];
3329 
3330   /* distribute row lengths to all processors */
3331   ierr    = PetscMalloc2(mmax,PetscInt,&ourlens,mmax,PetscInt,&offlens);CHKERRQ(ierr);
3332   if (!rank) {
3333     ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr);
3334     ierr = PetscMalloc(m*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
3335     ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr);
3336     ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr);
3337     for (j=0; j<m; j++) {
3338       procsnz[0] += ourlens[j];
3339     }
3340     for (i=1; i<size; i++) {
3341       ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr);
3342       /* calculate the number of nonzeros on each processor */
3343       for (j=0; j<rowners[i+1]-rowners[i]; j++) {
3344         procsnz[i] += rowlengths[j];
3345       }
3346       ierr = MPILong_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
3347     }
3348     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
3349   } else {
3350     ierr = MPILong_Recv(ourlens,m,MPIU_INT,0,tag,comm);CHKERRQ(ierr);
3351   }
3352 
3353   if (!rank) {
3354     /* determine max buffer needed and allocate it */
3355     maxnz = 0;
3356     for (i=0; i<size; i++) {
3357       maxnz = PetscMax(maxnz,procsnz[i]);
3358     }
3359     ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr);
3360 
3361     /* read in my part of the matrix column indices  */
3362     nz   = procsnz[0];
3363     ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr);
3364     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
3365 
3366     /* read in everyone else's and ship them off */
3367     for (i=1; i<size; i++) {
3368       nz     = procsnz[i];
3369       ierr   = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
3370       ierr   = MPILong_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
3371     }
3372     ierr = PetscFree(cols);CHKERRQ(ierr);
3373   } else {
3374     /* determine buffer space needed for message */
3375     nz = 0;
3376     for (i=0; i<m; i++) {
3377       nz += ourlens[i];
3378     }
3379     ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr);
3380 
3381     /* receive message of column indices*/
3382     ierr = MPILong_Recv(mycols,nz,MPIU_INT,0,tag,comm);CHKERRQ(ierr);
3383   }
3384 
3385   /* determine column ownership if matrix is not square */
3386   if (N != M) {
3387     if (newMat->cmap->n < 0) n      = N/size + ((N % size) > rank);
3388     else n = newMat->cmap->n;
3389     ierr   = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3390     cstart = cend - n;
3391   } else {
3392     cstart = rstart;
3393     cend   = rend;
3394     n      = cend - cstart;
3395   }
3396 
3397   /* loop over local rows, determining number of off diagonal entries */
3398   ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr);
3399   jj = 0;
3400   for (i=0; i<m; i++) {
3401     for (j=0; j<ourlens[i]; j++) {
3402       if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++;
3403       jj++;
3404     }
3405   }
3406 
3407   for (i=0; i<m; i++) {
3408     ourlens[i] -= offlens[i];
3409   }
3410   if (!sizesset) {
3411     ierr = MatSetSizes(newMat,m,n,M,N);CHKERRQ(ierr);
3412   }
3413   ierr = MatMPIAIJSetPreallocation(newMat,0,ourlens,0,offlens);CHKERRQ(ierr);
3414 
3415   for (i=0; i<m; i++) {
3416     ourlens[i] += offlens[i];
3417   }
3418 
3419   if (!rank) {
3420     ierr = PetscMalloc((maxnz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
3421 
3422     /* read in my part of the matrix numerical values  */
3423     nz   = procsnz[0];
3424     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3425 
3426     /* insert into matrix */
3427     jj      = rstart;
3428     smycols = mycols;
3429     svals   = vals;
3430     for (i=0; i<m; i++) {
3431       ierr = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
3432       smycols += ourlens[i];
3433       svals   += ourlens[i];
3434       jj++;
3435     }
3436 
3437     /* read in other processors and ship out */
3438     for (i=1; i<size; i++) {
3439       nz     = procsnz[i];
3440       ierr   = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3441       ierr   = MPILong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr);
3442     }
3443     ierr = PetscFree(procsnz);CHKERRQ(ierr);
3444   } else {
3445     /* receive numeric values */
3446     ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
3447 
3448     /* receive message of values*/
3449     ierr   = MPILong_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr);
3450 
3451     /* insert into matrix */
3452     jj      = rstart;
3453     smycols = mycols;
3454     svals   = vals;
3455     for (i=0; i<m; i++) {
3456       ierr     = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
3457       smycols += ourlens[i];
3458       svals   += ourlens[i];
3459       jj++;
3460     }
3461   }
3462   ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr);
3463   ierr = PetscFree(vals);CHKERRQ(ierr);
3464   ierr = PetscFree(mycols);CHKERRQ(ierr);
3465   ierr = PetscFree(rowners);CHKERRQ(ierr);
3466 
3467   ierr = MatAssemblyBegin(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3468   ierr = MatAssemblyEnd(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3469   PetscFunctionReturn(0);
3470 }
3471 
3472 #undef __FUNCT__
3473 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ"
3474 PetscErrorCode MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat)
3475 {
3476   PetscErrorCode ierr;
3477   IS             iscol_local;
3478   PetscInt       csize;
3479 
3480   PetscFunctionBegin;
3481   ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
3482   if (call == MAT_REUSE_MATRIX) {
3483     ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr);
3484     if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3485   } else {
3486     ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
3487   }
3488   ierr = MatGetSubMatrix_MPIAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr);
3489   if (call == MAT_INITIAL_MATRIX) {
3490     ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
3491     ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
3492   }
3493   PetscFunctionReturn(0);
3494 }
3495 
3496 #undef __FUNCT__
3497 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ_Private"
3498 /*
3499     Not great since it makes two copies of the submatrix: first a SeqAIJ
3500   on each process, and then the final result by concatenating the local matrices.
3501   Writing it directly would be much like MatGetSubMatrices_MPIAIJ()
3502 
3503   Note: This requires a sequential iscol with all indices.
3504 */
3505 PetscErrorCode MatGetSubMatrix_MPIAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat)
3506 {
3507   PetscErrorCode ierr;
3508   PetscMPIInt    rank,size;
3509   PetscInt       i,m,n,rstart,row,rend,nz,*cwork,j;
3510   PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal;
3511   Mat            *local,M,Mreuse;
3512   MatScalar      *vwork,*aa;
3513   MPI_Comm       comm = ((PetscObject)mat)->comm;
3514   Mat_SeqAIJ     *aij;
3515 
3516 
3517   PetscFunctionBegin;
3518   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3519   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3520 
3521   if (call ==  MAT_REUSE_MATRIX) {
3522     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr);
3523     if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3524     local = &Mreuse;
3525     ierr  = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);CHKERRQ(ierr);
3526   } else {
3527     ierr   = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr);
3528     Mreuse = *local;
3529     ierr   = PetscFree(local);CHKERRQ(ierr);
3530   }
3531 
3532   /*
3533       m - number of local rows
3534       n - number of columns (same on all processors)
3535       rstart - first row in new global matrix generated
3536   */
3537   ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
3538   if (call == MAT_INITIAL_MATRIX) {
3539     aij = (Mat_SeqAIJ*)(Mreuse)->data;
3540     ii  = aij->i;
3541     jj  = aij->j;
3542 
3543     /*
3544         Determine the number of non-zeros in the diagonal and off-diagonal
3545         portions of the matrix in order to do correct preallocation
3546     */
3547 
3548     /* first get start and end of "diagonal" columns */
3549     if (csize == PETSC_DECIDE) {
3550       ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
3551       if (mglobal == n) { /* square matrix */
3552         nlocal = m;
3553       } else {
3554         nlocal = n/size + ((n % size) > rank);
3555       }
3556     } else {
3557       nlocal = csize;
3558     }
3559     ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3560     rstart = rend - nlocal;
3561     if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n);
3562 
3563     /* next, compute all the lengths */
3564     ierr  = PetscMalloc((2*m+1)*sizeof(PetscInt),&dlens);CHKERRQ(ierr);
3565     olens = dlens + m;
3566     for (i=0; i<m; i++) {
3567       jend = ii[i+1] - ii[i];
3568       olen = 0;
3569       dlen = 0;
3570       for (j=0; j<jend; j++) {
3571         if (*jj < rstart || *jj >= rend) olen++;
3572         else dlen++;
3573         jj++;
3574       }
3575       olens[i] = olen;
3576       dlens[i] = dlen;
3577     }
3578     ierr = MatCreate(comm,&M);CHKERRQ(ierr);
3579     ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr);
3580     ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr);
3581     ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr);
3582     ierr = PetscFree(dlens);CHKERRQ(ierr);
3583   } else {
3584     PetscInt ml,nl;
3585 
3586     M = *newmat;
3587     ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
3588     if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
3589     ierr = MatZeroEntries(M);CHKERRQ(ierr);
3590     /*
3591          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
3592        rather than the slower MatSetValues().
3593     */
3594     M->was_assembled = PETSC_TRUE;
3595     M->assembled     = PETSC_FALSE;
3596   }
3597   ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
3598   aij = (Mat_SeqAIJ*)(Mreuse)->data;
3599   ii  = aij->i;
3600   jj  = aij->j;
3601   aa  = aij->a;
3602   for (i=0; i<m; i++) {
3603     row   = rstart + i;
3604     nz    = ii[i+1] - ii[i];
3605     cwork = jj;     jj += nz;
3606     vwork = aa;     aa += nz;
3607     ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
3608   }
3609 
3610   ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3611   ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3612   *newmat = M;
3613 
3614   /* save submatrix used in processor for next request */
3615   if (call ==  MAT_INITIAL_MATRIX) {
3616     ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
3617     ierr = MatDestroy(&Mreuse);CHKERRQ(ierr);
3618   }
3619 
3620   PetscFunctionReturn(0);
3621 }
3622 
3623 EXTERN_C_BEGIN
3624 #undef __FUNCT__
3625 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR_MPIAIJ"
3626 PetscErrorCode  MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[])
3627 {
3628   PetscInt       m,cstart, cend,j,nnz,i,d;
3629   PetscInt       *d_nnz,*o_nnz,nnz_max = 0,rstart,ii;
3630   const PetscInt *JJ;
3631   PetscScalar    *values;
3632   PetscErrorCode ierr;
3633 
3634   PetscFunctionBegin;
3635   if (Ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0 it is %D",Ii[0]);
3636 
3637   ierr = PetscLayoutSetBlockSize(B->rmap,1);CHKERRQ(ierr);
3638   ierr = PetscLayoutSetBlockSize(B->cmap,1);CHKERRQ(ierr);
3639   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3640   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3641   m      = B->rmap->n;
3642   cstart = B->cmap->rstart;
3643   cend   = B->cmap->rend;
3644   rstart = B->rmap->rstart;
3645 
3646   ierr  = PetscMalloc2(m,PetscInt,&d_nnz,m,PetscInt,&o_nnz);CHKERRQ(ierr);
3647 
3648 #if defined(PETSC_USE_DEBUG)
3649   for (i=0; i<m; i++) {
3650     nnz     = Ii[i+1]- Ii[i];
3651     JJ      = J + Ii[i];
3652     if (nnz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative %D number of columns",i,nnz);
3653     if (nnz && (JJ[0] < 0)) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index",i);
3654     if (nnz && (JJ[nnz-1] >= B->cmap->N)) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N);
3655   }
3656 #endif
3657 
3658   for (i=0; i<m; i++) {
3659     nnz     = Ii[i+1]- Ii[i];
3660     JJ      = J + Ii[i];
3661     nnz_max = PetscMax(nnz_max,nnz);
3662     d       = 0;
3663     for (j=0; j<nnz; j++) {
3664       if (cstart <= JJ[j] && JJ[j] < cend) d++;
3665     }
3666     d_nnz[i] = d;
3667     o_nnz[i] = nnz - d;
3668   }
3669   ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
3670   ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);
3671 
3672   if (v) values = (PetscScalar*)v;
3673   else {
3674     ierr = PetscMalloc((nnz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr);
3675     ierr = PetscMemzero(values,nnz_max*sizeof(PetscScalar));CHKERRQ(ierr);
3676   }
3677 
3678   for (i=0; i<m; i++) {
3679     ii   = i + rstart;
3680     nnz  = Ii[i+1]- Ii[i];
3681     ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ? Ii[i] : 0),INSERT_VALUES);CHKERRQ(ierr);
3682   }
3683   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3684   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3685 
3686   if (!v) {
3687     ierr = PetscFree(values);CHKERRQ(ierr);
3688   }
3689   ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3690   PetscFunctionReturn(0);
3691 }
3692 EXTERN_C_END
3693 
3694 #undef __FUNCT__
3695 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR"
3696 /*@
3697    MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format
3698    (the default parallel PETSc format).
3699 
3700    Collective on MPI_Comm
3701 
3702    Input Parameters:
3703 +  B - the matrix
3704 .  i - the indices into j for the start of each local row (starts with zero)
3705 .  j - the column indices for each local row (starts with zero)
3706 -  v - optional values in the matrix
3707 
3708    Level: developer
3709 
3710    Notes:
3711        The i, j, and v arrays ARE copied by this routine into the internal format used by PETSc;
3712      thus you CANNOT change the matrix entries by changing the values of v[] after you have
3713      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3714 
3715        The i and j indices are 0 based, and the i indices are offsets into the local j array.
3716 
3717        The format which is used for the sparse matrix input is equivalent to a
3718     row-major ordering, i.e., for the following matrix, the input data expected is
3719     as shown:
3720 
3721         1 0 0
3722         2 0 3     P0
3723        -------
3724         4 5 6     P1
3725 
3726      Process0 [P0]: rows_owned=[0,1]
3727         i =  {0,1,3}  [size = nrow+1  = 2+1]
3728         j =  {0,0,2}  [size = nz = 3]
3729         v =  {1,2,3}  [size = nz = 3]
3730 
3731      Process1 [P1]: rows_owned=[2]
3732         i =  {0,3}    [size = nrow+1  = 1+1]
3733         j =  {0,1,2}  [size = nz = 3]
3734         v =  {4,5,6}  [size = nz = 3]
3735 
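   As a concrete illustration, a minimal sketch of the calls process 0 might make for
   the 3x3 example above (each process passes its own local arrays; error checking is
   omitted and the variable names are only illustrative):

.vb
      Mat         B;
      PetscInt    i0[] = {0,1,3}, j0[] = {0,0,2};   /* P0 owns rows 0 and 1 */
      PetscScalar v0[] = {1.0,2.0,3.0};

      MatCreate(PETSC_COMM_WORLD,&B);
      MatSetSizes(B,2,PETSC_DECIDE,3,3);            /* 2 local rows of the 3x3 matrix */
      MatSetType(B,MATMPIAIJ);
      MatMPIAIJSetPreallocationCSR(B,i0,j0,v0);     /* preallocates and also inserts the values */
.ve

   On process 1 the arrays would instead be i = {0,3}, j = {0,1,2}, v = {4,5,6} with
   one local row.
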
3736 .keywords: matrix, aij, compressed row, sparse, parallel
3737 
3738 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateMPIAIJ(), MPIAIJ,
3739           MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays()
3740 @*/
3741 PetscErrorCode  MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
3742 {
3743   PetscErrorCode ierr;
3744 
3745   PetscFunctionBegin;
3746   ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocationCSR_C",(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,i,j,v));CHKERRQ(ierr);
3747   PetscFunctionReturn(0);
3748 }
3749 
3750 #undef __FUNCT__
3751 #define __FUNCT__ "MatMPIAIJSetPreallocation"
3752 /*@C
3753    MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format
3754    (the default parallel PETSc format).  For good matrix assembly performance
3755    the user should preallocate the matrix storage by setting the parameters
3756    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3757    performance can be increased by more than a factor of 50.
3758 
3759    Collective on MPI_Comm
3760 
3761    Input Parameters:
3762 +  A - the matrix
3763 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
3764            (same value is used for all local rows)
3765 .  d_nnz - array containing the number of nonzeros in the various rows of the
3766            DIAGONAL portion of the local submatrix (possibly different for each row)
3767            or PETSC_NULL, if d_nz is used to specify the nonzero structure.
3768            The size of this array is equal to the number of local rows, i.e 'm'.
3769            You must leave room for the diagonal entry even if it is zero.
3770 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
3771            submatrix (same value is used for all local rows).
3772 -  o_nnz - array containing the number of nonzeros in the various rows of the
3773            OFF-DIAGONAL portion of the local submatrix (possibly different for
3774            each row) or PETSC_NULL, if o_nz is used to specify the nonzero
3775            structure. The size of this array is equal to the number
3776            of local rows, i.e 'm'.
3777 
3778    If the *_nnz parameter is given then the *_nz parameter is ignored
3779 
3780    The AIJ format (also called the Yale sparse matrix format or
3781    compressed row storage (CSR)), is fully compatible with standard Fortran 77
3782    storage.  The stored row and column indices begin with zero.
3783    See the <A href="../../docs/manual.pdf#nameddest=ch_mat">Mat chapter of the users manual</A> for details.
3784 
3785    The parallel matrix is partitioned such that the first m0 rows belong to
3786    process 0, the next m1 rows belong to process 1, the next m2 rows belong
3787    to process 2 etc., where m0,m1,m2... are the input parameter 'm'.
3788 
3789    The DIAGONAL portion of the local submatrix of a processor can be defined
3790    as the submatrix which is obtained by extracting the part corresponding to
3791    the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the
3792    first row that belongs to the processor, r2 is the last row belonging to
3793    this processor, and c1-c2 is the range of indices of the local part of a
3794    vector suitable for applying the matrix to.  This is an mxn matrix.  In the
3795    common case of a square matrix, the row and column ranges are the same and
3796    the DIAGONAL part is also square. The remaining portion of the local
3797    submatrix (mxN) constitute the OFF-DIAGONAL portion.
3798 
3799    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
3800 
3801    You can call MatGetInfo() to get information on how effective the preallocation was;
3802    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
3803    You can also run with the option -info and look for messages with the string
3804    malloc in them to see if additional memory allocation was needed.
3805 
3806    Example usage:
3807 
3808    Consider the following 8x8 matrix with 34 non-zero values, that is
3809    assembled across 3 processors. Lets assume that proc0 owns 3 rows,
3810    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
3811    as follows:
3812 
3813 .vb
3814             1  2  0  |  0  3  0  |  0  4
3815     Proc0   0  5  6  |  7  0  0  |  8  0
3816             9  0 10  | 11  0  0  | 12  0
3817     -------------------------------------
3818            13  0 14  | 15 16 17  |  0  0
3819     Proc1   0 18  0  | 19 20 21  |  0  0
3820             0  0  0  | 22 23  0  | 24  0
3821     -------------------------------------
3822     Proc2  25 26 27  |  0  0 28  | 29  0
3823            30  0  0  | 31 32 33  |  0 34
3824 .ve
3825 
3826    This can be represented as a collection of submatrices as:
3827 
3828 .vb
3829       A B C
3830       D E F
3831       G H I
3832 .ve
3833 
3834    Where the submatrices A,B,C are owned by proc0, D,E,F are
3835    owned by proc1, G,H,I are owned by proc2.
3836 
3837    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3838    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3839    The 'M','N' parameters are 8,8, and have the same values on all procs.
3840 
3841    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
3842    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
3843    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
3844    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
3845    part as SeqAIJ matrices, e.g., proc1 will store [E] as a SeqAIJ
3846    matrix, and [DF] as another SeqAIJ matrix.
3847 
3848    When d_nz, o_nz parameters are specified, d_nz storage elements are
3849    allocated for every row of the local diagonal submatrix, and o_nz
3850    storage locations are allocated for every row of the OFF-DIAGONAL submatrix.
3851    One way to choose d_nz and o_nz is to use the max nonzeros per local
3852    row for each of the local DIAGONAL and OFF-DIAGONAL submatrices.
3853    In this case, the values of d_nz,o_nz are:
3854 .vb
3855      proc0 : d_nz = 2, o_nz = 2
3856      proc1 : d_nz = 3, o_nz = 2
3857      proc2 : d_nz = 1, o_nz = 4
3858 .ve
3859    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
3860    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
3861    for proc2, i.e., we are using 12+15+10=37 storage locations to store
3862    34 values.
3863 
3864    When d_nnz, o_nnz parameters are specified, the storage is specified
3865    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
3866    In the above case the values for d_nnz,o_nnz are:
3867 .vb
3868      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
3869      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
3870      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
3871 .ve
3872    Here the space allocated is the sum of all the above values, i.e., 34, and
3873    hence the preallocation is perfect.
3874 
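   For instance, with the d_nnz/o_nnz values above, a minimal sketch of what the calls
   on proc0 might look like (the other processes pass their own local sizes and arrays;
   error checking is omitted and the variable names are only illustrative):

.vb
      Mat      A;
      PetscInt d_nnz[] = {2,2,2}, o_nnz[] = {2,2,2};   /* proc0 values from above */

      MatCreate(PETSC_COMM_WORLD,&A);
      MatSetSizes(A,3,3,8,8);                          /* proc0 owns 3 rows and 3 columns */
      MatSetType(A,MATMPIAIJ);
      MatMPIAIJSetPreallocation(A,0,d_nnz,0,o_nnz);    /* d_nz,o_nz ignored since the arrays are given */
.ve
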
3875    Level: intermediate
3876 
3877 .keywords: matrix, aij, compressed row, sparse, parallel
3878 
3879 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIAIJ(), MatMPIAIJSetPreallocationCSR(),
3880           MPIAIJ, MatGetInfo()
3881 @*/
3882 PetscErrorCode  MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
3883 {
3884   PetscErrorCode ierr;
3885 
3886   PetscFunctionBegin;
3887   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
3888   PetscValidType(B,1);
3889   ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocation_C",(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr);
3890   PetscFunctionReturn(0);
3891 }
3892 
3893 #undef __FUNCT__
3894 #define __FUNCT__ "MatCreateMPIAIJWithArrays"
3895 /*@
3896      MatCreateMPIAIJWithArrays - creates a MPI AIJ matrix using arrays that contain in standard
3897          CSR format the local rows.
3898 
3899    Collective on MPI_Comm
3900 
3901    Input Parameters:
3902 +  comm - MPI communicator
3903 .  m - number of local rows (Cannot be PETSC_DECIDE)
3904 .  n - This value should be the same as the local size used in creating the
3905        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
3906        calculated if N is given) For square matrices n is almost always m.
3907 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
3908 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
3909 .   i - row indices
3910 .   j - column indices
3911 -   a - matrix values
3912 
3913    Output Parameter:
3914 .   mat - the matrix
3915 
3916    Level: intermediate
3917 
3918    Notes:
3919        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
3920      thus you CANNOT change the matrix entries by changing the values of a[] after you have
3921      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3922 
3923        The i and j indices are 0 based, and the i indices are offsets into the local j array.
3924 
3925        The format which is used for the sparse matrix input is equivalent to a
3926     row-major ordering, i.e., for the following matrix, the input data expected is
3927     as shown:
3928 
3929         1 0 0
3930         2 0 3     P0
3931        -------
3932         4 5 6     P1
3933 
3934      Process0 [P0]: rows_owned=[0,1]
3935         i =  {0,1,3}  [size = nrow+1  = 2+1]
3936         j =  {0,0,2}  [size = nz = 3]
3937         v =  {1,2,3}  [size = nz = 3]
3938 
3939      Process1 [P1]: rows_owned=[2]
3940         i =  {0,3}    [size = nrow+1  = 1+1]
3941         j =  {0,1,2}  [size = nz = 3]
3942         v =  {4,5,6}  [size = nz = 3]
3943 
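   In code, each process passes its own arrays; a minimal sketch for P0 in the example
   above (error checking omitted, variable names only illustrative; P1 would pass
   i = {0,3}, j = {0,1,2}, a = {4,5,6} with m = 1):

.vb
      Mat         A;
      PetscInt    i0[] = {0,1,3}, j0[] = {0,0,2};
      PetscScalar a0[] = {1.0,2.0,3.0};

      MatCreateMPIAIJWithArrays(PETSC_COMM_WORLD,2,PETSC_DECIDE,3,3,i0,j0,a0,&A);
.ve
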
3944 .keywords: matrix, aij, compressed row, sparse, parallel
3945 
3946 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
3947           MPIAIJ, MatCreateMPIAIJ(), MatCreateMPIAIJWithSplitArrays()
3948 @*/
3949 PetscErrorCode  MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
3950 {
3951   PetscErrorCode ierr;
3952 
3953   PetscFunctionBegin;
3954   if (i[0]) {
3955     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
3956   }
3957   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
3958   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
3959   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
3960   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
3961   ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr);
3962   PetscFunctionReturn(0);
3963 }
3964 
3965 #undef __FUNCT__
3966 #define __FUNCT__ "MatCreateMPIAIJ"
3967 /*@C
3968    MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format
3969    (the default parallel PETSc format).  For good matrix assembly performance
3970    the user should preallocate the matrix storage by setting the parameters
3971    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3972    performance can be increased by more than a factor of 50.
3973 
3974    Collective on MPI_Comm
3975 
3976    Input Parameters:
3977 +  comm - MPI communicator
3978 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
3979            This value should be the same as the local size used in creating the
3980            y vector for the matrix-vector product y = Ax.
3981 .  n - This value should be the same as the local size used in creating the
3982        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
3983        calculated if N is given) For square matrices n is almost always m.
3984 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
3985 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
3986 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
3987            (same value is used for all local rows)
3988 .  d_nnz - array containing the number of nonzeros in the various rows of the
3989            DIAGONAL portion of the local submatrix (possibly different for each row)
3990            or PETSC_NULL, if d_nz is used to specify the nonzero structure.
3991            The size of this array is equal to the number of local rows, i.e 'm'.
3992            You must leave room for the diagonal entry even if it is zero.
3993 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
3994            submatrix (same value is used for all local rows).
3995 -  o_nnz - array containing the number of nonzeros in the various rows of the
3996            OFF-DIAGONAL portion of the local submatrix (possibly different for
3997            each row) or PETSC_NULL, if o_nz is used to specify the nonzero
3998            structure. The size of this array is equal to the number
3999            of local rows, i.e 'm'.
4000 
4001    Output Parameter:
4002 .  A - the matrix
4003 
4004    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
4005    MatXXXXSetPreallocation() paradgm instead of this routine directly.
4006    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
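
   A sketch of that paradigm (assuming comm has more than one process and that the sizes
   m,n,M,N and the preallocation data d_nz,d_nnz,o_nz,o_nnz are already known):
.vb
     Mat A;
     ierr = MatCreate(comm,&A);CHKERRQ(ierr);
     ierr = MatSetSizes(A,m,n,M,N);CHKERRQ(ierr);
     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
     ierr = MatMPIAIJSetPreallocation(A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
.ve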
4007 
4008    Notes:
4009    If the *_nnz parameter is given then the *_nz parameter is ignored
4010 
4011    m,n,M,N parameters specify the size of the matrix, and its partitioning across
4012    processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
4013    storage requirements for this matrix.
4014 
4015    If PETSC_DECIDE or  PETSC_DETERMINE is used for a particular argument on one
4016    processor then it must be used on all processors that share the object for
4017    that argument.
4018 
4019    The user MUST specify either the local or global matrix dimensions
4020    (possibly both).
4021 
4022    The parallel matrix is partitioned across processors such that the
4023    first m0 rows belong to process 0, the next m1 rows belong to
4024    process 1, the next m2 rows belong to process 2 etc.. where
4025    m0,m1,m2,.. are the input parameter 'm'. That is, each processor stores
4026    values corresponding to an [m x N] submatrix.
4027 
4028    The columns are logically partitioned with the n0 columns belonging
4029    to 0th partition, the next n1 columns belonging to the next
4030    partition etc., where n0,n1,n2... are the input parameter 'n'.
4031 
4032    The DIAGONAL portion of the local submatrix on any given processor
4033    is the submatrix corresponding to the rows and columns m,n
4034    corresponding to the given processor, i.e. the diagonal matrix on
4035    process 0 is [m0 x n0], diagonal matrix on process 1 is [m1 x n1]
4036    etc. The remaining portion of the local submatrix [m x (N-n)]
4037    constitutes the OFF-DIAGONAL portion. The example below better
4038    illustrates this concept.
4039 
4040    For a square global matrix we define each processor's diagonal portion
4041    to be its local rows and the corresponding columns (a square submatrix);
4042    each processor's off-diagonal portion encompasses the remainder of the
4043    local matrix (a rectangular submatrix).
4044 
4045    If o_nnz and d_nnz are specified, then o_nz and d_nz are ignored.
4046 
4047    When calling this routine with a single process communicator, a matrix of
4048    type SEQAIJ is returned.  If a matrix of type MPIAIJ is desired for this
4049    type of communicator, use the construction mechanism:
4050      MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatSetSizes(A, m,n,M,N); MatMPIAIJSetPreallocation(A,...);
4051 
4052    By default, this format uses inodes (identical nodes) when possible.
4053    We search for consecutive rows with the same nonzero structure, thereby
4054    reusing matrix information to achieve increased efficiency.
4055 
4056    Options Database Keys:
4057 +  -mat_no_inode  - Do not use inodes
4058 .  -mat_inode_limit <limit> - Sets inode limit (max limit=5)
4059 -  -mat_aij_oneindex - Internally use indexing starting at 1
4060         rather than 0.  Note that when calling MatSetValues(),
4061         the user still MUST index entries starting at 0!
4062 
4063 
4064    Example usage:
4065 
4066    Consider the following 8x8 matrix with 34 non-zero values, which is
4067    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
4068    proc1 owns 3 rows, and proc2 owns 2 rows. This division can be shown
4069    as follows:
4070 
4071 .vb
4072             1  2  0  |  0  3  0  |  0  4
4073     Proc0   0  5  6  |  7  0  0  |  8  0
4074             9  0 10  | 11  0  0  | 12  0
4075     -------------------------------------
4076            13  0 14  | 15 16 17  |  0  0
4077     Proc1   0 18  0  | 19 20 21  |  0  0
4078             0  0  0  | 22 23  0  | 24  0
4079     -------------------------------------
4080     Proc2  25 26 27  |  0  0 28  | 29  0
4081            30  0  0  | 31 32 33  |  0 34
4082 .ve
4083 
4084    This can be represented as a collection of submatrices as:
4085 
4086 .vb
4087       A B C
4088       D E F
4089       G H I
4090 .ve
4091 
4092    Where the submatrices A,B,C are owned by proc0, D,E,F are
4093    owned by proc1, G,H,I are owned by proc2.
4094 
4095    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4096    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4097    The 'M','N' parameters are 8,8, and have the same values on all procs.
4098 
4099    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
4100    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
4101    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
4102    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
4103    part as SeqAIJ matrices. for eg: proc1 will store [E] as a SeqAIJ
4104    part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
4105    matrix, and [DF] as another SeqAIJ matrix.
4106    When d_nz, o_nz parameters are specified, d_nz storage elements are
4107    allocated for every row of the local diagonal submatrix, and o_nz
4108    storage locations are allocated for every row of the OFF-DIAGONAL submat.
4109    One way to choose d_nz and o_nz is to use the maximum number of nonzeros over
4110    the local rows for each of the local DIAGONAL and OFF-DIAGONAL submatrices.
4111    In this case, the values of d_nz,o_nz are:
4112 .vb
4113      proc0 : d_nz = 2, o_nz = 2
4114      proc1 : d_nz = 3, o_nz = 2
4115      proc2 : d_nz = 1, o_nz = 4
4116 .ve
4117    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
4118    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
4119    for proc2, i.e. we are using 12+15+10=37 storage locations to store
4120    34 values.
4121 
4122    When d_nnz, o_nnz parameters are specified, the storage is specified
4123    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
4124    In the above case the values for d_nnz,o_nnz are:
4125 .vb
4126      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
4127      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
4128      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
4129 .ve
4130    Here the space allocated is the sum of all the above values, i.e. 34, and
4131    hence the preallocation is perfect.
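
   For instance, with the example above, proc1 would call (a sketch; each process passes
   its own m, n, and nnz values, error checking via CHKERRQ):
.vb
     Mat      A;
     PetscInt d_nnz[] = {3,3,2}, o_nnz[] = {2,1,1};
     ierr = MatCreateMPIAIJ(comm,3,3,8,8,0,d_nnz,0,o_nnz,&A);CHKERRQ(ierr);
     /* or, with the single-value form: MatCreateMPIAIJ(comm,3,3,8,8,3,PETSC_NULL,2,PETSC_NULL,&A); */
.ve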
4132 
4133    Level: intermediate
4134 
4135 .keywords: matrix, aij, compressed row, sparse, parallel
4136 
4137 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
4138           MPIAIJ, MatCreateMPIAIJWithArrays()
4139 @*/
4140 PetscErrorCode  MatCreateMPIAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
4141 {
4142   PetscErrorCode ierr;
4143   PetscMPIInt    size;
4144 
4145   PetscFunctionBegin;
4146   ierr = MatCreate(comm,A);CHKERRQ(ierr);
4147   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
4148   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4149   if (size > 1) {
4150     ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr);
4151     ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
4152   } else {
4153     ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr);
4154     ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr);
4155   }
4156   PetscFunctionReturn(0);
4157 }
4158 
4159 #undef __FUNCT__
4160 #define __FUNCT__ "MatMPIAIJGetSeqAIJ"
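/*
   MatMPIAIJGetSeqAIJ - gives access to the two SeqAIJ matrices that make up the local part of
   an MPIAIJ matrix: the DIAGONAL block Ad, the OFF-DIAGONAL block Ao, and the map (colmap) from
   the local column numbering of Ao to global column indices.
*/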
4161 PetscErrorCode  MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,PetscInt *colmap[])
4162 {
4163   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
4164 
4165   PetscFunctionBegin;
4166   *Ad     = a->A;
4167   *Ao     = a->B;
4168   *colmap = a->garray;
4169   PetscFunctionReturn(0);
4170 }
4171 
4172 #undef __FUNCT__
4173 #define __FUNCT__ "MatSetColoring_MPIAIJ"
4174 PetscErrorCode MatSetColoring_MPIAIJ(Mat A,ISColoring coloring)
4175 {
4176   PetscErrorCode ierr;
4177   PetscInt       i;
4178   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
4179 
4180   PetscFunctionBegin;
4181   if (coloring->ctype == IS_COLORING_GLOBAL) {
4182     ISColoringValue *allcolors,*colors;
4183     ISColoring      ocoloring;
4184 
4185     /* set coloring for diagonal portion */
4186     ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr);
4187 
4188     /* set coloring for off-diagonal portion */
4189     ierr = ISAllGatherColors(((PetscObject)A)->comm,coloring->n,coloring->colors,PETSC_NULL,&allcolors);CHKERRQ(ierr);
4190     ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
4191     for (i=0; i<a->B->cmap->n; i++) {
4192       colors[i] = allcolors[a->garray[i]];
4193     }
4194     ierr = PetscFree(allcolors);CHKERRQ(ierr);
4195     ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
4196     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
4197     ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr);
4198   } else if (coloring->ctype == IS_COLORING_GHOSTED) {
4199     ISColoringValue *colors;
4200     PetscInt        *larray;
4201     ISColoring      ocoloring;
4202 
4203     /* set coloring for diagonal portion */
4204     ierr = PetscMalloc((a->A->cmap->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr);
4205     for (i=0; i<a->A->cmap->n; i++) {
4206       larray[i] = i + A->cmap->rstart;
4207     }
4208     ierr = ISGlobalToLocalMappingApply(A->cmap->mapping,IS_GTOLM_MASK,a->A->cmap->n,larray,PETSC_NULL,larray);CHKERRQ(ierr);
4209     ierr = PetscMalloc((a->A->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
4210     for (i=0; i<a->A->cmap->n; i++) {
4211       colors[i] = coloring->colors[larray[i]];
4212     }
4213     ierr = PetscFree(larray);CHKERRQ(ierr);
4214     ierr = ISColoringCreate(PETSC_COMM_SELF,coloring->n,a->A->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
4215     ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr);
4216     ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr);
4217 
4218     /* set coloring for off-diagonal portion */
4219     ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr);
4220     ierr = ISGlobalToLocalMappingApply(A->cmap->mapping,IS_GTOLM_MASK,a->B->cmap->n,a->garray,PETSC_NULL,larray);CHKERRQ(ierr);
4221     ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
4222     for (i=0; i<a->B->cmap->n; i++) {
4223       colors[i] = coloring->colors[larray[i]];
4224     }
4225     ierr = PetscFree(larray);CHKERRQ(ierr);
4226     ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
4227     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
4228     ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr);
4229   } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support ISColoringType %d",(int)coloring->ctype);
4230 
4231   PetscFunctionReturn(0);
4232 }
4233 
4234 #if defined(PETSC_HAVE_ADIC)
4235 #undef __FUNCT__
4236 #define __FUNCT__ "MatSetValuesAdic_MPIAIJ"
4237 PetscErrorCode MatSetValuesAdic_MPIAIJ(Mat A,void *advalues)
4238 {
4239   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
4240   PetscErrorCode ierr;
4241 
4242   PetscFunctionBegin;
4243   ierr = MatSetValuesAdic_SeqAIJ(a->A,advalues);CHKERRQ(ierr);
4244   ierr = MatSetValuesAdic_SeqAIJ(a->B,advalues);CHKERRQ(ierr);
4245   PetscFunctionReturn(0);
4246 }
4247 #endif
4248 
4249 #undef __FUNCT__
4250 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ"
4251 PetscErrorCode MatSetValuesAdifor_MPIAIJ(Mat A,PetscInt nl,void *advalues)
4252 {
4253   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
4254   PetscErrorCode ierr;
4255 
4256   PetscFunctionBegin;
4257   ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr);
4258   ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr);
4259   PetscFunctionReturn(0);
4260 }
4261 
4262 #undef __FUNCT__
4263 #define __FUNCT__ "MatMergeSymbolic"
4264 PetscErrorCode  MatMergeSymbolic(MPI_Comm comm,Mat inmat,PetscInt n,Mat *outmat)
4265 {
4266   PetscErrorCode ierr;
4267   PetscInt       m,N,i,rstart,nnz,*dnz,*onz;
4268   PetscInt       *indx;
4269 
4270   PetscFunctionBegin;
4271   /* This routine will ONLY return MPIAIJ type matrix */
4272   ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
4273   if (n == PETSC_DECIDE){
4274     ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr);
4275   }
4276   ierr = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
4277   rstart -= m;
4278 
4279   ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
4280   for (i=0;i<m;i++) {
4281     ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr);
4282     ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr);
4283     ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr);
4284   }
4285 
4286   ierr = MatCreate(comm,outmat);CHKERRQ(ierr);
4287   ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
4288   ierr = MatSetType(*outmat,MATMPIAIJ);CHKERRQ(ierr);
4289   ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr);
4290   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
4291   PetscFunctionReturn(0);
4292 }
4293 
4294 #undef __FUNCT__
4295 #define __FUNCT__ "MatMergeNumeric"
4296 PetscErrorCode  MatMergeNumeric(MPI_Comm comm,Mat inmat,PetscInt n,Mat outmat)
4297 {
4298   PetscErrorCode ierr;
4299   PetscInt       m,N,i,rstart,nnz,Ii;
4300   PetscInt       *indx;
4301   PetscScalar    *values;
4302 
4303   PetscFunctionBegin;
4304   ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
4305   ierr = MatGetOwnershipRange(outmat,&rstart,PETSC_NULL);CHKERRQ(ierr);
4306   for (i=0;i<m;i++) {
4307     ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
4308     Ii    = i + rstart;
4309     ierr = MatSetValues(outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
4310     ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
4311   }
4312   ierr = MatDestroy(&inmat);CHKERRQ(ierr);
4313   ierr = MatAssemblyBegin(outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4314   ierr = MatAssemblyEnd(outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4315   PetscFunctionReturn(0);
4316 }
4317 
4318 #undef __FUNCT__
4319 #define __FUNCT__ "MatMerge"
4320 /*@
4321       MatMerge - Creates a single large PETSc matrix by concatenating sequential
4322                  matrices from each processor
4323 
4324     Collective on MPI_Comm
4325 
4326    Input Parameters:
4327 +    comm - the communicator the parallel matrix will live on
4328 .    inmat - the input sequential matrices
4329 .    n - number of local columns (or PETSC_DECIDE)
4330 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4331 
4332    Output Parameter:
4333 .    outmat - the parallel matrix generated
4334 
4335     Level: advanced
4336 
4337    Notes: The number of columns of the matrix in EACH processor MUST be the same.
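
   A usage sketch (each process passes its own sequential matrix; note that the numeric
   phase destroys the input matrix, see MatMergeNumeric() above):
.vb
     Mat outmat;
     ierr = MatMerge(comm,inmat,PETSC_DECIDE,MAT_INITIAL_MATRIX,&outmat);CHKERRQ(ierr);
.ve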
4338 
4339 @*/
4340 PetscErrorCode  MatMerge(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
4341 {
4342   PetscErrorCode ierr;
4343 
4344   PetscFunctionBegin;
4345   ierr = PetscLogEventBegin(MAT_Merge,inmat,0,0,0);CHKERRQ(ierr);
4346   if (scall == MAT_INITIAL_MATRIX){
4347     ierr = MatMergeSymbolic(comm,inmat,n,outmat);CHKERRQ(ierr);
4348   }
4349   ierr = MatMergeNumeric(comm,inmat,n,*outmat);CHKERRQ(ierr);
4350   ierr = PetscLogEventEnd(MAT_Merge,inmat,0,0,0);CHKERRQ(ierr);
4351   PetscFunctionReturn(0);
4352 }
4353 
4354 #undef __FUNCT__
4355 #define __FUNCT__ "MatFileSplit"
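/*
   MatFileSplit - copies the local rows of A into a sequential matrix on each process and
   appends that matrix to the binary file <outfile>.<rank>, one file per MPI rank.
*/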
4356 PetscErrorCode MatFileSplit(Mat A,char *outfile)
4357 {
4358   PetscErrorCode    ierr;
4359   PetscMPIInt       rank;
4360   PetscInt          m,N,i,rstart,nnz;
4361   size_t            len;
4362   const PetscInt    *indx;
4363   PetscViewer       out;
4364   char              *name;
4365   Mat               B;
4366   const PetscScalar *values;
4367 
4368   PetscFunctionBegin;
4369   ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr);
4370   ierr = MatGetSize(A,0,&N);CHKERRQ(ierr);
4371   /* Should this be the type of the diagonal block of A? */
4372   ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr);
4373   ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr);
4374   ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr);
4375   ierr = MatSeqAIJSetPreallocation(B,0,PETSC_NULL);CHKERRQ(ierr);
4376   ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr);
4377   for (i=0;i<m;i++) {
4378     ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
4379     ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
4380     ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
4381   }
4382   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4383   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4384 
4385   ierr = MPI_Comm_rank(((PetscObject)A)->comm,&rank);CHKERRQ(ierr);
4386   ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr);
4387   ierr = PetscMalloc((len+5)*sizeof(char),&name);CHKERRQ(ierr);
4388   sprintf(name,"%s.%d",outfile,rank);
4389   ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr);
4390   ierr = PetscFree(name);
4391   ierr = MatView(B,out);CHKERRQ(ierr);
4392   ierr = PetscViewerDestroy(&out);CHKERRQ(ierr);
4393   ierr = MatDestroy(&B);CHKERRQ(ierr);
4394   PetscFunctionReturn(0);
4395 }
4396 
4397 extern PetscErrorCode MatDestroy_MPIAIJ(Mat);
4398 #undef __FUNCT__
4399 #define __FUNCT__ "MatDestroy_MPIAIJ_SeqsToMPI"
4400 PetscErrorCode  MatDestroy_MPIAIJ_SeqsToMPI(Mat A)
4401 {
4402   PetscErrorCode       ierr;
4403   Mat_Merge_SeqsToMPI  *merge;
4404   PetscContainer       container;
4405 
4406   PetscFunctionBegin;
4407   ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr);
4408   if (container) {
4409     ierr = PetscContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr);
4410     ierr = PetscFree(merge->id_r);CHKERRQ(ierr);
4411     ierr = PetscFree(merge->len_s);CHKERRQ(ierr);
4412     ierr = PetscFree(merge->len_r);CHKERRQ(ierr);
4413     ierr = PetscFree(merge->bi);CHKERRQ(ierr);
4414     ierr = PetscFree(merge->bj);CHKERRQ(ierr);
4415     ierr = PetscFree(merge->buf_ri[0]);CHKERRQ(ierr);
4416     ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr);
4417     ierr = PetscFree(merge->buf_rj[0]);CHKERRQ(ierr);
4418     ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr);
4419     ierr = PetscFree(merge->coi);CHKERRQ(ierr);
4420     ierr = PetscFree(merge->coj);CHKERRQ(ierr);
4421     ierr = PetscFree(merge->owners_co);CHKERRQ(ierr);
4422     ierr = PetscLayoutDestroy(&merge->rowmap);CHKERRQ(ierr);
4423     ierr = PetscFree(merge);CHKERRQ(ierr);
4424     ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr);
4425   }
4426   ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr);
4427   PetscFunctionReturn(0);
4428 }
4429 
4430 #include <../src/mat/utils/freespace.h>
4431 #include <petscbt.h>
4432 
4433 #undef __FUNCT__
4434 #define __FUNCT__ "MatMerge_SeqsToMPINumeric"
4435 /*@C
4436       MatMerge_SeqsToMPI - Creates an MPIAIJ matrix by adding sequential
4437                  matrices from each processor
4438 
4439     Collective on MPI_Comm
4440 
4441    Input Parameters:
4442 +    comm - the communicator the parallel matrix will live on
4443 .    seqmat - the input sequential matrices
4444 .    m - number of local rows (or PETSC_DECIDE)
4445 .    n - number of local columns (or PETSC_DECIDE)
4446 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4447 
4448    Output Parameter:
4449 .    mpimat - the parallel matrix generated
4450 
4451     Level: advanced
4452 
4453    Notes:
4454      The dimensions of the sequential matrix in each processor MUST be the same.
4455      The input seqmat is included in the container "Mat_Merge_SeqsToMPI", and will be
4456      destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat.
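
   A usage sketch (each process contributes a SeqAIJ matrix seqmat of identical dimensions):
.vb
     Mat mpimat;
     ierr = MatMerge_SeqsToMPI(comm,seqmat,PETSC_DECIDE,PETSC_DECIDE,MAT_INITIAL_MATRIX,&mpimat);CHKERRQ(ierr);
.ve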
4457 @*/
4458 PetscErrorCode  MatMerge_SeqsToMPINumeric(Mat seqmat,Mat mpimat)
4459 {
4460   PetscErrorCode       ierr;
4461   MPI_Comm             comm=((PetscObject)mpimat)->comm;
4462   Mat_SeqAIJ           *a=(Mat_SeqAIJ*)seqmat->data;
4463   PetscMPIInt          size,rank,taga,*len_s;
4464   PetscInt             N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj=a->j;
4465   PetscInt             proc,m;
4466   PetscInt             **buf_ri,**buf_rj;
4467   PetscInt             k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj;
4468   PetscInt             nrows,**buf_ri_k,**nextrow,**nextai;
4469   MPI_Request          *s_waits,*r_waits;
4470   MPI_Status           *status;
4471   MatScalar            *aa=a->a;
4472   MatScalar            **abuf_r,*ba_i;
4473   Mat_Merge_SeqsToMPI  *merge;
4474   PetscContainer       container;
4475 
4476   PetscFunctionBegin;
4477   ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
4478 
4479   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4480   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4481 
4482   ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr);
4483   ierr  = PetscContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr);
4484 
4485   bi     = merge->bi;
4486   bj     = merge->bj;
4487   buf_ri = merge->buf_ri;
4488   buf_rj = merge->buf_rj;
4489 
4490   ierr   = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr);
4491   owners = merge->rowmap->range;
4492   len_s  = merge->len_s;
4493 
4494   /* send and recv matrix values */
4495   /*-----------------------------*/
4496   ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr);
4497   ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr);
4498 
4499   ierr = PetscMalloc((merge->nsend+1)*sizeof(MPI_Request),&s_waits);CHKERRQ(ierr);
4500   for (proc=0,k=0; proc<size; proc++){
4501     if (!len_s[proc]) continue;
4502     i = owners[proc];
4503     ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr);
4504     k++;
4505   }
4506 
4507   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);}
4508   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);}
4509   ierr = PetscFree(status);CHKERRQ(ierr);
4510 
4511   ierr = PetscFree(s_waits);CHKERRQ(ierr);
4512   ierr = PetscFree(r_waits);CHKERRQ(ierr);
4513 
4514   /* insert mat values of mpimat */
4515   /*----------------------------*/
4516   ierr = PetscMalloc(N*sizeof(PetscScalar),&ba_i);CHKERRQ(ierr);
4517   ierr = PetscMalloc3(merge->nrecv,PetscInt*,&buf_ri_k,merge->nrecv,PetscInt*,&nextrow,merge->nrecv,PetscInt*,&nextai);CHKERRQ(ierr);
4518 
4519   for (k=0; k<merge->nrecv; k++){
4520     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
4521     nrows = *(buf_ri_k[k]);
4522     nextrow[k]  = buf_ri_k[k]+1;  /* next row number of k-th recved i-structure */
4523     nextai[k]   = buf_ri_k[k] + (nrows + 1);/* points to the next i-structure of k-th recved i-structure  */
4524   }
4525 
4526   /* set values of ba */
4527   m = merge->rowmap->n;
4528   for (i=0; i<m; i++) {
4529     arow = owners[rank] + i;
4530     bj_i = bj+bi[i];  /* col indices of the i-th row of mpimat */
4531     bnzi = bi[i+1] - bi[i];
4532     ierr = PetscMemzero(ba_i,bnzi*sizeof(PetscScalar));CHKERRQ(ierr);
4533 
4534     /* add local non-zero vals of this proc's seqmat into ba */
4535     anzi = ai[arow+1] - ai[arow];
4536     aj   = a->j + ai[arow];
4537     aa   = a->a + ai[arow];
4538     nextaj = 0;
4539     for (j=0; nextaj<anzi; j++){
4540       if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */
4541         ba_i[j] += aa[nextaj++];
4542       }
4543     }
4544 
4545     /* add received vals into ba */
4546     for (k=0; k<merge->nrecv; k++){ /* k-th received message */
4547       /* i-th row */
4548       if (i == *nextrow[k]) {
4549         anzi = *(nextai[k]+1) - *nextai[k];
4550         aj   = buf_rj[k] + *(nextai[k]);
4551         aa   = abuf_r[k] + *(nextai[k]);
4552         nextaj = 0;
4553         for (j=0; nextaj<anzi; j++){
4554           if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */
4555             ba_i[j] += aa[nextaj++];
4556           }
4557         }
4558         nextrow[k]++; nextai[k]++;
4559       }
4560     }
4561     ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr);
4562   }
4563   ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4564   ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4565 
4566   ierr = PetscFree(abuf_r[0]);CHKERRQ(ierr);
4567   ierr = PetscFree(abuf_r);CHKERRQ(ierr);
4568   ierr = PetscFree(ba_i);CHKERRQ(ierr);
4569   ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
4570   ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
4571   PetscFunctionReturn(0);
4572 }
4573 
4574 extern PetscErrorCode  MatDestroy_MPIAIJ_SeqsToMPI(Mat);
4575 
4576 #undef __FUNCT__
4577 #define __FUNCT__ "MatMerge_SeqsToMPISymbolic"
4578 PetscErrorCode  MatMerge_SeqsToMPISymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat)
4579 {
4580   PetscErrorCode       ierr;
4581   Mat                  B_mpi;
4582   Mat_SeqAIJ           *a=(Mat_SeqAIJ*)seqmat->data;
4583   PetscMPIInt          size,rank,tagi,tagj,*len_s,*len_si,*len_ri;
4584   PetscInt             **buf_rj,**buf_ri,**buf_ri_k;
4585   PetscInt             M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j;
4586   PetscInt             len,proc,*dnz,*onz;
4587   PetscInt             k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0;
4588   PetscInt             nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai;
4589   MPI_Request          *si_waits,*sj_waits,*ri_waits,*rj_waits;
4590   MPI_Status           *status;
4591   PetscFreeSpaceList   free_space=PETSC_NULL,current_space=PETSC_NULL;
4592   PetscBT              lnkbt;
4593   Mat_Merge_SeqsToMPI  *merge;
4594   PetscContainer       container;
4595 
4596   PetscFunctionBegin;
4597   ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4598 
4599   /* make sure it is a PETSc comm */
4600   ierr = PetscCommDuplicate(comm,&comm,PETSC_NULL);CHKERRQ(ierr);
4601   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4602   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4603 
4604   ierr = PetscNew(Mat_Merge_SeqsToMPI,&merge);CHKERRQ(ierr);
4605   ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr);
4606 
4607   /* determine row ownership */
4608   /*---------------------------------------------------------*/
4609   ierr = PetscLayoutCreate(comm,&merge->rowmap);CHKERRQ(ierr);
4610   ierr = PetscLayoutSetLocalSize(merge->rowmap,m);CHKERRQ(ierr);
4611   ierr = PetscLayoutSetSize(merge->rowmap,M);CHKERRQ(ierr);
4612   ierr = PetscLayoutSetBlockSize(merge->rowmap,1);CHKERRQ(ierr);
4613   ierr = PetscLayoutSetUp(merge->rowmap);CHKERRQ(ierr);
4614   ierr = PetscMalloc(size*sizeof(PetscMPIInt),&len_si);CHKERRQ(ierr);
4615   ierr = PetscMalloc(size*sizeof(PetscMPIInt),&merge->len_s);CHKERRQ(ierr);
4616 
4617   m      = merge->rowmap->n;
4618   M      = merge->rowmap->N;
4619   owners = merge->rowmap->range;
4620 
4621   /* determine the number of messages to send, their lengths */
4622   /*---------------------------------------------------------*/
4623   len_s  = merge->len_s;
4624 
4625   len = 0;  /* length of buf_si[] */
4626   merge->nsend = 0;
4627   for (proc=0; proc<size; proc++){
4628     len_si[proc] = 0;
4629     if (proc == rank){
4630       len_s[proc] = 0;
4631     } else {
4632       len_si[proc] = owners[proc+1] - owners[proc] + 1;
4633       len_s[proc] = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */
4634     }
4635     if (len_s[proc]) {
4636       merge->nsend++;
4637       nrows = 0;
4638       for (i=owners[proc]; i<owners[proc+1]; i++){
4639         if (ai[i+1] > ai[i]) nrows++;
4640       }
4641       len_si[proc] = 2*(nrows+1);
4642       len += len_si[proc];
4643     }
4644   }
4645 
4646   /* determine the number and length of messages to receive for ij-structure */
4647   /*-------------------------------------------------------------------------*/
4648   ierr = PetscGatherNumberOfMessages(comm,PETSC_NULL,len_s,&merge->nrecv);CHKERRQ(ierr);
4649   ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr);
4650 
4651   /* post the Irecv of j-structure */
4652   /*-------------------------------*/
4653   ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr);
4654   ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr);
4655 
4656   /* post the Isend of j-structure */
4657   /*--------------------------------*/
4658   ierr = PetscMalloc2(merge->nsend,MPI_Request,&si_waits,merge->nsend,MPI_Request,&sj_waits);CHKERRQ(ierr);
4659 
4660   for (proc=0, k=0; proc<size; proc++){
4661     if (!len_s[proc]) continue;
4662     i = owners[proc];
4663     ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr);
4664     k++;
4665   }
4666 
4667   /* receives and sends of j-structure are complete */
4668   /*------------------------------------------------*/
4669   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);}
4670   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);}
4671 
4672   /* send and recv i-structure */
4673   /*---------------------------*/
4674   ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr);
4675   ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr);
4676 
4677   ierr = PetscMalloc((len+1)*sizeof(PetscInt),&buf_s);CHKERRQ(ierr);
4678   buf_si = buf_s;  /* points to the beginning of k-th msg to be sent */
4679   for (proc=0,k=0; proc<size; proc++){
4680     if (!len_s[proc]) continue;
4681     /* form outgoing message for i-structure:
4682          buf_si[0]:                 nrows to be sent
4683                [1:nrows]:           row index (global)
4684                [nrows+1:2*nrows+1]: i-structure index
4685     */
4686     /*-------------------------------------------*/
4687     nrows = len_si[proc]/2 - 1;
4688     buf_si_i    = buf_si + nrows+1;
4689     buf_si[0]   = nrows;
4690     buf_si_i[0] = 0;
4691     nrows = 0;
4692     for (i=owners[proc]; i<owners[proc+1]; i++){
4693       anzi = ai[i+1] - ai[i];
4694       if (anzi) {
4695         buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */
4696         buf_si[nrows+1] = i-owners[proc]; /* local row index */
4697         nrows++;
4698       }
4699     }
4700     ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr);
4701     k++;
4702     buf_si += len_si[proc];
4703   }
4704 
4705   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);}
4706   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);}
4707 
4708   ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr);
4709   for (i=0; i<merge->nrecv; i++){
4710     ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr);
4711   }
4712 
4713   ierr = PetscFree(len_si);CHKERRQ(ierr);
4714   ierr = PetscFree(len_ri);CHKERRQ(ierr);
4715   ierr = PetscFree(rj_waits);CHKERRQ(ierr);
4716   ierr = PetscFree2(si_waits,sj_waits);CHKERRQ(ierr);
4717   ierr = PetscFree(ri_waits);CHKERRQ(ierr);
4718   ierr = PetscFree(buf_s);CHKERRQ(ierr);
4719   ierr = PetscFree(status);CHKERRQ(ierr);
4720 
4721   /* compute a local seq matrix in each processor */
4722   /*----------------------------------------------*/
4723   /* allocate bi array and free space for accumulating nonzero column info */
4724   ierr = PetscMalloc((m+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr);
4725   bi[0] = 0;
4726 
4727   /* create and initialize a linked list */
4728   nlnk = N+1;
4729   ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4730 
4731   /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */
4732   len = 0;
4733   len  = ai[owners[rank+1]] - ai[owners[rank]];
4734   ierr = PetscFreeSpaceGet((PetscInt)(2*len+1),&free_space);CHKERRQ(ierr);
4735   current_space = free_space;
4736 
4737   /* determine symbolic info for each local row */
4738   ierr = PetscMalloc3(merge->nrecv,PetscInt*,&buf_ri_k,merge->nrecv,PetscInt*,&nextrow,merge->nrecv,PetscInt*,&nextai);CHKERRQ(ierr);
4739 
4740   for (k=0; k<merge->nrecv; k++){
4741     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
4742     nrows = *buf_ri_k[k];
4743     nextrow[k]  = buf_ri_k[k] + 1;  /* next row number of k-th recved i-structure */
4744     nextai[k]   = buf_ri_k[k] + (nrows + 1);/* points to the next i-structure of k-th recved i-structure  */
4745   }
4746 
4747   ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
4748   len = 0;
4749   for (i=0;i<m;i++) {
4750     bnzi   = 0;
4751     /* add local non-zero cols of this proc's seqmat into lnk */
4752     arow   = owners[rank] + i;
4753     anzi   = ai[arow+1] - ai[arow];
4754     aj     = a->j + ai[arow];
4755     ierr = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4756     bnzi += nlnk;
4757     /* add received col data into lnk */
4758     for (k=0; k<merge->nrecv; k++){ /* k-th received message */
4759       if (i == *nextrow[k]) { /* i-th row */
4760         anzi = *(nextai[k]+1) - *nextai[k];
4761         aj   = buf_rj[k] + *nextai[k];
4762         ierr = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4763         bnzi += nlnk;
4764         nextrow[k]++; nextai[k]++;
4765       }
4766     }
4767     if (len < bnzi) len = bnzi;  /* =max(bnzi) */
4768 
4769     /* if free space is not available, make more free space */
4770     if (current_space->local_remaining<bnzi) {
4771       ierr = PetscFreeSpaceGet(bnzi+current_space->total_array_size,&current_space);CHKERRQ(ierr);
4772       nspacedouble++;
4773     }
4774     /* copy data into free space, then initialize lnk */
4775     ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
4776     ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr);
4777 
4778     current_space->array           += bnzi;
4779     current_space->local_used      += bnzi;
4780     current_space->local_remaining -= bnzi;
4781 
4782     bi[i+1] = bi[i] + bnzi;
4783   }
4784 
4785   ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
4786 
4787   ierr = PetscMalloc((bi[m]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr);
4788   ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr);
4789   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
4790 
4791   /* create symbolic parallel matrix B_mpi */
4792   /*---------------------------------------*/
4793   ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr);
4794   if (n==PETSC_DECIDE) {
4795     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr);
4796   } else {
4797     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
4798   }
4799   ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr);
4800   ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr);
4801   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
4802 
4803   /* B_mpi is not ready for use - assembly will be done by MatMerge_SeqsToMPINumeric() */
4804   B_mpi->assembled     = PETSC_FALSE;
4805   B_mpi->ops->destroy  = MatDestroy_MPIAIJ_SeqsToMPI;
4806   merge->bi            = bi;
4807   merge->bj            = bj;
4808   merge->buf_ri        = buf_ri;
4809   merge->buf_rj        = buf_rj;
4810   merge->coi           = PETSC_NULL;
4811   merge->coj           = PETSC_NULL;
4812   merge->owners_co     = PETSC_NULL;
4813 
4814   ierr = PetscCommDestroy(&comm);CHKERRQ(ierr);
4815 
4816   /* attach the supporting struct to B_mpi for reuse */
4817   ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
4818   ierr = PetscContainerSetPointer(container,merge);CHKERRQ(ierr);
4819   ierr = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr);
4820   ierr = PetscContainerDestroy(&container);CHKERRQ(ierr);
4821   *mpimat = B_mpi;
4822 
4823   ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4824   PetscFunctionReturn(0);
4825 }
4826 
4827 #undef __FUNCT__
4828 #define __FUNCT__ "MatMerge_SeqsToMPI"
4829 PetscErrorCode  MatMerge_SeqsToMPI(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat)
4830 {
4831   PetscErrorCode   ierr;
4832 
4833   PetscFunctionBegin;
4834   ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4835   if (scall == MAT_INITIAL_MATRIX){
4836     ierr = MatMerge_SeqsToMPISymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr);
4837   }
4838   ierr = MatMerge_SeqsToMPINumeric(seqmat,*mpimat);CHKERRQ(ierr);
4839   ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4840   PetscFunctionReturn(0);
4841 }
4842 
4843 #undef __FUNCT__
4844 #define __FUNCT__ "MatMPIAIJGetLocalMat"
4845 /*@
4846      MatMPIAIJGetLocalMat - Creates a SeqAIJ matrix from an MPIAIJ matrix by taking all its local rows and putting them into a sequential matrix with
4847           mlocal rows and n columns, where mlocal is the row count obtained with MatGetLocalSize() and n is the global column count obtained
4848           with MatGetSize()
4849 
4850     Not Collective
4851 
4852    Input Parameters:
4853 +    A - the matrix
4854 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4855 
4856    Output Parameter:
4857 .    A_loc - the local sequential matrix generated
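
   A usage sketch (create once with MAT_INITIAL_MATRIX, then refresh the values with
   MAT_REUSE_MATRIX after the parallel matrix has changed numerically):
.vb
     Mat A_loc;
     ierr = MatMPIAIJGetLocalMat(A,MAT_INITIAL_MATRIX,&A_loc);CHKERRQ(ierr);
     /* ... use A_loc ... */
     ierr = MatMPIAIJGetLocalMat(A,MAT_REUSE_MATRIX,&A_loc);CHKERRQ(ierr);
     ierr = MatDestroy(&A_loc);CHKERRQ(ierr);  /* A_loc owns its arrays; they are freed here */
.ve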
4858 
4859     Level: developer
4860 
4861 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMatCondensed()
4862 
4863 @*/
4864 PetscErrorCode  MatMPIAIJGetLocalMat(Mat A,MatReuse scall,Mat *A_loc)
4865 {
4866   PetscErrorCode  ierr;
4867   Mat_MPIAIJ      *mpimat=(Mat_MPIAIJ*)A->data;
4868   Mat_SeqAIJ      *mat,*a=(Mat_SeqAIJ*)(mpimat->A)->data,*b=(Mat_SeqAIJ*)(mpimat->B)->data;
4869   PetscInt        *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*cmap=mpimat->garray;
4870   MatScalar       *aa=a->a,*ba=b->a,*cam;
4871   PetscScalar     *ca;
4872   PetscInt        am=A->rmap->n,i,j,k,cstart=A->cmap->rstart;
4873   PetscInt        *ci,*cj,col,ncols_d,ncols_o,jo;
4874   PetscBool       match;
4875 
4876   PetscFunctionBegin;
4877   ierr = PetscTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr);
4878   if (!match) SETERRQ(((PetscObject)A)->comm, PETSC_ERR_SUP,"Requires MPIAIJ matrix as input");
4879   ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
4880   if (scall == MAT_INITIAL_MATRIX){
4881     ierr = PetscMalloc((1+am)*sizeof(PetscInt),&ci);CHKERRQ(ierr);
4882     ci[0] = 0;
4883     for (i=0; i<am; i++){
4884       ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]);
4885     }
4886     ierr = PetscMalloc((1+ci[am])*sizeof(PetscInt),&cj);CHKERRQ(ierr);
4887     ierr = PetscMalloc((1+ci[am])*sizeof(PetscScalar),&ca);CHKERRQ(ierr);
4888     k = 0;
4889     for (i=0; i<am; i++) {
4890       ncols_o = bi[i+1] - bi[i];
4891       ncols_d = ai[i+1] - ai[i];
4892       /* off-diagonal portion of A */
4893       for (jo=0; jo<ncols_o; jo++) {
4894         col = cmap[*bj];
4895         if (col >= cstart) break;
4896         cj[k]   = col; bj++;
4897         ca[k++] = *ba++;
4898       }
4899       /* diagonal portion of A */
4900       for (j=0; j<ncols_d; j++) {
4901         cj[k]   = cstart + *aj++;
4902         ca[k++] = *aa++;
4903       }
4904       /* off-diagonal portion of A */
4905       for (j=jo; j<ncols_o; j++) {
4906         cj[k]   = cmap[*bj++];
4907         ca[k++] = *ba++;
4908       }
4909     }
4910     /* put together the new matrix */
4911     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr);
4912     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
4913     /* Since these are PETSc arrays, change flags to free them as necessary. */
4914     mat          = (Mat_SeqAIJ*)(*A_loc)->data;
4915     mat->free_a  = PETSC_TRUE;
4916     mat->free_ij = PETSC_TRUE;
4917     mat->nonew   = 0;
4918   } else if (scall == MAT_REUSE_MATRIX){
4919     mat=(Mat_SeqAIJ*)(*A_loc)->data;
4920     ci = mat->i; cj = mat->j; cam = mat->a;
4921     for (i=0; i<am; i++) {
4922       /* off-diagonal portion of A */
4923       ncols_o = bi[i+1] - bi[i];
4924       for (jo=0; jo<ncols_o; jo++) {
4925         col = cmap[*bj];
4926         if (col >= cstart) break;
4927         *cam++ = *ba++; bj++;
4928       }
4929       /* diagonal portion of A */
4930       ncols_d = ai[i+1] - ai[i];
4931       for (j=0; j<ncols_d; j++) *cam++ = *aa++;
4932       /* off-diagonal portion of A */
4933       for (j=jo; j<ncols_o; j++) {
4934         *cam++ = *ba++; bj++;
4935       }
4936     }
4937   } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
4938   ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
4939   PetscFunctionReturn(0);
4940 }
4941 
4942 #undef __FUNCT__
4943 #define __FUNCT__ "MatMPIAIJGetLocalMatCondensed"
4944 /*@C
4945      MatMPIAIJGetLocalMatCondensed - Creates a SeqAIJ matrix from an MPIAIJ matrix by taking all its local rows and NON-ZERO columns
4946 
4947     Not Collective
4948 
4949    Input Parameters:
4950 +    A - the matrix
4951 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4952 -    row, col - index sets of rows and columns to extract (or PETSC_NULL)
4953 
4954    Output Parameter:
4955 .    A_loc - the local sequential matrix generated
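
   A usage sketch (passing PETSC_NULL for row and col selects all local rows and the
   nonzero columns):
.vb
     Mat A_loc;
     ierr = MatMPIAIJGetLocalMatCondensed(A,MAT_INITIAL_MATRIX,PETSC_NULL,PETSC_NULL,&A_loc);CHKERRQ(ierr);
.ve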
4956 
4957     Level: developer
4958 
4959 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMat()
4960 
4961 @*/
4962 PetscErrorCode  MatMPIAIJGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc)
4963 {
4964   Mat_MPIAIJ        *a=(Mat_MPIAIJ*)A->data;
4965   PetscErrorCode    ierr;
4966   PetscInt          i,start,end,ncols,nzA,nzB,*cmap,imark,*idx;
4967   IS                isrowa,iscola;
4968   Mat               *aloc;
4969   PetscBool       match;
4970 
4971   PetscFunctionBegin;
4972   ierr = PetscTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr);
4973   if (!match) SETERRQ(((PetscObject)A)->comm, PETSC_ERR_SUP,"Requires MPIAIJ matrix as input");
4974   ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
4975   if (!row){
4976     start = A->rmap->rstart; end = A->rmap->rend;
4977     ierr = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr);
4978   } else {
4979     isrowa = *row;
4980   }
4981   if (!col){
4982     start = A->cmap->rstart;
4983     cmap  = a->garray;
4984     nzA   = a->A->cmap->n;
4985     nzB   = a->B->cmap->n;
4986     ierr  = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr);
4987     ncols = 0;
4988     for (i=0; i<nzB; i++) {
4989       if (cmap[i] < start) idx[ncols++] = cmap[i];
4990       else break;
4991     }
4992     imark = i;
4993     for (i=0; i<nzA; i++) idx[ncols++] = start + i;
4994     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i];
4995     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&iscola);CHKERRQ(ierr);
4996   } else {
4997     iscola = *col;
4998   }
4999   if (scall != MAT_INITIAL_MATRIX){
5000     ierr = PetscMalloc(sizeof(Mat),&aloc);CHKERRQ(ierr);
5001     aloc[0] = *A_loc;
5002   }
5003   ierr = MatGetSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr);
5004   *A_loc = aloc[0];
5005   ierr = PetscFree(aloc);CHKERRQ(ierr);
5006   if (!row){
5007     ierr = ISDestroy(&isrowa);CHKERRQ(ierr);
5008   }
5009   if (!col){
5010     ierr = ISDestroy(&iscola);CHKERRQ(ierr);
5011   }
5012   ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
5013   PetscFunctionReturn(0);
5014 }
5015 
5016 #undef __FUNCT__
5017 #define __FUNCT__ "MatGetBrowsOfAcols"
5018 /*@C
5019     MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns of local A
5020 
5021     Collective on Mat
5022 
5023    Input Parameters:
5024 +    A,B - the matrices in mpiaij format
5025 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5026 -    rowb, colb - index sets of rows and columns of B to extract (or PETSC_NULL)
5027 
5028    Output Parameters:
5029 +    rowb, colb - index sets of rows and columns of B to extract
5030 -    B_seq - the sequential matrix generated
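
   A usage sketch (the index sets returned by the first call must be passed back in for
   MAT_REUSE_MATRIX):
.vb
     IS  rowb,colb;
     Mat B_seq;
     ierr = MatGetBrowsOfAcols(A,B,MAT_INITIAL_MATRIX,&rowb,&colb,&B_seq);CHKERRQ(ierr);
     /* ... after B changes numerically ... */
     ierr = MatGetBrowsOfAcols(A,B,MAT_REUSE_MATRIX,&rowb,&colb,&B_seq);CHKERRQ(ierr);
.ve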
5031 
5032     Level: developer
5033 
5034 @*/
5035 PetscErrorCode  MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,Mat *B_seq)
5036 {
5037   Mat_MPIAIJ        *a=(Mat_MPIAIJ*)A->data;
5038   PetscErrorCode    ierr;
5039   PetscInt          *idx,i,start,ncols,nzA,nzB,*cmap,imark;
5040   IS                isrowb,iscolb;
5041   Mat               *bseq=PETSC_NULL;
5042 
5043   PetscFunctionBegin;
5044   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend){
5045     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
5046   }
5047   ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
5048 
5049   if (scall == MAT_INITIAL_MATRIX){
5050     start = A->cmap->rstart;
5051     cmap  = a->garray;
5052     nzA   = a->A->cmap->n;
5053     nzB   = a->B->cmap->n;
5054     ierr  = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr);
5055     ncols = 0;
5056     for (i=0; i<nzB; i++) {  /* row < local row index */
5057       if (cmap[i] < start) idx[ncols++] = cmap[i];
5058       else break;
5059     }
5060     imark = i;
5061     for (i=0; i<nzA; i++) idx[ncols++] = start + i;  /* local rows */
5062     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */
5063     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&isrowb);CHKERRQ(ierr);
5064     ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr);
5065   } else {
5066     if (!rowb || !colb) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX");
5067     isrowb = *rowb; iscolb = *colb;
5068     ierr = PetscMalloc(sizeof(Mat),&bseq);CHKERRQ(ierr);
5069     bseq[0] = *B_seq;
5070   }
5071   ierr = MatGetSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr);
5072   *B_seq = bseq[0];
5073   ierr = PetscFree(bseq);CHKERRQ(ierr);
5074   if (!rowb){
5075     ierr = ISDestroy(&isrowb);CHKERRQ(ierr);
5076   } else {
5077     *rowb = isrowb;
5078   }
5079   if (!colb){
5080     ierr = ISDestroy(&iscolb);CHKERRQ(ierr);
5081   } else {
5082     *colb = iscolb;
5083   }
5084   ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
5085   PetscFunctionReturn(0);
5086 }
5087 
5088 #undef __FUNCT__
5089 #define __FUNCT__ "MatGetBrowsOfAoCols_MPIAIJ"
5090 /*
5091     MatGetBrowsOfAoCols_MPIAIJ - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns
5092     of the OFF-DIAGONAL portion of local A
5093 
5094     Collective on Mat
5095 
5096    Input Parameters:
5097 +    A,B - the matrices in mpiaij format
5098 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5099 
5100    Output Parameters:
5101 +    startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or PETSC_NULL)
5102 .    startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or PETSC_NULL)
5103 .    bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or PETSC_NULL)
5104 -    B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N
5105 
5106     Level: developer
5107 
5108 */
5109 PetscErrorCode  MatGetBrowsOfAoCols_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscInt **startsj_s,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth)
5110 {
5111   VecScatter_MPI_General *gen_to,*gen_from;
5112   PetscErrorCode         ierr;
5113   Mat_MPIAIJ             *a=(Mat_MPIAIJ*)A->data;
5114   Mat_SeqAIJ             *b_oth;
5115   VecScatter             ctx=a->Mvctx;
5116   MPI_Comm               comm=((PetscObject)ctx)->comm;
5117   PetscMPIInt            *rprocs,*sprocs,tag=((PetscObject)ctx)->tag,rank;
5118   PetscInt               *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap->n,row,*b_othi,*b_othj;
5119   PetscScalar            *rvalues,*svalues;
5120   MatScalar              *b_otha,*bufa,*bufA;
5121   PetscInt               i,j,k,l,ll,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len;
5122   MPI_Request            *rwaits = PETSC_NULL,*swaits = PETSC_NULL;
5123   MPI_Status             *sstatus,rstatus;
5124   PetscMPIInt            jj;
5125   PetscInt               *cols,sbs,rbs;
5126   PetscScalar            *vals;
5127 
5128   PetscFunctionBegin;
5129   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend){
5130     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
5131   }
5132   ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
5133   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
5134 
5135   gen_to   = (VecScatter_MPI_General*)ctx->todata;
5136   gen_from = (VecScatter_MPI_General*)ctx->fromdata;
5137   rvalues  = gen_from->values; /* holds the length of receiving row */
5138   svalues  = gen_to->values;   /* holds the length of sending row */
5139   nrecvs   = gen_from->n;
5140   nsends   = gen_to->n;
5141 
5142   ierr = PetscMalloc2(nrecvs,MPI_Request,&rwaits,nsends,MPI_Request,&swaits);CHKERRQ(ierr);
5143   srow     = gen_to->indices;   /* local row index to be sent */
5144   sstarts  = gen_to->starts;
5145   sprocs   = gen_to->procs;
5146   sstatus  = gen_to->sstatus;
5147   sbs      = gen_to->bs;
5148   rstarts  = gen_from->starts;
5149   rprocs   = gen_from->procs;
5150   rbs      = gen_from->bs;
5151 
5152   if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX;
5153   if (scall == MAT_INITIAL_MATRIX){
5154     /* i-array */
5155     /*---------*/
5156     /*  post receives */
5157     for (i=0; i<nrecvs; i++){
5158       rowlen = (PetscInt*)rvalues + rstarts[i]*rbs;
5159       nrows = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */
5160       ierr = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5161     }
5162 
5163     /* pack the outgoing message */
5164     ierr = PetscMalloc2(nsends+1,PetscInt,&sstartsj,nrecvs+1,PetscInt,&rstartsj);CHKERRQ(ierr);
5165     sstartsj[0] = 0;  rstartsj[0] = 0;
5166     len = 0; /* total length of j or a array to be sent */
5167     k = 0;
5168     for (i=0; i<nsends; i++){
5169       rowlen = (PetscInt*)svalues + sstarts[i]*sbs;
5170       nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5171       for (j=0; j<nrows; j++) {
5172         row = srow[k] + B->rmap->range[rank]; /* global row idx */
5173         for (l=0; l<sbs; l++){
5174           ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); /* rowlength */
5175           rowlen[j*sbs+l] = ncols;
5176           len += ncols;
5177           ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr);
5178         }
5179         k++;
5180       }
5181       ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5182       sstartsj[i+1] = len;  /* starting point of (i+1)-th outgoing msg in bufj and bufa */
5183     }
5184     /* recvs and sends of i-array are completed */
5185     i = nrecvs;
5186     while (i--) {
5187       ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5188     }
5189     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
5190 
5191     /* allocate buffers for sending j and a arrays */
5192     ierr = PetscMalloc((len+1)*sizeof(PetscInt),&bufj);CHKERRQ(ierr);
5193     ierr = PetscMalloc((len+1)*sizeof(PetscScalar),&bufa);CHKERRQ(ierr);
5194 
5195     /* create i-array of B_oth */
5196     ierr = PetscMalloc((aBn+2)*sizeof(PetscInt),&b_othi);CHKERRQ(ierr);
5197     b_othi[0] = 0;
5198     len = 0; /* total length of j or a array to be received */
5199     k = 0;
5200     for (i=0; i<nrecvs; i++){
5201       rowlen = (PetscInt*)rvalues + rstarts[i]*rbs;
5202       nrows = rbs*(rstarts[i+1]-rstarts[i]); /* num of rows to be received */
5203       for (j=0; j<nrows; j++) {
5204         b_othi[k+1] = b_othi[k] + rowlen[j];
5205         len += rowlen[j]; k++;
5206       }
5207       rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */
5208     }
5209 
5210     /* allocate space for j and a arrays of B_oth */
5211     ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(PetscInt),&b_othj);CHKERRQ(ierr);
5212     ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(MatScalar),&b_otha);CHKERRQ(ierr);
5213 
5214     /* j-array */
5215     /*---------*/
5216     /*  post receives of j-array */
5217     for (i=0; i<nrecvs; i++){
5218       nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
5219       ierr = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5220     }
5221 
5222     /* pack the outgoing message j-array */
5223     k = 0;
5224     for (i=0; i<nsends; i++){
5225       nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5226       bufJ = bufj+sstartsj[i];
5227       for (j=0; j<nrows; j++) {
5228         row  = srow[k++] + B->rmap->range[rank]; /* global row idx */
5229         for (ll=0; ll<sbs; ll++){
5230           ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr);
5231           for (l=0; l<ncols; l++){
5232             *bufJ++ = cols[l];
5233           }
5234           ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr);
5235         }
5236       }
5237       ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5238     }
5239 
5240     /* recvs and sends of j-array are completed */
5241     i = nrecvs;
5242     while (i--) {
5243       ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5244     }
5245     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
5246   } else if (scall == MAT_REUSE_MATRIX){
5247     sstartsj = *startsj_s;
5248     rstartsj = *startsj_r;
5249     bufa     = *bufa_ptr;
5250     b_oth    = (Mat_SeqAIJ*)(*B_oth)->data;
5251     b_otha   = b_oth->a;
5252   } else {
5253     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
5254   }
5255 
5256   /* a-array */
5257   /*---------*/
5258   /*  post receives of a-array */
5259   for (i=0; i<nrecvs; i++){
5260     nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
5261     ierr = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5262   }
5263 
5264   /* pack the outgoing message a-array */
5265   k = 0;
5266   for (i=0; i<nsends; i++){
5267     nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5268     bufA = bufa+sstartsj[i];
5269     for (j=0; j<nrows; j++) {
5270       row  = srow[k++] + B->rmap->range[rank]; /* global row idx */
5271       for (ll=0; ll<sbs; ll++){
5272         ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr);
5273         for (l=0; l<ncols; l++){
5274           *bufA++ = vals[l];
5275         }
5276         ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr);
5277       }
5278     }
5279     ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5280   }
5281   /* recvs and sends of a-array are completed */
5282   i = nrecvs;
5283   while (i--) {
5284     ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5285   }
5286   if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
5287   ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr);
5288 
5289   if (scall == MAT_INITIAL_MATRIX){
5290     /* put together the new matrix */
5291     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr);
5292 
5293     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
5294     /* Since these are PETSc arrays, change flags to free them as necessary. */
5295     b_oth          = (Mat_SeqAIJ *)(*B_oth)->data;
5296     b_oth->free_a  = PETSC_TRUE;
5297     b_oth->free_ij = PETSC_TRUE;
5298     b_oth->nonew   = 0;
5299 
5300     ierr = PetscFree(bufj);CHKERRQ(ierr);
5301     if (!startsj_s || !bufa_ptr){
5302       ierr = PetscFree2(sstartsj,rstartsj);CHKERRQ(ierr);
5303       ierr = PetscFree(bufa_ptr);CHKERRQ(ierr);
5304     } else {
5305       *startsj_s = sstartsj;
5306       *startsj_r = rstartsj;
5307       *bufa_ptr  = bufa;
5308     }
5309   }
5310   ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
5311   PetscFunctionReturn(0);
5312 }
5313 
5314 #undef __FUNCT__
5315 #define __FUNCT__ "MatGetCommunicationStructs"
5316 /*@C
5317   MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication.
5318 
5319   Not Collective
5320 
5321   Input Parameter:
5322 . A - The matrix in mpiaij format
5323 
5324   Output Parameters:
5325 + lvec - The local vector holding off-process values from the argument to a matrix-vector product
5326 . colmap - A map from global column index to local index into lvec
5327 - multScatter - A scatter from the argument of a matrix-vector product to lvec
5328 
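  Example usage (a minimal sketch, not taken from a PETSc example; A is assumed to be an
  assembled matrix of type MATMPIAIJ created elsewhere):
.vb
     Vec        lvec;
     VecScatter Mvctx;
#if defined(PETSC_USE_CTABLE)
     PetscTable colmap;
#else
     PetscInt   *colmap;
#endif
     ierr = MatGetCommunicationStructs(A,&lvec,&colmap,&Mvctx);CHKERRQ(ierr);
     /* lvec, colmap, and Mvctx remain owned by A; do not destroy or free them */
.ve
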
5329   Level: developer
5330 
5331 @*/
5332 #if defined (PETSC_USE_CTABLE)
5333 PetscErrorCode  MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter)
5334 #else
5335 PetscErrorCode  MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter)
5336 #endif
5337 {
5338   Mat_MPIAIJ *a;
5339 
5340   PetscFunctionBegin;
5341   PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
5342   PetscValidPointer(lvec, 2);
5343   PetscValidPointer(colmap, 3);
5344   PetscValidPointer(multScatter, 4);
5345   a = (Mat_MPIAIJ *) A->data;
5346   if (lvec) *lvec = a->lvec;
5347   if (colmap) *colmap = a->colmap;
5348   if (multScatter) *multScatter = a->Mvctx;
5349   PetscFunctionReturn(0);
5350 }
5351 
5352 EXTERN_C_BEGIN
5353 extern PetscErrorCode  MatConvert_MPIAIJ_MPIAIJCRL(Mat,const MatType,MatReuse,Mat*);
5354 extern PetscErrorCode  MatConvert_MPIAIJ_MPIAIJPERM(Mat,const MatType,MatReuse,Mat*);
5355 extern PetscErrorCode  MatConvert_MPIAIJ_MPISBAIJ(Mat,const MatType,MatReuse,Mat*);
5356 EXTERN_C_END
5357 
5358 #undef __FUNCT__
5359 #define __FUNCT__ "MatMatMultNumeric_MPIDense_MPIAIJ"
5360 /*
5361     Computes C = A*B as (B'*A')', since computing the MPIDense*MPIAIJ product A*B directly is untenable; the transposed product B'*A' uses the MPIAIJ*MPIDense multiply
5362 
5363                n                       p                          p
5364         (              )       (              )         (                  )
5365       m (      A       )  *  n (       B      )   =   m (         C        )
5366         (              )       (              )         (                  )
5367 
5368 */
5369 PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C)
5370 {
5371   PetscErrorCode     ierr;
5372   Mat                At,Bt,Ct;
5373 
5374   PetscFunctionBegin;
5375   ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr);
5376   ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr);
5377   ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,1.0,&Ct);CHKERRQ(ierr);
5378   ierr = MatDestroy(&At);CHKERRQ(ierr);
5379   ierr = MatDestroy(&Bt);CHKERRQ(ierr);
5380   ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr);
5381   ierr = MatDestroy(&Ct);CHKERRQ(ierr);
5382   PetscFunctionReturn(0);
5383 }
5384 
5385 #undef __FUNCT__
5386 #define __FUNCT__ "MatMatMultSymbolic_MPIDense_MPIAIJ"
5387 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat *C)
5388 {
5389   PetscErrorCode ierr;
5390   PetscInt       m=A->rmap->n,n=B->cmap->n;
5391   Mat            Cmat;
5392 
5393   PetscFunctionBegin;
5394   if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %d != B->rmap->n %d\n",A->cmap->n,B->rmap->n);
5395   ierr = MatCreate(((PetscObject)A)->comm,&Cmat);CHKERRQ(ierr);
5396   ierr = MatSetSizes(Cmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
5397   ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr);
5398   ierr = MatMPIDenseSetPreallocation(Cmat,PETSC_NULL);CHKERRQ(ierr);
5399   ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5400   ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5401   *C   = Cmat;
5402   (*C)->ops->matmult = MatMatMult_MPIDense_MPIAIJ;
5403   PetscFunctionReturn(0);
5404 }
5405 
5406 /* ----------------------------------------------------------------*/
5407 #undef __FUNCT__
5408 #define __FUNCT__ "MatMatMult_MPIDense_MPIAIJ"
5409 PetscErrorCode MatMatMult_MPIDense_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C)
5410 {
5411   PetscErrorCode ierr;
5412 
5413   PetscFunctionBegin;
5414   if (scall == MAT_INITIAL_MATRIX){
5415     ierr = MatMatMultSymbolic_MPIDense_MPIAIJ(A,B,fill,C);CHKERRQ(ierr);
5416   }
5417   ierr = MatMatMultNumeric_MPIDense_MPIAIJ(A,B,*C);CHKERRQ(ierr);
5418   PetscFunctionReturn(0);
5419 }
5420 
5421 EXTERN_C_BEGIN
5422 #if defined(PETSC_HAVE_MUMPS)
5423 extern PetscErrorCode MatGetFactor_aij_mumps(Mat,MatFactorType,Mat*);
5424 #endif
5425 #if defined(PETSC_HAVE_PASTIX)
5426 extern PetscErrorCode MatGetFactor_mpiaij_pastix(Mat,MatFactorType,Mat*);
5427 #endif
5428 #if defined(PETSC_HAVE_SUPERLU_DIST)
5429 extern PetscErrorCode MatGetFactor_mpiaij_superlu_dist(Mat,MatFactorType,Mat*);
5430 #endif
5431 #if defined(PETSC_HAVE_SPOOLES)
5432 extern PetscErrorCode MatGetFactor_mpiaij_spooles(Mat,MatFactorType,Mat*);
5433 #endif
5434 EXTERN_C_END
5435 
5436 /*MC
5437    MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.
5438 
5439    Options Database Keys:
5440 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions()
5441 
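   Example usage (a minimal sketch, not taken from a PETSc example; comm, n, and the
   preallocation counts are illustrative placeholders):
.vb
     Mat A;
     ierr = MatCreate(comm,&A);CHKERRQ(ierr);
     ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,n,n);CHKERRQ(ierr);
     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
     ierr = MatMPIAIJSetPreallocation(A,5,PETSC_NULL,2,PETSC_NULL);CHKERRQ(ierr);
     /* set entries with MatSetValues(), then MatAssemblyBegin()/MatAssemblyEnd() */
.ve
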
5442   Level: beginner
5443 
5444 .seealso: MatCreateMPIAIJ()
5445 M*/
5446 
5447 EXTERN_C_BEGIN
5448 #undef __FUNCT__
5449 #define __FUNCT__ "MatCreate_MPIAIJ"
5450 PetscErrorCode  MatCreate_MPIAIJ(Mat B)
5451 {
5452   Mat_MPIAIJ     *b;
5453   PetscErrorCode ierr;
5454   PetscMPIInt    size;
5455 
5456   PetscFunctionBegin;
5457   ierr = MPI_Comm_size(((PetscObject)B)->comm,&size);CHKERRQ(ierr);
5458 
5459   ierr            = PetscNewLog(B,Mat_MPIAIJ,&b);CHKERRQ(ierr);
5460   B->data         = (void*)b;
5461   ierr            = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
5462   B->rmap->bs     = 1;
5463   B->assembled    = PETSC_FALSE;
5464 
5465   B->insertmode   = NOT_SET_VALUES;
5466   b->size         = size;
5467   ierr = MPI_Comm_rank(((PetscObject)B)->comm,&b->rank);CHKERRQ(ierr);
5468 
5469   /* build cache (stash) for off-processor entries formed during assembly */
5470   ierr = MatStashCreate_Private(((PetscObject)B)->comm,1,&B->stash);CHKERRQ(ierr);
5471   b->donotstash  = PETSC_FALSE;
5472   b->colmap      = 0;
5473   b->garray      = 0;
5474   b->roworiented = PETSC_TRUE;
5475 
5476   /* stuff used for matrix vector multiply */
5477   b->lvec      = PETSC_NULL;
5478   b->Mvctx     = PETSC_NULL;
5479 
5480   /* stuff for MatGetRow() */
5481   b->rowindices   = 0;
5482   b->rowvalues    = 0;
5483   b->getrowactive = PETSC_FALSE;
5484 
5485 #if defined(PETSC_HAVE_SPOOLES)
5486   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_spooles_C",
5487                                      "MatGetFactor_mpiaij_spooles",
5488                                      MatGetFactor_mpiaij_spooles);CHKERRQ(ierr);
5489 #endif
5490 #if defined(PETSC_HAVE_MUMPS)
5491   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_mumps_C",
5492                                      "MatGetFactor_aij_mumps",
5493                                      MatGetFactor_aij_mumps);CHKERRQ(ierr);
5494 #endif
5495 #if defined(PETSC_HAVE_PASTIX)
5496   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_pastix_C",
5497 					   "MatGetFactor_mpiaij_pastix",
5498 					   MatGetFactor_mpiaij_pastix);CHKERRQ(ierr);
5499 #endif
5500 #if defined(PETSC_HAVE_SUPERLU_DIST)
5501   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_superlu_dist_C",
5502                                      "MatGetFactor_mpiaij_superlu_dist",
5503                                      MatGetFactor_mpiaij_superlu_dist);CHKERRQ(ierr);
5504 #endif
5505   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
5506                                      "MatStoreValues_MPIAIJ",
5507                                      MatStoreValues_MPIAIJ);CHKERRQ(ierr);
5508   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
5509                                      "MatRetrieveValues_MPIAIJ",
5510                                      MatRetrieveValues_MPIAIJ);CHKERRQ(ierr);
5511   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
5512 				     "MatGetDiagonalBlock_MPIAIJ",
5513                                      MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr);
5514   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatIsTranspose_C",
5515 				     "MatIsTranspose_MPIAIJ",
5516 				     MatIsTranspose_MPIAIJ);CHKERRQ(ierr);
5517   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocation_C",
5518 				     "MatMPIAIJSetPreallocation_MPIAIJ",
5519 				     MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr);
5520   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",
5521 				     "MatMPIAIJSetPreallocationCSR_MPIAIJ",
5522 				     MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr);
5523   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C",
5524 				     "MatDiagonalScaleLocal_MPIAIJ",
5525 				     MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr);
5526   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpiaijperm_C",
5527                                      "MatConvert_MPIAIJ_MPIAIJPERM",
5528                                       MatConvert_MPIAIJ_MPIAIJPERM);CHKERRQ(ierr);
5529   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpiaijcrl_C",
5530                                      "MatConvert_MPIAIJ_MPIAIJCRL",
5531                                       MatConvert_MPIAIJ_MPIAIJCRL);CHKERRQ(ierr);
5532   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C",
5533                                      "MatConvert_MPIAIJ_MPISBAIJ",
5534                                       MatConvert_MPIAIJ_MPISBAIJ);CHKERRQ(ierr);
5535   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMult_mpidense_mpiaij_C",
5536                                      "MatMatMult_MPIDense_MPIAIJ",
5537                                       MatMatMult_MPIDense_MPIAIJ);CHKERRQ(ierr);
5538   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMultSymbolic_mpidense_mpiaij_C",
5539                                      "MatMatMultSymbolic_MPIDense_MPIAIJ",
5540                                      MatMatMultSymbolic_MPIDense_MPIAIJ);CHKERRQ(ierr);
5541   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMultNumeric_mpidense_mpiaij_C",
5542                                      "MatMatMultNumeric_MPIDense_MPIAIJ",
5543                                       MatMatMultNumeric_MPIDense_MPIAIJ);CHKERRQ(ierr);
5544   ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr);
5545   PetscFunctionReturn(0);
5546 }
5547 EXTERN_C_END
5548 
5549 #undef __FUNCT__
5550 #define __FUNCT__ "MatCreateMPIAIJWithSplitArrays"
5551 /*@
5552      MatCreateMPIAIJWithSplitArrays - creates an MPI AIJ matrix using arrays that contain the "diagonal"
5553          and "off-diagonal" part of the matrix in CSR format.
5554 
5555    Collective on MPI_Comm
5556 
5557    Input Parameters:
5558 +  comm - MPI communicator
5559 .  m - number of local rows (Cannot be PETSC_DECIDE)
5560 .  n - This value should be the same as the local size used in creating the
5561        x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have it
5562        calculated if N is given). For square matrices n is almost always m.
5563 .  M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given)
5564 .  N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given)
5565 .   i - row indices for "diagonal" portion of matrix
5566 .   j - column indices for "diagonal" portion of matrix (local indices, relative to the first column of the diagonal portion)
5567 .   a - matrix values for "diagonal" portion of matrix
5568 .   oi - row indices for "off-diagonal" portion of matrix
5569 .   oj - column indices for "off-diagonal" portion of matrix (global indices)
5570 -   oa - matrix values for "off-diagonal" portion of matrix
5571 
5572    Output Parameter:
5573 .   mat - the matrix
5574 
5575    Level: advanced
5576 
5577    Notes:
5578        The i, j, a, oi, oj, and oa arrays ARE NOT copied by this routine into the internal format used by PETSc. The user
5579        must free these arrays once the matrix has been destroyed and not before.
5580 
5581        The i and j indices are 0 based
5582 
5583        See MatCreateMPIAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix
5584 
5585        This sets local rows and cannot be used to set off-processor values.
5586 
5587        You cannot later use MatSetValues() to change values in this matrix.
5588 
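       Example usage (a schematic sketch, not taken from a PETSc example; the CSR arrays are
       placeholders the caller must have filled in as described above, and comm, m, n are the
       local sizes chosen by the caller):
.vb
     Mat         A;
     PetscInt    m,n;                 /* local row and column sizes */
     PetscInt    *i,*j,*oi,*oj;       /* 0-based CSR row pointers and column indices */
     PetscScalar *a,*oa;              /* numerical values matching j and oj */
     /* ... fill m,n and the six arrays ... */
     ierr = MatCreateMPIAIJWithSplitArrays(comm,m,n,PETSC_DETERMINE,PETSC_DETERMINE,
                                           i,j,a,oi,oj,oa,&A);CHKERRQ(ierr);
     /* i,j,a,oi,oj,oa must stay allocated until after MatDestroy(&A) */
.ve
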
5589 .keywords: matrix, aij, compressed row, sparse, parallel
5590 
5591 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
5592           MATMPIAIJ, MatCreateMPIAIJ(), MatCreateMPIAIJWithArrays()
5593 @*/
5594 PetscErrorCode  MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],
5595 								PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat)
5596 {
5597   PetscErrorCode ierr;
5598   Mat_MPIAIJ     *maij;
5599 
5600   PetscFunctionBegin;
5601   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
5602   if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
5603   if (oi[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0");
5604   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
5605   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
5606   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
5607   maij = (Mat_MPIAIJ*) (*mat)->data;
5608   maij->donotstash     = PETSC_TRUE;
5609   (*mat)->preallocated = PETSC_TRUE;
5610 
5611   ierr = PetscLayoutSetBlockSize((*mat)->rmap,1);CHKERRQ(ierr);
5612   ierr = PetscLayoutSetBlockSize((*mat)->cmap,1);CHKERRQ(ierr);
5613   ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr);
5614   ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr);
5615 
5616   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr);
5617   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr);
5618 
5619   ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5620   ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5621   ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5622   ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5623 
5624   ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5625   ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5626   PetscFunctionReturn(0);
5627 }
5628 
5629 /*
5630     Special version for direct calls from Fortran
5631 */
5632 #include <private/fortranimpl.h>
5633 
5634 #if defined(PETSC_HAVE_FORTRAN_CAPS)
5635 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ
5636 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
5637 #define matsetvaluesmpiaij_ matsetvaluesmpiaij
5638 #endif
5639 
5640 /* Redefine these macros so they can be used in a function returning void: error checking aborts rather than returning an error code */
5641 #undef CHKERRQ
5642 #define CHKERRQ(ierr) CHKERRABORT(PETSC_COMM_WORLD,ierr)
5643 #undef SETERRQ2
5644 #define SETERRQ2(comm,ierr,b,c,d) CHKERRABORT(comm,ierr)
5645 #undef SETERRQ
5646 #define SETERRQ(c,ierr,b) CHKERRABORT(c,ierr)
5647 
5648 EXTERN_C_BEGIN
5649 #undef __FUNCT__
5650 #define __FUNCT__ "matsetvaluesmpiaij_"
5651 void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr)
5652 {
5653   Mat             mat = *mmat;
5654   PetscInt        m = *mm, n = *mn;
5655   InsertMode      addv = *maddv;
5656   Mat_MPIAIJ      *aij = (Mat_MPIAIJ*)mat->data;
5657   PetscScalar     value;
5658   PetscErrorCode  ierr;
5659 
5660   ierr = MatPreallocated(mat);CHKERRQ(ierr);
5661   if (mat->insertmode == NOT_SET_VALUES) {
5662     mat->insertmode = addv;
5663   }
5664 #if defined(PETSC_USE_DEBUG)
5665   else if (mat->insertmode != addv) {
5666     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
5667   }
5668 #endif
5669   {
5670   PetscInt        i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
5671   PetscInt        cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
5672   PetscBool       roworiented = aij->roworiented;
5673 
5674   /* Some variables required by the MatSetValues_SeqAIJ_A_Private() and MatSetValues_SeqAIJ_B_Private() macros */
5675   Mat             A = aij->A;
5676   Mat_SeqAIJ      *a = (Mat_SeqAIJ*)A->data;
5677   PetscInt        *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
5678   MatScalar       *aa = a->a;
5679   PetscBool       ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES))?PETSC_TRUE:PETSC_FALSE);
5680   Mat             B = aij->B;
5681   Mat_SeqAIJ      *b = (Mat_SeqAIJ*)B->data;
5682   PetscInt        *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
5683   MatScalar       *ba = b->a;
5684 
5685   PetscInt        *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
5686   PetscInt        nonew = a->nonew;
5687   MatScalar       *ap1,*ap2;
5688 
5689   PetscFunctionBegin;
5690   for (i=0; i<m; i++) {
5691     if (im[i] < 0) continue;
5692 #if defined(PETSC_USE_DEBUG)
5693     if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
5694 #endif
5695     if (im[i] >= rstart && im[i] < rend) {
5696       row      = im[i] - rstart;
5697       lastcol1 = -1;
5698       rp1      = aj + ai[row];
5699       ap1      = aa + ai[row];
5700       rmax1    = aimax[row];
5701       nrow1    = ailen[row];
5702       low1     = 0;
5703       high1    = nrow1;
5704       lastcol2 = -1;
5705       rp2      = bj + bi[row];
5706       ap2      = ba + bi[row];
5707       rmax2    = bimax[row];
5708       nrow2    = bilen[row];
5709       low2     = 0;
5710       high2    = nrow2;
5711 
5712       for (j=0; j<n; j++) {
5713         if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
5714         if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
5715         if (in[j] >= cstart && in[j] < cend){
5716           col = in[j] - cstart;
5717           MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
5718         } else if (in[j] < 0) continue;
5719 #if defined(PETSC_USE_DEBUG)
5720         else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
5721 #endif
5722         else {
5723           if (mat->was_assembled) {
5724             if (!aij->colmap) {
5725               ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
5726             }
5727 #if defined (PETSC_USE_CTABLE)
5728             ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
5729 	    col--;
5730 #else
5731             col = aij->colmap[in[j]] - 1;
5732 #endif
5733             if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
5734               ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
5735               col =  in[j];
5736               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
5737               B = aij->B;
5738               b = (Mat_SeqAIJ*)B->data;
5739               bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
5740               rp2      = bj + bi[row];
5741               ap2      = ba + bi[row];
5742               rmax2    = bimax[row];
5743               nrow2    = bilen[row];
5744               low2     = 0;
5745               high2    = nrow2;
5746               bm       = aij->B->rmap->n;
5747               ba = b->a;
5748             }
5749           } else col = in[j];
5750           MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
5751         }
5752       }
5753     } else {
5754       if (!aij->donotstash) {
5755         if (roworiented) {
5756           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
5757         } else {
5758           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
5759         }
5760       }
5761     }
5762   }}
5763   PetscFunctionReturnVoid();
5764 }
5765 EXTERN_C_END
5766 
5767