xref: /petsc/src/mat/impls/aij/mpi/mpiaij.c (revision 85afcc9ae9ea289cfdbcd5f2fb7e605e311ecd9d)
1 
2 #include <../src/mat/impls/aij/mpi/mpiaij.h>   /*I "petscmat.h" I*/
3 #include <petscblaslapack.h>
4 
5 /*MC
6    MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices.
7 
8    This matrix type is identical to MATSEQAIJ when constructed with a single process communicator,
9    and MATMPIAIJ otherwise.  As a result, for single process communicators,
10   MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
11   for communicators controlling multiple processes.  It is recommended that you call both of
12   the above preallocation routines for simplicity.
13 
14    Options Database Keys:
15 . -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions()
16 
17   Developer Notes: Subclasses include MATAIJCUSP, MATAIJPERM, and MATAIJCRL; the type also automatically switches over to use
18    inodes when enough rows with identical nonzero structure exist.
19 
20   Level: beginner
21 
22 .seealso: MatCreateMPIAIJ(), MatCreateSeqAIJ(), MATSEQAIJ, MATMPIAIJ
23 M*/
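/*
   Example (a minimal sketch; the sizes and per-row nonzero estimates are
   illustrative): creating an AIJ matrix that runs unchanged on one or many
   processes by calling both preallocation routines, as recommended above.
   Whichever call does not match the actual type is ignored.

     Mat            A;
     PetscErrorCode ierr;

     ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
     ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,100,100);CHKERRQ(ierr);
     ierr = MatSetType(A,MATAIJ);CHKERRQ(ierr);
     ierr = MatSeqAIJSetPreallocation(A,5,PETSC_NULL);CHKERRQ(ierr);
     ierr = MatMPIAIJSetPreallocation(A,5,PETSC_NULL,2,PETSC_NULL);CHKERRQ(ierr);
*/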
24 
25 /*MC
26    MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices.
27 
28    This matrix type is identical to MATSEQAIJCRL when constructed with a single process communicator,
29    and MATMPIAIJCRL otherwise.  As a result, for single process communicators,
30    MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
31   for communicators controlling multiple processes.  It is recommended that you call both of
32   the above preallocation routines for simplicity.
33 
34    Options Database Keys:
35 . -mat_type aijcrl - sets the matrix type to "aijcrl" during a call to MatSetFromOptions()
36 
37   Level: beginner
38 
39 .seealso: MatCreateMPIAIJCRL(), MATSEQAIJCRL, MATMPIAIJCRL
40 M*/
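/*
   Example (a minimal sketch): the CRL variant is most easily selected at run
   time; after MatSetFromOptions() the type is chosen with -mat_type aijcrl.

     ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
     ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,100,100);CHKERRQ(ierr);
     ierr = MatSetFromOptions(A);CHKERRQ(ierr);
*/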
41 
42 #undef __FUNCT__
43 #define __FUNCT__ "MatFindNonZeroRows_MPIAIJ"
44 PetscErrorCode MatFindNonZeroRows_MPIAIJ(Mat M,IS *keptrows)
45 {
46   PetscErrorCode  ierr;
47   Mat_MPIAIJ      *mat = (Mat_MPIAIJ*)M->data;
48   Mat_SeqAIJ      *a = (Mat_SeqAIJ*)mat->A->data;
49   Mat_SeqAIJ      *b = (Mat_SeqAIJ*)mat->B->data;
50   const PetscInt  *ia,*ib;
51   const MatScalar *aa,*bb;
52   PetscInt        na,nb,i,j,*rows,cnt=0,n0rows;
53   PetscInt        m = M->rmap->n,rstart = M->rmap->rstart;
54 
55   PetscFunctionBegin;
56   *keptrows = 0;
57   ia = a->i;
58   ib = b->i;
59   for (i=0; i<m; i++) {
60     na = ia[i+1] - ia[i];
61     nb = ib[i+1] - ib[i];
62     if (!na && !nb) {
63       cnt++;
64       goto ok1;
65     }
66     aa = a->a + ia[i];
67     for (j=0; j<na; j++) {
68       if (aa[j] != 0.0) goto ok1;
69     }
70     bb = b->a + ib[i];
71     for (j=0; j<nb; j++) {
72       if (bb[j] != 0.0) goto ok1;
73     }
74     cnt++;
75     ok1:;
76   }
77   ierr = MPI_Allreduce(&cnt,&n0rows,1,MPIU_INT,MPI_SUM,((PetscObject)M)->comm);CHKERRQ(ierr);
78   if (!n0rows) PetscFunctionReturn(0);
79   ierr = PetscMalloc((M->rmap->n-cnt)*sizeof(PetscInt),&rows);CHKERRQ(ierr);
80   cnt = 0;
81   for (i=0; i<m; i++) {
82     na = ia[i+1] - ia[i];
83     nb = ib[i+1] - ib[i];
84     if (!na && !nb) continue;
85     aa = a->a + ia[i];
86     for (j=0; j<na; j++) {
87       if (aa[j] != 0.0) {
88         rows[cnt++] = rstart + i;
89         goto ok2;
90       }
91     }
92     bb = b->a + ib[i];
93     for (j=0; j<nb; j++) {
94       if (bb[j] != 0.0) {
95         rows[cnt++] = rstart + i;
96         goto ok2;
97       }
98     }
99     ok2:;
100   }
101   ierr = ISCreateGeneral(((PetscObject)M)->comm,cnt,rows,PETSC_OWN_POINTER,keptrows);CHKERRQ(ierr);
102   PetscFunctionReturn(0);
103 }
104 
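/*
   MatGetColumnNorms_MPIAIJ below accumulates each process's contribution to
   every global column in a work array of global length n, indexing
   diagonal-block entries by cmap->rstart plus the local column and
   off-diagonal entries through garray[], then combines the per-process
   results with a single MPI_Allreduce (MAX for the infinity norm, SUM
   otherwise). Since the work array has global length, the routine is not
   memory-scalable in the number of columns.
*/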
105 #undef __FUNCT__
106 #define __FUNCT__ "MatGetColumnNorms_MPIAIJ"
107 PetscErrorCode MatGetColumnNorms_MPIAIJ(Mat A,NormType type,PetscReal *norms)
108 {
109   PetscErrorCode ierr;
110   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)A->data;
111   PetscInt       i,n,*garray = aij->garray;
112   Mat_SeqAIJ     *a_aij = (Mat_SeqAIJ*) aij->A->data;
113   Mat_SeqAIJ     *b_aij = (Mat_SeqAIJ*) aij->B->data;
114   PetscReal      *work;
115 
116   PetscFunctionBegin;
117   ierr = MatGetSize(A,PETSC_NULL,&n);CHKERRQ(ierr);
118   ierr = PetscMalloc(n*sizeof(PetscReal),&work);CHKERRQ(ierr);
119   ierr = PetscMemzero(work,n*sizeof(PetscReal));CHKERRQ(ierr);
120   if (type == NORM_2) {
121     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
122       work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]*a_aij->a[i]);
123     }
124     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
125       work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]*b_aij->a[i]);
126     }
127   } else if (type == NORM_1) {
128     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
129       work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]);
130     }
131     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
132       work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]);
133     }
134   } else if (type == NORM_INFINITY) {
135     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
136       work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]);
137     }
138     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
139       work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]),work[garray[b_aij->j[i]]]);
140     }
141 
142   } else SETERRQ(((PetscObject)A)->comm,PETSC_ERR_ARG_WRONG,"Unknown NormType");
143   if (type == NORM_INFINITY) {
144     ierr = MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,((PetscObject)A)->comm);CHKERRQ(ierr);
145   } else {
146     ierr = MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,((PetscObject)A)->comm);CHKERRQ(ierr);
147   }
148   ierr = PetscFree(work);CHKERRQ(ierr);
149   if (type == NORM_2) {
150     for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]);
151   }
152   PetscFunctionReturn(0);
153 }
154 
155 #undef __FUNCT__
156 #define __FUNCT__ "MatDistribute_MPIAIJ"
157 /*
158     Distributes a SeqAIJ matrix across a set of processes. Code stolen from
159     MatLoad_MPIAIJ(). Horrible lack of reuse. Should be a routine for each matrix type.
160 
161     Only for square matrices
162 */
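/*
   Intended usage (a sketch; gmat need only be a valid MATSEQAIJ matrix on
   process 0, and m is the number of rows each process is to own, summing to
   the global row count):

     Mat A;
     ierr = MatDistribute_MPIAIJ(comm,gmat,m,MAT_INITIAL_MATRIX,&A);CHKERRQ(ierr);
     ... later, after the numerical values (but not the nonzero structure)
         of gmat have changed on process 0 ...
     ierr = MatDistribute_MPIAIJ(comm,gmat,m,MAT_REUSE_MATRIX,&A);CHKERRQ(ierr);
*/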
163 PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat)
164 {
165   PetscMPIInt    rank,size;
166   PetscInt       *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz,*gmataj,cnt,row,*ld;
167   PetscErrorCode ierr;
168   Mat            mat;
169   Mat_SeqAIJ     *gmata;
170   PetscMPIInt    tag;
171   MPI_Status     status;
172   PetscBool      aij;
173   MatScalar      *gmataa,*ao,*ad,*gmataarestore=0;
174 
175   PetscFunctionBegin;
176   CHKMEMQ;
177   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
178   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
179   if (!rank) {
180     ierr = PetscTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);CHKERRQ(ierr);
181     if (!aij) SETERRQ1(((PetscObject)gmat)->comm,PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name);
182   }
183   if (reuse == MAT_INITIAL_MATRIX) {
184     ierr = MatCreate(comm,&mat);CHKERRQ(ierr);
185     ierr = MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
186     ierr = MatSetType(mat,MATAIJ);CHKERRQ(ierr);
187     ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr);
188     ierr = PetscMalloc2(m,PetscInt,&dlens,m,PetscInt,&olens);CHKERRQ(ierr);
189     ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
190     rowners[0] = 0;
191     for (i=2; i<=size; i++) {
192       rowners[i] += rowners[i-1];
193     }
194     rstart = rowners[rank];
195     rend   = rowners[rank+1];
196     ierr   = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
197     if (!rank) {
198       gmata = (Mat_SeqAIJ*) gmat->data;
199       /* send row lengths to all processors */
200       for (i=0; i<m; i++) dlens[i] = gmata->ilen[i];
201       for (i=1; i<size; i++) {
202 	ierr = MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
203       }
204       /* determine the diagonal and off-diagonal counts for each row */
205       ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
206       ierr = PetscMalloc(m*sizeof(PetscInt),&ld);CHKERRQ(ierr);
207       ierr = PetscMemzero(ld,m*sizeof(PetscInt));CHKERRQ(ierr);
208       jj = 0;
209       for (i=0; i<m; i++) {
210 	for (j=0; j<dlens[i]; j++) {
211           if (gmata->j[jj] < rstart) ld[i]++;
212 	  if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++;
213 	  jj++;
214 	}
215       }
216       /* send column indices to other processes */
217       for (i=1; i<size; i++) {
218 	nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
219 	ierr = MPI_Send(&nz,1,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
220 	ierr = MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
221       }
222 
223       /* send numerical values to other processes */
224       for (i=1; i<size; i++) {
225         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
226         ierr = MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
227       }
228       gmataa = gmata->a;
229       gmataj = gmata->j;
230 
231     } else {
232       /* receive row lengths */
233       ierr = MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
234       /* receive column indices */
235       ierr = MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
236       ierr = PetscMalloc2(nz,PetscScalar,&gmataa,nz,PetscInt,&gmataj);CHKERRQ(ierr);
237       ierr = MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
238       /* determine the diagonal and off-diagonal counts for each row */
239       ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
240       ierr = PetscMalloc(m*sizeof(PetscInt),&ld);CHKERRQ(ierr);
241       ierr = PetscMemzero(ld,m*sizeof(PetscInt));CHKERRQ(ierr);
242       jj = 0;
243       for (i=0; i<m; i++) {
244 	for (j=0; j<dlens[i]; j++) {
245           if (gmataj[jj] < rstart) ld[i]++;
246 	  if (gmataj[jj] < rstart || gmataj[jj] >= rend) olens[i]++;
247 	  jj++;
248 	}
249       }
250       /* receive numerical values */
251       ierr = PetscMemzero(gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr);
252       ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
253     }
254     /* set preallocation */
255     for (i=0; i<m; i++) {
256       dlens[i] -= olens[i];
257     }
258     ierr = MatSeqAIJSetPreallocation(mat,0,dlens);CHKERRQ(ierr);
259     ierr = MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);CHKERRQ(ierr);
260 
261     for (i=0; i<m; i++) {
262       dlens[i] += olens[i];
263     }
264     cnt  = 0;
265     for (i=0; i<m; i++) {
266       row  = rstart + i;
267       ierr = MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);CHKERRQ(ierr);
268       cnt += dlens[i];
269     }
270     if (rank) {
271       ierr = PetscFree2(gmataa,gmataj);CHKERRQ(ierr);
272     }
273     ierr = PetscFree2(dlens,olens);CHKERRQ(ierr);
274     ierr = PetscFree(rowners);CHKERRQ(ierr);
275     ((Mat_MPIAIJ*)(mat->data))->ld = ld;
276     *inmat = mat;
277   } else {   /* column indices are already set; only need to move over numerical values from process 0 */
278     Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data;
279     Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data;
280     mat   = *inmat;
281     ierr  = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
282     if (!rank) {
283       /* send numerical values to other processes */
284       gmata = (Mat_SeqAIJ*) gmat->data;
285       ierr   = MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);CHKERRQ(ierr);
286       gmataa = gmata->a;
287       for (i=1; i<size; i++) {
288         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
289         ierr = MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
290       }
291       nz   = gmata->i[rowners[1]]-gmata->i[rowners[0]];
292     } else {
293       /* receive numerical values from process 0 */
294       nz   = Ad->nz + Ao->nz;
295       ierr = PetscMalloc(nz*sizeof(PetscScalar),&gmataa);CHKERRQ(ierr); gmataarestore = gmataa;
296       ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
297     }
298     /* transfer numerical values into the diagonal A and off diagonal B parts of mat */
299     ld = ((Mat_MPIAIJ*)(mat->data))->ld;
300     ad = Ad->a;
301     ao = Ao->a;
302     if (mat->rmap->n) {
303       i  = 0;
304       nz = ld[i];                                   ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
305       nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
306     }
307     for (i=1; i<mat->rmap->n; i++) {
308       nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
309       nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
310     }
311     i--;
312     if (mat->rmap->n) {
313       nz = Ao->i[i+1] - Ao->i[i] - ld[i];           ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
314     }
315     if (rank) {
316       ierr = PetscFree(gmataarestore);CHKERRQ(ierr);
317     }
318   }
319   ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
320   ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
321   CHKMEMQ;
322   PetscFunctionReturn(0);
323 }
324 
325 /*
326   Local utility routine that creates a mapping from the global column
327 number to the local number in the off-diagonal part of the local
328 storage of the matrix.  When PETSC_USE_CTABLE is used this is scalable at
329 a slightly higher hash table cost; without it, it is not scalable (each process
330 stores an order-N integer array) but is fast to access.
331 */
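/*
   Lookup pattern used throughout this file (a sketch): translating a global
   column number gcol into a local column index lcol of B; the +1/-1 shift
   reserves 0 to mean "not present".

   #if defined(PETSC_USE_CTABLE)
     ierr = PetscTableFind(aij->colmap,gcol+1,&lcol);CHKERRQ(ierr);
     lcol--;
   #else
     lcol = aij->colmap[gcol] - 1;
   #endif

   A negative lcol then signals that gcol is not a column of the off-diagonal
   part on this process.
*/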
332 #undef __FUNCT__
333 #define __FUNCT__ "CreateColmap_MPIAIJ_Private"
334 PetscErrorCode CreateColmap_MPIAIJ_Private(Mat mat)
335 {
336   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
337   PetscErrorCode ierr;
338   PetscInt       n = aij->B->cmap->n,i;
339 
340   PetscFunctionBegin;
341 #if defined (PETSC_USE_CTABLE)
342   ierr = PetscTableCreate(n,&aij->colmap);CHKERRQ(ierr);
343   for (i=0; i<n; i++){
344     ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1);CHKERRQ(ierr);
345   }
346 #else
347   ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscInt),&aij->colmap);CHKERRQ(ierr);
348   ierr = PetscLogObjectMemory(mat,mat->cmap->N*sizeof(PetscInt));CHKERRQ(ierr);
349   ierr = PetscMemzero(aij->colmap,mat->cmap->N*sizeof(PetscInt));CHKERRQ(ierr);
350   for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
351 #endif
352   PetscFunctionReturn(0);
353 }
354 
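/*
   The two macros below insert a single value into the diagonal (A) and
   off-diagonal (B) local matrices respectively: a binary search narrows the
   current row to a small window, a linear scan locates the column, and if the
   column is not yet present a slot is created, reallocating the row through
   MatSeqXAIJReallocateAIJ() when necessary and shifting the later entries in
   the row up by one.
*/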
355 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \
356 { \
357     if (col <= lastcol1) low1 = 0; else high1 = nrow1; \
358     lastcol1 = col;\
359     while (high1-low1 > 5) { \
360       t = (low1+high1)/2; \
361       if (rp1[t] > col) high1 = t; \
362       else             low1  = t; \
363     } \
364       for (_i=low1; _i<high1; _i++) { \
365         if (rp1[_i] > col) break; \
366         if (rp1[_i] == col) { \
367           if (addv == ADD_VALUES) ap1[_i] += value;   \
368           else                    ap1[_i] = value; \
369           goto a_noinsert; \
370         } \
371       }  \
372       if (value == 0.0 && ignorezeroentries) {low1 = 0; high1 = nrow1;goto a_noinsert;} \
373       if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;}		\
374       if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
375       MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \
376       N = nrow1++ - 1; a->nz++; high1++; \
377       /* shift up all the later entries in this row */ \
378       for (ii=N; ii>=_i; ii--) { \
379         rp1[ii+1] = rp1[ii]; \
380         ap1[ii+1] = ap1[ii]; \
381       } \
382       rp1[_i] = col;  \
383       ap1[_i] = value;  \
384       a_noinsert: ; \
385       ailen[row] = nrow1; \
386 }
387 
388 
389 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \
390 { \
391     if (col <= lastcol2) low2 = 0; else high2 = nrow2; \
392     lastcol2 = col;\
393     while (high2-low2 > 5) { \
394       t = (low2+high2)/2; \
395       if (rp2[t] > col) high2 = t; \
396       else             low2  = t; \
397     } \
398     for (_i=low2; _i<high2; _i++) {		\
399       if (rp2[_i] > col) break;			\
400       if (rp2[_i] == col) {			      \
401 	if (addv == ADD_VALUES) ap2[_i] += value;     \
402 	else                    ap2[_i] = value;      \
403 	goto b_noinsert;			      \
404       }						      \
405     }							      \
406     if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
407     if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;}		\
408     if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
409     MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \
410     N = nrow2++ - 1; b->nz++; high2++;					\
411     /* shift up all the later entries in this row */			\
412     for (ii=N; ii>=_i; ii--) {						\
413       rp2[ii+1] = rp2[ii];						\
414       ap2[ii+1] = ap2[ii];						\
415     }									\
416     rp2[_i] = col;							\
417     ap2[_i] = value;							\
418     b_noinsert: ;								\
419     bilen[row] = nrow2;							\
420 }
421 
422 #undef __FUNCT__
423 #define __FUNCT__ "MatSetValuesRow_MPIAIJ"
424 PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[])
425 {
426   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)A->data;
427   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data;
428   PetscErrorCode ierr;
429   PetscInt       l,*garray = mat->garray,diag;
430 
431   PetscFunctionBegin;
432   /* code only works for square matrices A */
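  /* the input array v holds the entire local row in ascending global column
     order: the entries left of the diagonal block (stored in B), then the
     diagonal block itself (stored in A), then the entries right of the
     diagonal block (also stored in B) */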
433 
434   /* find size of row to the left of the diagonal part */
435   ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr);
436   row  = row - diag;
437   for (l=0; l<b->i[row+1]-b->i[row]; l++) {
438     if (garray[b->j[b->i[row]+l]] > diag) break;
439   }
440   ierr = PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));CHKERRQ(ierr);
441 
442   /* diagonal part */
443   ierr = PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));CHKERRQ(ierr);
444 
445   /* right of diagonal part */
446   ierr = PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));CHKERRQ(ierr);
447   PetscFunctionReturn(0);
448 }
449 
450 #undef __FUNCT__
451 #define __FUNCT__ "MatSetValues_MPIAIJ"
452 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
453 {
454   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
455   PetscScalar    value;
456   PetscErrorCode ierr;
457   PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
458   PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
459   PetscBool      roworiented = aij->roworiented;
460 
461   /* Some Variables required in the macro */
462   Mat            A = aij->A;
463   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data;
464   PetscInt       *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
465   MatScalar      *aa = a->a;
466   PetscBool      ignorezeroentries = a->ignorezeroentries;
467   Mat            B = aij->B;
468   Mat_SeqAIJ     *b = (Mat_SeqAIJ*)B->data;
469   PetscInt       *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
470   MatScalar      *ba = b->a;
471 
472   PetscInt       *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
473   PetscInt       nonew = a->nonew;
474   MatScalar      *ap1,*ap2;
475 
476   PetscFunctionBegin;
477   if (v) PetscValidScalarPointer(v,6);
478   for (i=0; i<m; i++) {
479     if (im[i] < 0) continue;
480 #if defined(PETSC_USE_DEBUG)
481     if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
482 #endif
483     if (im[i] >= rstart && im[i] < rend) {
484       row      = im[i] - rstart;
485       lastcol1 = -1;
486       rp1      = aj + ai[row];
487       ap1      = aa + ai[row];
488       rmax1    = aimax[row];
489       nrow1    = ailen[row];
490       low1     = 0;
491       high1    = nrow1;
492       lastcol2 = -1;
493       rp2      = bj + bi[row];
494       ap2      = ba + bi[row];
495       rmax2    = bimax[row];
496       nrow2    = bilen[row];
497       low2     = 0;
498       high2    = nrow2;
499 
500       for (j=0; j<n; j++) {
501         if (v) {if (roworiented) value = v[i*n+j]; else value = v[i+j*m];} else value = 0.0;
502         if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
503         if (in[j] >= cstart && in[j] < cend){
504           col = in[j] - cstart;
505           MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
506         } else if (in[j] < 0) continue;
507 #if defined(PETSC_USE_DEBUG)
508         else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
509 #endif
510         else {
511           if (mat->was_assembled) {
512             if (!aij->colmap) {
513               ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
514             }
515 #if defined (PETSC_USE_CTABLE)
516             ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
517 	    col--;
518 #else
519             col = aij->colmap[in[j]] - 1;
520 #endif
521             if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
522               ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
523               col =  in[j];
524               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
525               B = aij->B;
526               b = (Mat_SeqAIJ*)B->data;
527               bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a;
528               rp2      = bj + bi[row];
529               ap2      = ba + bi[row];
530               rmax2    = bimax[row];
531               nrow2    = bilen[row];
532               low2     = 0;
533               high2    = nrow2;
534               bm       = aij->B->rmap->n;
535               ba = b->a;
536             }
537           } else col = in[j];
538           MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
539         }
540       }
541     } else {
542       if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
543       if (!aij->donotstash) {
544         if (roworiented) {
545           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
546         } else {
547           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
548         }
549       }
550     }
551   }
552   PetscFunctionReturn(0);
553 }
554 
555 #undef __FUNCT__
556 #define __FUNCT__ "MatGetValues_MPIAIJ"
557 PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
558 {
559   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
560   PetscErrorCode ierr;
561   PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
562   PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
563 
564   PetscFunctionBegin;
565   for (i=0; i<m; i++) {
566     if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/
567     if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
568     if (idxm[i] >= rstart && idxm[i] < rend) {
569       row = idxm[i] - rstart;
570       for (j=0; j<n; j++) {
571         if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */
572         if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
573         if (idxn[j] >= cstart && idxn[j] < cend){
574           col = idxn[j] - cstart;
575           ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
576         } else {
577           if (!aij->colmap) {
578             ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
579           }
580 #if defined (PETSC_USE_CTABLE)
581           ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
582           col --;
583 #else
584           col = aij->colmap[idxn[j]] - 1;
585 #endif
586           if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
587           else {
588             ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
589           }
590         }
591       }
592     } else {
593       SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
594     }
595   }
596   PetscFunctionReturn(0);
597 }
598 
599 extern PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat,Vec,Vec);
600 
601 #undef __FUNCT__
602 #define __FUNCT__ "MatAssemblyBegin_MPIAIJ"
603 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
604 {
605   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
606   PetscErrorCode ierr;
607   PetscInt       nstash,reallocs;
608   InsertMode     addv;
609 
610   PetscFunctionBegin;
611   if (aij->donotstash || mat->nooffprocentries) {
612     PetscFunctionReturn(0);
613   }
614 
615   /* make sure all processes are using either INSERT_VALUES or ADD_VALUES: the bitwise OR below equals ADD_VALUES|INSERT_VALUES only when both modes are present */
616   ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,((PetscObject)mat)->comm);CHKERRQ(ierr);
617   if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_WRONGSTATE,"Some processes inserted values while others added");
618   mat->insertmode = addv; /* in case this processor had no cache */
619 
620   ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
621   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
622   ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
623   PetscFunctionReturn(0);
624 }
625 
626 #undef __FUNCT__
627 #define __FUNCT__ "MatAssemblyEnd_MPIAIJ"
628 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
629 {
630   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
631   Mat_SeqAIJ     *a=(Mat_SeqAIJ *)aij->A->data;
632   PetscErrorCode ierr;
633   PetscMPIInt    n;
634   PetscInt       i,j,rstart,ncols,flg;
635   PetscInt       *row,*col;
636   PetscBool      other_disassembled;
637   PetscScalar    *val;
638   InsertMode     addv = mat->insertmode;
639 
640   /* do not use 'b = (Mat_SeqAIJ *)aij->B->data' as B can be reset in disassembly */
641   PetscFunctionBegin;
642   if (!aij->donotstash && !mat->nooffprocentries) {
643     while (1) {
644       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
645       if (!flg) break;
646 
647       for (i=0; i<n;) {
648         /* Now identify the consecutive vals belonging to the same row */
649         for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
650         if (j < n) ncols = j-i;
651         else       ncols = n-i;
652         /* Now assemble all these values with a single function call */
653         ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
654         i = j;
655       }
656     }
657     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
658   }
659   ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
660   ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);
661 
662   /* determine if any process has disassembled; if so, we must
663      also disassemble ourselves, in order that we may reassemble */
664   /*
665      if the nonzero structure of the submatrix B cannot change then we know that
666      no process disassembled, so we can skip this step
667   */
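  /*
     mat->was_assembled is 0 or 1 on each process, so the MPI_PROD reduction
     below yields 1 only when every process is still assembled; a 0 result
     means at least one process has disassembled and the others must follow
  */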
668   if (!((Mat_SeqAIJ*)aij->B->data)->nonew)  {
669     ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,((PetscObject)mat)->comm);CHKERRQ(ierr);
670     if (mat->was_assembled && !other_disassembled) {
671       ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
672     }
673   }
674   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
675     ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
676   }
677   ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr);
678   ierr = MatSetOption(aij->B,MAT_CHECK_COMPRESSED_ROW,PETSC_FALSE);CHKERRQ(ierr);
679   ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
680   ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);
681 
682   ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
683   aij->rowvalues = 0;
684 
685   /* used by MatAXPY() */
686   a->xtoy = 0; ((Mat_SeqAIJ *)aij->B->data)->xtoy = 0;  /* b->xtoy = 0 */
687   a->XtoY = 0; ((Mat_SeqAIJ *)aij->B->data)->XtoY = 0;  /* b->XtoY = 0 */
688 
689   ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
690   if (a->inode.size) mat->ops->multdiagonalblock = MatMultDiagonalBlock_MPIAIJ;
691   PetscFunctionReturn(0);
692 }
693 
694 #undef __FUNCT__
695 #define __FUNCT__ "MatZeroEntries_MPIAIJ"
696 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
697 {
698   Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
699   PetscErrorCode ierr;
700 
701   PetscFunctionBegin;
702   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
703   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
704   PetscFunctionReturn(0);
705 }
706 
707 #undef __FUNCT__
708 #define __FUNCT__ "MatZeroRows_MPIAIJ"
709 PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
710 {
711   Mat_MPIAIJ        *l = (Mat_MPIAIJ*)A->data;
712   PetscErrorCode    ierr;
713   PetscMPIInt       size = l->size,imdex,n,rank = l->rank,tag = ((PetscObject)A)->tag,lastidx = -1;
714   PetscInt          i,*owners = A->rmap->range;
715   PetscInt          *nprocs,j,idx,nsends,row;
716   PetscInt          nmax,*svalues,*starts,*owner,nrecvs;
717   PetscInt          *rvalues,count,base,slen,*source;
718   PetscInt          *lens,*lrows,*values,rstart=A->rmap->rstart;
719   MPI_Comm          comm = ((PetscObject)A)->comm;
720   MPI_Request       *send_waits,*recv_waits;
721   MPI_Status        recv_status,*send_status;
722   const PetscScalar *xx;
723   PetscScalar       *bb;
724 #if defined(PETSC_USE_DEBUG)
725   PetscBool      found = PETSC_FALSE;
726 #endif
727 
728   PetscFunctionBegin;
729   /*  first count number of contributors to each processor */
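  /* nprocs[2*j] counts the rows destined for process j; nprocs[2*j+1] is a 0/1
     flag recording whether any message will be sent to process j at all */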
730   ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
731   ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
732   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/
733   j = 0;
734   for (i=0; i<N; i++) {
735     if (lastidx > (idx = rows[i])) j = 0;
736     lastidx = idx;
737     for (; j<size; j++) {
738       if (idx >= owners[j] && idx < owners[j+1]) {
739         nprocs[2*j]++;
740         nprocs[2*j+1] = 1;
741         owner[i] = j;
742 #if defined(PETSC_USE_DEBUG)
743         found = PETSC_TRUE;
744 #endif
745         break;
746       }
747     }
748 #if defined(PETSC_USE_DEBUG)
749     if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
750     found = PETSC_FALSE;
751 #endif
752   }
753   nsends = 0;  for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}
754 
755   if (A->nooffproczerorows) {
756     if (nsends > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"You called MatSetOption(,MAT_NO_OFF_PROC_ZERO_ROWS,PETSC_TRUE) but set an off process zero row");
757     nrecvs = nsends;
758     nmax   = N;
759   } else {
760     /* inform other processors of number of messages and max length*/
761     ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);
762   }
763 
764   /* post receives:   */
765   ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr);
766   ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
767   for (i=0; i<nrecvs; i++) {
768     ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
769   }
770 
771   /* do sends:
772       1) starts[i] gives the starting index in svalues for stuff going to
773          the ith processor
774   */
775   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr);
776   ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
777   ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr);
778   starts[0] = 0;
779   for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
780   for (i=0; i<N; i++) {
781     svalues[starts[owner[i]]++] = rows[i];
782   }
783 
784   starts[0] = 0;
785   for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
786   count = 0;
787   for (i=0; i<size; i++) {
788     if (nprocs[2*i+1]) {
789       ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
790     }
791   }
792   ierr = PetscFree(starts);CHKERRQ(ierr);
793 
794   base = owners[rank];
795 
796   /*  wait on receives */
797   ierr   = PetscMalloc2(nrecvs,PetscInt,&lens,nrecvs,PetscInt,&source);CHKERRQ(ierr);
798   count  = nrecvs; slen = 0;
799   while (count) {
800     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
801     /* unpack receives into our local space */
802     ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);
803     source[imdex]  = recv_status.MPI_SOURCE;
804     lens[imdex]    = n;
805     slen          += n;
806     count--;
807   }
808   ierr = PetscFree(recv_waits);CHKERRQ(ierr);
809 
810   /* gather the received row indices into lrows */
811   ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr);
812   count = 0;
813   for (i=0; i<nrecvs; i++) {
814     values = rvalues + i*nmax;
815     for (j=0; j<lens[i]; j++) {
816       lrows[count++] = values[j] - base;
817     }
818   }
819   ierr = PetscFree(rvalues);CHKERRQ(ierr);
820   ierr = PetscFree2(lens,source);CHKERRQ(ierr);
821   ierr = PetscFree(owner);CHKERRQ(ierr);
822   ierr = PetscFree(nprocs);CHKERRQ(ierr);
823 
824   /* fix right hand side if needed */
825   if (x && b) {
826     ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr);
827     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
828     for (i=0; i<slen; i++) {
829       bb[lrows[i]] = diag*xx[lrows[i]];
830     }
831     ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr);
832     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
833   }
834   /*
835         Zero the required rows. If the "diagonal block" of the matrix
836      is square and the user wishes to set the diagonal we use separate
837      code so that MatSetValues() is not called for each diagonal allocating
838      new memory, thus calling lots of mallocs and slowing things down.
839 
840   */
841   /* must zero l->B before l->A because the (diag) case below may put values into l->B */
842   ierr = MatZeroRows(l->B,slen,lrows,0.0,0,0);CHKERRQ(ierr);
843   if ((diag != 0.0) && (l->A->rmap->N == l->A->cmap->N)) {
844     ierr = MatZeroRows(l->A,slen,lrows,diag,0,0);CHKERRQ(ierr);
845   } else if (diag != 0.0) {
846     ierr = MatZeroRows(l->A,slen,lrows,0.0,0,0);CHKERRQ(ierr);
847     if (((Mat_SeqAIJ*)l->A->data)->nonew) {
848       SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\n\
849 MAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
850     }
851     for (i = 0; i < slen; i++) {
852       row  = lrows[i] + rstart;
853       ierr = MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr);
854     }
855     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
856     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
857   } else {
858     ierr = MatZeroRows(l->A,slen,lrows,0.0,0,0);CHKERRQ(ierr);
859   }
860   ierr = PetscFree(lrows);CHKERRQ(ierr);
861 
862   /* wait on sends */
863   if (nsends) {
864     ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
865     ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
866     ierr = PetscFree(send_status);CHKERRQ(ierr);
867   }
868   ierr = PetscFree(send_waits);CHKERRQ(ierr);
869   ierr = PetscFree(svalues);CHKERRQ(ierr);
870   PetscFunctionReturn(0);
871 }
872 
873 #undef __FUNCT__
874 #define __FUNCT__ "MatZeroRowsColumns_MPIAIJ"
875 PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
876 {
877   Mat_MPIAIJ        *l = (Mat_MPIAIJ*)A->data;
878   PetscErrorCode    ierr;
879   PetscMPIInt       size = l->size,imdex,n,rank = l->rank,tag = ((PetscObject)A)->tag,lastidx = -1;
880   PetscInt          i,*owners = A->rmap->range;
881   PetscInt          *nprocs,j,idx,nsends;
882   PetscInt          nmax,*svalues,*starts,*owner,nrecvs;
883   PetscInt          *rvalues,count,base,slen,*source;
884   PetscInt          *lens,*lrows,*values,m;
885   MPI_Comm          comm = ((PetscObject)A)->comm;
886   MPI_Request       *send_waits,*recv_waits;
887   MPI_Status        recv_status,*send_status;
888   const PetscScalar *xx;
889   PetscScalar       *bb,*mask;
890   Vec               xmask,lmask;
891   Mat_SeqAIJ        *aij = (Mat_SeqAIJ*)l->B->data;
892   const PetscInt    *aj, *ii,*ridx;
893   PetscScalar       *aa;
894 #if defined(PETSC_USE_DEBUG)
895   PetscBool         found = PETSC_FALSE;
896 #endif
897 
898   PetscFunctionBegin;
899   /*  first count number of contributors to each processor */
900   ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
901   ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
902   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/
903   j = 0;
904   for (i=0; i<N; i++) {
905     if (lastidx > (idx = rows[i])) j = 0;
906     lastidx = idx;
907     for (; j<size; j++) {
908       if (idx >= owners[j] && idx < owners[j+1]) {
909         nprocs[2*j]++;
910         nprocs[2*j+1] = 1;
911         owner[i] = j;
912 #if defined(PETSC_USE_DEBUG)
913         found = PETSC_TRUE;
914 #endif
915         break;
916       }
917     }
918 #if defined(PETSC_USE_DEBUG)
919     if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
920     found = PETSC_FALSE;
921 #endif
922   }
923   nsends = 0;  for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}
924 
925   /* inform other processors of number of messages and max length*/
926   ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);
927 
928   /* post receives:   */
929   ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr);
930   ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
931   for (i=0; i<nrecvs; i++) {
932     ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
933   }
934 
935   /* do sends:
936       1) starts[i] gives the starting index in svalues for stuff going to
937          the ith processor
938   */
939   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr);
940   ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
941   ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr);
942   starts[0] = 0;
943   for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
944   for (i=0; i<N; i++) {
945     svalues[starts[owner[i]]++] = rows[i];
946   }
947 
948   starts[0] = 0;
949   for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
950   count = 0;
951   for (i=0; i<size; i++) {
952     if (nprocs[2*i+1]) {
953       ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
954     }
955   }
956   ierr = PetscFree(starts);CHKERRQ(ierr);
957 
958   base = owners[rank];
959 
960   /*  wait on receives */
961   ierr   = PetscMalloc2(nrecvs,PetscInt,&lens,nrecvs,PetscInt,&source);CHKERRQ(ierr);
962   count  = nrecvs; slen = 0;
963   while (count) {
964     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
965     /* unpack receives into our local space */
966     ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);
967     source[imdex]  = recv_status.MPI_SOURCE;
968     lens[imdex]    = n;
969     slen          += n;
970     count--;
971   }
972   ierr = PetscFree(recv_waits);CHKERRQ(ierr);
973 
974   /* gather the received row indices into lrows */
975   ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr);
976   count = 0;
977   for (i=0; i<nrecvs; i++) {
978     values = rvalues + i*nmax;
979     for (j=0; j<lens[i]; j++) {
980       lrows[count++] = values[j] - base;
981     }
982   }
983   ierr = PetscFree(rvalues);CHKERRQ(ierr);
984   ierr = PetscFree2(lens,source);CHKERRQ(ierr);
985   ierr = PetscFree(owner);CHKERRQ(ierr);
986   ierr = PetscFree(nprocs);CHKERRQ(ierr);
987   /* lrows are the local rows to be zeroed, slen is the number of local rows */
988 
989   /* zero diagonal part of matrix */
990   ierr = MatZeroRowsColumns(l->A,slen,lrows,diag,x,b);CHKERRQ(ierr);
991 
992   /* handle off diagonal part of matrix */
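  /* build a 0/1 mask over the owned rows marking those being zeroed, scatter it
     into the ghost (lvec) layout of B, then zero every off-diagonal entry lying
     in a masked column, first moving its contribution a_ij*x_j over to the
     right hand side b */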
993   ierr = MatGetVecs(A,&xmask,PETSC_NULL);CHKERRQ(ierr);
994   ierr = VecDuplicate(l->lvec,&lmask);CHKERRQ(ierr);
995   ierr = VecGetArray(xmask,&bb);CHKERRQ(ierr);
996   for (i=0; i<slen; i++) {
997     bb[lrows[i]] = 1;
998   }
999   ierr = VecRestoreArray(xmask,&bb);CHKERRQ(ierr);
1000   ierr = VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1001   ierr = VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1002   ierr = VecDestroy(&xmask);CHKERRQ(ierr);
1003   ierr = VecScatterBegin(l->Mvctx,x,l->lvec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1004   ierr = VecScatterEnd(l->Mvctx,x,l->lvec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1005   ierr = VecGetArrayRead(l->lvec,&xx);CHKERRQ(ierr);
1006   ierr = VecGetArray(lmask,&mask);CHKERRQ(ierr);
1007   ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
1008 
1009   /* remove zeroed rows of off diagonal matrix */
1010   ii = aij->i;
1011   for (i=0; i<slen; i++) {
1012     ierr = PetscMemzero(aij->a + ii[lrows[i]],(ii[lrows[i]+1] - ii[lrows[i]])*sizeof(PetscScalar));CHKERRQ(ierr);
1013   }
1014 
1015   /* loop over all elements of off process part of matrix zeroing removed columns*/
1016   if (aij->compressedrow.use){
1017     m    = aij->compressedrow.nrows;
1018     ii   = aij->compressedrow.i;
1019     ridx = aij->compressedrow.rindex;
1020     for (i=0; i<m; i++){
1021       n   = ii[i+1] - ii[i];
1022       aj  = aij->j + ii[i];
1023       aa  = aij->a + ii[i];
1024 
1025       for (j=0; j<n; j++) {
1026         if (PetscAbsScalar(mask[*aj])) {
1027           bb[*ridx] -= *aa*xx[*aj];
1028           *aa        = 0.0;
1029         }
1030         aa++;
1031         aj++;
1032       }
1033       ridx++;
1034     }
1035   } else { /* do not use compressed row format */
1036     m = l->B->rmap->n;
1037     for (i=0; i<m; i++) {
1038       n   = ii[i+1] - ii[i];
1039       aj  = aij->j + ii[i];
1040       aa  = aij->a + ii[i];
1041       for (j=0; j<n; j++) {
1042         if (PetscAbsScalar(mask[*aj])) {
1043           bb[i] -= *aa*xx[*aj];
1044           *aa    = 0.0;
1045         }
1046         aa++;
1047         aj++;
1048       }
1049     }
1050   }
1051   ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
1052   ierr = VecRestoreArray(lmask,&mask);CHKERRQ(ierr);
1053   ierr = VecRestoreArrayRead(l->lvec,&xx);CHKERRQ(ierr);
1054   ierr = VecDestroy(&lmask);CHKERRQ(ierr);
1055   ierr = PetscFree(lrows);CHKERRQ(ierr);
1056 
1057   /* wait on sends */
1058   if (nsends) {
1059     ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
1060     ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
1061     ierr = PetscFree(send_status);CHKERRQ(ierr);
1062   }
1063   ierr = PetscFree(send_waits);CHKERRQ(ierr);
1064   ierr = PetscFree(svalues);CHKERRQ(ierr);
1065 
1066   PetscFunctionReturn(0);
1067 }
1068 
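/*
   y = A*x for the distributed matrix: the forward scatter of the needed ghost
   values of x into lvec is started, the product with the local diagonal block
   proceeds while that communication is in flight, and once the scatter
   completes the off-diagonal block adds B*lvec, overlapping communication
   with computation.
*/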
1069 #undef __FUNCT__
1070 #define __FUNCT__ "MatMult_MPIAIJ"
1071 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
1072 {
1073   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1074   PetscErrorCode ierr;
1075   PetscInt       nt;
1076 
1077   PetscFunctionBegin;
1078   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
1079   if (nt != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt);
1080   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1081   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
1082   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1083   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
1084   PetscFunctionReturn(0);
1085 }
1086 
1087 #undef __FUNCT__
1088 #define __FUNCT__ "MatMultDiagonalBlock_MPIAIJ"
1089 PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx)
1090 {
1091   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1092   PetscErrorCode ierr;
1093 
1094   PetscFunctionBegin;
1095   ierr = MatMultDiagonalBlock(a->A,bb,xx);CHKERRQ(ierr);
1096   PetscFunctionReturn(0);
1097 }
1098 
1099 #undef __FUNCT__
1100 #define __FUNCT__ "MatMultAdd_MPIAIJ"
1101 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1102 {
1103   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1104   PetscErrorCode ierr;
1105 
1106   PetscFunctionBegin;
1107   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1108   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1109   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1110   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
1111   PetscFunctionReturn(0);
1112 }
1113 
1114 #undef __FUNCT__
1115 #define __FUNCT__ "MatMultTranspose_MPIAIJ"
1116 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
1117 {
1118   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1119   PetscErrorCode ierr;
1120   PetscBool      merged;
1121 
1122   PetscFunctionBegin;
1123   ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr);
1124   /* do nondiagonal part */
1125   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1126   if (!merged) {
1127     /* send it on its way */
1128     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1129     /* do local part */
1130     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
1131     /* receive remote parts: note this assumes the values are not actually
1132        added into yy until the VecScatterEnd() below */
1133     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1134   } else {
1135     /* do local part */
1136     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
1137     /* send it on its way */
1138     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1139     /* the values were actually received in the Begin(), but this no-op End() must still be called */
1140     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1141   }
1142   PetscFunctionReturn(0);
1143 }
1144 
1145 EXTERN_C_BEGIN
1146 #undef __FUNCT__
1147 #define __FUNCT__ "MatIsTranspose_MPIAIJ"
1148 PetscErrorCode  MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscBool  *f)
1149 {
1150   MPI_Comm       comm;
1151   Mat_MPIAIJ     *Aij = (Mat_MPIAIJ *) Amat->data, *Bij;
1152   Mat            Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs;
1153   IS             Me,Notme;
1154   PetscErrorCode ierr;
1155   PetscInt       M,N,first,last,*notme,i;
1156   PetscMPIInt    size;
1157 
1158   PetscFunctionBegin;
1159 
1160   /* Easy test: the diagonal blocks must be transposes of each other */
1161   Bij = (Mat_MPIAIJ *) Bmat->data; Bdia = Bij->A;
1162   ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr);
1163   if (!*f) PetscFunctionReturn(0);
1164   ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr);
1165   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1166   if (size == 1) PetscFunctionReturn(0);
1167 
1168   /* Hard test: off-diagonal block. This takes a MatGetSubMatrices() call. */
1169   ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr);
1170   ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr);
1171   ierr = PetscMalloc((N-last+first)*sizeof(PetscInt),&notme);CHKERRQ(ierr);
1172   for (i=0; i<first; i++) notme[i] = i;
1173   for (i=last; i<M; i++) notme[i-last+first] = i;
1174   ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,PETSC_COPY_VALUES,&Notme);CHKERRQ(ierr);
1175   ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr);
1176   ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr);
1177   Aoff = Aoffs[0];
1178   ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr);
1179   Boff = Boffs[0];
1180   ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr);
1181   ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr);
1182   ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr);
1183   ierr = ISDestroy(&Me);CHKERRQ(ierr);
1184   ierr = ISDestroy(&Notme);CHKERRQ(ierr);
1185   ierr = PetscFree(notme);CHKERRQ(ierr);
1186   PetscFunctionReturn(0);
1187 }
1188 EXTERN_C_END
1189 
1190 #undef __FUNCT__
1191 #define __FUNCT__ "MatMultTransposeAdd_MPIAIJ"
1192 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1193 {
1194   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1195   PetscErrorCode ierr;
1196 
1197   PetscFunctionBegin;
1198   /* do nondiagonal part */
1199   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1200   /* send it on its way */
1201   ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1202   /* do local part */
1203   ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1204   /* receive remote parts */
1205   ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1206   PetscFunctionReturn(0);
1207 }
1208 
1209 /*
1210   This only works correctly for square matrices where the subblock A->A is the
1211    diagonal block
1212 */
1213 #undef __FUNCT__
1214 #define __FUNCT__ "MatGetDiagonal_MPIAIJ"
1215 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v)
1216 {
1217   PetscErrorCode ierr;
1218   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1219 
1220   PetscFunctionBegin;
1221   if (A->rmap->N != A->cmap->N) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
1222   if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
1223   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
1224   PetscFunctionReturn(0);
1225 }
1226 
1227 #undef __FUNCT__
1228 #define __FUNCT__ "MatScale_MPIAIJ"
1229 PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa)
1230 {
1231   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1232   PetscErrorCode ierr;
1233 
1234   PetscFunctionBegin;
1235   ierr = MatScale(a->A,aa);CHKERRQ(ierr);
1236   ierr = MatScale(a->B,aa);CHKERRQ(ierr);
1237   PetscFunctionReturn(0);
1238 }
1239 
1240 #undef __FUNCT__
1241 #define __FUNCT__ "MatDestroy_MPIAIJ"
1242 PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
1243 {
1244   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1245   PetscErrorCode ierr;
1246 
1247   PetscFunctionBegin;
1248 #if defined(PETSC_USE_LOG)
1249   PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N);
1250 #endif
1251   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
1252   ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
1253   ierr = MatDestroy(&aij->A);CHKERRQ(ierr);
1254   ierr = MatDestroy(&aij->B);CHKERRQ(ierr);
1255 #if defined (PETSC_USE_CTABLE)
1256   ierr = PetscTableDestroy(&aij->colmap);CHKERRQ(ierr);
1257 #else
1258   ierr = PetscFree(aij->colmap);CHKERRQ(ierr);
1259 #endif
1260   ierr = PetscFree(aij->garray);CHKERRQ(ierr);
1261   ierr = VecDestroy(&aij->lvec);CHKERRQ(ierr);
1262   ierr = VecScatterDestroy(&aij->Mvctx);CHKERRQ(ierr);
1263   ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
1264   ierr = PetscFree(aij->ld);CHKERRQ(ierr);
1265   ierr = PetscFree(mat->data);CHKERRQ(ierr);
1266 
1267   ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
1268   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr);
1269   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr);
1270   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr);
1271   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C","",PETSC_NULL);CHKERRQ(ierr);
1272   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
1273   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr);
1274   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C","",PETSC_NULL);CHKERRQ(ierr);
1275   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C","",PETSC_NULL);CHKERRQ(ierr);
1276   PetscFunctionReturn(0);
1277 }
1278 
1279 #undef __FUNCT__
1280 #define __FUNCT__ "MatView_MPIAIJ_Binary"
1281 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
1282 {
1283   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
1284   Mat_SeqAIJ*       A = (Mat_SeqAIJ*)aij->A->data;
1285   Mat_SeqAIJ*       B = (Mat_SeqAIJ*)aij->B->data;
1286   PetscErrorCode    ierr;
1287   PetscMPIInt       rank,size,tag = ((PetscObject)viewer)->tag;
1288   int               fd;
1289   PetscInt          nz,header[4],*row_lengths,*range=0,rlen,i;
1290   PetscInt          nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap->rstart,rnz;
1291   PetscScalar       *column_values;
1292   PetscInt          message_count,flowcontrolcount;
1293 
1294   PetscFunctionBegin;
1295   ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr);
1296   ierr = MPI_Comm_size(((PetscObject)mat)->comm,&size);CHKERRQ(ierr);
1297   nz   = A->nz + B->nz;
1298   if (!rank) {
1299     header[0] = MAT_FILE_CLASSID;
1300     header[1] = mat->rmap->N;
1301     header[2] = mat->cmap->N;
1302     ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
1303     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1304     ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1305     /* get largest number of rows any processor has */
1306     rlen = mat->rmap->n;
1307     range = mat->rmap->range;
1308     for (i=1; i<size; i++) {
1309       rlen = PetscMax(rlen,range[i+1] - range[i]);
1310     }
1311   } else {
1312     ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
1313     rlen = mat->rmap->n;
1314   }
1315 
1316   /* load up the local row counts */
1317   ierr = PetscMalloc((rlen+1)*sizeof(PetscInt),&row_lengths);CHKERRQ(ierr);
1318   for (i=0; i<mat->rmap->n; i++) {
1319     row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
1320   }
1321 
1322   /* store the row lengths to the file */
1323   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1324   if (!rank) {
1325     MPI_Status status;
1326     ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap->n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1327     for (i=1; i<size; i++) {
1328       ierr = PetscViewerFlowControlStepMaster(viewer,i,message_count,flowcontrolcount);CHKERRQ(ierr);
1329       rlen = range[i+1] - range[i];
1330       ierr = MPI_Recv(row_lengths,rlen,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
1331       ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1332     }
1333     ierr = PetscViewerFlowControlEndMaster(viewer,message_count);CHKERRQ(ierr);
1334   } else {
1335     ierr = PetscViewerFlowControlStepWorker(viewer,rank,message_count);CHKERRQ(ierr);
1336     ierr = MPI_Send(row_lengths,mat->rmap->n,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1337     ierr = PetscViewerFlowControlEndWorker(viewer,message_count);CHKERRQ(ierr);
1338   }
1339   ierr = PetscFree(row_lengths);CHKERRQ(ierr);
1340 
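  /*
     The same rank-0 funneling pattern is used below for the column indices and
     for the values: rank 0 writes its own block, then receives and writes each
     other rank's block in turn, while the PetscViewerFlowControl*() calls limit
     how many worker processes may send at once.  A minimal sketch of the
     pattern (mydata/buf/counts are placeholders, not variables in this routine):

       if (!rank) {
         PetscBinaryWrite(fd,mydata,mycount,...);
         for (i=1; i<size; i++) {
           MPI_Recv(buf,maxcount,...,i,tag,comm,&status);
           PetscBinaryWrite(fd,buf,recvcount,...);
         }
       } else {
         MPI_Send(mydata,mycount,...,0,tag,comm);
       }
  */
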
1341   /* load up the local column indices */
1342   nzmax = nz; /* the 0th processor needs as much space as the largest processor needs */
1343   ierr = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
1344   ierr = PetscMalloc((nzmax+1)*sizeof(PetscInt),&column_indices);CHKERRQ(ierr);
1345   cnt  = 0;
1346   for (i=0; i<mat->rmap->n; i++) {
1347     for (j=B->i[i]; j<B->i[i+1]; j++) {
1348       if ((col = garray[B->j[j]]) > cstart) break;
1349       column_indices[cnt++] = col;
1350     }
1351     for (k=A->i[i]; k<A->i[i+1]; k++) {
1352       column_indices[cnt++] = A->j[k] + cstart;
1353     }
1354     for (; j<B->i[i+1]; j++) {
1355       column_indices[cnt++] = garray[B->j[j]];
1356     }
1357   }
1358   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
1359 
1360   /* store the column indices to the file */
1361   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1362   if (!rank) {
1363     MPI_Status status;
1364     ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1365     for (i=1; i<size; i++) {
1366       ierr = PetscViewerFlowControlStepMaster(viewer,i,message_count,flowcontrolcount);CHKERRQ(ierr);
1367       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
1368       if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: rnz = %D nzmax = %D",rnz,nzmax);
1369       ierr = MPI_Recv(column_indices,rnz,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
1370       ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1371     }
1372     ierr = PetscViewerFlowControlEndMaster(viewer,message_count);CHKERRQ(ierr);
1373   } else {
1374     ierr = PetscViewerFlowControlStepWorker(viewer,rank,message_count);CHKERRQ(ierr);
1375     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1376     ierr = MPI_Send(column_indices,nz,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1377     ierr = PetscViewerFlowControlEndWorker(viewer,message_count);CHKERRQ(ierr);
1378   }
1379   ierr = PetscFree(column_indices);CHKERRQ(ierr);
1380 
1381   /* load up the local column values */
1382   ierr = PetscMalloc((nzmax+1)*sizeof(PetscScalar),&column_values);CHKERRQ(ierr);
1383   cnt  = 0;
1384   for (i=0; i<mat->rmap->n; i++) {
1385     for (j=B->i[i]; j<B->i[i+1]; j++) {
1386       if (garray[B->j[j]] > cstart) break;
1387       column_values[cnt++] = B->a[j];
1388     }
1389     for (k=A->i[i]; k<A->i[i+1]; k++) {
1390       column_values[cnt++] = A->a[k];
1391     }
1392     for (; j<B->i[i+1]; j++) {
1393       column_values[cnt++] = B->a[j];
1394     }
1395   }
1396   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
1397 
1398   /* store the column values to the file */
1399   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1400   if (!rank) {
1401     MPI_Status status;
1402     ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1403     for (i=1; i<size; i++) {
1404       ierr = PetscViewerFlowControlStepMaster(viewer,i,message_count,flowcontrolcount);CHKERRQ(ierr);
1405       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
1406       if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: rnz = %D nzmax = %D",rnz,nzmax);
1407       ierr = MPI_Recv(column_values,rnz,MPIU_SCALAR,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
1408       ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1409     }
1410     ierr = PetscViewerFlowControlEndMaster(viewer,message_count);CHKERRQ(ierr);
1411   } else {
1412     ierr = PetscViewerFlowControlStepWorker(viewer,rank,message_count);CHKERRQ(ierr);
1413     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1414     ierr = MPI_Send(column_values,nz,MPIU_SCALAR,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1415     ierr = PetscViewerFlowControlEndWorker(viewer,message_count);CHKERRQ(ierr);
1416   }
1417   ierr = PetscFree(column_values);CHKERRQ(ierr);
1418   PetscFunctionReturn(0);
1419 }
1420 
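/*
   For reference, a sketch of the stream MatView_MPIAIJ_Binary() writes, read
   off the PetscBinaryWrite() calls above:

     header[0]                MAT_FILE_CLASSID
     header[1]                global number of rows M
     header[2]                global number of columns N
     header[3]                global number of nonzeros (sum over ranks of A->nz + B->nz)
     M  PetscInt entries      row lengths
     nz PetscInt entries      global column indices, per row: B entries with
                              global column < cstart, then A entries shifted by
                              cstart, then the remaining B entries
     nz PetscScalar entries   values, in the same order as the column indices
*/
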
1421 #undef __FUNCT__
1422 #define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket"
1423 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
1424 {
1425   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
1426   PetscErrorCode    ierr;
1427   PetscMPIInt       rank = aij->rank,size = aij->size;
1428   PetscBool         isdraw,iascii,isbinary;
1429   PetscViewer       sviewer;
1430   PetscViewerFormat format;
1431 
1432   PetscFunctionBegin;
1433   ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1434   ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1435   ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1436   if (iascii) {
1437     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1438     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1439       MatInfo    info;
1440       PetscInt   *inodes;
1441 
1442       ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr);
1443       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
1444       ierr = MatInodeGetInodeSizes(aij->A,PETSC_NULL,&inodes,PETSC_NULL);CHKERRQ(ierr);
1445       ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_TRUE);CHKERRQ(ierr);
1446       if (!inodes) {
1447         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, not using I-node routines\n",
1448                                                   rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
1449       } else {
1450         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, using I-node routines\n",
1451                                                   rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
1452       }
1453       ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
1454       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1455       ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
1456       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1457       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1458       ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_FALSE);CHKERRQ(ierr);
1459       ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
1460       ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr);
1461       PetscFunctionReturn(0);
1462     } else if (format == PETSC_VIEWER_ASCII_INFO) {
1463       PetscInt   inodecount,inodelimit,*inodes;
1464       ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr);
1465       if (inodes) {
1466         ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr);
1467       } else {
1468         ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr);
1469       }
1470       PetscFunctionReturn(0);
1471     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1472       PetscFunctionReturn(0);
1473     }
1474   } else if (isbinary) {
1475     if (size == 1) {
1476       ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1477       ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
1478     } else {
1479       ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr);
1480     }
1481     PetscFunctionReturn(0);
1482   } else if (isdraw) {
1483     PetscDraw  draw;
1484     PetscBool  isnull;
1485     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
1486     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
1487   }
1488 
1489   if (size == 1) {
1490     ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1491     ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
1492   } else {
1493     /* assemble the entire matrix onto first processor. */
1494     Mat         A;
1495     Mat_SeqAIJ  *Aloc;
1496     PetscInt    M = mat->rmap->N,N = mat->cmap->N,m,*ai,*aj,row,*cols,i,*ct;
1497     MatScalar   *a;
1498 
1499     if (mat->rmap->N > 1024) {
1500       PetscBool  flg = PETSC_FALSE;
1501 
1502       ierr = PetscOptionsGetBool(((PetscObject) mat)->prefix, "-mat_ascii_output_large", &flg,PETSC_NULL);CHKERRQ(ierr);
1503       if (!flg) {
1504         SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_OUTOFRANGE,"ASCII matrix output not allowed for matrices with more than 1024 rows, use binary format instead.\nYou can override this restriction using -mat_ascii_output_large.");
1505       }
1506     }
1507 
1508     ierr = MatCreate(((PetscObject)mat)->comm,&A);CHKERRQ(ierr);
1509     if (!rank) {
1510       ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
1511     } else {
1512       ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
1513     }
1514     /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */
1515     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
1516     ierr = MatMPIAIJSetPreallocation(A,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
1517     ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr);
1518 
1519     /* copy over the A part */
1520     Aloc = (Mat_SeqAIJ*)aij->A->data;
1521     m = aij->A->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1522     row = mat->rmap->rstart;
1523     for (i=0; i<ai[m]; i++) {aj[i] += mat->cmap->rstart;}
1524     for (i=0; i<m; i++) {
1525       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
1526       row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
1527     }
1528     aj = Aloc->j;
1529     for (i=0; i<ai[m]; i++) {aj[i] -= mat->cmap->rstart;}
1530 
1531     /* copy over the B part */
1532     Aloc = (Mat_SeqAIJ*)aij->B->data;
1533     m    = aij->B->rmap->n;  ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1534     row  = mat->rmap->rstart;
1535     ierr = PetscMalloc((ai[m]+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr);
1536     ct   = cols;
1537     for (i=0; i<ai[m]; i++) {cols[i] = aij->garray[aj[i]];}
1538     for (i=0; i<m; i++) {
1539       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
1540       row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
1541     }
1542     ierr = PetscFree(ct);CHKERRQ(ierr);
1543     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1544     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1545     /*
1546        Everyone has to call to draw the matrix since the graphics waits are
1547        synchronized across all processors that share the PetscDraw object
1548     */
1549     ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
1550     if (!rank) {
1551       ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1552       /* Set the type name to MATMPIAIJ so that the correct type can be printed out by PetscObjectPrintClassNamePrefixType() in MatView_SeqAIJ_ASCII()*/
1553       ierr = PetscStrcpy(((PetscObject)((Mat_MPIAIJ*)(A->data))->A)->type_name,MATMPIAIJ);CHKERRQ(ierr);
1554       ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
1555     }
1556     ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
1557     ierr = MatDestroy(&A);CHKERRQ(ierr);
1558   }
1559   PetscFunctionReturn(0);
1560 }
1561 
1562 #undef __FUNCT__
1563 #define __FUNCT__ "MatView_MPIAIJ"
1564 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer)
1565 {
1566   PetscErrorCode ierr;
1567   PetscBool      iascii,isdraw,issocket,isbinary;
1568 
1569   PetscFunctionBegin;
1570   ierr  = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1571   ierr  = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1572   ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1573   ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
1574   if (iascii || isdraw || isbinary || issocket) {
1575     ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
1576   } else {
1577     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Viewer type %s not supported by MPIAIJ matrices",((PetscObject)viewer)->type_name);
1578   }
1579   PetscFunctionReturn(0);
1580 }
1581 
1582 #undef __FUNCT__
1583 #define __FUNCT__ "MatSOR_MPIAIJ"
1584 PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
1585 {
1586   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1587   PetscErrorCode ierr;
1588   Vec            bb1 = 0;
1589   PetscBool      hasop;
1590 
1591   PetscFunctionBegin;
1592   if (its > 1 || !(flag & SOR_ZERO_INITIAL_GUESS) || flag & SOR_EISENSTAT) {
1593     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
1594   }
1595 
1596   if (flag == SOR_APPLY_UPPER) {
1597     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1598     PetscFunctionReturn(0);
1599   }
1600 
1601   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
1602     if (flag & SOR_ZERO_INITIAL_GUESS) {
1603       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1604       its--;
1605     }
1606 
1607     while (its--) {
1608       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1609       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1610 
1611       /* update rhs: bb1 = bb - B*x */
1612       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1613       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1614 
1615       /* local sweep */
1616       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1617     }
1618   } else if (flag & SOR_LOCAL_FORWARD_SWEEP){
1619     if (flag & SOR_ZERO_INITIAL_GUESS) {
1620       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1621       its--;
1622     }
1623     while (its--) {
1624       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1625       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1626 
1627       /* update rhs: bb1 = bb - B*x */
1628       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1629       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1630 
1631       /* local sweep */
1632       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1633     }
1634   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){
1635     if (flag & SOR_ZERO_INITIAL_GUESS) {
1636       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1637       its--;
1638     }
1639     while (its--) {
1640       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1641       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1642 
1643       /* update rhs: bb1 = bb - B*x */
1644       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1645       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1646 
1647       /* local sweep */
1648       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1649     }
1650   }  else if (flag & SOR_EISENSTAT) {
1651     Vec         xx1;
1652 
1653     ierr = VecDuplicate(bb,&xx1);CHKERRQ(ierr);
1654     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr);
1655 
1656     ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1657     ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1658     if (!mat->diag) {
1659       ierr = MatGetVecs(matin,&mat->diag,PETSC_NULL);CHKERRQ(ierr);
1660       ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr);
1661     }
1662     ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr);
1663     if (hasop) {
1664       ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr);
1665     } else {
1666       ierr = VecPointwiseMult(bb1,mat->diag,xx);CHKERRQ(ierr);
1667     }
1668     ierr = VecAYPX(bb1,(omega-2.0)/omega,bb);CHKERRQ(ierr);
1669 
1670     ierr = MatMultAdd(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr);
1671 
1672     /* local sweep */
1673     ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr);
1674     ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr);
1675     ierr = VecDestroy(&xx1);CHKERRQ(ierr);
1676   } else SETERRQ(((PetscObject)matin)->comm,PETSC_ERR_SUP,"Parallel SOR not supported");
1677 
1678   ierr = VecDestroy(&bb1);CHKERRQ(ierr);
1679   PetscFunctionReturn(0);
1680 }
1681 
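/*
   The SOR_LOCAL_* sweeps above implement process-block Jacobi with SOR as the
   local solver: each outer iteration scatters x into the ghosted vector lvec,
   forms bb1 = bb - B*x (B carries the off-process coupling) and relaxes the
   on-process block A against bb1.  The SOR_EISENSTAT branch instead combines a
   backward and a forward local sweep with a diagonal correction.  One way to
   reach this code path directly (a sketch):

     ierr = MatSOR(mat,b,1.0,(MatSORType)(SOR_LOCAL_SYMMETRIC_SWEEP | SOR_ZERO_INITIAL_GUESS),0.0,1,1,x);CHKERRQ(ierr);
*/
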
1682 #undef __FUNCT__
1683 #define __FUNCT__ "MatPermute_MPIAIJ"
1684 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B)
1685 {
1686   MPI_Comm       comm,pcomm;
1687   PetscInt       first,local_size,nrows;
1688   const PetscInt *rows;
1689   PetscMPIInt    size;
1690   IS             crowp,growp,irowp,lrowp,lcolp,icolp;
1691   PetscErrorCode ierr;
1692 
1693   PetscFunctionBegin;
1694   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
1695   /* make a collective version of 'rowp' */
1696   ierr = PetscObjectGetComm((PetscObject)rowp,&pcomm);CHKERRQ(ierr);
1697   if (pcomm==comm) {
1698     crowp = rowp;
1699   } else {
1700     ierr = ISGetSize(rowp,&nrows);CHKERRQ(ierr);
1701     ierr = ISGetIndices(rowp,&rows);CHKERRQ(ierr);
1702     ierr = ISCreateGeneral(comm,nrows,rows,PETSC_COPY_VALUES,&crowp);CHKERRQ(ierr);
1703     ierr = ISRestoreIndices(rowp,&rows);CHKERRQ(ierr);
1704   }
1705   /* collect the global row permutation and invert it */
1706   ierr = ISAllGather(crowp,&growp);CHKERRQ(ierr);
1707   ierr = ISSetPermutation(growp);CHKERRQ(ierr);
1708   if (pcomm!=comm) {
1709     ierr = ISDestroy(&crowp);CHKERRQ(ierr);
1710   }
1711   ierr = ISInvertPermutation(growp,PETSC_DECIDE,&irowp);CHKERRQ(ierr);
1712   /* get the local target indices */
1713   ierr = MatGetOwnershipRange(A,&first,PETSC_NULL);CHKERRQ(ierr);
1714   ierr = MatGetLocalSize(A,&local_size,PETSC_NULL);CHKERRQ(ierr);
1715   ierr = ISGetIndices(irowp,&rows);CHKERRQ(ierr);
1716   ierr = ISCreateGeneral(MPI_COMM_SELF,local_size,rows+first,PETSC_COPY_VALUES,&lrowp);CHKERRQ(ierr);
1717   ierr = ISRestoreIndices(irowp,&rows);CHKERRQ(ierr);
1718   ierr = ISDestroy(&irowp);CHKERRQ(ierr);
1719   /* the column permutation is much simpler;
1720      make a local version of 'colp' and invert it */
1721   ierr = PetscObjectGetComm((PetscObject)colp,&pcomm);CHKERRQ(ierr);
1722   ierr = MPI_Comm_size(pcomm,&size);CHKERRQ(ierr);
1723   if (size==1) {
1724     lcolp = colp;
1725   } else {
1726     ierr = ISGetSize(colp,&nrows);CHKERRQ(ierr);
1727     ierr = ISGetIndices(colp,&rows);CHKERRQ(ierr);
1728     ierr = ISCreateGeneral(MPI_COMM_SELF,nrows,rows,PETSC_COPY_VALUES,&lcolp);CHKERRQ(ierr);
1729   }
1730   ierr = ISSetPermutation(lcolp);CHKERRQ(ierr);
1731   ierr = ISInvertPermutation(lcolp,PETSC_DECIDE,&icolp);CHKERRQ(ierr);
1732   ierr = ISSetPermutation(icolp);CHKERRQ(ierr);
1733   if (size>1) {
1734     ierr = ISRestoreIndices(colp,&rows);CHKERRQ(ierr);
1735     ierr = ISDestroy(&lcolp);CHKERRQ(ierr);
1736   }
1737   /* now we just get the submatrix */
1738   ierr = MatGetSubMatrix_MPIAIJ_Private(A,lrowp,icolp,local_size,MAT_INITIAL_MATRIX,B);CHKERRQ(ierr);
1739   /* clean up */
1740   ierr = ISDestroy(&lrowp);CHKERRQ(ierr);
1741   ierr = ISDestroy(&icolp);CHKERRQ(ierr);
1742   PetscFunctionReturn(0);
1743 }
1744 
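/*
   Usage sketch: rowp and colp must be permutations of the global row and
   column index sets; the permuted matrix is then extracted as a submatrix
   using the inverted permutations computed above:

     IS  rowp,colp;
     Mat B;
     ierr = MatPermute(A,rowp,colp,&B);CHKERRQ(ierr);
*/
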
1745 #undef __FUNCT__
1746 #define __FUNCT__ "MatGetInfo_MPIAIJ"
1747 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1748 {
1749   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1750   Mat            A = mat->A,B = mat->B;
1751   PetscErrorCode ierr;
1752   PetscReal      isend[5],irecv[5];
1753 
1754   PetscFunctionBegin;
1755   info->block_size     = 1.0;
1756   ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1757   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1758   isend[3] = info->memory;  isend[4] = info->mallocs;
1759   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1760   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1761   isend[3] += info->memory;  isend[4] += info->mallocs;
1762   if (flag == MAT_LOCAL) {
1763     info->nz_used      = isend[0];
1764     info->nz_allocated = isend[1];
1765     info->nz_unneeded  = isend[2];
1766     info->memory       = isend[3];
1767     info->mallocs      = isend[4];
1768   } else if (flag == MAT_GLOBAL_MAX) {
1769     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,((PetscObject)matin)->comm);CHKERRQ(ierr);
1770     info->nz_used      = irecv[0];
1771     info->nz_allocated = irecv[1];
1772     info->nz_unneeded  = irecv[2];
1773     info->memory       = irecv[3];
1774     info->mallocs      = irecv[4];
1775   } else if (flag == MAT_GLOBAL_SUM) {
1776     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,((PetscObject)matin)->comm);CHKERRQ(ierr);
1777     info->nz_used      = irecv[0];
1778     info->nz_allocated = irecv[1];
1779     info->nz_unneeded  = irecv[2];
1780     info->memory       = irecv[3];
1781     info->mallocs      = irecv[4];
1782   }
1783   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1784   info->fill_ratio_needed = 0;
1785   info->factor_mallocs    = 0;
1786 
1787   PetscFunctionReturn(0);
1788 }
1789 
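/*
   Usage sketch: the flag selects how the five statistics gathered above are
   combined across the communicator (per process, maximum, or sum):

     MatInfo info;
     ierr = MatGetInfo(mat,MAT_GLOBAL_SUM,&info);CHKERRQ(ierr);
     ierr = PetscPrintf(PETSC_COMM_WORLD,"nonzeros used %g\n",info.nz_used);CHKERRQ(ierr);
*/
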
1790 #undef __FUNCT__
1791 #define __FUNCT__ "MatSetOption_MPIAIJ"
1792 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscBool  flg)
1793 {
1794   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1795   PetscErrorCode ierr;
1796 
1797   PetscFunctionBegin;
1798   switch (op) {
1799   case MAT_NEW_NONZERO_LOCATIONS:
1800   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1801   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1802   case MAT_KEEP_NONZERO_PATTERN:
1803   case MAT_NEW_NONZERO_LOCATION_ERR:
1804   case MAT_USE_INODES:
1805   case MAT_IGNORE_ZERO_ENTRIES:
1806     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1807     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1808     break;
1809   case MAT_ROW_ORIENTED:
1810     a->roworiented = flg;
1811     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1812     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1813     break;
1814   case MAT_NEW_DIAGONALS:
1815     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1816     break;
1817   case MAT_IGNORE_OFF_PROC_ENTRIES:
1818     a->donotstash = flg;
1819     break;
1820   case MAT_SPD:
1821     A->spd_set                         = PETSC_TRUE;
1822     A->spd                             = flg;
1823     if (flg) {
1824       A->symmetric                     = PETSC_TRUE;
1825       A->structurally_symmetric        = PETSC_TRUE;
1826       A->symmetric_set                 = PETSC_TRUE;
1827       A->structurally_symmetric_set    = PETSC_TRUE;
1828     }
1829     break;
1830   case MAT_SYMMETRIC:
1831     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1832     break;
1833   case MAT_STRUCTURALLY_SYMMETRIC:
1834     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1835     break;
1836   case MAT_HERMITIAN:
1837     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1838     break;
1839   case MAT_SYMMETRY_ETERNAL:
1840     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1841     break;
1842   default:
1843     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op);
1844   }
1845   PetscFunctionReturn(0);
1846 }
1847 
1848 #undef __FUNCT__
1849 #define __FUNCT__ "MatGetRow_MPIAIJ"
1850 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1851 {
1852   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1853   PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
1854   PetscErrorCode ierr;
1855   PetscInt       i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart;
1856   PetscInt       nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend;
1857   PetscInt       *cmap,*idx_p;
1858 
1859   PetscFunctionBegin;
1860   if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
1861   mat->getrowactive = PETSC_TRUE;
1862 
1863   if (!mat->rowvalues && (idx || v)) {
1864     /*
1865         allocate enough space to hold information from the longest row.
1866     */
1867     Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data;
1868     PetscInt   max = 1,tmp;
1869     for (i=0; i<matin->rmap->n; i++) {
1870       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1871       if (max < tmp) { max = tmp; }
1872     }
1873     ierr = PetscMalloc2(max,PetscScalar,&mat->rowvalues,max,PetscInt,&mat->rowindices);CHKERRQ(ierr);
1874   }
1875 
1876   if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows");
1877   lrow = row - rstart;
1878 
1879   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1880   if (!v)   {pvA = 0; pvB = 0;}
1881   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1882   ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1883   ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1884   nztot = nzA + nzB;
1885 
1886   cmap  = mat->garray;
1887   if (v  || idx) {
1888     if (nztot) {
1889       /* Sort by increasing column numbers, assuming A and B already sorted */
1890       PetscInt imark = -1;
1891       if (v) {
1892         *v = v_p = mat->rowvalues;
1893         for (i=0; i<nzB; i++) {
1894           if (cmap[cworkB[i]] < cstart)   v_p[i] = vworkB[i];
1895           else break;
1896         }
1897         imark = i;
1898         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1899         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1900       }
1901       if (idx) {
1902         *idx = idx_p = mat->rowindices;
1903         if (imark > -1) {
1904           for (i=0; i<imark; i++) {
1905             idx_p[i] = cmap[cworkB[i]];
1906           }
1907         } else {
1908           for (i=0; i<nzB; i++) {
1909             if (cmap[cworkB[i]] < cstart)   idx_p[i] = cmap[cworkB[i]];
1910             else break;
1911           }
1912           imark = i;
1913         }
1914         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart + cworkA[i];
1915         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]];
1916       }
1917     } else {
1918       if (idx) *idx = 0;
1919       if (v)   *v   = 0;
1920     }
1921   }
1922   *nz = nztot;
1923   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1924   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1925   PetscFunctionReturn(0);
1926 }
1927 
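/*
   Note on the merge above: the row is returned with globally increasing column
   numbers by splicing together the B (off-diagonal) entries whose global
   column is < cstart, the A (diagonal-block) entries shifted by cstart, and
   the remaining B entries.  Standard usage, always paired with MatRestoreRow()
   (a sketch):

     PetscInt          ncols;
     const PetscInt    *cols;
     const PetscScalar *vals;
     ierr = MatGetRow(mat,row,&ncols,&cols,&vals);CHKERRQ(ierr);
     ... use cols[0..ncols-1] and vals[0..ncols-1] ...
     ierr = MatRestoreRow(mat,row,&ncols,&cols,&vals);CHKERRQ(ierr);
*/
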
1928 #undef __FUNCT__
1929 #define __FUNCT__ "MatRestoreRow_MPIAIJ"
1930 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1931 {
1932   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1933 
1934   PetscFunctionBegin;
1935   if (!aij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
1936   aij->getrowactive = PETSC_FALSE;
1937   PetscFunctionReturn(0);
1938 }
1939 
1940 #undef __FUNCT__
1941 #define __FUNCT__ "MatNorm_MPIAIJ"
1942 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm)
1943 {
1944   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1945   Mat_SeqAIJ     *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data;
1946   PetscErrorCode ierr;
1947   PetscInt       i,j,cstart = mat->cmap->rstart;
1948   PetscReal      sum = 0.0;
1949   MatScalar      *v;
1950 
1951   PetscFunctionBegin;
1952   if (aij->size == 1) {
1953     ierr =  MatNorm(aij->A,type,norm);CHKERRQ(ierr);
1954   } else {
1955     if (type == NORM_FROBENIUS) {
1956       v = amat->a;
1957       for (i=0; i<amat->nz; i++) {
1958 #if defined(PETSC_USE_COMPLEX)
1959         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1960 #else
1961         sum += (*v)*(*v); v++;
1962 #endif
1963       }
1964       v = bmat->a;
1965       for (i=0; i<bmat->nz; i++) {
1966 #if defined(PETSC_USE_COMPLEX)
1967         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1968 #else
1969         sum += (*v)*(*v); v++;
1970 #endif
1971       }
1972       ierr = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPIU_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr);
1973       *norm = PetscSqrtReal(*norm);
1974     } else if (type == NORM_1) { /* max column norm */
1975       PetscReal *tmp,*tmp2;
1976       PetscInt  *jj,*garray = aij->garray;
1977       ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr);
1978       ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscReal),&tmp2);CHKERRQ(ierr);
1979       ierr = PetscMemzero(tmp,mat->cmap->N*sizeof(PetscReal));CHKERRQ(ierr);
1980       *norm = 0.0;
1981       v = amat->a; jj = amat->j;
1982       for (j=0; j<amat->nz; j++) {
1983         tmp[cstart + *jj++ ] += PetscAbsScalar(*v);  v++;
1984       }
1985       v = bmat->a; jj = bmat->j;
1986       for (j=0; j<bmat->nz; j++) {
1987         tmp[garray[*jj++]] += PetscAbsScalar(*v); v++;
1988       }
1989       ierr = MPI_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr);
1990       for (j=0; j<mat->cmap->N; j++) {
1991         if (tmp2[j] > *norm) *norm = tmp2[j];
1992       }
1993       ierr = PetscFree(tmp);CHKERRQ(ierr);
1994       ierr = PetscFree(tmp2);CHKERRQ(ierr);
1995     } else if (type == NORM_INFINITY) { /* max row norm */
1996       PetscReal ntemp = 0.0;
1997       for (j=0; j<aij->A->rmap->n; j++) {
1998         v = amat->a + amat->i[j];
1999         sum = 0.0;
2000         for (i=0; i<amat->i[j+1]-amat->i[j]; i++) {
2001           sum += PetscAbsScalar(*v); v++;
2002         }
2003         v = bmat->a + bmat->i[j];
2004         for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) {
2005           sum += PetscAbsScalar(*v); v++;
2006         }
2007         if (sum > ntemp) ntemp = sum;
2008       }
2009       ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPIU_MAX,((PetscObject)mat)->comm);CHKERRQ(ierr);
2010     } else {
2011       SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_SUP,"No support for two norm");
2012     }
2013   }
2014   PetscFunctionReturn(0);
2015 }
2016 
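/*
   The norms computed above, in matrix terms:

     NORM_FROBENIUS:  sqrt( sum_ij |a_ij|^2 )   local sums of A and B reduced with MPIU_SUM
     NORM_1:          max_j sum_i |a_ij|        per-column sums reduced with MPIU_SUM, then the max is taken
     NORM_INFINITY:   max_i sum_j |a_ij|        per-row sums reduced with MPIU_MAX
*/
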
2017 #undef __FUNCT__
2018 #define __FUNCT__ "MatTranspose_MPIAIJ"
2019 PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout)
2020 {
2021   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2022   Mat_SeqAIJ     *Aloc=(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data;
2023   PetscErrorCode ierr;
2024   PetscInt       M = A->rmap->N,N = A->cmap->N,ma,na,mb,*ai,*aj,*bi,*bj,row,*cols,*cols_tmp,i,*d_nnz;
2025   PetscInt       cstart=A->cmap->rstart,ncol;
2026   Mat            B;
2027   MatScalar      *array;
2028 
2029   PetscFunctionBegin;
2030   if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
2031 
2032   ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n;
2033   ai = Aloc->i; aj = Aloc->j;
2034   bi = Bloc->i; bj = Bloc->j;
2035   if (reuse == MAT_INITIAL_MATRIX || *matout == A) {
2036     /* compute d_nnz for preallocation; o_nnz is approximated by d_nnz to avoid communication */
2037     ierr = PetscMalloc((1+na)*sizeof(PetscInt),&d_nnz);CHKERRQ(ierr);
2038     ierr = PetscMemzero(d_nnz,(1+na)*sizeof(PetscInt));CHKERRQ(ierr);
2039     for (i=0; i<ai[ma]; i++){
2040       d_nnz[aj[i]] ++;
2041       aj[i] += cstart; /* global col index to be used by MatSetValues() */
2042     }
2043 
2044     ierr = MatCreate(((PetscObject)A)->comm,&B);CHKERRQ(ierr);
2045     ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
2046     ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
2047     ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,d_nnz);CHKERRQ(ierr);
2048     ierr = PetscFree(d_nnz);CHKERRQ(ierr);
2049   } else {
2050     B = *matout;
2051   }
2052 
2053   /* copy over the A part */
2054   array = Aloc->a;
2055   row = A->rmap->rstart;
2056   for (i=0; i<ma; i++) {
2057     ncol = ai[i+1]-ai[i];
2058     ierr = MatSetValues(B,ncol,aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
2059     row++; array += ncol; aj += ncol;
2060   }
2061   aj = Aloc->j;
2062   for (i=0; i<ai[ma]; i++) aj[i] -= cstart; /* restore the local col indices */
2063 
2064   /* copy over the B part */
2065   ierr = PetscMalloc(bi[mb]*sizeof(PetscInt),&cols);CHKERRQ(ierr);
2066   ierr = PetscMemzero(cols,bi[mb]*sizeof(PetscInt));CHKERRQ(ierr);
2067   array = Bloc->a;
2068   row = A->rmap->rstart;
2069   for (i=0; i<bi[mb]; i++) {cols[i] = a->garray[bj[i]];}
2070   cols_tmp = cols;
2071   for (i=0; i<mb; i++) {
2072     ncol = bi[i+1]-bi[i];
2073     ierr = MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
2074     row++; array += ncol; cols_tmp += ncol;
2075   }
2076   ierr = PetscFree(cols);CHKERRQ(ierr);
2077 
2078   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2079   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2080   if (reuse == MAT_INITIAL_MATRIX || *matout != A) {
2081     *matout = B;
2082   } else {
2083     ierr = MatHeaderMerge(A,B);CHKERRQ(ierr);
2084   }
2085   PetscFunctionReturn(0);
2086 }
2087 
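/*
   Usage sketch: MAT_INITIAL_MATRIX creates B = A^T, while MAT_REUSE_MATRIX
   with *matout == A performs the in-place transpose (square matrices only,
   as checked above):

     Mat At;
     ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr);
     ierr = MatDestroy(&At);CHKERRQ(ierr);
*/
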
2088 #undef __FUNCT__
2089 #define __FUNCT__ "MatDiagonalScale_MPIAIJ"
2090 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
2091 {
2092   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2093   Mat            a = aij->A,b = aij->B;
2094   PetscErrorCode ierr;
2095   PetscInt       s1,s2,s3;
2096 
2097   PetscFunctionBegin;
2098   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
2099   if (rr) {
2100     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
2101     if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
2102     /* Overlap communication with computation. */
2103     ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2104   }
2105   if (ll) {
2106     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
2107     if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
2108     ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr);
2109   }
2110   /* scale the diagonal block */
2111   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
2112 
2113   if (rr) {
2114     /* Do a scatter end and then right scale the off-diagonal block */
2115     ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2116     ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr);
2117   }
2118 
2119   PetscFunctionReturn(0);
2120 }
2121 
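/*
   This computes mat = diag(ll)*mat*diag(rr).  Right-scaling the off-diagonal
   block needs the ghost values of rr, hence the scatter is begun before the
   diagonal-block work and completed just before b is scaled.  Usage sketch,
   passing PETSC_NULL to skip a side (here only the rows are scaled):

     ierr = MatDiagonalScale(mat,l,PETSC_NULL);CHKERRQ(ierr);
*/
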
2122 #undef __FUNCT__
2123 #define __FUNCT__ "MatSetBlockSize_MPIAIJ"
2124 PetscErrorCode MatSetBlockSize_MPIAIJ(Mat A,PetscInt bs)
2125 {
2126   Mat_MPIAIJ     *a   = (Mat_MPIAIJ*)A->data;
2127   PetscErrorCode ierr;
2128 
2129   PetscFunctionBegin;
2130   ierr = MatSetBlockSize(a->A,bs);CHKERRQ(ierr);
2131   ierr = MatSetBlockSize(a->B,bs);CHKERRQ(ierr);
2132   ierr = PetscLayoutSetBlockSize(A->rmap,bs);CHKERRQ(ierr);
2133   ierr = PetscLayoutSetBlockSize(A->cmap,bs);CHKERRQ(ierr);
2134   PetscFunctionReturn(0);
2135 }
2136 #undef __FUNCT__
2137 #define __FUNCT__ "MatSetUnfactored_MPIAIJ"
2138 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A)
2139 {
2140   Mat_MPIAIJ     *a   = (Mat_MPIAIJ*)A->data;
2141   PetscErrorCode ierr;
2142 
2143   PetscFunctionBegin;
2144   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
2145   PetscFunctionReturn(0);
2146 }
2147 
2148 #undef __FUNCT__
2149 #define __FUNCT__ "MatEqual_MPIAIJ"
2150 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscBool  *flag)
2151 {
2152   Mat_MPIAIJ     *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data;
2153   Mat            a,b,c,d;
2154   PetscBool      flg;
2155   PetscErrorCode ierr;
2156 
2157   PetscFunctionBegin;
2158   a = matA->A; b = matA->B;
2159   c = matB->A; d = matB->B;
2160 
2161   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
2162   if (flg) {
2163     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
2164   }
2165   ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,((PetscObject)A)->comm);CHKERRQ(ierr);
2166   PetscFunctionReturn(0);
2167 }
2168 
2169 #undef __FUNCT__
2170 #define __FUNCT__ "MatCopy_MPIAIJ"
2171 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
2172 {
2173   PetscErrorCode ierr;
2174   Mat_MPIAIJ     *a = (Mat_MPIAIJ *)A->data;
2175   Mat_MPIAIJ     *b = (Mat_MPIAIJ *)B->data;
2176 
2177   PetscFunctionBegin;
2178   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
2179   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
2180     /* Because of the column compression in the off-processor part of the matrix a->B,
2181        the number of columns in a->B and b->B may differ, so we cannot call
2182        MatCopy() directly on the two parts.  If need be, a copy more efficient than
2183        MatCopy_Basic() could be provided by first uncompressing the a->B matrices
2184        and then copying the submatrices. */
2185     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
2186   } else {
2187     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
2188     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
2189   }
2190   PetscFunctionReturn(0);
2191 }
2192 
2193 #undef __FUNCT__
2194 #define __FUNCT__ "MatSetUpPreallocation_MPIAIJ"
2195 PetscErrorCode MatSetUpPreallocation_MPIAIJ(Mat A)
2196 {
2197   PetscErrorCode ierr;
2198 
2199   PetscFunctionBegin;
2200   ierr =  MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
2201   PetscFunctionReturn(0);
2202 }
2203 
2204 #undef __FUNCT__
2205 #define __FUNCT__ "MatAXPY_MPIAIJ"
2206 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
2207 {
2208   PetscErrorCode ierr;
2209   PetscInt       i;
2210   Mat_MPIAIJ     *xx = (Mat_MPIAIJ *)X->data,*yy = (Mat_MPIAIJ *)Y->data;
2211   PetscBLASInt   bnz,one=1;
2212   Mat_SeqAIJ     *x,*y;
2213 
2214   PetscFunctionBegin;
2215   if (str == SAME_NONZERO_PATTERN) {
2216     PetscScalar alpha = a;
2217     x = (Mat_SeqAIJ *)xx->A->data;
2218     y = (Mat_SeqAIJ *)yy->A->data;
2219     bnz = PetscBLASIntCast(x->nz);
2220     BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
2221     x = (Mat_SeqAIJ *)xx->B->data;
2222     y = (Mat_SeqAIJ *)yy->B->data;
2223     bnz = PetscBLASIntCast(x->nz);
2224     BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
2225   } else if (str == SUBSET_NONZERO_PATTERN) {
2226     ierr = MatAXPY_SeqAIJ(yy->A,a,xx->A,str);CHKERRQ(ierr);
2227 
2228     x = (Mat_SeqAIJ *)xx->B->data;
2229     y = (Mat_SeqAIJ *)yy->B->data;
2230     if (y->xtoy && y->XtoY != xx->B) {
2231       ierr = PetscFree(y->xtoy);CHKERRQ(ierr);
2232       ierr = MatDestroy(&y->XtoY);CHKERRQ(ierr);
2233     }
2234     if (!y->xtoy) { /* get xtoy */
2235       ierr = MatAXPYGetxtoy_Private(xx->B->rmap->n,x->i,x->j,xx->garray,y->i,y->j,yy->garray,&y->xtoy);CHKERRQ(ierr);
2236       y->XtoY = xx->B;
2237       ierr = PetscObjectReference((PetscObject)xx->B);CHKERRQ(ierr);
2238     }
2239     for (i=0; i<x->nz; i++) y->a[y->xtoy[i]] += a*(x->a[i]);
2240   } else {
2241     Mat B;
2242     PetscInt *nnz_d,*nnz_o;
2243     ierr = PetscMalloc(yy->A->rmap->N*sizeof(PetscInt),&nnz_d);CHKERRQ(ierr);
2244     ierr = PetscMalloc(yy->B->rmap->N*sizeof(PetscInt),&nnz_o);CHKERRQ(ierr);
2245     ierr = MatCreate(((PetscObject)Y)->comm,&B);CHKERRQ(ierr);
2246     ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr);
2247     ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr);
2248     ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr);
2249     ierr = MatAXPYGetPreallocation_SeqAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr);
2250     ierr = MatAXPYGetPreallocation_SeqAIJ(yy->B,xx->B,nnz_o);CHKERRQ(ierr);
2251     ierr = MatMPIAIJSetPreallocation(B,PETSC_NULL,nnz_d,PETSC_NULL,nnz_o);CHKERRQ(ierr);
2252     ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr);
2253     ierr = MatHeaderReplace(Y,B);CHKERRQ(ierr);
2254     ierr = PetscFree(nnz_d);CHKERRQ(ierr);
2255     ierr = PetscFree(nnz_o);CHKERRQ(ierr);
2256   }
2257   PetscFunctionReturn(0);
2258 }
2259 
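/*
   Computes Y = a*X + Y.  With SAME_NONZERO_PATTERN the value arrays are
   combined directly with BLAS axpy; with SUBSET_NONZERO_PATTERN an x-to-y
   index map is built once and cached in y->xtoy; otherwise a new matrix with
   the union nonzero pattern is preallocated and swapped into Y via
   MatHeaderReplace().  Usage sketch:

     ierr = MatAXPY(Y,2.0,X,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);
*/
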
2260 extern PetscErrorCode  MatConjugate_SeqAIJ(Mat);
2261 
2262 #undef __FUNCT__
2263 #define __FUNCT__ "MatConjugate_MPIAIJ"
2264 PetscErrorCode  MatConjugate_MPIAIJ(Mat mat)
2265 {
2266 #if defined(PETSC_USE_COMPLEX)
2267   PetscErrorCode ierr;
2268   Mat_MPIAIJ     *aij = (Mat_MPIAIJ *)mat->data;
2269 
2270   PetscFunctionBegin;
2271   ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr);
2272   ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr);
2273 #else
2274   PetscFunctionBegin;
2275 #endif
2276   PetscFunctionReturn(0);
2277 }
2278 
2279 #undef __FUNCT__
2280 #define __FUNCT__ "MatRealPart_MPIAIJ"
2281 PetscErrorCode MatRealPart_MPIAIJ(Mat A)
2282 {
2283   Mat_MPIAIJ   *a = (Mat_MPIAIJ*)A->data;
2284   PetscErrorCode ierr;
2285 
2286   PetscFunctionBegin;
2287   ierr = MatRealPart(a->A);CHKERRQ(ierr);
2288   ierr = MatRealPart(a->B);CHKERRQ(ierr);
2289   PetscFunctionReturn(0);
2290 }
2291 
2292 #undef __FUNCT__
2293 #define __FUNCT__ "MatImaginaryPart_MPIAIJ"
2294 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A)
2295 {
2296   Mat_MPIAIJ   *a = (Mat_MPIAIJ*)A->data;
2297   PetscErrorCode ierr;
2298 
2299   PetscFunctionBegin;
2300   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
2301   ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
2302   PetscFunctionReturn(0);
2303 }
2304 
2305 #ifdef PETSC_HAVE_PBGL
2306 
2307 #include <boost/parallel/mpi/bsp_process_group.hpp>
2308 #include <boost/graph/distributed/ilu_default_graph.hpp>
2309 #include <boost/graph/distributed/ilu_0_block.hpp>
2310 #include <boost/graph/distributed/ilu_preconditioner.hpp>
2311 #include <boost/graph/distributed/petsc/interface.hpp>
2312 #include <boost/multi_array.hpp>
2313 #include <boost/parallel/distributed_property_map.hpp>
2314 
2315 #undef __FUNCT__
2316 #define __FUNCT__ "MatILUFactorSymbolic_MPIAIJ"
2317 /*
2318   This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu>
2319 */
2320 PetscErrorCode MatILUFactorSymbolic_MPIAIJ(Mat fact,Mat A, IS isrow, IS iscol, const MatFactorInfo *info)
2321 {
2322   namespace petsc = boost::distributed::petsc;
2323 
2324   namespace graph_dist = boost::graph::distributed;
2325   using boost::graph::distributed::ilu_default::process_group_type;
2326   using boost::graph::ilu_permuted;
2327 
2328   PetscBool       row_identity, col_identity;
2329   PetscContainer  c;
2330   PetscInt        m, n, M, N;
2331   PetscErrorCode  ierr;
2332 
2333   PetscFunctionBegin;
2334   if (info->levels != 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only levels = 0 supported for parallel ilu");
2335   ierr = ISIdentity(isrow, &row_identity);CHKERRQ(ierr);
2336   ierr = ISIdentity(iscol, &col_identity);CHKERRQ(ierr);
2337   if (!row_identity || !col_identity) {
2338     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for parallel ILU");
2339   }
2340 
2341   process_group_type pg;
2342   typedef graph_dist::ilu_default::ilu_level_graph_type  lgraph_type;
2343   lgraph_type*   lgraph_p = new lgraph_type(petsc::num_global_vertices(A), pg, petsc::matrix_distribution(A, pg));
2344   lgraph_type&   level_graph = *lgraph_p;
2345   graph_dist::ilu_default::graph_type&            graph(level_graph.graph);
2346 
2347   petsc::read_matrix(A, graph, get(boost::edge_weight, graph));
2348   ilu_permuted(level_graph);
2349 
2350   /* put together the new matrix */
2351   ierr = MatCreate(((PetscObject)A)->comm, fact);CHKERRQ(ierr);
2352   ierr = MatGetLocalSize(A, &m, &n);CHKERRQ(ierr);
2353   ierr = MatGetSize(A, &M, &N);CHKERRQ(ierr);
2354   ierr = MatSetSizes(fact, m, n, M, N);CHKERRQ(ierr);
2355   ierr = MatSetType(fact, ((PetscObject)A)->type_name);CHKERRQ(ierr);
2356   ierr = MatAssemblyBegin(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2357   ierr = MatAssemblyEnd(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2358 
2359   ierr = PetscContainerCreate(((PetscObject)A)->comm, &c);CHKERRQ(ierr);
2360   ierr = PetscContainerSetPointer(c, lgraph_p);CHKERRQ(ierr);
2361   ierr = PetscObjectCompose((PetscObject) (fact), "graph", (PetscObject) c);CHKERRQ(ierr);
2362   ierr = PetscContainerDestroy(&c);CHKERRQ(ierr);
2363   PetscFunctionReturn(0);
2364 }
2365 
2366 #undef __FUNCT__
2367 #define __FUNCT__ "MatLUFactorNumeric_MPIAIJ"
2368 PetscErrorCode MatLUFactorNumeric_MPIAIJ(Mat B,Mat A, const MatFactorInfo *info)
2369 {
2370   PetscFunctionBegin;
2371   PetscFunctionReturn(0);
2372 }
2373 
2374 #undef __FUNCT__
2375 #define __FUNCT__ "MatSolve_MPIAIJ"
2376 /*
2377   This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu>
2378 */
2379 PetscErrorCode MatSolve_MPIAIJ(Mat A, Vec b, Vec x)
2380 {
2381   namespace graph_dist = boost::graph::distributed;
2382 
2383   typedef graph_dist::ilu_default::ilu_level_graph_type  lgraph_type;
2384   lgraph_type*   lgraph_p;
2385   PetscContainer c;
2386   PetscErrorCode ierr;
2387 
2388   PetscFunctionBegin;
2389   ierr = PetscObjectQuery((PetscObject) A, "graph", (PetscObject *) &c);CHKERRQ(ierr);
2390   ierr = PetscContainerGetPointer(c, (void **) &lgraph_p);CHKERRQ(ierr);
2391   ierr = VecCopy(b, x);CHKERRQ(ierr);
2392 
2393   PetscScalar* array_x;
2394   ierr = VecGetArray(x, &array_x);CHKERRQ(ierr);
2395   PetscInt sx;
2396   ierr = VecGetSize(x, &sx);CHKERRQ(ierr);
2397 
2398   PetscScalar* array_b;
2399   ierr = VecGetArray(b, &array_b);CHKERRQ(ierr);
2400   PetscInt sb;
2401   ierr = VecGetSize(b, &sb);CHKERRQ(ierr);
2402 
2403   lgraph_type&   level_graph = *lgraph_p;
2404   graph_dist::ilu_default::graph_type&            graph(level_graph.graph);
2405 
2406   typedef boost::multi_array_ref<PetscScalar, 1> array_ref_type;
2407   array_ref_type                                 ref_b(array_b, boost::extents[num_vertices(graph)]),
2408                                                  ref_x(array_x, boost::extents[num_vertices(graph)]);
2409 
2410   typedef boost::iterator_property_map<array_ref_type::iterator,
2411                                 boost::property_map<graph_dist::ilu_default::graph_type, boost::vertex_index_t>::type>  gvector_type;
2412   gvector_type                                   vector_b(ref_b.begin(), get(boost::vertex_index, graph)),
2413                                                  vector_x(ref_x.begin(), get(boost::vertex_index, graph));
2414 
2415   ilu_set_solve(*lgraph_p, vector_b, vector_x);
2416 
2417   PetscFunctionReturn(0);
2418 }
2419 #endif
2420 
2421 typedef struct { /* used by MatGetRedundantMatrix() for reusing matredundant */
2422   PetscInt       nzlocal,nsends,nrecvs;
2423   PetscMPIInt    *send_rank,*recv_rank;
2424   PetscInt       *sbuf_nz,*rbuf_nz,*sbuf_j,**rbuf_j;
2425   PetscScalar    *sbuf_a,**rbuf_a;
2426   PetscErrorCode (*Destroy)(Mat);
2427 } Mat_Redundant;
2428 
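/*
   Mat_Redundant caches the communication layout built by the first
   MatGetRedundantMatrix() call with MAT_INITIAL_MATRIX, so that later
   MAT_REUSE_MATRIX calls need only resend the numerical values (sbuf_a) and
   not the index structure (sbuf_j); see the reuse branches below.
*/
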
2429 #undef __FUNCT__
2430 #define __FUNCT__ "PetscContainerDestroy_MatRedundant"
2431 PetscErrorCode PetscContainerDestroy_MatRedundant(void *ptr)
2432 {
2433   PetscErrorCode       ierr;
2434   Mat_Redundant        *redund=(Mat_Redundant*)ptr;
2435   PetscInt             i;
2436 
2437   PetscFunctionBegin;
2438   ierr = PetscFree2(redund->send_rank,redund->recv_rank);CHKERRQ(ierr);
2439   ierr = PetscFree(redund->sbuf_j);CHKERRQ(ierr);
2440   ierr = PetscFree(redund->sbuf_a);CHKERRQ(ierr);
2441   for (i=0; i<redund->nrecvs; i++){
2442     ierr = PetscFree(redund->rbuf_j[i]);CHKERRQ(ierr);
2443     ierr = PetscFree(redund->rbuf_a[i]);CHKERRQ(ierr);
2444   }
2445   ierr = PetscFree4(redund->sbuf_nz,redund->rbuf_nz,redund->rbuf_j,redund->rbuf_a);CHKERRQ(ierr);
2446   ierr = PetscFree(redund);CHKERRQ(ierr);
2447   PetscFunctionReturn(0);
2448 }
2449 
2450 #undef __FUNCT__
2451 #define __FUNCT__ "MatDestroy_MatRedundant"
2452 PetscErrorCode MatDestroy_MatRedundant(Mat A)
2453 {
2454   PetscErrorCode  ierr;
2455   PetscContainer  container;
2456   Mat_Redundant   *redund=PETSC_NULL;
2457 
2458   PetscFunctionBegin;
2459   ierr = PetscObjectQuery((PetscObject)A,"Mat_Redundant",(PetscObject *)&container);CHKERRQ(ierr);
2460   if (!container) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Container does not exist");
2461   ierr = PetscContainerGetPointer(container,(void **)&redund);CHKERRQ(ierr);
2462   A->ops->destroy = redund->Destroy;
2463   ierr = PetscObjectCompose((PetscObject)A,"Mat_Redundant",0);CHKERRQ(ierr);
2464   if (A->ops->destroy) {
2465     ierr = (*A->ops->destroy)(A);CHKERRQ(ierr);
2466   }
2467   PetscFunctionReturn(0);
2468 }
2469 
2470 #undef __FUNCT__
2471 #define __FUNCT__ "MatGetRedundantMatrix_MPIAIJ"
2472 PetscErrorCode MatGetRedundantMatrix_MPIAIJ(Mat mat,PetscInt nsubcomm,MPI_Comm subcomm,PetscInt mlocal_sub,MatReuse reuse,Mat *matredundant)
2473 {
2474   PetscMPIInt    rank,size;
2475   MPI_Comm       comm=((PetscObject)mat)->comm;
2476   PetscErrorCode ierr;
2477   PetscInt       nsends=0,nrecvs=0,i,rownz_max=0;
2478   PetscMPIInt    *send_rank=PETSC_NULL,*recv_rank=PETSC_NULL;
2479   PetscInt       *rowrange=mat->rmap->range;
2480   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2481   Mat            A=aij->A,B=aij->B,C=*matredundant;
2482   Mat_SeqAIJ     *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data;
2483   PetscScalar    *sbuf_a;
2484   PetscInt       nzlocal=a->nz+b->nz;
2485   PetscInt       j,cstart=mat->cmap->rstart,cend=mat->cmap->rend,row,nzA,nzB,ncols,*cworkA,*cworkB;
2486   PetscInt       rstart=mat->rmap->rstart,rend=mat->rmap->rend,*bmap=aij->garray,M,N;
2487   PetscInt       *cols,ctmp,lwrite,*rptr,l,*sbuf_j;
2488   MatScalar      *aworkA,*aworkB;
2489   PetscScalar    *vals;
2490   PetscMPIInt    tag1,tag2,tag3,imdex;
2491   MPI_Request    *s_waits1=PETSC_NULL,*s_waits2=PETSC_NULL,*s_waits3=PETSC_NULL,
2492                  *r_waits1=PETSC_NULL,*r_waits2=PETSC_NULL,*r_waits3=PETSC_NULL;
2493   MPI_Status     recv_status,*send_status;
2494   PetscInt       *sbuf_nz=PETSC_NULL,*rbuf_nz=PETSC_NULL,count;
2495   PetscInt       **rbuf_j=PETSC_NULL;
2496   PetscScalar    **rbuf_a=PETSC_NULL;
2497   Mat_Redundant  *redund=PETSC_NULL;
2498   PetscContainer container;
2499 
2500   PetscFunctionBegin;
2501   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2502   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2503 
2504   if (reuse == MAT_REUSE_MATRIX) {
2505     ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr);
2506     if (M != N || M != mat->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong global size");
2507     ierr = MatGetLocalSize(C,&M,&N);CHKERRQ(ierr);
2508     if (M != N || M != mlocal_sub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong local size");
2509     ierr = PetscObjectQuery((PetscObject)C,"Mat_Redundant",(PetscObject *)&container);CHKERRQ(ierr);
2510     if (!container) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Container does not exist");
2511     ierr = PetscContainerGetPointer(container,(void **)&redund);CHKERRQ(ierr);
2512     if (nzlocal != redund->nzlocal) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong nzlocal");
2513 
2514     nsends    = redund->nsends;
2515     nrecvs    = redund->nrecvs;
2516     send_rank = redund->send_rank;
2517     recv_rank = redund->recv_rank;
2518     sbuf_nz   = redund->sbuf_nz;
2519     rbuf_nz   = redund->rbuf_nz;
2520     sbuf_j    = redund->sbuf_j;
2521     sbuf_a    = redund->sbuf_a;
2522     rbuf_j    = redund->rbuf_j;
2523     rbuf_a    = redund->rbuf_a;
2524   }
2525 
2526   if (reuse == MAT_INITIAL_MATRIX){
2527     PetscMPIInt  subrank,subsize;
2528     PetscInt     nleftover,np_subcomm;
2529     /* get the destination processors' id send_rank, nsends and nrecvs */
2530     ierr = MPI_Comm_rank(subcomm,&subrank);CHKERRQ(ierr);
2531     ierr = MPI_Comm_size(subcomm,&subsize);CHKERRQ(ierr);
2532     ierr = PetscMalloc2(size,PetscMPIInt,&send_rank,size,PetscMPIInt,&recv_rank);CHKERRQ(ierr);
2533     np_subcomm = size/nsubcomm;
2534     nleftover  = size - nsubcomm*np_subcomm;
2535     nsends = 0; nrecvs = 0;
2536     for (i=0; i<size; i++){ /* i = rank */
2537       if (subrank == i/nsubcomm && rank != i){ /* my_subrank == other's subrank */
2538         send_rank[nsends] = i; nsends++;
2539         recv_rank[nrecvs++] = i;
2540       }
2541     }
2542     if (rank >= size - nleftover){/* this proc is a leftover processor */
2543       i = size-nleftover-1;
2544       j = 0;
2545       while (j < nsubcomm - nleftover){
2546         send_rank[nsends++] = i;
2547         i--; j++;
2548       }
2549     }
2550 
2551     if (nleftover && subsize == size/nsubcomm && subrank==subsize-1){ /* this proc recvs from leftover processors */
2552       for (i=0; i<nleftover; i++){
2553         recv_rank[nrecvs++] = size-nleftover+i;
2554       }
2555     }
2556 
2557     /* allocate sbuf_j, sbuf_a */
2558     i = nzlocal + rowrange[rank+1] - rowrange[rank] + 2;
2559     ierr = PetscMalloc(i*sizeof(PetscInt),&sbuf_j);CHKERRQ(ierr);
2560     ierr = PetscMalloc((nzlocal+1)*sizeof(PetscScalar),&sbuf_a);CHKERRQ(ierr);
2561   } /* end of if (reuse == MAT_INITIAL_MATRIX) */
2562 
2563   /* copy mat's local entries into the buffers */
2564   if (reuse == MAT_INITIAL_MATRIX){
2565     rownz_max = 0;
2566     rptr = sbuf_j;
2567     cols = sbuf_j + rend-rstart + 1;
2568     vals = sbuf_a;
2569     rptr[0] = 0;
2570     for (i=0; i<rend-rstart; i++){
2571       row = i + rstart;
2572       nzA    = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i];
2573       ncols  = nzA + nzB;
2574       cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i];
2575       aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i];
2576       /* load the column indices for this row into cols */
2577       lwrite = 0;
2578       for (l=0; l<nzB; l++) {
2579         if ((ctmp = bmap[cworkB[l]]) < cstart){
2580           vals[lwrite]   = aworkB[l];
2581           cols[lwrite++] = ctmp;
2582         }
2583       }
2584       for (l=0; l<nzA; l++){
2585         vals[lwrite]   = aworkA[l];
2586         cols[lwrite++] = cstart + cworkA[l];
2587       }
2588       for (l=0; l<nzB; l++) {
2589         if ((ctmp = bmap[cworkB[l]]) >= cend){
2590           vals[lwrite]   = aworkB[l];
2591           cols[lwrite++] = ctmp;
2592         }
2593       }
2594       vals += ncols;
2595       cols += ncols;
2596       rptr[i+1] = rptr[i] + ncols;
2597       if (rownz_max < ncols) rownz_max = ncols;
2598     }
2599     if (rptr[rend-rstart] != a->nz + b->nz) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB,"rptr[%D] %D != %D + %D",rend-rstart,rptr[rend-rstart],a->nz,b->nz);
2600   } else { /* only copy matrix values into sbuf_a */
2601     rptr = sbuf_j;
2602     vals = sbuf_a;
2603     rptr[0] = 0;
2604     for (i=0; i<rend-rstart; i++){
2605       row = i + rstart;
2606       nzA    = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i];
2607       ncols  = nzA + nzB;
2608       cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i];
2609       aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i];
2610       lwrite = 0;
2611       for (l=0; l<nzB; l++) {
2612         if ((ctmp = bmap[cworkB[l]]) < cstart) vals[lwrite++] = aworkB[l];
2613       }
2614       for (l=0; l<nzA; l++) vals[lwrite++] = aworkA[l];
2615       for (l=0; l<nzB; l++) {
2616         if ((ctmp = bmap[cworkB[l]]) >= cend) vals[lwrite++] = aworkB[l];
2617       }
2618       vals += ncols;
2619       rptr[i+1] = rptr[i] + ncols;
2620     }
2621   } /* end of if (reuse == MAT_INITIAL_MATRIX) */
2622 
2623   /* send nzlocal to others, and recv other's nzlocal */
2624   /*--------------------------------------------------*/
2625   if (reuse == MAT_INITIAL_MATRIX){
2626     ierr = PetscMalloc2(3*(nsends + nrecvs)+1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr);
2627     s_waits2 = s_waits3 + nsends;
2628     s_waits1 = s_waits2 + nsends;
2629     r_waits1 = s_waits1 + nsends;
2630     r_waits2 = r_waits1 + nrecvs;
2631     r_waits3 = r_waits2 + nrecvs;
2632   } else {
2633     ierr = PetscMalloc2(nsends + nrecvs +1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr);
2634     r_waits3 = s_waits3 + nsends;
2635   }
2636 
2637   ierr = PetscObjectGetNewTag((PetscObject)mat,&tag3);CHKERRQ(ierr);
2638   if (reuse == MAT_INITIAL_MATRIX){
2639     /* get new tags to keep the communication clean */
2640     ierr = PetscObjectGetNewTag((PetscObject)mat,&tag1);CHKERRQ(ierr);
2641     ierr = PetscObjectGetNewTag((PetscObject)mat,&tag2);CHKERRQ(ierr);
2642     ierr = PetscMalloc4(nsends,PetscInt,&sbuf_nz,nrecvs,PetscInt,&rbuf_nz,nrecvs,PetscInt*,&rbuf_j,nrecvs,PetscScalar*,&rbuf_a);CHKERRQ(ierr);
2643 
2644     /* post receives of other's nzlocal */
2645     for (i=0; i<nrecvs; i++){
2646       ierr = MPI_Irecv(rbuf_nz+i,1,MPIU_INT,MPI_ANY_SOURCE,tag1,comm,r_waits1+i);CHKERRQ(ierr);
2647     }
2648     /* send nzlocal to others */
2649     for (i=0; i<nsends; i++){
2650       sbuf_nz[i] = nzlocal;
2651       ierr = MPI_Isend(sbuf_nz+i,1,MPIU_INT,send_rank[i],tag1,comm,s_waits1+i);CHKERRQ(ierr);
2652     }
2653     /* wait on receives of nzlocal; allocate space for rbuf_j, rbuf_a */
2654     count = nrecvs;
2655     while (count) {
2656       ierr = MPI_Waitany(nrecvs,r_waits1,&imdex,&recv_status);CHKERRQ(ierr);
2657       recv_rank[imdex] = recv_status.MPI_SOURCE;
2658       /* allocate rbuf_a and rbuf_j; then post receives of rbuf_j */
2659       ierr = PetscMalloc((rbuf_nz[imdex]+1)*sizeof(PetscScalar),&rbuf_a[imdex]);CHKERRQ(ierr);
2660 
2661       i = rowrange[recv_status.MPI_SOURCE+1] - rowrange[recv_status.MPI_SOURCE]; /* number of expected mat->i */
2662       rbuf_nz[imdex] += i + 2;
2663       ierr = PetscMalloc(rbuf_nz[imdex]*sizeof(PetscInt),&rbuf_j[imdex]);CHKERRQ(ierr);
2664       ierr = MPI_Irecv(rbuf_j[imdex],rbuf_nz[imdex],MPIU_INT,recv_status.MPI_SOURCE,tag2,comm,r_waits2+imdex);CHKERRQ(ierr);
2665       count--;
2666     }
2667     /* wait on sends of nzlocal */
2668     if (nsends) {ierr = MPI_Waitall(nsends,s_waits1,send_status);CHKERRQ(ierr);}
2669     /* send mat->i,j to others, and recv from others */
2670     /*------------------------------------------------*/
2671     for (i=0; i<nsends; i++){
2672       j = nzlocal + rowrange[rank+1] - rowrange[rank] + 1;
2673       ierr = MPI_Isend(sbuf_j,j,MPIU_INT,send_rank[i],tag2,comm,s_waits2+i);CHKERRQ(ierr);
2674     }
2675     /* wait on receives of mat->i,j */
2676     /*------------------------------*/
2677     count = nrecvs;
2678     while (count) {
2679       ierr = MPI_Waitany(nrecvs,r_waits2,&imdex,&recv_status);CHKERRQ(ierr);
2680       if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE);
2681       count--;
2682     }
2683     /* wait on sends of mat->i,j */
2684     /*---------------------------*/
2685     if (nsends) {
2686       ierr = MPI_Waitall(nsends,s_waits2,send_status);CHKERRQ(ierr);
2687     }
2688   } /* end of if (reuse == MAT_INITIAL_MATRIX) */
2689 
2690   /* post receives, send and receive mat->a */
2691   /*----------------------------------------*/
2692   for (imdex=0; imdex<nrecvs; imdex++) {
2693     ierr = MPI_Irecv(rbuf_a[imdex],rbuf_nz[imdex],MPIU_SCALAR,recv_rank[imdex],tag3,comm,r_waits3+imdex);CHKERRQ(ierr);
2694   }
2695   for (i=0; i<nsends; i++){
2696     ierr = MPI_Isend(sbuf_a,nzlocal,MPIU_SCALAR,send_rank[i],tag3,comm,s_waits3+i);CHKERRQ(ierr);
2697   }
2698   count = nrecvs;
2699   while (count) {
2700     ierr = MPI_Waitany(nrecvs,r_waits3,&imdex,&recv_status);CHKERRQ(ierr);
2701     if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE);
2702     count--;
2703   }
2704   if (nsends) {
2705     ierr = MPI_Waitall(nsends,s_waits3,send_status);CHKERRQ(ierr);
2706   }
2707 
2708   ierr = PetscFree2(s_waits3,send_status);CHKERRQ(ierr);
2709 
2710   /* create redundant matrix */
2711   /*-------------------------*/
2712   if (reuse == MAT_INITIAL_MATRIX){
2713     /* compute rownz_max for preallocation */
2714     for (imdex=0; imdex<nrecvs; imdex++){
2715       j = rowrange[recv_rank[imdex]+1] - rowrange[recv_rank[imdex]];
2716       rptr = rbuf_j[imdex];
2717       for (i=0; i<j; i++){
2718         ncols = rptr[i+1] - rptr[i];
2719         if (rownz_max < ncols) rownz_max = ncols;
2720       }
2721     }
2722 
2723     ierr = MatCreate(subcomm,&C);CHKERRQ(ierr);
2724     ierr = MatSetSizes(C,mlocal_sub,mlocal_sub,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr);
2725     ierr = MatSetFromOptions(C);CHKERRQ(ierr);
2726     ierr = MatSeqAIJSetPreallocation(C,rownz_max,PETSC_NULL);CHKERRQ(ierr);
2727     ierr = MatMPIAIJSetPreallocation(C,rownz_max,PETSC_NULL,rownz_max,PETSC_NULL);CHKERRQ(ierr);
2728   } else {
2729     C = *matredundant;
2730   }
2731 
2732   /* insert local matrix entries */
2733   rptr = sbuf_j;
2734   cols = sbuf_j + rend-rstart + 1;
2735   vals = sbuf_a;
2736   for (i=0; i<rend-rstart; i++){
2737     row   = i + rstart;
2738     ncols = rptr[i+1] - rptr[i];
2739     ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
2740     vals += ncols;
2741     cols += ncols;
2742   }
2743   /* insert received matrix entries */
2744   for (imdex=0; imdex<nrecvs; imdex++){
2745     rstart = rowrange[recv_rank[imdex]];
2746     rend   = rowrange[recv_rank[imdex]+1];
2747     rptr = rbuf_j[imdex];
2748     cols = rbuf_j[imdex] + rend-rstart + 1;
2749     vals = rbuf_a[imdex];
2750     for (i=0; i<rend-rstart; i++){
2751       row   = i + rstart;
2752       ncols = rptr[i+1] - rptr[i];
2753       ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
2754       vals += ncols;
2755       cols += ncols;
2756     }
2757   }
2758   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2759   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2760   ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr);
2761   if (M != mat->rmap->N || N != mat->cmap->N) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"redundant mat size %d x %d != input mat size %d x %d",M,N,mat->rmap->N,mat->cmap->N);
2762   if (reuse == MAT_INITIAL_MATRIX) {
2763     PetscContainer container;
2764     *matredundant = C;
2765     /* create a supporting struct and attach it to C for reuse */
2766     ierr = PetscNewLog(C,Mat_Redundant,&redund);CHKERRQ(ierr);
2767     ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
2768     ierr = PetscContainerSetPointer(container,redund);CHKERRQ(ierr);
2769     ierr = PetscContainerSetUserDestroy(container,PetscContainerDestroy_MatRedundant);CHKERRQ(ierr);
2770     ierr = PetscObjectCompose((PetscObject)C,"Mat_Redundant",(PetscObject)container);CHKERRQ(ierr);
2771     ierr = PetscContainerDestroy(&container);CHKERRQ(ierr);
2772 
2773     redund->nzlocal = nzlocal;
2774     redund->nsends  = nsends;
2775     redund->nrecvs  = nrecvs;
2776     redund->send_rank = send_rank;
2777     redund->recv_rank = recv_rank;
2778     redund->sbuf_nz = sbuf_nz;
2779     redund->rbuf_nz = rbuf_nz;
2780     redund->sbuf_j  = sbuf_j;
2781     redund->sbuf_a  = sbuf_a;
2782     redund->rbuf_j  = rbuf_j;
2783     redund->rbuf_a  = rbuf_a;
2784 
2785     redund->Destroy = C->ops->destroy;
2786     C->ops->destroy = MatDestroy_MatRedundant;
2787   }
2788   PetscFunctionReturn(0);
2789 }
2790 
2791 #undef __FUNCT__
2792 #define __FUNCT__ "MatGetRowMaxAbs_MPIAIJ"
2793 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2794 {
2795   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2796   PetscErrorCode ierr;
2797   PetscInt       i,*idxb = 0;
2798   PetscScalar    *va,*vb;
2799   Vec            vtmp;
2800 
2801   PetscFunctionBegin;
2802   ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr);
2803   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2804   if (idx) {
2805     for (i=0; i<A->rmap->n; i++) {
2806       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2807     }
2808   }
2809 
2810   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
2811   if (idx) {
2812     ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr);
2813   }
2814   ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
2815   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
2816 
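  /* merge the two partial results: for each local row keep whichever of the
     diagonal-block and off-diagonal-block maxima has the larger magnitude;
     garray maps the off-diagonal block's local column numbers to global ones */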
2817   for (i=0; i<A->rmap->n; i++){
2818     if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
2819       va[i] = vb[i];
2820       if (idx) idx[i] = a->garray[idxb[i]];
2821     }
2822   }
2823 
2824   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2825   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
2826   ierr = PetscFree(idxb);CHKERRQ(ierr);
2827   ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
2828   PetscFunctionReturn(0);
2829 }
2830 
2831 #undef __FUNCT__
2832 #define __FUNCT__ "MatGetRowMinAbs_MPIAIJ"
2833 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2834 {
2835   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2836   PetscErrorCode ierr;
2837   PetscInt       i,*idxb = 0;
2838   PetscScalar    *va,*vb;
2839   Vec            vtmp;
2840 
2841   PetscFunctionBegin;
2842   ierr = MatGetRowMinAbs(a->A,v,idx);CHKERRQ(ierr);
2843   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2844   if (idx) {
2845     for (i=0; i<A->rmap->n; i++) {
2846       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2847     }
2848   }
2849 
2850   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
2851   if (idx) {
2852     ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr);
2853   }
2854   ierr = MatGetRowMinAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
2855   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
2856 
2857   for (i=0; i<A->rmap->n; i++){
2858     if (PetscAbsScalar(va[i]) > PetscAbsScalar(vb[i])) {
2859       va[i] = vb[i];
2860       if (idx) idx[i] = a->garray[idxb[i]];
2861     }
2862   }
2863 
2864   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2865   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
2866   ierr = PetscFree(idxb);CHKERRQ(ierr);
2867   ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
2868   PetscFunctionReturn(0);
2869 }
2870 
2871 #undef __FUNCT__
2872 #define __FUNCT__ "MatGetRowMin_MPIAIJ"
2873 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2874 {
2875   Mat_MPIAIJ    *mat    = (Mat_MPIAIJ *) A->data;
2876   PetscInt       n      = A->rmap->n;
2877   PetscInt       cstart = A->cmap->rstart;
2878   PetscInt      *cmap   = mat->garray;
2879   PetscInt      *diagIdx, *offdiagIdx;
2880   Vec            diagV, offdiagV;
2881   PetscScalar   *a, *diagA, *offdiagA;
2882   PetscInt       r;
2883   PetscErrorCode ierr;
2884 
2885   PetscFunctionBegin;
2886   ierr = PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);CHKERRQ(ierr);
2887   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr);
2888   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr);
2889   ierr = MatGetRowMin(mat->A, diagV,    diagIdx);CHKERRQ(ierr);
2890   ierr = MatGetRowMin(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
2891   ierr = VecGetArray(v,        &a);CHKERRQ(ierr);
2892   ierr = VecGetArray(diagV,    &diagA);CHKERRQ(ierr);
2893   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2894   for(r = 0; r < n; ++r) {
2895     if (PetscAbsScalar(diagA[r]) <= PetscAbsScalar(offdiagA[r])) {
2896       a[r]   = diagA[r];
2897       idx[r] = cstart + diagIdx[r];
2898     } else {
2899       a[r]   = offdiagA[r];
2900       idx[r] = cmap[offdiagIdx[r]];
2901     }
2902   }
2903   ierr = VecRestoreArray(v,        &a);CHKERRQ(ierr);
2904   ierr = VecRestoreArray(diagV,    &diagA);CHKERRQ(ierr);
2905   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2906   ierr = VecDestroy(&diagV);CHKERRQ(ierr);
2907   ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
2908   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
2909   PetscFunctionReturn(0);
2910 }
2911 
2912 #undef __FUNCT__
2913 #define __FUNCT__ "MatGetRowMax_MPIAIJ"
2914 PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2915 {
2916   Mat_MPIAIJ    *mat    = (Mat_MPIAIJ *) A->data;
2917   PetscInt       n      = A->rmap->n;
2918   PetscInt       cstart = A->cmap->rstart;
2919   PetscInt      *cmap   = mat->garray;
2920   PetscInt      *diagIdx, *offdiagIdx;
2921   Vec            diagV, offdiagV;
2922   PetscScalar   *a, *diagA, *offdiagA;
2923   PetscInt       r;
2924   PetscErrorCode ierr;
2925 
2926   PetscFunctionBegin;
2927   ierr = PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);CHKERRQ(ierr);
2928   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr);
2929   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr);
2930   ierr = MatGetRowMax(mat->A, diagV,    diagIdx);CHKERRQ(ierr);
2931   ierr = MatGetRowMax(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
2932   ierr = VecGetArray(v,        &a);CHKERRQ(ierr);
2933   ierr = VecGetArray(diagV,    &diagA);CHKERRQ(ierr);
2934   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2935   for(r = 0; r < n; ++r) {
2936     if (PetscAbsScalar(diagA[r]) >= PetscAbsScalar(offdiagA[r])) {
2937       a[r]   = diagA[r];
2938       idx[r] = cstart + diagIdx[r];
2939     } else {
2940       a[r]   = offdiagA[r];
2941       idx[r] = cmap[offdiagIdx[r]];
2942     }
2943   }
2944   ierr = VecRestoreArray(v,        &a);CHKERRQ(ierr);
2945   ierr = VecRestoreArray(diagV,    &diagA);CHKERRQ(ierr);
2946   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2947   ierr = VecDestroy(&diagV);CHKERRQ(ierr);
2948   ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
2949   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
2950   PetscFunctionReturn(0);
2951 }
2952 
2953 #undef __FUNCT__
2954 #define __FUNCT__ "MatGetSeqNonzerostructure_MPIAIJ"
2955 PetscErrorCode MatGetSeqNonzerostructure_MPIAIJ(Mat mat,Mat *newmat)
2956 {
2957   PetscErrorCode ierr;
2958   Mat            *dummy;
2959 
2960   PetscFunctionBegin;
2961   ierr = MatGetSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy);CHKERRQ(ierr);
2962   *newmat = *dummy;
2963   ierr = PetscFree(dummy);CHKERRQ(ierr);
2964   PetscFunctionReturn(0);
2965 }
2966 
2967 extern PetscErrorCode  MatFDColoringApply_AIJ(Mat,MatFDColoring,Vec,MatStructure*,void*);
2968 
2969 #undef __FUNCT__
2970 #define __FUNCT__ "MatInvertBlockDiagonal_MPIAIJ"
2971 PetscErrorCode  MatInvertBlockDiagonal_MPIAIJ(Mat A,PetscScalar **values)
2972 {
2973   Mat_MPIAIJ    *a = (Mat_MPIAIJ*) A->data;
2974   PetscErrorCode ierr;
2975 
2976   PetscFunctionBegin;
2977   ierr = MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr);
2978   PetscFunctionReturn(0);
2979 }
2980 
2981 
2982 /* -------------------------------------------------------------------*/
2983 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
2984        MatGetRow_MPIAIJ,
2985        MatRestoreRow_MPIAIJ,
2986        MatMult_MPIAIJ,
2987 /* 4*/ MatMultAdd_MPIAIJ,
2988        MatMultTranspose_MPIAIJ,
2989        MatMultTransposeAdd_MPIAIJ,
2990 #ifdef PETSC_HAVE_PBGL
2991        MatSolve_MPIAIJ,
2992 #else
2993        0,
2994 #endif
2995        0,
2996        0,
2997 /*10*/ 0,
2998        0,
2999        0,
3000        MatSOR_MPIAIJ,
3001        MatTranspose_MPIAIJ,
3002 /*15*/ MatGetInfo_MPIAIJ,
3003        MatEqual_MPIAIJ,
3004        MatGetDiagonal_MPIAIJ,
3005        MatDiagonalScale_MPIAIJ,
3006        MatNorm_MPIAIJ,
3007 /*20*/ MatAssemblyBegin_MPIAIJ,
3008        MatAssemblyEnd_MPIAIJ,
3009        MatSetOption_MPIAIJ,
3010        MatZeroEntries_MPIAIJ,
3011 /*24*/ MatZeroRows_MPIAIJ,
3012        0,
3013 #ifdef PETSC_HAVE_PBGL
3014        0,
3015 #else
3016        0,
3017 #endif
3018        0,
3019        0,
3020 /*29*/ MatSetUpPreallocation_MPIAIJ,
3021 #ifdef PETSC_HAVE_PBGL
3022        0,
3023 #else
3024        0,
3025 #endif
3026        0,
3027        0,
3028        0,
3029 /*34*/ MatDuplicate_MPIAIJ,
3030        0,
3031        0,
3032        0,
3033        0,
3034 /*39*/ MatAXPY_MPIAIJ,
3035        MatGetSubMatrices_MPIAIJ,
3036        MatIncreaseOverlap_MPIAIJ,
3037        MatGetValues_MPIAIJ,
3038        MatCopy_MPIAIJ,
3039 /*44*/ MatGetRowMax_MPIAIJ,
3040        MatScale_MPIAIJ,
3041        0,
3042        0,
3043        MatZeroRowsColumns_MPIAIJ,
3044 /*49*/ MatSetBlockSize_MPIAIJ,
3045        0,
3046        0,
3047        0,
3048        0,
3049 /*54*/ MatFDColoringCreate_MPIAIJ,
3050        0,
3051        MatSetUnfactored_MPIAIJ,
3052        MatPermute_MPIAIJ,
3053        0,
3054 /*59*/ MatGetSubMatrix_MPIAIJ,
3055        MatDestroy_MPIAIJ,
3056        MatView_MPIAIJ,
3057        0,
3058        0,
3059 /*64*/ 0,
3060        0,
3061        0,
3062        0,
3063        0,
3064 /*69*/ MatGetRowMaxAbs_MPIAIJ,
3065        MatGetRowMinAbs_MPIAIJ,
3066        0,
3067        MatSetColoring_MPIAIJ,
3068 #if defined(PETSC_HAVE_ADIC)
3069        MatSetValuesAdic_MPIAIJ,
3070 #else
3071        0,
3072 #endif
3073        MatSetValuesAdifor_MPIAIJ,
3074 /*75*/ MatFDColoringApply_AIJ,
3075        0,
3076        0,
3077        0,
3078        0,
3079 /*80*/ 0,
3080        0,
3081        0,
3082 /*83*/ MatLoad_MPIAIJ,
3083        0,
3084        0,
3085        0,
3086        0,
3087        0,
3088 /*89*/ MatMatMult_MPIAIJ_MPIAIJ,
3089        MatMatMultSymbolic_MPIAIJ_MPIAIJ,
3090        MatMatMultNumeric_MPIAIJ_MPIAIJ,
3091        MatPtAP_Basic,
3092        MatPtAPSymbolic_MPIAIJ,
3093 /*94*/ MatPtAPNumeric_MPIAIJ,
3094        0,
3095        0,
3096        0,
3097        0,
3098 /*99*/ 0,
3099        MatPtAPSymbolic_MPIAIJ_MPIAIJ,
3100        MatPtAPNumeric_MPIAIJ_MPIAIJ,
3101        MatConjugate_MPIAIJ,
3102        0,
3103 /*104*/MatSetValuesRow_MPIAIJ,
3104        MatRealPart_MPIAIJ,
3105        MatImaginaryPart_MPIAIJ,
3106        0,
3107        0,
3108 /*109*/0,
3109        MatGetRedundantMatrix_MPIAIJ,
3110        MatGetRowMin_MPIAIJ,
3111        0,
3112        0,
3113 /*114*/MatGetSeqNonzerostructure_MPIAIJ,
3114        0,
3115        0,
3116        0,
3117        0,
3118 /*119*/0,
3119        0,
3120        0,
3121        0,
3122        MatGetMultiProcBlock_MPIAIJ,
3123 /*124*/MatFindNonZeroRows_MPIAIJ,
3124        MatGetColumnNorms_MPIAIJ,
3125        MatInvertBlockDiagonal_MPIAIJ,
3126        0,
3127        MatGetSubMatricesParallel_MPIAIJ
3128 };
3129 
3130 /* ----------------------------------------------------------------------------------------*/
3131 
3132 EXTERN_C_BEGIN
3133 #undef __FUNCT__
3134 #define __FUNCT__ "MatStoreValues_MPIAIJ"
3135 PetscErrorCode  MatStoreValues_MPIAIJ(Mat mat)
3136 {
3137   Mat_MPIAIJ     *aij = (Mat_MPIAIJ *)mat->data;
3138   PetscErrorCode ierr;
3139 
3140   PetscFunctionBegin;
3141   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
3142   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
3143   PetscFunctionReturn(0);
3144 }
3145 EXTERN_C_END
3146 
3147 EXTERN_C_BEGIN
3148 #undef __FUNCT__
3149 #define __FUNCT__ "MatRetrieveValues_MPIAIJ"
3150 PetscErrorCode  MatRetrieveValues_MPIAIJ(Mat mat)
3151 {
3152   Mat_MPIAIJ     *aij = (Mat_MPIAIJ *)mat->data;
3153   PetscErrorCode ierr;
3154 
3155   PetscFunctionBegin;
3156   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
3157   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
3158   PetscFunctionReturn(0);
3159 }
3160 EXTERN_C_END
3161 
3162 EXTERN_C_BEGIN
3163 #undef __FUNCT__
3164 #define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ"
3165 PetscErrorCode  MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
3166 {
3167   Mat_MPIAIJ     *b;
3168   PetscErrorCode ierr;
3169   PetscInt       i;
3170 
3171   PetscFunctionBegin;
3172   if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5;
3173   if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2;
3174   if (d_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz);
3175   if (o_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz);
3176 
3177   ierr = PetscLayoutSetBlockSize(B->rmap,1);CHKERRQ(ierr);
3178   ierr = PetscLayoutSetBlockSize(B->cmap,1);CHKERRQ(ierr);
3179   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3180   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3181   if (d_nnz) {
3182     for (i=0; i<B->rmap->n; i++) {
3183       if (d_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %D value %D",i,d_nnz[i]);
3184     }
3185   }
3186   if (o_nnz) {
3187     for (i=0; i<B->rmap->n; i++) {
3188       if (o_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %D value %D",i,o_nnz[i]);
3189     }
3190   }
3191   b = (Mat_MPIAIJ*)B->data;
3192 
3193   if (!B->preallocated) {
3194     /* Explicitly create 2 MATSEQAIJ matrices. */
3195     ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
3196     ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr);
3197     ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr);
3198     ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr);
3199     ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
3200     ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr);
3201     ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr);
3202     ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr);
3203   }
3204 
3205   ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr);
3206   ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr);
3207   B->preallocated = PETSC_TRUE;
3208   PetscFunctionReturn(0);
3209 }
3210 EXTERN_C_END
3211 
3212 #undef __FUNCT__
3213 #define __FUNCT__ "MatDuplicate_MPIAIJ"
3214 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
3215 {
3216   Mat            mat;
3217   Mat_MPIAIJ     *a,*oldmat = (Mat_MPIAIJ*)matin->data;
3218   PetscErrorCode ierr;
3219 
3220   PetscFunctionBegin;
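  /* Duplication strategy: the row/column layouts are shared by reference,
     the column map (colmap) and global-column array (garray) are copied, and
     the sequential diagonal (A) and off-diagonal (B) blocks plus the scatter
     context are duplicated below. */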
3221   *newmat       = 0;
3222   ierr = MatCreate(((PetscObject)matin)->comm,&mat);CHKERRQ(ierr);
3223   ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr);
3224   ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr);
3225   ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
3226   a    = (Mat_MPIAIJ*)mat->data;
3227 
3228   mat->factortype    = matin->factortype;
3229   mat->rmap->bs      = matin->rmap->bs;
3230   mat->assembled    = PETSC_TRUE;
3231   mat->insertmode   = NOT_SET_VALUES;
3232   mat->preallocated = PETSC_TRUE;
3233 
3234   a->size           = oldmat->size;
3235   a->rank           = oldmat->rank;
3236   a->donotstash     = oldmat->donotstash;
3237   a->roworiented    = oldmat->roworiented;
3238   a->rowindices     = 0;
3239   a->rowvalues      = 0;
3240   a->getrowactive   = PETSC_FALSE;
3241 
3242   ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr);
3243   ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr);
3244 
3245   if (oldmat->colmap) {
3246 #if defined (PETSC_USE_CTABLE)
3247     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
3248 #else
3249     ierr = PetscMalloc((mat->cmap->N)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr);
3250     ierr = PetscLogObjectMemory(mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
3251     ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
3252 #endif
3253   } else a->colmap = 0;
3254   if (oldmat->garray) {
3255     PetscInt len;
3256     len  = oldmat->B->cmap->n;
3257     ierr = PetscMalloc((len+1)*sizeof(PetscInt),&a->garray);CHKERRQ(ierr);
3258     ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr);
3259     if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); }
3260   } else a->garray = 0;
3261 
3262   ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
3263   ierr = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr);
3264   ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
3265   ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr);
3266   ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
3267   ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr);
3268   ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
3269   ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr);
3270   ierr = PetscFListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr);
3271   *newmat = mat;
3272   PetscFunctionReturn(0);
3273 }
3274 
3275 
3276 
3277 #undef __FUNCT__
3278 #define __FUNCT__ "MatLoad_MPIAIJ"
3279 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer)
3280 {
3281   PetscScalar    *vals,*svals;
3282   MPI_Comm       comm = ((PetscObject)viewer)->comm;
3283   PetscErrorCode ierr;
3284   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
3285   PetscInt       i,nz,j,rstart,rend,mmax,maxnz = 0,grows,gcols;
3286   PetscInt       header[4],*rowlengths = 0,M,N,m,*cols;
3287   PetscInt       *ourlens = PETSC_NULL,*procsnz = PETSC_NULL,*offlens = PETSC_NULL,jj,*mycols,*smycols;
3288   PetscInt       cend,cstart,n,*rowners,sizesset=1;
3289   int            fd;
3290 
3291   PetscFunctionBegin;
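  /* The PETSc binary format read here is assumed to be: a header of four
     PetscInt (MAT_FILE_CLASSID, M = global rows, N = global cols, total
     number of nonzeros), followed by all row lengths, then all column
     indices, then all numerical values. */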
3292   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3293   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3294   if (!rank) {
3295     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
3296     ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
3297     if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
3298   }
3299 
3300   if (newMat->rmap->n < 0 && newMat->rmap->N < 0 && newMat->cmap->n < 0 && newMat->cmap->N < 0) sizesset = 0;
3301 
3302   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
3303   M = header[1]; N = header[2];
3304   /* If global rows/cols are set to PETSC_DECIDE, set it to the sizes given in the file */
3305   if (sizesset && newMat->rmap->N < 0) newMat->rmap->N = M;
3306   if (sizesset && newMat->cmap->N < 0) newMat->cmap->N = N;
3307 
3308   /* If global sizes are set, check if they are consistent with that given in the file */
3309   if (sizesset) {
3310     ierr = MatGetSize(newMat,&grows,&gcols);CHKERRQ(ierr);
3311   }
3312   if (sizesset && newMat->rmap->N != grows) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows: Matrix in file has (%d) and input matrix has (%d)",M,grows);
3313   if (sizesset && newMat->cmap->N != gcols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols: Matrix in file has (%d) and input matrix has (%d)",N,gcols);
3314 
3315   /* determine ownership of all rows */
3316   if (newMat->rmap->n < 0 ) m    = M/size + ((M % size) > rank); /* PETSC_DECIDE */
3317   else m = newMat->rmap->n; /* Set by user */
3318 
3319   ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr);
3320   ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
3321 
3322   /* First process needs enough room for process with most rows */
3323   if (!rank) {
3324     mmax       = rowners[1];
3325     for (i=2; i<=size; i++) {
3326       mmax = PetscMax(mmax,rowners[i]);
3327     }
3328   } else mmax = m;
3329 
3330   rowners[0] = 0;
3331   for (i=2; i<=size; i++) {
3332     rowners[i] += rowners[i-1];
3333   }
3334   rstart = rowners[rank];
3335   rend   = rowners[rank+1];
3336 
3337   /* distribute row lengths to all processors */
3338   ierr    = PetscMalloc2(mmax,PetscInt,&ourlens,mmax,PetscInt,&offlens);CHKERRQ(ierr);
3339   if (!rank) {
3340     ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr);
3341     ierr = PetscMalloc(m*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
3342     ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr);
3343     ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr);
3344     for (j=0; j<m; j++) {
3345       procsnz[0] += ourlens[j];
3346     }
3347     for (i=1; i<size; i++) {
3348       ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr);
3349       /* calculate the number of nonzeros on each processor */
3350       for (j=0; j<rowners[i+1]-rowners[i]; j++) {
3351         procsnz[i] += rowlengths[j];
3352       }
3353       ierr = MPILong_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
3354     }
3355     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
3356   } else {
3357     ierr = MPILong_Recv(ourlens,m,MPIU_INT,0,tag,comm);CHKERRQ(ierr);
3358   }
3359 
3360   if (!rank) {
3361     /* determine max buffer needed and allocate it */
3362     maxnz = 0;
3363     for (i=0; i<size; i++) {
3364       maxnz = PetscMax(maxnz,procsnz[i]);
3365     }
3366     ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr);
3367 
3368     /* read in my part of the matrix column indices  */
3369     nz   = procsnz[0];
3370     ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr);
3371     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
3372 
3373     /* read in everyone else's and ship them off */
3374     for (i=1; i<size; i++) {
3375       nz     = procsnz[i];
3376       ierr   = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
3377       ierr   = MPILong_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
3378     }
3379     ierr = PetscFree(cols);CHKERRQ(ierr);
3380   } else {
3381     /* determine buffer space needed for message */
3382     nz = 0;
3383     for (i=0; i<m; i++) {
3384       nz += ourlens[i];
3385     }
3386     ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr);
3387 
3388     /* receive message of column indices */
3389     ierr = MPILong_Recv(mycols,nz,MPIU_INT,0,tag,comm);CHKERRQ(ierr);
3390   }
3391 
3392   /* determine column ownership if matrix is not square */
3393   if (N != M) {
3394     if (newMat->cmap->n < 0) n      = N/size + ((N % size) > rank);
3395     else n = newMat->cmap->n;
3396     ierr   = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3397     cstart = cend - n;
3398   } else {
3399     cstart = rstart;
3400     cend   = rend;
3401     n      = cend - cstart;
3402   }
3403 
3404   /* loop over local rows, determining number of off diagonal entries */
3405   ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr);
3406   jj = 0;
3407   for (i=0; i<m; i++) {
3408     for (j=0; j<ourlens[i]; j++) {
3409       if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++;
3410       jj++;
3411     }
3412   }
3413 
3414   for (i=0; i<m; i++) {
3415     ourlens[i] -= offlens[i];
3416   }
3417   if (!sizesset) {
3418     ierr = MatSetSizes(newMat,m,n,M,N);CHKERRQ(ierr);
3419   }
3420   ierr = MatMPIAIJSetPreallocation(newMat,0,ourlens,0,offlens);CHKERRQ(ierr);
3421 
3422   for (i=0; i<m; i++) {
3423     ourlens[i] += offlens[i];
3424   }
3425 
3426   if (!rank) {
3427     ierr = PetscMalloc((maxnz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
3428 
3429     /* read in my part of the matrix numerical values  */
3430     nz   = procsnz[0];
3431     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3432 
3433     /* insert into matrix */
3434     jj      = rstart;
3435     smycols = mycols;
3436     svals   = vals;
3437     for (i=0; i<m; i++) {
3438       ierr = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
3439       smycols += ourlens[i];
3440       svals   += ourlens[i];
3441       jj++;
3442     }
3443 
3444     /* read in other processors and ship out */
3445     for (i=1; i<size; i++) {
3446       nz     = procsnz[i];
3447       ierr   = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3448       ierr   = MPILong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr);
3449     }
3450     ierr = PetscFree(procsnz);CHKERRQ(ierr);
3451   } else {
3452     /* receive numeric values */
3453     ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
3454 
3455     /* receive message of values */
3456     ierr   = MPILong_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr);
3457 
3458     /* insert into matrix */
3459     jj      = rstart;
3460     smycols = mycols;
3461     svals   = vals;
3462     for (i=0; i<m; i++) {
3463       ierr     = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
3464       smycols += ourlens[i];
3465       svals   += ourlens[i];
3466       jj++;
3467     }
3468   }
3469   ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr);
3470   ierr = PetscFree(vals);CHKERRQ(ierr);
3471   ierr = PetscFree(mycols);CHKERRQ(ierr);
3472   ierr = PetscFree(rowners);CHKERRQ(ierr);
3473 
3474   ierr = MatAssemblyBegin(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3475   ierr = MatAssemblyEnd(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3476   PetscFunctionReturn(0);
3477 }
3478 
3479 #undef __FUNCT__
3480 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ"
3481 PetscErrorCode MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat)
3482 {
3483   PetscErrorCode ierr;
3484   IS             iscol_local;
3485   PetscInt       csize;
3486 
3487   PetscFunctionBegin;
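  /* Every process needs the complete (gathered) column index set.  On the
     initial call it is built with ISAllGather() and stashed on the new matrix
     (composed under the name "ISAllGather"); on reuse it is retrieved from
     there. */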
3488   ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
3489   if (call == MAT_REUSE_MATRIX) {
3490     ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr);
3491     if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3492   } else {
3493     ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
3494   }
3495   ierr = MatGetSubMatrix_MPIAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr);
3496   if (call == MAT_INITIAL_MATRIX) {
3497     ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
3498     ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
3499   }
3500   PetscFunctionReturn(0);
3501 }
3502 
3503 #undef __FUNCT__
3504 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ_Private"
3505 /*
3506     Not great since it makes two copies of the submatrix: first a local SeqAIJ
3507   on each process, and then the final result built by concatenating the local matrices.
3508   Writing it directly would be much like MatGetSubMatrices_MPIAIJ()
3509 
3510   Note: This requires a sequential iscol with all indices.
3511 */
3512 PetscErrorCode MatGetSubMatrix_MPIAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat)
3513 {
3514   PetscErrorCode ierr;
3515   PetscMPIInt    rank,size;
3516   PetscInt       i,m,n,rstart,row,rend,nz,*cwork,j;
3517   PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal;
3518   Mat            *local,M,Mreuse;
3519   MatScalar      *vwork,*aa;
3520   MPI_Comm       comm = ((PetscObject)mat)->comm;
3521   Mat_SeqAIJ     *aij;
3522 
3523 
3524   PetscFunctionBegin;
3525   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3526   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3527 
3528   if (call ==  MAT_REUSE_MATRIX) {
3529     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr);
3530     if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3531     local = &Mreuse;
3532     ierr  = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);CHKERRQ(ierr);
3533   } else {
3534     ierr   = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr);
3535     Mreuse = *local;
3536     ierr   = PetscFree(local);CHKERRQ(ierr);
3537   }
3538 
3539   /*
3540       m - number of local rows
3541       n - number of columns (same on all processors)
3542       rstart - first row in new global matrix generated
3543   */
3544   ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
3545   if (call == MAT_INITIAL_MATRIX) {
3546     aij = (Mat_SeqAIJ*)(Mreuse)->data;
3547     ii  = aij->i;
3548     jj  = aij->j;
3549 
3550     /*
3551         Determine the number of non-zeros in the diagonal and off-diagonal
3552         portions of the matrix in order to do correct preallocation
3553     */
3554 
3555     /* first get start and end of "diagonal" columns */
3556     if (csize == PETSC_DECIDE) {
3557       ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
3558       if (mglobal == n) { /* square matrix */
3559         nlocal = m;
3560       } else {
3561         nlocal = n/size + ((n % size) > rank);
3562       }
3563     } else {
3564       nlocal = csize;
3565     }
3566     ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3567     rstart = rend - nlocal;
3568     if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n);
3569 
3570     /* next, compute all the lengths */
3571     ierr  = PetscMalloc((2*m+1)*sizeof(PetscInt),&dlens);CHKERRQ(ierr);
3572     olens = dlens + m;
3573     for (i=0; i<m; i++) {
3574       jend = ii[i+1] - ii[i];
3575       olen = 0;
3576       dlen = 0;
3577       for (j=0; j<jend; j++) {
3578         if (*jj < rstart || *jj >= rend) olen++;
3579         else dlen++;
3580         jj++;
3581       }
3582       olens[i] = olen;
3583       dlens[i] = dlen;
3584     }
3585     ierr = MatCreate(comm,&M);CHKERRQ(ierr);
3586     ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr);
3587     ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr);
3588     ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr);
3589     ierr = PetscFree(dlens);CHKERRQ(ierr);
3590   } else {
3591     PetscInt ml,nl;
3592 
3593     M = *newmat;
3594     ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
3595     if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
3596     ierr = MatZeroEntries(M);CHKERRQ(ierr);
3597     /*
3598          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
3599        rather than the slower MatSetValues().
3600     */
3601     M->was_assembled = PETSC_TRUE;
3602     M->assembled     = PETSC_FALSE;
3603   }
3604   ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
3605   aij = (Mat_SeqAIJ*)(Mreuse)->data;
3606   ii  = aij->i;
3607   jj  = aij->j;
3608   aa  = aij->a;
3609   for (i=0; i<m; i++) {
3610     row   = rstart + i;
3611     nz    = ii[i+1] - ii[i];
3612     cwork = jj;     jj += nz;
3613     vwork = aa;     aa += nz;
3614     ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
3615   }
3616 
3617   ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3618   ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3619   *newmat = M;
3620 
3621   /* save submatrix used in processor for next request */
3622   if (call ==  MAT_INITIAL_MATRIX) {
3623     ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
3624     ierr = MatDestroy(&Mreuse);CHKERRQ(ierr);
3625   }
3626 
3627   PetscFunctionReturn(0);
3628 }
3629 
3630 EXTERN_C_BEGIN
3631 #undef __FUNCT__
3632 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR_MPIAIJ"
3633 PetscErrorCode  MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[])
3634 {
3635   PetscInt       m,cstart, cend,j,nnz,i,d;
3636   PetscInt       *d_nnz,*o_nnz,nnz_max = 0,rstart,ii;
3637   const PetscInt *JJ;
3638   PetscScalar    *values;
3639   PetscErrorCode ierr;
3640 
3641   PetscFunctionBegin;
3642   if (Ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0 it is %D",Ii[0]);
3643 
3644   ierr = PetscLayoutSetBlockSize(B->rmap,1);CHKERRQ(ierr);
3645   ierr = PetscLayoutSetBlockSize(B->cmap,1);CHKERRQ(ierr);
3646   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3647   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3648   m      = B->rmap->n;
3649   cstart = B->cmap->rstart;
3650   cend   = B->cmap->rend;
3651   rstart = B->rmap->rstart;
3652 
3653   ierr  = PetscMalloc2(m,PetscInt,&d_nnz,m,PetscInt,&o_nnz);CHKERRQ(ierr);
3654 
3655 #if defined(PETSC_USE_DEBUG)
3656   for (i=0; i<m; i++) {
3657     nnz     = Ii[i+1]- Ii[i];
3658     JJ      = J + Ii[i];
3659     if (nnz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative %D number of columns",i,nnz);
3660     if (nnz && (JJ[0] < 0)) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index",i);
3661     if (nnz && (JJ[nnz-1] >= B->cmap->N)) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N);
3662   }
3663 #endif
3664 
3665   for (i=0; i<m; i++) {
3666     nnz     = Ii[i+1]- Ii[i];
3667     JJ      = J + Ii[i];
3668     nnz_max = PetscMax(nnz_max,nnz);
3669     d       = 0;
3670     for (j=0; j<nnz; j++) {
3671       if (cstart <= JJ[j] && JJ[j] < cend) d++;
3672     }
3673     d_nnz[i] = d;
3674     o_nnz[i] = nnz - d;
3675   }
3676   ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
3677   ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);
3678 
3679   if (v) values = (PetscScalar*)v;
3680   else {
3681     ierr = PetscMalloc((nnz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr);
3682     ierr = PetscMemzero(values,nnz_max*sizeof(PetscScalar));CHKERRQ(ierr);
3683   }
3684 
3685   for (i=0; i<m; i++) {
3686     ii   = i + rstart;
3687     nnz  = Ii[i+1]- Ii[i];
3688     ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ? Ii[i] : 0),INSERT_VALUES);CHKERRQ(ierr);
3689   }
3690   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3691   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3692 
3693   if (!v) {
3694     ierr = PetscFree(values);CHKERRQ(ierr);
3695   }
3696   PetscFunctionReturn(0);
3697 }
3698 EXTERN_C_END
3699 
3700 #undef __FUNCT__
3701 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR"
3702 /*@
3703    MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format
3704    (the default parallel PETSc format).
3705 
3706    Collective on MPI_Comm
3707 
3708    Input Parameters:
3709 +  B - the matrix
3710 .  i - the indices into j for the start of each local row (starts with zero)
3711 .  j - the column indices for each local row (starts with zero)
3712 -  v - optional values in the matrix
3713 
3714    Level: developer
3715 
3716    Notes:
3717        The i, j, and v arrays ARE copied by this routine into the internal format used by PETSc;
3718      thus you CANNOT change the matrix entries by changing the values of v[] after you have
3719      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3720 
3721        The i and j indices are 0 based, and the entries of i are offsets into the local j array.
3722 
3723        The format used for the sparse matrix input is equivalent to a
3724     row-major ordering, i.e., for the following matrix the expected input data is
3725     as shown:
3726 
3727         1 0 0
3728         2 0 3     P0
3729        -------
3730         4 5 6     P1
3731 
3732      Process0 [P0]: rows_owned=[0,1]
3733         i =  {0,1,3}  [size = nrow+1  = 2+1]
3734         j =  {0,0,2}  [size = nz = 3]
3735         v =  {1,2,3}  [size = nz = 3]
3736 
3737      Process1 [P1]: rows_owned=[2]
3738         i =  {0,3}    [size = nrow+1  = 1+1]
3739         j =  {0,1,2}  [size = nz = 3]
3740         v =  {4,5,6}  [size = nz = 3]
3741 
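     A minimal call sequence for the example above might look like the
     following sketch, where nlocalrows and the arrays i, j, and v are assumed
     to hold the per-process CSR data shown, and 3 is the global column count:

.vb
     ierr = MatCreate(comm,&B);CHKERRQ(ierr);
     ierr = MatSetSizes(B,nlocalrows,PETSC_DECIDE,PETSC_DETERMINE,3);CHKERRQ(ierr);
     ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr);
     ierr = MatMPIAIJSetPreallocationCSR(B,i,j,v);CHKERRQ(ierr);
.ve

     Note that this routine not only preallocates but also inserts the values
     and assembles the matrix.
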
3742 .keywords: matrix, aij, compressed row, sparse, parallel
3743 
3744 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateMPIAIJ(), MPIAIJ,
3745           MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays()
3746 @*/
3747 PetscErrorCode  MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
3748 {
3749   PetscErrorCode ierr;
3750 
3751   PetscFunctionBegin;
3752   ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocationCSR_C",(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,i,j,v));CHKERRQ(ierr);
3753   PetscFunctionReturn(0);
3754 }
3755 
3756 #undef __FUNCT__
3757 #define __FUNCT__ "MatMPIAIJSetPreallocation"
3758 /*@C
3759    MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format
3760    (the default parallel PETSc format).  For good matrix assembly performance
3761    the user should preallocate the matrix storage by setting the parameters
3762    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3763    performance can be increased by more than a factor of 50.
3764 
3765    Collective on MPI_Comm
3766 
3767    Input Parameters:
3768 +  A - the matrix
3769 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
3770            (same value is used for all local rows)
3771 .  d_nnz - array containing the number of nonzeros in the various rows of the
3772            DIAGONAL portion of the local submatrix (possibly different for each row)
3773            or PETSC_NULL, if d_nz is used to specify the nonzero structure.
3774            The size of this array is equal to the number of local rows, i.e 'm'.
3775            You must leave room for the diagonal entry even if it is zero.
3776 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
3777            submatrix (same value is used for all local rows).
3778 -  o_nnz - array containing the number of nonzeros in the various rows of the
3779            OFF-DIAGONAL portion of the local submatrix (possibly different for
3780            each row) or PETSC_NULL, if o_nz is used to specify the nonzero
3781            structure. The size of this array is equal to the number
3782            of local rows, i.e 'm'.
3783 
3784    If the *_nnz parameter is given then the *_nz parameter is ignored
3785 
3786    The AIJ format (also called the Yale sparse matrix format or
3787    compressed row storage (CSR)), is fully compatible with standard Fortran 77
3788    storage.  The stored row and column indices begin with zero.
3789    See the <A href="../../docs/manual.pdf#nameddest=ch_mat">Mat chapter of the users manual</A> for details.
3790 
3791    The parallel matrix is partitioned such that the first m0 rows belong to
3792    process 0, the next m1 rows belong to process 1, the next m2 rows belong
3793    to process 2, etc., where m0,m1,m2,... are given by the input parameter 'm'.
3794 
3795    The DIAGONAL portion of the local submatrix of a processor can be defined
3796    as the submatrix obtained by extracting the part corresponding to
3797    the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the
3798    first row that belongs to the processor, r2 is the last row belonging to
3799    this processor, and c1-c2 is the range of indices of the local part of a
3800    vector suitable for applying the matrix to.  This is an m x n matrix.  In the
3801    common case of a square matrix, the row and column ranges are the same and
3802    the DIAGONAL part is also square. The remaining portion of the local
3803    submatrix (m x (N-n)) constitutes the OFF-DIAGONAL portion.
3804 
3805    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
3806 
3807    You can call MatGetInfo() to get information on how effective the preallocation was;
3808    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
3809    You can also run with the option -info and look for messages with the string
3810    malloc in them to see if additional memory allocation was needed.
3811 
3812    Example usage:
3813 
3814    Consider the following 8x8 matrix with 34 non-zero values, that is
3815    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
3816    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
3817    as follows:
3818 
3819 .vb
3820             1  2  0  |  0  3  0  |  0  4
3821     Proc0   0  5  6  |  7  0  0  |  8  0
3822             9  0 10  | 11  0  0  | 12  0
3823     -------------------------------------
3824            13  0 14  | 15 16 17  |  0  0
3825     Proc1   0 18  0  | 19 20 21  |  0  0
3826             0  0  0  | 22 23  0  | 24  0
3827     -------------------------------------
3828     Proc2  25 26 27  |  0  0 28  | 29  0
3829            30  0  0  | 31 32 33  |  0 34
3830 .ve
3831 
3832    This can be represented as a collection of submatrices as:
3833 
3834 .vb
3835       A B C
3836       D E F
3837       G H I
3838 .ve
3839 
3840    Where the submatrices A,B,C are owned by proc0, D,E,F are
3841    owned by proc1, G,H,I are owned by proc2.
3842 
3843    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3844    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3845    The 'M','N' parameters are 8,8, and have the same values on all procs.
3846 
3847    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
3848    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
3849    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
3850    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
3851    part as SeqAIJ matrices, e.g., proc1 will store [E] as a SeqAIJ
3852    matrix, and [DF] as another SeqAIJ matrix.
3853 
3854    When d_nz, o_nz parameters are specified, d_nz storage elements are
3855    allocated for every row of the local diagonal submatrix, and o_nz
3856    storage locations are allocated for every row of the OFF-DIAGONAL submat.
3857    One way to choose d_nz and o_nz is to use the maximum number of nonzeros
3858    in any local row of the DIAGONAL and OFF-DIAGONAL submatrices, respectively.
3859    In this case, the values of d_nz,o_nz are:
3860 .vb
3861      proc0 : d_nz = 2, o_nz = 2
3862      proc1 : d_nz = 3, o_nz = 2
3863      proc2 : d_nz = 1, o_nz = 4
3864 .ve
3865    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
3866    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
3867    for proc2, i.e., we are using 12+15+10=37 storage locations to store
3868    34 values.
3869 
3870    When d_nnz, o_nnz parameters are specified, the storage is specified
3871    for every row, corresponding to both the DIAGONAL and OFF-DIAGONAL submatrices.
3872    In the above case the values for d_nnz,o_nnz are:
3873 .vb
3874      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
3875      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
3876      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
3877 .ve
3878    Here the space allocated is the sum of all the above values, i.e., 34, and
3879    hence the preallocation is perfect.
3880 
3881    Level: intermediate
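   For the example above, a sketch of the corresponding per-process calls,
   where m is 3, 3, or 2 depending on the process and d_nnz/o_nnz hold the
   per-row counts listed:

.vb
     ierr = MatCreate(comm,&A);CHKERRQ(ierr);
     ierr = MatSetSizes(A,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
     ierr = MatMPIAIJSetPreallocation(A,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
.ve
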
3882 
3883 .keywords: matrix, aij, compressed row, sparse, parallel
3884 
3885 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIAIJ(), MatMPIAIJSetPreallocationCSR(),
3886           MPIAIJ, MatGetInfo()
3887 @*/
3888 PetscErrorCode  MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
3889 {
3890   PetscErrorCode ierr;
3891 
3892   PetscFunctionBegin;
3893   ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocation_C",(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr);
3894   PetscFunctionReturn(0);
3895 }
3896 
3897 #undef __FUNCT__
3898 #define __FUNCT__ "MatCreateMPIAIJWithArrays"
3899 /*@
3900      MatCreateMPIAIJWithArrays - creates a MPI AIJ matrix using arrays that contain in standard
3901          CSR format the local rows.
3902 
3903    Collective on MPI_Comm
3904 
3905    Input Parameters:
3906 +  comm - MPI communicator
3907 .  m - number of local rows (Cannot be PETSC_DECIDE)
3908 .  n - This value should be the same as the local size used in creating the
3909        x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have it
3910        calculated if N is given). For square matrices n is almost always m.
3911 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
3912 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
3913 .   i - row indices
3914 .   j - column indices
3915 -   a - matrix values
3916 
3917    Output Parameter:
3918 .   mat - the matrix
3919 
3920    Level: intermediate
3921 
3922    Notes:
3923        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
3924      thus you CANNOT change the matrix entries by changing the values of a[] after you have
3925      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3926 
3927        The i and j indices are 0 based, and the entries of i are offsets into the local j array.
3928 
3929        The format used for the sparse matrix input is equivalent to a
3930     row-major ordering, i.e., for the following matrix the expected input data is
3931     as shown:
3932 
3933         1 0 0
3934         2 0 3     P0
3935        -------
3936         4 5 6     P1
3937 
3938      Process0 [P0]: rows_owned=[0,1]
3939         i =  {0,1,3}  [size = nrow+1  = 2+1]
3940         j =  {0,0,2}  [size = nz = 3]
3941         v =  {1,2,3}  [size = nz = 3]
3942 
3943      Process1 [P1]: rows_owned=[2]
3944         i =  {0,3}    [size = nrow+1  = 1+1]
3945         j =  {0,1,2}  [size = nz = 3]
3946         v =  {4,5,6}  [size = nz = 3]
3947 
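     A call for the example above might look like the following sketch, where
     each process passes its own local arrays and local row count (2 on P0,
     1 on P1), and 3 is the global column count:

.vb
     ierr = MatCreateMPIAIJWithArrays(comm,m,PETSC_DECIDE,PETSC_DETERMINE,3,i,j,v,&A);CHKERRQ(ierr);
.ve
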
3948 .keywords: matrix, aij, compressed row, sparse, parallel
3949 
3950 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
3951           MPIAIJ, MatCreateMPIAIJ(), MatCreateMPIAIJWithSplitArrays()
3952 @*/
3953 PetscErrorCode  MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
3954 {
3955   PetscErrorCode ierr;
3956 
3957   PetscFunctionBegin;
3958   if (i[0]) {
3959     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
3960   }
3961   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
3962   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
3963   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
3964   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
3965   ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr);
3966   PetscFunctionReturn(0);
3967 }
3968 
3969 #undef __FUNCT__
3970 #define __FUNCT__ "MatCreateMPIAIJ"
3971 /*@C
3972    MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format
3973    (the default parallel PETSc format).  For good matrix assembly performance
3974    the user should preallocate the matrix storage by setting the parameters
3975    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3976    performance can be increased by more than a factor of 50.
3977 
3978    Collective on MPI_Comm
3979 
3980    Input Parameters:
3981 +  comm - MPI communicator
3982 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
3983            This value should be the same as the local size used in creating the
3984            y vector for the matrix-vector product y = Ax.
3985 .  n - This value should be the same as the local size used in creating the
3986        x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have it
3987        calculated if N is given). For square matrices n is almost always m.
3988 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
3989 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
3990 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
3991            (same value is used for all local rows)
3992 .  d_nnz - array containing the number of nonzeros in the various rows of the
3993            DIAGONAL portion of the local submatrix (possibly different for each row)
3994            or PETSC_NULL, if d_nz is used to specify the nonzero structure.
3995            The size of this array is equal to the number of local rows, i.e 'm'.
3996            You must leave room for the diagonal entry even if it is zero.
3997 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
3998            submatrix (same value is used for all local rows).
3999 -  o_nnz - array containing the number of nonzeros in the various rows of the
4000            OFF-DIAGONAL portion of the local submatrix (possibly different for
4001            each row) or PETSC_NULL, if o_nz is used to specify the nonzero
4002            structure. The size of this array is equal to the number
4003            of local rows, i.e 'm'.
4004 
4005    Output Parameter:
4006 .  A - the matrix
4007 
4008    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
4009    MatXXXXSetPreallocation() paradigm instead of this routine directly.
4010    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation()]
4011 
4012    Notes:
4013    If the *_nnz parameter is given then the *_nz parameter is ignored
4014 
4015    m,n,M,N parameters specify the size of the matrix, and its partitioning across
4016    processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
4017    storage requirements for this matrix.
4018 
4019    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one
4020    processor then it must be used on all processors that share the object for
4021    that argument.
4022 
4023    The user MUST specify either the local or global matrix dimensions
4024    (possibly both).
4025 
4026    The parallel matrix is partitioned across processors such that the
4027    first m0 rows belong to process 0, the next m1 rows belong to
4028    process 1, the next m2 rows belong to process 2, etc., where
4029    m0,m1,m2,... are the input parameter 'm' on each process; i.e., each
4030    processor stores values corresponding to an [m x N] submatrix.
4031 
4032    The columns are logically partitioned with the n0 columns belonging
4033    to the 0th partition, the next n1 columns belonging to the next
4034    partition, etc., where n0,n1,n2,... are the input parameter 'n'.
4035 
4036    The DIAGONAL portion of the local submatrix on any given processor
4037    is the submatrix corresponding to the m rows and n columns owned by
4038    that processor; i.e., the diagonal submatrix on process 0 is
4039    [m0 x n0], the diagonal submatrix on process 1 is [m1 x n1],
4040    etc. The remaining [m x (N-n)] portion of the local submatrix
4041    constitutes the OFF-DIAGONAL portion. The example below
4042    illustrates this concept.
4043 
4044    For a square global matrix we define each processor's diagonal portion
4045    to be its local rows and the corresponding columns (a square submatrix);
4046    each processor's off-diagonal portion encompasses the remainder of the
4047    local matrix (a rectangular submatrix).
4048 
4049    If o_nnz, d_nnz are specified, then o_nz and d_nz are ignored.
4050 
4051    When calling this routine with a single process communicator, a matrix of
4052    type SEQAIJ is returned.  If a matrix of type MPIAIJ is desired for this
4053    type of communicator, use the construction mechanism:
4054      MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatSetSizes(A, m,n,M,N); MatMPIAIJSetPreallocation(A,...);
4055 
4056    By default, this format uses inodes (identical nodes) when possible.
4057    We search for consecutive rows with the same nonzero structure, thereby
4058    reusing matrix information to achieve increased efficiency.
4059 
4060    Options Database Keys:
4061 +  -mat_no_inode  - Do not use inodes
4062 .  -mat_inode_limit <limit> - Sets inode limit (max limit=5)
4063 -  -mat_aij_oneindex - Internally use indexing starting at 1
4064         rather than 0.  Note that when calling MatSetValues(),
4065         the user still MUST index entries starting at 0!
4066 
4067 
4068    Example usage:
4069 
4070    Consider the following 8x8 matrix with 34 non-zero values, that is
4071    assembled across 3 processors. Let us assume that proc0 owns 3 rows,
4072    proc1 owns 3 rows, and proc2 owns 2 rows. This division can be shown
4073    as follows:
4074 
4075 .vb
4076             1  2  0  |  0  3  0  |  0  4
4077     Proc0   0  5  6  |  7  0  0  |  8  0
4078             9  0 10  | 11  0  0  | 12  0
4079     -------------------------------------
4080            13  0 14  | 15 16 17  |  0  0
4081     Proc1   0 18  0  | 19 20 21  |  0  0
4082             0  0  0  | 22 23  0  | 24  0
4083     -------------------------------------
4084     Proc2  25 26 27  |  0  0 28  | 29  0
4085            30  0  0  | 31 32 33  |  0 34
4086 .ve
4087 
4088    This can be represented as a collection of submatrices as:
4089 
4090 .vb
4091       A B C
4092       D E F
4093       G H I
4094 .ve
4095 
4096    Where the submatrices A,B,C are owned by proc0, D,E,F are
4097    owned by proc1, G,H,I are owned by proc2.
4098 
4099    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4100    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4101    The 'M','N' parameters are 8,8, and have the same values on all procs.
4102 
4103    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
4104    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
4105    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
4106    Internally, each processor stores the DIAGONAL part and the OFF-DIAGONAL
4107    part as SeqAIJ matrices; e.g., proc1 will store [E] as one SeqAIJ
4108    matrix, and [DF] as another SeqAIJ matrix.
4109 
4110    When d_nz, o_nz parameters are specified, d_nz storage elements are
4111    allocated for every row of the local diagonal submatrix, and o_nz
4112    storage locations are allocated for every row of the OFF-DIAGONAL submatrix.
4113    One way to choose d_nz and o_nz is to use the maximum number of nonzeros per
4114    local row in the local DIAGONAL and OFF-DIAGONAL submatrices, respectively.
4115    In this case, the values of d_nz,o_nz are:
4116 .vb
4117      proc0 : d_nz = 2, o_nz = 2
4118      proc1 : d_nz = 3, o_nz = 2
4119      proc2 : d_nz = 1, o_nz = 4
4120 .ve
4121    We are allocating m*(d_nz+o_nz) storage locations on every proc. This
4122    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, and 2*(1+4)=10
4123    for proc2; i.e., we are using 12+15+10=37 storage locations to store
4124    34 values.
4125 
4126    When d_nnz, o_nnz parameters are specified, the storage is specified
4127    for every row, corresponding to both the DIAGONAL and OFF-DIAGONAL submatrices.
4128    In the above case the values for d_nnz,o_nnz are:
4129 .vb
4130      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
4131      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
4132      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
4133 .ve
4134    Here the space allocated is the sum of all the above values, i.e., 34,
4135    and hence the preallocation is perfect.
4136 
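   As a rough sketch, proc0 of the example above could create the matrix with
   this exact per-row preallocation as follows, using its own m=3, n=3 and the
   proc0 values of d_nnz and o_nnz listed above (the other processes make the
   analogous call; error handling assumed as usual):
.vb
   Mat      A;
   PetscInt d_nnz[] = {2,2,2},o_nnz[] = {2,2,2};
   ierr = MatCreateMPIAIJ(comm,3,3,8,8,0,d_nnz,0,o_nnz,&A);CHKERRQ(ierr);
.ve
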
4137    Level: intermediate
4138 
4139 .keywords: matrix, aij, compressed row, sparse, parallel
4140 
4141 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
4142           MPIAIJ, MatCreateMPIAIJWithArrays()
4143 @*/
4144 PetscErrorCode  MatCreateMPIAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
4145 {
4146   PetscErrorCode ierr;
4147   PetscMPIInt    size;
4148 
4149   PetscFunctionBegin;
4150   ierr = MatCreate(comm,A);CHKERRQ(ierr);
4151   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
4152   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4153   if (size > 1) {
4154     ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr);
4155     ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
4156   } else {
4157     ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr);
4158     ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr);
4159   }
4160   PetscFunctionReturn(0);
4161 }
4162 
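/* Gives access to the pieces of an MPIAIJ matrix: the diagonal block Ad, the
   off-diagonal block Ao, and colmap (garray), which maps Ao's local column
   numbering back to global column indices. */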
4163 #undef __FUNCT__
4164 #define __FUNCT__ "MatMPIAIJGetSeqAIJ"
4165 PetscErrorCode  MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,PetscInt *colmap[])
4166 {
4167   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
4168 
4169   PetscFunctionBegin;
4170   *Ad     = a->A;
4171   *Ao     = a->B;
4172   *colmap = a->garray;
4173   PetscFunctionReturn(0);
4174 }
4175 
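/* Applies a coloring to both sequential blocks of an MPIAIJ matrix: for
   IS_COLORING_GLOBAL the colors of off-process columns are gathered with
   ISAllGatherColors(); for IS_COLORING_GHOSTED they are looked up through the
   matrix's local-to-global column mapping. */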
4176 #undef __FUNCT__
4177 #define __FUNCT__ "MatSetColoring_MPIAIJ"
4178 PetscErrorCode MatSetColoring_MPIAIJ(Mat A,ISColoring coloring)
4179 {
4180   PetscErrorCode ierr;
4181   PetscInt       i;
4182   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
4183 
4184   PetscFunctionBegin;
4185   if (coloring->ctype == IS_COLORING_GLOBAL) {
4186     ISColoringValue *allcolors,*colors;
4187     ISColoring      ocoloring;
4188 
4189     /* set coloring for diagonal portion */
4190     ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr);
4191 
4192     /* set coloring for off-diagonal portion */
4193     ierr = ISAllGatherColors(((PetscObject)A)->comm,coloring->n,coloring->colors,PETSC_NULL,&allcolors);CHKERRQ(ierr);
4194     ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
4195     for (i=0; i<a->B->cmap->n; i++) {
4196       colors[i] = allcolors[a->garray[i]];
4197     }
4198     ierr = PetscFree(allcolors);CHKERRQ(ierr);
4199     ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
4200     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
4201     ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr);
4202   } else if (coloring->ctype == IS_COLORING_GHOSTED) {
4203     ISColoringValue *colors;
4204     PetscInt        *larray;
4205     ISColoring      ocoloring;
4206 
4207     /* set coloring for diagonal portion */
4208     ierr = PetscMalloc((a->A->cmap->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr);
4209     for (i=0; i<a->A->cmap->n; i++) {
4210       larray[i] = i + A->cmap->rstart;
4211     }
4212     ierr = ISGlobalToLocalMappingApply(A->cmap->mapping,IS_GTOLM_MASK,a->A->cmap->n,larray,PETSC_NULL,larray);CHKERRQ(ierr);
4213     ierr = PetscMalloc((a->A->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
4214     for (i=0; i<a->A->cmap->n; i++) {
4215       colors[i] = coloring->colors[larray[i]];
4216     }
4217     ierr = PetscFree(larray);CHKERRQ(ierr);
4218     ierr = ISColoringCreate(PETSC_COMM_SELF,coloring->n,a->A->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
4219     ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr);
4220     ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr);
4221 
4222     /* set coloring for off-diagonal portion */
4223     ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr);
4224     ierr = ISGlobalToLocalMappingApply(A->cmap->mapping,IS_GTOLM_MASK,a->B->cmap->n,a->garray,PETSC_NULL,larray);CHKERRQ(ierr);
4225     ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
4226     for (i=0; i<a->B->cmap->n; i++) {
4227       colors[i] = coloring->colors[larray[i]];
4228     }
4229     ierr = PetscFree(larray);CHKERRQ(ierr);
4230     ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
4231     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
4232     ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr);
4233   } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support ISColoringType %d",(int)coloring->ctype);
4234 
4235   PetscFunctionReturn(0);
4236 }
4237 
4238 #if defined(PETSC_HAVE_ADIC)
4239 #undef __FUNCT__
4240 #define __FUNCT__ "MatSetValuesAdic_MPIAIJ"
4241 PetscErrorCode MatSetValuesAdic_MPIAIJ(Mat A,void *advalues)
4242 {
4243   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
4244   PetscErrorCode ierr;
4245 
4246   PetscFunctionBegin;
4247   ierr = MatSetValuesAdic_SeqAIJ(a->A,advalues);CHKERRQ(ierr);
4248   ierr = MatSetValuesAdic_SeqAIJ(a->B,advalues);CHKERRQ(ierr);
4249   PetscFunctionReturn(0);
4250 }
4251 #endif
4252 
4253 #undef __FUNCT__
4254 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ"
4255 PetscErrorCode MatSetValuesAdifor_MPIAIJ(Mat A,PetscInt nl,void *advalues)
4256 {
4257   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
4258   PetscErrorCode ierr;
4259 
4260   PetscFunctionBegin;
4261   ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr);
4262   ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr);
4263   PetscFunctionReturn(0);
4264 }
4265 
4266 #undef __FUNCT__
4267 #define __FUNCT__ "MatMerge"
4268 /*@
4269       MatMerge - Creates a single large PETSc matrix by concatenating the sequential
4270                  matrices from each processor
4271 
4272     Collective on MPI_Comm
4273 
4274    Input Parameters:
4275 +    comm - the communicator the parallel matrix will live on
4276 .    inmat - the input sequential matrix (one per process)
4277 .    n - number of local columns (or PETSC_DECIDE)
4278 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4279 
4280    Output Parameter:
4281 .    outmat - the parallel matrix generated
4282 
4283     Level: advanced
4284 
4285    Notes: The number of columns of the matrix on EACH processor MUST be the same.
4286 
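   A minimal usage sketch (each process passes its own SeqAIJ matrix inmat;
   the names here are illustrative):
.vb
   Mat C;
   ierr = MatMerge(PETSC_COMM_WORLD,inmat,PETSC_DECIDE,MAT_INITIAL_MATRIX,&C);CHKERRQ(ierr);
.ve
   Note that inmat is destroyed by this routine once its values have been
   copied into the parallel matrix.
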
4287 @*/
4288 PetscErrorCode  MatMerge(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
4289 {
4290   PetscErrorCode ierr;
4291   PetscInt       m,N,i,rstart,nnz,Ii,*dnz,*onz;
4292   PetscInt       *indx;
4293   PetscScalar    *values;
4294 
4295   PetscFunctionBegin;
4296   ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
4297   if (scall == MAT_INITIAL_MATRIX){
4298     /* count nonzeros in each row, for diagonal and off diagonal portion of matrix */
4299     if (n == PETSC_DECIDE){
4300       ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr);
4301     }
4302     ierr = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
4303     rstart -= m;
4304 
4305     ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
4306     for (i=0;i<m;i++) {
4307       ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr);
4308       ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr);
4309       ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr);
4310     }
4311     /* This routine will ONLY return MPIAIJ type matrix */
4312     ierr = MatCreate(comm,outmat);CHKERRQ(ierr);
4313     ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
4314     ierr = MatSetType(*outmat,MATMPIAIJ);CHKERRQ(ierr);
4315     ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr);
4316     ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
4317 
4318   } else if (scall == MAT_REUSE_MATRIX){
4319     ierr = MatGetOwnershipRange(*outmat,&rstart,PETSC_NULL);CHKERRQ(ierr);
4320   } else {
4321     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
4322   }
4323 
4324   for (i=0;i<m;i++) {
4325     ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
4326     Ii    = i + rstart;
4327     ierr = MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
4328     ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
4329   }
4330   ierr = MatDestroy(&inmat);CHKERRQ(ierr);
4331   ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4332   ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4333 
4334   PetscFunctionReturn(0);
4335 }
4336 
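/* Writes each process's local rows of A (padded to the global column count) to a
   separate binary file named <outfile>.<rank>, opened in append mode. */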
4337 #undef __FUNCT__
4338 #define __FUNCT__ "MatFileSplit"
4339 PetscErrorCode MatFileSplit(Mat A,char *outfile)
4340 {
4341   PetscErrorCode    ierr;
4342   PetscMPIInt       rank;
4343   PetscInt          m,N,i,rstart,nnz;
4344   size_t            len;
4345   const PetscInt    *indx;
4346   PetscViewer       out;
4347   char              *name;
4348   Mat               B;
4349   const PetscScalar *values;
4350 
4351   PetscFunctionBegin;
4352   ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr);
4353   ierr = MatGetSize(A,0,&N);CHKERRQ(ierr);
4354   /* Should this be the type of the diagonal block of A? */
4355   ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr);
4356   ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr);
4357   ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr);
4358   ierr = MatSeqAIJSetPreallocation(B,0,PETSC_NULL);CHKERRQ(ierr);
4359   ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr);
4360   for (i=0;i<m;i++) {
4361     ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
4362     ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
4363     ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
4364   }
4365   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4366   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4367 
4368   ierr = MPI_Comm_rank(((PetscObject)A)->comm,&rank);CHKERRQ(ierr);
4369   ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr);
4370   ierr = PetscMalloc((len+5)*sizeof(char),&name);CHKERRQ(ierr);
4371   sprintf(name,"%s.%d",outfile,rank);
4372   ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr);
4373   ierr = PetscFree(name);CHKERRQ(ierr);
4374   ierr = MatView(B,out);CHKERRQ(ierr);
4375   ierr = PetscViewerDestroy(&out);CHKERRQ(ierr);
4376   ierr = MatDestroy(&B);CHKERRQ(ierr);
4377   PetscFunctionReturn(0);
4378 }
4379 
4380 extern PetscErrorCode MatDestroy_MPIAIJ(Mat);
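/* Destroy routine installed by MatMerge_SeqsToMPISymbolic(): releases the attached
   Mat_Merge_SeqsToMPI support structure (if present) before calling the standard
   MPIAIJ destroy. */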
4381 #undef __FUNCT__
4382 #define __FUNCT__ "MatDestroy_MPIAIJ_SeqsToMPI"
4383 PetscErrorCode  MatDestroy_MPIAIJ_SeqsToMPI(Mat A)
4384 {
4385   PetscErrorCode       ierr;
4386   Mat_Merge_SeqsToMPI  *merge;
4387   PetscContainer       container;
4388 
4389   PetscFunctionBegin;
4390   ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr);
4391   if (container) {
4392     ierr = PetscContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr);
4393     ierr = PetscFree(merge->id_r);CHKERRQ(ierr);
4394     ierr = PetscFree(merge->len_s);CHKERRQ(ierr);
4395     ierr = PetscFree(merge->len_r);CHKERRQ(ierr);
4396     ierr = PetscFree(merge->bi);CHKERRQ(ierr);
4397     ierr = PetscFree(merge->bj);CHKERRQ(ierr);
4398     ierr = PetscFree(merge->buf_ri[0]);CHKERRQ(ierr);
4399     ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr);
4400     ierr = PetscFree(merge->buf_rj[0]);CHKERRQ(ierr);
4401     ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr);
4402     ierr = PetscFree(merge->coi);CHKERRQ(ierr);
4403     ierr = PetscFree(merge->coj);CHKERRQ(ierr);
4404     ierr = PetscFree(merge->owners_co);CHKERRQ(ierr);
4405     ierr = PetscLayoutDestroy(&merge->rowmap);CHKERRQ(ierr);
4406     ierr = PetscFree(merge);CHKERRQ(ierr);
4407     ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr);
4408   }
4409   ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr);
4410   PetscFunctionReturn(0);
4411 }
4412 
4413 #include <../src/mat/utils/freespace.h>
4414 #include <petscbt.h>
4415 
4416 #undef __FUNCT__
4417 #define __FUNCT__ "MatMerge_SeqsToMPINumeric"
4418 /*@C
4419       MatMerge_SeqsToMPI - Creates an MPIAIJ matrix by adding together the sequential
4420                  matrices from each processor
4421 
4422     Collective on MPI_Comm
4423 
4424    Input Parameters:
4425 +    comm - the communicator the parallel matrix will live on
4426 .    seqmat - the input sequential matrix (one per process)
4427 .    m - number of local rows (or PETSC_DECIDE)
4428 .    n - number of local columns (or PETSC_DECIDE)
4429 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4430 
4431    Output Parameter:
4432 .    mpimat - the parallel matrix generated
4433 
4434     Level: advanced
4435 
4436    Notes:
4437      The dimensions of the sequential matrix on each processor MUST be the same. The operation is
4438      implemented as the MatMerge_SeqsToMPISymbolic()/MatMerge_SeqsToMPINumeric() pair. The input
4439      seqmat is included in the container "Mat_Merge_SeqsToMPI" and will be destroyed when mpimat is destroyed; call PetscObjectQuery() to access seqmat.
4440 @*/
4441 PetscErrorCode  MatMerge_SeqsToMPINumeric(Mat seqmat,Mat mpimat)
4442 {
4443   PetscErrorCode       ierr;
4444   MPI_Comm             comm=((PetscObject)mpimat)->comm;
4445   Mat_SeqAIJ           *a=(Mat_SeqAIJ*)seqmat->data;
4446   PetscMPIInt          size,rank,taga,*len_s;
4447   PetscInt             N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj=a->j;
4448   PetscInt             proc,m;
4449   PetscInt             **buf_ri,**buf_rj;
4450   PetscInt             k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj;
4451   PetscInt             nrows,**buf_ri_k,**nextrow,**nextai;
4452   MPI_Request          *s_waits,*r_waits;
4453   MPI_Status           *status;
4454   MatScalar            *aa=a->a;
4455   MatScalar            **abuf_r,*ba_i;
4456   Mat_Merge_SeqsToMPI  *merge;
4457   PetscContainer       container;
4458 
4459   PetscFunctionBegin;
4460   ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
4461 
4462   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4463   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4464 
4465   ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr);
4466   ierr  = PetscContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr);
4467 
4468   bi     = merge->bi;
4469   bj     = merge->bj;
4470   buf_ri = merge->buf_ri;
4471   buf_rj = merge->buf_rj;
4472 
4473   ierr   = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr);
4474   owners = merge->rowmap->range;
4475   len_s  = merge->len_s;
4476 
4477   /* send and recv matrix values */
4478   /*-----------------------------*/
4479   ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr);
4480   ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr);
4481 
4482   ierr = PetscMalloc((merge->nsend+1)*sizeof(MPI_Request),&s_waits);CHKERRQ(ierr);
4483   for (proc=0,k=0; proc<size; proc++){
4484     if (!len_s[proc]) continue;
4485     i = owners[proc];
4486     ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr);
4487     k++;
4488   }
4489 
4490   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);}
4491   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);}
4492   ierr = PetscFree(status);CHKERRQ(ierr);
4493 
4494   ierr = PetscFree(s_waits);CHKERRQ(ierr);
4495   ierr = PetscFree(r_waits);CHKERRQ(ierr);
4496 
4497   /* insert mat values of mpimat */
4498   /*----------------------------*/
4499   ierr = PetscMalloc(N*sizeof(PetscScalar),&ba_i);CHKERRQ(ierr);
4500   ierr = PetscMalloc3(merge->nrecv,PetscInt*,&buf_ri_k,merge->nrecv,PetscInt*,&nextrow,merge->nrecv,PetscInt*,&nextai);CHKERRQ(ierr);
4501 
4502   for (k=0; k<merge->nrecv; k++){
4503     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
4504     nrows = *(buf_ri_k[k]);
4505     nextrow[k]  = buf_ri_k[k]+1;  /* next row number of k-th recved i-structure */
4506     nextai[k]   = buf_ri_k[k] + (nrows + 1);/* points to the next i-structure of k-th recved i-structure  */
4507   }
4508 
4509   /* set values of ba */
4510   m = merge->rowmap->n;
4511   for (i=0; i<m; i++) {
4512     arow = owners[rank] + i;
4513     bj_i = bj+bi[i];  /* col indices of the i-th row of mpimat */
4514     bnzi = bi[i+1] - bi[i];
4515     ierr = PetscMemzero(ba_i,bnzi*sizeof(PetscScalar));CHKERRQ(ierr);
4516 
4517     /* add local non-zero vals of this proc's seqmat into ba */
4518     anzi = ai[arow+1] - ai[arow];
4519     aj   = a->j + ai[arow];
4520     aa   = a->a + ai[arow];
4521     nextaj = 0;
4522     for (j=0; nextaj<anzi; j++){
4523       if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */
4524         ba_i[j] += aa[nextaj++];
4525       }
4526     }
4527 
4528     /* add received vals into ba */
4529     for (k=0; k<merge->nrecv; k++){ /* k-th received message */
4530       /* i-th row */
4531       if (i == *nextrow[k]) {
4532         anzi = *(nextai[k]+1) - *nextai[k];
4533         aj   = buf_rj[k] + *(nextai[k]);
4534         aa   = abuf_r[k] + *(nextai[k]);
4535         nextaj = 0;
4536         for (j=0; nextaj<anzi; j++){
4537           if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */
4538             ba_i[j] += aa[nextaj++];
4539           }
4540         }
4541         nextrow[k]++; nextai[k]++;
4542       }
4543     }
4544     ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr);
4545   }
4546   ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4547   ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4548 
4549   ierr = PetscFree(abuf_r[0]);CHKERRQ(ierr);
4550   ierr = PetscFree(abuf_r);CHKERRQ(ierr);
4551   ierr = PetscFree(ba_i);CHKERRQ(ierr);
4552   ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
4553   ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
4554   PetscFunctionReturn(0);
4555 }
4556 
4557 extern PetscErrorCode  MatDestroy_MPIAIJ_SeqsToMPI(Mat);
4558 
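/* Symbolic phase of MatMerge_SeqsToMPI(): determines row ownership, exchanges the
   i- and j-structure of the rows each process must give away, merges the received
   rows into the local ones with a linked list, preallocates the parallel matrix,
   and attaches the merge data to it in a container for the numeric phase. */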
4559 #undef __FUNCT__
4560 #define __FUNCT__ "MatMerge_SeqsToMPISymbolic"
4561 PetscErrorCode  MatMerge_SeqsToMPISymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat)
4562 {
4563   PetscErrorCode       ierr;
4564   Mat                  B_mpi;
4565   Mat_SeqAIJ           *a=(Mat_SeqAIJ*)seqmat->data;
4566   PetscMPIInt          size,rank,tagi,tagj,*len_s,*len_si,*len_ri;
4567   PetscInt             **buf_rj,**buf_ri,**buf_ri_k;
4568   PetscInt             M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j;
4569   PetscInt             len,proc,*dnz,*onz;
4570   PetscInt             k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0;
4571   PetscInt             nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai;
4572   MPI_Request          *si_waits,*sj_waits,*ri_waits,*rj_waits;
4573   MPI_Status           *status;
4574   PetscFreeSpaceList   free_space=PETSC_NULL,current_space=PETSC_NULL;
4575   PetscBT              lnkbt;
4576   Mat_Merge_SeqsToMPI  *merge;
4577   PetscContainer       container;
4578 
4579   PetscFunctionBegin;
4580   ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4581 
4582   /* make sure it is a PETSc comm */
4583   ierr = PetscCommDuplicate(comm,&comm,PETSC_NULL);CHKERRQ(ierr);
4584   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4585   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4586 
4587   ierr = PetscNew(Mat_Merge_SeqsToMPI,&merge);CHKERRQ(ierr);
4588   ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr);
4589 
4590   /* determine row ownership */
4591   /*---------------------------------------------------------*/
4592   ierr = PetscLayoutCreate(comm,&merge->rowmap);CHKERRQ(ierr);
4593   ierr = PetscLayoutSetLocalSize(merge->rowmap,m);CHKERRQ(ierr);
4594   ierr = PetscLayoutSetSize(merge->rowmap,M);CHKERRQ(ierr);
4595   ierr = PetscLayoutSetBlockSize(merge->rowmap,1);CHKERRQ(ierr);
4596   ierr = PetscLayoutSetUp(merge->rowmap);CHKERRQ(ierr);
4597   ierr = PetscMalloc(size*sizeof(PetscMPIInt),&len_si);CHKERRQ(ierr);
4598   ierr = PetscMalloc(size*sizeof(PetscMPIInt),&merge->len_s);CHKERRQ(ierr);
4599 
4600   m      = merge->rowmap->n;
4601   M      = merge->rowmap->N;
4602   owners = merge->rowmap->range;
4603 
4604   /* determine the number of messages to send, their lengths */
4605   /*---------------------------------------------------------*/
4606   len_s  = merge->len_s;
4607 
4608   len = 0;  /* length of buf_si[] */
4609   merge->nsend = 0;
4610   for (proc=0; proc<size; proc++){
4611     len_si[proc] = 0;
4612     if (proc == rank){
4613       len_s[proc] = 0;
4614     } else {
4615       len_si[proc] = owners[proc+1] - owners[proc] + 1;
4616       len_s[proc] = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */
4617     }
4618     if (len_s[proc]) {
4619       merge->nsend++;
4620       nrows = 0;
4621       for (i=owners[proc]; i<owners[proc+1]; i++){
4622         if (ai[i+1] > ai[i]) nrows++;
4623       }
4624       len_si[proc] = 2*(nrows+1);
4625       len += len_si[proc];
4626     }
4627   }
4628 
4629   /* determine the number and length of messages to receive for ij-structure */
4630   /*-------------------------------------------------------------------------*/
4631   ierr = PetscGatherNumberOfMessages(comm,PETSC_NULL,len_s,&merge->nrecv);CHKERRQ(ierr);
4632   ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr);
4633 
4634   /* post the Irecv of j-structure */
4635   /*-------------------------------*/
4636   ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr);
4637   ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr);
4638 
4639   /* post the Isend of j-structure */
4640   /*--------------------------------*/
4641   ierr = PetscMalloc2(merge->nsend,MPI_Request,&si_waits,merge->nsend,MPI_Request,&sj_waits);CHKERRQ(ierr);
4642 
4643   for (proc=0, k=0; proc<size; proc++){
4644     if (!len_s[proc]) continue;
4645     i = owners[proc];
4646     ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr);
4647     k++;
4648   }
4649 
4650   /* receives and sends of j-structure are complete */
4651   /*------------------------------------------------*/
4652   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);}
4653   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);}
4654 
4655   /* send and recv i-structure */
4656   /*---------------------------*/
4657   ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr);
4658   ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr);
4659 
4660   ierr = PetscMalloc((len+1)*sizeof(PetscInt),&buf_s);CHKERRQ(ierr);
4661   buf_si = buf_s;  /* points to the beginning of k-th msg to be sent */
4662   for (proc=0,k=0; proc<size; proc++){
4663     if (!len_s[proc]) continue;
4664     /* form outgoing message for i-structure:
4665          buf_si[0]:                 nrows to be sent
4666                [1:nrows]:           row index (global)
4667                [nrows+1:2*nrows+1]: i-structure index
4668     */
4669     /*-------------------------------------------*/
4670     nrows = len_si[proc]/2 - 1;
4671     buf_si_i    = buf_si + nrows+1;
4672     buf_si[0]   = nrows;
4673     buf_si_i[0] = 0;
4674     nrows = 0;
4675     for (i=owners[proc]; i<owners[proc+1]; i++){
4676       anzi = ai[i+1] - ai[i];
4677       if (anzi) {
4678         buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */
4679         buf_si[nrows+1] = i-owners[proc]; /* local row index */
4680         nrows++;
4681       }
4682     }
4683     ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr);
4684     k++;
4685     buf_si += len_si[proc];
4686   }
4687 
4688   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);}
4689   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);}
4690 
4691   ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr);
4692   for (i=0; i<merge->nrecv; i++){
4693     ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr);
4694   }
4695 
4696   ierr = PetscFree(len_si);CHKERRQ(ierr);
4697   ierr = PetscFree(len_ri);CHKERRQ(ierr);
4698   ierr = PetscFree(rj_waits);CHKERRQ(ierr);
4699   ierr = PetscFree2(si_waits,sj_waits);CHKERRQ(ierr);
4700   ierr = PetscFree(ri_waits);CHKERRQ(ierr);
4701   ierr = PetscFree(buf_s);CHKERRQ(ierr);
4702   ierr = PetscFree(status);CHKERRQ(ierr);
4703 
4704   /* compute a local seq matrix in each processor */
4705   /*----------------------------------------------*/
4706   /* allocate bi array and free space for accumulating nonzero column info */
4707   ierr = PetscMalloc((m+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr);
4708   bi[0] = 0;
4709 
4710   /* create and initialize a linked list */
4711   nlnk = N+1;
4712   ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4713 
4714   /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */
4715   len  = ai[owners[rank+1]] - ai[owners[rank]];
4717   ierr = PetscFreeSpaceGet((PetscInt)(2*len+1),&free_space);CHKERRQ(ierr);
4718   current_space = free_space;
4719 
4720   /* determine symbolic info for each local row */
4721   ierr = PetscMalloc3(merge->nrecv,PetscInt*,&buf_ri_k,merge->nrecv,PetscInt*,&nextrow,merge->nrecv,PetscInt*,&nextai);CHKERRQ(ierr);
4722 
4723   for (k=0; k<merge->nrecv; k++){
4724     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
4725     nrows = *buf_ri_k[k];
4726     nextrow[k]  = buf_ri_k[k] + 1;  /* next row number of k-th recved i-structure */
4727     nextai[k]   = buf_ri_k[k] + (nrows + 1);/* points to the next i-structure of k-th recved i-structure  */
4728   }
4729 
4730   ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
4731   len = 0;
4732   for (i=0;i<m;i++) {
4733     bnzi   = 0;
4734     /* add local non-zero cols of this proc's seqmat into lnk */
4735     arow   = owners[rank] + i;
4736     anzi   = ai[arow+1] - ai[arow];
4737     aj     = a->j + ai[arow];
4738     ierr = PetscLLAdd(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4739     bnzi += nlnk;
4740     /* add received col data into lnk */
4741     for (k=0; k<merge->nrecv; k++){ /* k-th received message */
4742       if (i == *nextrow[k]) { /* i-th row */
4743         anzi = *(nextai[k]+1) - *nextai[k];
4744         aj   = buf_rj[k] + *nextai[k];
4745         ierr = PetscLLAdd(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4746         bnzi += nlnk;
4747         nextrow[k]++; nextai[k]++;
4748       }
4749     }
4750     if (len < bnzi) len = bnzi;  /* =max(bnzi) */
4751 
4752     /* if free space is not available, make more free space */
4753     if (current_space->local_remaining<bnzi) {
4754       ierr = PetscFreeSpaceGet(bnzi+current_space->total_array_size,&current_space);CHKERRQ(ierr);
4755       nspacedouble++;
4756     }
4757     /* copy data into free space, then initialize lnk */
4758     ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
4759     ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr);
4760 
4761     current_space->array           += bnzi;
4762     current_space->local_used      += bnzi;
4763     current_space->local_remaining -= bnzi;
4764 
4765     bi[i+1] = bi[i] + bnzi;
4766   }
4767 
4768   ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
4769 
4770   ierr = PetscMalloc((bi[m]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr);
4771   ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr);
4772   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
4773 
4774   /* create symbolic parallel matrix B_mpi */
4775   /*---------------------------------------*/
4776   ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr);
4777   if (n==PETSC_DECIDE) {
4778     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr);
4779   } else {
4780     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
4781   }
4782   ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr);
4783   ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr);
4784   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
4785 
4786   /* B_mpi is not ready for use - assembly will be done by MatMerge_SeqsToMPINumeric() */
4787   B_mpi->assembled     = PETSC_FALSE;
4788   B_mpi->ops->destroy  = MatDestroy_MPIAIJ_SeqsToMPI;
4789   merge->bi            = bi;
4790   merge->bj            = bj;
4791   merge->buf_ri        = buf_ri;
4792   merge->buf_rj        = buf_rj;
4793   merge->coi           = PETSC_NULL;
4794   merge->coj           = PETSC_NULL;
4795   merge->owners_co     = PETSC_NULL;
4796 
4797   ierr = PetscCommDestroy(&comm);CHKERRQ(ierr);
4798 
4799   /* attach the supporting struct to B_mpi for reuse */
4800   ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
4801   ierr = PetscContainerSetPointer(container,merge);CHKERRQ(ierr);
4802   ierr = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr);
4803   ierr = PetscContainerDestroy(&container);CHKERRQ(ierr);
4804   *mpimat = B_mpi;
4805 
4806   ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4807   PetscFunctionReturn(0);
4808 }
4809 
4810 #undef __FUNCT__
4811 #define __FUNCT__ "MatMerge_SeqsToMPI"
4812 PetscErrorCode  MatMerge_SeqsToMPI(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat)
4813 {
4814   PetscErrorCode   ierr;
4815 
4816   PetscFunctionBegin;
4817   ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4818   if (scall == MAT_INITIAL_MATRIX){
4819     ierr = MatMerge_SeqsToMPISymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr);
4820   }
4821   ierr = MatMerge_SeqsToMPINumeric(seqmat,*mpimat);CHKERRQ(ierr);
4822   ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4823   PetscFunctionReturn(0);
4824 }
4825 
4826 #undef __FUNCT__
4827 #define __FUNCT__ "MatMPIAIJGetLocalMat"
4828 /*@
4829      MatMPIAIJGetLocalMat - Creates a SeqAIJ matrix from an MPIAIJ matrix by taking all its local rows and putting them into a sequential matrix with
4830           mlocal rows and n columns, where mlocal is the row count obtained with MatGetLocalSize() and n is the global column count obtained
4831           with MatGetSize()
4832 
4833     Not Collective
4834 
4835    Input Parameters:
4836 +    A - the matrix
4837 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4838 
4839    Output Parameter:
4840 .    A_loc - the local sequential matrix generated
4841 
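   A minimal usage sketch (the caller destroys A_loc when done); with
   MAT_REUSE_MATRIX a later call refreshes the values of A_loc in place after
   A has changed:
.vb
   Mat A_loc;
   ierr = MatMPIAIJGetLocalMat(A,MAT_INITIAL_MATRIX,&A_loc);CHKERRQ(ierr);
   ierr = MatMPIAIJGetLocalMat(A,MAT_REUSE_MATRIX,&A_loc);CHKERRQ(ierr);
   ierr = MatDestroy(&A_loc);CHKERRQ(ierr);
.ve
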
4842     Level: developer
4843 
4844 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMatCondensed()
4845 
4846 @*/
4847 PetscErrorCode  MatMPIAIJGetLocalMat(Mat A,MatReuse scall,Mat *A_loc)
4848 {
4849   PetscErrorCode  ierr;
4850   Mat_MPIAIJ      *mpimat=(Mat_MPIAIJ*)A->data;
4851   Mat_SeqAIJ      *mat,*a=(Mat_SeqAIJ*)(mpimat->A)->data,*b=(Mat_SeqAIJ*)(mpimat->B)->data;
4852   PetscInt        *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*cmap=mpimat->garray;
4853   MatScalar       *aa=a->a,*ba=b->a,*cam;
4854   PetscScalar     *ca;
4855   PetscInt        am=A->rmap->n,i,j,k,cstart=A->cmap->rstart;
4856   PetscInt        *ci,*cj,col,ncols_d,ncols_o,jo;
4857   PetscBool       match;
4858 
4859   PetscFunctionBegin;
4860   ierr = PetscTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr);
4861   if (!match) SETERRQ(((PetscObject)A)->comm, PETSC_ERR_SUP,"Requires MPIAIJ matrix as input");
4862   ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
4863   if (scall == MAT_INITIAL_MATRIX){
4864     ierr = PetscMalloc((1+am)*sizeof(PetscInt),&ci);CHKERRQ(ierr);
4865     ci[0] = 0;
4866     for (i=0; i<am; i++){
4867       ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]);
4868     }
4869     ierr = PetscMalloc((1+ci[am])*sizeof(PetscInt),&cj);CHKERRQ(ierr);
4870     ierr = PetscMalloc((1+ci[am])*sizeof(PetscScalar),&ca);CHKERRQ(ierr);
4871     k = 0;
4872     for (i=0; i<am; i++) {
4873       ncols_o = bi[i+1] - bi[i];
4874       ncols_d = ai[i+1] - ai[i];
4875       /* off-diagonal portion of A */
4876       for (jo=0; jo<ncols_o; jo++) {
4877         col = cmap[*bj];
4878         if (col >= cstart) break;
4879         cj[k]   = col; bj++;
4880         ca[k++] = *ba++;
4881       }
4882       /* diagonal portion of A */
4883       for (j=0; j<ncols_d; j++) {
4884         cj[k]   = cstart + *aj++;
4885         ca[k++] = *aa++;
4886       }
4887       /* off-diagonal portion of A */
4888       for (j=jo; j<ncols_o; j++) {
4889         cj[k]   = cmap[*bj++];
4890         ca[k++] = *ba++;
4891       }
4892     }
4893     /* put together the new matrix */
4894     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr);
4895     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
4896     /* Since these are PETSc arrays, change flags to free them as necessary. */
4897     mat          = (Mat_SeqAIJ*)(*A_loc)->data;
4898     mat->free_a  = PETSC_TRUE;
4899     mat->free_ij = PETSC_TRUE;
4900     mat->nonew   = 0;
4901   } else if (scall == MAT_REUSE_MATRIX){
4902     mat=(Mat_SeqAIJ*)(*A_loc)->data;
4903     ci = mat->i; cj = mat->j; cam = mat->a;
4904     for (i=0; i<am; i++) {
4905       /* off-diagonal portion of A */
4906       ncols_o = bi[i+1] - bi[i];
4907       for (jo=0; jo<ncols_o; jo++) {
4908         col = cmap[*bj];
4909         if (col >= cstart) break;
4910         *cam++ = *ba++; bj++;
4911       }
4912       /* diagonal portion of A */
4913       ncols_d = ai[i+1] - ai[i];
4914       for (j=0; j<ncols_d; j++) *cam++ = *aa++;
4915       /* off-diagonal portion of A */
4916       for (j=jo; j<ncols_o; j++) {
4917         *cam++ = *ba++; bj++;
4918       }
4919     }
4920   } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
4921   ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
4922   PetscFunctionReturn(0);
4923 }
4924 
4925 #undef __FUNCT__
4926 #define __FUNCT__ "MatMPIAIJGetLocalMatCondensed"
4927 /*@C
4928      MatMPIAIJGetLocalMatCondensed - Creates a SeqAIJ matrix from an MPIAIJ matrix by taking all its local rows and NON-ZERO columns
4929 
4930     Not Collective
4931 
4932    Input Parameters:
4933 +    A - the matrix
4934 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4935 -    row, col - index sets of rows and columns to extract (or PETSC_NULL)
4936 
4937    Output Parameter:
4938 .    A_loc - the local sequential matrix generated
4939 
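   A minimal usage sketch taking all local rows and all nonzero columns
   (PETSC_NULL for both index sets):
.vb
   Mat A_loc;
   ierr = MatMPIAIJGetLocalMatCondensed(A,MAT_INITIAL_MATRIX,PETSC_NULL,PETSC_NULL,&A_loc);CHKERRQ(ierr);
.ve
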
4940     Level: developer
4941 
4942 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMat()
4943 
4944 @*/
4945 PetscErrorCode  MatMPIAIJGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc)
4946 {
4947   Mat_MPIAIJ        *a=(Mat_MPIAIJ*)A->data;
4948   PetscErrorCode    ierr;
4949   PetscInt          i,start,end,ncols,nzA,nzB,*cmap,imark,*idx;
4950   IS                isrowa,iscola;
4951   Mat               *aloc;
4952   PetscBool         match;
4953 
4954   PetscFunctionBegin;
4955   ierr = PetscTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr);
4956   if (!match) SETERRQ(((PetscObject)A)->comm, PETSC_ERR_SUP,"Requires MPIAIJ matrix as input");
4957   ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
4958   if (!row){
4959     start = A->rmap->rstart; end = A->rmap->rend;
4960     ierr = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr);
4961   } else {
4962     isrowa = *row;
4963   }
4964   if (!col){
4965     start = A->cmap->rstart;
4966     cmap  = a->garray;
4967     nzA   = a->A->cmap->n;
4968     nzB   = a->B->cmap->n;
4969     ierr  = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr);
4970     ncols = 0;
4971     for (i=0; i<nzB; i++) {
4972       if (cmap[i] < start) idx[ncols++] = cmap[i];
4973       else break;
4974     }
4975     imark = i;
4976     for (i=0; i<nzA; i++) idx[ncols++] = start + i;
4977     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i];
4978     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&iscola);CHKERRQ(ierr);
4979   } else {
4980     iscola = *col;
4981   }
4982   if (scall != MAT_INITIAL_MATRIX){
4983     ierr = PetscMalloc(sizeof(Mat),&aloc);CHKERRQ(ierr);
4984     aloc[0] = *A_loc;
4985   }
4986   ierr = MatGetSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr);
4987   *A_loc = aloc[0];
4988   ierr = PetscFree(aloc);CHKERRQ(ierr);
4989   if (!row){
4990     ierr = ISDestroy(&isrowa);CHKERRQ(ierr);
4991   }
4992   if (!col){
4993     ierr = ISDestroy(&iscola);CHKERRQ(ierr);
4994   }
4995   ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
4996   PetscFunctionReturn(0);
4997 }
4998 
4999 #undef __FUNCT__
5000 #define __FUNCT__ "MatGetBrowsOfAcols"
5001 /*@C
5002     MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns of local A
5003 
5004     Collective on Mat
5005 
5006    Input Parameters:
5007 +    A,B - the matrices in mpiaij format
5008 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5009 -    rowb, colb - index sets of rows and columns of B to extract (or PETSC_NULL)
5010 
5011    Output Parameter:
5012 +    rowb, colb - index sets of rows and columns of B to extract
5013 .    brstart - row index of B_seq at which B's own local rows begin (the next B->rmap->n rows of B_seq are taken from B's local rows)
5014 -    B_seq - the sequential matrix generated
5015 
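   A minimal usage sketch; on the first call the index sets are created and
   returned so that they can be reused when the values of B change:
.vb
   IS       rowb,colb;
   Mat      B_seq;
   PetscInt brstart;
   ierr = MatGetBrowsOfAcols(A,B,MAT_INITIAL_MATRIX,&rowb,&colb,&brstart,&B_seq);CHKERRQ(ierr);
   ierr = MatGetBrowsOfAcols(A,B,MAT_REUSE_MATRIX,&rowb,&colb,&brstart,&B_seq);CHKERRQ(ierr);
.ve
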
5016     Level: developer
5017 
5018 @*/
5019 PetscErrorCode  MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,PetscInt *brstart,Mat *B_seq)
5020 {
5021   Mat_MPIAIJ        *a=(Mat_MPIAIJ*)A->data;
5022   PetscErrorCode    ierr;
5023   PetscInt          *idx,i,start,ncols,nzA,nzB,*cmap,imark;
5024   IS                isrowb,iscolb;
5025   Mat               *bseq;
5026 
5027   PetscFunctionBegin;
5028   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend){
5029     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
5030   }
5031   ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
5032 
5033   if (scall == MAT_INITIAL_MATRIX){
5034     start = A->cmap->rstart;
5035     cmap  = a->garray;
5036     nzA   = a->A->cmap->n;
5037     nzB   = a->B->cmap->n;
5038     ierr  = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr);
5039     ncols = 0;
5040     for (i=0; i<nzB; i++) {  /* row < local row index */
5041       if (cmap[i] < start) idx[ncols++] = cmap[i];
5042       else break;
5043     }
5044     imark = i;
5045     for (i=0; i<nzA; i++) idx[ncols++] = start + i;  /* local rows */
5046     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */
5047     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&isrowb);CHKERRQ(ierr);
5048     *brstart = imark;
5049     ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr);
5050   } else {
5051     if (!rowb || !colb) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX");
5052     isrowb = *rowb; iscolb = *colb;
5053     ierr = PetscMalloc(sizeof(Mat),&bseq);CHKERRQ(ierr);
5054     bseq[0] = *B_seq;
5055   }
5056   ierr = MatGetSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr);
5057   *B_seq = bseq[0];
5058   ierr = PetscFree(bseq);CHKERRQ(ierr);
5059   if (!rowb){
5060     ierr = ISDestroy(&isrowb);CHKERRQ(ierr);
5061   } else {
5062     *rowb = isrowb;
5063   }
5064   if (!colb){
5065     ierr = ISDestroy(&iscolb);CHKERRQ(ierr);
5066   } else {
5067     *colb = iscolb;
5068   }
5069   ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
5070   PetscFunctionReturn(0);
5071 }
5072 
5073 #undef __FUNCT__
5074 #define __FUNCT__ "MatGetBrowsOfAoCols"
5075 /*@C
5076     MatGetBrowsOfAoCols - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns
5077     of the OFF-DIAGONAL portion of local A
5078 
5079     Collective on Mat
5080 
5081    Input Parameters:
5082 +    A,B - the matrices in mpiaij format
5083 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5084 .    startsj - starting point in B's sending and receiving j-arrays, saved for MAT_REUSE (or PETSC_NULL)
5085 .    startsj_r - similar to startsj for receives
5086 -    bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or PETSC_NULL)
5087 
5088    Output Parameter:
5089 .    B_oth - the sequential matrix generated
5090 
5091     Level: developer
5092 
5093 @*/
5094 PetscErrorCode  MatGetBrowsOfAoCols(Mat A,Mat B,MatReuse scall,PetscInt **startsj,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth)
5095 {
5096   VecScatter_MPI_General *gen_to,*gen_from;
5097   PetscErrorCode         ierr;
5098   Mat_MPIAIJ             *a=(Mat_MPIAIJ*)A->data;
5099   Mat_SeqAIJ             *b_oth;
5100   VecScatter             ctx=a->Mvctx;
5101   MPI_Comm               comm=((PetscObject)ctx)->comm;
5102   PetscMPIInt            *rprocs,*sprocs,tag=((PetscObject)ctx)->tag,rank;
5103   PetscInt               *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap->n,row,*b_othi,*b_othj;
5104   PetscScalar            *rvalues,*svalues;
5105   MatScalar              *b_otha,*bufa,*bufA;
5106   PetscInt               i,j,k,l,ll,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len;
5107   MPI_Request            *rwaits = PETSC_NULL,*swaits = PETSC_NULL;
5108   MPI_Status             *sstatus,rstatus;
5109   PetscMPIInt            jj;
5110   PetscInt               *cols,sbs,rbs;
5111   PetscScalar            *vals;
5112 
5113   PetscFunctionBegin;
5114   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend){
5115     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
5116   }
5117   ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
5118   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
5119 
5120   gen_to   = (VecScatter_MPI_General*)ctx->todata;
5121   gen_from = (VecScatter_MPI_General*)ctx->fromdata;
5122   rvalues  = gen_from->values; /* holds the length of receiving row */
5123   svalues  = gen_to->values;   /* holds the length of sending row */
5124   nrecvs   = gen_from->n;
5125   nsends   = gen_to->n;
5126 
5127   ierr = PetscMalloc2(nrecvs,MPI_Request,&rwaits,nsends,MPI_Request,&swaits);CHKERRQ(ierr);
5128   srow     = gen_to->indices;   /* local row index to be sent */
5129   sstarts  = gen_to->starts;
5130   sprocs   = gen_to->procs;
5131   sstatus  = gen_to->sstatus;
5132   sbs      = gen_to->bs;
5133   rstarts  = gen_from->starts;
5134   rprocs   = gen_from->procs;
5135   rbs      = gen_from->bs;
5136 
5137   if (!startsj || !bufa_ptr) scall = MAT_INITIAL_MATRIX;
5138   if (scall == MAT_INITIAL_MATRIX){
5139     /* i-array */
5140     /*---------*/
5141     /*  post receives */
5142     for (i=0; i<nrecvs; i++){
5143       rowlen = (PetscInt*)rvalues + rstarts[i]*rbs;
5144       nrows = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */
5145       ierr = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5146     }
5147 
5148     /* pack the outgoing message */
5149     ierr = PetscMalloc2(nsends+1,PetscInt,&sstartsj,nrecvs+1,PetscInt,&rstartsj);CHKERRQ(ierr);
5150     sstartsj[0] = 0;  rstartsj[0] = 0;
5151     len = 0; /* total length of j or a array to be sent */
5152     k = 0;
5153     for (i=0; i<nsends; i++){
5154       rowlen = (PetscInt*)svalues + sstarts[i]*sbs;
5155       nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5156       for (j=0; j<nrows; j++) {
5157         row = srow[k] + B->rmap->range[rank]; /* global row idx */
5158         for (l=0; l<sbs; l++){
5159           ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); /* rowlength */
5160           rowlen[j*sbs+l] = ncols;
5161           len += ncols;
5162           ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr);
5163         }
5164         k++;
5165       }
5166       ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5167       sstartsj[i+1] = len;  /* starting point of (i+1)-th outgoing msg in bufj and bufa */
5168     }
5169     /* recvs and sends of i-array are completed */
5170     i = nrecvs;
5171     while (i--) {
5172       ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5173     }
5174     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
5175 
5176     /* allocate buffers for sending j and a arrays */
5177     ierr = PetscMalloc((len+1)*sizeof(PetscInt),&bufj);CHKERRQ(ierr);
5178     ierr = PetscMalloc((len+1)*sizeof(PetscScalar),&bufa);CHKERRQ(ierr);
5179 
5180     /* create i-array of B_oth */
5181     ierr = PetscMalloc((aBn+2)*sizeof(PetscInt),&b_othi);CHKERRQ(ierr);
5182     b_othi[0] = 0;
5183     len = 0; /* total length of j or a array to be received */
5184     k = 0;
5185     for (i=0; i<nrecvs; i++){
5186       rowlen = (PetscInt*)rvalues + rstarts[i]*rbs;
5187       nrows = rbs*(rstarts[i+1]-rstarts[i]); /* num of rows to be received */
5188       for (j=0; j<nrows; j++) {
5189         b_othi[k+1] = b_othi[k] + rowlen[j];
5190         len += rowlen[j]; k++;
5191       }
5192       rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */
5193     }
5194 
5195     /* allocate space for j and a arrays of B_oth */
5196     ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(PetscInt),&b_othj);CHKERRQ(ierr);
5197     ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(MatScalar),&b_otha);CHKERRQ(ierr);
5198 
5199     /* j-array */
5200     /*---------*/
5201     /*  post receives of j-array */
5202     for (i=0; i<nrecvs; i++){
5203       nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
5204       ierr = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5205     }
5206 
5207     /* pack the outgoing message j-array */
5208     k = 0;
5209     for (i=0; i<nsends; i++){
5210       nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5211       bufJ = bufj+sstartsj[i];
5212       for (j=0; j<nrows; j++) {
5213         row  = srow[k++] + B->rmap->range[rank]; /* global row idx */
5214         for (ll=0; ll<sbs; ll++){
5215           ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr);
5216           for (l=0; l<ncols; l++){
5217             *bufJ++ = cols[l];
5218           }
5219           ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr);
5220         }
5221       }
5222       ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5223     }
5224 
5225     /* recvs and sends of j-array are completed */
5226     i = nrecvs;
5227     while (i--) {
5228       ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5229     }
5230     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
5231   } else if (scall == MAT_REUSE_MATRIX){
5232     sstartsj = *startsj;
5233     rstartsj = *startsj_r;
5234     bufa     = *bufa_ptr;
5235     b_oth    = (Mat_SeqAIJ*)(*B_oth)->data;
5236     b_otha   = b_oth->a;
5237   } else {
5238     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
5239   }
5240 
5241   /* a-array */
5242   /*---------*/
5243   /*  post receives of a-array */
5244   for (i=0; i<nrecvs; i++){
5245     nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
5246     ierr = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5247   }
5248 
5249   /* pack the outgoing message a-array */
5250   k = 0;
5251   for (i=0; i<nsends; i++){
5252     nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5253     bufA = bufa+sstartsj[i];
5254     for (j=0; j<nrows; j++) {
5255       row  = srow[k++] + B->rmap->range[rank]; /* global row idx */
5256       for (ll=0; ll<sbs; ll++){
5257         ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr);
5258         for (l=0; l<ncols; l++){
5259           *bufA++ = vals[l];
5260         }
5261         ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr);
5262       }
5263     }
5264     ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5265   }
5266   /* recvs and sends of a-array are completed */
5267   i = nrecvs;
5268   while (i--) {
5269     ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5270   }
5271   if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
5272   ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr);
5273 
5274   if (scall == MAT_INITIAL_MATRIX){
5275     /* put together the new matrix */
5276     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr);
5277 
5278     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
5279     /* Since these are PETSc arrays, change flags to free them as necessary. */
5280     b_oth          = (Mat_SeqAIJ *)(*B_oth)->data;
5281     b_oth->free_a  = PETSC_TRUE;
5282     b_oth->free_ij = PETSC_TRUE;
5283     b_oth->nonew   = 0;
5284 
5285     ierr = PetscFree(bufj);CHKERRQ(ierr);
5286     if (!startsj || !bufa_ptr){
5287       ierr = PetscFree2(sstartsj,rstartsj);CHKERRQ(ierr);
5288       ierr = PetscFree(bufa);CHKERRQ(ierr); /* bufa was allocated above and is not returned in this case */
5289     } else {
5290       *startsj   = sstartsj;
5291       *startsj_r = rstartsj;
5292       *bufa_ptr  = bufa;
5293     }
5294   }
5295   ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
5296   PetscFunctionReturn(0);
5297 }
5298 
5299 #undef __FUNCT__
5300 #define __FUNCT__ "MatGetCommunicationStructs"
5301 /*@C
5302   MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication.
5303 
5304   Not Collective
5305 
5306   Input Parameters:
5307 . A - The matrix in mpiaij format
5308 
5309   Output Parameter:
5310 + lvec - The local vector holding off-process values from the argument to a matrix-vector product
5311 . colmap - A map from global column index to local index into lvec
5312 - multScatter - A scatter from the argument of a matrix-vector product to lvec
5313 
5314   Level: developer
5315 
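  A sketch for a non-CTABLE build (under PETSC_USE_CTABLE, colmap is a
  PetscTable instead of an integer array):
.vb
  Vec        lvec;
  PetscInt   *colmap;
  VecScatter Mvctx;
  ierr = MatGetCommunicationStructs(A,&lvec,&colmap,&Mvctx);CHKERRQ(ierr);
.ve
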
5316 @*/
5317 #if defined (PETSC_USE_CTABLE)
5318 PetscErrorCode  MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter)
5319 #else
5320 PetscErrorCode  MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter)
5321 #endif
5322 {
5323   Mat_MPIAIJ *a;
5324 
5325   PetscFunctionBegin;
5326   PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
5327   PetscValidPointer(lvec, 2);
5328   PetscValidPointer(colmap, 3);
5329   PetscValidPointer(multScatter, 4);
5330   a = (Mat_MPIAIJ *) A->data;
5331   if (lvec) *lvec = a->lvec;
5332   if (colmap) *colmap = a->colmap;
5333   if (multScatter) *multScatter = a->Mvctx;
5334   PetscFunctionReturn(0);
5335 }
5336 
5337 EXTERN_C_BEGIN
5338 extern PetscErrorCode  MatConvert_MPIAIJ_MPIAIJCRL(Mat,const MatType,MatReuse,Mat*);
5339 extern PetscErrorCode  MatConvert_MPIAIJ_MPIAIJPERM(Mat,const MatType,MatReuse,Mat*);
5340 extern PetscErrorCode  MatConvert_MPIAIJ_MPISBAIJ(Mat,const MatType,MatReuse,Mat*);
5341 EXTERN_C_END
5342 
5343 #undef __FUNCT__
5344 #define __FUNCT__ "MatMatMultNumeric_MPIDense_MPIAIJ"
5345 /*
5346     Computes C = A*B as (B'*A')', since computing the MPIDense*MPIAIJ product directly is untenable
5347 
5348                n                       p                          p
5349         (              )       (              )         (                  )
5350       m (      A       )  *  n (       B      )   =   m (         C        )
5351         (              )       (              )         (                  )
5352 
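    Note: this fallback forms At and Bt explicitly, computes Ct = Bt*At (which can
    use the native MPIAIJ*MPIDense path), and then transposes Ct back into C, so
    the cost is dominated by the three parallel transposes.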
5353 */
5354 PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C)
5355 {
5356   PetscErrorCode     ierr;
5357   Mat                At,Bt,Ct;
5358 
5359   PetscFunctionBegin;
5360   ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr);
5361   ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr);
5362   ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,1.0,&Ct);CHKERRQ(ierr);
5363   ierr = MatDestroy(&At);CHKERRQ(ierr);
5364   ierr = MatDestroy(&Bt);CHKERRQ(ierr);
5365   ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr);
5366   ierr = MatDestroy(&Ct);CHKERRQ(ierr);
5367   PetscFunctionReturn(0);
5368 }
5369 
5370 #undef __FUNCT__
5371 #define __FUNCT__ "MatMatMultSymbolic_MPIDense_MPIAIJ"
5372 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat *C)
5373 {
5374   PetscErrorCode ierr;
5375   PetscInt       m=A->rmap->n,n=B->cmap->n;
5376   Mat            Cmat;
5377 
5378   PetscFunctionBegin;
5379   if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %d != B->rmap->n %d",A->cmap->n,B->rmap->n);
5380   ierr = MatCreate(((PetscObject)A)->comm,&Cmat);CHKERRQ(ierr);
5381   ierr = MatSetSizes(Cmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
5382   ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr);
5383   ierr = MatMPIDenseSetPreallocation(Cmat,PETSC_NULL);CHKERRQ(ierr);
5384   ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5385   ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5386   *C   = Cmat;
5387   PetscFunctionReturn(0);
5388 }
5389 
5390 /* ----------------------------------------------------------------*/
5391 #undef __FUNCT__
5392 #define __FUNCT__ "MatMatMult_MPIDense_MPIAIJ"
5393 PetscErrorCode MatMatMult_MPIDense_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C)
5394 {
5395   PetscErrorCode ierr;
5396 
5397   PetscFunctionBegin;
5398   if (scall == MAT_INITIAL_MATRIX){
5399     ierr = MatMatMultSymbolic_MPIDense_MPIAIJ(A,B,fill,C);CHKERRQ(ierr);
5400   }
5401   ierr = MatMatMultNumeric_MPIDense_MPIAIJ(A,B,*C);CHKERRQ(ierr);
5402   PetscFunctionReturn(0);
5403 }
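
/*
   Usage sketch (illustrative only; assumes A is MATMPIDENSE, B is MATMPIAIJ, and
   their layouts are compatible).  The first call builds C; subsequent calls with
   MAT_REUSE_MATRIX recompute its values:

     ierr = MatMatMult(A,B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&C);CHKERRQ(ierr);
     ...  modify entries of A or B  ...
     ierr = MatMatMult(A,B,MAT_REUSE_MATRIX,PETSC_DEFAULT,&C);CHKERRQ(ierr);
*/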
5404 
5405 EXTERN_C_BEGIN
5406 #if defined(PETSC_HAVE_MUMPS)
5407 extern PetscErrorCode MatGetFactor_aij_mumps(Mat,MatFactorType,Mat*);
5408 #endif
5409 #if defined(PETSC_HAVE_PASTIX)
5410 extern PetscErrorCode MatGetFactor_mpiaij_pastix(Mat,MatFactorType,Mat*);
5411 #endif
5412 #if defined(PETSC_HAVE_SUPERLU_DIST)
5413 extern PetscErrorCode MatGetFactor_mpiaij_superlu_dist(Mat,MatFactorType,Mat*);
5414 #endif
5415 #if defined(PETSC_HAVE_SPOOLES)
5416 extern PetscErrorCode MatGetFactor_mpiaij_spooles(Mat,MatFactorType,Mat*);
5417 #endif
5418 EXTERN_C_END
5419 
5420 /*MC
5421    MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.
5422 
5423    Options Database Keys:
5424 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions()
5425 
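   Example, a minimal sketch (m and n are the local sizes; the preallocation counts
   are application dependent):
.vb
     ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
     ierr = MatSetSizes(A,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
     ierr = MatMPIAIJSetPreallocation(A,5,PETSC_NULL,2,PETSC_NULL);CHKERRQ(ierr);
.ve
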
5426   Level: beginner
5427 
5428 .seealso: MatCreateMPIAIJ()
5429 M*/
5430 
5431 EXTERN_C_BEGIN
5432 #undef __FUNCT__
5433 #define __FUNCT__ "MatCreate_MPIAIJ"
5434 PetscErrorCode  MatCreate_MPIAIJ(Mat B)
5435 {
5436   Mat_MPIAIJ     *b;
5437   PetscErrorCode ierr;
5438   PetscMPIInt    size;
5439 
5440   PetscFunctionBegin;
5441   ierr = MPI_Comm_size(((PetscObject)B)->comm,&size);CHKERRQ(ierr);
5442 
5443   ierr            = PetscNewLog(B,Mat_MPIAIJ,&b);CHKERRQ(ierr);
5444   B->data         = (void*)b;
5445   ierr            = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
5446   B->rmap->bs     = 1;
5447   B->assembled    = PETSC_FALSE;
5448 
5449   B->insertmode   = NOT_SET_VALUES;
5450   b->size         = size;
5451   ierr = MPI_Comm_rank(((PetscObject)B)->comm,&b->rank);CHKERRQ(ierr);
5452 
5453   /* build cache for off array entries formed */
5454   ierr = MatStashCreate_Private(((PetscObject)B)->comm,1,&B->stash);CHKERRQ(ierr);
5455   b->donotstash  = PETSC_FALSE;
5456   b->colmap      = 0;
5457   b->garray      = 0;
5458   b->roworiented = PETSC_TRUE;
5459 
5460   /* stuff used for matrix vector multiply */
5461   b->lvec      = PETSC_NULL;
5462   b->Mvctx     = PETSC_NULL;
5463 
5464   /* stuff for MatGetRow() */
5465   b->rowindices   = 0;
5466   b->rowvalues    = 0;
5467   b->getrowactive = PETSC_FALSE;
5468 
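  /* compose type-specific implementations on the object; generic interfaces such as
     MatGetFactor() and MatConvert() look these up by name at run time */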
5469 #if defined(PETSC_HAVE_SPOOLES)
5470   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_spooles_C",
5471                                      "MatGetFactor_mpiaij_spooles",
5472                                      MatGetFactor_mpiaij_spooles);CHKERRQ(ierr);
5473 #endif
5474 #if defined(PETSC_HAVE_MUMPS)
5475   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_mumps_C",
5476                                      "MatGetFactor_aij_mumps",
5477                                      MatGetFactor_aij_mumps);CHKERRQ(ierr);
5478 #endif
5479 #if defined(PETSC_HAVE_PASTIX)
5480   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_pastix_C",
5481                                      "MatGetFactor_mpiaij_pastix",
5482                                      MatGetFactor_mpiaij_pastix);CHKERRQ(ierr);
5483 #endif
5484 #if defined(PETSC_HAVE_SUPERLU_DIST)
5485   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_superlu_dist_C",
5486                                      "MatGetFactor_mpiaij_superlu_dist",
5487                                      MatGetFactor_mpiaij_superlu_dist);CHKERRQ(ierr);
5488 #endif
5489   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
5490                                      "MatStoreValues_MPIAIJ",
5491                                      MatStoreValues_MPIAIJ);CHKERRQ(ierr);
5492   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
5493                                      "MatRetrieveValues_MPIAIJ",
5494                                      MatRetrieveValues_MPIAIJ);CHKERRQ(ierr);
5495   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
5496                                      "MatGetDiagonalBlock_MPIAIJ",
5497                                      MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr);
5498   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatIsTranspose_C",
5499                                      "MatIsTranspose_MPIAIJ",
5500                                      MatIsTranspose_MPIAIJ);CHKERRQ(ierr);
5501   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocation_C",
5502                                      "MatMPIAIJSetPreallocation_MPIAIJ",
5503                                      MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr);
5504   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",
5505                                      "MatMPIAIJSetPreallocationCSR_MPIAIJ",
5506                                      MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr);
5507   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C",
5508                                      "MatDiagonalScaleLocal_MPIAIJ",
5509                                      MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr);
5510   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpiaijperm_C",
5511                                      "MatConvert_MPIAIJ_MPIAIJPERM",
5512                                       MatConvert_MPIAIJ_MPIAIJPERM);CHKERRQ(ierr);
5513   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpiaijcrl_C",
5514                                      "MatConvert_MPIAIJ_MPIAIJCRL",
5515                                       MatConvert_MPIAIJ_MPIAIJCRL);CHKERRQ(ierr);
5516   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C",
5517                                      "MatConvert_MPIAIJ_MPISBAIJ",
5518                                       MatConvert_MPIAIJ_MPISBAIJ);CHKERRQ(ierr);
5519   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMult_mpidense_mpiaij_C",
5520                                      "MatMatMult_MPIDense_MPIAIJ",
5521                                       MatMatMult_MPIDense_MPIAIJ);CHKERRQ(ierr);
5522   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMultSymbolic_mpidense_mpiaij_C",
5523                                      "MatMatMultSymbolic_MPIDense_MPIAIJ",
5524                                      MatMatMultSymbolic_MPIDense_MPIAIJ);CHKERRQ(ierr);
5525   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMultNumeric_mpidense_mpiaij_C",
5526                                      "MatMatMultNumeric_MPIDense_MPIAIJ",
5527                                       MatMatMultNumeric_MPIDense_MPIAIJ);CHKERRQ(ierr);
5528   ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr);
5529   PetscFunctionReturn(0);
5530 }
5531 EXTERN_C_END
5532 
5533 #undef __FUNCT__
5534 #define __FUNCT__ "MatCreateMPIAIJWithSplitArrays"
5535 /*@
5536      MatCreateMPIAIJWithSplitArrays - creates an MPI AIJ matrix using arrays that contain the "diagonal"
5537          and "off-diagonal" parts of the matrix in CSR format.
5538 
5539    Collective on MPI_Comm
5540 
5541    Input Parameters:
5542 +  comm - MPI communicator
5543 .  m - number of local rows (cannot be PETSC_DECIDE)
5544 .  n - number of local columns; this value should be the same as the local size used in creating the
5545        x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have it
5546        calculated if N is given). For square matrices n is almost always m.
5547 .  M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given)
5548 .  N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given)
5549 .   i - row offsets (CSR format, with i[0] = 0) for "diagonal" portion of matrix
5550 .   j - column indices for "diagonal" portion
5551 .   a - matrix values for "diagonal" portion
5552 .   oi - row offsets (CSR format, with oi[0] = 0) for "off-diagonal" portion of matrix
5553 .   oj - column indices for "off-diagonal" portion
5554 -   oa - matrix values for "off-diagonal" portion
5555 
5556    Output Parameter:
5557 .   mat - the matrix
5558 
5559    Level: advanced
5560 
5561    Notes:
5562        The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc. The user
5563        must free the arrays once the matrix has been destroyed and not before.
5564 
5565        The i and j indices are 0 based
5566 
5567        See MatCreateMPIAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix
5568 
5569        This sets local rows and cannot be used to set off-processor values.
5570 
5571        You cannot later use MatSetValues() to change values in this matrix.
5572 
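   Example, a minimal sketch (each process contributes one row holding a single
   diagonal entry, so the "off-diagonal" CSR arrays are empty; the arrays are not
   copied and must remain valid until the matrix is destroyed):
.vb
     PetscInt    i[2]  = {0,1},  j[1]  = {0};
     PetscScalar a[1]  = {2.0};
     PetscInt    oi[2] = {0,0},  oj[1] = {0};
     PetscScalar oa[1] = {0.0};
     Mat         mat;

     ierr = MatCreateMPIAIJWithSplitArrays(PETSC_COMM_WORLD,1,1,PETSC_DETERMINE,PETSC_DETERMINE,
                                           i,j,a,oi,oj,oa,&mat);CHKERRQ(ierr);
.ve
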
5573 .keywords: matrix, aij, compressed row, sparse, parallel
5574 
5575 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
5576           MPIAIJ, MatCreateMPIAIJ(), MatCreateMPIAIJWithArrays()
5577 @*/
5578 PetscErrorCode  MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],
5579 								PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat)
5580 {
5581   PetscErrorCode ierr;
5582   Mat_MPIAIJ     *maij;
5583 
5584   PetscFunctionBegin;
5585   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
5586   if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
5587   if (oi[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0");
5588   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
5589   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
5590   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
5591   maij = (Mat_MPIAIJ*) (*mat)->data;
5592   maij->donotstash     = PETSC_TRUE;
5593   (*mat)->preallocated = PETSC_TRUE;
5594 
5595   ierr = PetscLayoutSetBlockSize((*mat)->rmap,1);CHKERRQ(ierr);
5596   ierr = PetscLayoutSetBlockSize((*mat)->cmap,1);CHKERRQ(ierr);
5597   ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr);
5598   ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr);
5599 
5600   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr);
5601   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr);
5602 
5603   ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5604   ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5605   ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5606   ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5607 
5608   ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5609   ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5610   PetscFunctionReturn(0);
5611 }
5612 
5613 /*
5614     Special version for direct calls from Fortran
5615 */
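/* (Presumably the point is to bypass the generic MatSetValues() argument checking and
   function-pointer dispatch, which matters when a Fortran code inserts entries one at a time.) */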
5616 #include <private/fortranimpl.h>
5617 
5618 #if defined(PETSC_HAVE_FORTRAN_CAPS)
5619 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ
5620 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
5621 #define matsetvaluesmpiaij_ matsetvaluesmpiaij
5622 #endif
5623 
5624 /* Change these macros so they can be used in a void function: CHKERRQ() normally returns the error code, which is illegal here, so abort on error instead */
5625 #undef CHKERRQ
5626 #define CHKERRQ(ierr) CHKERRABORT(PETSC_COMM_WORLD,ierr)
5627 #undef SETERRQ2
5628 #define SETERRQ2(comm,ierr,b,c,d) CHKERRABORT(comm,ierr)
5629 #undef SETERRQ
5630 #define SETERRQ(c,ierr,b) CHKERRABORT(c,ierr)
5631 
5632 EXTERN_C_BEGIN
5633 #undef __FUNCT__
5634 #define __FUNCT__ "matsetvaluesmpiaij_"
5635 void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr)
5636 {
5637   Mat             mat = *mmat;
5638   PetscInt        m = *mm, n = *mn;
5639   InsertMode      addv = *maddv;
5640   Mat_MPIAIJ      *aij = (Mat_MPIAIJ*)mat->data;
5641   PetscScalar     value;
5642   PetscErrorCode  ierr;
5643 
5644   ierr = MatPreallocated(mat);CHKERRQ(ierr);
5645   if (mat->insertmode == NOT_SET_VALUES) {
5646     mat->insertmode = addv;
5647   }
5648 #if defined(PETSC_USE_DEBUG)
5649   else if (mat->insertmode != addv) {
5650     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
5651   }
5652 #endif
5653   {
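  /* extra scope so that C89 declarations may follow the executable statements above */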
5654   PetscInt        i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
5655   PetscInt        cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
5656   PetscBool       roworiented = aij->roworiented;
5657 
5658   /* Some Variables required in the macro */
5659   Mat             A = aij->A;
5660   Mat_SeqAIJ      *a = (Mat_SeqAIJ*)A->data;
5661   PetscInt        *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
5662   MatScalar       *aa = a->a;
5663   PetscBool       ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES))?PETSC_TRUE:PETSC_FALSE);
5664   Mat             B = aij->B;
5665   Mat_SeqAIJ      *b = (Mat_SeqAIJ*)B->data;
5666   PetscInt        *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
5667   MatScalar       *ba = b->a;
5668 
5669   PetscInt        *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
5670   PetscInt        nonew = a->nonew;
5671   MatScalar       *ap1,*ap2;
5672 
5673   PetscFunctionBegin;
5674   for (i=0; i<m; i++) {
5675     if (im[i] < 0) continue;
5676 #if defined(PETSC_USE_DEBUG)
5677     if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
5678 #endif
5679     if (im[i] >= rstart && im[i] < rend) {
5680       row      = im[i] - rstart;
5681       lastcol1 = -1;
5682       rp1      = aj + ai[row];
5683       ap1      = aa + ai[row];
5684       rmax1    = aimax[row];
5685       nrow1    = ailen[row];
5686       low1     = 0;
5687       high1    = nrow1;
5688       lastcol2 = -1;
5689       rp2      = bj + bi[row];
5690       ap2      = ba + bi[row];
5691       rmax2    = bimax[row];
5692       nrow2    = bilen[row];
5693       low2     = 0;
5694       high2    = nrow2;
5695 
5696       for (j=0; j<n; j++) {
5697         if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
5698         if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
5699         if (in[j] >= cstart && in[j] < cend){
5700           col = in[j] - cstart;
5701           MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
5702         } else if (in[j] < 0) continue;
5703 #if defined(PETSC_USE_DEBUG)
5704         else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
5705 #endif
5706         else {
5707           if (mat->was_assembled) {
5708             if (!aij->colmap) {
5709               ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
5710             }
5711 #if defined (PETSC_USE_CTABLE)
5712             ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
5713             col--;
5714 #else
5715             col = aij->colmap[in[j]] - 1;
5716 #endif
5717             if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
5718               ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
5719               col =  in[j];
5720               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
5721               B = aij->B;
5722               b = (Mat_SeqAIJ*)B->data;
5723               bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
5724               rp2      = bj + bi[row];
5725               ap2      = ba + bi[row];
5726               rmax2    = bimax[row];
5727               nrow2    = bilen[row];
5728               low2     = 0;
5729               high2    = nrow2;
5730               bm       = aij->B->rmap->n;
5731               ba = b->a;
5732             }
5733           } else col = in[j];
5734           MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
5735         }
5736       }
5737     } else {
5738       if (!aij->donotstash) {
5739         if (roworiented) {
5740           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
5741         } else {
5742           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
5743         }
5744       }
5745     }
5746   }}
5747   PetscFunctionReturnVoid();
5748 }
5749 EXTERN_C_END
5750 
5751