xref: /petsc/src/mat/impls/aij/mpi/mpiaij.c (revision 5f042095cd6794de959e41580753d6ff4dec299c)
1 
2 #include <../src/mat/impls/aij/mpi/mpiaij.h>   /*I "petscmat.h" I*/
3 #include <petscblaslapack.h>
4 
5 /*MC
6    MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices.
7 
8    This matrix type is identical to MATSEQAIJ when constructed with a single process communicator,
9    and MATMPIAIJ otherwise.  As a result, for single process communicators,
10   MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
11   for communicators controlling multiple processes.  It is recommended that you call both of
12   the above preallocation routines for simplicity.
13 
14    Options Database Keys:
15 . -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions()
16 
17   Developer Notes: Subclasses include MATAIJCUSP, MATAIJPERM, and MATAIJCRL; the type also automatically switches over to use inodes when
18    enough exist.
19 
20   Level: beginner
21 
22 .seealso: MatCreateAIJ(), MatCreateSeqAIJ(), MATSEQAIJ, MATMPIAIJ
23 M*/
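
/*
   A minimal usage sketch (not taken from the PETSc examples; M, N and the
   preallocation counts below are placeholders chosen for illustration):

     Mat A;
     ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
     ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,M,N);CHKERRQ(ierr);
     ierr = MatSetType(A,MATAIJ);CHKERRQ(ierr);
     ierr = MatSeqAIJSetPreallocation(A,5,PETSC_NULL);CHKERRQ(ierr);
     ierr = MatMPIAIJSetPreallocation(A,5,PETSC_NULL,2,PETSC_NULL);CHKERRQ(ierr);

   Calling both preallocation routines, as recommended above, keeps the code
   correct on any communicator size; whichever routine does not apply to the
   actual type is ignored.
*/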
24 
25 /*MC
26    MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices.
27 
28    This matrix type is identical to MATSEQAIJCRL when constructed with a single process communicator,
29    and MATMPIAIJCRL otherwise.  As a result, for single process communicators,
30    MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
31   for communicators controlling multiple processes.  It is recommended that you call both of
32   the above preallocation routines for simplicity.
33 
34    Options Database Keys:
35 . -mat_type aijcrl - sets the matrix type to "aijcrl" during a call to MatSetFromOptions()
36 
37   Level: beginner
38 
39 .seealso: MatCreateMPIAIJCRL(), MATSEQAIJCRL, MATMPIAIJCRL
40 M*/
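
/*
   A minimal sketch of selecting this type at runtime (assumes the executable is
   launched with -mat_type aijcrl; M and N are placeholders):

     Mat A;
     ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
     ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,M,N);CHKERRQ(ierr);
     ierr = MatSetFromOptions(A);CHKERRQ(ierr);   (the -mat_type option takes effect here)
*/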
41 
42 #undef __FUNCT__
43 #define __FUNCT__ "MatFindNonZeroRows_MPIAIJ"
44 PetscErrorCode MatFindNonZeroRows_MPIAIJ(Mat M,IS *keptrows)
45 {
46   PetscErrorCode  ierr;
47   Mat_MPIAIJ      *mat = (Mat_MPIAIJ*)M->data;
48   Mat_SeqAIJ      *a = (Mat_SeqAIJ*)mat->A->data;
49   Mat_SeqAIJ      *b = (Mat_SeqAIJ*)mat->B->data;
50   const PetscInt  *ia,*ib;
51   const MatScalar *aa,*bb;
52   PetscInt        na,nb,i,j,*rows,cnt=0,n0rows;
53   PetscInt        m = M->rmap->n,rstart = M->rmap->rstart;
54 
55   PetscFunctionBegin;
56   *keptrows = 0;
57   ia = a->i;
58   ib = b->i;
59   for (i=0; i<m; i++) {
60     na = ia[i+1] - ia[i];
61     nb = ib[i+1] - ib[i];
62     if (!na && !nb) {
63       cnt++;
64       goto ok1;
65     }
66     aa = a->a + ia[i];
67     for (j=0; j<na; j++) {
68       if (aa[j] != 0.0) goto ok1;
69     }
70     bb = b->a + ib[i];
71     for (j=0; j <nb; j++) {
72       if (bb[j] != 0.0) goto ok1;
73     }
74     cnt++;
75     ok1:;
76   }
77   ierr = MPI_Allreduce(&cnt,&n0rows,1,MPIU_INT,MPI_SUM,((PetscObject)M)->comm);CHKERRQ(ierr);
78   if (!n0rows) PetscFunctionReturn(0);
79   ierr = PetscMalloc((M->rmap->n-cnt)*sizeof(PetscInt),&rows);CHKERRQ(ierr);
80   cnt = 0;
81   for (i=0; i<m; i++) {
82     na = ia[i+1] - ia[i];
83     nb = ib[i+1] - ib[i];
84     if (!na && !nb) continue;
85     aa = a->a + ia[i];
86     for(j=0; j<na;j++) {
87       if (aa[j] != 0.0) {
88         rows[cnt++] = rstart + i;
89         goto ok2;
90       }
91     }
92     bb = b->a + ib[i];
93     for (j=0; j<nb; j++) {
94       if (bb[j] != 0.0) {
95         rows[cnt++] = rstart + i;
96         goto ok2;
97       }
98     }
99     ok2:;
100   }
101   ierr = ISCreateGeneral(((PetscObject)M)->comm,cnt,rows,PETSC_OWN_POINTER,keptrows);CHKERRQ(ierr);
102   PetscFunctionReturn(0);
103 }
104 
105 #undef __FUNCT__
106 #define __FUNCT__ "MatGetColumnNorms_MPIAIJ"
107 PetscErrorCode MatGetColumnNorms_MPIAIJ(Mat A,NormType type,PetscReal *norms)
108 {
109   PetscErrorCode ierr;
110   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)A->data;
111   PetscInt       i,n,*garray = aij->garray;
112   Mat_SeqAIJ     *a_aij = (Mat_SeqAIJ*) aij->A->data;
113   Mat_SeqAIJ     *b_aij = (Mat_SeqAIJ*) aij->B->data;
114   PetscReal      *work;
115 
116   PetscFunctionBegin;
117   ierr = MatGetSize(A,PETSC_NULL,&n);CHKERRQ(ierr);
118   ierr = PetscMalloc(n*sizeof(PetscReal),&work);CHKERRQ(ierr);
119   ierr = PetscMemzero(work,n*sizeof(PetscReal));CHKERRQ(ierr);
120   if (type == NORM_2) {
121     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
122       work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]*a_aij->a[i]);
123     }
124     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
125       work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]*b_aij->a[i]);
126     }
127   } else if (type == NORM_1) {
128     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
129       work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]);
130     }
131     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
132       work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]);
133     }
134   } else if (type == NORM_INFINITY) {
135     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
136       work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]);
137     }
138     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
139       work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]),work[garray[b_aij->j[i]]]);
140     }
141 
142   } else SETERRQ(((PetscObject)A)->comm,PETSC_ERR_ARG_WRONG,"Unknown NormType");
143   if (type == NORM_INFINITY) {
144     ierr = MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,((PetscObject)A)->comm);CHKERRQ(ierr);
145   } else {
146     ierr = MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,((PetscObject)A)->comm);CHKERRQ(ierr);
147   }
148   ierr = PetscFree(work);CHKERRQ(ierr);
149   if (type == NORM_2) {
150     for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]);
151   }
152   PetscFunctionReturn(0);
153 }
154 
155 #undef __FUNCT__
156 #define __FUNCT__ "MatDistribute_MPIAIJ"
157 /*
158     Distributes a SeqAIJ matrix across a set of processes. Code stolen from
159     MatLoad_MPIAIJ(). Horrible lack of reuse. Should be a routine for each matrix type.
160 
161     Only for square matrices
162 */
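
/*
   A minimal calling sketch (a hypothetical use, assuming gmat is a square,
   assembled MATSEQAIJ on process 0 and m is the local row count desired on
   each process):

     Mat dmat;
     ierr = MatDistribute_MPIAIJ(comm,gmat,m,MAT_INITIAL_MATRIX,&dmat);CHKERRQ(ierr);
     ... later, after only the numerical values of gmat have changed ...
     ierr = MatDistribute_MPIAIJ(comm,gmat,m,MAT_REUSE_MATRIX,&dmat);CHKERRQ(ierr);
*/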
163 PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat)
164 {
165   PetscMPIInt    rank,size;
166   PetscInt       *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz,*gmataj,cnt,row,*ld;
167   PetscErrorCode ierr;
168   Mat            mat;
169   Mat_SeqAIJ     *gmata;
170   PetscMPIInt    tag;
171   MPI_Status     status;
172   PetscBool      aij;
173   MatScalar      *gmataa,*ao,*ad,*gmataarestore=0;
174 
175   PetscFunctionBegin;
176   CHKMEMQ;
177   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
178   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
179   if (!rank) {
180     ierr = PetscTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);CHKERRQ(ierr);
181     if (!aij) SETERRQ1(((PetscObject)gmat)->comm,PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name);
182   }
183   if (reuse == MAT_INITIAL_MATRIX) {
184     ierr = MatCreate(comm,&mat);CHKERRQ(ierr);
185     ierr = MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
186     ierr = MatSetType(mat,MATAIJ);CHKERRQ(ierr);
187     ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr);
188     ierr = PetscMalloc2(m,PetscInt,&dlens,m,PetscInt,&olens);CHKERRQ(ierr);
189     ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
190     rowners[0] = 0;
191     for (i=2; i<=size; i++) {
192       rowners[i] += rowners[i-1];
193     }
194     rstart = rowners[rank];
195     rend   = rowners[rank+1];
196     ierr   = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
197     if (!rank) {
198       gmata = (Mat_SeqAIJ*) gmat->data;
199       /* send row lengths to all processors */
200       for (i=0; i<m; i++) dlens[i] = gmata->ilen[i];
201       for (i=1; i<size; i++) {
202 	ierr = MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
203       }
204       /* determine the diagonal and off-diagonal nonzero counts */
205       ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
206       ierr = PetscMalloc(m*sizeof(PetscInt),&ld);CHKERRQ(ierr);
207       ierr = PetscMemzero(ld,m*sizeof(PetscInt));CHKERRQ(ierr);
208       jj = 0;
209       for (i=0; i<m; i++) {
210 	for (j=0; j<dlens[i]; j++) {
211           if (gmata->j[jj] < rstart) ld[i]++;
212 	  if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++;
213 	  jj++;
214 	}
215       }
216       /* send column indices to other processes */
217       for (i=1; i<size; i++) {
218 	nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
219 	ierr = MPI_Send(&nz,1,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
220 	ierr = MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
221       }
222 
223       /* send numerical values to other processes */
224       for (i=1; i<size; i++) {
225         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
226         ierr = MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
227       }
228       gmataa = gmata->a;
229       gmataj = gmata->j;
230 
231     } else {
232       /* receive row lengths */
233       ierr = MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
234       /* receive column indices */
235       ierr = MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
236       ierr = PetscMalloc2(nz,PetscScalar,&gmataa,nz,PetscInt,&gmataj);CHKERRQ(ierr);
237       ierr = MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
238       /* determine the diagonal and off-diagonal nonzero counts */
239       ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
240       ierr = PetscMalloc(m*sizeof(PetscInt),&ld);CHKERRQ(ierr);
241       ierr = PetscMemzero(ld,m*sizeof(PetscInt));CHKERRQ(ierr);
242       jj = 0;
243       for (i=0; i<m; i++) {
244 	for (j=0; j<dlens[i]; j++) {
245           if (gmataj[jj] < rstart) ld[i]++;
246 	  if (gmataj[jj] < rstart || gmataj[jj] >= rend) olens[i]++;
247 	  jj++;
248 	}
249       }
250       /* receive numerical values */
251       ierr = PetscMemzero(gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr);
252       ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
253     }
254     /* set preallocation */
255     for (i=0; i<m; i++) {
256       dlens[i] -= olens[i];
257     }
258     ierr = MatSeqAIJSetPreallocation(mat,0,dlens);CHKERRQ(ierr);
259     ierr = MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);CHKERRQ(ierr);
260 
261     for (i=0; i<m; i++) {
262       dlens[i] += olens[i];
263     }
264     cnt  = 0;
265     for (i=0; i<m; i++) {
266       row  = rstart + i;
267       ierr = MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);CHKERRQ(ierr);
268       cnt += dlens[i];
269     }
270     if (rank) {
271       ierr = PetscFree2(gmataa,gmataj);CHKERRQ(ierr);
272     }
273     ierr = PetscFree2(dlens,olens);CHKERRQ(ierr);
274     ierr = PetscFree(rowners);CHKERRQ(ierr);
275     ((Mat_MPIAIJ*)(mat->data))->ld = ld;
276     *inmat = mat;
277   } else {   /* column indices are already set; only need to move over numerical values from process 0 */
278     Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data;
279     Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data;
280     mat   = *inmat;
281     ierr  = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
282     if (!rank) {
283       /* send numerical values to other processes */
284       gmata = (Mat_SeqAIJ*) gmat->data;
285       ierr   = MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);CHKERRQ(ierr);
286       gmataa = gmata->a;
287       for (i=1; i<size; i++) {
288         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
289         ierr = MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
290       }
291       nz   = gmata->i[rowners[1]]-gmata->i[rowners[0]];
292     } else {
293       /* receive numerical values from process 0*/
294       nz   = Ad->nz + Ao->nz;
295       ierr = PetscMalloc(nz*sizeof(PetscScalar),&gmataa);CHKERRQ(ierr); gmataarestore = gmataa;
296       ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
297     }
298     /* transfer numerical values into the diagonal A and off diagonal B parts of mat */
299     ld = ((Mat_MPIAIJ*)(mat->data))->ld;
300     ad = Ad->a;
301     ao = Ao->a;
302     if (mat->rmap->n) {
303       i  = 0;
304       nz = ld[i];                                   ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
305       nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
306     }
307     for (i=1; i<mat->rmap->n; i++) {
308       nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
309       nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
310     }
311     i--;
312     if (mat->rmap->n) {
313       nz = Ao->i[i+1] - Ao->i[i] - ld[i];           ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
314     }
315     if (rank) {
316       ierr = PetscFree(gmataarestore);CHKERRQ(ierr);
317     }
318   }
319   ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
320   ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
321   CHKMEMQ;
322   PetscFunctionReturn(0);
323 }
324 
325 /*
326   Local utility routine that creates a mapping from the global column
327 number to the local number in the off-diagonal part of the local
328 storage of the matrix.  When PETSC_USE_CTABLE is used this is scalable at
329 a slightly higher hash table cost; without it, it is not scalable (each process
330 holds an order-N integer array) but access is fast.
331 */
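
/*
   A sketch of how the colmap built below is consulted (mirroring the lookups in
   MatSetValues_MPIAIJ() further down; gcol stands for a hypothetical global
   column index):

     PetscInt lcol;
   #if defined(PETSC_USE_CTABLE)
     ierr = PetscTableFind(aij->colmap,gcol+1,&lcol);CHKERRQ(ierr);
     lcol--;                          (entries are stored shifted up by one)
   #else
     lcol = aij->colmap[gcol] - 1;    (a zero entry means the column is absent)
   #endif

   In either branch, lcol == -1 indicates that gcol does not occur in the
   off-diagonal part on this process.
*/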
332 #undef __FUNCT__
333 #define __FUNCT__ "CreateColmap_MPIAIJ_Private"
334 PetscErrorCode CreateColmap_MPIAIJ_Private(Mat mat)
335 {
336   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
337   PetscErrorCode ierr;
338   PetscInt       n = aij->B->cmap->n,i;
339 
340   PetscFunctionBegin;
341   if (!aij->garray) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"MPIAIJ Matrix was assembled but is missing garray");
342 #if defined (PETSC_USE_CTABLE)
343   ierr = PetscTableCreate(n,mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr);
344   for (i=0; i<n; i++){
345     ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1,INSERT_VALUES);CHKERRQ(ierr);
346   }
347 #else
348   ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscInt),&aij->colmap);CHKERRQ(ierr);
349   ierr = PetscLogObjectMemory(mat,mat->cmap->N*sizeof(PetscInt));CHKERRQ(ierr);
350   ierr = PetscMemzero(aij->colmap,mat->cmap->N*sizeof(PetscInt));CHKERRQ(ierr);
351   for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
352 #endif
353   PetscFunctionReturn(0);
354 }
355 
356 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \
357 { \
358     if (col <= lastcol1) low1 = 0; else high1 = nrow1; \
359     lastcol1 = col;\
360     while (high1-low1 > 5) { \
361       t = (low1+high1)/2; \
362       if (rp1[t] > col) high1 = t; \
363       else             low1  = t; \
364     } \
365       for (_i=low1; _i<high1; _i++) { \
366         if (rp1[_i] > col) break; \
367         if (rp1[_i] == col) { \
368           if (addv == ADD_VALUES) ap1[_i] += value;   \
369           else                    ap1[_i] = value; \
370           goto a_noinsert; \
371         } \
372       }  \
373       if (value == 0.0 && ignorezeroentries) {low1 = 0; high1 = nrow1;goto a_noinsert;} \
374       if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;}		\
375       if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
376       MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \
377       N = nrow1++ - 1; a->nz++; high1++; \
378       /* shift up all the later entries in this row */ \
379       for (ii=N; ii>=_i; ii--) { \
380         rp1[ii+1] = rp1[ii]; \
381         ap1[ii+1] = ap1[ii]; \
382       } \
383       rp1[_i] = col;  \
384       ap1[_i] = value;  \
385       a_noinsert: ; \
386       ailen[row] = nrow1; \
387 }
388 
389 
390 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \
391 { \
392     if (col <= lastcol2) low2 = 0; else high2 = nrow2; \
393     lastcol2 = col;\
394     while (high2-low2 > 5) { \
395       t = (low2+high2)/2; \
396       if (rp2[t] > col) high2 = t; \
397       else             low2  = t; \
398     } \
399     for (_i=low2; _i<high2; _i++) {		\
400       if (rp2[_i] > col) break;			\
401       if (rp2[_i] == col) {			      \
402 	if (addv == ADD_VALUES) ap2[_i] += value;     \
403 	else                    ap2[_i] = value;      \
404 	goto b_noinsert;			      \
405       }						      \
406     }							      \
407     if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
408     if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;}		\
409     if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
410     MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \
411     N = nrow2++ - 1; b->nz++; high2++;					\
412     /* shift up all the later entries in this row */			\
413     for (ii=N; ii>=_i; ii--) {						\
414       rp2[ii+1] = rp2[ii];						\
415       ap2[ii+1] = ap2[ii];						\
416     }									\
417     rp2[_i] = col;							\
418     ap2[_i] = value;							\
419     b_noinsert: ;								\
420     bilen[row] = nrow2;							\
421 }
422 
423 #undef __FUNCT__
424 #define __FUNCT__ "MatSetValuesRow_MPIAIJ"
425 PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[])
426 {
427   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)A->data;
428   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data;
429   PetscErrorCode ierr;
430   PetscInt       l,*garray = mat->garray,diag;
431 
432   PetscFunctionBegin;
433   /* code only works for square matrices A */
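  /* v is expected to hold the entire local row in global column order: the
     off-diagonal (B) entries whose global column lies left of the diagonal
     block, then the diagonal (A) block entries, then the remaining off-diagonal
     entries; this layout is the contract inferred from the copies below */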
434 
435   /* find size of row to the left of the diagonal part */
436   ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr);
437   row  = row - diag;
438   for (l=0; l<b->i[row+1]-b->i[row]; l++) {
439     if (garray[b->j[b->i[row]+l]] > diag) break;
440   }
441   ierr = PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));CHKERRQ(ierr);
442 
443   /* diagonal part */
444   ierr = PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));CHKERRQ(ierr);
445 
446   /* right of diagonal part */
447   ierr = PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));CHKERRQ(ierr);
448   PetscFunctionReturn(0);
449 }
450 
451 #undef __FUNCT__
452 #define __FUNCT__ "MatSetValues_MPIAIJ"
453 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
454 {
455   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
456   PetscScalar    value;
457   PetscErrorCode ierr;
458   PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
459   PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
460   PetscBool      roworiented = aij->roworiented;
461 
462   /* Some Variables required in the macro */
463   Mat            A = aij->A;
464   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data;
465   PetscInt       *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
466   MatScalar      *aa = a->a;
467   PetscBool      ignorezeroentries = a->ignorezeroentries;
468   Mat            B = aij->B;
469   Mat_SeqAIJ     *b = (Mat_SeqAIJ*)B->data;
470   PetscInt       *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
471   MatScalar      *ba = b->a;
472 
473   PetscInt       *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
474   PetscInt       nonew;
475   MatScalar      *ap1,*ap2;
476 
477   PetscFunctionBegin;
478   if (v) PetscValidScalarPointer(v,6);
479   for (i=0; i<m; i++) {
480     if (im[i] < 0) continue;
481 #if defined(PETSC_USE_DEBUG)
482     if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
483 #endif
484     if (im[i] >= rstart && im[i] < rend) {
485       row      = im[i] - rstart;
486       lastcol1 = -1;
487       rp1      = aj + ai[row];
488       ap1      = aa + ai[row];
489       rmax1    = aimax[row];
490       nrow1    = ailen[row];
491       low1     = 0;
492       high1    = nrow1;
493       lastcol2 = -1;
494       rp2      = bj + bi[row];
495       ap2      = ba + bi[row];
496       rmax2    = bimax[row];
497       nrow2    = bilen[row];
498       low2     = 0;
499       high2    = nrow2;
500 
501       for (j=0; j<n; j++) {
502         if (v) {if (roworiented) value = v[i*n+j]; else value = v[i+j*m];} else value = 0.0;
503         if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
504         if (in[j] >= cstart && in[j] < cend){
505           col = in[j] - cstart;
506           nonew = a->nonew;
507           MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
508         } else if (in[j] < 0) continue;
509 #if defined(PETSC_USE_DEBUG)
510         else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
511 #endif
512         else {
513           if (mat->was_assembled) {
514             if (!aij->colmap) {
515               ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
516             }
517 #if defined (PETSC_USE_CTABLE)
518             ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
519 	    col--;
520 #else
521             col = aij->colmap[in[j]] - 1;
522 #endif
523             if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
524               ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
525               col =  in[j];
526               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
527               B = aij->B;
528               b = (Mat_SeqAIJ*)B->data;
529               bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a;
530               rp2      = bj + bi[row];
531               ap2      = ba + bi[row];
532               rmax2    = bimax[row];
533               nrow2    = bilen[row];
534               low2     = 0;
535               high2    = nrow2;
536               bm       = aij->B->rmap->n;
537               ba = b->a;
538             }
539           } else col = in[j];
540           nonew = b->nonew;
541           MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
542         }
543       }
544     } else {
545       if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
546       if (!aij->donotstash) {
547         mat->assembled = PETSC_FALSE;
548         if (roworiented) {
549           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
550         } else {
551           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
552         }
553       }
554     }
555   }
556   PetscFunctionReturn(0);
557 }
558 
559 #undef __FUNCT__
560 #define __FUNCT__ "MatGetValues_MPIAIJ"
561 PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
562 {
563   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
564   PetscErrorCode ierr;
565   PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
566   PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
567 
568   PetscFunctionBegin;
569   for (i=0; i<m; i++) {
570     if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/
571     if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
572     if (idxm[i] >= rstart && idxm[i] < rend) {
573       row = idxm[i] - rstart;
574       for (j=0; j<n; j++) {
575         if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */
576         if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
577         if (idxn[j] >= cstart && idxn[j] < cend){
578           col = idxn[j] - cstart;
579           ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
580         } else {
581           if (!aij->colmap) {
582             ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
583           }
584 #if defined (PETSC_USE_CTABLE)
585           ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
586           col --;
587 #else
588           col = aij->colmap[idxn[j]] - 1;
589 #endif
590           if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
591           else {
592             ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
593           }
594         }
595       }
596     } else {
597       SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
598     }
599   }
600   PetscFunctionReturn(0);
601 }
602 
603 extern PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat,Vec,Vec);
604 
605 #undef __FUNCT__
606 #define __FUNCT__ "MatAssemblyBegin_MPIAIJ"
607 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
608 {
609   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
610   PetscErrorCode ierr;
611   PetscInt       nstash,reallocs;
612   InsertMode     addv;
613 
614   PetscFunctionBegin;
615   if (aij->donotstash || mat->nooffprocentries) {
616     PetscFunctionReturn(0);
617   }
618 
619   /* make sure all processes are in the same insert mode: INSERT_VALUES and ADD_VALUES have distinct bit values, so a bitwise OR across ranks equals ADD_VALUES|INSERT_VALUES exactly when the modes are mixed */
620   ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,((PetscObject)mat)->comm);CHKERRQ(ierr);
621   if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
622   mat->insertmode = addv; /* in case this processor had no cache */
623 
624   ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
625   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
626   ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
627   PetscFunctionReturn(0);
628 }
629 
630 #undef __FUNCT__
631 #define __FUNCT__ "MatAssemblyEnd_MPIAIJ"
632 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
633 {
634   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
635   Mat_SeqAIJ     *a=(Mat_SeqAIJ *)aij->A->data;
636   PetscErrorCode ierr;
637   PetscMPIInt    n;
638   PetscInt       i,j,rstart,ncols,flg;
639   PetscInt       *row,*col;
640   PetscBool      other_disassembled;
641   PetscScalar    *val;
642   InsertMode     addv = mat->insertmode;
643 
644   /* do not use 'b = (Mat_SeqAIJ *)aij->B->data' as B can be reset in disassembly */
645   PetscFunctionBegin;
646   if (!aij->donotstash && !mat->nooffprocentries) {
647     while (1) {
648       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
649       if (!flg) break;
650 
651       for (i=0; i<n;) {
652         /* Now identify the consecutive vals belonging to the same row */
653         for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
654         if (j < n) ncols = j-i;
655         else       ncols = n-i;
656         /* Now assemble all these values with a single function call */
657         ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
658         i = j;
659       }
660     }
661     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
662   }
663   ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
664   ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);
665 
666   /* determine if any processor has disassembled, if so we must
667      also disassemble ourselves, in order that we may reassemble. */
668   /*
669      if nonzero structure of submatrix B cannot change then we know that
670      no processor disassembled thus we can skip this stuff
671   */
672   if (!((Mat_SeqAIJ*)aij->B->data)->nonew)  {
673     ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,((PetscObject)mat)->comm);CHKERRQ(ierr);
674     if (mat->was_assembled && !other_disassembled) {
675       ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
676     }
677   }
678   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
679     ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
680   }
681   ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr);
682   ierr = MatSetOption(aij->B,MAT_CHECK_COMPRESSED_ROW,PETSC_FALSE);CHKERRQ(ierr);
683   ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
684   ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);
685 
686   ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
687   aij->rowvalues = 0;
688 
689   /* used by MatAXPY() */
690   a->xtoy = 0; ((Mat_SeqAIJ *)aij->B->data)->xtoy = 0;  /* b->xtoy = 0 */
691   a->XtoY = 0; ((Mat_SeqAIJ *)aij->B->data)->XtoY = 0;  /* b->XtoY = 0 */
692 
693   ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
694   if (a->inode.size) mat->ops->multdiagonalblock = MatMultDiagonalBlock_MPIAIJ;
695   PetscFunctionReturn(0);
696 }
697 
698 #undef __FUNCT__
699 #define __FUNCT__ "MatZeroEntries_MPIAIJ"
700 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
701 {
702   Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
703   PetscErrorCode ierr;
704 
705   PetscFunctionBegin;
706   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
707   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
708   PetscFunctionReturn(0);
709 }
710 
711 #undef __FUNCT__
712 #define __FUNCT__ "MatZeroRows_MPIAIJ"
713 PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
714 {
715   Mat_MPIAIJ        *l = (Mat_MPIAIJ*)A->data;
716   PetscErrorCode    ierr;
717   PetscMPIInt       size = l->size,imdex,n,rank = l->rank,tag = ((PetscObject)A)->tag,lastidx = -1;
718   PetscInt          i,*owners = A->rmap->range;
719   PetscInt          *nprocs,j,idx,nsends,row;
720   PetscInt          nmax,*svalues,*starts,*owner,nrecvs;
721   PetscInt          *rvalues,count,base,slen,*source;
722   PetscInt          *lens,*lrows,*values,rstart=A->rmap->rstart;
723   MPI_Comm          comm = ((PetscObject)A)->comm;
724   MPI_Request       *send_waits,*recv_waits;
725   MPI_Status        recv_status,*send_status;
726   const PetscScalar *xx;
727   PetscScalar       *bb;
728 #if defined(PETSC_DEBUG)
729   PetscBool      found = PETSC_FALSE;
730 #endif
731 
732   PetscFunctionBegin;
733   /*  first count number of contributors to each processor */
734   ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
735   ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
736   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/
737   j = 0;
738   for (i=0; i<N; i++) {
739     if (lastidx > (idx = rows[i])) j = 0;
740     lastidx = idx;
741     for (; j<size; j++) {
742       if (idx >= owners[j] && idx < owners[j+1]) {
743         nprocs[2*j]++;
744         nprocs[2*j+1] = 1;
745         owner[i] = j;
746 #if defined(PETSC_DEBUG)
747         found = PETSC_TRUE;
748 #endif
749         break;
750       }
751     }
752 #if defined(PETSC_DEBUG)
753     if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
754     found = PETSC_FALSE;
755 #endif
756   }
757   nsends = 0;  for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}
758 
759   if (A->nooffproczerorows) {
760     if (nsends > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"You called MatSetOption(,MAT_NO_OFF_PROC_ZERO_ROWS,PETSC_TRUE) but set an off process zero row");
761     nrecvs = nsends;
762     nmax   = N;
763   } else {
764     /* inform other processors of number of messages and max length*/
765     ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);
766   }
767 
768   /* post receives:   */
769   ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr);
770   ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
771   for (i=0; i<nrecvs; i++) {
772     ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
773   }
774 
775   /* do sends:
776       1) starts[i] gives the starting index in svalues for stuff going to
777          the ith processor
778   */
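  /* a hypothetical 3-process example: if the counts nprocs[2*i] are {2,0,3},
     the prefix sum gives starts = {0,2,2} and svalues is packed as two entries
     for rank 0 followed by three for rank 2; the prefix sum is rebuilt below
     because this packing loop advances the starts[] entries */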
779   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr);
780   ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
781   ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr);
782   starts[0] = 0;
783   for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
784   for (i=0; i<N; i++) {
785     svalues[starts[owner[i]]++] = rows[i];
786   }
787 
788   starts[0] = 0;
789   for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
790   count = 0;
791   for (i=0; i<size; i++) {
792     if (nprocs[2*i+1]) {
793       ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
794     }
795   }
796   ierr = PetscFree(starts);CHKERRQ(ierr);
797 
798   base = owners[rank];
799 
800   /*  wait on receives */
801   ierr   = PetscMalloc2(nrecvs,PetscInt,&lens,nrecvs,PetscInt,&source);CHKERRQ(ierr);
802   count  = nrecvs; slen = 0;
803   while (count) {
804     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
805     /* unpack receives into our local space */
806     ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);
807     source[imdex]  = recv_status.MPI_SOURCE;
808     lens[imdex]    = n;
809     slen          += n;
810     count--;
811   }
812   ierr = PetscFree(recv_waits);CHKERRQ(ierr);
813 
814   /* move the data into the send scatter */
815   ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr);
816   count = 0;
817   for (i=0; i<nrecvs; i++) {
818     values = rvalues + i*nmax;
819     for (j=0; j<lens[i]; j++) {
820       lrows[count++] = values[j] - base;
821     }
822   }
823   ierr = PetscFree(rvalues);CHKERRQ(ierr);
824   ierr = PetscFree2(lens,source);CHKERRQ(ierr);
825   ierr = PetscFree(owner);CHKERRQ(ierr);
826   ierr = PetscFree(nprocs);CHKERRQ(ierr);
827 
828   /* fix right hand side if needed */
829   if (x && b) {
830     ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr);
831     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
832     for (i=0; i<slen; i++) {
833       bb[lrows[i]] = diag*xx[lrows[i]];
834     }
835     ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr);
836     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
837   }
838   /*
839         Zero the required rows. If the "diagonal block" of the matrix
840      is square and the user wishes to set the diagonal we use separate
841      code so that MatSetValues() is not called for each diagonal entry, which
842      would allocate new memory, triggering many mallocs and slowing things down.
843 
844   */
845   /* must zero l->B before l->A because the (diag) case below may put values into l->B*/
846   ierr = MatZeroRows(l->B,slen,lrows,0.0,0,0);CHKERRQ(ierr);
847   if ((diag != 0.0) && (l->A->rmap->N == l->A->cmap->N)) {
848     ierr = MatZeroRows(l->A,slen,lrows,diag,0,0);CHKERRQ(ierr);
849   } else if (diag != 0.0) {
850     ierr = MatZeroRows(l->A,slen,lrows,0.0,0,0);CHKERRQ(ierr);
851     if (((Mat_SeqAIJ*)l->A->data)->nonew) {
852       SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\n\
853 MAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
854     }
855     for (i = 0; i < slen; i++) {
856       row  = lrows[i] + rstart;
857       ierr = MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr);
858     }
859     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
860     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
861   } else {
862     ierr = MatZeroRows(l->A,slen,lrows,0.0,0,0);CHKERRQ(ierr);
863   }
864   ierr = PetscFree(lrows);CHKERRQ(ierr);
865 
866   /* wait on sends */
867   if (nsends) {
868     ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
869     ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
870     ierr = PetscFree(send_status);CHKERRQ(ierr);
871   }
872   ierr = PetscFree(send_waits);CHKERRQ(ierr);
873   ierr = PetscFree(svalues);CHKERRQ(ierr);
874   PetscFunctionReturn(0);
875 }
876 
877 #undef __FUNCT__
878 #define __FUNCT__ "MatZeroRowsColumns_MPIAIJ"
879 PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
880 {
881   Mat_MPIAIJ        *l = (Mat_MPIAIJ*)A->data;
882   PetscErrorCode    ierr;
883   PetscMPIInt       size = l->size,imdex,n,rank = l->rank,tag = ((PetscObject)A)->tag,lastidx = -1;
884   PetscInt          i,*owners = A->rmap->range;
885   PetscInt          *nprocs,j,idx,nsends;
886   PetscInt          nmax,*svalues,*starts,*owner,nrecvs;
887   PetscInt          *rvalues,count,base,slen,*source;
888   PetscInt          *lens,*lrows,*values,m;
889   MPI_Comm          comm = ((PetscObject)A)->comm;
890   MPI_Request       *send_waits,*recv_waits;
891   MPI_Status        recv_status,*send_status;
892   const PetscScalar *xx;
893   PetscScalar       *bb,*mask;
894   Vec               xmask,lmask;
895   Mat_SeqAIJ        *aij = (Mat_SeqAIJ*)l->B->data;
896   const PetscInt    *aj, *ii,*ridx;
897   PetscScalar       *aa;
898 #if defined(PETSC_DEBUG)
899   PetscBool         found = PETSC_FALSE;
900 #endif
901 
902   PetscFunctionBegin;
903   /*  first count number of contributors to each processor */
904   ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
905   ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
906   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/
907   j = 0;
908   for (i=0; i<N; i++) {
909     if (lastidx > (idx = rows[i])) j = 0;
910     lastidx = idx;
911     for (; j<size; j++) {
912       if (idx >= owners[j] && idx < owners[j+1]) {
913         nprocs[2*j]++;
914         nprocs[2*j+1] = 1;
915         owner[i] = j;
916 #if defined(PETSC_DEBUG)
917         found = PETSC_TRUE;
918 #endif
919         break;
920       }
921     }
922 #if defined(PETSC_DEBUG)
923     if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
924     found = PETSC_FALSE;
925 #endif
926   }
927   nsends = 0;  for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}
928 
929   /* inform other processors of number of messages and max length*/
930   ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);
931 
932   /* post receives:   */
933   ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr);
934   ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
935   for (i=0; i<nrecvs; i++) {
936     ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
937   }
938 
939   /* do sends:
940       1) starts[i] gives the starting index in svalues for stuff going to
941          the ith processor
942   */
943   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr);
944   ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
945   ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr);
946   starts[0] = 0;
947   for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
948   for (i=0; i<N; i++) {
949     svalues[starts[owner[i]]++] = rows[i];
950   }
951 
952   starts[0] = 0;
953   for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
954   count = 0;
955   for (i=0; i<size; i++) {
956     if (nprocs[2*i+1]) {
957       ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
958     }
959   }
960   ierr = PetscFree(starts);CHKERRQ(ierr);
961 
962   base = owners[rank];
963 
964   /*  wait on receives */
965   ierr   = PetscMalloc2(nrecvs,PetscInt,&lens,nrecvs,PetscInt,&source);CHKERRQ(ierr);
966   count  = nrecvs; slen = 0;
967   while (count) {
968     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
969     /* unpack receives into our local space */
970     ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);
971     source[imdex]  = recv_status.MPI_SOURCE;
972     lens[imdex]    = n;
973     slen          += n;
974     count--;
975   }
976   ierr = PetscFree(recv_waits);CHKERRQ(ierr);
977 
978   /* move the data into the send scatter */
979   ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr);
980   count = 0;
981   for (i=0; i<nrecvs; i++) {
982     values = rvalues + i*nmax;
983     for (j=0; j<lens[i]; j++) {
984       lrows[count++] = values[j] - base;
985     }
986   }
987   ierr = PetscFree(rvalues);CHKERRQ(ierr);
988   ierr = PetscFree2(lens,source);CHKERRQ(ierr);
989   ierr = PetscFree(owner);CHKERRQ(ierr);
990   ierr = PetscFree(nprocs);CHKERRQ(ierr);
991   /* lrows are the local rows to be zeroed, slen is the number of local rows */
992 
993   /* zero diagonal part of matrix */
994   ierr = MatZeroRowsColumns(l->A,slen,lrows,diag,x,b);CHKERRQ(ierr);
995 
996   /* handle off diagonal part of matrix */
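  /* xmask is set to 1 in every locally zeroed row; the forward scatter into
     lmask then marks which ghost (off-diagonal) columns of B correspond to
     rows zeroed on some process */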
997   ierr = MatGetVecs(A,&xmask,PETSC_NULL);CHKERRQ(ierr);
998   ierr = VecDuplicate(l->lvec,&lmask);CHKERRQ(ierr);
999   ierr = VecGetArray(xmask,&bb);CHKERRQ(ierr);
1000   for (i=0; i<slen; i++) {
1001     bb[lrows[i]] = 1;
1002   }
1003   ierr = VecRestoreArray(xmask,&bb);CHKERRQ(ierr);
1004   ierr = VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1005   ierr = VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1006   ierr = VecDestroy(&xmask);CHKERRQ(ierr);
1007   if (x) {
1008     ierr = VecScatterBegin(l->Mvctx,x,l->lvec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1009     ierr = VecScatterEnd(l->Mvctx,x,l->lvec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1010     ierr = VecGetArrayRead(l->lvec,&xx);CHKERRQ(ierr);
1011     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
1012   }
1013   ierr = VecGetArray(lmask,&mask);CHKERRQ(ierr);
1014 
1015   /* remove zeroed rows of off diagonal matrix */
1016   ii = aij->i;
1017   for (i=0; i<slen; i++) {
1018     ierr = PetscMemzero(aij->a + ii[lrows[i]],(ii[lrows[i]+1] - ii[lrows[i]])*sizeof(PetscScalar));CHKERRQ(ierr);
1019   }
1020 
1021   /* loop over all elements of off process part of matrix zeroing removed columns*/
1022   if (aij->compressedrow.use){
1023     m    = aij->compressedrow.nrows;
1024     ii   = aij->compressedrow.i;
1025     ridx = aij->compressedrow.rindex;
1026     for (i=0; i<m; i++){
1027       n   = ii[i+1] - ii[i];
1028       aj  = aij->j + ii[i];
1029       aa  = aij->a + ii[i];
1030 
1031       for (j=0; j<n; j++) {
1032         if (PetscAbsScalar(mask[*aj])) {
1033           if (b) bb[*ridx] -= *aa*xx[*aj];
1034           *aa        = 0.0;
1035         }
1036         aa++;
1037         aj++;
1038       }
1039       ridx++;
1040     }
1041   } else { /* do not use compressed row format */
1042     m = l->B->rmap->n;
1043     for (i=0; i<m; i++) {
1044       n   = ii[i+1] - ii[i];
1045       aj  = aij->j + ii[i];
1046       aa  = aij->a + ii[i];
1047       for (j=0; j<n; j++) {
1048         if (PetscAbsScalar(mask[*aj])) {
1049           if (b) bb[i] -= *aa*xx[*aj];
1050           *aa    = 0.0;
1051         }
1052         aa++;
1053         aj++;
1054       }
1055     }
1056   }
1057   if (x) {
1058     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
1059     ierr = VecRestoreArrayRead(l->lvec,&xx);CHKERRQ(ierr);
1060   }
1061   ierr = VecRestoreArray(lmask,&mask);CHKERRQ(ierr);
1062   ierr = VecDestroy(&lmask);CHKERRQ(ierr);
1063   ierr = PetscFree(lrows);CHKERRQ(ierr);
1064 
1065   /* wait on sends */
1066   if (nsends) {
1067     ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
1068     ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
1069     ierr = PetscFree(send_status);CHKERRQ(ierr);
1070   }
1071   ierr = PetscFree(send_waits);CHKERRQ(ierr);
1072   ierr = PetscFree(svalues);CHKERRQ(ierr);
1073 
1074   PetscFunctionReturn(0);
1075 }
1076 
1077 #undef __FUNCT__
1078 #define __FUNCT__ "MatMult_MPIAIJ"
1079 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
1080 {
1081   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1082   PetscErrorCode ierr;
1083   PetscInt       nt;
1084 
1085   PetscFunctionBegin;
1086   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
1087   if (nt != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt);
1088   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1089   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
1090   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1091   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
1092   PetscFunctionReturn(0);
1093 }
1094 
1095 #undef __FUNCT__
1096 #define __FUNCT__ "MatMultDiagonalBlock_MPIAIJ"
1097 PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx)
1098 {
1099   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1100   PetscErrorCode ierr;
1101 
1102   PetscFunctionBegin;
1103   ierr = MatMultDiagonalBlock(a->A,bb,xx);CHKERRQ(ierr);
1104   PetscFunctionReturn(0);
1105 }
1106 
1107 #undef __FUNCT__
1108 #define __FUNCT__ "MatMultAdd_MPIAIJ"
1109 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1110 {
1111   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1112   PetscErrorCode ierr;
1113 
1114   PetscFunctionBegin;
1115   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1116   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1117   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1118   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
1119   PetscFunctionReturn(0);
1120 }
1121 
1122 #undef __FUNCT__
1123 #define __FUNCT__ "MatMultTranspose_MPIAIJ"
1124 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
1125 {
1126   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1127   PetscErrorCode ierr;
1128   PetscBool      merged;
1129 
1130   PetscFunctionBegin;
1131   ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr);
1132   /* do nondiagonal part */
1133   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1134   if (!merged) {
1135     /* send it on its way */
1136     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1137     /* do local part */
1138     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
1139     /* receive remote parts: note this assumes the values are not actually */
1140     /* added in yy until the next line */
1141     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1142   } else {
1143     /* do local part */
1144     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
1145     /* send it on its way */
1146     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1147     /* values actually were received in the Begin() but we need to call this nop */
1148     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1149   }
1150   PetscFunctionReturn(0);
1151 }
1152 
1153 EXTERN_C_BEGIN
1154 #undef __FUNCT__
1155 #define __FUNCT__ "MatIsTranspose_MPIAIJ"
1156 PetscErrorCode  MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscBool  *f)
1157 {
1158   MPI_Comm       comm;
1159   Mat_MPIAIJ     *Aij = (Mat_MPIAIJ *) Amat->data, *Bij;
1160   Mat            Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs;
1161   IS             Me,Notme;
1162   PetscErrorCode ierr;
1163   PetscInt       M,N,first,last,*notme,i;
1164   PetscMPIInt    size;
1165 
1166   PetscFunctionBegin;
1167 
1168   /* Easy test: symmetric diagonal block */
1169   Bij = (Mat_MPIAIJ *) Bmat->data; Bdia = Bij->A;
1170   ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr);
1171   if (!*f) PetscFunctionReturn(0);
1172   ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr);
1173   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1174   if (size == 1) PetscFunctionReturn(0);
1175 
1176   /* Hard test: off-diagonal block. This takes a MatGetSubMatrix. */
1177   ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr);
1178   ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr);
1179   ierr = PetscMalloc((N-last+first)*sizeof(PetscInt),&notme);CHKERRQ(ierr);
1180   for (i=0; i<first; i++) notme[i] = i;
1181   for (i=last; i<M; i++) notme[i-last+first] = i;
1182   ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,PETSC_COPY_VALUES,&Notme);CHKERRQ(ierr);
1183   ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr);
1184   ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr);
1185   Aoff = Aoffs[0];
1186   ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr);
1187   Boff = Boffs[0];
1188   ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr);
1189   ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr);
1190   ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr);
1191   ierr = ISDestroy(&Me);CHKERRQ(ierr);
1192   ierr = ISDestroy(&Notme);CHKERRQ(ierr);
1193   ierr = PetscFree(notme);CHKERRQ(ierr);
1194   PetscFunctionReturn(0);
1195 }
1196 EXTERN_C_END
1197 
1198 #undef __FUNCT__
1199 #define __FUNCT__ "MatMultTransposeAdd_MPIAIJ"
1200 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1201 {
1202   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1203   PetscErrorCode ierr;
1204 
1205   PetscFunctionBegin;
1206   /* do nondiagonal part */
1207   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1208   /* send it on its way */
1209   ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1210   /* do local part */
1211   ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1212   /* receive remote parts */
1213   ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1214   PetscFunctionReturn(0);
1215 }
1216 
1217 /*
1218   This only works correctly for square matrices where the subblock A->A is the
1219    diagonal block
1220 */
1221 #undef __FUNCT__
1222 #define __FUNCT__ "MatGetDiagonal_MPIAIJ"
1223 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v)
1224 {
1225   PetscErrorCode ierr;
1226   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1227 
1228   PetscFunctionBegin;
1229   if (A->rmap->N != A->cmap->N) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
1230   if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
1231   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
1232   PetscFunctionReturn(0);
1233 }
1234 
1235 #undef __FUNCT__
1236 #define __FUNCT__ "MatScale_MPIAIJ"
1237 PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa)
1238 {
1239   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1240   PetscErrorCode ierr;
1241 
1242   PetscFunctionBegin;
1243   ierr = MatScale(a->A,aa);CHKERRQ(ierr);
1244   ierr = MatScale(a->B,aa);CHKERRQ(ierr);
1245   PetscFunctionReturn(0);
1246 }
1247 
1248 #undef __FUNCT__
1249 #define __FUNCT__ "MatDestroy_MPIAIJ"
1250 PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
1251 {
1252   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1253   PetscErrorCode ierr;
1254 
1255   PetscFunctionBegin;
1256 #if defined(PETSC_USE_LOG)
1257   PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N);
1258 #endif
1259   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
1260   ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
1261   ierr = MatDestroy(&aij->A);CHKERRQ(ierr);
1262   ierr = MatDestroy(&aij->B);CHKERRQ(ierr);
1263 #if defined (PETSC_USE_CTABLE)
1264   ierr = PetscTableDestroy(&aij->colmap);CHKERRQ(ierr);
1265 #else
1266   ierr = PetscFree(aij->colmap);CHKERRQ(ierr);
1267 #endif
1268   ierr = PetscFree(aij->garray);CHKERRQ(ierr);
1269   ierr = VecDestroy(&aij->lvec);CHKERRQ(ierr);
1270   ierr = VecScatterDestroy(&aij->Mvctx);CHKERRQ(ierr);
1271   ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
1272   ierr = PetscFree(aij->ld);CHKERRQ(ierr);
1273   ierr = PetscFree(mat->data);CHKERRQ(ierr);
1274 
1275   ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
1276   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr);
1277   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr);
1278   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr);
1279   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C","",PETSC_NULL);CHKERRQ(ierr);
1280   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
1281   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr);
1282   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C","",PETSC_NULL);CHKERRQ(ierr);
1283   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C","",PETSC_NULL);CHKERRQ(ierr);
1284   PetscFunctionReturn(0);
1285 }
1286 
1287 #undef __FUNCT__
1288 #define __FUNCT__ "MatView_MPIAIJ_Binary"
1289 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
1290 {
1291   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
1292   Mat_SeqAIJ*       A = (Mat_SeqAIJ*)aij->A->data;
1293   Mat_SeqAIJ*       B = (Mat_SeqAIJ*)aij->B->data;
1294   PetscErrorCode    ierr;
1295   PetscMPIInt       rank,size,tag = ((PetscObject)viewer)->tag;
1296   int               fd;
1297   PetscInt          nz,header[4],*row_lengths,*range=0,rlen,i;
1298   PetscInt          nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap->rstart,rnz;
1299   PetscScalar       *column_values;
1300   PetscInt          message_count,flowcontrolcount;
1301 
1302   PetscFunctionBegin;
1303   ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr);
1304   ierr = MPI_Comm_size(((PetscObject)mat)->comm,&size);CHKERRQ(ierr);
1305   nz   = A->nz + B->nz;
1306   if (!rank) {
1307     header[0] = MAT_FILE_CLASSID;
1308     header[1] = mat->rmap->N;
1309     header[2] = mat->cmap->N;
1310     ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
1311     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1312     ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1313     /* get largest number of rows any processor has */
1314     rlen = mat->rmap->n;
1315     range = mat->rmap->range;
1316     for (i=1; i<size; i++) {
1317       rlen = PetscMax(rlen,range[i+1] - range[i]);
1318     }
1319   } else {
1320     ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
1321     rlen = mat->rmap->n;
1322   }
1323 
1324   /* load up the local row counts */
1325   ierr = PetscMalloc((rlen+1)*sizeof(PetscInt),&row_lengths);CHKERRQ(ierr);
1326   for (i=0; i<mat->rmap->n; i++) {
1327     row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
1328   }
1329 
1330   /* store the row lengths to the file */
1331   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1332   if (!rank) {
1333     ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap->n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1334     for (i=1; i<size; i++) {
1335       ierr = PetscViewerFlowControlStepMaster(viewer,i,message_count,flowcontrolcount);CHKERRQ(ierr);
1336       rlen = range[i+1] - range[i];
1337       ierr = MPILong_Recv(row_lengths,rlen,MPIU_INT,i,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1338       ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1339     }
1340     ierr = PetscViewerFlowControlEndMaster(viewer,message_count);CHKERRQ(ierr);
1341   } else {
1342     ierr = PetscViewerFlowControlStepWorker(viewer,rank,message_count);CHKERRQ(ierr);
1343     ierr = MPILong_Send(row_lengths,mat->rmap->n,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1344     ierr = PetscViewerFlowControlEndWorker(viewer,message_count);CHKERRQ(ierr);
1345   }
1346   ierr = PetscFree(row_lengths);CHKERRQ(ierr);
1347 
1348   /* load up the local column indices */
1349   nzmax = nz; /* the 0th processor needs as much space as the largest processor needs */
1350   ierr = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
1351   ierr = PetscMalloc((nzmax+1)*sizeof(PetscInt),&column_indices);CHKERRQ(ierr);
1352   cnt  = 0;
1353   for (i=0; i<mat->rmap->n; i++) {
1354     for (j=B->i[i]; j<B->i[i+1]; j++) {
1355       if ( (col = garray[B->j[j]]) > cstart) break;
1356       column_indices[cnt++] = col;
1357     }
1358     for (k=A->i[i]; k<A->i[i+1]; k++) {
1359       column_indices[cnt++] = A->j[k] + cstart;
1360     }
1361     for (; j<B->i[i+1]; j++) {
1362       column_indices[cnt++] = garray[B->j[j]];
1363     }
1364   }
1365   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
1366 
1367   /* store the column indices to the file */
1368   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1369   if (!rank) {
1370     MPI_Status status;
1371     ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1372     for (i=1; i<size; i++) {
1373       ierr = PetscViewerFlowControlStepMaster(viewer,i,message_count,flowcontrolcount);CHKERRQ(ierr);
1374       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
1375       if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: rnz = %D nzmax = %D",rnz,nzmax);
1376       ierr = MPILong_Recv(column_indices,rnz,MPIU_INT,i,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1377       ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1378     }
1379     ierr = PetscViewerFlowControlEndMaster(viewer,message_count);CHKERRQ(ierr);
1380   } else {
1381     ierr = PetscViewerFlowControlStepWorker(viewer,rank,message_count);CHKERRQ(ierr);
1382     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1383     ierr = MPILong_Send(column_indices,nz,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1384     ierr = PetscViewerFlowControlEndWorker(viewer,message_count);CHKERRQ(ierr);
1385   }
1386   ierr = PetscFree(column_indices);CHKERRQ(ierr);
1387 
1388   /* load up the local column values */
1389   ierr = PetscMalloc((nzmax+1)*sizeof(PetscScalar),&column_values);CHKERRQ(ierr);
1390   cnt  = 0;
1391   for (i=0; i<mat->rmap->n; i++) {
1392     for (j=B->i[i]; j<B->i[i+1]; j++) {
1393       if ( garray[B->j[j]] > cstart) break;
1394       column_values[cnt++] = B->a[j];
1395     }
1396     for (k=A->i[i]; k<A->i[i+1]; k++) {
1397       column_values[cnt++] = A->a[k];
1398     }
1399     for (; j<B->i[i+1]; j++) {
1400       column_values[cnt++] = B->a[j];
1401     }
1402   }
1403   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
1404 
1405   /* store the column values to the file */
1406   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1407   if (!rank) {
1408     MPI_Status status;
1409     ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1410     for (i=1; i<size; i++) {
1411       ierr = PetscViewerFlowControlStepMaster(viewer,i,message_count,flowcontrolcount);CHKERRQ(ierr);
1412       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
1413       if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: rnz = %D nzmax = %D",rnz,nzmax);
1414       ierr = MPILong_Recv(column_values,rnz,MPIU_SCALAR,i,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1415       ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1416     }
1417     ierr = PetscViewerFlowControlEndMaster(viewer,message_count);CHKERRQ(ierr);
1418   } else {
1419     ierr = PetscViewerFlowControlStepWorker(viewer,rank,message_count);CHKERRQ(ierr);
1420     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1421     ierr = MPILong_Send(column_values,nz,MPIU_SCALAR,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1422     ierr = PetscViewerFlowControlEndWorker(viewer,message_count);CHKERRQ(ierr);
1423   }
1424   ierr = PetscFree(column_values);CHKERRQ(ierr);
1425   PetscFunctionReturn(0);
1426 }
1427 
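/*
   A minimal usage sketch for the binary path above (illustrative only; the
   file name "matrix.dat" and the assembled matrix "mat" are hypothetical):

     PetscViewer viewer;
     ierr = PetscViewerBinaryOpen(PETSC_COMM_WORLD,"matrix.dat",FILE_MODE_WRITE,&viewer);CHKERRQ(ierr);
     ierr = MatView(mat,viewer);CHKERRQ(ierr);              <-- reaches MatView_MPIAIJ_Binary() for MATMPIAIJ
     ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr);

   Rank 0 writes the header, row lengths, column indices, and values in that
   order; the other ranks stream their local pieces to rank 0 through the
   PetscViewerFlowControl handshake so only bounded buffering is needed.
*/
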
1428 #undef __FUNCT__
1429 #define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket"
1430 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
1431 {
1432   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
1433   PetscErrorCode    ierr;
1434   PetscMPIInt       rank = aij->rank,size = aij->size;
1435   PetscBool         isdraw,iascii,isbinary;
1436   PetscViewer       sviewer;
1437   PetscViewerFormat format;
1438 
1439   PetscFunctionBegin;
1440   ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1441   ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1442   ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1443   if (iascii) {
1444     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1445     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1446       MatInfo    info;
1447       PetscBool  inodes;
1448 
1449       ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr);
1450       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
1451       ierr = MatInodeGetInodeSizes(aij->A,PETSC_NULL,(PetscInt **)&inodes,PETSC_NULL);CHKERRQ(ierr);
1452       ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_TRUE);CHKERRQ(ierr);
1453       if (!inodes) {
1454         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, not using I-node routines\n",
1455                                                   rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
1456       } else {
1457         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, using I-node routines\n",
1458                                                   rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
1459       }
1460       ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
1461       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1462       ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
1463       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1464       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1465       ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_FALSE);CHKERRQ(ierr);
1466       ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
1467       ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr);
1468       PetscFunctionReturn(0);
1469     } else if (format == PETSC_VIEWER_ASCII_INFO) {
1470       PetscInt   inodecount,inodelimit,*inodes;
1471       ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr);
1472       if (inodes) {
1473         ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr);
1474       } else {
1475         ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr);
1476       }
1477       PetscFunctionReturn(0);
1478     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1479       PetscFunctionReturn(0);
1480     }
1481   } else if (isbinary) {
1482     if (size == 1) {
1483       ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1484       ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
1485     } else {
1486       ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr);
1487     }
1488     PetscFunctionReturn(0);
1489   } else if (isdraw) {
1490     PetscDraw  draw;
1491     PetscBool  isnull;
1492     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
1493     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
1494   }
1495 
1496   if (size == 1) {
1497     ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1498     ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
1499   } else {
1500     /* assemble the entire matrix onto first processor. */
1501     Mat         A;
1502     Mat_SeqAIJ  *Aloc;
1503     PetscInt    M = mat->rmap->N,N = mat->cmap->N,m,*ai,*aj,row,*cols,i,*ct;
1504     MatScalar   *a;
1505 
1506     if (mat->rmap->N > 1024) {
1507       PetscBool  flg = PETSC_FALSE;
1508 
1509       ierr = PetscOptionsGetBool(((PetscObject) mat)->prefix, "-mat_ascii_output_large", &flg,PETSC_NULL);CHKERRQ(ierr);
1510       if (!flg) {
1511         SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_OUTOFRANGE,"ASCII matrix output not allowed for matrices with more than 1024 rows, use binary format instead.\nYou can override this restriction using -mat_ascii_output_large.");
1512       }
1513     }
1514 
1515     ierr = MatCreate(((PetscObject)mat)->comm,&A);CHKERRQ(ierr);
1516     if (!rank) {
1517       ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
1518     } else {
1519       ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
1520     }
1521     /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */
1522     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
1523     ierr = MatMPIAIJSetPreallocation(A,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
1524     ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
1525     ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr);
1526 
1527     /* copy over the A part */
1528     Aloc = (Mat_SeqAIJ*)aij->A->data;
1529     m = aij->A->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1530     row = mat->rmap->rstart;
1531     for (i=0; i<ai[m]; i++) {aj[i] += mat->cmap->rstart;}
1532     for (i=0; i<m; i++) {
1533       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
1534       row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
1535     }
1536     aj = Aloc->j;
1537     for (i=0; i<ai[m]; i++) {aj[i] -= mat->cmap->rstart;}
1538 
1539     /* copy over the B part */
1540     Aloc = (Mat_SeqAIJ*)aij->B->data;
1541     m    = aij->B->rmap->n;  ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1542     row  = mat->rmap->rstart;
1543     ierr = PetscMalloc((ai[m]+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr);
1544     ct   = cols;
1545     for (i=0; i<ai[m]; i++) {cols[i] = aij->garray[aj[i]];}
1546     for (i=0; i<m; i++) {
1547       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
1548       row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
1549     }
1550     ierr = PetscFree(ct);CHKERRQ(ierr);
1551     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1552     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1553     /*
1554        Everyone has to call MatView() to draw the matrix since the graphics waits are
1555        synchronized across all processors that share the PetscDraw object
1556     */
1557     ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
1558     if (!rank) {
1559       ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1560       /* Set the type name to MATMPIAIJ so that the correct type can be printed out by PetscObjectPrintClassNamePrefixType() in MatView_SeqAIJ_ASCII()*/
1561       ierr = PetscStrcpy(((PetscObject)((Mat_MPIAIJ*)(A->data))->A)->type_name,MATMPIAIJ);CHKERRQ(ierr);
1562       ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
1563     }
1564     ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
1565     ierr = MatDestroy(&A);CHKERRQ(ierr);
1566   }
1567   PetscFunctionReturn(0);
1568 }
1569 
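/*
   A sketch of how the ASCII info formats handled above are selected
   (hypothetical example, not code from this file):

     ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_WORLD,PETSC_VIEWER_ASCII_INFO_DETAIL);CHKERRQ(ierr);
     ierr = MatView(mat,PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr);

   PETSC_VIEWER_ASCII_INFO_DETAIL prints per-rank nonzero counts and the
   VecScatter information; the default format gathers the whole matrix onto
   rank 0, which is why output is refused above 1024 rows unless
   -mat_ascii_output_large is given.
*/
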
1570 #undef __FUNCT__
1571 #define __FUNCT__ "MatView_MPIAIJ"
1572 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer)
1573 {
1574   PetscErrorCode ierr;
1575   PetscBool      iascii,isdraw,issocket,isbinary;
1576 
1577   PetscFunctionBegin;
1578   ierr  = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1579   ierr  = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1580   ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1581   ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
1582   if (iascii || isdraw || isbinary || issocket) {
1583     ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
1584   } else {
1585     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Viewer type %s not supported by MPIAIJ matrices",((PetscObject)viewer)->type_name);
1586   }
1587   PetscFunctionReturn(0);
1588 }
1589 
1590 #undef __FUNCT__
1591 #define __FUNCT__ "MatSOR_MPIAIJ"
1592 PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
1593 {
1594   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1595   PetscErrorCode ierr;
1596   Vec            bb1 = 0;
1597   PetscBool      hasop;
1598 
1599   PetscFunctionBegin;
1600   if (its > 1 || !(flag & SOR_ZERO_INITIAL_GUESS) || flag & SOR_EISENSTAT) {
1601     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
1602   }
1603 
1604   if (flag == SOR_APPLY_UPPER) {
1605     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1606     PetscFunctionReturn(0);
1607   }
1608 
1609   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
1610     if (flag & SOR_ZERO_INITIAL_GUESS) {
1611       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1612       its--;
1613     }
1614 
1615     while (its--) {
1616       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1617       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1618 
1619       /* update rhs: bb1 = bb - B*x */
1620       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1621       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1622 
1623       /* local sweep */
1624       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1625     }
1626   } else if (flag & SOR_LOCAL_FORWARD_SWEEP){
1627     if (flag & SOR_ZERO_INITIAL_GUESS) {
1628       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1629       its--;
1630     }
1631     while (its--) {
1632       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1633       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1634 
1635       /* update rhs: bb1 = bb - B*x */
1636       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1637       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1638 
1639       /* local sweep */
1640       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1641     }
1642   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){
1643     if (flag & SOR_ZERO_INITIAL_GUESS) {
1644       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1645       its--;
1646     }
1647     while (its--) {
1648       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1649       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1650 
1651       /* update rhs: bb1 = bb - B*x */
1652       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1653       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1654 
1655       /* local sweep */
1656       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1657     }
1658   }  else if (flag & SOR_EISENSTAT) {
1659     Vec         xx1;
1660 
1661     ierr = VecDuplicate(bb,&xx1);CHKERRQ(ierr);
1662     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr);
1663 
1664     ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1665     ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1666     if (!mat->diag) {
1667       ierr = MatGetVecs(matin,&mat->diag,PETSC_NULL);CHKERRQ(ierr);
1668       ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr);
1669     }
1670     ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr);
1671     if (hasop) {
1672       ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr);
1673     } else {
1674       ierr = VecPointwiseMult(bb1,mat->diag,xx);CHKERRQ(ierr);
1675     }
1676     ierr = VecAYPX(bb1,(omega-2.0)/omega,bb);CHKERRQ(ierr);
1677 
1678     ierr = MatMultAdd(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr);
1679 
1680     /* local sweep */
1681     ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr);
1682     ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr);
1683     ierr = VecDestroy(&xx1);CHKERRQ(ierr);
1684   } else SETERRQ(((PetscObject)matin)->comm,PETSC_ERR_SUP,"Parallel SOR not supported");
1685 
1686   ierr = VecDestroy(&bb1);CHKERRQ(ierr);
1687   PetscFunctionReturn(0);
1688 }
1689 
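/*
   What the local sweeps above compute: with the parallel matrix split as
   M = A + B (A = diagonal block, B = off-diagonal block applied to the
   scattered ghost values in mat->lvec), each outer iteration forms

       bb1 = bb - B*lvec

   and then runs the requested sequential SOR sweep on A with right-hand side
   bb1, i.e. a block Jacobi outer iteration with SOR as the local solver.  A
   hypothetical driver call (illustrative only):

     ierr = MatSOR(mat,b,1.0,(MatSORType)(SOR_LOCAL_SYMMETRIC_SWEEP | SOR_ZERO_INITIAL_GUESS),0.0,2,1,x);CHKERRQ(ierr);
*/
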
1690 #undef __FUNCT__
1691 #define __FUNCT__ "MatPermute_MPIAIJ"
1692 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B)
1693 {
1694   MPI_Comm       comm;
1695   PetscInt       first,local_rowsize,local_colsize;
1696   const PetscInt *rows;
1697   IS             crowp,growp,irowp,lrowp,lcolp,icolp;
1698   PetscErrorCode ierr;
1699 
1700   PetscFunctionBegin;
1701   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
1702   /* make a collective version of 'rowp', this is to be tolerant of users who pass serial index sets */
1703   ierr = ISOnComm(rowp,comm,PETSC_USE_POINTER,&crowp);CHKERRQ(ierr);
1704   /* collect the global row permutation and invert it */
1705   ierr = ISAllGather(crowp,&growp);CHKERRQ(ierr);
1706   ierr = ISSetPermutation(growp);CHKERRQ(ierr);
1707   ierr = ISDestroy(&crowp);CHKERRQ(ierr);
1708   ierr = ISInvertPermutation(growp,PETSC_DECIDE,&irowp);CHKERRQ(ierr);
1709   ierr = ISDestroy(&growp);CHKERRQ(ierr);
1710   /* get the local target indices */
1711   ierr = MatGetOwnershipRange(A,&first,PETSC_NULL);CHKERRQ(ierr);
1712   ierr = MatGetLocalSize(A,&local_rowsize,&local_colsize);CHKERRQ(ierr);
1713   ierr = ISGetIndices(irowp,&rows);CHKERRQ(ierr);
1714   ierr = ISCreateGeneral(PETSC_COMM_SELF,local_rowsize,rows+first,PETSC_COPY_VALUES,&lrowp);CHKERRQ(ierr);
1715   ierr = ISRestoreIndices(irowp,&rows);CHKERRQ(ierr);
1716   ierr = ISDestroy(&irowp);CHKERRQ(ierr);
1717   /* the column permutation is so much easier;
1718      make a local version of 'colp' and invert it */
1719   ierr = ISOnComm(colp,PETSC_COMM_SELF,PETSC_USE_POINTER,&lcolp);CHKERRQ(ierr);
1720   ierr = ISSetPermutation(lcolp);CHKERRQ(ierr);
1721   ierr = ISInvertPermutation(lcolp,PETSC_DECIDE,&icolp);CHKERRQ(ierr);
1722   ierr = ISDestroy(&lcolp);CHKERRQ(ierr);
1723   /* now we just get the submatrix */
1724   ierr = MatGetSubMatrix_MPIAIJ_Private(A,lrowp,icolp,local_colsize,MAT_INITIAL_MATRIX,B);CHKERRQ(ierr);
1725   /* clean up */
1726   ierr = ISDestroy(&lrowp);CHKERRQ(ierr);
1727   ierr = ISDestroy(&icolp);CHKERRQ(ierr);
1728   PetscFunctionReturn(0);
1729 }
1730 
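/*
   One plausible calling pattern for the routine above (illustrative sketch
   only; the exact expectations on colp follow from the ISOnComm() handling):

     IS  rowp,colp;
     Mat B;
     ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
     ierr = ISCreateStride(PETSC_COMM_WORLD,rend-rstart,rstart,1,&rowp);CHKERRQ(ierr);  new global row of each local row
     ierr = ISCreateStride(PETSC_COMM_SELF,N,0,1,&colp);CHKERRQ(ierr);                  full column permutation, same on every rank
     ierr = MatPermute(A,rowp,colp,&B);CHKERRQ(ierr);
*/
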
1731 #undef __FUNCT__
1732 #define __FUNCT__ "MatGetInfo_MPIAIJ"
1733 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1734 {
1735   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1736   Mat            A = mat->A,B = mat->B;
1737   PetscErrorCode ierr;
1738   PetscReal      isend[5],irecv[5];
1739 
1740   PetscFunctionBegin;
1741   info->block_size     = 1.0;
1742   ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1743   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1744   isend[3] = info->memory;  isend[4] = info->mallocs;
1745   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1746   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1747   isend[3] += info->memory;  isend[4] += info->mallocs;
1748   if (flag == MAT_LOCAL) {
1749     info->nz_used      = isend[0];
1750     info->nz_allocated = isend[1];
1751     info->nz_unneeded  = isend[2];
1752     info->memory       = isend[3];
1753     info->mallocs      = isend[4];
1754   } else if (flag == MAT_GLOBAL_MAX) {
1755     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,((PetscObject)matin)->comm);CHKERRQ(ierr);
1756     info->nz_used      = irecv[0];
1757     info->nz_allocated = irecv[1];
1758     info->nz_unneeded  = irecv[2];
1759     info->memory       = irecv[3];
1760     info->mallocs      = irecv[4];
1761   } else if (flag == MAT_GLOBAL_SUM) {
1762     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,((PetscObject)matin)->comm);CHKERRQ(ierr);
1763     info->nz_used      = irecv[0];
1764     info->nz_allocated = irecv[1];
1765     info->nz_unneeded  = irecv[2];
1766     info->memory       = irecv[3];
1767     info->mallocs      = irecv[4];
1768   }
1769   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1770   info->fill_ratio_needed = 0;
1771   info->factor_mallocs    = 0;
1772 
1773   PetscFunctionReturn(0);
1774 }
1775 
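/*
   Illustrative usage of the info flags handled above (hypothetical example):

     MatInfo info;
     ierr = MatGetInfo(mat,MAT_GLOBAL_SUM,&info);CHKERRQ(ierr);
     ierr = PetscPrintf(PETSC_COMM_WORLD,"nz used %g, allocated %g\n",info.nz_used,info.nz_allocated);CHKERRQ(ierr);

   MAT_LOCAL reports only this process; MAT_GLOBAL_MAX and MAT_GLOBAL_SUM
   reduce the five counters (nz_used, nz_allocated, nz_unneeded, memory,
   mallocs) over the communicator as shown above.
*/
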
1776 #undef __FUNCT__
1777 #define __FUNCT__ "MatSetOption_MPIAIJ"
1778 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscBool  flg)
1779 {
1780   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1781   PetscErrorCode ierr;
1782 
1783   PetscFunctionBegin;
1784   switch (op) {
1785   case MAT_NEW_NONZERO_LOCATIONS:
1786   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1787   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1788   case MAT_KEEP_NONZERO_PATTERN:
1789   case MAT_NEW_NONZERO_LOCATION_ERR:
1790   case MAT_USE_INODES:
1791   case MAT_IGNORE_ZERO_ENTRIES:
1792     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1793     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1794     break;
1795   case MAT_ROW_ORIENTED:
1796     a->roworiented = flg;
1797     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1798     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1799     break;
1800   case MAT_NEW_DIAGONALS:
1801     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1802     break;
1803   case MAT_IGNORE_OFF_PROC_ENTRIES:
1804     a->donotstash = flg;
1805     break;
1806   case MAT_SPD:
1807     A->spd_set                         = PETSC_TRUE;
1808     A->spd                             = flg;
1809     if (flg) {
1810       A->symmetric                     = PETSC_TRUE;
1811       A->structurally_symmetric        = PETSC_TRUE;
1812       A->symmetric_set                 = PETSC_TRUE;
1813       A->structurally_symmetric_set    = PETSC_TRUE;
1814     }
1815     break;
1816   case MAT_SYMMETRIC:
1817     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1818     break;
1819   case MAT_STRUCTURALLY_SYMMETRIC:
1820     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1821     break;
1822   case MAT_HERMITIAN:
1823     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1824     break;
1825   case MAT_SYMMETRY_ETERNAL:
1826     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1827     break;
1828   default:
1829     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op);
1830   }
1831   PetscFunctionReturn(0);
1832 }
1833 
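/*
   Illustrative calls for the option groups handled above (hypothetical):

     ierr = MatSetOption(mat,MAT_IGNORE_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr);  sets a->donotstash only
     ierr = MatSetOption(mat,MAT_KEEP_NONZERO_PATTERN,PETSC_TRUE);CHKERRQ(ierr);     forwarded to both a->A and a->B
     ierr = MatSetOption(mat,MAT_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr);                forwarded to a->A only
*/
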
1834 #undef __FUNCT__
1835 #define __FUNCT__ "MatGetRow_MPIAIJ"
1836 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1837 {
1838   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1839   PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
1840   PetscErrorCode ierr;
1841   PetscInt       i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart;
1842   PetscInt       nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend;
1843   PetscInt       *cmap,*idx_p;
1844 
1845   PetscFunctionBegin;
1846   if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
1847   mat->getrowactive = PETSC_TRUE;
1848 
1849   if (!mat->rowvalues && (idx || v)) {
1850     /*
1851         allocate enough space to hold information from the longest row.
1852     */
1853     Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data;
1854     PetscInt   max = 1,tmp;
1855     for (i=0; i<matin->rmap->n; i++) {
1856       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1857       if (max < tmp) { max = tmp; }
1858     }
1859     ierr = PetscMalloc2(max,PetscScalar,&mat->rowvalues,max,PetscInt,&mat->rowindices);CHKERRQ(ierr);
1860   }
1861 
1862   if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows");
1863   lrow = row - rstart;
1864 
1865   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1866   if (!v)   {pvA = 0; pvB = 0;}
1867   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1868   ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1869   ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1870   nztot = nzA + nzB;
1871 
1872   cmap  = mat->garray;
1873   if (v  || idx) {
1874     if (nztot) {
1875       /* Sort by increasing column numbers, assuming A and B already sorted */
1876       PetscInt imark = -1;
1877       if (v) {
1878         *v = v_p = mat->rowvalues;
1879         for (i=0; i<nzB; i++) {
1880           if (cmap[cworkB[i]] < cstart)   v_p[i] = vworkB[i];
1881           else break;
1882         }
1883         imark = i;
1884         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1885         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1886       }
1887       if (idx) {
1888         *idx = idx_p = mat->rowindices;
1889         if (imark > -1) {
1890           for (i=0; i<imark; i++) {
1891             idx_p[i] = cmap[cworkB[i]];
1892           }
1893         } else {
1894           for (i=0; i<nzB; i++) {
1895             if (cmap[cworkB[i]] < cstart)   idx_p[i] = cmap[cworkB[i]];
1896             else break;
1897           }
1898           imark = i;
1899         }
1900         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart + cworkA[i];
1901         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]];
1902       }
1903     } else {
1904       if (idx) *idx = 0;
1905       if (v)   *v   = 0;
1906     }
1907   }
1908   *nz = nztot;
1909   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1910   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1911   PetscFunctionReturn(0);
1912 }
1913 
1914 #undef __FUNCT__
1915 #define __FUNCT__ "MatRestoreRow_MPIAIJ"
1916 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1917 {
1918   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1919 
1920   PetscFunctionBegin;
1921   if (!aij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
1922   aij->getrowactive = PETSC_FALSE;
1923   PetscFunctionReturn(0);
1924 }
1925 
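/*
   The calling protocol for the two routines above (illustrative sketch,
   hypothetical variable names):

     PetscInt          row,rstart,rend,ncols;
     const PetscInt    *cols;
     const PetscScalar *vals;
     ierr = MatGetOwnershipRange(mat,&rstart,&rend);CHKERRQ(ierr);
     for (row=rstart; row<rend; row++) {                 only locally owned rows may be requested
       ierr = MatGetRow(mat,row,&ncols,&cols,&vals);CHKERRQ(ierr);
       ...  cols[] holds global indices in increasing order, vals[] the entries
       ierr = MatRestoreRow(mat,row,&ncols,&cols,&vals);CHKERRQ(ierr);
     }
*/
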
1926 #undef __FUNCT__
1927 #define __FUNCT__ "MatNorm_MPIAIJ"
1928 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm)
1929 {
1930   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1931   Mat_SeqAIJ     *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data;
1932   PetscErrorCode ierr;
1933   PetscInt       i,j,cstart = mat->cmap->rstart;
1934   PetscReal      sum = 0.0;
1935   MatScalar      *v;
1936 
1937   PetscFunctionBegin;
1938   if (aij->size == 1) {
1939     ierr =  MatNorm(aij->A,type,norm);CHKERRQ(ierr);
1940   } else {
1941     if (type == NORM_FROBENIUS) {
1942       v = amat->a;
1943       for (i=0; i<amat->nz; i++) {
1944 #if defined(PETSC_USE_COMPLEX)
1945         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1946 #else
1947         sum += (*v)*(*v); v++;
1948 #endif
1949       }
1950       v = bmat->a;
1951       for (i=0; i<bmat->nz; i++) {
1952 #if defined(PETSC_USE_COMPLEX)
1953         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1954 #else
1955         sum += (*v)*(*v); v++;
1956 #endif
1957       }
1958       ierr = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPIU_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr);
1959       *norm = PetscSqrtReal(*norm);
1960     } else if (type == NORM_1) { /* max column norm */
1961       PetscReal *tmp,*tmp2;
1962       PetscInt  *jj,*garray = aij->garray;
1963       ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr);
1964       ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscReal),&tmp2);CHKERRQ(ierr);
1965       ierr = PetscMemzero(tmp,mat->cmap->N*sizeof(PetscReal));CHKERRQ(ierr);
1966       *norm = 0.0;
1967       v = amat->a; jj = amat->j;
1968       for (j=0; j<amat->nz; j++) {
1969         tmp[cstart + *jj++ ] += PetscAbsScalar(*v);  v++;
1970       }
1971       v = bmat->a; jj = bmat->j;
1972       for (j=0; j<bmat->nz; j++) {
1973         tmp[garray[*jj++]] += PetscAbsScalar(*v); v++;
1974       }
1975       ierr = MPI_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr);
1976       for (j=0; j<mat->cmap->N; j++) {
1977         if (tmp2[j] > *norm) *norm = tmp2[j];
1978       }
1979       ierr = PetscFree(tmp);CHKERRQ(ierr);
1980       ierr = PetscFree(tmp2);CHKERRQ(ierr);
1981     } else if (type == NORM_INFINITY) { /* max row norm */
1982       PetscReal ntemp = 0.0;
1983       for (j=0; j<aij->A->rmap->n; j++) {
1984         v = amat->a + amat->i[j];
1985         sum = 0.0;
1986         for (i=0; i<amat->i[j+1]-amat->i[j]; i++) {
1987           sum += PetscAbsScalar(*v); v++;
1988         }
1989         v = bmat->a + bmat->i[j];
1990         for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) {
1991           sum += PetscAbsScalar(*v); v++;
1992         }
1993         if (sum > ntemp) ntemp = sum;
1994       }
1995       ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPIU_MAX,((PetscObject)mat)->comm);CHKERRQ(ierr);
1996     } else {
1997       SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_SUP,"No support for two norm");
1998     }
1999   }
2000   PetscFunctionReturn(0);
2001 }
2002 
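/*
   The reductions above compute, for the global matrix M with entries m_ij:

     NORM_FROBENIUS:  sqrt(sum_ij |m_ij|^2)     local sums of squares, MPIU_SUM, then sqrt
     NORM_1:          max_j sum_i |m_ij|        per-column sums reduced with MPIU_SUM, then max
     NORM_INFINITY:   max_i sum_j |m_ij|        per-row sums reduced with MPIU_MAX

   Illustrative call (hypothetical):

     PetscReal nrm;
     ierr = MatNorm(mat,NORM_INFINITY,&nrm);CHKERRQ(ierr);
*/
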
2003 #undef __FUNCT__
2004 #define __FUNCT__ "MatTranspose_MPIAIJ"
2005 PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout)
2006 {
2007   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2008   Mat_SeqAIJ     *Aloc=(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data;
2009   PetscErrorCode ierr;
2010   PetscInt       M = A->rmap->N,N = A->cmap->N,ma,na,mb,*ai,*aj,*bi,*bj,row,*cols,*cols_tmp,i,*d_nnz;
2011   PetscInt       cstart=A->cmap->rstart,ncol;
2012   Mat            B;
2013   MatScalar      *array;
2014 
2015   PetscFunctionBegin;
2016   if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
2017 
2018   ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n;
2019   ai = Aloc->i; aj = Aloc->j;
2020   bi = Bloc->i; bj = Bloc->j;
2021   if (reuse == MAT_INITIAL_MATRIX || *matout == A) {
2022     /* compute d_nnz for preallocation; o_nnz is approximated by d_nnz to avoid communication */
2023     ierr = PetscMalloc((1+na)*sizeof(PetscInt),&d_nnz);CHKERRQ(ierr);
2024     ierr = PetscMemzero(d_nnz,(1+na)*sizeof(PetscInt));CHKERRQ(ierr);
2025     for (i=0; i<ai[ma]; i++){
2026       d_nnz[aj[i]] ++;
2027       aj[i] += cstart; /* global col index to be used by MatSetValues() */
2028     }
2029 
2030     ierr = MatCreate(((PetscObject)A)->comm,&B);CHKERRQ(ierr);
2031     ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
2032     ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
2033     ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,d_nnz);CHKERRQ(ierr);
2034     ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
2035     ierr = PetscFree(d_nnz);CHKERRQ(ierr);
2036   } else {
2037     B = *matout;
2038   }
2039 
2040   /* copy over the A part */
2041   array = Aloc->a;
2042   row = A->rmap->rstart;
2043   for (i=0; i<ma; i++) {
2044     ncol = ai[i+1]-ai[i];
2045     ierr = MatSetValues(B,ncol,aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
2046     row++; array += ncol; aj += ncol;
2047   }
2048   aj = Aloc->j;
2049   for (i=0; i<ai[ma]; i++) aj[i] -= cstart; /* resume local col index */
2050 
2051   /* copy over the B part */
2052   ierr = PetscMalloc(bi[mb]*sizeof(PetscInt),&cols);CHKERRQ(ierr);
2053   ierr = PetscMemzero(cols,bi[mb]*sizeof(PetscInt));CHKERRQ(ierr);
2054   array = Bloc->a;
2055   row = A->rmap->rstart;
2056   for (i=0; i<bi[mb]; i++) {cols[i] = a->garray[bj[i]];}
2057   cols_tmp = cols;
2058   for (i=0; i<mb; i++) {
2059     ncol = bi[i+1]-bi[i];
2060     ierr = MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
2061     row++; array += ncol; cols_tmp += ncol;
2062   }
2063   ierr = PetscFree(cols);CHKERRQ(ierr);
2064 
2065   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2066   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2067   if (reuse == MAT_INITIAL_MATRIX || *matout != A) {
2068     *matout = B;
2069   } else {
2070     ierr = MatHeaderMerge(A,B);CHKERRQ(ierr);
2071   }
2072   PetscFunctionReturn(0);
2073 }
2074 
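/*
   Illustrative calls for the two reuse modes handled above (hypothetical):

     Mat At;
     ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr);   creates a new matrix
     ierr = MatTranspose(A,MAT_REUSE_MATRIX,&A);CHKERRQ(ierr);      in place, square matrices only

   Note the preallocation uses d_nnz for both the diagonal and off-diagonal
   parts (o_nnz is only approximated), so MAT_NEW_NONZERO_ALLOCATION_ERR is
   turned off before the transposed entries are inserted.
*/
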
2075 #undef __FUNCT__
2076 #define __FUNCT__ "MatDiagonalScale_MPIAIJ"
2077 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
2078 {
2079   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2080   Mat            a = aij->A,b = aij->B;
2081   PetscErrorCode ierr;
2082   PetscInt       s1,s2,s3;
2083 
2084   PetscFunctionBegin;
2085   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
2086   if (rr) {
2087     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
2088     if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
2089     /* Overlap communication with computation. */
2090     ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2091   }
2092   if (ll) {
2093     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
2094     if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
2095     ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr);
2096   }
2097   /* scale  the diagonal block */
2098   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
2099 
2100   if (rr) {
2101     /* Do a scatter end and then right scale the off-diagonal block */
2102     ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2103     ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr);
2104   }
2105 
2106   PetscFunctionReturn(0);
2107 }
2108 
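/*
   Illustrative call (hypothetical): computes A <- diag(ll) A diag(rr), where
   ll is distributed like A's rows and rr like A's columns:

     ierr = MatDiagonalScale(A,ll,rr);CHKERRQ(ierr);

   The scatter of rr into aij->lvec is begun before the left scaling so that
   the communication overlaps the purely local work.
*/
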
2109 #undef __FUNCT__
2110 #define __FUNCT__ "MatSetBlockSize_MPIAIJ"
2111 PetscErrorCode MatSetBlockSize_MPIAIJ(Mat A,PetscInt bs)
2112 {
2113   Mat_MPIAIJ     *a   = (Mat_MPIAIJ*)A->data;
2114   PetscErrorCode ierr;
2115 
2116   PetscFunctionBegin;
2117   ierr = MatSetBlockSize(a->A,bs);CHKERRQ(ierr);
2118   ierr = MatSetBlockSize(a->B,bs);CHKERRQ(ierr);
2119   ierr = PetscLayoutSetBlockSize(A->rmap,bs);CHKERRQ(ierr);
2120   ierr = PetscLayoutSetBlockSize(A->cmap,bs);CHKERRQ(ierr);
2121   PetscFunctionReturn(0);
2122 }
2123 #undef __FUNCT__
2124 #define __FUNCT__ "MatSetUnfactored_MPIAIJ"
2125 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A)
2126 {
2127   Mat_MPIAIJ     *a   = (Mat_MPIAIJ*)A->data;
2128   PetscErrorCode ierr;
2129 
2130   PetscFunctionBegin;
2131   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
2132   PetscFunctionReturn(0);
2133 }
2134 
2135 #undef __FUNCT__
2136 #define __FUNCT__ "MatEqual_MPIAIJ"
2137 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscBool  *flag)
2138 {
2139   Mat_MPIAIJ     *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data;
2140   Mat            a,b,c,d;
2141   PetscBool      flg;
2142   PetscErrorCode ierr;
2143 
2144   PetscFunctionBegin;
2145   a = matA->A; b = matA->B;
2146   c = matB->A; d = matB->B;
2147 
2148   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
2149   if (flg) {
2150     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
2151   }
2152   ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,((PetscObject)A)->comm);CHKERRQ(ierr);
2153   PetscFunctionReturn(0);
2154 }
2155 
2156 #undef __FUNCT__
2157 #define __FUNCT__ "MatCopy_MPIAIJ"
2158 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
2159 {
2160   PetscErrorCode ierr;
2161   Mat_MPIAIJ     *a = (Mat_MPIAIJ *)A->data;
2162   Mat_MPIAIJ     *b = (Mat_MPIAIJ *)B->data;
2163 
2164   PetscFunctionBegin;
2165   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
2166   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
2167     /* because of the column compression in the off-processor part of the matrix a->B,
2168        the number of columns in a->B and b->B may be different, hence we cannot call
2169        the MatCopy() directly on the two parts. If need be, we can provide a more
2170        efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices
2171        then copying the submatrices */
2172     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
2173   } else {
2174     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
2175     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
2176   }
2177   PetscFunctionReturn(0);
2178 }
2179 
2180 #undef __FUNCT__
2181 #define __FUNCT__ "MatSetUp_MPIAIJ"
2182 PetscErrorCode MatSetUp_MPIAIJ(Mat A)
2183 {
2184   PetscErrorCode ierr;
2185 
2186   PetscFunctionBegin;
2187   ierr =  MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
2188   PetscFunctionReturn(0);
2189 }
2190 
2191 #undef __FUNCT__
2192 #define __FUNCT__ "MatAXPY_MPIAIJ"
2193 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
2194 {
2195   PetscErrorCode ierr;
2196   PetscInt       i;
2197   Mat_MPIAIJ     *xx = (Mat_MPIAIJ *)X->data,*yy = (Mat_MPIAIJ *)Y->data;
2198   PetscBLASInt   bnz,one=1;
2199   Mat_SeqAIJ     *x,*y;
2200 
2201   PetscFunctionBegin;
2202   if (str == SAME_NONZERO_PATTERN) {
2203     PetscScalar alpha = a;
2204     x = (Mat_SeqAIJ *)xx->A->data;
2205     y = (Mat_SeqAIJ *)yy->A->data;
2206     bnz = PetscBLASIntCast(x->nz);
2207     BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
2208     x = (Mat_SeqAIJ *)xx->B->data;
2209     y = (Mat_SeqAIJ *)yy->B->data;
2210     bnz = PetscBLASIntCast(x->nz);
2211     BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
2212   } else if (str == SUBSET_NONZERO_PATTERN) {
2213     ierr = MatAXPY_SeqAIJ(yy->A,a,xx->A,str);CHKERRQ(ierr);
2214 
2215     x = (Mat_SeqAIJ *)xx->B->data;
2216     y = (Mat_SeqAIJ *)yy->B->data;
2217     if (y->xtoy && y->XtoY != xx->B) {
2218       ierr = PetscFree(y->xtoy);CHKERRQ(ierr);
2219       ierr = MatDestroy(&y->XtoY);CHKERRQ(ierr);
2220     }
2221     if (!y->xtoy) { /* get xtoy */
2222       ierr = MatAXPYGetxtoy_Private(xx->B->rmap->n,x->i,x->j,xx->garray,y->i,y->j,yy->garray,&y->xtoy);CHKERRQ(ierr);
2223       y->XtoY = xx->B;
2224       ierr = PetscObjectReference((PetscObject)xx->B);CHKERRQ(ierr);
2225     }
2226     for (i=0; i<x->nz; i++) y->a[y->xtoy[i]] += a*(x->a[i]);
2227   } else {
2228     Mat B;
2229     PetscInt *nnz_d,*nnz_o;
2230     ierr = PetscMalloc(yy->A->rmap->N*sizeof(PetscInt),&nnz_d);CHKERRQ(ierr);
2231     ierr = PetscMalloc(yy->B->rmap->N*sizeof(PetscInt),&nnz_o);CHKERRQ(ierr);
2232     ierr = MatCreate(((PetscObject)Y)->comm,&B);CHKERRQ(ierr);
2233     ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr);
2234     ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr);
2235     ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr);
2236     ierr = MatAXPYGetPreallocation_SeqAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr);
2237     ierr = MatAXPYGetPreallocation_SeqAIJ(yy->B,xx->B,nnz_o);CHKERRQ(ierr);
2238     ierr = MatMPIAIJSetPreallocation(B,0,nnz_d,0,nnz_o);CHKERRQ(ierr);
2239     ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr);
2240     ierr = MatHeaderReplace(Y,B);CHKERRQ(ierr);
2241     ierr = PetscFree(nnz_d);CHKERRQ(ierr);
2242     ierr = PetscFree(nnz_o);CHKERRQ(ierr);
2243   }
2244   PetscFunctionReturn(0);
2245 }
2246 
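/*
   Illustrative calls for Y = a*X + Y with the three pattern hints handled
   above (hypothetical example):

     ierr = MatAXPY(Y,2.0,X,SAME_NONZERO_PATTERN);CHKERRQ(ierr);       BLAS axpy directly on the value arrays
     ierr = MatAXPY(Y,2.0,X,SUBSET_NONZERO_PATTERN);CHKERRQ(ierr);     cached x-to-y index map for the off-diagonal part
     ierr = MatAXPY(Y,2.0,X,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);  Y is repreallocated and rebuilt
*/
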
2247 extern PetscErrorCode  MatConjugate_SeqAIJ(Mat);
2248 
2249 #undef __FUNCT__
2250 #define __FUNCT__ "MatConjugate_MPIAIJ"
2251 PetscErrorCode  MatConjugate_MPIAIJ(Mat mat)
2252 {
2253 #if defined(PETSC_USE_COMPLEX)
2254   PetscErrorCode ierr;
2255   Mat_MPIAIJ     *aij = (Mat_MPIAIJ *)mat->data;
2256 
2257   PetscFunctionBegin;
2258   ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr);
2259   ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr);
2260 #else
2261   PetscFunctionBegin;
2262 #endif
2263   PetscFunctionReturn(0);
2264 }
2265 
2266 #undef __FUNCT__
2267 #define __FUNCT__ "MatRealPart_MPIAIJ"
2268 PetscErrorCode MatRealPart_MPIAIJ(Mat A)
2269 {
2270   Mat_MPIAIJ   *a = (Mat_MPIAIJ*)A->data;
2271   PetscErrorCode ierr;
2272 
2273   PetscFunctionBegin;
2274   ierr = MatRealPart(a->A);CHKERRQ(ierr);
2275   ierr = MatRealPart(a->B);CHKERRQ(ierr);
2276   PetscFunctionReturn(0);
2277 }
2278 
2279 #undef __FUNCT__
2280 #define __FUNCT__ "MatImaginaryPart_MPIAIJ"
2281 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A)
2282 {
2283   Mat_MPIAIJ   *a = (Mat_MPIAIJ*)A->data;
2284   PetscErrorCode ierr;
2285 
2286   PetscFunctionBegin;
2287   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
2288   ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
2289   PetscFunctionReturn(0);
2290 }
2291 
2292 #ifdef PETSC_HAVE_PBGL
2293 
2294 #include <boost/parallel/mpi/bsp_process_group.hpp>
2295 #include <boost/graph/distributed/ilu_default_graph.hpp>
2296 #include <boost/graph/distributed/ilu_0_block.hpp>
2297 #include <boost/graph/distributed/ilu_preconditioner.hpp>
2298 #include <boost/graph/distributed/petsc/interface.hpp>
2299 #include <boost/multi_array.hpp>
2300 #include <boost/parallel/distributed_property_map.hpp>
2301 
2302 #undef __FUNCT__
2303 #define __FUNCT__ "MatILUFactorSymbolic_MPIAIJ"
2304 /*
2305   This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu>
2306 */
2307 PetscErrorCode MatILUFactorSymbolic_MPIAIJ(Mat fact,Mat A, IS isrow, IS iscol, const MatFactorInfo *info)
2308 {
2309   namespace petsc = boost::distributed::petsc;
2310 
2311   namespace graph_dist = boost::graph::distributed;
2312   using boost::graph::distributed::ilu_default::process_group_type;
2313   using boost::graph::ilu_permuted;
2314 
2315   PetscBool       row_identity, col_identity;
2316   PetscContainer  c;
2317   PetscInt        m, n, M, N;
2318   PetscErrorCode  ierr;
2319 
2320   PetscFunctionBegin;
2321   if (info->levels != 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only levels = 0 supported for parallel ilu");
2322   ierr = ISIdentity(isrow, &row_identity);CHKERRQ(ierr);
2323   ierr = ISIdentity(iscol, &col_identity);CHKERRQ(ierr);
2324   if (!row_identity || !col_identity) {
2325     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for parallel ILU");
2326   }
2327 
2328   process_group_type pg;
2329   typedef graph_dist::ilu_default::ilu_level_graph_type  lgraph_type;
2330   lgraph_type*   lgraph_p = new lgraph_type(petsc::num_global_vertices(A), pg, petsc::matrix_distribution(A, pg));
2331   lgraph_type&   level_graph = *lgraph_p;
2332   graph_dist::ilu_default::graph_type&            graph(level_graph.graph);
2333 
2334   petsc::read_matrix(A, graph, get(boost::edge_weight, graph));
2335   ilu_permuted(level_graph);
2336 
2337   /* put together the new matrix */
2338   ierr = MatCreate(((PetscObject)A)->comm, fact);CHKERRQ(ierr);
2339   ierr = MatGetLocalSize(A, &m, &n);CHKERRQ(ierr);
2340   ierr = MatGetSize(A, &M, &N);CHKERRQ(ierr);
2341   ierr = MatSetSizes(fact, m, n, M, N);CHKERRQ(ierr);
2342   ierr = MatSetType(fact, ((PetscObject)A)->type_name);CHKERRQ(ierr);
2343   ierr = MatAssemblyBegin(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2344   ierr = MatAssemblyEnd(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2345 
2346   ierr = PetscContainerCreate(((PetscObject)A)->comm, &c);CHKERRQ(ierr);
2347   ierr = PetscContainerSetPointer(c, lgraph_p);CHKERRQ(ierr);
2348   ierr = PetscObjectCompose((PetscObject) (fact), "graph", (PetscObject) c);CHKERRQ(ierr);
2349   ierr = PetscContainerDestroy(&c);CHKERRQ(ierr);
2350   PetscFunctionReturn(0);
2351 }
2352 
2353 #undef __FUNCT__
2354 #define __FUNCT__ "MatLUFactorNumeric_MPIAIJ"
2355 PetscErrorCode MatLUFactorNumeric_MPIAIJ(Mat B,Mat A, const MatFactorInfo *info)
2356 {
2357   PetscFunctionBegin;
2358   PetscFunctionReturn(0);
2359 }
2360 
2361 #undef __FUNCT__
2362 #define __FUNCT__ "MatSolve_MPIAIJ"
2363 /*
2364   This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu>
2365 */
2366 PetscErrorCode MatSolve_MPIAIJ(Mat A, Vec b, Vec x)
2367 {
2368   namespace graph_dist = boost::graph::distributed;
2369 
2370   typedef graph_dist::ilu_default::ilu_level_graph_type  lgraph_type;
2371   lgraph_type*   lgraph_p;
2372   PetscContainer c;
2373   PetscErrorCode ierr;
2374 
2375   PetscFunctionBegin;
2376   ierr = PetscObjectQuery((PetscObject) A, "graph", (PetscObject *) &c);CHKERRQ(ierr);
2377   ierr = PetscContainerGetPointer(c, (void **) &lgraph_p);CHKERRQ(ierr);
2378   ierr = VecCopy(b, x);CHKERRQ(ierr);
2379 
2380   PetscScalar* array_x;
2381   ierr = VecGetArray(x, &array_x);CHKERRQ(ierr);
2382   PetscInt sx;
2383   ierr = VecGetSize(x, &sx);CHKERRQ(ierr);
2384 
2385   PetscScalar* array_b;
2386   ierr = VecGetArray(b, &array_b);CHKERRQ(ierr);
2387   PetscInt sb;
2388   ierr = VecGetSize(b, &sb);CHKERRQ(ierr);
2389 
2390   lgraph_type&   level_graph = *lgraph_p;
2391   graph_dist::ilu_default::graph_type&            graph(level_graph.graph);
2392 
2393   typedef boost::multi_array_ref<PetscScalar, 1> array_ref_type;
2394   array_ref_type                                 ref_b(array_b, boost::extents[num_vertices(graph)]),
2395                                                  ref_x(array_x, boost::extents[num_vertices(graph)]);
2396 
2397   typedef boost::iterator_property_map<array_ref_type::iterator,
2398                                 boost::property_map<graph_dist::ilu_default::graph_type, boost::vertex_index_t>::type>  gvector_type;
2399   gvector_type                                   vector_b(ref_b.begin(), get(boost::vertex_index, graph)),
2400                                                  vector_x(ref_x.begin(), get(boost::vertex_index, graph));
2401 
2402   ilu_set_solve(*lgraph_p, vector_b, vector_x);
2403 
2404   PetscFunctionReturn(0);
2405 }
2406 #endif
2407 
2408 typedef struct { /* used by MatGetRedundantMatrix() for reusing matredundant */
2409   PetscInt       nzlocal,nsends,nrecvs;
2410   PetscMPIInt    *send_rank,*recv_rank;
2411   PetscInt       *sbuf_nz,*rbuf_nz,*sbuf_j,**rbuf_j;
2412   PetscScalar    *sbuf_a,**rbuf_a;
2413   PetscErrorCode (*Destroy)(Mat);
2414 } Mat_Redundant;
2415 
2416 #undef __FUNCT__
2417 #define __FUNCT__ "PetscContainerDestroy_MatRedundant"
2418 PetscErrorCode PetscContainerDestroy_MatRedundant(void *ptr)
2419 {
2420   PetscErrorCode       ierr;
2421   Mat_Redundant        *redund=(Mat_Redundant*)ptr;
2422   PetscInt             i;
2423 
2424   PetscFunctionBegin;
2425   ierr = PetscFree2(redund->send_rank,redund->recv_rank);CHKERRQ(ierr);
2426   ierr = PetscFree(redund->sbuf_j);CHKERRQ(ierr);
2427   ierr = PetscFree(redund->sbuf_a);CHKERRQ(ierr);
2428   for (i=0; i<redund->nrecvs; i++){
2429     ierr = PetscFree(redund->rbuf_j[i]);CHKERRQ(ierr);
2430     ierr = PetscFree(redund->rbuf_a[i]);CHKERRQ(ierr);
2431   }
2432   ierr = PetscFree4(redund->sbuf_nz,redund->rbuf_nz,redund->rbuf_j,redund->rbuf_a);CHKERRQ(ierr);
2433   ierr = PetscFree(redund);CHKERRQ(ierr);
2434   PetscFunctionReturn(0);
2435 }
2436 
2437 #undef __FUNCT__
2438 #define __FUNCT__ "MatDestroy_MatRedundant"
2439 PetscErrorCode MatDestroy_MatRedundant(Mat A)
2440 {
2441   PetscErrorCode  ierr;
2442   PetscContainer  container;
2443   Mat_Redundant   *redund=PETSC_NULL;
2444 
2445   PetscFunctionBegin;
2446   ierr = PetscObjectQuery((PetscObject)A,"Mat_Redundant",(PetscObject *)&container);CHKERRQ(ierr);
2447   if (!container) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Container does not exist");
2448   ierr = PetscContainerGetPointer(container,(void **)&redund);CHKERRQ(ierr);
2449   A->ops->destroy = redund->Destroy;
2450   ierr = PetscObjectCompose((PetscObject)A,"Mat_Redundant",0);CHKERRQ(ierr);
2451   if (A->ops->destroy) {
2452     ierr = (*A->ops->destroy)(A);CHKERRQ(ierr);
2453   }
2454   PetscFunctionReturn(0);
2455 }
2456 
2457 #undef __FUNCT__
2458 #define __FUNCT__ "MatGetRedundantMatrix_MPIAIJ"
2459 PetscErrorCode MatGetRedundantMatrix_MPIAIJ(Mat mat,PetscInt nsubcomm,MPI_Comm subcomm,PetscInt mlocal_sub,MatReuse reuse,Mat *matredundant)
2460 {
2461   PetscMPIInt    rank,size;
2462   MPI_Comm       comm=((PetscObject)mat)->comm;
2463   PetscErrorCode ierr;
2464   PetscInt       nsends=0,nrecvs=0,i,rownz_max=0;
2465   PetscMPIInt    *send_rank=PETSC_NULL,*recv_rank=PETSC_NULL;
2466   PetscInt       *rowrange=mat->rmap->range;
2467   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2468   Mat            A=aij->A,B=aij->B,C=*matredundant;
2469   Mat_SeqAIJ     *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data;
2470   PetscScalar    *sbuf_a;
2471   PetscInt       nzlocal=a->nz+b->nz;
2472   PetscInt       j,cstart=mat->cmap->rstart,cend=mat->cmap->rend,row,nzA,nzB,ncols,*cworkA,*cworkB;
2473   PetscInt       rstart=mat->rmap->rstart,rend=mat->rmap->rend,*bmap=aij->garray,M,N;
2474   PetscInt       *cols,ctmp,lwrite,*rptr,l,*sbuf_j;
2475   MatScalar      *aworkA,*aworkB;
2476   PetscScalar    *vals;
2477   PetscMPIInt    tag1,tag2,tag3,imdex;
2478   MPI_Request    *s_waits1=PETSC_NULL,*s_waits2=PETSC_NULL,*s_waits3=PETSC_NULL,
2479                  *r_waits1=PETSC_NULL,*r_waits2=PETSC_NULL,*r_waits3=PETSC_NULL;
2480   MPI_Status     recv_status,*send_status;
2481   PetscInt       *sbuf_nz=PETSC_NULL,*rbuf_nz=PETSC_NULL,count;
2482   PetscInt       **rbuf_j=PETSC_NULL;
2483   PetscScalar    **rbuf_a=PETSC_NULL;
2484   Mat_Redundant  *redund=PETSC_NULL;
2485   PetscContainer container;
2486 
2487   PetscFunctionBegin;
2488   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2489   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2490 
2491   if (reuse == MAT_REUSE_MATRIX) {
2492     ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr);
2493     if (M != N || M != mat->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong global size");
2494     ierr = MatGetLocalSize(C,&M,&N);CHKERRQ(ierr);
2495     if (M != N || M != mlocal_sub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong local size");
2496     ierr = PetscObjectQuery((PetscObject)C,"Mat_Redundant",(PetscObject *)&container);CHKERRQ(ierr);
2497     if (!container) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Container does not exist");
2498     ierr = PetscContainerGetPointer(container,(void **)&redund);CHKERRQ(ierr);
2499     if (nzlocal != redund->nzlocal) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong nzlocal");
2500 
2501     nsends    = redund->nsends;
2502     nrecvs    = redund->nrecvs;
2503     send_rank = redund->send_rank;
2504     recv_rank = redund->recv_rank;
2505     sbuf_nz   = redund->sbuf_nz;
2506     rbuf_nz   = redund->rbuf_nz;
2507     sbuf_j    = redund->sbuf_j;
2508     sbuf_a    = redund->sbuf_a;
2509     rbuf_j    = redund->rbuf_j;
2510     rbuf_a    = redund->rbuf_a;
2511   }
2512 
2513   if (reuse == MAT_INITIAL_MATRIX){
2514     PetscMPIInt  subrank,subsize;
2515     PetscInt     nleftover,np_subcomm;
2516     /* get the destination processors' id send_rank, nsends and nrecvs */
2517     ierr = MPI_Comm_rank(subcomm,&subrank);CHKERRQ(ierr);
2518     ierr = MPI_Comm_size(subcomm,&subsize);CHKERRQ(ierr);
2519     ierr = PetscMalloc2(size,PetscMPIInt,&send_rank,size,PetscMPIInt,&recv_rank);CHKERRQ(ierr);
2520     np_subcomm = size/nsubcomm;
2521     nleftover  = size - nsubcomm*np_subcomm;
2522     nsends = 0; nrecvs = 0;
2523     for (i=0; i<size; i++){ /* i=rank*/
2524       if (subrank == i/nsubcomm && rank != i){ /* my_subrank == other's subrank */
2525         send_rank[nsends] = i; nsends++;
2526         recv_rank[nrecvs++] = i;
2527       }
2528     }
2529     if (rank >= size - nleftover){/* this proc is a leftover processor */
2530       i = size-nleftover-1;
2531       j = 0;
2532       while (j < nsubcomm - nleftover){
2533         send_rank[nsends++] = i;
2534         i--; j++;
2535       }
2536     }
2537 
2538     if (nleftover && subsize == size/nsubcomm && subrank==subsize-1){ /* this proc recvs from leftover processors */
2539       for (i=0; i<nleftover; i++){
2540         recv_rank[nrecvs++] = size-nleftover+i;
2541       }
2542     }
2543 
2544     /* allocate sbuf_j, sbuf_a */
2545     i = nzlocal + rowrange[rank+1] - rowrange[rank] + 2;
2546     ierr = PetscMalloc(i*sizeof(PetscInt),&sbuf_j);CHKERRQ(ierr);
2547     ierr = PetscMalloc((nzlocal+1)*sizeof(PetscScalar),&sbuf_a);CHKERRQ(ierr);
2548   } /* end of if (reuse == MAT_INITIAL_MATRIX) */
2549 
2550   /* copy mat's local entries into the buffers */
2551   if (reuse == MAT_INITIAL_MATRIX){
2552     rownz_max = 0;
2553     rptr = sbuf_j;
2554     cols = sbuf_j + rend-rstart + 1;
2555     vals = sbuf_a;
2556     rptr[0] = 0;
2557     for (i=0; i<rend-rstart; i++){
2558       row = i + rstart;
2559       nzA    = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i];
2560       ncols  = nzA + nzB;
2561       cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i];
2562       aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i];
2563       /* load the column indices for this row into cols */
2564       lwrite = 0;
2565       for (l=0; l<nzB; l++) {
2566         if ((ctmp = bmap[cworkB[l]]) < cstart){
2567           vals[lwrite]   = aworkB[l];
2568           cols[lwrite++] = ctmp;
2569         }
2570       }
2571       for (l=0; l<nzA; l++){
2572         vals[lwrite]   = aworkA[l];
2573         cols[lwrite++] = cstart + cworkA[l];
2574       }
2575       for (l=0; l<nzB; l++) {
2576         if ((ctmp = bmap[cworkB[l]]) >= cend){
2577           vals[lwrite]   = aworkB[l];
2578           cols[lwrite++] = ctmp;
2579         }
2580       }
2581       vals += ncols;
2582       cols += ncols;
2583       rptr[i+1] = rptr[i] + ncols;
2584       if (rownz_max < ncols) rownz_max = ncols;
2585     }
2586     if (rptr[rend-rstart] != a->nz + b->nz) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB, "rptr[%D] %D != %D + %D",rend-rstart,rptr[rend-rstart],a->nz,b->nz);
2587   } else { /* only copy matrix values into sbuf_a */
2588     rptr = sbuf_j;
2589     vals = sbuf_a;
2590     rptr[0] = 0;
2591     for (i=0; i<rend-rstart; i++){
2592       row = i + rstart;
2593       nzA    = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i];
2594       ncols  = nzA + nzB;
2595       cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i];
2596       aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i];
2597       lwrite = 0;
2598       for (l=0; l<nzB; l++) {
2599         if ((ctmp = bmap[cworkB[l]]) < cstart) vals[lwrite++] = aworkB[l];
2600       }
2601       for (l=0; l<nzA; l++) vals[lwrite++] = aworkA[l];
2602       for (l=0; l<nzB; l++) {
2603         if ((ctmp = bmap[cworkB[l]]) >= cend) vals[lwrite++] = aworkB[l];
2604       }
2605       vals += ncols;
2606       rptr[i+1] = rptr[i] + ncols;
2607     }
2608   } /* end of if (reuse == MAT_INITIAL_MATRIX) */
2609 
2610   /* send nzlocal to others, and recv other's nzlocal */
2611   /*--------------------------------------------------*/
2612   if (reuse == MAT_INITIAL_MATRIX){
2613     ierr = PetscMalloc2(3*(nsends + nrecvs)+1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr);
2614     s_waits2 = s_waits3 + nsends;
2615     s_waits1 = s_waits2 + nsends;
2616     r_waits1 = s_waits1 + nsends;
2617     r_waits2 = r_waits1 + nrecvs;
2618     r_waits3 = r_waits2 + nrecvs;
2619   } else {
2620     ierr = PetscMalloc2(nsends + nrecvs +1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr);
2621     r_waits3 = s_waits3 + nsends;
2622   }
2623 
2624   ierr = PetscObjectGetNewTag((PetscObject)mat,&tag3);CHKERRQ(ierr);
2625   if (reuse == MAT_INITIAL_MATRIX){
2626     /* get new tags to keep the communication clean */
2627     ierr = PetscObjectGetNewTag((PetscObject)mat,&tag1);CHKERRQ(ierr);
2628     ierr = PetscObjectGetNewTag((PetscObject)mat,&tag2);CHKERRQ(ierr);
2629     ierr = PetscMalloc4(nsends,PetscInt,&sbuf_nz,nrecvs,PetscInt,&rbuf_nz,nrecvs,PetscInt*,&rbuf_j,nrecvs,PetscScalar*,&rbuf_a);CHKERRQ(ierr);
2630 
2631     /* post receives of other's nzlocal */
2632     for (i=0; i<nrecvs; i++){
2633       ierr = MPI_Irecv(rbuf_nz+i,1,MPIU_INT,MPI_ANY_SOURCE,tag1,comm,r_waits1+i);CHKERRQ(ierr);
2634     }
2635     /* send nzlocal to others */
2636     for (i=0; i<nsends; i++){
2637       sbuf_nz[i] = nzlocal;
2638       ierr = MPI_Isend(sbuf_nz+i,1,MPIU_INT,send_rank[i],tag1,comm,s_waits1+i);CHKERRQ(ierr);
2639     }
2640     /* wait on receives of nzlocal; allocate space for rbuf_j, rbuf_a */
2641     count = nrecvs;
2642     while (count) {
2643       ierr = MPI_Waitany(nrecvs,r_waits1,&imdex,&recv_status);CHKERRQ(ierr);
2644       recv_rank[imdex] = recv_status.MPI_SOURCE;
2645       /* allocate rbuf_a and rbuf_j; then post receives of rbuf_j */
2646       ierr = PetscMalloc((rbuf_nz[imdex]+1)*sizeof(PetscScalar),&rbuf_a[imdex]);CHKERRQ(ierr);
2647 
2648       i = rowrange[recv_status.MPI_SOURCE+1] - rowrange[recv_status.MPI_SOURCE]; /* number of expected mat->i */
2649       rbuf_nz[imdex] += i + 2;
2650       ierr = PetscMalloc(rbuf_nz[imdex]*sizeof(PetscInt),&rbuf_j[imdex]);CHKERRQ(ierr);
2651       ierr = MPI_Irecv(rbuf_j[imdex],rbuf_nz[imdex],MPIU_INT,recv_status.MPI_SOURCE,tag2,comm,r_waits2+imdex);CHKERRQ(ierr);
2652       count--;
2653     }
2654     /* wait on sends of nzlocal */
2655     if (nsends) {ierr = MPI_Waitall(nsends,s_waits1,send_status);CHKERRQ(ierr);}
2656     /* send mat->i,j to others, and recv from other's */
2657     /*------------------------------------------------*/
2658     for (i=0; i<nsends; i++){
2659       j = nzlocal + rowrange[rank+1] - rowrange[rank] + 1;
2660       ierr = MPI_Isend(sbuf_j,j,MPIU_INT,send_rank[i],tag2,comm,s_waits2+i);CHKERRQ(ierr);
2661     }
2662     /* wait on receives of mat->i,j */
2663     /*------------------------------*/
2664     count = nrecvs;
2665     while (count) {
2666       ierr = MPI_Waitany(nrecvs,r_waits2,&imdex,&recv_status);CHKERRQ(ierr);
2667       if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE);
2668       count--;
2669     }
2670     /* wait on sends of mat->i,j */
2671     /*---------------------------*/
2672     if (nsends) {
2673       ierr = MPI_Waitall(nsends,s_waits2,send_status);CHKERRQ(ierr);
2674     }
2675   } /* endof if (reuse == MAT_INITIAL_MATRIX) */
2676 
2677   /* post receives, send and receive mat->a */
2678   /*----------------------------------------*/
2679   for (imdex=0; imdex<nrecvs; imdex++) {
2680     ierr = MPI_Irecv(rbuf_a[imdex],rbuf_nz[imdex],MPIU_SCALAR,recv_rank[imdex],tag3,comm,r_waits3+imdex);CHKERRQ(ierr);
2681   }
2682   for (i=0; i<nsends; i++){
2683     ierr = MPI_Isend(sbuf_a,nzlocal,MPIU_SCALAR,send_rank[i],tag3,comm,s_waits3+i);CHKERRQ(ierr);
2684   }
2685   count = nrecvs;
2686   while (count) {
2687     ierr = MPI_Waitany(nrecvs,r_waits3,&imdex,&recv_status);CHKERRQ(ierr);
2688     if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE);
2689     count--;
2690   }
2691   if (nsends) {
2692     ierr = MPI_Waitall(nsends,s_waits3,send_status);CHKERRQ(ierr);
2693   }
2694 
2695   ierr = PetscFree2(s_waits3,send_status);CHKERRQ(ierr);
2696 
2697   /* create redundant matrix */
2698   /*-------------------------*/
2699   if (reuse == MAT_INITIAL_MATRIX){
2700     /* compute rownz_max for preallocation */
2701     for (imdex=0; imdex<nrecvs; imdex++){
2702       j = rowrange[recv_rank[imdex]+1] - rowrange[recv_rank[imdex]];
2703       rptr = rbuf_j[imdex];
2704       for (i=0; i<j; i++){
2705         ncols = rptr[i+1] - rptr[i];
2706         if (rownz_max < ncols) rownz_max = ncols;
2707       }
2708     }
2709 
2710     ierr = MatCreate(subcomm,&C);CHKERRQ(ierr);
2711     ierr = MatSetSizes(C,mlocal_sub,mlocal_sub,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr);
2712     ierr = MatSetFromOptions(C);CHKERRQ(ierr);
2713     ierr = MatSeqAIJSetPreallocation(C,rownz_max,PETSC_NULL);CHKERRQ(ierr);
2714     ierr = MatMPIAIJSetPreallocation(C,rownz_max,PETSC_NULL,rownz_max,PETSC_NULL);CHKERRQ(ierr);
2715   } else {
2716     C = *matredundant;
2717   }
2718 
2719   /* insert local matrix entries */
2720   rptr = sbuf_j;
2721   cols = sbuf_j + rend-rstart + 1;
2722   vals = sbuf_a;
2723   for (i=0; i<rend-rstart; i++){
2724     row   = i + rstart;
2725     ncols = rptr[i+1] - rptr[i];
2726     ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
2727     vals += ncols;
2728     cols += ncols;
2729   }
2730   /* insert received matrix entries */
2731   for (imdex=0; imdex<nrecvs; imdex++){
2732     rstart = rowrange[recv_rank[imdex]];
2733     rend   = rowrange[recv_rank[imdex]+1];
2734     rptr = rbuf_j[imdex];
2735     cols = rbuf_j[imdex] + rend-rstart + 1;
2736     vals = rbuf_a[imdex];
2737     for (i=0; i<rend-rstart; i++){
2738       row   = i + rstart;
2739       ncols = rptr[i+1] - rptr[i];
2740       ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
2741       vals += ncols;
2742       cols += ncols;
2743     }
2744   }
2745   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2746   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2747   ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr);
2748   if (M != mat->rmap->N || N != mat->cmap->N) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"redundant mat size %d x %d != input mat size %d x %d",M,N,mat->rmap->N,mat->cmap->N);
2749   if (reuse == MAT_INITIAL_MATRIX) {
2750     PetscContainer container;
2751     *matredundant = C;
2752     /* create a supporting struct and attach it to C for reuse */
2753     ierr = PetscNewLog(C,Mat_Redundant,&redund);CHKERRQ(ierr);
2754     ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
2755     ierr = PetscContainerSetPointer(container,redund);CHKERRQ(ierr);
2756     ierr = PetscContainerSetUserDestroy(container,PetscContainerDestroy_MatRedundant);CHKERRQ(ierr);
2757     ierr = PetscObjectCompose((PetscObject)C,"Mat_Redundant",(PetscObject)container);CHKERRQ(ierr);
2758     ierr = PetscContainerDestroy(&container);CHKERRQ(ierr);
2759 
2760     redund->nzlocal = nzlocal;
2761     redund->nsends  = nsends;
2762     redund->nrecvs  = nrecvs;
2763     redund->send_rank = send_rank;
2764     redund->recv_rank = recv_rank;
2765     redund->sbuf_nz = sbuf_nz;
2766     redund->rbuf_nz = rbuf_nz;
2767     redund->sbuf_j  = sbuf_j;
2768     redund->sbuf_a  = sbuf_a;
2769     redund->rbuf_j  = rbuf_j;
2770     redund->rbuf_a  = rbuf_a;
2771 
2772     redund->Destroy = C->ops->destroy;
2773     C->ops->destroy = MatDestroy_MatRedundant;
2774   }
2775   PetscFunctionReturn(0);
2776 }
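/*
   Usage sketch for the routine above (hedged: the variable names are
   illustrative and the argument list follows this PETSc version's
   MatGetRedundantMatrix() declaration). The routine is normally reached
   through MatGetRedundantMatrix(), which builds, on each sub-communicator,
   a matrix holding a complete copy of the parallel matrix:

     Mat C;
     ierr = MatGetRedundantMatrix(A,nsubcomm,subcomm,mlocal,MAT_INITIAL_MATRIX,&C);CHKERRQ(ierr);
     ...                                   change the entries of A ...
     ierr = MatGetRedundantMatrix(A,nsubcomm,subcomm,mlocal,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr);

   The MAT_REUSE_MATRIX call skips the index exchange and only re-sends the
   numerical values, reusing the communication pattern cached in the
   Mat_Redundant container composed with C above.
*/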
2777 
2778 #undef __FUNCT__
2779 #define __FUNCT__ "MatGetRowMaxAbs_MPIAIJ"
2780 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2781 {
2782   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2783   PetscErrorCode ierr;
2784   PetscInt       i,*idxb = 0;
2785   PetscScalar    *va,*vb;
2786   Vec            vtmp;
2787 
2788   PetscFunctionBegin;
2789   ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr);
2790   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2791   if (idx) {
2792     for (i=0; i<A->rmap->n; i++) {
2793       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2794     }
2795   }
2796 
2797   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
2798   if (idx) {
2799     ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr);
2800   }
2801   ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
2802   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
2803 
2804   for (i=0; i<A->rmap->n; i++){
2805     if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
2806       va[i] = vb[i];
2807       if (idx) idx[i] = a->garray[idxb[i]];
2808     }
2809   }
2810 
2811   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2812   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
2813   ierr = PetscFree(idxb);CHKERRQ(ierr);
2814   ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
2815   PetscFunctionReturn(0);
2816 }
2817 
2818 #undef __FUNCT__
2819 #define __FUNCT__ "MatGetRowMinAbs_MPIAIJ"
2820 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2821 {
2822   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2823   PetscErrorCode ierr;
2824   PetscInt       i,*idxb = 0;
2825   PetscScalar    *va,*vb;
2826   Vec            vtmp;
2827 
2828   PetscFunctionBegin;
2829   ierr = MatGetRowMinAbs(a->A,v,idx);CHKERRQ(ierr);
2830   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2831   if (idx) {
2832     for (i=0; i<A->rmap->n; i++) {
2833       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2834     }
2835   }
2836 
2837   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
2838   if (idx) {
2839     ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr);
2840   }
2841   ierr = MatGetRowMinAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
2842   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
2843 
2844   for (i=0; i<A->rmap->n; i++){
2845     if (PetscAbsScalar(va[i]) > PetscAbsScalar(vb[i])) {
2846       va[i] = vb[i];
2847       if (idx) idx[i] = a->garray[idxb[i]];
2848     }
2849   }
2850 
2851   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2852   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
2853   ierr = PetscFree(idxb);CHKERRQ(ierr);
2854   ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
2855   PetscFunctionReturn(0);
2856 }
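/*
   Usage sketch covering MatGetRowMaxAbs()/MatGetRowMinAbs() above (hedged:
   v must have the same parallel row layout as A, and idx may be PETSC_NULL
   when the column locations of the extrema are not needed):

     Vec      v;
     PetscInt *idx,m;
     ierr = MatGetLocalSize(A,&m,PETSC_NULL);CHKERRQ(ierr);
     ierr = MatGetVecs(A,PETSC_NULL,&v);CHKERRQ(ierr);          left vector: one entry per local row
     ierr = PetscMalloc(m*sizeof(PetscInt),&idx);CHKERRQ(ierr);
     ierr = MatGetRowMaxAbs(A,v,idx);CHKERRQ(ierr);
*/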
2857 
2858 #undef __FUNCT__
2859 #define __FUNCT__ "MatGetRowMin_MPIAIJ"
2860 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2861 {
2862   Mat_MPIAIJ    *mat    = (Mat_MPIAIJ *) A->data;
2863   PetscInt       n      = A->rmap->n;
2864   PetscInt       cstart = A->cmap->rstart;
2865   PetscInt      *cmap   = mat->garray;
2866   PetscInt      *diagIdx, *offdiagIdx;
2867   Vec            diagV, offdiagV;
2868   PetscScalar   *a, *diagA, *offdiagA;
2869   PetscInt       r;
2870   PetscErrorCode ierr;
2871 
2872   PetscFunctionBegin;
2873   ierr = PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);CHKERRQ(ierr);
2874   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr);
2875   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr);
2876   ierr = MatGetRowMin(mat->A, diagV,    diagIdx);CHKERRQ(ierr);
2877   ierr = MatGetRowMin(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
2878   ierr = VecGetArray(v,        &a);CHKERRQ(ierr);
2879   ierr = VecGetArray(diagV,    &diagA);CHKERRQ(ierr);
2880   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2881   for (r = 0; r < n; ++r) {
2882     if (PetscAbsScalar(diagA[r]) <= PetscAbsScalar(offdiagA[r])) {
2883       a[r]   = diagA[r];
2884       idx[r] = cstart + diagIdx[r];
2885     } else {
2886       a[r]   = offdiagA[r];
2887       idx[r] = cmap[offdiagIdx[r]];
2888     }
2889   }
2890   ierr = VecRestoreArray(v,        &a);CHKERRQ(ierr);
2891   ierr = VecRestoreArray(diagV,    &diagA);CHKERRQ(ierr);
2892   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2893   ierr = VecDestroy(&diagV);CHKERRQ(ierr);
2894   ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
2895   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
2896   PetscFunctionReturn(0);
2897 }
2898 
2899 #undef __FUNCT__
2900 #define __FUNCT__ "MatGetRowMax_MPIAIJ"
2901 PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2902 {
2903   Mat_MPIAIJ    *mat    = (Mat_MPIAIJ *) A->data;
2904   PetscInt       n      = A->rmap->n;
2905   PetscInt       cstart = A->cmap->rstart;
2906   PetscInt      *cmap   = mat->garray;
2907   PetscInt      *diagIdx, *offdiagIdx;
2908   Vec            diagV, offdiagV;
2909   PetscScalar   *a, *diagA, *offdiagA;
2910   PetscInt       r;
2911   PetscErrorCode ierr;
2912 
2913   PetscFunctionBegin;
2914   ierr = PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);CHKERRQ(ierr);
2915   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr);
2916   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr);
2917   ierr = MatGetRowMax(mat->A, diagV,    diagIdx);CHKERRQ(ierr);
2918   ierr = MatGetRowMax(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
2919   ierr = VecGetArray(v,        &a);CHKERRQ(ierr);
2920   ierr = VecGetArray(diagV,    &diagA);CHKERRQ(ierr);
2921   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2922   for (r = 0; r < n; ++r) {
2923     if (PetscAbsScalar(diagA[r]) >= PetscAbsScalar(offdiagA[r])) {
2924       a[r]   = diagA[r];
2925       idx[r] = cstart + diagIdx[r];
2926     } else {
2927       a[r]   = offdiagA[r];
2928       idx[r] = cmap[offdiagIdx[r]];
2929     }
2930   }
2931   ierr = VecRestoreArray(v,        &a);CHKERRQ(ierr);
2932   ierr = VecRestoreArray(diagV,    &diagA);CHKERRQ(ierr);
2933   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2934   ierr = VecDestroy(&diagV);CHKERRQ(ierr);
2935   ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
2936   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
2937   PetscFunctionReturn(0);
2938 }
2939 
2940 #undef __FUNCT__
2941 #define __FUNCT__ "MatGetSeqNonzeroStructure_MPIAIJ"
2942 PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat,Mat *newmat)
2943 {
2944   PetscErrorCode ierr;
2945   Mat            *dummy;
2946 
2947   PetscFunctionBegin;
2948   ierr = MatGetSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy);CHKERRQ(ierr);
2949   *newmat = *dummy;
2950   ierr = PetscFree(dummy);CHKERRQ(ierr);
2951   PetscFunctionReturn(0);
2952 }
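/*
   Sketch of typical use of the routine above (an illustration, not code from
   this file): every process receives its own sequential matrix containing the
   nonzero pattern of the entire parallel matrix, with no numerical values:

     Mat pattern;
     ierr = MatGetSeqNonzeroStructure(A,&pattern);CHKERRQ(ierr);
     ...                 inspect the pattern, e.g. with MatGetRow() ...
     ierr = MatDestroy(&pattern);CHKERRQ(ierr);
*/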
2953 
2954 extern PetscErrorCode  MatFDColoringApply_AIJ(Mat,MatFDColoring,Vec,MatStructure*,void*);
2955 
2956 #undef __FUNCT__
2957 #define __FUNCT__ "MatInvertBlockDiagonal_MPIAIJ"
2958 PetscErrorCode  MatInvertBlockDiagonal_MPIAIJ(Mat A,PetscScalar **values)
2959 {
2960   Mat_MPIAIJ    *a = (Mat_MPIAIJ*) A->data;
2961   PetscErrorCode ierr;
2962 
2963   PetscFunctionBegin;
2964   ierr = MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr);
2965   PetscFunctionReturn(0);
2966 }
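/*
   Caller's view of the routine above (a sketch; the pointer type follows the
   local declaration above, and the returned array is owned by the matrix, so
   the caller must not free it). Only the diagonal block a->A is consulted,
   since for a square MPIAIJ matrix the diagonal entries live entirely in the
   local diagonal part:

     PetscScalar *idiag;
     ierr = MatInvertBlockDiagonal(A,&idiag);CHKERRQ(ierr);
*/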
2967 
2968 
2969 /* -------------------------------------------------------------------*/
2970 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
2971        MatGetRow_MPIAIJ,
2972        MatRestoreRow_MPIAIJ,
2973        MatMult_MPIAIJ,
2974 /* 4*/ MatMultAdd_MPIAIJ,
2975        MatMultTranspose_MPIAIJ,
2976        MatMultTransposeAdd_MPIAIJ,
2977 #ifdef PETSC_HAVE_PBGL
2978        MatSolve_MPIAIJ,
2979 #else
2980        0,
2981 #endif
2982        0,
2983        0,
2984 /*10*/ 0,
2985        0,
2986        0,
2987        MatSOR_MPIAIJ,
2988        MatTranspose_MPIAIJ,
2989 /*15*/ MatGetInfo_MPIAIJ,
2990        MatEqual_MPIAIJ,
2991        MatGetDiagonal_MPIAIJ,
2992        MatDiagonalScale_MPIAIJ,
2993        MatNorm_MPIAIJ,
2994 /*20*/ MatAssemblyBegin_MPIAIJ,
2995        MatAssemblyEnd_MPIAIJ,
2996        MatSetOption_MPIAIJ,
2997        MatZeroEntries_MPIAIJ,
2998 /*24*/ MatZeroRows_MPIAIJ,
2999        0,
3000 #ifdef PETSC_HAVE_PBGL
3001        0,
3002 #else
3003        0,
3004 #endif
3005        0,
3006        0,
3007 /*29*/ MatSetUp_MPIAIJ,
3008 #ifdef PETSC_HAVE_PBGL
3009        0,
3010 #else
3011        0,
3012 #endif
3013        0,
3014        0,
3015        0,
3016 /*34*/ MatDuplicate_MPIAIJ,
3017        0,
3018        0,
3019        0,
3020        0,
3021 /*39*/ MatAXPY_MPIAIJ,
3022        MatGetSubMatrices_MPIAIJ,
3023        MatIncreaseOverlap_MPIAIJ,
3024        MatGetValues_MPIAIJ,
3025        MatCopy_MPIAIJ,
3026 /*44*/ MatGetRowMax_MPIAIJ,
3027        MatScale_MPIAIJ,
3028        0,
3029        0,
3030        MatZeroRowsColumns_MPIAIJ,
3031 /*49*/ MatSetBlockSize_MPIAIJ,
3032        0,
3033        0,
3034        0,
3035        0,
3036 /*54*/ MatFDColoringCreate_MPIAIJ,
3037        0,
3038        MatSetUnfactored_MPIAIJ,
3039        MatPermute_MPIAIJ,
3040        0,
3041 /*59*/ MatGetSubMatrix_MPIAIJ,
3042        MatDestroy_MPIAIJ,
3043        MatView_MPIAIJ,
3044        0,
3045        0,
3046 /*64*/ 0,
3047        0,
3048        0,
3049        0,
3050        0,
3051 /*69*/ MatGetRowMaxAbs_MPIAIJ,
3052        MatGetRowMinAbs_MPIAIJ,
3053        0,
3054        MatSetColoring_MPIAIJ,
3055 #if defined(PETSC_HAVE_ADIC)
3056        MatSetValuesAdic_MPIAIJ,
3057 #else
3058        0,
3059 #endif
3060        MatSetValuesAdifor_MPIAIJ,
3061 /*75*/ MatFDColoringApply_AIJ,
3062        0,
3063        0,
3064        0,
3065        0,
3066 /*80*/ 0,
3067        0,
3068        0,
3069 /*83*/ MatLoad_MPIAIJ,
3070        0,
3071        0,
3072        0,
3073        0,
3074        0,
3075 /*89*/ MatMatMult_MPIAIJ_MPIAIJ,
3076        MatMatMultSymbolic_MPIAIJ_MPIAIJ,
3077        MatMatMultNumeric_MPIAIJ_MPIAIJ,
3078        MatPtAP_Basic,
3079        MatPtAPSymbolic_MPIAIJ,
3080 /*94*/ MatPtAPNumeric_MPIAIJ,
3081        0,
3082        0,
3083        0,
3084        0,
3085 /*99*/ 0,
3086        MatPtAPSymbolic_MPIAIJ_MPIAIJ,
3087        MatPtAPNumeric_MPIAIJ_MPIAIJ,
3088        MatConjugate_MPIAIJ,
3089        0,
3090 /*104*/MatSetValuesRow_MPIAIJ,
3091        MatRealPart_MPIAIJ,
3092        MatImaginaryPart_MPIAIJ,
3093        0,
3094        0,
3095 /*109*/0,
3096        MatGetRedundantMatrix_MPIAIJ,
3097        MatGetRowMin_MPIAIJ,
3098        0,
3099        0,
3100 /*114*/MatGetSeqNonzeroStructure_MPIAIJ,
3101        0,
3102        0,
3103        0,
3104        0,
3105 /*119*/0,
3106        0,
3107        0,
3108        0,
3109        MatGetMultiProcBlock_MPIAIJ,
3110 /*124*/MatFindNonZeroRows_MPIAIJ,
3111        MatGetColumnNorms_MPIAIJ,
3112        MatInvertBlockDiagonal_MPIAIJ,
3113        0,
3114        MatGetSubMatricesParallel_MPIAIJ,
3115 /*129*/0,
3116        MatTransposeMatMult_MPIAIJ_MPIAIJ,
3117        MatTransposeMatMultSymbolic_MPIAIJ_MPIAIJ,
3118        MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ,
3119        0,
3120 /*134*/0,
3121        0,
3122        0,
3123        0,
3124        0
3125 };
3126 
3127 /* ----------------------------------------------------------------------------------------*/
3128 
3129 EXTERN_C_BEGIN
3130 #undef __FUNCT__
3131 #define __FUNCT__ "MatStoreValues_MPIAIJ"
3132 PetscErrorCode  MatStoreValues_MPIAIJ(Mat mat)
3133 {
3134   Mat_MPIAIJ     *aij = (Mat_MPIAIJ *)mat->data;
3135   PetscErrorCode ierr;
3136 
3137   PetscFunctionBegin;
3138   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
3139   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
3140   PetscFunctionReturn(0);
3141 }
3142 EXTERN_C_END
3143 
3144 EXTERN_C_BEGIN
3145 #undef __FUNCT__
3146 #define __FUNCT__ "MatRetrieveValues_MPIAIJ"
3147 PetscErrorCode  MatRetrieveValues_MPIAIJ(Mat mat)
3148 {
3149   Mat_MPIAIJ     *aij = (Mat_MPIAIJ *)mat->data;
3150   PetscErrorCode ierr;
3151 
3152   PetscFunctionBegin;
3153   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
3154   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
3155   PetscFunctionReturn(0);
3156 }
3157 EXTERN_C_END
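/*
   The store/retrieve pair above is used with the documented MatStoreValues()
   pattern (a sketch; no new nonzero locations may be created between the two
   calls):

     ierr = MatSetOption(mat,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);CHKERRQ(ierr);
     ierr = MatStoreValues(mat);CHKERRQ(ierr);
     ...            modify the values with MatSetValues() and reassemble ...
     ierr = MatRetrieveValues(mat);CHKERRQ(ierr);
*/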
3158 
3159 EXTERN_C_BEGIN
3160 #undef __FUNCT__
3161 #define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ"
3162 PetscErrorCode  MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
3163 {
3164   Mat_MPIAIJ     *b;
3165   PetscErrorCode ierr;
3166   PetscInt       i;
3167   PetscBool      d_realalloc = PETSC_FALSE,o_realalloc = PETSC_FALSE;
3168 
3169   PetscFunctionBegin;
3170   if (d_nz >= 0 || d_nnz) d_realalloc = PETSC_TRUE;
3171   if (o_nz >= 0 || o_nnz) o_realalloc = PETSC_TRUE;
3172   if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5;
3173   if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2;
3174   if (d_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz);
3175   if (o_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz);
3176 
3177   ierr = PetscLayoutSetBlockSize(B->rmap,1);CHKERRQ(ierr);
3178   ierr = PetscLayoutSetBlockSize(B->cmap,1);CHKERRQ(ierr);
3179   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3180   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3181   if (d_nnz) {
3182     for (i=0; i<B->rmap->n; i++) {
3183       if (d_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %D value %D",i,d_nnz[i]);
3184     }
3185   }
3186   if (o_nnz) {
3187     for (i=0; i<B->rmap->n; i++) {
3188       if (o_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %D value %D",i,o_nnz[i]);
3189     }
3190   }
3191   b = (Mat_MPIAIJ*)B->data;
3192 
3193   if (!B->preallocated) {
3194     /* Explicitly create 2 MATSEQAIJ matrices. */
3195     ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
3196     ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr);
3197     ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr);
3198     ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr);
3199     ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
3200     ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr);
3201     ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr);
3202     ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr);
3203   }
3204 
3205   ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr);
3206   ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr);
3207   /* Do not error if the user did not give real preallocation information. Ugly because this would overwrite a previous user call to MatSetOption(). */
3208   if (!d_realalloc) {ierr = MatSetOption(b->A,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);}
3209   if (!o_realalloc) {ierr = MatSetOption(b->B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);}
3210   B->preallocated = PETSC_TRUE;
3211   PetscFunctionReturn(0);
3212 }
3213 EXTERN_C_END
3214 
3215 #undef __FUNCT__
3216 #define __FUNCT__ "MatDuplicate_MPIAIJ"
3217 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
3218 {
3219   Mat            mat;
3220   Mat_MPIAIJ     *a,*oldmat = (Mat_MPIAIJ*)matin->data;
3221   PetscErrorCode ierr;
3222 
3223   PetscFunctionBegin;
3224   *newmat       = 0;
3225   ierr = MatCreate(((PetscObject)matin)->comm,&mat);CHKERRQ(ierr);
3226   ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr);
3227   ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr);
3228   ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
3229   a    = (Mat_MPIAIJ*)mat->data;
3230 
3231   mat->factortype    = matin->factortype;
3232   mat->rmap->bs      = matin->rmap->bs;
3233   mat->assembled    = PETSC_TRUE;
3234   mat->insertmode   = NOT_SET_VALUES;
3235   mat->preallocated = PETSC_TRUE;
3236 
3237   a->size           = oldmat->size;
3238   a->rank           = oldmat->rank;
3239   a->donotstash     = oldmat->donotstash;
3240   a->roworiented    = oldmat->roworiented;
3241   a->rowindices     = 0;
3242   a->rowvalues      = 0;
3243   a->getrowactive   = PETSC_FALSE;
3244 
3245   ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr);
3246   ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr);
3247 
3248   if (oldmat->colmap) {
3249 #if defined (PETSC_USE_CTABLE)
3250     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
3251 #else
3252     ierr = PetscMalloc((mat->cmap->N)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr);
3253     ierr = PetscLogObjectMemory(mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
3254     ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
3255 #endif
3256   } else a->colmap = 0;
3257   if (oldmat->garray) {
3258     PetscInt len;
3259     len  = oldmat->B->cmap->n;
3260     ierr = PetscMalloc((len+1)*sizeof(PetscInt),&a->garray);CHKERRQ(ierr);
3261     ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr);
3262     if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); }
3263   } else a->garray = 0;
3264 
3265   ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
3266   ierr = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr);
3267   ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
3268   ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr);
3269   ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
3270   ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr);
3271   ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
3272   ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr);
3273   ierr = PetscFListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr);
3274   *newmat = mat;
3275   PetscFunctionReturn(0);
3276 }
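/*
   Typical call into the duplicate routine above (a sketch): cpvalues selects
   whether the numerical values are copied along with the parallel layout and
   nonzero structure.

     Mat B;
     ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr);
   or, to copy only the nonzero structure,
     ierr = MatDuplicate(A,MAT_DO_NOT_COPY_VALUES,&B);CHKERRQ(ierr);
*/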
3277 
3278 
3279 
3280 #undef __FUNCT__
3281 #define __FUNCT__ "MatLoad_MPIAIJ"
3282 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer)
3283 {
3284   PetscScalar    *vals,*svals;
3285   MPI_Comm       comm = ((PetscObject)viewer)->comm;
3286   PetscErrorCode ierr;
3287   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
3288   PetscInt       i,nz,j,rstart,rend,mmax,maxnz = 0,grows,gcols;
3289   PetscInt       header[4],*rowlengths = 0,M,N,m,*cols;
3290   PetscInt       *ourlens = PETSC_NULL,*procsnz = PETSC_NULL,*offlens = PETSC_NULL,jj,*mycols,*smycols;
3291   PetscInt       cend,cstart,n,*rowners,sizesset=1;
3292   int            fd;
3293 
3294   PetscFunctionBegin;
3295   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3296   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3297   if (!rank) {
3298     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
3299     ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
3300     if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not a matrix object");
3301   }
3302 
3303   if (newMat->rmap->n < 0 && newMat->rmap->N < 0 && newMat->cmap->n < 0 && newMat->cmap->N < 0) sizesset = 0;
3304 
3305   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
3306   M = header[1]; N = header[2];
3307   /* If global rows/cols are set to PETSC_DECIDE, set it to the sizes given in the file */
3308   if (sizesset && newMat->rmap->N < 0) newMat->rmap->N = M;
3309   if (sizesset && newMat->cmap->N < 0) newMat->cmap->N = N;
3310 
3311   /* If global sizes are set, check if they are consistent with that given in the file */
3312   if (sizesset) {
3313     ierr = MatGetSize(newMat,&grows,&gcols);CHKERRQ(ierr);
3314   }
3315   if (sizesset && newMat->rmap->N != grows) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows:Matrix in file has (%d) and input matrix has (%d)",M,grows);
3316   if (sizesset && newMat->cmap->N != gcols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols:Matrix in file has (%d) and input matrix has (%d)",N,gcols);
3317 
3318   /* determine ownership of all rows */
3319   if (newMat->rmap->n < 0) m    = M/size + ((M % size) > rank); /* PETSC_DECIDE */
3320   else m = newMat->rmap->n; /* Set by user */
3321 
3322   ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr);
3323   ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
3324 
3325   /* First process needs enough room for process with most rows */
3326   if (!rank) {
3327     mmax       = rowners[1];
3328     for (i=2; i<=size; i++) {
3329       mmax = PetscMax(mmax,rowners[i]);
3330     }
3331   } else mmax = m;
3332 
3333   rowners[0] = 0;
3334   for (i=2; i<=size; i++) {
3335     rowners[i] += rowners[i-1];
3336   }
3337   rstart = rowners[rank];
3338   rend   = rowners[rank+1];
3339 
3340   /* distribute row lengths to all processors */
3341   ierr    = PetscMalloc2(mmax,PetscInt,&ourlens,mmax,PetscInt,&offlens);CHKERRQ(ierr);
3342   if (!rank) {
3343     ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr);
3344     ierr = PetscMalloc(m*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
3345     ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr);
3346     ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr);
3347     for (j=0; j<m; j++) {
3348       procsnz[0] += ourlens[j];
3349     }
3350     for (i=1; i<size; i++) {
3351       ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr);
3352       /* calculate the number of nonzeros on each processor */
3353       for (j=0; j<rowners[i+1]-rowners[i]; j++) {
3354         procsnz[i] += rowlengths[j];
3355       }
3356       ierr = MPILong_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
3357     }
3358     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
3359   } else {
3360     ierr = MPILong_Recv(ourlens,m,MPIU_INT,0,tag,comm);CHKERRQ(ierr);
3361   }
3362 
3363   if (!rank) {
3364     /* determine max buffer needed and allocate it */
3365     maxnz = 0;
3366     for (i=0; i<size; i++) {
3367       maxnz = PetscMax(maxnz,procsnz[i]);
3368     }
3369     ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr);
3370 
3371     /* read in my part of the matrix column indices  */
3372     nz   = procsnz[0];
3373     ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr);
3374     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
3375 
3376     /* read in everyone else's part and ship it off */
3377     for (i=1; i<size; i++) {
3378       nz     = procsnz[i];
3379       ierr   = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
3380       ierr   = MPILong_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
3381     }
3382     ierr = PetscFree(cols);CHKERRQ(ierr);
3383   } else {
3384     /* determine buffer space needed for message */
3385     nz = 0;
3386     for (i=0; i<m; i++) {
3387       nz += ourlens[i];
3388     }
3389     ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr);
3390 
3391     /* receive message of column indices*/
3392     /* receive message of column indices */
3393   }
3394 
3395   /* determine column ownership if matrix is not square */
3396   if (N != M) {
3397     if (newMat->cmap->n < 0) n      = N/size + ((N % size) > rank);
3398     else n = newMat->cmap->n;
3399     ierr   = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3400     cstart = cend - n;
3401   } else {
3402     cstart = rstart;
3403     cend   = rend;
3404     n      = cend - cstart;
3405   }
3406 
3407   /* loop over local rows, determining number of off diagonal entries */
3408   ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr);
3409   jj = 0;
3410   for (i=0; i<m; i++) {
3411     for (j=0; j<ourlens[i]; j++) {
3412       if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++;
3413       jj++;
3414     }
3415   }
3416 
3417   for (i=0; i<m; i++) {
3418     ourlens[i] -= offlens[i];
3419   }
3420   if (!sizesset) {
3421     ierr = MatSetSizes(newMat,m,n,M,N);CHKERRQ(ierr);
3422   }
3423   ierr = MatMPIAIJSetPreallocation(newMat,0,ourlens,0,offlens);CHKERRQ(ierr);
3424 
3425   for (i=0; i<m; i++) {
3426     ourlens[i] += offlens[i];
3427   }
3428 
3429   if (!rank) {
3430     ierr = PetscMalloc((maxnz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
3431 
3432     /* read in my part of the matrix numerical values  */
3433     nz   = procsnz[0];
3434     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3435 
3436     /* insert into matrix */
3437     jj      = rstart;
3438     smycols = mycols;
3439     svals   = vals;
3440     for (i=0; i<m; i++) {
3441       ierr = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
3442       smycols += ourlens[i];
3443       svals   += ourlens[i];
3444       jj++;
3445     }
3446 
3447     /* read in other processors and ship out */
3448     for (i=1; i<size; i++) {
3449       nz     = procsnz[i];
3450       ierr   = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3451       ierr   = MPILong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr);
3452     }
3453     ierr = PetscFree(procsnz);CHKERRQ(ierr);
3454   } else {
3455     /* receive numeric values */
3456     ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
3457 
3458     /* receive message of values */
3459     ierr   = MPILong_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr);
3460 
3461     /* insert into matrix */
3462     jj      = rstart;
3463     smycols = mycols;
3464     svals   = vals;
3465     for (i=0; i<m; i++) {
3466       ierr     = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
3467       smycols += ourlens[i];
3468       svals   += ourlens[i];
3469       jj++;
3470     }
3471   }
3472   ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr);
3473   ierr = PetscFree(vals);CHKERRQ(ierr);
3474   ierr = PetscFree(mycols);CHKERRQ(ierr);
3475   ierr = PetscFree(rowners);CHKERRQ(ierr);
3476 
3477   ierr = MatAssemblyBegin(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3478   ierr = MatAssemblyEnd(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3479   PetscFunctionReturn(0);
3480 }
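/*
   Loading sketch for the routine above (hedged: "matrix.dat" is a placeholder
   for a file written earlier with MatView() on a binary viewer):

     Mat         A;
     PetscViewer viewer;
     ierr = PetscViewerBinaryOpen(PETSC_COMM_WORLD,"matrix.dat",FILE_MODE_READ,&viewer);CHKERRQ(ierr);
     ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
     ierr = MatLoad(A,viewer);CHKERRQ(ierr);
     ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr);
*/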
3481 
3482 #undef __FUNCT__
3483 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ"
3484 PetscErrorCode MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat)
3485 {
3486   PetscErrorCode ierr;
3487   IS             iscol_local;
3488   PetscInt       csize;
3489 
3490   PetscFunctionBegin;
3491   ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
3492   if (call == MAT_REUSE_MATRIX) {
3493     ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr);
3494     if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3495   } else {
3496     ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
3497   }
3498   ierr = MatGetSubMatrix_MPIAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr);
3499   if (call == MAT_INITIAL_MATRIX) {
3500     ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
3501     ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
3502   }
3503   PetscFunctionReturn(0);
3504 }
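/*
   Extraction sketch for the routine above (hedged): each process lists the
   rows it wants in isrow and the columns in iscol; a MAT_INITIAL_MATRIX call
   creates B and caches the gathered column IS on it ("ISAllGather"), so that
   a later MAT_REUSE_MATRIX call can refill the same B after A changes.

     IS  isrow,iscol;
     Mat B;
     ierr = MatGetSubMatrix(A,isrow,iscol,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr);
     ...                                   change the entries of A ...
     ierr = MatGetSubMatrix(A,isrow,iscol,MAT_REUSE_MATRIX,&B);CHKERRQ(ierr);
*/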
3505 
3506 #undef __FUNCT__
3507 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ_Private"
3508 /*
3509     Not great since it makes two copies of the submatrix: first a SeqAIJ
3510   on each process, and then the final result assembled by concatenating the
3511   local matrices. Writing it directly would be much like MatGetSubMatrices_MPIAIJ().
3512 
3513   Note: This requires a sequential iscol with all indices.
3514 */
3515 PetscErrorCode MatGetSubMatrix_MPIAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat)
3516 {
3517   PetscErrorCode ierr;
3518   PetscMPIInt    rank,size;
3519   PetscInt       i,m,n,rstart,row,rend,nz,*cwork,j;
3520   PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal;
3521   Mat            *local,M,Mreuse;
3522   MatScalar      *vwork,*aa;
3523   MPI_Comm       comm = ((PetscObject)mat)->comm;
3524   Mat_SeqAIJ     *aij;
3525 
3526 
3527   PetscFunctionBegin;
3528   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3529   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3530 
3531   if (call ==  MAT_REUSE_MATRIX) {
3532     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr);
3533     if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3534     local = &Mreuse;
3535     ierr  = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);CHKERRQ(ierr);
3536   } else {
3537     ierr   = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr);
3538     Mreuse = *local;
3539     ierr   = PetscFree(local);CHKERRQ(ierr);
3540   }
3541 
3542   /*
3543       m - number of local rows
3544       n - number of columns (same on all processors)
3545       rstart - first row in new global matrix generated
3546   */
3547   ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
3548   if (call == MAT_INITIAL_MATRIX) {
3549     aij = (Mat_SeqAIJ*)(Mreuse)->data;
3550     ii  = aij->i;
3551     jj  = aij->j;
3552 
3553     /*
3554         Determine the number of non-zeros in the diagonal and off-diagonal
3555         portions of the matrix in order to do correct preallocation
3556     */
3557 
3558     /* first get start and end of "diagonal" columns */
3559     if (csize == PETSC_DECIDE) {
3560       ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
3561       if (mglobal == n) { /* square matrix */
3562         nlocal = m;
3563       } else {
3564         nlocal = n/size + ((n % size) > rank);
3565       }
3566     } else {
3567       nlocal = csize;
3568     }
3569     ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3570     rstart = rend - nlocal;
3571     if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n);
3572 
3573     /* next, compute all the lengths */
3574     ierr  = PetscMalloc((2*m+1)*sizeof(PetscInt),&dlens);CHKERRQ(ierr);
3575     olens = dlens + m;
3576     for (i=0; i<m; i++) {
3577       jend = ii[i+1] - ii[i];
3578       olen = 0;
3579       dlen = 0;
3580       for (j=0; j<jend; j++) {
3581         if (*jj < rstart || *jj >= rend) olen++;
3582         else dlen++;
3583         jj++;
3584       }
3585       olens[i] = olen;
3586       dlens[i] = dlen;
3587     }
3588     ierr = MatCreate(comm,&M);CHKERRQ(ierr);
3589     ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr);
3590     ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr);
3591     ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr);
3592     ierr = PetscFree(dlens);CHKERRQ(ierr);
3593   } else {
3594     PetscInt ml,nl;
3595 
3596     M = *newmat;
3597     ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
3598     if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
3599     ierr = MatZeroEntries(M);CHKERRQ(ierr);
3600     /*
3601          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
3602        rather than the slower MatSetValues().
3603     */
3604     M->was_assembled = PETSC_TRUE;
3605     M->assembled     = PETSC_FALSE;
3606   }
3607   ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
3608   aij = (Mat_SeqAIJ*)(Mreuse)->data;
3609   ii  = aij->i;
3610   jj  = aij->j;
3611   aa  = aij->a;
3612   for (i=0; i<m; i++) {
3613     row   = rstart + i;
3614     nz    = ii[i+1] - ii[i];
3615     cwork = jj;     jj += nz;
3616     vwork = aa;     aa += nz;
3617     ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
3618   }
3619 
3620   ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3621   ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3622   *newmat = M;
3623 
3624   /* save submatrix used in processor for next request */
3625   if (call ==  MAT_INITIAL_MATRIX) {
3626     ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
3627     ierr = MatDestroy(&Mreuse);CHKERRQ(ierr);
3628   }
3629 
3630   PetscFunctionReturn(0);
3631 }
3632 
3633 EXTERN_C_BEGIN
3634 #undef __FUNCT__
3635 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR_MPIAIJ"
3636 PetscErrorCode  MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[])
3637 {
3638   PetscInt       m,cstart, cend,j,nnz,i,d;
3639   PetscInt       *d_nnz,*o_nnz,nnz_max = 0,rstart,ii;
3640   const PetscInt *JJ;
3641   PetscScalar    *values;
3642   PetscErrorCode ierr;
3643 
3644   PetscFunctionBegin;
3645   if (Ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0, it is %D",Ii[0]);
3646 
3647   ierr = PetscLayoutSetBlockSize(B->rmap,1);CHKERRQ(ierr);
3648   ierr = PetscLayoutSetBlockSize(B->cmap,1);CHKERRQ(ierr);
3649   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3650   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3651   m      = B->rmap->n;
3652   cstart = B->cmap->rstart;
3653   cend   = B->cmap->rend;
3654   rstart = B->rmap->rstart;
3655 
3656   ierr  = PetscMalloc2(m,PetscInt,&d_nnz,m,PetscInt,&o_nnz);CHKERRQ(ierr);
3657 
3658 #if defined(PETSC_USE_DEBUG)
3659   for (i=0; i<m; i++) {
3660     nnz     = Ii[i+1]- Ii[i];
3661     JJ      = J + Ii[i];
3662     if (nnz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative %D number of columns",i,nnz);
3663     if (nnz && (JJ[0] < 0)) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with a negative column index",i);
3664     if (nnz && (JJ[nnz-1] >= B->cmap->N)) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N);
3665   }
3666 #endif
3667 
3668   for (i=0; i<m; i++) {
3669     nnz     = Ii[i+1]- Ii[i];
3670     JJ      = J + Ii[i];
3671     nnz_max = PetscMax(nnz_max,nnz);
3672     d       = 0;
3673     for (j=0; j<nnz; j++) {
3674       if (cstart <= JJ[j] && JJ[j] < cend) d++;
3675     }
3676     d_nnz[i] = d;
3677     o_nnz[i] = nnz - d;
3678   }
3679   ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
3680   ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);
3681 
3682   if (v) values = (PetscScalar*)v;
3683   else {
3684     ierr = PetscMalloc((nnz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr);
3685     ierr = PetscMemzero(values,nnz_max*sizeof(PetscScalar));CHKERRQ(ierr);
3686   }
3687 
3688   for (i=0; i<m; i++) {
3689     ii   = i + rstart;
3690     nnz  = Ii[i+1]- Ii[i];
3691     ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ? Ii[i] : 0),INSERT_VALUES);CHKERRQ(ierr);
3692   }
3693   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3694   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3695 
3696   if (!v) {
3697     ierr = PetscFree(values);CHKERRQ(ierr);
3698   }
3699   ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3700   PetscFunctionReturn(0);
3701 }
3702 EXTERN_C_END
3703 
3704 #undef __FUNCT__
3705 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR"
3706 /*@
3707    MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format
3708    (the default parallel PETSc format).
3709 
3710    Collective on MPI_Comm
3711 
3712    Input Parameters:
3713 +  B - the matrix
3714 .  i - the indices into j for the start of each local row (starts with zero)
3715 .  j - the column indices for each local row (starts with zero)
3716 -  v - optional values in the matrix
3717 
3718    Level: developer
3719 
3720    Notes:
3721        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
3722      thus you CANNOT change the matrix entries by changing the values of a[] after you have
3723      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3724 
3725        The i and j indices are 0 based, and the i indices are offsets into the local j array.
3726 
3727        The format used for the sparse matrix input is equivalent to a
3728     row-major ordering, i.e., for the following matrix the expected input data is
3729     as shown:
3730 
3731         1 0 0
3732         2 0 3     P0
3733        -------
3734         4 5 6     P1
3735 
3736      Process0 [P0]: rows_owned=[0,1]
3737         i =  {0,1,3}  [size = nrow+1  = 2+1]
3738         j =  {0,0,2}  [size = nz = 3]
3739         v =  {1,2,3}  [size = nz = 3]
3740 
3741      Process1 [P1]: rows_owned=[2]
3742         i =  {0,3}    [size = nrow+1  = 1+1]
3743         j =  {0,1,2}  [size = nz = 3]
3744         v =  {4,5,6}  [size = nz = 3]
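
     With these per-process arrays in hand, each process then makes a single
     call (a sketch; B is assumed already created with type MATMPIAIJ and
     local row count nrow):

.vb
     ierr = MatMPIAIJSetPreallocationCSR(B,i,j,v);CHKERRQ(ierr);
.ve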
3745 
3746 .keywords: matrix, aij, compressed row, sparse, parallel
3747 
3748 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateAIJ(), MPIAIJ,
3749           MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays()
3750 @*/
3751 PetscErrorCode  MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
3752 {
3753   PetscErrorCode ierr;
3754 
3755   PetscFunctionBegin;
3756   ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocationCSR_C",(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,i,j,v));CHKERRQ(ierr);
3757   PetscFunctionReturn(0);
3758 }
3759 
3760 #undef __FUNCT__
3761 #define __FUNCT__ "MatMPIAIJSetPreallocation"
3762 /*@C
3763    MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format
3764    (the default parallel PETSc format).  For good matrix assembly performance
3765    the user should preallocate the matrix storage by setting the parameters
3766    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3767    performance can be increased by more than a factor of 50.
3768 
3769    Collective on MPI_Comm
3770 
3771    Input Parameters:
3772 +  A - the matrix
3773 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
3774            (same value is used for all local rows)
3775 .  d_nnz - array containing the number of nonzeros in the various rows of the
3776            DIAGONAL portion of the local submatrix (possibly different for each row)
3777            or PETSC_NULL, if d_nz is used to specify the nonzero structure.
3778            The size of this array is equal to the number of local rows, i.e 'm'.
3779            For matrices that will be factored, you must leave room for (and set)
3780            the diagonal entry even if it is zero.
3781 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
3782            submatrix (same value is used for all local rows).
3783 -  o_nnz - array containing the number of nonzeros in the various rows of the
3784            OFF-DIAGONAL portion of the local submatrix (possibly different for
3785            each row) or PETSC_NULL, if o_nz is used to specify the nonzero
3786            structure. The size of this array is equal to the number
3787            of local rows, i.e 'm'.
3788 
3789    If the *_nnz parameter is given then the *_nz parameter is ignored
3790 
3791    The AIJ format (also called the Yale sparse matrix format or
3792    compressed row storage (CSR)), is fully compatible with standard Fortran 77
3793    storage.  The stored row and column indices begin with zero.
3794    See the <A href="../../docs/manual.pdf#nameddest=ch_mat">Mat chapter of the users manual</A> for details.
3795 
3796    The parallel matrix is partitioned such that the first m0 rows belong to
3797    process 0, the next m1 rows belong to process 1, the next m2 rows belong
3798    to process 2 etc.. where m0,m1,m2... are the input parameter 'm'.
3799 
3800    The DIAGONAL portion of the local submatrix of a processor can be defined
3801    as the submatrix which is obtained by extracting the part corresponding to
3802    the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the
3803    first row that belongs to the processor, r2 is the last row belonging to
3804    this processor, and c1-c2 is the range of indices of the local part of a
3805    vector suitable for applying the matrix to.  This is an mxn matrix.  In the
3806    common case of a square matrix, the row and column ranges are the same and
3807    the DIAGONAL part is also square. The remaining portion of the local
3808    submatrix, of size m x (N-n), constitutes the OFF-DIAGONAL portion.
3809 
3810    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
3811 
3812    You can call MatGetInfo() to get information on how effective the preallocation was;
3813    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
3814    You can also run with the option -info and look for messages with the string
3815    malloc in them to see if additional memory allocation was needed.
3816 
3817    Example usage:
3818 
3819    Consider the following 8x8 matrix with 34 non-zero values, that is
3820    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
3821    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
3822    as follows:
3823 
3824 .vb
3825             1  2  0  |  0  3  0  |  0  4
3826     Proc0   0  5  6  |  7  0  0  |  8  0
3827             9  0 10  | 11  0  0  | 12  0
3828     -------------------------------------
3829            13  0 14  | 15 16 17  |  0  0
3830     Proc1   0 18  0  | 19 20 21  |  0  0
3831             0  0  0  | 22 23  0  | 24  0
3832     -------------------------------------
3833     Proc2  25 26 27  |  0  0 28  | 29  0
3834            30  0  0  | 31 32 33  |  0 34
3835 .ve
3836 
3837    This can be represented as a collection of submatrices as:
3838 
3839 .vb
3840       A B C
3841       D E F
3842       G H I
3843 .ve
3844 
3845    Where the submatrices A,B,C are owned by proc0, D,E,F are
3846    owned by proc1, G,H,I are owned by proc2.
3847 
3848    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3849    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3850    The 'M','N' parameters are 8,8, and have the same values on all procs.
3851 
3852    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
3853    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
3854    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
3855    Internally, each processor stores the DIAGONAL part and the OFF-DIAGONAL
3856    part as separate SeqAIJ matrices. For example, proc1 will store [E] as one
3857    SeqAIJ matrix and [DF] as another SeqAIJ matrix.
3858 
3859    When d_nz, o_nz parameters are specified, d_nz storage elements are
3860    allocated for every row of the local diagonal submatrix, and o_nz
3861    storage locations are allocated for every row of the OFF-DIAGONAL submatrix.
3862    One way to choose d_nz and o_nz is to use the maximum number of nonzeros in
3863    any local row of the DIAGONAL and OFF-DIAGONAL submatrices, respectively.
3864    In this case, the values of d_nz,o_nz are:
3865 .vb
3866      proc0 : d_nz = 2, o_nz = 2
3867      proc1 : d_nz = 3, o_nz = 2
3868      proc2 : d_nz = 1, o_nz = 4
3869 .ve
3870    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
3871    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, and 2*(1+4)=10
3872    for proc2, i.e., we are using 12+15+10=37 storage locations to store
3873    34 values.
3874 
3875    When d_nnz, o_nnz parameters are specified, the storage is specified
3876    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
3877    In the above case the values for d_nnz,o_nnz are:
3878 .vb
3879      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
3880      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
3881      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
3882 .ve
3883    Here the space allocated is the sum of all the above values, i.e., 34,
3884    and hence the preallocation is exact.
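
   For instance, with the d_nnz/o_nnz values above, proc1 would call
   (a sketch; A is the matrix being preallocated):

.vb
   PetscInt d_nnz[] = {3,3,2},o_nnz[] = {2,1,1};
   ierr = MatMPIAIJSetPreallocation(A,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
.ve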
3885 
3886    Level: intermediate
3887 
3888 .keywords: matrix, aij, compressed row, sparse, parallel
3889 
3890 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateAIJ(), MatMPIAIJSetPreallocationCSR(),
3891           MPIAIJ, MatGetInfo()
3892 @*/
3893 PetscErrorCode  MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
3894 {
3895   PetscErrorCode ierr;
3896 
3897   PetscFunctionBegin;
3898   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
3899   PetscValidType(B,1);
3900   ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocation_C",(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr);
3901   PetscFunctionReturn(0);
3902 }
3903 
3904 #undef __FUNCT__
3905 #define __FUNCT__ "MatCreateMPIAIJWithArrays"
3906 /*@
3907      MatCreateMPIAIJWithArrays - creates an MPI AIJ matrix using arrays that contain the local
3908          rows in standard CSR format.
3909 
3910    Collective on MPI_Comm
3911 
3912    Input Parameters:
3913 +  comm - MPI communicator
3914 .  m - number of local rows (Cannot be PETSC_DECIDE)
3915 .  n - This value should be the same as the local size used in creating the
3916        x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have it
3917        calculated if N is given). For square matrices n is almost always m.
3918 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
3919 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
3920 .   i - row indices
3921 .   j - column indices
3922 -   a - matrix values
3923 
3924    Output Parameter:
3925 .   mat - the matrix
3926 
3927    Level: intermediate
3928 
3929    Notes:
3930        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
3931      thus you CANNOT change the matrix entries by changing the values of a[] after you have
3932      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3933 
3934        The i and j indices are 0 based, and the i indices are offsets into the local j array.
3935 
3936        The format used for the sparse matrix input is equivalent to a
3937     row-major ordering, i.e., for the following matrix the expected input data is
3938     as shown:
3939 
3940         1 0 0
3941         2 0 3     P0
3942        -------
3943         4 5 6     P1
3944 
3945      Process0 [P0]: rows_owned=[0,1]
3946         i =  {0,1,3}  [size = nrow+1  = 2+1]
3947         j =  {0,0,2}  [size = nz = 3]
3948         v =  {1,2,3}  [size = nz = 3]
3949 
3950      Process1 [P1]: rows_owned=[2]
3951         i =  {0,3}    [size = nrow+1  = 1+1]
3952         j =  {0,1,2}  [size = nz = 3]
3953         v =  {4,5,6}  [size = nz = 3]
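
     For instance, process P0 above would create the matrix with (a sketch;
     P1 makes the analogous call with its own arrays and m = 1):

.vb
   PetscInt    i[] = {0,1,3},j[] = {0,0,2};
   PetscScalar v[] = {1.0,2.0,3.0};
   Mat         A;
   ierr = MatCreateMPIAIJWithArrays(PETSC_COMM_WORLD,2,PETSC_DECIDE,PETSC_DETERMINE,3,i,j,v,&A);CHKERRQ(ierr);
.ve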
3954 
3955 .keywords: matrix, aij, compressed row, sparse, parallel
3956 
3957 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
3958           MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays()
3959 @*/
3960 PetscErrorCode  MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
3961 {
3962   PetscErrorCode ierr;
3963 
3964   PetscFunctionBegin;
3965   if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
3966   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
3967   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
3968   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
3969   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
3970   ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr);
3971   PetscFunctionReturn(0);
3972 }
3973 
3974 #undef __FUNCT__
3975 #define __FUNCT__ "MatCreateAIJ"
3976 /*@C
3977    MatCreateAIJ - Creates a sparse parallel matrix in AIJ format
3978    (the default parallel PETSc format).  For good matrix assembly performance
3979    the user should preallocate the matrix storage by setting the parameters
3980    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3981    performance can be increased by more than a factor of 50.
3982 
3983    Collective on MPI_Comm
3984 
3985    Input Parameters:
3986 +  comm - MPI communicator
3987 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
3988            This value should be the same as the local size used in creating the
3989            y vector for the matrix-vector product y = Ax.
3990 .  n - This value should be the same as the local size used in creating the
3991        x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have it
3992        calculated if N is given). For square matrices n is almost always m.
3993 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
3994 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
3995 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
3996            (same value is used for all local rows)
3997 .  d_nnz - array containing the number of nonzeros in the various rows of the
3998            DIAGONAL portion of the local submatrix (possibly different for each row)
3999            or PETSC_NULL, if d_nz is used to specify the nonzero structure.
4000            The size of this array is equal to the number of local rows, i.e 'm'.
4001 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
4002            submatrix (same value is used for all local rows).
4003 -  o_nnz - array containing the number of nonzeros in the various rows of the
4004            OFF-DIAGONAL portion of the local submatrix (possibly different for
4005            each row) or PETSC_NULL, if o_nz is used to specify the nonzero
4006            structure. The size of this array is equal to the number
4007            of local rows, i.e. 'm'.
4008 
4009    Output Parameter:
4010 .  A - the matrix
4011 
4012    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
4013    MatXXXXSetPreallocation() paradigm instead of calling this routine directly.
4014    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation()]
4015 
4016    Notes:
4017    If the *_nnz parameter is given then the *_nz parameter is ignored.
4018 
4019    m,n,M,N parameters specify the size of the matrix, and its partitioning across
4020    processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
4021    storage requirements for this matrix.
4022 
4023    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one
4024    processor then it must be used on all processors that share the object for
4025    that argument.
4026 
4027    The user MUST specify either the local or global matrix dimensions
4028    (possibly both).
4029 
4030    The parallel matrix is partitioned across processors such that the
4031    first m0 rows belong to process 0, the next m1 rows belong to
4032    process 1, the next m2 rows belong to process 2, etc., where
4033    m0,m1,m2,... are given by the input parameter 'm' on each process; i.e. each
4034    processor stores values corresponding to an [m x N] submatrix.
4035 
4036    The columns are logically partitioned with the n0 columns belonging
4037    to the 0th partition, the next n1 columns belonging to the next
4038    partition, etc., where n0,n1,n2,... are given by the input parameter 'n'.
4039 
4040    The DIAGONAL portion of the local submatrix on any given processor
4041    is the submatrix corresponding to the m rows and n columns owned by
4042    that processor; i.e. the diagonal submatrix on
4043    process 0 is [m0 x n0], the diagonal submatrix on process 1 is [m1 x n1],
4044    etc. The remaining [m x (N-n)] portion of the local submatrix
4045    constitutes the OFF-DIAGONAL portion. The example below
4046    illustrates this concept.
4047 
4048    For a square global matrix we define each processor's diagonal portion
4049    to be its local rows and the corresponding columns (a square submatrix);
4050    each processor's off-diagonal portion encompasses the remainder of the
4051    local matrix (a rectangular submatrix).
4052 
4053    If o_nnz and d_nnz are specified, then o_nz and d_nz are ignored.
4054 
4055    When calling this routine with a single process communicator, a matrix of
4056    type SEQAIJ is returned.  If a matrix of type MPIAIJ is desired for this
4057    type of communicator, use the construction mechanism:
4058      MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatSetSizes(A, m,n,M,N); MatMPIAIJSetPreallocation(A,...);
4059 
4060    By default, this format uses inodes (identical nodes) when possible.
4061    We search for consecutive rows with the same nonzero structure, thereby
4062    reusing matrix information to achieve increased efficiency.
4063 
4064    Options Database Keys:
4065 +  -mat_no_inode  - Do not use inodes
4066 .  -mat_inode_limit <limit> - Sets inode limit (max limit=5)
4067 -  -mat_aij_oneindex - Internally use indexing starting at 1
4068         rather than 0.  Note that when calling MatSetValues(),
4069         the user still MUST index entries starting at 0!
4070 
4071 
4072    Example usage:
4073 
4074    Consider the following 8x8 matrix with 34 non-zero values, that is
4075    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
4076    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
4077    as follows:
4078 
4079 .vb
4080             1  2  0  |  0  3  0  |  0  4
4081     Proc0   0  5  6  |  7  0  0  |  8  0
4082             9  0 10  | 11  0  0  | 12  0
4083     -------------------------------------
4084            13  0 14  | 15 16 17  |  0  0
4085     Proc1   0 18  0  | 19 20 21  |  0  0
4086             0  0  0  | 22 23  0  | 24  0
4087     -------------------------------------
4088     Proc2  25 26 27  |  0  0 28  | 29  0
4089            30  0  0  | 31 32 33  |  0 34
4090 .ve
4091 
4092    This can be represented as a collection of submatrices as:
4093 
4094 .vb
4095       A B C
4096       D E F
4097       G H I
4098 .ve
4099 
4100    Where the submatrices A,B,C are owned by proc0, D,E,F are
4101    owned by proc1, G,H,I are owned by proc2.
4102 
4103    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4104    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4105    The 'M','N' parameters are 8,8, and have the same values on all procs.
4106 
4107    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
4108    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
4109    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
4110    Internally, each processor stores the DIAGONAL part and the OFF-DIAGONAL
4111    part as separate SeqAIJ matrices; e.g., proc1 will store [E] as one SeqAIJ
4112    matrix and [DF] as another SeqAIJ matrix.
4113 
4114    When the d_nz, o_nz parameters are specified, d_nz storage elements are
4115    allocated for every row of the local DIAGONAL submatrix, and o_nz
4116    storage locations are allocated for every row of the OFF-DIAGONAL submatrix.
4117    One way to choose d_nz and o_nz is to use the maximum number of nonzeros per
4118    local row in the local DIAGONAL and OFF-DIAGONAL submatrices, respectively.
4119    In this case, the values of d_nz,o_nz are:
4120 .vb
4121      proc0 : d_nz = 2, o_nz = 2
4122      proc1 : d_nz = 3, o_nz = 2
4123      proc2 : d_nz = 1, o_nz = 4
4124 .ve
4125    We are allocating m*(d_nz+o_nz) storage locations on every proc. This
4126    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, and 2*(1+4)=10
4127    for proc2; i.e. we are using 12+15+10=37 storage locations to store
4128    34 values.
4129 
4130    When the d_nnz, o_nnz parameters are specified, the storage is specified
4131    for every row, corresponding to both the DIAGONAL and OFF-DIAGONAL submatrices.
4132    In the above case the values for d_nnz,o_nnz are:
4133 .vb
4134      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
4135      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
4136      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
4137 .ve
4138    Here the space allocated is the sum of all the above values, i.e. 34, and
4139    hence the preallocation is perfect.
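
   For example, proc0 above could create its piece of the matrix with (a
   minimal sketch using the d_nnz/o_nnz values listed above; the declarations
   of comm, ierr, and A, plus error handling, are elided):

.vb
      PetscInt d_nnz[] = {2,2,2},o_nnz[] = {2,2,2};

      ierr = MatCreateAIJ(comm,3,3,8,8,0,d_nnz,0,o_nnz,&A);CHKERRQ(ierr);
.ve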
4140 
4141    Level: intermediate
4142 
4143 .keywords: matrix, aij, compressed row, sparse, parallel
4144 
4145 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
4146           MPIAIJ, MatCreateMPIAIJWithArrays()
4147 @*/
4148 PetscErrorCode  MatCreateAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
4149 {
4150   PetscErrorCode ierr;
4151   PetscMPIInt    size;
4152 
4153   PetscFunctionBegin;
4154   ierr = MatCreate(comm,A);CHKERRQ(ierr);
4155   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
4156   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4157   if (size > 1) {
4158     ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr);
4159     ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
4160   } else {
4161     ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr);
4162     ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr);
4163   }
4164   PetscFunctionReturn(0);
4165 }
4166 
4167 #undef __FUNCT__
4168 #define __FUNCT__ "MatMPIAIJGetSeqAIJ"
4169 PetscErrorCode  MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,PetscInt *colmap[])
4170 {
4171   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
4172 
4173   PetscFunctionBegin;
4174   *Ad     = a->A;
4175   *Ao     = a->B;
4176   *colmap = a->garray;
4177   PetscFunctionReturn(0);
4178 }
4179 
4180 #undef __FUNCT__
4181 #define __FUNCT__ "MatSetColoring_MPIAIJ"
4182 PetscErrorCode MatSetColoring_MPIAIJ(Mat A,ISColoring coloring)
4183 {
4184   PetscErrorCode ierr;
4185   PetscInt       i;
4186   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
4187 
4188   PetscFunctionBegin;
4189   if (coloring->ctype == IS_COLORING_GLOBAL) {
4190     ISColoringValue *allcolors,*colors;
4191     ISColoring      ocoloring;
4192 
4193     /* set coloring for diagonal portion */
4194     ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr);
4195 
4196     /* set coloring for off-diagonal portion */
4197     ierr = ISAllGatherColors(((PetscObject)A)->comm,coloring->n,coloring->colors,PETSC_NULL,&allcolors);CHKERRQ(ierr);
4198     ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
4199     for (i=0; i<a->B->cmap->n; i++) {
4200       colors[i] = allcolors[a->garray[i]];
4201     }
4202     ierr = PetscFree(allcolors);CHKERRQ(ierr);
4203     ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
4204     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
4205     ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr);
4206   } else if (coloring->ctype == IS_COLORING_GHOSTED) {
4207     ISColoringValue *colors;
4208     PetscInt        *larray;
4209     ISColoring      ocoloring;
4210 
4211     /* set coloring for diagonal portion */
4212     ierr = PetscMalloc((a->A->cmap->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr);
4213     for (i=0; i<a->A->cmap->n; i++) {
4214       larray[i] = i + A->cmap->rstart;
4215     }
4216     ierr = ISGlobalToLocalMappingApply(A->cmap->mapping,IS_GTOLM_MASK,a->A->cmap->n,larray,PETSC_NULL,larray);CHKERRQ(ierr);
4217     ierr = PetscMalloc((a->A->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
4218     for (i=0; i<a->A->cmap->n; i++) {
4219       colors[i] = coloring->colors[larray[i]];
4220     }
4221     ierr = PetscFree(larray);CHKERRQ(ierr);
4222     ierr = ISColoringCreate(PETSC_COMM_SELF,coloring->n,a->A->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
4223     ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr);
4224     ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr);
4225 
4226     /* set coloring for off-diagonal portion */
4227     ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr);
4228     ierr = ISGlobalToLocalMappingApply(A->cmap->mapping,IS_GTOLM_MASK,a->B->cmap->n,a->garray,PETSC_NULL,larray);CHKERRQ(ierr);
4229     ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
4230     for (i=0; i<a->B->cmap->n; i++) {
4231       colors[i] = coloring->colors[larray[i]];
4232     }
4233     ierr = PetscFree(larray);CHKERRQ(ierr);
4234     ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
4235     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
4236     ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr);
4237   } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support ISColoringType %d",(int)coloring->ctype);
4238 
4239   PetscFunctionReturn(0);
4240 }
4241 
4242 #if defined(PETSC_HAVE_ADIC)
4243 #undef __FUNCT__
4244 #define __FUNCT__ "MatSetValuesAdic_MPIAIJ"
4245 PetscErrorCode MatSetValuesAdic_MPIAIJ(Mat A,void *advalues)
4246 {
4247   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
4248   PetscErrorCode ierr;
4249 
4250   PetscFunctionBegin;
4251   ierr = MatSetValuesAdic_SeqAIJ(a->A,advalues);CHKERRQ(ierr);
4252   ierr = MatSetValuesAdic_SeqAIJ(a->B,advalues);CHKERRQ(ierr);
4253   PetscFunctionReturn(0);
4254 }
4255 #endif
4256 
4257 #undef __FUNCT__
4258 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ"
4259 PetscErrorCode MatSetValuesAdifor_MPIAIJ(Mat A,PetscInt nl,void *advalues)
4260 {
4261   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
4262   PetscErrorCode ierr;
4263 
4264   PetscFunctionBegin;
4265   ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr);
4266   ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr);
4267   PetscFunctionReturn(0);
4268 }
4269 
4270 #undef __FUNCT__
4271 #define __FUNCT__ "MatMergeSymbolic"
4272 PetscErrorCode  MatMergeSymbolic(MPI_Comm comm,Mat inmat,PetscInt n,Mat *outmat)
4273 {
4274   PetscErrorCode ierr;
4275   PetscInt       m,N,i,rstart,nnz,*dnz,*onz,sum;
4276   PetscInt       *indx;
4277 
4278   PetscFunctionBegin;
4279   /* This routine will ONLY return MPIAIJ type matrix */
4280   ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
4281   if (n == PETSC_DECIDE){
4282     ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr);
4283   }
4284   /* Check sum(n) = N */
4285   ierr = MPI_Allreduce(&n,&sum,1,MPIU_INT,MPIU_SUM,comm);CHKERRQ(ierr);
4286   if (sum != N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns != global columns %d",N);
4287 
4288   ierr = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
4289   rstart -= m;
4290 
4291   ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
4292   for (i=0;i<m;i++) {
4293     ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr);
4294     ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr);
4295     ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr);
4296   }
4297 
4298   ierr = MatCreate(comm,outmat);CHKERRQ(ierr);
4299   ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
4300   ierr = MatSetType(*outmat,MATMPIAIJ);CHKERRQ(ierr);
4301   ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr);
4302   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
4303   PetscFunctionReturn(0);
4304 }
4305 
4306 #undef __FUNCT__
4307 #define __FUNCT__ "MatMergeNumeric"
4308 PetscErrorCode  MatMergeNumeric(MPI_Comm comm,Mat inmat,PetscInt n,Mat outmat)
4309 {
4310   PetscErrorCode ierr;
4311   PetscInt       m,N,i,rstart,nnz,Ii;
4312   PetscInt       *indx;
4313   PetscScalar    *values;
4314 
4315   PetscFunctionBegin;
4316   ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
4317   ierr = MatGetOwnershipRange(outmat,&rstart,PETSC_NULL);CHKERRQ(ierr);
4318   for (i=0;i<m;i++) {
4319     ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
4320     Ii    = i + rstart;
4321     ierr = MatSetValues_MPIAIJ(outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
4322     ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
4323   }
4324   ierr = MatAssemblyBegin(outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4325   ierr = MatAssemblyEnd(outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4326   PetscFunctionReturn(0);
4327 }
4328 
4329 #undef __FUNCT__
4330 #define __FUNCT__ "MatMerge"
4331 /*@
4332       MatMerge - Creates a single large PETSc matrix by concatenating sequential
4333                  matrices from each processor
4334 
4335     Collective on MPI_Comm
4336 
4337    Input Parameters:
4338 +    comm - the communicator the parallel matrix will live on
4339 .    inmat - the input sequential matrix (one per process)
4340 .    n - number of local columns (or PETSC_DECIDE)
4341 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4342 
4343    Output Parameter:
4344 .    outmat - the parallel matrix generated
4345 
4346     Level: advanced
4347 
4348    Notes: The number of columns of the matrix on EACH processor MUST be the same.
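
   A typical use (a sketch; each process passes its own sequential matrix
   inmat and obtains the concatenated parallel matrix):

.vb
      ierr = MatMerge(comm,inmat,PETSC_DECIDE,MAT_INITIAL_MATRIX,&outmat);CHKERRQ(ierr);
.ve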
4349 
4350 @*/
4351 PetscErrorCode  MatMerge(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
4352 {
4353   PetscErrorCode ierr;
4354 
4355   PetscFunctionBegin;
4356   ierr = PetscLogEventBegin(MAT_Merge,inmat,0,0,0);CHKERRQ(ierr);
4357   if (scall == MAT_INITIAL_MATRIX){
4358     ierr = MatMergeSymbolic(comm,inmat,n,outmat);CHKERRQ(ierr);
4359   }
4360   ierr = MatMergeNumeric(comm,inmat,n,*outmat);CHKERRQ(ierr);
4361   ierr = PetscLogEventEnd(MAT_Merge,inmat,0,0,0);CHKERRQ(ierr);
4362   PetscFunctionReturn(0);
4363 }
4364 
4365 #undef __FUNCT__
4366 #define __FUNCT__ "MatFileSplit"
4367 PetscErrorCode MatFileSplit(Mat A,char *outfile)
4368 {
4369   PetscErrorCode    ierr;
4370   PetscMPIInt       rank;
4371   PetscInt          m,N,i,rstart,nnz;
4372   size_t            len;
4373   const PetscInt    *indx;
4374   PetscViewer       out;
4375   char              *name;
4376   Mat               B;
4377   const PetscScalar *values;
4378 
4379   PetscFunctionBegin;
4380   ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr);
4381   ierr = MatGetSize(A,0,&N);CHKERRQ(ierr);
4382   /* Should this be the type of the diagonal block of A? */
4383   ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr);
4384   ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr);
4385   ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr);
4386   ierr = MatSeqAIJSetPreallocation(B,0,PETSC_NULL);CHKERRQ(ierr);
4387   ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr);
4388   for (i=0;i<m;i++) {
4389     ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
4390     ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
4391     ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
4392   }
4393   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4394   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4395 
4396   ierr = MPI_Comm_rank(((PetscObject)A)->comm,&rank);CHKERRQ(ierr);
4397   ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr);
4398   ierr = PetscMalloc((len+5)*sizeof(char),&name);CHKERRQ(ierr);
4399   sprintf(name,"%s.%d",outfile,rank);
4400   ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr);
4401   ierr = PetscFree(name);CHKERRQ(ierr);
4402   ierr = MatView(B,out);CHKERRQ(ierr);
4403   ierr = PetscViewerDestroy(&out);CHKERRQ(ierr);
4404   ierr = MatDestroy(&B);CHKERRQ(ierr);
4405   PetscFunctionReturn(0);
4406 }
4407 
4408 extern PetscErrorCode MatDestroy_MPIAIJ(Mat);
4409 #undef __FUNCT__
4410 #define __FUNCT__ "MatDestroy_MPIAIJ_SeqsToMPI"
4411 PetscErrorCode  MatDestroy_MPIAIJ_SeqsToMPI(Mat A)
4412 {
4413   PetscErrorCode       ierr;
4414   Mat_Merge_SeqsToMPI  *merge;
4415   PetscContainer       container;
4416 
4417   PetscFunctionBegin;
4418   ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr);
4419   if (container) {
4420     ierr = PetscContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr);
4421     ierr = PetscFree(merge->id_r);CHKERRQ(ierr);
4422     ierr = PetscFree(merge->len_s);CHKERRQ(ierr);
4423     ierr = PetscFree(merge->len_r);CHKERRQ(ierr);
4424     ierr = PetscFree(merge->bi);CHKERRQ(ierr);
4425     ierr = PetscFree(merge->bj);CHKERRQ(ierr);
4426     ierr = PetscFree(merge->buf_ri[0]);CHKERRQ(ierr);
4427     ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr);
4428     ierr = PetscFree(merge->buf_rj[0]);CHKERRQ(ierr);
4429     ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr);
4430     ierr = PetscFree(merge->coi);CHKERRQ(ierr);
4431     ierr = PetscFree(merge->coj);CHKERRQ(ierr);
4432     ierr = PetscFree(merge->owners_co);CHKERRQ(ierr);
4433     ierr = PetscLayoutDestroy(&merge->rowmap);CHKERRQ(ierr);
4434     ierr = PetscFree(merge);CHKERRQ(ierr);
4435     ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr);
4436   }
4437   ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr);
4438   PetscFunctionReturn(0);
4439 }
4440 
4441 #include <../src/mat/utils/freespace.h>
4442 #include <petscbt.h>
4443 
4444 #undef __FUNCT__
4445 #define __FUNCT__ "MatMerge_SeqsToMPINumeric"
4446 PetscErrorCode  MatMerge_SeqsToMPINumeric(Mat seqmat,Mat mpimat)
4447 {
4448   PetscErrorCode       ierr;
4449   MPI_Comm             comm=((PetscObject)mpimat)->comm;
4450   Mat_SeqAIJ           *a=(Mat_SeqAIJ*)seqmat->data;
4451   PetscMPIInt          size,rank,taga,*len_s;
4452   PetscInt             N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj=a->j;
4453   PetscInt             proc,m;
4454   PetscInt             **buf_ri,**buf_rj;
4455   PetscInt             k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj;
4456   PetscInt             nrows,**buf_ri_k,**nextrow,**nextai;
4457   MPI_Request          *s_waits,*r_waits;
4458   MPI_Status           *status;
4459   MatScalar            *aa=a->a;
4460   MatScalar            **abuf_r,*ba_i;
4461   Mat_Merge_SeqsToMPI  *merge;
4462   PetscContainer       container;
4463 
4464   PetscFunctionBegin;
4465   ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
4466 
4467   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4468   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4469 
4470   ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr);
       if (!container) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Matrix was not created by MatMerge_SeqsToMPISymbolic()");
4471   ierr = PetscContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr);
4472 
4473   bi     = merge->bi;
4474   bj     = merge->bj;
4475   buf_ri = merge->buf_ri;
4476   buf_rj = merge->buf_rj;
4477 
4478   ierr   = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr);
4479   owners = merge->rowmap->range;
4480   len_s  = merge->len_s;
4481 
4482   /* send and recv matrix values */
4483   /*-----------------------------*/
4484   ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr);
4485   ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr);
4486 
4487   ierr = PetscMalloc((merge->nsend+1)*sizeof(MPI_Request),&s_waits);CHKERRQ(ierr);
4488   for (proc=0,k=0; proc<size; proc++){
4489     if (!len_s[proc]) continue;
4490     i = owners[proc];
4491     ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr);
4492     k++;
4493   }
4494 
4495   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);}
4496   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);}
4497   ierr = PetscFree(status);CHKERRQ(ierr);
4498 
4499   ierr = PetscFree(s_waits);CHKERRQ(ierr);
4500   ierr = PetscFree(r_waits);CHKERRQ(ierr);
4501 
4502   /* insert mat values of mpimat */
4503   /*----------------------------*/
4504   ierr = PetscMalloc(N*sizeof(PetscScalar),&ba_i);CHKERRQ(ierr);
4505   ierr = PetscMalloc3(merge->nrecv,PetscInt*,&buf_ri_k,merge->nrecv,PetscInt*,&nextrow,merge->nrecv,PetscInt*,&nextai);CHKERRQ(ierr);
4506 
4507   for (k=0; k<merge->nrecv; k++){
4508     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th received i-structure */
4509     nrows = *(buf_ri_k[k]);
4510     nextrow[k]  = buf_ri_k[k]+1;  /* next row number of k-th received i-structure */
4511     nextai[k]   = buf_ri_k[k] + (nrows + 1);/* points to the next i-structure of k-th received i-structure */
4512   }
4513 
4514   /* set values of ba */
4515   m = merge->rowmap->n;
4516   for (i=0; i<m; i++) {
4517     arow = owners[rank] + i;
4518     bj_i = bj+bi[i];  /* col indices of the i-th row of mpimat */
4519     bnzi = bi[i+1] - bi[i];
4520     ierr = PetscMemzero(ba_i,bnzi*sizeof(PetscScalar));CHKERRQ(ierr);
4521 
4522     /* add local non-zero vals of this proc's seqmat into ba */
4523     anzi = ai[arow+1] - ai[arow];
4524     aj   = a->j + ai[arow];
4525     aa   = a->a + ai[arow];
4526     nextaj = 0;
4527     for (j=0; nextaj<anzi; j++){
4528       if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */
4529         ba_i[j] += aa[nextaj++];
4530       }
4531     }
4532 
4533     /* add received vals into ba */
4534     for (k=0; k<merge->nrecv; k++){ /* k-th received message */
4535       /* i-th row */
4536       if (i == *nextrow[k]) {
4537         anzi = *(nextai[k]+1) - *nextai[k];
4538         aj   = buf_rj[k] + *(nextai[k]);
4539         aa   = abuf_r[k] + *(nextai[k]);
4540         nextaj = 0;
4541         for (j=0; nextaj<anzi; j++){
4542           if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */
4543             ba_i[j] += aa[nextaj++];
4544           }
4545         }
4546         nextrow[k]++; nextai[k]++;
4547       }
4548     }
4549     ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr);
4550   }
4551   ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4552   ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4553 
4554   ierr = PetscFree(abuf_r[0]);CHKERRQ(ierr);
4555   ierr = PetscFree(abuf_r);CHKERRQ(ierr);
4556   ierr = PetscFree(ba_i);CHKERRQ(ierr);
4557   ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
4558   ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
4559   PetscFunctionReturn(0);
4560 }
4561 
4562 extern PetscErrorCode  MatDestroy_MPIAIJ_SeqsToMPI(Mat);
4563 
4564 #undef __FUNCT__
4565 #define __FUNCT__ "MatMerge_SeqsToMPISymbolic"
4566 PetscErrorCode  MatMerge_SeqsToMPISymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat)
4567 {
4568   PetscErrorCode       ierr;
4569   Mat                  B_mpi;
4570   Mat_SeqAIJ           *a=(Mat_SeqAIJ*)seqmat->data;
4571   PetscMPIInt          size,rank,tagi,tagj,*len_s,*len_si,*len_ri;
4572   PetscInt             **buf_rj,**buf_ri,**buf_ri_k;
4573   PetscInt             M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j;
4574   PetscInt             len,proc,*dnz,*onz;
4575   PetscInt             k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0;
4576   PetscInt             nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai;
4577   MPI_Request          *si_waits,*sj_waits,*ri_waits,*rj_waits;
4578   MPI_Status           *status;
4579   PetscFreeSpaceList   free_space=PETSC_NULL,current_space=PETSC_NULL;
4580   PetscBT              lnkbt;
4581   Mat_Merge_SeqsToMPI  *merge;
4582   PetscContainer       container;
4583 
4584   PetscFunctionBegin;
4585   ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4586 
4587   /* make sure it is a PETSc comm */
4588   ierr = PetscCommDuplicate(comm,&comm,PETSC_NULL);CHKERRQ(ierr);
4589   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4590   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4591 
4592   ierr = PetscNew(Mat_Merge_SeqsToMPI,&merge);CHKERRQ(ierr);
4593   ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr);
4594 
4595   /* determine row ownership */
4596   /*---------------------------------------------------------*/
4597   ierr = PetscLayoutCreate(comm,&merge->rowmap);CHKERRQ(ierr);
4598   ierr = PetscLayoutSetLocalSize(merge->rowmap,m);CHKERRQ(ierr);
4599   ierr = PetscLayoutSetSize(merge->rowmap,M);CHKERRQ(ierr);
4600   ierr = PetscLayoutSetBlockSize(merge->rowmap,1);CHKERRQ(ierr);
4601   ierr = PetscLayoutSetUp(merge->rowmap);CHKERRQ(ierr);
4602   ierr = PetscMalloc(size*sizeof(PetscMPIInt),&len_si);CHKERRQ(ierr);
4603   ierr = PetscMalloc(size*sizeof(PetscMPIInt),&merge->len_s);CHKERRQ(ierr);
4604 
4605   m      = merge->rowmap->n;
4606   M      = merge->rowmap->N;
4607   owners = merge->rowmap->range;
4608 
4609   /* determine the number of messages to send, their lengths */
4610   /*---------------------------------------------------------*/
4611   len_s  = merge->len_s;
4612 
4613   len = 0;  /* length of buf_si[] */
4614   merge->nsend = 0;
4615   for (proc=0; proc<size; proc++){
4616     len_si[proc] = 0;
4617     if (proc == rank){
4618       len_s[proc] = 0;
4619     } else {
4620       len_si[proc] = owners[proc+1] - owners[proc] + 1;
4621       len_s[proc] = ai[owners[proc+1]] - ai[owners[proc]]; /* num of nonzeros to be sent to [proc] */
4622     }
4623     if (len_s[proc]) {
4624       merge->nsend++;
4625       nrows = 0;
4626       for (i=owners[proc]; i<owners[proc+1]; i++){
4627         if (ai[i+1] > ai[i]) nrows++;
4628       }
4629       len_si[proc] = 2*(nrows+1);
4630       len += len_si[proc];
4631     }
4632   }
4633 
4634   /* determine the number and length of messages to receive for ij-structure */
4635   /*-------------------------------------------------------------------------*/
4636   ierr = PetscGatherNumberOfMessages(comm,PETSC_NULL,len_s,&merge->nrecv);CHKERRQ(ierr);
4637   ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr);
4638 
4639   /* post the Irecv of j-structure */
4640   /*-------------------------------*/
4641   ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr);
4642   ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr);
4643 
4644   /* post the Isend of j-structure */
4645   /*--------------------------------*/
4646   ierr = PetscMalloc2(merge->nsend,MPI_Request,&si_waits,merge->nsend,MPI_Request,&sj_waits);CHKERRQ(ierr);
4647 
4648   for (proc=0, k=0; proc<size; proc++){
4649     if (!len_s[proc]) continue;
4650     i = owners[proc];
4651     ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr);
4652     k++;
4653   }
4654 
4655   /* receives and sends of j-structure are complete */
4656   /*------------------------------------------------*/
4657   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);}
4658   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);}
4659 
4660   /* send and recv i-structure */
4661   /*---------------------------*/
4662   ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr);
4663   ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr);
4664 
4665   ierr = PetscMalloc((len+1)*sizeof(PetscInt),&buf_s);CHKERRQ(ierr);
4666   buf_si = buf_s;  /* points to the beginning of k-th msg to be sent */
4667   for (proc=0,k=0; proc<size; proc++){
4668     if (!len_s[proc]) continue;
4669     /* form outgoing message for i-structure:
4670          buf_si[0]:                 nrows to be sent
4671                [1:nrows]:           row index (local to the receiving process)
4672                [nrows+1:2*nrows+1]: i-structure index
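              e.g. sending two nonempty local rows, 2 and 5, with 3 and 4 nonzeros
              respectively, would give buf_si = {2, 2,5, 0,3,7} (an illustrative
              example, not data produced by a particular run)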
4673     */
4674     /*-------------------------------------------*/
4675     nrows = len_si[proc]/2 - 1;
4676     buf_si_i    = buf_si + nrows+1;
4677     buf_si[0]   = nrows;
4678     buf_si_i[0] = 0;
4679     nrows = 0;
4680     for (i=owners[proc]; i<owners[proc+1]; i++){
4681       anzi = ai[i+1] - ai[i];
4682       if (anzi) {
4683         buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */
4684         buf_si[nrows+1] = i-owners[proc]; /* local row index */
4685         nrows++;
4686       }
4687     }
4688     ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr);
4689     k++;
4690     buf_si += len_si[proc];
4691   }
4692 
4693   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);}
4694   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);}
4695 
4696   ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr);
4697   for (i=0; i<merge->nrecv; i++){
4698     ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr);
4699   }
4700 
4701   ierr = PetscFree(len_si);CHKERRQ(ierr);
4702   ierr = PetscFree(len_ri);CHKERRQ(ierr);
4703   ierr = PetscFree(rj_waits);CHKERRQ(ierr);
4704   ierr = PetscFree2(si_waits,sj_waits);CHKERRQ(ierr);
4705   ierr = PetscFree(ri_waits);CHKERRQ(ierr);
4706   ierr = PetscFree(buf_s);CHKERRQ(ierr);
4707   ierr = PetscFree(status);CHKERRQ(ierr);
4708 
4709   /* compute a local seq matrix in each processor */
4710   /*----------------------------------------------*/
4711   /* allocate bi array and free space for accumulating nonzero column info */
4712   ierr = PetscMalloc((m+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr);
4713   bi[0] = 0;
4714 
4715   /* create and initialize a linked list */
4716   nlnk = N+1;
4717   ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4718 
4719   /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */
4720   len  = ai[owners[rank+1]] - ai[owners[rank]];
4722   ierr = PetscFreeSpaceGet((PetscInt)(2*len+1),&free_space);CHKERRQ(ierr);
4723   current_space = free_space;
4724 
4725   /* determine symbolic info for each local row */
4726   ierr = PetscMalloc3(merge->nrecv,PetscInt*,&buf_ri_k,merge->nrecv,PetscInt*,&nextrow,merge->nrecv,PetscInt*,&nextai);CHKERRQ(ierr);
4727 
4728   for (k=0; k<merge->nrecv; k++){
4729     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th received i-structure */
4730     nrows = *buf_ri_k[k];
4731     nextrow[k]  = buf_ri_k[k] + 1;  /* next row number of k-th received i-structure */
4732     nextai[k]   = buf_ri_k[k] + (nrows + 1);/* points to the next i-structure of k-th received i-structure */
4733   }
4734 
4735   ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
4736   len = 0;
4737   for (i=0;i<m;i++) {
4738     bnzi   = 0;
4739     /* add local non-zero cols of this proc's seqmat into lnk */
4740     arow   = owners[rank] + i;
4741     anzi   = ai[arow+1] - ai[arow];
4742     aj     = a->j + ai[arow];
4743     ierr = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4744     bnzi += nlnk;
4745     /* add received col data into lnk */
4746     for (k=0; k<merge->nrecv; k++){ /* k-th received message */
4747       if (i == *nextrow[k]) { /* i-th row */
4748         anzi = *(nextai[k]+1) - *nextai[k];
4749         aj   = buf_rj[k] + *nextai[k];
4750         ierr = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4751         bnzi += nlnk;
4752         nextrow[k]++; nextai[k]++;
4753       }
4754     }
4755     if (len < bnzi) len = bnzi;  /* =max(bnzi) */
4756 
4757     /* if free space is not available, make more free space */
4758     if (current_space->local_remaining<bnzi) {
4759       ierr = PetscFreeSpaceGet(bnzi+current_space->total_array_size,&current_space);CHKERRQ(ierr);
4760       nspacedouble++;
4761     }
4762     /* copy data into free space, then initialize lnk */
4763     ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
4764     ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr);
4765 
4766     current_space->array           += bnzi;
4767     current_space->local_used      += bnzi;
4768     current_space->local_remaining -= bnzi;
4769 
4770     bi[i+1] = bi[i] + bnzi;
4771   }
4772 
4773   ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
4774 
4775   ierr = PetscMalloc((bi[m]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr);
4776   ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr);
4777   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
4778 
4779   /* create symbolic parallel matrix B_mpi */
4780   /*---------------------------------------*/
4781   ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr);
4782   if (n==PETSC_DECIDE) {
4783     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr);
4784   } else {
4785     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
4786   }
4787   ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr);
4788   ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr);
4789   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
4790   ierr = MatSetOption(B_mpi,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
4791 
4792   /* B_mpi is not ready for use - assembly will be done by MatMerge_SeqsToMPINumeric() */
4793   B_mpi->assembled     = PETSC_FALSE;
4794   B_mpi->ops->destroy  = MatDestroy_MPIAIJ_SeqsToMPI;
4795   merge->bi            = bi;
4796   merge->bj            = bj;
4797   merge->buf_ri        = buf_ri;
4798   merge->buf_rj        = buf_rj;
4799   merge->coi           = PETSC_NULL;
4800   merge->coj           = PETSC_NULL;
4801   merge->owners_co     = PETSC_NULL;
4802 
4803   ierr = PetscCommDestroy(&comm);CHKERRQ(ierr);
4804 
4805   /* attach the supporting struct to B_mpi for reuse */
4806   ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
4807   ierr = PetscContainerSetPointer(container,merge);CHKERRQ(ierr);
4808   ierr = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr);
4809   ierr = PetscContainerDestroy(&container);CHKERRQ(ierr);
4810   *mpimat = B_mpi;
4811 
4812   ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4813   PetscFunctionReturn(0);
4814 }
4815 
4816 #undef __FUNCT__
4817 #define __FUNCT__ "MatMerge_SeqsToMPI"
4818 /*@C
4819       MatMerge_SeqsToMPI - Creates an MPIAIJ matrix by adding together sequential
4820                  matrices from each processor
4821 
4822     Collective on MPI_Comm
4823 
4824    Input Parameters:
4825 +    comm - the communicator the parallel matrix will live on
4826 .    seqmat - the input sequential matrix (one per process)
4827 .    m - number of local rows (or PETSC_DECIDE)
4828 .    n - number of local columns (or PETSC_DECIDE)
4829 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4830 
4831    Output Parameter:
4832 .    mpimat - the parallel matrix generated
4833 
4834     Level: advanced
4835 
4836    Notes:
4837      The dimensions of the sequential matrix on each processor MUST be the same.
4838      The input seqmat is included in the container "Mat_Merge_SeqsToMPI", and will be
4839      destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat.
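
   A typical use (a sketch; each process contributes its own sequential matrix,
   and the pieces are added into the parallel result):

.vb
      ierr = MatMerge_SeqsToMPI(comm,seqmat,PETSC_DECIDE,PETSC_DECIDE,MAT_INITIAL_MATRIX,&mpimat);CHKERRQ(ierr);
.ve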
4840 @*/
4841 PetscErrorCode  MatMerge_SeqsToMPI(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat)
4842 {
4843   PetscErrorCode   ierr;
4844   PetscMPIInt     size;
4845 
4846   PetscFunctionBegin;
4847   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4848   if (size == 1){
4849      ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4850      if (scall == MAT_INITIAL_MATRIX){
4851        ierr = MatDuplicate(seqmat,MAT_COPY_VALUES,mpimat);CHKERRQ(ierr);
4852      } else {
4853        ierr = MatCopy(seqmat,*mpimat,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
4854      }
4855      ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4856      PetscFunctionReturn(0);
4857   }
4858   ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4859   if (scall == MAT_INITIAL_MATRIX){
4860     ierr = MatMerge_SeqsToMPISymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr);
4861   }
4862   ierr = MatMerge_SeqsToMPINumeric(seqmat,*mpimat);CHKERRQ(ierr);
4863   ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4864   PetscFunctionReturn(0);
4865 }
4866 
4867 #undef __FUNCT__
4868 #define __FUNCT__ "MatMPIAIJGetLocalMat"
4869 /*@
4870      MatMPIAIJGetLocalMat - Creates a SeqAIJ matrix from an MPIAIJ matrix by taking all its local rows and putting them into a sequential matrix with
4871           mlocal rows and n columns, where mlocal is the row count obtained with MatGetLocalSize() and n is the global column count obtained
4872           with MatGetSize()
4873 
4874     Not Collective
4875 
4876    Input Parameters:
4877 +    A - the matrix
4878 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4879 
4880    Output Parameter:
4881 .    A_loc - the local sequential matrix generated
4882 
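   A typical call sequence (a sketch; error checking elided):

.vb
      ierr = MatMPIAIJGetLocalMat(A,MAT_INITIAL_MATRIX,&A_loc);CHKERRQ(ierr);
      /* ... the values of A change, but its nonzero structure does not ... */
      ierr = MatMPIAIJGetLocalMat(A,MAT_REUSE_MATRIX,&A_loc);CHKERRQ(ierr);
.ve
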
4883     Level: developer
4884 
4885 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMatCondensed()
4886 
4887 @*/
4888 PetscErrorCode  MatMPIAIJGetLocalMat(Mat A,MatReuse scall,Mat *A_loc)
4889 {
4890   PetscErrorCode  ierr;
4891   Mat_MPIAIJ      *mpimat=(Mat_MPIAIJ*)A->data;
4892   Mat_SeqAIJ      *mat,*a=(Mat_SeqAIJ*)(mpimat->A)->data,*b=(Mat_SeqAIJ*)(mpimat->B)->data;
4893   PetscInt        *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*cmap=mpimat->garray;
4894   MatScalar       *aa=a->a,*ba=b->a,*cam;
4895   PetscScalar     *ca;
4896   PetscInt        am=A->rmap->n,i,j,k,cstart=A->cmap->rstart;
4897   PetscInt        *ci,*cj,col,ncols_d,ncols_o,jo;
4898   PetscBool       match;
4899 
4900   PetscFunctionBegin;
4901   ierr = PetscTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr);
4902   if (!match) SETERRQ(((PetscObject)A)->comm, PETSC_ERR_SUP,"Requires MPIAIJ matrix as input");
4903   ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
4904   if (scall == MAT_INITIAL_MATRIX){
4905     ierr = PetscMalloc((1+am)*sizeof(PetscInt),&ci);CHKERRQ(ierr);
4906     ci[0] = 0;
4907     for (i=0; i<am; i++){
4908       ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]);
4909     }
4910     ierr = PetscMalloc((1+ci[am])*sizeof(PetscInt),&cj);CHKERRQ(ierr);
4911     ierr = PetscMalloc((1+ci[am])*sizeof(PetscScalar),&ca);CHKERRQ(ierr);
4912     k = 0;
4913     for (i=0; i<am; i++) {
4914       ncols_o = bi[i+1] - bi[i];
4915       ncols_d = ai[i+1] - ai[i];
4916       /* off-diagonal portion of A */
4917       for (jo=0; jo<ncols_o; jo++) {
4918         col = cmap[*bj];
4919         if (col >= cstart) break;
4920         cj[k]   = col; bj++;
4921         ca[k++] = *ba++;
4922       }
4923       /* diagonal portion of A */
4924       for (j=0; j<ncols_d; j++) {
4925         cj[k]   = cstart + *aj++;
4926         ca[k++] = *aa++;
4927       }
4928       /* off-diagonal portion of A */
4929       for (j=jo; j<ncols_o; j++) {
4930         cj[k]   = cmap[*bj++];
4931         ca[k++] = *ba++;
4932       }
4933     }
4934     /* put together the new matrix */
4935     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr);
4936     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
4937     /* Since these are PETSc arrays, change flags to free them as necessary. */
4938     mat          = (Mat_SeqAIJ*)(*A_loc)->data;
4939     mat->free_a  = PETSC_TRUE;
4940     mat->free_ij = PETSC_TRUE;
4941     mat->nonew   = 0;
4942   } else if (scall == MAT_REUSE_MATRIX){
4943     mat=(Mat_SeqAIJ*)(*A_loc)->data;
4944     ci = mat->i; cj = mat->j; cam = mat->a;
4945     for (i=0; i<am; i++) {
4946       /* off-diagonal portion of A */
4947       ncols_o = bi[i+1] - bi[i];
4948       for (jo=0; jo<ncols_o; jo++) {
4949         col = cmap[*bj];
4950         if (col >= cstart) break;
4951         *cam++ = *ba++; bj++;
4952       }
4953       /* diagonal portion of A */
4954       ncols_d = ai[i+1] - ai[i];
4955       for (j=0; j<ncols_d; j++) *cam++ = *aa++;
4956       /* off-diagonal portion of A */
4957       for (j=jo; j<ncols_o; j++) {
4958         *cam++ = *ba++; bj++;
4959       }
4960     }
4961   } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
4962   ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
4963   PetscFunctionReturn(0);
4964 }
4965 
4966 #undef __FUNCT__
4967 #define __FUNCT__ "MatMPIAIJGetLocalMatCondensed"
4968 /*@C
4969      MatMPIAIJGetLocalMatCondensed - Creates a SeqAIJ matrix from an MPIAIJ matrix by taking all its local rows and NON-ZERO columns
4970 
4971     Not Collective
4972 
4973    Input Parameters:
4974 +    A - the matrix
4975 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4976 -    row, col - index sets of rows and columns to extract (or PETSC_NULL)
4977 
4978    Output Parameter:
4979 .    A_loc - the local sequential matrix generated
4980 
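   A typical use, extracting all local rows and all nonzero columns (a sketch):

.vb
      ierr = MatMPIAIJGetLocalMatCondensed(A,MAT_INITIAL_MATRIX,PETSC_NULL,PETSC_NULL,&A_loc);CHKERRQ(ierr);
.ve
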
4981     Level: developer
4982 
4983 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMat()
4984 
4985 @*/
4986 PetscErrorCode  MatMPIAIJGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc)
4987 {
4988   Mat_MPIAIJ        *a=(Mat_MPIAIJ*)A->data;
4989   PetscErrorCode    ierr;
4990   PetscInt          i,start,end,ncols,nzA,nzB,*cmap,imark,*idx;
4991   IS                isrowa,iscola;
4992   Mat               *aloc;
4993   PetscBool       match;
4994 
4995   PetscFunctionBegin;
4996   ierr = PetscTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr);
4997   if (!match) SETERRQ(((PetscObject)A)->comm, PETSC_ERR_SUP,"Requires MPIAIJ matrix as input");
4998   ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
4999   if (!row){
5000     start = A->rmap->rstart; end = A->rmap->rend;
5001     ierr = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr);
5002   } else {
5003     isrowa = *row;
5004   }
5005   if (!col){
5006     start = A->cmap->rstart;
5007     cmap  = a->garray;
5008     nzA   = a->A->cmap->n;
5009     nzB   = a->B->cmap->n;
5010     ierr  = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr);
5011     ncols = 0;
5012     for (i=0; i<nzB; i++) {
5013       if (cmap[i] < start) idx[ncols++] = cmap[i];
5014       else break;
5015     }
5016     imark = i;
5017     for (i=0; i<nzA; i++) idx[ncols++] = start + i;
5018     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i];
5019     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&iscola);CHKERRQ(ierr);
5020   } else {
5021     iscola = *col;
5022   }
5023   if (scall != MAT_INITIAL_MATRIX){
5024     ierr = PetscMalloc(sizeof(Mat),&aloc);CHKERRQ(ierr);
5025     aloc[0] = *A_loc;
5026   }
5027   ierr = MatGetSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr);
5028   *A_loc = aloc[0];
5029   ierr = PetscFree(aloc);CHKERRQ(ierr);
5030   if (!row){
5031     ierr = ISDestroy(&isrowa);CHKERRQ(ierr);
5032   }
5033   if (!col){
5034     ierr = ISDestroy(&iscola);CHKERRQ(ierr);
5035   }
5036   ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
5037   PetscFunctionReturn(0);
5038 }
5039 
5040 #undef __FUNCT__
5041 #define __FUNCT__ "MatGetBrowsOfAcols"
5042 /*@C
5043     MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns of the local part of A
5044 
5045     Collective on Mat
5046 
5047    Input Parameters:
5048 +    A,B - the matrices in mpiaij format
5049 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5050 -    rowb, colb - index sets of rows and columns of B to extract (or PETSC_NULL)
5051 
5052    Output Parameter:
5053 +    rowb, colb - index sets of rows and columns of B to extract
5054 -    B_seq - the sequential matrix generated
5055 
5056     Level: developer
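
   A typical use (a sketch; on the first call the row/column index sets are
   created and returned so they can be reused on subsequent calls):

.vb
      IS  rowb = PETSC_NULL,colb = PETSC_NULL;
      Mat B_seq = PETSC_NULL;

      ierr = MatGetBrowsOfAcols(A,B,MAT_INITIAL_MATRIX,&rowb,&colb,&B_seq);CHKERRQ(ierr);
      /* ... the numerical values of B change ... */
      ierr = MatGetBrowsOfAcols(A,B,MAT_REUSE_MATRIX,&rowb,&colb,&B_seq);CHKERRQ(ierr);
.ve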
5057 
5058 @*/
5059 PetscErrorCode  MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,Mat *B_seq)
5060 {
5061   Mat_MPIAIJ        *a=(Mat_MPIAIJ*)A->data;
5062   PetscErrorCode    ierr;
5063   PetscInt          *idx,i,start,ncols,nzA,nzB,*cmap,imark;
5064   IS                isrowb,iscolb;
5065   Mat               *bseq=PETSC_NULL;
5066 
5067   PetscFunctionBegin;
5068   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend){
5069     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
5070   }
5071   ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
5072 
5073   if (scall == MAT_INITIAL_MATRIX){
5074     start = A->cmap->rstart;
5075     cmap  = a->garray;
5076     nzA   = a->A->cmap->n;
5077     nzB   = a->B->cmap->n;
5078     ierr  = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr);
5079     ncols = 0;
5080     for (i=0; i<nzB; i++) {  /* row < local row index */
5081       if (cmap[i] < start) idx[ncols++] = cmap[i];
5082       else break;
5083     }
5084     imark = i;
5085     for (i=0; i<nzA; i++) idx[ncols++] = start + i;  /* local rows */
5086     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */
5087     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&isrowb);CHKERRQ(ierr);
5088     ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr);
5089   } else {
5090     if (!rowb || !colb) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX");
5091     isrowb = *rowb; iscolb = *colb;
5092     ierr = PetscMalloc(sizeof(Mat),&bseq);CHKERRQ(ierr);
5093     bseq[0] = *B_seq;
5094   }
5095   ierr = MatGetSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr);
5096   *B_seq = bseq[0];
5097   ierr = PetscFree(bseq);CHKERRQ(ierr);
5098   if (!rowb){
5099     ierr = ISDestroy(&isrowb);CHKERRQ(ierr);
5100   } else {
5101     *rowb = isrowb;
5102   }
5103   if (!colb){
5104     ierr = ISDestroy(&iscolb);CHKERRQ(ierr);
5105   } else {
5106     *colb = iscolb;
5107   }
5108   ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
5109   PetscFunctionReturn(0);
5110 }
5111 
5112 #undef __FUNCT__
5113 #define __FUNCT__ "MatGetBrowsOfAoCols_MPIAIJ"
5114 /*
5115     MatGetBrowsOfAoCols_MPIAIJ - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns
5116     of the OFF-DIAGONAL portion of local A
5117 
5118     Collective on Mat
5119 
5120    Input Parameters:
5121 +    A,B - the matrices in mpiaij format
5122 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5123 
5124    Output Parameter:
5125 +    startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE_MATRIX (or PETSC_NULL)
5126 .    startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE_MATRIX (or PETSC_NULL)
5127 .    bufa_ptr - array for sending matrix values, saved for MAT_REUSE_MATRIX (or PETSC_NULL)
5128 -    B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N
5129 
5130     Level: developer
5131 
5132 */
5133 PetscErrorCode  MatGetBrowsOfAoCols_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscInt **startsj_s,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth)
5134 {
5135   VecScatter_MPI_General *gen_to,*gen_from;
5136   PetscErrorCode         ierr;
5137   Mat_MPIAIJ             *a=(Mat_MPIAIJ*)A->data;
5138   Mat_SeqAIJ             *b_oth;
5139   VecScatter             ctx=a->Mvctx;
5140   MPI_Comm               comm=((PetscObject)ctx)->comm;
5141   PetscMPIInt            *rprocs,*sprocs,tag=((PetscObject)ctx)->tag,rank;
5142   PetscInt               *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap->n,row,*b_othi,*b_othj;
5143   PetscScalar            *rvalues,*svalues;
5144   MatScalar              *b_otha,*bufa,*bufA;
5145   PetscInt               i,j,k,l,ll,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len;
5146   MPI_Request            *rwaits = PETSC_NULL,*swaits = PETSC_NULL;
5147   MPI_Status             *sstatus,rstatus;
5148   PetscMPIInt            jj;
5149   PetscInt               *cols,sbs,rbs;
5150   PetscScalar            *vals;
5151 
5152   PetscFunctionBegin;
5153   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend){
5154     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
5155   }
5156   ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
5157   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
5158 
5159   gen_to   = (VecScatter_MPI_General*)ctx->todata;
5160   gen_from = (VecScatter_MPI_General*)ctx->fromdata;
5161   rvalues  = gen_from->values; /* holds the length of receiving row */
5162   svalues  = gen_to->values;   /* holds the length of sending row */
5163   nrecvs   = gen_from->n;
5164   nsends   = gen_to->n;
5165 
5166   ierr = PetscMalloc2(nrecvs,MPI_Request,&rwaits,nsends,MPI_Request,&swaits);CHKERRQ(ierr);
5167   srow     = gen_to->indices;   /* local row index to be sent */
5168   sstarts  = gen_to->starts;
5169   sprocs   = gen_to->procs;
5170   sstatus  = gen_to->sstatus;
5171   sbs      = gen_to->bs;
5172   rstarts  = gen_from->starts;
5173   rprocs   = gen_from->procs;
5174   rbs      = gen_from->bs;
5175 
5176   if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX;
5177   if (scall == MAT_INITIAL_MATRIX){
5178     /* i-array */
5179     /*---------*/
5180     /*  post receives */
5181     for (i=0; i<nrecvs; i++){
5182       rowlen = (PetscInt*)rvalues + rstarts[i]*rbs;
5183       nrows = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */
5184       ierr = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5185     }
5186 
5187     /* pack the outgoing message */
5188     ierr = PetscMalloc2(nsends+1,PetscInt,&sstartsj,nrecvs+1,PetscInt,&rstartsj);CHKERRQ(ierr);
5189     sstartsj[0] = 0;  rstartsj[0] = 0;
5190     len = 0; /* total length of j or a array to be sent */
5191     k = 0;
5192     for (i=0; i<nsends; i++){
5193       rowlen = (PetscInt*)svalues + sstarts[i]*sbs;
5194       nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5195       for (j=0; j<nrows; j++) {
5196         row = srow[k] + B->rmap->range[rank]; /* global row idx */
5197         for (l=0; l<sbs; l++){
5198           ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); /* rowlength */
5199           rowlen[j*sbs+l] = ncols;
5200           len += ncols;
5201           ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr);
5202         }
5203         k++;
5204       }
5205       ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5206       sstartsj[i+1] = len;  /* starting point of (i+1)-th outgoing msg in bufj and bufa */
5207     }
5208     /* recvs and sends of i-array are completed */
5209     i = nrecvs;
5210     while (i--) {
5211       ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5212     }
5213     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
5214 
5215     /* allocate buffers for sending j and a arrays */
5216     ierr = PetscMalloc((len+1)*sizeof(PetscInt),&bufj);CHKERRQ(ierr);
5217     ierr = PetscMalloc((len+1)*sizeof(PetscScalar),&bufa);CHKERRQ(ierr);
5218 
5219     /* create i-array of B_oth */
5220     ierr = PetscMalloc((aBn+2)*sizeof(PetscInt),&b_othi);CHKERRQ(ierr);
5221     b_othi[0] = 0;
5222     len = 0; /* total length of j or a array to be received */
5223     k = 0;
5224     for (i=0; i<nrecvs; i++){
5225       rowlen = (PetscInt*)rvalues + rstarts[i]*rbs;
5226       nrows = rbs*(rstarts[i+1]-rstarts[i]); /* num of rows to be received */
5227       for (j=0; j<nrows; j++) {
5228         b_othi[k+1] = b_othi[k] + rowlen[j];
5229         len += rowlen[j]; k++;
5230       }
5231       rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */
5232     }
5233 
5234     /* allocate space for the j and a arrays of B_oth */
5235     ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(PetscInt),&b_othj);CHKERRQ(ierr);
5236     ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(MatScalar),&b_otha);CHKERRQ(ierr);
5237 
5238     /* j-array */
5239     /*---------*/
5240     /*  post receives of j-array */
5241     for (i=0; i<nrecvs; i++){
5242       nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
5243       ierr = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5244     }
5245 
5246     /* pack the outgoing message j-array */
5247     k = 0;
5248     for (i=0; i<nsends; i++){
5249       nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5250       bufJ = bufj+sstartsj[i];
5251       for (j=0; j<nrows; j++) {
5252         row  = srow[k++] + B->rmap->range[rank]; /* global row idx */
5253         for (ll=0; ll<sbs; ll++){
5254           ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr);
5255           for (l=0; l<ncols; l++){
5256             *bufJ++ = cols[l];
5257           }
5258           ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr);
5259         }
5260       }
5261       ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5262     }
5263 
5264     /* recvs and sends of j-array are completed */
5265     i = nrecvs;
5266     while (i--) {
5267       ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5268     }
5269     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
5270   } else if (scall == MAT_REUSE_MATRIX){
5271     sstartsj = *startsj_s;
5272     rstartsj = *startsj_r;
5273     bufa     = *bufa_ptr;
5274     b_oth    = (Mat_SeqAIJ*)(*B_oth)->data;
5275     b_otha   = b_oth->a;
5276   } else {
5277     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
5278   }
5279 
5280   /* a-array */
5281   /*---------*/
5282   /*  post receives of a-array */
5283   for (i=0; i<nrecvs; i++){
5284     nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
5285     ierr = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5286   }
5287 
5288   /* pack the outgoing message a-array */
5289   k = 0;
5290   for (i=0; i<nsends; i++){
5291     nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5292     bufA = bufa+sstartsj[i];
5293     for (j=0; j<nrows; j++) {
5294       row  = srow[k++] + B->rmap->range[rank]; /* global row idx */
5295       for (ll=0; ll<sbs; ll++){
5296         ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr);
5297         for (l=0; l<ncols; l++){
5298           *bufA++ = vals[l];
5299         }
5300         ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr);
5301       }
5302     }
5303     ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5304   }
5305   /* recvs and sends of a-array are completed */
5306   i = nrecvs;
5307   while (i--) {
5308     ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5309   }
5310   if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
5311   ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr);
5312 
5313   if (scall == MAT_INITIAL_MATRIX){
5314     /* put together the new matrix */
5315     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr);
5316 
5317     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
5318     /* Since these are PETSc arrays, change flags to free them as necessary. */
5319     b_oth          = (Mat_SeqAIJ *)(*B_oth)->data;
5320     b_oth->free_a  = PETSC_TRUE;
5321     b_oth->free_ij = PETSC_TRUE;
5322     b_oth->nonew   = 0;
5323 
5324     ierr = PetscFree(bufj);CHKERRQ(ierr);
5325     if (!startsj_s || !bufa_ptr){
5326       ierr = PetscFree2(sstartsj,rstartsj);CHKERRQ(ierr);
5327       ierr = PetscFree(bufa);CHKERRQ(ierr); /* bufa is not returned to the caller, so free it here */
5328     } else {
5329       *startsj_s = sstartsj;
5330       *startsj_r = rstartsj;
5331       *bufa_ptr  = bufa;
5332     }
5333   }
5334   ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
5335   PetscFunctionReturn(0);
5336 }
5337 
5338 #undef __FUNCT__
5339 #define __FUNCT__ "MatGetCommunicationStructs"
5340 /*@C
5341   MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication.
5342 
5343   Not Collective
5344 
5345   Input Parameter:
5346 . A - the matrix, of type MATMPIAIJ
5347 
5348   Output Parameters:
5349 + lvec - The local vector holding off-process values from the argument to a matrix-vector product
5350 . colmap - A map from global column index to local index into lvec
5351 - multScatter - A scatter from the argument of a matrix-vector product to lvec
5352 
5353   Level: developer
5354 
5355 @*/
5356 #if defined (PETSC_USE_CTABLE)
5357 PetscErrorCode  MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter)
5358 #else
5359 PetscErrorCode  MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter)
5360 #endif
5361 {
5362   Mat_MPIAIJ *a;
5363 
5364   PetscFunctionBegin;
5365   PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
5366   PetscValidPointer(lvec, 2);
5367   PetscValidPointer(colmap, 3);
5368   PetscValidPointer(multScatter, 4);
5369   a = (Mat_MPIAIJ *) A->data;
5370   if (lvec) *lvec = a->lvec;
5371   if (colmap) *colmap = a->colmap;
5372   if (multScatter) *multScatter = a->Mvctx;
5373   PetscFunctionReturn(0);
5374 }
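/*
   A minimal usage sketch (the variable names A, lvec, colmap, and scatter are
   illustrative, not part of the API): after the matrix has been assembled,

     Vec        lvec;
     VecScatter scatter;
   #if defined(PETSC_USE_CTABLE)
     PetscTable colmap;
   #else
     PetscInt   *colmap;
   #endif
     ierr = MatGetCommunicationStructs(A,&lvec,&colmap,&scatter);CHKERRQ(ierr);

   The returned objects are internal to the matrix and must not be destroyed
   by the caller.
*/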
5375 
5376 EXTERN_C_BEGIN
5377 extern PetscErrorCode  MatConvert_MPIAIJ_MPIAIJCRL(Mat,const MatType,MatReuse,Mat*);
5378 extern PetscErrorCode  MatConvert_MPIAIJ_MPIAIJPERM(Mat,const MatType,MatReuse,Mat*);
5379 extern PetscErrorCode  MatConvert_MPIAIJ_MPISBAIJ(Mat,const MatType,MatReuse,Mat*);
5380 EXTERN_C_END
5381 
5382 #undef __FUNCT__
5383 #define __FUNCT__ "MatMatMultNumeric_MPIDense_MPIAIJ"
5384 /*
5385     Computes C = A*B as (B'*A')', since computing A*B directly is untenable
5386 
5387                n                       p                          p
5388         (              )       (              )         (                  )
5389       m (      A       )  *  n (       B      )   =   m (         C        )
5390         (              )       (              )         (                  )
5391 
5392 */
5393 PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C)
5394 {
5395   PetscErrorCode     ierr;
5396   Mat                At,Bt,Ct;
5397 
5398   PetscFunctionBegin;
5399   ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr);
5400   ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr);
5401   ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,1.0,&Ct);CHKERRQ(ierr);
5402   ierr = MatDestroy(&At);CHKERRQ(ierr);
5403   ierr = MatDestroy(&Bt);CHKERRQ(ierr);
5404   ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr);
5405   ierr = MatDestroy(&Ct);CHKERRQ(ierr);
5406   PetscFunctionReturn(0);
5407 }
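/*
   Note (added for exposition): the triple-transpose above trades three
   transposes plus one MPIAIJ*MPIDense product for the unavailable
   MPIDense*MPIAIJ kernel; the intermediates At, Bt, and Ct are created and
   destroyed on every call, so this path is correct but not cheap.
*/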
5408 
5409 #undef __FUNCT__
5410 #define __FUNCT__ "MatMatMultSymbolic_MPIDense_MPIAIJ"
5411 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat *C)
5412 {
5413   PetscErrorCode ierr;
5414   PetscInt       m=A->rmap->n,n=B->cmap->n;
5415   Mat            Cmat;
5416 
5417   PetscFunctionBegin;
5418   if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %D != B->rmap->n %D",A->cmap->n,B->rmap->n);
5419   ierr = MatCreate(((PetscObject)A)->comm,&Cmat);CHKERRQ(ierr);
5420   ierr = MatSetSizes(Cmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
5421   ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr);
5422   ierr = MatMPIDenseSetPreallocation(Cmat,PETSC_NULL);CHKERRQ(ierr);
5423   ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5424   ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5425   *C   = Cmat;
5426   (*C)->ops->matmult = MatMatMult_MPIDense_MPIAIJ;
5427   PetscFunctionReturn(0);
5428 }
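/*
   The symbolic phase above only creates, preallocates, and assembles the empty
   dense product C; MatMatMultNumeric_MPIDense_MPIAIJ() fills in the values.
   MatMatMult() with MAT_INITIAL_MATRIX runs both phases via the wrapper below.
*/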
5429 
5430 /* ----------------------------------------------------------------*/
5431 #undef __FUNCT__
5432 #define __FUNCT__ "MatMatMult_MPIDense_MPIAIJ"
5433 PetscErrorCode MatMatMult_MPIDense_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C)
5434 {
5435   PetscErrorCode ierr;
5436 
5437   PetscFunctionBegin;
5438   if (scall == MAT_INITIAL_MATRIX){
5439     ierr = MatMatMultSymbolic_MPIDense_MPIAIJ(A,B,fill,C);CHKERRQ(ierr);
5440   }
5441   ierr = MatMatMultNumeric_MPIDense_MPIAIJ(A,B,*C);CHKERRQ(ierr);
5442   PetscFunctionReturn(0);
5443 }
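/*
   Usage sketch (names illustrative): with A of type MATMPIDENSE and B of type
   MATMPIAIJ, a plain MatMatMult() call dispatches to this routine through the
   "MatMatMult_mpidense_mpiaij_C" function composed in MatCreate_MPIAIJ() below:

     Mat C;
     ierr = MatMatMult(A,B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&C);CHKERRQ(ierr);
     ierr = MatMatMult(A,B,MAT_REUSE_MATRIX,PETSC_DEFAULT,&C);CHKERRQ(ierr);
*/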
5444 
5445 EXTERN_C_BEGIN
5446 #if defined(PETSC_HAVE_MUMPS)
5447 extern PetscErrorCode MatGetFactor_aij_mumps(Mat,MatFactorType,Mat*);
5448 #endif
5449 #if defined(PETSC_HAVE_PASTIX)
5450 extern PetscErrorCode MatGetFactor_mpiaij_pastix(Mat,MatFactorType,Mat*);
5451 #endif
5452 #if defined(PETSC_HAVE_SUPERLU_DIST)
5453 extern PetscErrorCode MatGetFactor_mpiaij_superlu_dist(Mat,MatFactorType,Mat*);
5454 #endif
5455 #if defined(PETSC_HAVE_SPOOLES)
5456 extern PetscErrorCode MatGetFactor_mpiaij_spooles(Mat,MatFactorType,Mat*);
5457 #endif
5458 EXTERN_C_END
5459 
5460 /*MC
5461    MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.
5462 
5463    Options Database Keys:
5464 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions()
5465 
5466   Level: beginner
5467 
5468 .seealso: MatCreateAIJ()
5469 M*/
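/*
   Typical creation sequence (a sketch; the sizes m,n,M,N and the preallocation
   parameters d_nz,o_nz are illustrative placeholders):

     Mat A;
     ierr = MatCreate(comm,&A);CHKERRQ(ierr);
     ierr = MatSetSizes(A,m,n,M,N);CHKERRQ(ierr);
     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
     ierr = MatMPIAIJSetPreallocation(A,d_nz,PETSC_NULL,o_nz,PETSC_NULL);CHKERRQ(ierr);
*/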
5470 
5471 EXTERN_C_BEGIN
5472 #undef __FUNCT__
5473 #define __FUNCT__ "MatCreate_MPIAIJ"
5474 PetscErrorCode  MatCreate_MPIAIJ(Mat B)
5475 {
5476   Mat_MPIAIJ     *b;
5477   PetscErrorCode ierr;
5478   PetscMPIInt    size;
5479 
5480   PetscFunctionBegin;
5481   ierr = MPI_Comm_size(((PetscObject)B)->comm,&size);CHKERRQ(ierr);
5482 
5483   ierr            = PetscNewLog(B,Mat_MPIAIJ,&b);CHKERRQ(ierr);
5484   B->data         = (void*)b;
5485   ierr            = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
5486   B->rmap->bs     = 1;
5487   B->assembled    = PETSC_FALSE;
5488 
5489   B->insertmode   = NOT_SET_VALUES;
5490   b->size         = size;
5491   ierr = MPI_Comm_rank(((PetscObject)B)->comm,&b->rank);CHKERRQ(ierr);
5492 
5493   /* build stash to cache off-process entries set during assembly */
5494   ierr = MatStashCreate_Private(((PetscObject)B)->comm,1,&B->stash);CHKERRQ(ierr);
5495   b->donotstash  = PETSC_FALSE;
5496   b->colmap      = 0;
5497   b->garray      = 0;
5498   b->roworiented = PETSC_TRUE;
5499 
5500   /* stuff used for matrix vector multiply */
5501   b->lvec      = PETSC_NULL;
5502   b->Mvctx     = PETSC_NULL;
5503 
5504   /* stuff for MatGetRow() */
5505   b->rowindices   = 0;
5506   b->rowvalues    = 0;
5507   b->getrowactive = PETSC_FALSE;
5508 
5509 #if defined(PETSC_HAVE_SPOOLES)
5510   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_spooles_C",
5511                                      "MatGetFactor_mpiaij_spooles",
5512                                      MatGetFactor_mpiaij_spooles);CHKERRQ(ierr);
5513 #endif
5514 #if defined(PETSC_HAVE_MUMPS)
5515   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_mumps_C",
5516                                      "MatGetFactor_aij_mumps",
5517                                      MatGetFactor_aij_mumps);CHKERRQ(ierr);
5518 #endif
5519 #if defined(PETSC_HAVE_PASTIX)
5520   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_pastix_C",
5521                                      "MatGetFactor_mpiaij_pastix",
5522                                      MatGetFactor_mpiaij_pastix);CHKERRQ(ierr);
5523 #endif
5524 #if defined(PETSC_HAVE_SUPERLU_DIST)
5525   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_superlu_dist_C",
5526                                      "MatGetFactor_mpiaij_superlu_dist",
5527                                      MatGetFactor_mpiaij_superlu_dist);CHKERRQ(ierr);
5528 #endif
5529   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
5530                                      "MatStoreValues_MPIAIJ",
5531                                      MatStoreValues_MPIAIJ);CHKERRQ(ierr);
5532   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
5533                                      "MatRetrieveValues_MPIAIJ",
5534                                      MatRetrieveValues_MPIAIJ);CHKERRQ(ierr);
5535   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
5536                                      "MatGetDiagonalBlock_MPIAIJ",
5537                                      MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr);
5538   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatIsTranspose_C",
5539                                      "MatIsTranspose_MPIAIJ",
5540                                      MatIsTranspose_MPIAIJ);CHKERRQ(ierr);
5541   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocation_C",
5542                                      "MatMPIAIJSetPreallocation_MPIAIJ",
5543                                      MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr);
5544   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",
5545                                      "MatMPIAIJSetPreallocationCSR_MPIAIJ",
5546                                      MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr);
5547   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C",
5548                                      "MatDiagonalScaleLocal_MPIAIJ",
5549                                      MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr);
5550   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpiaijperm_C",
5551                                      "MatConvert_MPIAIJ_MPIAIJPERM",
5552                                       MatConvert_MPIAIJ_MPIAIJPERM);CHKERRQ(ierr);
5553   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpiaijcrl_C",
5554                                      "MatConvert_MPIAIJ_MPIAIJCRL",
5555                                       MatConvert_MPIAIJ_MPIAIJCRL);CHKERRQ(ierr);
5556   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C",
5557                                      "MatConvert_MPIAIJ_MPISBAIJ",
5558                                       MatConvert_MPIAIJ_MPISBAIJ);CHKERRQ(ierr);
5559   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMult_mpidense_mpiaij_C",
5560                                      "MatMatMult_MPIDense_MPIAIJ",
5561                                       MatMatMult_MPIDense_MPIAIJ);CHKERRQ(ierr);
5562   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMultSymbolic_mpidense_mpiaij_C",
5563                                      "MatMatMultSymbolic_MPIDense_MPIAIJ",
5564                                      MatMatMultSymbolic_MPIDense_MPIAIJ);CHKERRQ(ierr);
5565   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMultNumeric_mpidense_mpiaij_C",
5566                                      "MatMatMultNumeric_MPIDense_MPIAIJ",
5567                                       MatMatMultNumeric_MPIDense_MPIAIJ);CHKERRQ(ierr);
5568   ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr);
5569   PetscFunctionReturn(0);
5570 }
5571 EXTERN_C_END
5572 
5573 #undef __FUNCT__
5574 #define __FUNCT__ "MatCreateMPIAIJWithSplitArrays"
5575 /*@
5576      MatCreateMPIAIJWithSplitArrays - creates a MPI AIJ matrix using arrays that contain the "diagonal"
5577          and "off-diagonal" part of the matrix in CSR format.
5578 
5579    Collective on MPI_Comm
5580 
5581    Input Parameters:
5582 +  comm - MPI communicator
5583 .  m - number of local rows (Cannot be PETSC_DECIDE)
5584 .  n - number of local columns; this should be the same as the local size used in creating the
5585        x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have it
5586        calculated if N is given). For square matrices n is almost always m.
5587 .  M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given)
5588 .  N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given)
5589 .   i - row indices for "diagonal" portion of matrix
5590 .   j - column indices
5591 .   a - matrix values
5592 .   oi - row indices for "off-diagonal" portion of matrix
5593 .   oj - column indices
5594 -   oa - matrix values
5595 
5596    Output Parameter:
5597 .   mat - the matrix
5598 
5599    Level: advanced
5600 
5601    Notes:
5602        The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc. The user
5603        must free the arrays once the matrix has been destroyed and not before.
5604 
5605        The i and j indices are 0-based
5606 
5607        See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix
5608 
5609        This sets local rows and cannot be used to set off-processor values.
5610 
5611        You cannot later use MatSetValues() to change values in this matrix.
5612 
5613 .keywords: matrix, aij, compressed row, sparse, parallel
5614 
5615 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
5616           MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithArrays()
5617 @*/
5618 PetscErrorCode  MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],
5619                                                PetscInt oi[],PetscInt oj[],PetscScalar oa[],Mat *mat)
5620 {
5621   PetscErrorCode ierr;
5622   Mat_MPIAIJ     *maij;
5623 
5624   PetscFunctionBegin;
5625   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
5626   if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
5627   if (oi[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0");
5628   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
5629   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
5630   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
5631   maij = (Mat_MPIAIJ*) (*mat)->data;
5632   maij->donotstash     = PETSC_TRUE;
5633   (*mat)->preallocated = PETSC_TRUE;
5634 
5635   ierr = PetscLayoutSetBlockSize((*mat)->rmap,1);CHKERRQ(ierr);
5636   ierr = PetscLayoutSetBlockSize((*mat)->cmap,1);CHKERRQ(ierr);
5637   ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr);
5638   ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr);
5639 
5640   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr);
5641   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr);
5642 
5643   ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5644   ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5645   ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5646   ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5647 
5648   ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5649   ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5650   PetscFunctionReturn(0);
5651 }
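/*
   A two-process example sketch (values illustrative; rank is this process's
   rank from MPI_Comm_rank()). Each process owns one row and one column; j[]
   holds LOCAL column indices into the diagonal block, while oj[] holds GLOBAL
   column indices, matching the B block created above with global column width:

     PetscInt    i[]  = {0,1}, j[]  = {0};            PetscScalar a[]  = {2.0};
     PetscInt    oi[] = {0,1}, oj[] = {rank ? 0 : 1}; PetscScalar oa[] = {-1.0};
     Mat         A;
     ierr = MatCreateMPIAIJWithSplitArrays(PETSC_COMM_WORLD,1,1,PETSC_DETERMINE,PETSC_DETERMINE,
                                           i,j,a,oi,oj,oa,&A);CHKERRQ(ierr);

   This assembles the global 2x2 matrix [2 -1; -1 2]; the arrays must remain
   valid until A is destroyed.
*/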
5652 
5653 /*
5654     Special version for direct calls from Fortran
5655 */
5656 #include <petsc-private/fortranimpl.h>
5657 
5658 #if defined(PETSC_HAVE_FORTRAN_CAPS)
5659 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ
5660 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
5661 #define matsetvaluesmpiaij_ matsetvaluesmpiaij
5662 #endif
5663 
5664 /* Change these macros so they can be used in a void function; the SETERRQ variants cannot return an error code here, so they abort instead */
5665 #undef CHKERRQ
5666 #define CHKERRQ(ierr) CHKERRABORT(PETSC_COMM_WORLD,ierr)
5667 #undef SETERRQ2
5668 #define SETERRQ2(comm,ierr,b,c,d) CHKERRABORT(comm,ierr)
5669 #undef SETERRQ3
5670 #define SETERRQ3(comm,ierr,b,c,d,e) CHKERRABORT(comm,ierr)
5671 #undef SETERRQ
5672 #define SETERRQ(c,ierr,b) CHKERRABORT(c,ierr)
5673 
5674 EXTERN_C_BEGIN
5675 #undef __FUNCT__
5676 #define __FUNCT__ "matsetvaluesmpiaij_"
5677 void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr)
5678 {
5679   Mat             mat = *mmat;
5680   PetscInt        m = *mm, n = *mn;
5681   InsertMode      addv = *maddv;
5682   Mat_MPIAIJ      *aij = (Mat_MPIAIJ*)mat->data;
5683   PetscScalar     value;
5684   PetscErrorCode  ierr;
5685 
5686   MatCheckPreallocated(mat,1);
5687   if (mat->insertmode == NOT_SET_VALUES) {
5688     mat->insertmode = addv;
5689   }
5690 #if defined(PETSC_USE_DEBUG)
5691   else if (mat->insertmode != addv) {
5692     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
5693   }
5694 #endif
5695   {
5696   PetscInt        i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
5697   PetscInt        cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
5698   PetscBool       roworiented = aij->roworiented;
5699 
5700   /* Some Variables required in the macro */
5701   /* Some variables required by the MatSetValues_SeqAIJ_{A,B}_Private() macros */
5702   Mat_SeqAIJ      *a = (Mat_SeqAIJ*)A->data;
5703   PetscInt        *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
5704   MatScalar       *aa = a->a;
5705   PetscBool       ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES))?PETSC_TRUE:PETSC_FALSE);
5706   Mat             B = aij->B;
5707   Mat_SeqAIJ      *b = (Mat_SeqAIJ*)B->data;
5708   PetscInt        *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
5709   MatScalar       *ba = b->a;
5710 
5711   PetscInt        *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
5712   PetscInt        nonew = a->nonew;
5713   MatScalar       *ap1,*ap2;
5714 
5715   PetscFunctionBegin;
5716   for (i=0; i<m; i++) {
5717     if (im[i] < 0) continue;
5718 #if defined(PETSC_USE_DEBUG)
5719     if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
5720 #endif
5721     if (im[i] >= rstart && im[i] < rend) {
5722       row      = im[i] - rstart;
5723       lastcol1 = -1;
5724       rp1      = aj + ai[row];
5725       ap1      = aa + ai[row];
5726       rmax1    = aimax[row];
5727       nrow1    = ailen[row];
5728       low1     = 0;
5729       high1    = nrow1;
5730       lastcol2 = -1;
5731       rp2      = bj + bi[row];
5732       ap2      = ba + bi[row];
5733       rmax2    = bimax[row];
5734       nrow2    = bilen[row];
5735       low2     = 0;
5736       high2    = nrow2;
5737 
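      /* Columns in [cstart,cend) belong to the local diagonal block A and are
         inserted with local indices; all other columns go to the off-diagonal
         block B, translated through colmap when the matrix was previously
         assembled. */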
5738       for (j=0; j<n; j++) {
5739         if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
5740         if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
5741         if (in[j] >= cstart && in[j] < cend){
5742           col = in[j] - cstart;
5743           MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
5744         } else if (in[j] < 0) continue;
5745 #if defined(PETSC_USE_DEBUG)
5746         else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
5747 #endif
5748         else {
5749           if (mat->was_assembled) {
5750             if (!aij->colmap) {
5751               ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
5752             }
5753 #if defined (PETSC_USE_CTABLE)
5754             ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
5755             col--;
5756 #else
5757             col = aij->colmap[in[j]] - 1;
5758 #endif
5759             if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
5760               ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
5761               col =  in[j];
5762               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
5763               B = aij->B;
5764               b = (Mat_SeqAIJ*)B->data;
5765               bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
5766               rp2      = bj + bi[row];
5767               ap2      = ba + bi[row];
5768               rmax2    = bimax[row];
5769               nrow2    = bilen[row];
5770               low2     = 0;
5771               high2    = nrow2;
5772               bm       = aij->B->rmap->n;
5773               ba = b->a;
5774             }
5775           } else col = in[j];
5776           MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
5777         }
5778       }
5779     } else {
5780       if (!aij->donotstash) {
5781         if (roworiented) {
5782           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
5783         } else {
5784           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
5785         }
5786       }
5787     }
5788   }}
5789   PetscFunctionReturnVoid();
5790 }
5791 EXTERN_C_END
5792 
5793