xref: /petsc/src/mat/impls/aij/mpi/mpiaij.c (revision 4cefc2ff75e4fd40cdd0827272dcde5d684e9831)
1 
2 #include <../src/mat/impls/aij/mpi/mpiaij.h>   /*I "petscmat.h" I*/
3 #include <petsc-private/vecimpl.h>
4 #include <petscblaslapack.h>
5 #include <petscsf.h>
6 
7 /*MC
8    MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices.
9 
10    This matrix type is identical to MATSEQAIJ when constructed with a single process communicator,
11    and MATMPIAIJ otherwise.  As a result, for single process communicators,
12   MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
13   for communicators controlling multiple processes.  It is recommended that you call both of
14   the above preallocation routines for simplicity.
15 
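   Example Usage: a minimal sketch (M, N, d_nz and o_nz are placeholder sizes and preallocation
   counts; error checking is omitted):
.vb
      MatCreate(PETSC_COMM_WORLD,&A);
      MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,M,N);
      MatSetType(A,MATAIJ);
      MatSeqAIJSetPreallocation(A,d_nz,NULL);
      MatMPIAIJSetPreallocation(A,d_nz,NULL,o_nz,NULL);
.ve
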
16    Options Database Keys:
17 . -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions()
18 
19   Developer Notes: Subclasses include MATAIJCUSP, MATAIJCUSPARSE, MATAIJPERM, and MATAIJCRL; the type also automatically switches over to use inodes when
20    enough of them exist.
21 
22   Level: beginner
23 
24 .seealso: MatCreateAIJ(), MatCreateSeqAIJ(), MATSEQAIJ, MATMPIAIJ
25 M*/
26 
27 /*MC
28    MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices.
29 
30    This matrix type is identical to MATSEQAIJCRL when constructed with a single process communicator,
31    and MATMPIAIJCRL otherwise.  As a result, for single process communicators,
32    MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
33   for communicators controlling multiple processes.  It is recommended that you call both of
34   the above preallocation routines for simplicity.
35 
36    Options Database Keys:
37 . -mat_type aijcrl - sets the matrix type to "aijcrl" during a call to MatSetFromOptions()
38 
39   Level: beginner
40 
41 .seealso: MatCreateMPIAIJCRL(), MATSEQAIJCRL, MATMPIAIJCRL
42 M*/
43 
44 #undef __FUNCT__
45 #define __FUNCT__ "MatFindNonzeroRows_MPIAIJ"
46 PetscErrorCode MatFindNonzeroRows_MPIAIJ(Mat M,IS *keptrows)
47 {
48   PetscErrorCode  ierr;
49   Mat_MPIAIJ      *mat = (Mat_MPIAIJ*)M->data;
50   Mat_SeqAIJ      *a   = (Mat_SeqAIJ*)mat->A->data;
51   Mat_SeqAIJ      *b   = (Mat_SeqAIJ*)mat->B->data;
52   const PetscInt  *ia,*ib;
53   const MatScalar *aa,*bb;
54   PetscInt        na,nb,i,j,*rows,cnt=0,n0rows;
55   PetscInt        m = M->rmap->n,rstart = M->rmap->rstart;
56 
57   PetscFunctionBegin;
58   *keptrows = 0;
59   ia        = a->i;
60   ib        = b->i;
61   for (i=0; i<m; i++) {
62     na = ia[i+1] - ia[i];
63     nb = ib[i+1] - ib[i];
64     if (!na && !nb) {
65       cnt++;
66       goto ok1;
67     }
68     aa = a->a + ia[i];
69     for (j=0; j<na; j++) {
70       if (aa[j] != 0.0) goto ok1;
71     }
72     bb = b->a + ib[i];
73     for (j=0; j <nb; j++) {
74       if (bb[j] != 0.0) goto ok1;
75     }
76     cnt++;
77 ok1:;
78   }
79   ierr = MPI_Allreduce(&cnt,&n0rows,1,MPIU_INT,MPIU_SUM,PetscObjectComm((PetscObject)M));CHKERRQ(ierr);
80   if (!n0rows) PetscFunctionReturn(0);
81   ierr = PetscMalloc1((M->rmap->n-cnt),&rows);CHKERRQ(ierr);
82   cnt  = 0;
83   for (i=0; i<m; i++) {
84     na = ia[i+1] - ia[i];
85     nb = ib[i+1] - ib[i];
86     if (!na && !nb) continue;
87     aa = a->a + ia[i];
88     for (j=0; j<na;j++) {
89       if (aa[j] != 0.0) {
90         rows[cnt++] = rstart + i;
91         goto ok2;
92       }
93     }
94     bb = b->a + ib[i];
95     for (j=0; j<nb; j++) {
96       if (bb[j] != 0.0) {
97         rows[cnt++] = rstart + i;
98         goto ok2;
99       }
100     }
101 ok2:;
102   }
103   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),cnt,rows,PETSC_OWN_POINTER,keptrows);CHKERRQ(ierr);
104   PetscFunctionReturn(0);
105 }
106 
107 #undef __FUNCT__
108 #define __FUNCT__ "MatFindZeroDiagonals_MPIAIJ"
109 PetscErrorCode MatFindZeroDiagonals_MPIAIJ(Mat M,IS *zrows)
110 {
111   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)M->data;
112   PetscErrorCode ierr;
113   PetscInt       i,rstart,nrows,*rows;
114 
115   PetscFunctionBegin;
116   *zrows = NULL;
117   ierr   = MatFindZeroDiagonals_SeqAIJ_Private(aij->A,&nrows,&rows);CHKERRQ(ierr);
118   ierr   = MatGetOwnershipRange(M,&rstart,NULL);CHKERRQ(ierr);
119   for (i=0; i<nrows; i++) rows[i] += rstart;
120   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),nrows,rows,PETSC_OWN_POINTER,zrows);CHKERRQ(ierr);
121   PetscFunctionReturn(0);
122 }
123 
124 #undef __FUNCT__
125 #define __FUNCT__ "MatGetColumnNorms_MPIAIJ"
126 PetscErrorCode MatGetColumnNorms_MPIAIJ(Mat A,NormType type,PetscReal *norms)
127 {
128   PetscErrorCode ierr;
129   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)A->data;
130   PetscInt       i,n,*garray = aij->garray;
131   Mat_SeqAIJ     *a_aij = (Mat_SeqAIJ*) aij->A->data;
132   Mat_SeqAIJ     *b_aij = (Mat_SeqAIJ*) aij->B->data;
133   PetscReal      *work;
134 
135   PetscFunctionBegin;
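  /* accumulate this process's contribution for each global column in work[], then combine across processes with the reduction below */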
136   ierr = MatGetSize(A,NULL,&n);CHKERRQ(ierr);
137   ierr = PetscCalloc1(n,&work);CHKERRQ(ierr);
138   if (type == NORM_2) {
139     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
140       work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]*a_aij->a[i]);
141     }
142     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
143       work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]*b_aij->a[i]);
144     }
145   } else if (type == NORM_1) {
146     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
147       work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]);
148     }
149     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
150       work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]);
151     }
152   } else if (type == NORM_INFINITY) {
153     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
154       work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]);
155     }
156     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
157       work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]),work[garray[b_aij->j[i]]]);
158     }
159 
160   } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Unknown NormType");
161   if (type == NORM_INFINITY) {
162     ierr = MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
163   } else {
164     ierr = MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
165   }
166   ierr = PetscFree(work);CHKERRQ(ierr);
167   if (type == NORM_2) {
168     for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]);
169   }
170   PetscFunctionReturn(0);
171 }
172 
173 #undef __FUNCT__
174 #define __FUNCT__ "MatDistribute_MPIAIJ"
175 /*
176     Distributes a SeqAIJ matrix across a set of processes. Code stolen from
177     MatLoad_MPIAIJ(). Horrible lack of reuse. Should be a routine for each matrix type.
178 
179     Only for square matrices
180 
181     Used by a preconditioner, hence PETSC_EXTERN
182 */
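/*
   A rough calling sketch (gmat is a MATSEQAIJ matrix holding the global system and m the number of
   rows this process is to own; both are placeholders and error checking is omitted):

      Mat dmat;
      MatDistribute_MPIAIJ(comm,gmat,m,MAT_INITIAL_MATRIX,&dmat);
      ...
      MatDistribute_MPIAIJ(comm,gmat,m,MAT_REUSE_MATRIX,&dmat);

   With MAT_REUSE_MATRIX only the numerical values are moved over from process 0; the nonzero
   structure created by the MAT_INITIAL_MATRIX call is reused.
*/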
183 PETSC_EXTERN PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat)
184 {
185   PetscMPIInt    rank,size;
186   PetscInt       *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz = 0,*gmataj,cnt,row,*ld,bses[2];
187   PetscErrorCode ierr;
188   Mat            mat;
189   Mat_SeqAIJ     *gmata;
190   PetscMPIInt    tag;
191   MPI_Status     status;
192   PetscBool      aij;
193   MatScalar      *gmataa,*ao,*ad,*gmataarestore=0;
194 
195   PetscFunctionBegin;
196   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
197   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
198   if (!rank) {
199     ierr = PetscObjectTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);CHKERRQ(ierr);
200     if (!aij) SETERRQ1(PetscObjectComm((PetscObject)gmat),PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name);
201   }
202   if (reuse == MAT_INITIAL_MATRIX) {
203     ierr = MatCreate(comm,&mat);CHKERRQ(ierr);
204     ierr = MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
205     ierr = MatGetBlockSizes(gmat,&bses[0],&bses[1]);CHKERRQ(ierr);
206     ierr = MPI_Bcast(bses,2,MPIU_INT,0,comm);CHKERRQ(ierr);
207     ierr = MatSetBlockSizes(mat,bses[0],bses[1]);CHKERRQ(ierr);
208     ierr = MatSetType(mat,MATAIJ);CHKERRQ(ierr);
209     ierr = PetscMalloc1((size+1),&rowners);CHKERRQ(ierr);
210     ierr = PetscMalloc2(m,&dlens,m,&olens);CHKERRQ(ierr);
211     ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
212 
213     rowners[0] = 0;
214     for (i=2; i<=size; i++) rowners[i] += rowners[i-1];
215     rstart = rowners[rank];
216     rend   = rowners[rank+1];
217     ierr   = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
218     if (!rank) {
219       gmata = (Mat_SeqAIJ*) gmat->data;
220       /* send row lengths to all processors */
221       for (i=0; i<m; i++) dlens[i] = gmata->ilen[i];
222       for (i=1; i<size; i++) {
223         ierr = MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
224       }
225       /* determine the number of diagonal and off-diagonal entries in each row */
226       ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
227       ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr);
228       jj   = 0;
229       for (i=0; i<m; i++) {
230         for (j=0; j<dlens[i]; j++) {
231           if (gmata->j[jj] < rstart) ld[i]++;
232           if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++;
233           jj++;
234         }
235       }
236       /* send column indices to other processes */
237       for (i=1; i<size; i++) {
238         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
239         ierr = MPI_Send(&nz,1,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
240         ierr = MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
241       }
242 
243       /* send numerical values to other processes */
244       for (i=1; i<size; i++) {
245         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
246         ierr = MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
247       }
248       gmataa = gmata->a;
249       gmataj = gmata->j;
250 
251     } else {
252       /* receive row lengths */
253       ierr = MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
254       /* receive column indices */
255       ierr = MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
256       ierr = PetscMalloc2(nz,&gmataa,nz,&gmataj);CHKERRQ(ierr);
257       ierr = MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
258       /* determine the number of diagonal and off-diagonal entries in each row */
259       ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
260       ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr);
261       jj   = 0;
262       for (i=0; i<m; i++) {
263         for (j=0; j<dlens[i]; j++) {
264           if (gmataj[jj] < rstart) ld[i]++;
265           if (gmataj[jj] < rstart || gmataj[jj] >= rend) olens[i]++;
266           jj++;
267         }
268       }
269       /* receive numerical values */
270       ierr = PetscMemzero(gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr);
271       ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
272     }
273     /* set preallocation */
274     for (i=0; i<m; i++) {
275       dlens[i] -= olens[i];
276     }
277     ierr = MatSeqAIJSetPreallocation(mat,0,dlens);CHKERRQ(ierr);
278     ierr = MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);CHKERRQ(ierr);
279 
280     for (i=0; i<m; i++) {
281       dlens[i] += olens[i];
282     }
283     cnt = 0;
284     for (i=0; i<m; i++) {
285       row  = rstart + i;
286       ierr = MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);CHKERRQ(ierr);
287       cnt += dlens[i];
288     }
289     if (rank) {
290       ierr = PetscFree2(gmataa,gmataj);CHKERRQ(ierr);
291     }
292     ierr = PetscFree2(dlens,olens);CHKERRQ(ierr);
293     ierr = PetscFree(rowners);CHKERRQ(ierr);
294 
295     ((Mat_MPIAIJ*)(mat->data))->ld = ld;
296 
297     *inmat = mat;
298   } else {   /* column indices are already set; only need to move over numerical values from process 0 */
299     Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data;
300     Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data;
301     mat  = *inmat;
302     ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
303     if (!rank) {
304       /* send numerical values to other processes */
305       gmata  = (Mat_SeqAIJ*) gmat->data;
306       ierr   = MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);CHKERRQ(ierr);
307       gmataa = gmata->a;
308       for (i=1; i<size; i++) {
309         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
310         ierr = MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
311       }
312       nz = gmata->i[rowners[1]]-gmata->i[rowners[0]];
313     } else {
314       /* receive numerical values from process 0 */
315       nz   = Ad->nz + Ao->nz;
316       ierr = PetscMalloc1(nz,&gmataa);CHKERRQ(ierr); gmataarestore = gmataa;
317       ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
318     }
319     /* transfer numerical values into the diagonal A and off diagonal B parts of mat */
320     ld = ((Mat_MPIAIJ*)(mat->data))->ld;
321     ad = Ad->a;
322     ao = Ao->a;
323     if (mat->rmap->n) {
324       i  = 0;
325       nz = ld[i];                                   ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
326       nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
327     }
328     for (i=1; i<mat->rmap->n; i++) {
329       nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
330       nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
331     }
332     i--;
333     if (mat->rmap->n) {
334       nz = Ao->i[i+1] - Ao->i[i] - ld[i];           ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr);
335     }
336     if (rank) {
337       ierr = PetscFree(gmataarestore);CHKERRQ(ierr);
338     }
339   }
340   ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
341   ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
342   PetscFunctionReturn(0);
343 }
344 
345 /*
346   Local utility routine that creates a mapping from the global column
347 number to the local number in the off-diagonal part of the local
348 storage of the matrix.  When PETSC_USE_CTABLE is used this is scalable at
349 a slightly higher hash table cost; without it, it is not scalable (each process
350 has an order N integer array) but access is fast.
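
  The stored value is the local column index plus one, so a lookup result of zero means the global
  column does not occur in the off-diagonal part.  A sketch of the lookup convention used later in
  this file (gcol and lcol are placeholder names for a global and a local column index):

    #if defined(PETSC_USE_CTABLE)
      PetscTableFind(aij->colmap,gcol+1,&lcol); lcol--;
    #else
      lcol = aij->colmap[gcol] - 1;
    #endif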
351 */
352 #undef __FUNCT__
353 #define __FUNCT__ "MatCreateColmap_MPIAIJ_Private"
354 PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat)
355 {
356   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
357   PetscErrorCode ierr;
358   PetscInt       n = aij->B->cmap->n,i;
359 
360   PetscFunctionBegin;
361   if (!aij->garray) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"MPIAIJ Matrix was assembled but is missing garray");
362 #if defined(PETSC_USE_CTABLE)
363   ierr = PetscTableCreate(n,mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr);
364   for (i=0; i<n; i++) {
365     ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1,INSERT_VALUES);CHKERRQ(ierr);
366   }
367 #else
368   ierr = PetscCalloc1((mat->cmap->N+1),&aij->colmap);CHKERRQ(ierr);
369   ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N+1)*sizeof(PetscInt));CHKERRQ(ierr);
370   for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
371 #endif
372   PetscFunctionReturn(0);
373 }
374 
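/*
   Inlined insertion of a single value at (row,col) of the diagonal block A: binary-search the sorted
   column indices of the row, add to or overwrite an existing entry, or shift later entries up to make
   room for a new one (reallocating the row via MatSeqXAIJReallocateAIJ() if needed).
   MatSetValues_SeqAIJ_B_Private() below performs the identical operation on the off-diagonal block B.
*/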
375 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \
376 { \
377     if (col <= lastcol1)  low1 = 0;     \
378     else                 high1 = nrow1; \
379     lastcol1 = col;\
380     while (high1-low1 > 5) { \
381       t = (low1+high1)/2; \
382       if (rp1[t] > col) high1 = t; \
383       else              low1  = t; \
384     } \
385       for (_i=low1; _i<high1; _i++) { \
386         if (rp1[_i] > col) break; \
387         if (rp1[_i] == col) { \
388           if (addv == ADD_VALUES) ap1[_i] += value;   \
389           else                    ap1[_i] = value; \
390           goto a_noinsert; \
391         } \
392       }  \
393       if (value == 0.0 && ignorezeroentries) {low1 = 0; high1 = nrow1; goto a_noinsert;} \
394       if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;}                \
395       if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
396       MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \
397       N = nrow1++ - 1; a->nz++; high1++; \
398       /* shift up all the later entries in this row */ \
399       for (ii=N; ii>=_i; ii--) { \
400         rp1[ii+1] = rp1[ii]; \
401         ap1[ii+1] = ap1[ii]; \
402       } \
403       rp1[_i] = col;  \
404       ap1[_i] = value;  \
405       A->nonzerostate++;\
406       a_noinsert: ; \
407       ailen[row] = nrow1; \
408 }
409 
410 
411 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \
412   { \
413     if (col <= lastcol2) low2 = 0;                        \
414     else high2 = nrow2;                                   \
415     lastcol2 = col;                                       \
416     while (high2-low2 > 5) {                              \
417       t = (low2+high2)/2;                                 \
418       if (rp2[t] > col) high2 = t;                        \
419       else             low2  = t;                         \
420     }                                                     \
421     for (_i=low2; _i<high2; _i++) {                       \
422       if (rp2[_i] > col) break;                           \
423       if (rp2[_i] == col) {                               \
424         if (addv == ADD_VALUES) ap2[_i] += value;         \
425         else                    ap2[_i] = value;          \
426         goto b_noinsert;                                  \
427       }                                                   \
428     }                                                     \
429     if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
430     if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;}                        \
431     if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
432     MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \
433     N = nrow2++ - 1; b->nz++; high2++;                    \
434     /* shift up all the later entries in this row */      \
435     for (ii=N; ii>=_i; ii--) {                            \
436       rp2[ii+1] = rp2[ii];                                \
437       ap2[ii+1] = ap2[ii];                                \
438     }                                                     \
439     rp2[_i] = col;                                        \
440     ap2[_i] = value;                                      \
441     B->nonzerostate++;                                    \
442     b_noinsert: ;                                         \
443     bilen[row] = nrow2;                                   \
444   }
445 
446 #undef __FUNCT__
447 #define __FUNCT__ "MatSetValuesRow_MPIAIJ"
448 PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[])
449 {
450   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)A->data;
451   Mat_SeqAIJ     *a   = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data;
452   PetscErrorCode ierr;
453   PetscInt       l,*garray = mat->garray,diag;
454 
455   PetscFunctionBegin;
456   /* code only works for square matrices A */
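  /* v[] is assumed to hold the values of the entire row, ordered by increasing global column number */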
457 
458   /* find size of row to the left of the diagonal part */
459   ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr);
460   row  = row - diag;
461   for (l=0; l<b->i[row+1]-b->i[row]; l++) {
462     if (garray[b->j[b->i[row]+l]] > diag) break;
463   }
464   ierr = PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));CHKERRQ(ierr);
465 
466   /* diagonal part */
467   ierr = PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));CHKERRQ(ierr);
468 
469   /* right of diagonal part */
470   ierr = PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));CHKERRQ(ierr);
471   PetscFunctionReturn(0);
472 }
473 
474 #undef __FUNCT__
475 #define __FUNCT__ "MatSetValues_MPIAIJ"
476 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
477 {
478   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
479   PetscScalar    value;
480   PetscErrorCode ierr;
481   PetscInt       i,j,rstart  = mat->rmap->rstart,rend = mat->rmap->rend;
482   PetscInt       cstart      = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
483   PetscBool      roworiented = aij->roworiented;
484 
485   /* Some Variables required in the macro */
486   Mat        A                 = aij->A;
487   Mat_SeqAIJ *a                = (Mat_SeqAIJ*)A->data;
488   PetscInt   *aimax            = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
489   MatScalar  *aa               = a->a;
490   PetscBool  ignorezeroentries = a->ignorezeroentries;
491   Mat        B                 = aij->B;
492   Mat_SeqAIJ *b                = (Mat_SeqAIJ*)B->data;
493   PetscInt   *bimax            = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
494   MatScalar  *ba               = b->a;
495 
496   PetscInt  *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
497   PetscInt  nonew;
498   MatScalar *ap1,*ap2;
499 
500   PetscFunctionBegin;
501   for (i=0; i<m; i++) {
502     if (im[i] < 0) continue;
503 #if defined(PETSC_USE_DEBUG)
504     if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
505 #endif
506     if (im[i] >= rstart && im[i] < rend) {
507       row      = im[i] - rstart;
508       lastcol1 = -1;
509       rp1      = aj + ai[row];
510       ap1      = aa + ai[row];
511       rmax1    = aimax[row];
512       nrow1    = ailen[row];
513       low1     = 0;
514       high1    = nrow1;
515       lastcol2 = -1;
516       rp2      = bj + bi[row];
517       ap2      = ba + bi[row];
518       rmax2    = bimax[row];
519       nrow2    = bilen[row];
520       low2     = 0;
521       high2    = nrow2;
522 
523       for (j=0; j<n; j++) {
524         if (roworiented) value = v[i*n+j];
525         else             value = v[i+j*m];
526         if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
527         if (in[j] >= cstart && in[j] < cend) {
528           col   = in[j] - cstart;
529           nonew = a->nonew;
530           MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
531         } else if (in[j] < 0) continue;
532 #if defined(PETSC_USE_DEBUG)
533         else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
534 #endif
535         else {
536           if (mat->was_assembled) {
537             if (!aij->colmap) {
538               ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
539             }
540 #if defined(PETSC_USE_CTABLE)
541             ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
542             col--;
543 #else
544             col = aij->colmap[in[j]] - 1;
545 #endif
546             if (col < 0 && !((Mat_SeqAIJ*)(aij->B->data))->nonew) {
547               ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
548               col  =  in[j];
549               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
550               B     = aij->B;
551               b     = (Mat_SeqAIJ*)B->data;
552               bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a;
553               rp2   = bj + bi[row];
554               ap2   = ba + bi[row];
555               rmax2 = bimax[row];
556               nrow2 = bilen[row];
557               low2  = 0;
558               high2 = nrow2;
559               bm    = aij->B->rmap->n;
560               ba    = b->a;
561             } else if (col < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", im[i], in[j]);
562           } else col = in[j];
563           nonew = b->nonew;
564           MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
565         }
566       }
567     } else {
568       if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
569       if (!aij->donotstash) {
570         mat->assembled = PETSC_FALSE;
571         if (roworiented) {
572           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
573         } else {
574           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
575         }
576       }
577     }
578   }
579   PetscFunctionReturn(0);
580 }
581 
582 #undef __FUNCT__
583 #define __FUNCT__ "MatGetValues_MPIAIJ"
584 PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
585 {
586   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
587   PetscErrorCode ierr;
588   PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
589   PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
590 
591   PetscFunctionBegin;
592   for (i=0; i<m; i++) {
593     if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/
594     if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
595     if (idxm[i] >= rstart && idxm[i] < rend) {
596       row = idxm[i] - rstart;
597       for (j=0; j<n; j++) {
598         if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */
599         if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
600         if (idxn[j] >= cstart && idxn[j] < cend) {
601           col  = idxn[j] - cstart;
602           ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
603         } else {
604           if (!aij->colmap) {
605             ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
606           }
607 #if defined(PETSC_USE_CTABLE)
608           ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
609           col--;
610 #else
611           col = aij->colmap[idxn[j]] - 1;
612 #endif
613           if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
614           else {
615             ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
616           }
617         }
618       }
619     } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
620   }
621   PetscFunctionReturn(0);
622 }
623 
624 extern PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat,Vec,Vec);
625 
626 #undef __FUNCT__
627 #define __FUNCT__ "MatAssemblyBegin_MPIAIJ"
628 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
629 {
630   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
631   PetscErrorCode ierr;
632   PetscInt       nstash,reallocs;
633   InsertMode     addv;
634 
635   PetscFunctionBegin;
636   if (aij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0);
637 
638   /* make sure all processors are either in INSERTMODE or ADDMODE */
639   ierr = MPI_Allreduce((PetscEnum*)&mat->insertmode,(PetscEnum*)&addv,1,MPIU_ENUM,MPI_BOR,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
640   if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
641   mat->insertmode = addv; /* in case this processor had no cache */
642 
643   ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
644   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
645   ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
646   PetscFunctionReturn(0);
647 }
648 
649 #undef __FUNCT__
650 #define __FUNCT__ "MatAssemblyEnd_MPIAIJ"
651 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
652 {
653   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
654   Mat_SeqAIJ     *a   = (Mat_SeqAIJ*)aij->A->data;
655   PetscErrorCode ierr;
656   PetscMPIInt    n;
657   PetscInt       i,j,rstart,ncols,flg;
658   PetscInt       *row,*col;
659   PetscBool      other_disassembled;
660   PetscScalar    *val;
661   InsertMode     addv = mat->insertmode;
662 
663   /* do not use 'b = (Mat_SeqAIJ*)aij->B->data' as B can be reset in disassembly */
664 
665   PetscFunctionBegin;
666   if (!aij->donotstash && !mat->nooffprocentries) {
667     while (1) {
668       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
669       if (!flg) break;
670 
671       for (i=0; i<n; ) {
672         /* Now identify the consecutive vals belonging to the same row */
673         for (j=i,rstart=row[j]; j<n; j++) {
674           if (row[j] != rstart) break;
675         }
676         if (j < n) ncols = j-i;
677         else       ncols = n-i;
678         /* Now assemble all these values with a single function call */
679         ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
680 
681         i = j;
682       }
683     }
684     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
685   }
686   ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
687   ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);
688 
689   /* determine if any processor has disassembled; if so, we must
690      also disassemble ourselves so that we may reassemble. */
691   /*
692      if nonzero structure of submatrix B cannot change then we know that
693      no processor disassembled thus we can skip this stuff
694   */
695   if (!((Mat_SeqAIJ*)aij->B->data)->nonew) {
696     ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
697     if (mat->was_assembled && !other_disassembled) {
698       ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
699     }
700   }
701   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
702     ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
703   }
704   ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr);
705   ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
706   ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);
707 
708   ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
709 
710   aij->rowvalues = 0;
711 
712   /* used by MatAXPY() */
713   a->xtoy = 0; ((Mat_SeqAIJ*)aij->B->data)->xtoy = 0;   /* b->xtoy = 0 */
714   a->XtoY = 0; ((Mat_SeqAIJ*)aij->B->data)->XtoY = 0;   /* b->XtoY = 0 */
715 
716   ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
717   if (a->inode.size) mat->ops->multdiagonalblock = MatMultDiagonalBlock_MPIAIJ;
718 
719   /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */
720   if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
721     PetscObjectState state = aij->A->nonzerostate + aij->B->nonzerostate;
722     ierr = MPI_Allreduce(&state,&mat->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
723   }
724   PetscFunctionReturn(0);
725 }
726 
727 #undef __FUNCT__
728 #define __FUNCT__ "MatZeroEntries_MPIAIJ"
729 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
730 {
731   Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
732   PetscErrorCode ierr;
733 
734   PetscFunctionBegin;
735   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
736   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
737   PetscFunctionReturn(0);
738 }
739 
740 #undef __FUNCT__
741 #define __FUNCT__ "MatZeroRows_MPIAIJ"
742 PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
743 {
744   Mat_MPIAIJ    *mat    = (Mat_MPIAIJ *) A->data;
745   PetscInt      *owners = A->rmap->range;
746   PetscInt       n      = A->rmap->n;
747   PetscSF        sf;
748   PetscInt      *lrows;
749   PetscSFNode   *rrows;
750   PetscInt       r, p = 0, len = 0;
751   PetscErrorCode ierr;
752 
753   PetscFunctionBegin;
754   /* Create SF where leaves are input rows and roots are owned rows */
755   ierr = PetscMalloc1(n, &lrows);CHKERRQ(ierr);
756   for (r = 0; r < n; ++r) lrows[r] = -1;
757   if (!A->nooffproczerorows) {ierr = PetscMalloc1(N, &rrows);CHKERRQ(ierr);}
758   for (r = 0; r < N; ++r) {
759     const PetscInt idx   = rows[r];
760     if (idx < 0 || A->rmap->N <= idx) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range [0,%D)",idx,A->rmap->N);
761     if (idx < owners[p] || owners[p+1] <= idx) { /* short-circuit the search if the last p owns this row too */
762       ierr = PetscLayoutFindOwner(A->rmap,idx,&p);CHKERRQ(ierr);
763     }
764     if (A->nooffproczerorows) {
765       if (p != mat->rank) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"MAT_NO_OFF_PROC_ZERO_ROWS set, but row %D is not owned by rank %d",idx,mat->rank);
766       lrows[len++] = idx - owners[p];
767     } else {
768       rrows[r].rank = p;
769       rrows[r].index = rows[r] - owners[p];
770     }
771   }
772   if (!A->nooffproczerorows) {
773     ierr = PetscSFCreate(PetscObjectComm((PetscObject) A), &sf);CHKERRQ(ierr);
774     ierr = PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER);CHKERRQ(ierr);
775     /* Collect flags for rows to be zeroed */
776     ierr = PetscSFReduceBegin(sf, MPIU_INT, (PetscInt*)rows, lrows, MPI_LOR);CHKERRQ(ierr);
777     ierr = PetscSFReduceEnd(sf, MPIU_INT, (PetscInt*)rows, lrows, MPI_LOR);CHKERRQ(ierr);
778     ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
779     /* Compress and put in row numbers */
780     for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r;
781   }
782   /* fix right hand side if needed */
783   if (x && b) {
784     const PetscScalar *xx;
785     PetscScalar       *bb;
786 
787     ierr = VecGetArrayRead(x, &xx);CHKERRQ(ierr);
788     ierr = VecGetArray(b, &bb);CHKERRQ(ierr);
789     for (r = 0; r < len; ++r) bb[lrows[r]] = diag*xx[lrows[r]];
790     ierr = VecRestoreArrayRead(x, &xx);CHKERRQ(ierr);
791     ierr = VecRestoreArray(b, &bb);CHKERRQ(ierr);
792   }
793   /* Must zero mat->B before mat->A because the (diag != 0.0) case below may put values into mat->B */
794   ierr = MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
795   if ((diag != 0.0) && (mat->A->rmap->N == mat->A->cmap->N)) {
796     ierr = MatZeroRows(mat->A, len, lrows, diag, NULL, NULL);CHKERRQ(ierr);
797   } else if (diag != 0.0) {
798     ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
799     if (((Mat_SeqAIJ *) mat->A->data)->nonew) SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "MatZeroRows() on rectangular matrices cannot be used with the Mat options\nMAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
800     for (r = 0; r < len; ++r) {
801       const PetscInt row = lrows[r] + A->rmap->rstart;
802       ierr = MatSetValues(A, 1, &row, 1, &row, &diag, INSERT_VALUES);CHKERRQ(ierr);
803     }
804     ierr = MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
805     ierr = MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
806   } else {
807     ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
808   }
809   ierr = PetscFree(lrows);CHKERRQ(ierr);
810 
811   /* only change matrix nonzero state if pattern was allowed to be changed */
812   if (!((Mat_SeqAIJ*)(mat->A->data))->keepnonzeropattern) {
813     PetscObjectState state = mat->A->nonzerostate + mat->B->nonzerostate;
814     ierr = MPI_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
815   }
816   PetscFunctionReturn(0);
817 }
818 
819 #undef __FUNCT__
820 #define __FUNCT__ "MatZeroRowsColumns_MPIAIJ"
821 PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
822 {
823   Mat_MPIAIJ        *l = (Mat_MPIAIJ*)A->data;
824   PetscErrorCode    ierr;
825   PetscMPIInt       n = A->rmap->n;
826   PetscInt          i,j,r,m,p = 0,len = 0;
827   PetscInt          *lrows,*owners = A->rmap->range;
828   PetscSFNode       *rrows;
829   PetscSF           sf;
830   const PetscScalar *xx;
831   PetscScalar       *bb,*mask;
832   Vec               xmask,lmask;
833   Mat_SeqAIJ        *aij = (Mat_SeqAIJ*)l->B->data;
834   const PetscInt    *aj, *ii,*ridx;
835   PetscScalar       *aa;
836 
837   PetscFunctionBegin;
838   /* Create SF where leaves are input rows and roots are owned rows */
839   ierr = PetscMalloc1(n, &lrows);CHKERRQ(ierr);
840   for (r = 0; r < n; ++r) lrows[r] = -1;
841   ierr = PetscMalloc1(N, &rrows);CHKERRQ(ierr);
842   for (r = 0; r < N; ++r) {
843     const PetscInt idx   = rows[r];
844     if (idx < 0 || A->rmap->N <= idx) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range [0,%D)",idx,A->rmap->N);
845     if (idx < owners[p] || owners[p+1] <= idx) { /* short-circuit the search if the last p owns this row too */
846       ierr = PetscLayoutFindOwner(A->rmap,idx,&p);CHKERRQ(ierr);
847     }
848     rrows[r].rank  = p;
849     rrows[r].index = rows[r] - owners[p];
850   }
851   ierr = PetscSFCreate(PetscObjectComm((PetscObject) A), &sf);CHKERRQ(ierr);
852   ierr = PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER);CHKERRQ(ierr);
853   /* Collect flags for rows to be zeroed */
854   ierr = PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
855   ierr = PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
856   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
857   /* Compress and put in row numbers */
858   for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r;
859   /* zero diagonal part of matrix */
860   ierr = MatZeroRowsColumns(l->A,len,lrows,diag,x,b);CHKERRQ(ierr);
861   /* handle off diagonal part of matrix */
862   ierr = MatGetVecs(A,&xmask,NULL);CHKERRQ(ierr);
863   ierr = VecDuplicate(l->lvec,&lmask);CHKERRQ(ierr);
864   ierr = VecGetArray(xmask,&bb);CHKERRQ(ierr);
865   for (i=0; i<len; i++) bb[lrows[i]] = 1;
866   ierr = VecRestoreArray(xmask,&bb);CHKERRQ(ierr);
867   ierr = VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
868   ierr = VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
869   ierr = VecDestroy(&xmask);CHKERRQ(ierr);
870   if (x) {
871     ierr = VecScatterBegin(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
872     ierr = VecScatterEnd(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
873     ierr = VecGetArrayRead(l->lvec,&xx);CHKERRQ(ierr);
874     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
875   }
876   ierr = VecGetArray(lmask,&mask);CHKERRQ(ierr);
877   /* remove zeroed rows of off diagonal matrix */
878   ii = aij->i;
879   for (i=0; i<len; i++) {
880     ierr = PetscMemzero(aij->a + ii[lrows[i]],(ii[lrows[i]+1] - ii[lrows[i]])*sizeof(PetscScalar));CHKERRQ(ierr);
881   }
882   /* loop over all elements of the off-process part of the matrix, zeroing removed columns */
883   if (aij->compressedrow.use) {
884     m    = aij->compressedrow.nrows;
885     ii   = aij->compressedrow.i;
886     ridx = aij->compressedrow.rindex;
887     for (i=0; i<m; i++) {
888       n  = ii[i+1] - ii[i];
889       aj = aij->j + ii[i];
890       aa = aij->a + ii[i];
891 
892       for (j=0; j<n; j++) {
893         if (PetscAbsScalar(mask[*aj])) {
894           if (b) bb[*ridx] -= *aa*xx[*aj];
895           *aa = 0.0;
896         }
897         aa++;
898         aj++;
899       }
900       ridx++;
901     }
902   } else { /* do not use compressed row format */
903     m = l->B->rmap->n;
904     for (i=0; i<m; i++) {
905       n  = ii[i+1] - ii[i];
906       aj = aij->j + ii[i];
907       aa = aij->a + ii[i];
908       for (j=0; j<n; j++) {
909         if (PetscAbsScalar(mask[*aj])) {
910           if (b) bb[i] -= *aa*xx[*aj];
911           *aa = 0.0;
912         }
913         aa++;
914         aj++;
915       }
916     }
917   }
918   if (x) {
919     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
920     ierr = VecRestoreArrayRead(l->lvec,&xx);CHKERRQ(ierr);
921   }
922   ierr = VecRestoreArray(lmask,&mask);CHKERRQ(ierr);
923   ierr = VecDestroy(&lmask);CHKERRQ(ierr);
924   ierr = PetscFree(lrows);CHKERRQ(ierr);
925 
926   /* only change matrix nonzero state if pattern was allowed to be changed */
927   if (!((Mat_SeqAIJ*)(l->A->data))->keepnonzeropattern) {
928     PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate;
929     ierr = MPI_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
930   }
931   PetscFunctionReturn(0);
932 }
933 
934 #undef __FUNCT__
935 #define __FUNCT__ "MatMult_MPIAIJ"
936 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
937 {
938   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
939   PetscErrorCode ierr;
940   PetscInt       nt;
941 
942   PetscFunctionBegin;
943   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
944   if (nt != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt);
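  /* start communicating the off-process entries of xx, overlap that with the diagonal-block product, then finish the scatter and add in the off-diagonal-block contribution */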
945   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
946   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
947   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
948   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
949   PetscFunctionReturn(0);
950 }
951 
952 #undef __FUNCT__
953 #define __FUNCT__ "MatMultDiagonalBlock_MPIAIJ"
954 PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx)
955 {
956   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
957   PetscErrorCode ierr;
958 
959   PetscFunctionBegin;
960   ierr = MatMultDiagonalBlock(a->A,bb,xx);CHKERRQ(ierr);
961   PetscFunctionReturn(0);
962 }
963 
964 #undef __FUNCT__
965 #define __FUNCT__ "MatMultAdd_MPIAIJ"
966 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
967 {
968   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
969   PetscErrorCode ierr;
970 
971   PetscFunctionBegin;
972   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
973   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
974   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
975   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
976   PetscFunctionReturn(0);
977 }
978 
979 #undef __FUNCT__
980 #define __FUNCT__ "MatMultTranspose_MPIAIJ"
981 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
982 {
983   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
984   PetscErrorCode ierr;
985   PetscBool      merged;
986 
987   PetscFunctionBegin;
988   ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr);
989   /* do nondiagonal part */
990   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
991   if (!merged) {
992     /* send it on its way */
993     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
994     /* do local part */
995     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
996     /* receive remote parts: note this assumes the values are not actually */
997     /* added into yy until the next line */
998     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
999   } else {
1000     /* do local part */
1001     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
1002     /* send it on its way */
1003     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1004     /* values actually were received in the Begin() but we need to call this nop */
1005     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1006   }
1007   PetscFunctionReturn(0);
1008 }
1009 
1010 #undef __FUNCT__
1011 #define __FUNCT__ "MatIsTranspose_MPIAIJ"
1012 PetscErrorCode  MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscBool  *f)
1013 {
1014   MPI_Comm       comm;
1015   Mat_MPIAIJ     *Aij = (Mat_MPIAIJ*) Amat->data, *Bij;
1016   Mat            Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs;
1017   IS             Me,Notme;
1018   PetscErrorCode ierr;
1019   PetscInt       M,N,first,last,*notme,i;
1020   PetscMPIInt    size;
1021 
1022   PetscFunctionBegin;
1023   /* Easy test: symmetric diagonal block */
1024   Bij  = (Mat_MPIAIJ*) Bmat->data; Bdia = Bij->A;
1025   ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr);
1026   if (!*f) PetscFunctionReturn(0);
1027   ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr);
1028   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1029   if (size == 1) PetscFunctionReturn(0);
1030 
1031   /* Hard test: off-diagonal block. This takes a MatGetSubMatrix. */
1032   ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr);
1033   ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr);
1034   ierr = PetscMalloc1((N-last+first),&notme);CHKERRQ(ierr);
1035   for (i=0; i<first; i++) notme[i] = i;
1036   for (i=last; i<M; i++) notme[i-last+first] = i;
1037   ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,PETSC_COPY_VALUES,&Notme);CHKERRQ(ierr);
1038   ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr);
1039   ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr);
1040   Aoff = Aoffs[0];
1041   ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr);
1042   Boff = Boffs[0];
1043   ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr);
1044   ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr);
1045   ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr);
1046   ierr = ISDestroy(&Me);CHKERRQ(ierr);
1047   ierr = ISDestroy(&Notme);CHKERRQ(ierr);
1048   ierr = PetscFree(notme);CHKERRQ(ierr);
1049   PetscFunctionReturn(0);
1050 }
1051 
1052 #undef __FUNCT__
1053 #define __FUNCT__ "MatMultTransposeAdd_MPIAIJ"
1054 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1055 {
1056   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1057   PetscErrorCode ierr;
1058 
1059   PetscFunctionBegin;
1060   /* do nondiagonal part */
1061   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1062   /* send it on its way */
1063   ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1064   /* do local part */
1065   ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1066   /* receive remote parts */
1067   ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1068   PetscFunctionReturn(0);
1069 }
1070 
1071 /*
1072   This only works correctly for square matrices where the subblock A->A is the
1073    diagonal block
1074 */
1075 #undef __FUNCT__
1076 #define __FUNCT__ "MatGetDiagonal_MPIAIJ"
1077 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v)
1078 {
1079   PetscErrorCode ierr;
1080   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1081 
1082   PetscFunctionBegin;
1083   if (A->rmap->N != A->cmap->N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
1084   if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
1085   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
1086   PetscFunctionReturn(0);
1087 }
1088 
1089 #undef __FUNCT__
1090 #define __FUNCT__ "MatScale_MPIAIJ"
1091 PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa)
1092 {
1093   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1094   PetscErrorCode ierr;
1095 
1096   PetscFunctionBegin;
1097   ierr = MatScale(a->A,aa);CHKERRQ(ierr);
1098   ierr = MatScale(a->B,aa);CHKERRQ(ierr);
1099   PetscFunctionReturn(0);
1100 }
1101 
1102 #undef __FUNCT__
1103 #define __FUNCT__ "MatDestroy_Redundant"
1104 PetscErrorCode MatDestroy_Redundant(Mat_Redundant **redundant)
1105 {
1106   PetscErrorCode ierr;
1107   Mat_Redundant  *redund = *redundant;
1108   PetscInt       i;
1109 
1110   PetscFunctionBegin;
1111   *redundant = NULL;
1112   if (redund){
1113     if (redund->matseq) { /* via MatGetSubMatrices()  */
1114       ierr = ISDestroy(&redund->isrow);CHKERRQ(ierr);
1115       ierr = ISDestroy(&redund->iscol);CHKERRQ(ierr);
1116       ierr = MatDestroy(&redund->matseq[0]);CHKERRQ(ierr);
1117       ierr = PetscFree(redund->matseq);CHKERRQ(ierr);
1118     } else {
1119       ierr = PetscFree2(redund->send_rank,redund->recv_rank);CHKERRQ(ierr);
1120       ierr = PetscFree(redund->sbuf_j);CHKERRQ(ierr);
1121       ierr = PetscFree(redund->sbuf_a);CHKERRQ(ierr);
1122       for (i=0; i<redund->nrecvs; i++) {
1123         ierr = PetscFree(redund->rbuf_j[i]);CHKERRQ(ierr);
1124         ierr = PetscFree(redund->rbuf_a[i]);CHKERRQ(ierr);
1125       }
1126       ierr = PetscFree4(redund->sbuf_nz,redund->rbuf_nz,redund->rbuf_j,redund->rbuf_a);CHKERRQ(ierr);
1127     }
1128 
1129     if (redund->psubcomm) {
1130       ierr = PetscSubcommDestroy(&redund->psubcomm);CHKERRQ(ierr);
1131     }
1132     ierr = PetscFree(redund);CHKERRQ(ierr);
1133   }
1134   PetscFunctionReturn(0);
1135 }
1136 
1137 #undef __FUNCT__
1138 #define __FUNCT__ "MatDestroy_MPIAIJ"
1139 PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
1140 {
1141   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1142   PetscErrorCode ierr;
1143 
1144   PetscFunctionBegin;
1145 #if defined(PETSC_USE_LOG)
1146   PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N);
1147 #endif
1148   ierr = MatDestroy_Redundant(&aij->redundant);CHKERRQ(ierr);
1149   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
1150   ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
1151   ierr = MatDestroy(&aij->A);CHKERRQ(ierr);
1152   ierr = MatDestroy(&aij->B);CHKERRQ(ierr);
1153 #if defined(PETSC_USE_CTABLE)
1154   ierr = PetscTableDestroy(&aij->colmap);CHKERRQ(ierr);
1155 #else
1156   ierr = PetscFree(aij->colmap);CHKERRQ(ierr);
1157 #endif
1158   ierr = PetscFree(aij->garray);CHKERRQ(ierr);
1159   ierr = VecDestroy(&aij->lvec);CHKERRQ(ierr);
1160   ierr = VecScatterDestroy(&aij->Mvctx);CHKERRQ(ierr);
1161   ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
1162   ierr = PetscFree(aij->ld);CHKERRQ(ierr);
1163   ierr = PetscFree(mat->data);CHKERRQ(ierr);
1164 
1165   ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
1166   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL);CHKERRQ(ierr);
1167   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL);CHKERRQ(ierr);
1168   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C",NULL);CHKERRQ(ierr);
1169   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C",NULL);CHKERRQ(ierr);
1170   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C",NULL);CHKERRQ(ierr);
1171   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C",NULL);CHKERRQ(ierr);
1172   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C",NULL);CHKERRQ(ierr);
1173   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C",NULL);CHKERRQ(ierr);
1174   PetscFunctionReturn(0);
1175 }
1176 
1177 #undef __FUNCT__
1178 #define __FUNCT__ "MatView_MPIAIJ_Binary"
1179 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
1180 {
1181   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1182   Mat_SeqAIJ     *A   = (Mat_SeqAIJ*)aij->A->data;
1183   Mat_SeqAIJ     *B   = (Mat_SeqAIJ*)aij->B->data;
1184   PetscErrorCode ierr;
1185   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
1186   int            fd;
1187   PetscInt       nz,header[4],*row_lengths,*range=0,rlen,i;
1188   PetscInt       nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap->rstart,rnz = 0;
1189   PetscScalar    *column_values;
1190   PetscInt       message_count,flowcontrolcount;
1191   FILE           *file;
1192 
1193   PetscFunctionBegin;
1194   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
1195   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);CHKERRQ(ierr);
1196   nz   = A->nz + B->nz;
1197   if (!rank) {
1198     header[0] = MAT_FILE_CLASSID;
1199     header[1] = mat->rmap->N;
1200     header[2] = mat->cmap->N;
1201 
1202     ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1203     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1204     ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1205     /* get largest number of rows any processor has */
1206     rlen  = mat->rmap->n;
1207     range = mat->rmap->range;
1208     for (i=1; i<size; i++) rlen = PetscMax(rlen,range[i+1] - range[i]);
1209   } else {
1210     ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1211     rlen = mat->rmap->n;
1212   }
1213 
1214   /* load up the local row counts */
1215   ierr = PetscMalloc1((rlen+1),&row_lengths);CHKERRQ(ierr);
1216   for (i=0; i<mat->rmap->n; i++) row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
1217 
1218   /* store the row lengths to the file */
1219   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1220   if (!rank) {
1221     ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap->n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1222     for (i=1; i<size; i++) {
1223       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1224       rlen = range[i+1] - range[i];
1225       ierr = MPIULong_Recv(row_lengths,rlen,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1226       ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1227     }
1228     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1229   } else {
1230     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1231     ierr = MPIULong_Send(row_lengths,mat->rmap->n,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1232     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1233   }
1234   ierr = PetscFree(row_lengths);CHKERRQ(ierr);
1235 
1236   /* load up the local column indices */
1237   nzmax = nz; /* process 0 needs space as large as the largest process needs */
1238   ierr  = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1239   ierr  = PetscMalloc1((nzmax+1),&column_indices);CHKERRQ(ierr);
1240   cnt   = 0;
1241   for (i=0; i<mat->rmap->n; i++) {
1242     for (j=B->i[i]; j<B->i[i+1]; j++) {
1243       if ((col = garray[B->j[j]]) > cstart) break;
1244       column_indices[cnt++] = col;
1245     }
1246     for (k=A->i[i]; k<A->i[i+1]; k++) column_indices[cnt++] = A->j[k] + cstart;
1247     for (; j<B->i[i+1]; j++) column_indices[cnt++] = garray[B->j[j]];
1248   }
1249   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
1250 
1251   /* store the column indices to the file */
1252   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1253   if (!rank) {
1254     MPI_Status status;
1255     ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1256     for (i=1; i<size; i++) {
1257       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1258       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
1259       if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: rnz = %D nzmax = %D",rnz,nzmax);
1260       ierr = MPIULong_Recv(column_indices,rnz,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1261       ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1262     }
1263     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1264   } else {
1265     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1266     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1267     ierr = MPIULong_Send(column_indices,nz,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1268     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1269   }
1270   ierr = PetscFree(column_indices);CHKERRQ(ierr);
1271 
1272   /* load up the local column values */
1273   ierr = PetscMalloc1((nzmax+1),&column_values);CHKERRQ(ierr);
1274   cnt  = 0;
1275   for (i=0; i<mat->rmap->n; i++) {
1276     for (j=B->i[i]; j<B->i[i+1]; j++) {
1277       if (garray[B->j[j]] > cstart) break;
1278       column_values[cnt++] = B->a[j];
1279     }
1280     for (k=A->i[i]; k<A->i[i+1]; k++) column_values[cnt++] = A->a[k];
1281     for (; j<B->i[i+1]; j++) column_values[cnt++] = B->a[j];
1282   }
1283   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
1284 
1285   /* store the column values to the file */
1286   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1287   if (!rank) {
1288     MPI_Status status;
1289     ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1290     for (i=1; i<size; i++) {
1291       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1292       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
1293       if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: rnz = %D nzmax = %D",rnz,nzmax);
1294       ierr = MPIULong_Recv(column_values,rnz,MPIU_SCALAR,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1295       ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1296     }
1297     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1298   } else {
1299     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1300     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1301     ierr = MPIULong_Send(column_values,nz,MPIU_SCALAR,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1302     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1303   }
1304   ierr = PetscFree(column_values);CHKERRQ(ierr);
1305 
1306   ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr);
1307   if (file) fprintf(file,"-matload_block_size %d\n",(int)PetscAbs(mat->rmap->bs));
1308   PetscFunctionReturn(0);
1309 }
1310 
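/*
   The binary dump written above consists of a 4-entry integer header (whose last entry is the
   global number of nonzeros), followed by all row lengths, then all global column indices, and
   finally all values, each gathered to process 0 in rank order. A minimal sketch of writing such
   a file and reading it back, assuming A is an assembled MPIAIJ matrix (the file name "amat.dat"
   is only an example):

     Mat         B;
     PetscViewer viewer;

     ierr = PetscViewerBinaryOpen(PETSC_COMM_WORLD,"amat.dat",FILE_MODE_WRITE,&viewer);CHKERRQ(ierr);
     ierr = MatView(A,viewer);CHKERRQ(ierr);
     ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr);

     ierr = PetscViewerBinaryOpen(PETSC_COMM_WORLD,"amat.dat",FILE_MODE_READ,&viewer);CHKERRQ(ierr);
     ierr = MatCreate(PETSC_COMM_WORLD,&B);CHKERRQ(ierr);
     ierr = MatSetType(B,MATAIJ);CHKERRQ(ierr);
     ierr = MatLoad(B,viewer);CHKERRQ(ierr);
     ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr);
*/
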
1311 #include <petscdraw.h>
1312 #undef __FUNCT__
1313 #define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket"
1314 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
1315 {
1316   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
1317   PetscErrorCode    ierr;
1318   PetscMPIInt       rank = aij->rank,size = aij->size;
1319   PetscBool         isdraw,iascii,isbinary;
1320   PetscViewer       sviewer;
1321   PetscViewerFormat format;
1322 
1323   PetscFunctionBegin;
1324   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1325   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1326   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1327   if (iascii) {
1328     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1329     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1330       MatInfo   info;
1331       PetscBool inodes;
1332 
1333       ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
1334       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
1335       ierr = MatInodeGetInodeSizes(aij->A,NULL,(PetscInt**)&inodes,NULL);CHKERRQ(ierr);
1336       ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_TRUE);CHKERRQ(ierr);
1337       if (!inodes) {
1338         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, not using I-node routines\n",
1339                                                   rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
1340       } else {
1341         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, using I-node routines\n",
1342                                                   rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
1343       }
1344       ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
1345       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1346       ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
1347       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1348       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1349       ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_FALSE);CHKERRQ(ierr);
1350       ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
1351       ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr);
1352       PetscFunctionReturn(0);
1353     } else if (format == PETSC_VIEWER_ASCII_INFO) {
1354       PetscInt inodecount,inodelimit,*inodes;
1355       ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr);
1356       if (inodes) {
1357         ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr);
1358       } else {
1359         ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr);
1360       }
1361       PetscFunctionReturn(0);
1362     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1363       PetscFunctionReturn(0);
1364     }
1365   } else if (isbinary) {
1366     if (size == 1) {
1367       ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1368       ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
1369     } else {
1370       ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr);
1371     }
1372     PetscFunctionReturn(0);
1373   } else if (isdraw) {
1374     PetscDraw draw;
1375     PetscBool isnull;
1376     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
1377     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
1378   }
1379 
1380   {
1381     /* assemble the entire matrix onto first processor. */
1382     Mat        A;
1383     Mat_SeqAIJ *Aloc;
1384     PetscInt   M = mat->rmap->N,N = mat->cmap->N,m,*ai,*aj,row,*cols,i,*ct;
1385     MatScalar  *a;
1386 
1387     ierr = MatCreate(PetscObjectComm((PetscObject)mat),&A);CHKERRQ(ierr);
1388     if (!rank) {
1389       ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
1390     } else {
1391       ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
1392     }
1393     /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */
1394     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
1395     ierr = MatMPIAIJSetPreallocation(A,0,NULL,0,NULL);CHKERRQ(ierr);
1396     ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
1397     ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)A);CHKERRQ(ierr);
1398 
1399     /* copy over the A part */
1400     Aloc = (Mat_SeqAIJ*)aij->A->data;
1401     m    = aij->A->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1402     row  = mat->rmap->rstart;
1403     for (i=0; i<ai[m]; i++) aj[i] += mat->cmap->rstart;
1404     for (i=0; i<m; i++) {
1405       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
1406       row++;
1407       a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
1408     }
1409     aj = Aloc->j;
1410     for (i=0; i<ai[m]; i++) aj[i] -= mat->cmap->rstart;
1411 
1412     /* copy over the B part */
1413     Aloc = (Mat_SeqAIJ*)aij->B->data;
1414     m    = aij->B->rmap->n;  ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1415     row  = mat->rmap->rstart;
1416     ierr = PetscMalloc1((ai[m]+1),&cols);CHKERRQ(ierr);
1417     ct   = cols;
1418     for (i=0; i<ai[m]; i++) cols[i] = aij->garray[aj[i]];
1419     for (i=0; i<m; i++) {
1420       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
1421       row++;
1422       a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
1423     }
1424     ierr = PetscFree(ct);CHKERRQ(ierr);
1425     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1426     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1427     /*
1428        Everyone has to participate in the call that draws the matrix, since the graphics
1429        waits are synchronized across all processes that share the PetscDraw object
1430     */
1431     ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
1432     if (!rank) {
1433       ierr = MatView_SeqAIJ(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
1434     }
1435     ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
1436     ierr = MatDestroy(&A);CHKERRQ(ierr);
1437   }
1438   PetscFunctionReturn(0);
1439 }
1440 
1441 #undef __FUNCT__
1442 #define __FUNCT__ "MatView_MPIAIJ"
1443 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer)
1444 {
1445   PetscErrorCode ierr;
1446   PetscBool      iascii,isdraw,issocket,isbinary;
1447 
1448   PetscFunctionBegin;
1449   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1450   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1451   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1452   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
1453   if (iascii || isdraw || isbinary || issocket) {
1454     ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
1455   }
1456   PetscFunctionReturn(0);
1457 }
1458 
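/*
   A minimal sketch of triggering the viewers dispatched above from user code; the ASCII info
   format prints the per-process nonzero summary produced in
   MatView_MPIAIJ_ASCIIorDraworSocket(), assuming A is an assembled MPIAIJ matrix:

     ierr = PetscViewerPushFormat(PETSC_VIEWER_STDOUT_WORLD,PETSC_VIEWER_ASCII_INFO);CHKERRQ(ierr);
     ierr = MatView(A,PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr);
     ierr = PetscViewerPopFormat(PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr);
*/
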
1459 #undef __FUNCT__
1460 #define __FUNCT__ "MatSOR_MPIAIJ"
1461 PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
1462 {
1463   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1464   PetscErrorCode ierr;
1465   Vec            bb1 = 0;
1466   PetscBool      hasop;
1467 
1468   PetscFunctionBegin;
1469   if (flag == SOR_APPLY_UPPER) {
1470     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1471     PetscFunctionReturn(0);
1472   }
1473 
1474   if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) {
1475     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
1476   }
1477 
1478   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
1479     if (flag & SOR_ZERO_INITIAL_GUESS) {
1480       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1481       its--;
1482     }
1483 
1484     while (its--) {
1485       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1486       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1487 
1488       /* update rhs: bb1 = bb - B*x */
1489       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1490       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1491 
1492       /* local sweep */
1493       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1494     }
1495   } else if (flag & SOR_LOCAL_FORWARD_SWEEP) {
1496     if (flag & SOR_ZERO_INITIAL_GUESS) {
1497       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1498       its--;
1499     }
1500     while (its--) {
1501       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1502       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1503 
1504       /* update rhs: bb1 = bb - B*x */
1505       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1506       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1507 
1508       /* local sweep */
1509       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1510     }
1511   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) {
1512     if (flag & SOR_ZERO_INITIAL_GUESS) {
1513       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1514       its--;
1515     }
1516     while (its--) {
1517       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1518       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1519 
1520       /* update rhs: bb1 = bb - B*x */
1521       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1522       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1523 
1524       /* local sweep */
1525       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1526     }
1527   } else if (flag & SOR_EISENSTAT) {
1528     Vec xx1;
1529 
1530     ierr = VecDuplicate(bb,&xx1);CHKERRQ(ierr);
1531     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr);
1532 
1533     ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1534     ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1535     if (!mat->diag) {
1536       ierr = MatGetVecs(matin,&mat->diag,NULL);CHKERRQ(ierr);
1537       ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr);
1538     }
1539     ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr);
1540     if (hasop) {
1541       ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr);
1542     } else {
1543       ierr = VecPointwiseMult(bb1,mat->diag,xx);CHKERRQ(ierr);
1544     }
1545     ierr = VecAYPX(bb1,(omega-2.0)/omega,bb);CHKERRQ(ierr);
1546 
1547     ierr = MatMultAdd(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr);
1548 
1549     /* local sweep */
1550     ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr);
1551     ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr);
1552     ierr = VecDestroy(&xx1);CHKERRQ(ierr);
1553   } else SETERRQ(PetscObjectComm((PetscObject)matin),PETSC_ERR_SUP,"Parallel SOR not supported");
1554 
1555   ierr = VecDestroy(&bb1);CHKERRQ(ierr);
1556   PetscFunctionReturn(0);
1557 }
1558 
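/*
   The parallel "SOR" above is processor-local SOR: each sweep first gathers the off-process part
   of x into lvec, forms bb1 = bb - B*x_offproc, and then runs the sequential SOR kernel of the
   diagonal block A on bb1. A minimal sketch of invoking it directly (it is more commonly reached
   through -pc_type sor), assuming b and x are vectors conforming with mat:

     ierr = MatSOR(mat,b,1.0,(MatSORType)(SOR_LOCAL_SYMMETRIC_SWEEP | SOR_ZERO_INITIAL_GUESS),0.0,1,1,x);CHKERRQ(ierr);

   Here omega = 1.0, fshift = 0.0, and one outer iteration with one local iteration is requested.
*/
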
1559 #undef __FUNCT__
1560 #define __FUNCT__ "MatPermute_MPIAIJ"
1561 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B)
1562 {
1563   Mat            aA,aB,Aperm;
1564   const PetscInt *rwant,*cwant,*gcols,*ai,*bi,*aj,*bj;
1565   PetscScalar    *aa,*ba;
1566   PetscInt       i,j,m,n,ng,anz,bnz,*dnnz,*onnz,*tdnnz,*tonnz,*rdest,*cdest,*work,*gcdest;
1567   PetscSF        rowsf,sf;
1568   IS             parcolp = NULL;
1569   PetscBool      done;
1570   PetscErrorCode ierr;
1571 
1572   PetscFunctionBegin;
1573   ierr = MatGetLocalSize(A,&m,&n);CHKERRQ(ierr);
1574   ierr = ISGetIndices(rowp,&rwant);CHKERRQ(ierr);
1575   ierr = ISGetIndices(colp,&cwant);CHKERRQ(ierr);
1576   ierr = PetscMalloc3(PetscMax(m,n),&work,m,&rdest,n,&cdest);CHKERRQ(ierr);
1577 
1578   /* Invert row permutation to find out where my rows should go */
1579   ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&rowsf);CHKERRQ(ierr);
1580   ierr = PetscSFSetGraphLayout(rowsf,A->rmap,A->rmap->n,NULL,PETSC_OWN_POINTER,rwant);CHKERRQ(ierr);
1581   ierr = PetscSFSetFromOptions(rowsf);CHKERRQ(ierr);
1582   for (i=0; i<m; i++) work[i] = A->rmap->rstart + i;
1583   ierr = PetscSFReduceBegin(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr);
1584   ierr = PetscSFReduceEnd(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr);
1585 
1586   /* Invert column permutation to find out where my columns should go */
1587   ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
1588   ierr = PetscSFSetGraphLayout(sf,A->cmap,A->cmap->n,NULL,PETSC_OWN_POINTER,cwant);CHKERRQ(ierr);
1589   ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
1590   for (i=0; i<n; i++) work[i] = A->cmap->rstart + i;
1591   ierr = PetscSFReduceBegin(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr);
1592   ierr = PetscSFReduceEnd(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr);
1593   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
1594 
1595   ierr = ISRestoreIndices(rowp,&rwant);CHKERRQ(ierr);
1596   ierr = ISRestoreIndices(colp,&cwant);CHKERRQ(ierr);
1597   ierr = MatMPIAIJGetSeqAIJ(A,&aA,&aB,&gcols);CHKERRQ(ierr);
1598 
1599   /* Find out where my gcols should go */
1600   ierr = MatGetSize(aB,NULL,&ng);CHKERRQ(ierr);
1601   ierr = PetscMalloc1(ng,&gcdest);CHKERRQ(ierr);
1602   ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
1603   ierr = PetscSFSetGraphLayout(sf,A->cmap,ng,NULL,PETSC_OWN_POINTER,gcols);CHKERRQ(ierr);
1604   ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
1605   ierr = PetscSFBcastBegin(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr);
1606   ierr = PetscSFBcastEnd(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr);
1607   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
1608 
1609   ierr = PetscCalloc4(m,&dnnz,m,&onnz,m,&tdnnz,m,&tonnz);CHKERRQ(ierr);
1610   ierr = MatGetRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr);
1611   ierr = MatGetRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr);
1612   for (i=0; i<m; i++) {
1613     PetscInt row = rdest[i],rowner;
1614     ierr = PetscLayoutFindOwner(A->rmap,row,&rowner);CHKERRQ(ierr);
1615     for (j=ai[i]; j<ai[i+1]; j++) {
1616       PetscInt cowner,col = cdest[aj[j]];
1617       ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr); /* Could build an index for the columns to eliminate this search */
1618       if (rowner == cowner) dnnz[i]++;
1619       else onnz[i]++;
1620     }
1621     for (j=bi[i]; j<bi[i+1]; j++) {
1622       PetscInt cowner,col = gcdest[bj[j]];
1623       ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr);
1624       if (rowner == cowner) dnnz[i]++;
1625       else onnz[i]++;
1626     }
1627   }
1628   ierr = PetscSFBcastBegin(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr);
1629   ierr = PetscSFBcastEnd(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr);
1630   ierr = PetscSFBcastBegin(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr);
1631   ierr = PetscSFBcastEnd(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr);
1632   ierr = PetscSFDestroy(&rowsf);CHKERRQ(ierr);
1633 
1634   ierr = MatCreateAIJ(PetscObjectComm((PetscObject)A),A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N,0,tdnnz,0,tonnz,&Aperm);CHKERRQ(ierr);
1635   ierr = MatSeqAIJGetArray(aA,&aa);CHKERRQ(ierr);
1636   ierr = MatSeqAIJGetArray(aB,&ba);CHKERRQ(ierr);
1637   for (i=0; i<m; i++) {
1638     PetscInt *acols = dnnz,*bcols = onnz; /* Repurpose now-unneeded arrays */
1639     PetscInt j0,rowlen;
1640     rowlen = ai[i+1] - ai[i];
1641     for (j0=j=0; j<rowlen; j0=j) { /* rowlen can exceed m, the length of the scratch arrays, so insert the values in batches of at most m */
1642       for ( ; j<PetscMin(rowlen,j0+m); j++) acols[j-j0] = cdest[aj[ai[i]+j]];
1643       ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,acols,aa+ai[i]+j0,INSERT_VALUES);CHKERRQ(ierr);
1644     }
1645     rowlen = bi[i+1] - bi[i];
1646     for (j0=j=0; j<rowlen; j0=j) {
1647       for ( ; j<PetscMin(rowlen,j0+m); j++) bcols[j-j0] = gcdest[bj[bi[i]+j]];
1648       ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,bcols,ba+bi[i]+j0,INSERT_VALUES);CHKERRQ(ierr);
1649     }
1650   }
1651   ierr = MatAssemblyBegin(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1652   ierr = MatAssemblyEnd(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1653   ierr = MatRestoreRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr);
1654   ierr = MatRestoreRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr);
1655   ierr = MatSeqAIJRestoreArray(aA,&aa);CHKERRQ(ierr);
1656   ierr = MatSeqAIJRestoreArray(aB,&ba);CHKERRQ(ierr);
1657   ierr = PetscFree4(dnnz,onnz,tdnnz,tonnz);CHKERRQ(ierr);
1658   ierr = PetscFree3(work,rdest,cdest);CHKERRQ(ierr);
1659   ierr = PetscFree(gcdest);CHKERRQ(ierr);
1660   if (parcolp) {ierr = ISDestroy(&colp);CHKERRQ(ierr);}
1661   *B = Aperm;
1662   PetscFunctionReturn(0);
1663 }
1664 
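/*
   A minimal sketch of calling the permutation above from user code. Each process supplies, for
   every row (column) of the permuted matrix it will own, the global index of the row (column) of
   A that should land there; the arrays rowidx and colidx below are placeholders for such index
   lists:

     IS       rowp,colp;
     Mat      Aperm;
     PetscInt m,n;

     ierr = MatGetLocalSize(A,&m,&n);CHKERRQ(ierr);
     ierr = ISCreateGeneral(PETSC_COMM_WORLD,m,rowidx,PETSC_COPY_VALUES,&rowp);CHKERRQ(ierr);
     ierr = ISCreateGeneral(PETSC_COMM_WORLD,n,colidx,PETSC_COPY_VALUES,&colp);CHKERRQ(ierr);
     ierr = MatPermute(A,rowp,colp,&Aperm);CHKERRQ(ierr);
     ierr = ISDestroy(&rowp);CHKERRQ(ierr);
     ierr = ISDestroy(&colp);CHKERRQ(ierr);
*/
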
1665 #undef __FUNCT__
1666 #define __FUNCT__ "MatGetInfo_MPIAIJ"
1667 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1668 {
1669   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1670   Mat            A    = mat->A,B = mat->B;
1671   PetscErrorCode ierr;
1672   PetscReal      isend[5],irecv[5];
1673 
1674   PetscFunctionBegin;
1675   info->block_size = 1.0;
1676   ierr             = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1677 
1678   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1679   isend[3] = info->memory;  isend[4] = info->mallocs;
1680 
1681   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1682 
1683   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1684   isend[3] += info->memory;  isend[4] += info->mallocs;
1685   if (flag == MAT_LOCAL) {
1686     info->nz_used      = isend[0];
1687     info->nz_allocated = isend[1];
1688     info->nz_unneeded  = isend[2];
1689     info->memory       = isend[3];
1690     info->mallocs      = isend[4];
1691   } else if (flag == MAT_GLOBAL_MAX) {
1692     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);
1693 
1694     info->nz_used      = irecv[0];
1695     info->nz_allocated = irecv[1];
1696     info->nz_unneeded  = irecv[2];
1697     info->memory       = irecv[3];
1698     info->mallocs      = irecv[4];
1699   } else if (flag == MAT_GLOBAL_SUM) {
1700     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);
1701 
1702     info->nz_used      = irecv[0];
1703     info->nz_allocated = irecv[1];
1704     info->nz_unneeded  = irecv[2];
1705     info->memory       = irecv[3];
1706     info->mallocs      = irecv[4];
1707   }
1708   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1709   info->fill_ratio_needed = 0;
1710   info->factor_mallocs    = 0;
1711   PetscFunctionReturn(0);
1712 }
1713 
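/*
   A minimal sketch of querying these statistics from user code (the MatInfo fields are
   PetscLogDouble, hence the casts for printing):

     MatInfo info;

     ierr = MatGetInfo(A,MAT_GLOBAL_SUM,&info);CHKERRQ(ierr);
     ierr = PetscPrintf(PETSC_COMM_WORLD,"nz used %g, nz allocated %g, mallocs %g\n",
                        (double)info.nz_used,(double)info.nz_allocated,(double)info.mallocs);CHKERRQ(ierr);
*/
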
1714 #undef __FUNCT__
1715 #define __FUNCT__ "MatSetOption_MPIAIJ"
1716 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscBool flg)
1717 {
1718   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1719   PetscErrorCode ierr;
1720 
1721   PetscFunctionBegin;
1722   switch (op) {
1723   case MAT_NEW_NONZERO_LOCATIONS:
1724   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1725   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1726   case MAT_KEEP_NONZERO_PATTERN:
1727   case MAT_NEW_NONZERO_LOCATION_ERR:
1728   case MAT_USE_INODES:
1729   case MAT_IGNORE_ZERO_ENTRIES:
1730     MatCheckPreallocated(A,1);
1731     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1732     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1733     break;
1734   case MAT_ROW_ORIENTED:
1735     a->roworiented = flg;
1736 
1737     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1738     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1739     break;
1740   case MAT_NEW_DIAGONALS:
1741     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1742     break;
1743   case MAT_IGNORE_OFF_PROC_ENTRIES:
1744     a->donotstash = flg;
1745     break;
1746   case MAT_SPD:
1747     A->spd_set = PETSC_TRUE;
1748     A->spd     = flg;
1749     if (flg) {
1750       A->symmetric                  = PETSC_TRUE;
1751       A->structurally_symmetric     = PETSC_TRUE;
1752       A->symmetric_set              = PETSC_TRUE;
1753       A->structurally_symmetric_set = PETSC_TRUE;
1754     }
1755     break;
1756   case MAT_SYMMETRIC:
1757     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1758     break;
1759   case MAT_STRUCTURALLY_SYMMETRIC:
1760     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1761     break;
1762   case MAT_HERMITIAN:
1763     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1764     break;
1765   case MAT_SYMMETRY_ETERNAL:
1766     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1767     break;
1768   default:
1769     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op);
1770   }
1771   PetscFunctionReturn(0);
1772 }
1773 
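/*
   A minimal sketch of setting some of the options handled above; they are forwarded to the
   sequential diagonal and off-diagonal blocks as appropriate:

     ierr = MatSetOption(A,MAT_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr);
     ierr = MatSetOption(A,MAT_IGNORE_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr);
     ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
*/
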
1774 #undef __FUNCT__
1775 #define __FUNCT__ "MatGetRow_MPIAIJ"
1776 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1777 {
1778   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1779   PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
1780   PetscErrorCode ierr;
1781   PetscInt       i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart;
1782   PetscInt       nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend;
1783   PetscInt       *cmap,*idx_p;
1784 
1785   PetscFunctionBegin;
1786   if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
1787   mat->getrowactive = PETSC_TRUE;
1788 
1789   if (!mat->rowvalues && (idx || v)) {
1790     /*
1791         allocate enough space to hold information from the longest row.
1792     */
1793     Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data;
1794     PetscInt   max = 1,tmp;
1795     for (i=0; i<matin->rmap->n; i++) {
1796       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1797       if (max < tmp) max = tmp;
1798     }
1799     ierr = PetscMalloc2(max,&mat->rowvalues,max,&mat->rowindices);CHKERRQ(ierr);
1800   }
1801 
1802   if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows");
1803   lrow = row - rstart;
1804 
1805   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1806   if (!v)   {pvA = 0; pvB = 0;}
1807   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1808   ierr  = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1809   ierr  = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1810   nztot = nzA + nzB;
1811 
1812   cmap = mat->garray;
1813   if (v  || idx) {
1814     if (nztot) {
1815       /* Sort by increasing column numbers, assuming A and B already sorted */
1816       PetscInt imark = -1;
1817       if (v) {
1818         *v = v_p = mat->rowvalues;
1819         for (i=0; i<nzB; i++) {
1820           if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i];
1821           else break;
1822         }
1823         imark = i;
1824         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1825         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1826       }
1827       if (idx) {
1828         *idx = idx_p = mat->rowindices;
1829         if (imark > -1) {
1830           for (i=0; i<imark; i++) {
1831             idx_p[i] = cmap[cworkB[i]];
1832           }
1833         } else {
1834           for (i=0; i<nzB; i++) {
1835             if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]];
1836             else break;
1837           }
1838           imark = i;
1839         }
1840         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart + cworkA[i];
1841         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]];
1842       }
1843     } else {
1844       if (idx) *idx = 0;
1845       if (v)   *v   = 0;
1846     }
1847   }
1848   *nz  = nztot;
1849   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1850   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1851   PetscFunctionReturn(0);
1852 }
1853 
1854 #undef __FUNCT__
1855 #define __FUNCT__ "MatRestoreRow_MPIAIJ"
1856 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1857 {
1858   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1859 
1860   PetscFunctionBegin;
1861   if (!aij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
1862   aij->getrowactive = PETSC_FALSE;
1863   PetscFunctionReturn(0);
1864 }
1865 
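/*
   A minimal sketch of the public interface to the two routines above; only locally owned rows
   may be requested, and each MatGetRow() must be paired with MatRestoreRow():

     PetscInt          row,rstart,rend,ncols;
     const PetscInt    *cols;
     const PetscScalar *vals;

     ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
     for (row=rstart; row<rend; row++) {
       ierr = MatGetRow(A,row,&ncols,&cols,&vals);CHKERRQ(ierr);
       ierr = PetscPrintf(PETSC_COMM_SELF,"row %D has %D nonzeros\n",row,ncols);CHKERRQ(ierr);
       ierr = MatRestoreRow(A,row,&ncols,&cols,&vals);CHKERRQ(ierr);
     }
*/
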
1866 #undef __FUNCT__
1867 #define __FUNCT__ "MatNorm_MPIAIJ"
1868 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm)
1869 {
1870   Mat_MPIAIJ     *aij  = (Mat_MPIAIJ*)mat->data;
1871   Mat_SeqAIJ     *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data;
1872   PetscErrorCode ierr;
1873   PetscInt       i,j,cstart = mat->cmap->rstart;
1874   PetscReal      sum = 0.0;
1875   MatScalar      *v;
1876 
1877   PetscFunctionBegin;
1878   if (aij->size == 1) {
1879     ierr =  MatNorm(aij->A,type,norm);CHKERRQ(ierr);
1880   } else {
1881     if (type == NORM_FROBENIUS) {
1882       v = amat->a;
1883       for (i=0; i<amat->nz; i++) {
1884         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1885       }
1886       v = bmat->a;
1887       for (i=0; i<bmat->nz; i++) {
1888         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1889       }
1890       ierr  = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1891       *norm = PetscSqrtReal(*norm);
1892     } else if (type == NORM_1) { /* max column norm */
1893       PetscReal *tmp,*tmp2;
1894       PetscInt  *jj,*garray = aij->garray;
1895       ierr  = PetscCalloc1((mat->cmap->N+1),&tmp);CHKERRQ(ierr);
1896       ierr  = PetscMalloc1((mat->cmap->N+1),&tmp2);CHKERRQ(ierr);
1897       *norm = 0.0;
1898       v     = amat->a; jj = amat->j;
1899       for (j=0; j<amat->nz; j++) {
1900         tmp[cstart + *jj++] += PetscAbsScalar(*v);  v++;
1901       }
1902       v = bmat->a; jj = bmat->j;
1903       for (j=0; j<bmat->nz; j++) {
1904         tmp[garray[*jj++]] += PetscAbsScalar(*v); v++;
1905       }
1906       ierr = MPI_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1907       for (j=0; j<mat->cmap->N; j++) {
1908         if (tmp2[j] > *norm) *norm = tmp2[j];
1909       }
1910       ierr = PetscFree(tmp);CHKERRQ(ierr);
1911       ierr = PetscFree(tmp2);CHKERRQ(ierr);
1912     } else if (type == NORM_INFINITY) { /* max row norm */
1913       PetscReal ntemp = 0.0;
1914       for (j=0; j<aij->A->rmap->n; j++) {
1915         v   = amat->a + amat->i[j];
1916         sum = 0.0;
1917         for (i=0; i<amat->i[j+1]-amat->i[j]; i++) {
1918           sum += PetscAbsScalar(*v); v++;
1919         }
1920         v = bmat->a + bmat->i[j];
1921         for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) {
1922           sum += PetscAbsScalar(*v); v++;
1923         }
1924         if (sum > ntemp) ntemp = sum;
1925       }
1926       ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1927     } else SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"No support for two norm");
1928   }
1929   PetscFunctionReturn(0);
1930 }
1931 
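/*
   A minimal sketch of the norms supported above (the two norm is not available for MPIAIJ):

     PetscReal nrm1,nrmf,nrminf;

     ierr = MatNorm(A,NORM_1,&nrm1);CHKERRQ(ierr);
     ierr = MatNorm(A,NORM_FROBENIUS,&nrmf);CHKERRQ(ierr);
     ierr = MatNorm(A,NORM_INFINITY,&nrminf);CHKERRQ(ierr);
*/
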
1932 #undef __FUNCT__
1933 #define __FUNCT__ "MatTranspose_MPIAIJ"
1934 PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout)
1935 {
1936   Mat_MPIAIJ     *a   = (Mat_MPIAIJ*)A->data;
1937   Mat_SeqAIJ     *Aloc=(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data;
1938   PetscErrorCode ierr;
1939   PetscInt       M      = A->rmap->N,N = A->cmap->N,ma,na,mb,nb,*ai,*aj,*bi,*bj,row,*cols,*cols_tmp,i;
1940   PetscInt       cstart = A->cmap->rstart,ncol;
1941   Mat            B;
1942   MatScalar      *array;
1943 
1944   PetscFunctionBegin;
1945   if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
1946 
1947   ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n; nb = a->B->cmap->n;
1948   ai = Aloc->i; aj = Aloc->j;
1949   bi = Bloc->i; bj = Bloc->j;
1950   if (reuse == MAT_INITIAL_MATRIX || *matout == A) {
1951     PetscInt             *d_nnz,*g_nnz,*o_nnz;
1952     PetscSFNode          *oloc;
1953     PETSC_UNUSED PetscSF sf;
1954 
1955     ierr = PetscMalloc4(na,&d_nnz,na,&o_nnz,nb,&g_nnz,nb,&oloc);CHKERRQ(ierr);
1956     /* compute d_nnz for preallocation */
1957     ierr = PetscMemzero(d_nnz,na*sizeof(PetscInt));CHKERRQ(ierr);
1958     for (i=0; i<ai[ma]; i++) {
1959       d_nnz[aj[i]]++;
1960       aj[i] += cstart; /* global col index to be used by MatSetValues() */
1961     }
1962     /* compute local off-diagonal contributions */
1963     ierr = PetscMemzero(g_nnz,nb*sizeof(PetscInt));CHKERRQ(ierr);
1964     for (i=0; i<bi[ma]; i++) g_nnz[bj[i]]++;
1965     /* map those to global */
1966     ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
1967     ierr = PetscSFSetGraphLayout(sf,A->cmap,nb,NULL,PETSC_USE_POINTER,a->garray);CHKERRQ(ierr);
1968     ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
1969     ierr = PetscMemzero(o_nnz,na*sizeof(PetscInt));CHKERRQ(ierr);
1970     ierr = PetscSFReduceBegin(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr);
1971     ierr = PetscSFReduceEnd(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr);
1972     ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
1973 
1974     ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
1975     ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
1976     ierr = MatSetBlockSizes(B,PetscAbs(A->cmap->bs),PetscAbs(A->rmap->bs));CHKERRQ(ierr);
1977     ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
1978     ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
1979     ierr = PetscFree4(d_nnz,o_nnz,g_nnz,oloc);CHKERRQ(ierr);
1980   } else {
1981     B    = *matout;
1982     ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
1983     for (i=0; i<ai[ma]; i++) aj[i] += cstart; /* global col index to be used by MatSetValues() */
1984   }
1985 
1986   /* copy over the A part */
1987   array = Aloc->a;
1988   row   = A->rmap->rstart;
1989   for (i=0; i<ma; i++) {
1990     ncol = ai[i+1]-ai[i];
1991     ierr = MatSetValues(B,ncol,aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1992     row++;
1993     array += ncol; aj += ncol;
1994   }
1995   aj = Aloc->j;
1996   for (i=0; i<ai[ma]; i++) aj[i] -= cstart; /* restore local col index */
1997 
1998   /* copy over the B part */
1999   ierr  = PetscCalloc1(bi[mb],&cols);CHKERRQ(ierr);
2000   array = Bloc->a;
2001   row   = A->rmap->rstart;
2002   for (i=0; i<bi[mb]; i++) cols[i] = a->garray[bj[i]];
2003   cols_tmp = cols;
2004   for (i=0; i<mb; i++) {
2005     ncol = bi[i+1]-bi[i];
2006     ierr = MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
2007     row++;
2008     array += ncol; cols_tmp += ncol;
2009   }
2010   ierr = PetscFree(cols);CHKERRQ(ierr);
2011 
2012   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2013   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2014   if (reuse == MAT_INITIAL_MATRIX || *matout != A) {
2015     *matout = B;
2016   } else {
2017     ierr = MatHeaderMerge(A,B);CHKERRQ(ierr);
2018   }
2019   PetscFunctionReturn(0);
2020 }
2021 
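/*
   A minimal sketch of the two ways the transpose above is typically requested; the in-place
   form (passing the matrix itself back) is only allowed for square matrices, as checked at the
   top of the routine:

     Mat At;

     ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr);
     ierr = MatTranspose(A,MAT_REUSE_MATRIX,&A);CHKERRQ(ierr);
*/
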
2022 #undef __FUNCT__
2023 #define __FUNCT__ "MatDiagonalScale_MPIAIJ"
2024 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
2025 {
2026   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2027   Mat            a    = aij->A,b = aij->B;
2028   PetscErrorCode ierr;
2029   PetscInt       s1,s2,s3;
2030 
2031   PetscFunctionBegin;
2032   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
2033   if (rr) {
2034     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
2035     if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
2036     /* Overlap communication with computation. */
2037     ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2038   }
2039   if (ll) {
2040     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
2041     if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
2042     ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr);
2043   }
2044   /* scale  the diagonal block */
2045   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
2046 
2047   if (rr) {
2048     /* Do a scatter end and then right scale the off-diagonal block */
2049     ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2050     ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr);
2051   }
2052   PetscFunctionReturn(0);
2053 }
2054 
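/*
   A minimal sketch of the scaling above, which computes A <- diag(l) A diag(r); l must conform
   with the rows of A and r with the columns:

     Vec l,r;

     ierr = MatGetVecs(A,&r,&l);CHKERRQ(ierr);
     ierr = VecSet(l,2.0);CHKERRQ(ierr);
     ierr = VecSet(r,0.5);CHKERRQ(ierr);
     ierr = MatDiagonalScale(A,l,r);CHKERRQ(ierr);
     ierr = VecDestroy(&l);CHKERRQ(ierr);
     ierr = VecDestroy(&r);CHKERRQ(ierr);
*/
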
2055 #undef __FUNCT__
2056 #define __FUNCT__ "MatSetUnfactored_MPIAIJ"
2057 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A)
2058 {
2059   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2060   PetscErrorCode ierr;
2061 
2062   PetscFunctionBegin;
2063   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
2064   PetscFunctionReturn(0);
2065 }
2066 
2067 #undef __FUNCT__
2068 #define __FUNCT__ "MatEqual_MPIAIJ"
2069 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscBool  *flag)
2070 {
2071   Mat_MPIAIJ     *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data;
2072   Mat            a,b,c,d;
2073   PetscBool      flg;
2074   PetscErrorCode ierr;
2075 
2076   PetscFunctionBegin;
2077   a = matA->A; b = matA->B;
2078   c = matB->A; d = matB->B;
2079 
2080   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
2081   if (flg) {
2082     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
2083   }
2084   ierr = MPI_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
2085   PetscFunctionReturn(0);
2086 }
2087 
2088 #undef __FUNCT__
2089 #define __FUNCT__ "MatCopy_MPIAIJ"
2090 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
2091 {
2092   PetscErrorCode ierr;
2093   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2094   Mat_MPIAIJ     *b = (Mat_MPIAIJ*)B->data;
2095 
2096   PetscFunctionBegin;
2097   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
2098   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
2099     /* because of the column compression in the off-processor part of the matrix a->B,
2100        the number of columns in a->B and b->B may be different, hence we cannot call
2101        MatCopy() directly on the two parts. If need be, we can provide a copy more
2102        efficient than MatCopy_Basic() by first uncompressing the a->B matrices and
2103        then copying the submatrices */
2104     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
2105   } else {
2106     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
2107     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
2108   }
2109   PetscFunctionReturn(0);
2110 }
2111 
2112 #undef __FUNCT__
2113 #define __FUNCT__ "MatSetUp_MPIAIJ"
2114 PetscErrorCode MatSetUp_MPIAIJ(Mat A)
2115 {
2116   PetscErrorCode ierr;
2117 
2118   PetscFunctionBegin;
2119   ierr =  MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
2120   PetscFunctionReturn(0);
2121 }
2122 
2123 /*
2124    Computes the number of nonzeros per row needed for preallocation when X and Y
2125    have different nonzero structure.
2126 */
2127 #undef __FUNCT__
2128 #define __FUNCT__ "MatAXPYGetPreallocation_MPIX_private"
2129 PetscErrorCode MatAXPYGetPreallocation_MPIX_private(PetscInt m,const PetscInt *xi,const PetscInt *xj,const PetscInt *xltog,const PetscInt *yi,const PetscInt *yj,const PetscInt *yltog,PetscInt *nnz)
2130 {
2131   PetscInt       i,j,k,nzx,nzy;
2132 
2133   PetscFunctionBegin;
2134   /* Set the number of nonzeros in the new matrix */
2135   for (i=0; i<m; i++) {
2136     const PetscInt *xjj = xj+xi[i],*yjj = yj+yi[i];
2137     nzx = xi[i+1] - xi[i];
2138     nzy = yi[i+1] - yi[i];
2139     nnz[i] = 0;
2140     for (j=0,k=0; j<nzx; j++) {                   /* Point in X */
2141       for (; k<nzy && yltog[yjj[k]]<xltog[xjj[j]]; k++) nnz[i]++; /* Catch up to X */
2142       if (k<nzy && yltog[yjj[k]]==xltog[xjj[j]]) k++;             /* Skip duplicate */
2143       nnz[i]++;
2144     }
2145     for (; k<nzy; k++) nnz[i]++;
2146   }
2147   PetscFunctionReturn(0);
2148 }
2149 
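/*
   A small worked example of the merge count above: if a row of X has global columns {1,4,7}
   and the matching row of Y has global columns {2,4,9}, the merged row has columns {1,2,4,7,9},
   so nnz[i] = 5; the shared column 4 is counted only once.
*/
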
2150 /* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */
2151 #undef __FUNCT__
2152 #define __FUNCT__ "MatAXPYGetPreallocation_MPIAIJ"
2153 static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y,const PetscInt *yltog,Mat X,const PetscInt *xltog,PetscInt *nnz)
2154 {
2155   PetscErrorCode ierr;
2156   PetscInt       m = Y->rmap->N;
2157   Mat_SeqAIJ     *x = (Mat_SeqAIJ*)X->data;
2158   Mat_SeqAIJ     *y = (Mat_SeqAIJ*)Y->data;
2159 
2160   PetscFunctionBegin;
2161   ierr = MatAXPYGetPreallocation_MPIX_private(m,x->i,x->j,xltog,y->i,y->j,yltog,nnz);CHKERRQ(ierr);
2162   PetscFunctionReturn(0);
2163 }
2164 
2165 #undef __FUNCT__
2166 #define __FUNCT__ "MatAXPY_MPIAIJ"
2167 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
2168 {
2169   PetscErrorCode ierr;
2170   Mat_MPIAIJ     *xx = (Mat_MPIAIJ*)X->data,*yy = (Mat_MPIAIJ*)Y->data;
2171   PetscBLASInt   bnz,one=1;
2172   Mat_SeqAIJ     *x,*y;
2173 
2174   PetscFunctionBegin;
2175   if (str == SAME_NONZERO_PATTERN) {
2176     PetscScalar alpha = a;
2177     x    = (Mat_SeqAIJ*)xx->A->data;
2178     ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr);
2179     y    = (Mat_SeqAIJ*)yy->A->data;
2180     PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
2181     x    = (Mat_SeqAIJ*)xx->B->data;
2182     y    = (Mat_SeqAIJ*)yy->B->data;
2183     ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr);
2184     PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
2185     ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr);
2186   } else if (str == SUBSET_NONZERO_PATTERN) {
2187     ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
2188   } else {
2189     Mat      B;
2190     PetscInt *nnz_d,*nnz_o;
2191     ierr = PetscMalloc1(yy->A->rmap->N,&nnz_d);CHKERRQ(ierr);
2192     ierr = PetscMalloc1(yy->B->rmap->N,&nnz_o);CHKERRQ(ierr);
2193     ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr);
2194     ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr);
2195     ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr);
2196     ierr = MatSetBlockSizesFromMats(B,Y,Y);CHKERRQ(ierr);
2197     ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr);
2198     ierr = MatAXPYGetPreallocation_SeqAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr);
2199     ierr = MatAXPYGetPreallocation_MPIAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr);
2200     ierr = MatMPIAIJSetPreallocation(B,0,nnz_d,0,nnz_o);CHKERRQ(ierr);
2201     ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr);
2202     ierr = MatHeaderReplace(Y,B);CHKERRQ(ierr);
2203     ierr = PetscFree(nnz_d);CHKERRQ(ierr);
2204     ierr = PetscFree(nnz_o);CHKERRQ(ierr);
2205   }
2206   PetscFunctionReturn(0);
2207 }
2208 
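/*
   A minimal sketch of the update above, Y <- Y + a*X. With SAME_NONZERO_PATTERN the values are
   combined directly with BLAS axpy on the diagonal and off-diagonal blocks; otherwise a suitably
   preallocated replacement matrix is built first:

     ierr = MatAXPY(Y,2.0,X,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);
*/
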
2209 extern PetscErrorCode  MatConjugate_SeqAIJ(Mat);
2210 
2211 #undef __FUNCT__
2212 #define __FUNCT__ "MatConjugate_MPIAIJ"
2213 PetscErrorCode  MatConjugate_MPIAIJ(Mat mat)
2214 {
2215 #if defined(PETSC_USE_COMPLEX)
2216   PetscErrorCode ierr;
2217   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2218 
2219   PetscFunctionBegin;
2220   ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr);
2221   ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr);
2222 #else
2223   PetscFunctionBegin;
2224 #endif
2225   PetscFunctionReturn(0);
2226 }
2227 
2228 #undef __FUNCT__
2229 #define __FUNCT__ "MatRealPart_MPIAIJ"
2230 PetscErrorCode MatRealPart_MPIAIJ(Mat A)
2231 {
2232   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2233   PetscErrorCode ierr;
2234 
2235   PetscFunctionBegin;
2236   ierr = MatRealPart(a->A);CHKERRQ(ierr);
2237   ierr = MatRealPart(a->B);CHKERRQ(ierr);
2238   PetscFunctionReturn(0);
2239 }
2240 
2241 #undef __FUNCT__
2242 #define __FUNCT__ "MatImaginaryPart_MPIAIJ"
2243 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A)
2244 {
2245   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2246   PetscErrorCode ierr;
2247 
2248   PetscFunctionBegin;
2249   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
2250   ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
2251   PetscFunctionReturn(0);
2252 }
2253 
2254 #if defined(PETSC_HAVE_PBGL)
2255 
2256 #include <boost/parallel/mpi/bsp_process_group.hpp>
2257 #include <boost/graph/distributed/ilu_default_graph.hpp>
2258 #include <boost/graph/distributed/ilu_0_block.hpp>
2259 #include <boost/graph/distributed/ilu_preconditioner.hpp>
2260 #include <boost/graph/distributed/petsc/interface.hpp>
2261 #include <boost/multi_array.hpp>
2262 #include <boost/parallel/distributed_property_map.hpp>
2263 
2264 #undef __FUNCT__
2265 #define __FUNCT__ "MatILUFactorSymbolic_MPIAIJ"
2266 /*
2267   This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu>
2268 */
2269 PetscErrorCode MatILUFactorSymbolic_MPIAIJ(Mat fact,Mat A, IS isrow, IS iscol, const MatFactorInfo *info)
2270 {
2271   namespace petsc = boost::distributed::petsc;
2272 
2273   namespace graph_dist = boost::graph::distributed;
2274   using boost::graph::distributed::ilu_default::process_group_type;
2275   using boost::graph::ilu_permuted;
2276 
2277   PetscBool      row_identity, col_identity;
2278   PetscContainer c;
2279   PetscInt       m, n, M, N;
2280   PetscErrorCode ierr;
2281 
2282   PetscFunctionBegin;
2283   if (info->levels != 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only levels = 0 supported for parallel ilu");
2284   ierr = ISIdentity(isrow, &row_identity);CHKERRQ(ierr);
2285   ierr = ISIdentity(iscol, &col_identity);CHKERRQ(ierr);
2286   if (!row_identity || !col_identity) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for parallel ILU");
2287 
2288   process_group_type pg;
2289   typedef graph_dist::ilu_default::ilu_level_graph_type lgraph_type;
2290   lgraph_type  *lgraph_p   = new lgraph_type(petsc::num_global_vertices(A), pg, petsc::matrix_distribution(A, pg));
2291   lgraph_type& level_graph = *lgraph_p;
2292   graph_dist::ilu_default::graph_type&            graph(level_graph.graph);
2293 
2294   petsc::read_matrix(A, graph, get(boost::edge_weight, graph));
2295   ilu_permuted(level_graph);
2296 
2297   /* put together the new matrix */
2298   ierr = MatCreate(PetscObjectComm((PetscObject)A), fact);CHKERRQ(ierr);
2299   ierr = MatGetLocalSize(A, &m, &n);CHKERRQ(ierr);
2300   ierr = MatGetSize(A, &M, &N);CHKERRQ(ierr);
2301   ierr = MatSetSizes(fact, m, n, M, N);CHKERRQ(ierr);
2302   ierr = MatSetBlockSizesFromMats(fact,A,A);CHKERRQ(ierr);
2303   ierr = MatSetType(fact, ((PetscObject)A)->type_name);CHKERRQ(ierr);
2304   ierr = MatAssemblyBegin(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2305   ierr = MatAssemblyEnd(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2306 
2307   ierr = PetscContainerCreate(PetscObjectComm((PetscObject)A), &c);CHKERRQ(ierr);
2308   ierr = PetscContainerSetPointer(c, lgraph_p);CHKERRQ(ierr);
2309   ierr = PetscObjectCompose((PetscObject) (fact), "graph", (PetscObject) c);CHKERRQ(ierr);
2310   ierr = PetscContainerDestroy(&c);CHKERRQ(ierr);
2311   PetscFunctionReturn(0);
2312 }
2313 
2314 #undef __FUNCT__
2315 #define __FUNCT__ "MatLUFactorNumeric_MPIAIJ"
2316 PetscErrorCode MatLUFactorNumeric_MPIAIJ(Mat B,Mat A, const MatFactorInfo *info)
2317 {
2318   PetscFunctionBegin;
2319   PetscFunctionReturn(0);
2320 }
2321 
2322 #undef __FUNCT__
2323 #define __FUNCT__ "MatSolve_MPIAIJ"
2324 /*
2325   This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu>
2326 */
2327 PetscErrorCode MatSolve_MPIAIJ(Mat A, Vec b, Vec x)
2328 {
2329   namespace graph_dist = boost::graph::distributed;
2330 
2331   typedef graph_dist::ilu_default::ilu_level_graph_type lgraph_type;
2332   lgraph_type    *lgraph_p;
2333   PetscContainer c;
2334   PetscErrorCode ierr;
2335 
2336   PetscFunctionBegin;
2337   ierr = PetscObjectQuery((PetscObject) A, "graph", (PetscObject*) &c);CHKERRQ(ierr);
2338   ierr = PetscContainerGetPointer(c, (void**) &lgraph_p);CHKERRQ(ierr);
2339   ierr = VecCopy(b, x);CHKERRQ(ierr);
2340 
2341   PetscScalar *array_x;
2342   ierr = VecGetArray(x, &array_x);CHKERRQ(ierr);
2343   PetscInt sx;
2344   ierr = VecGetSize(x, &sx);CHKERRQ(ierr);
2345 
2346   PetscScalar *array_b;
2347   ierr = VecGetArray(b, &array_b);CHKERRQ(ierr);
2348   PetscInt sb;
2349   ierr = VecGetSize(b, &sb);CHKERRQ(ierr);
2350 
2351   lgraph_type& level_graph = *lgraph_p;
2352   graph_dist::ilu_default::graph_type&            graph(level_graph.graph);
2353 
2354   typedef boost::multi_array_ref<PetscScalar, 1> array_ref_type;
2355   array_ref_type                                 ref_b(array_b, boost::extents[num_vertices(graph)]);
2356   array_ref_type                                 ref_x(array_x, boost::extents[num_vertices(graph)]);
2357 
2358   typedef boost::iterator_property_map<array_ref_type::iterator,
2359                                        boost::property_map<graph_dist::ilu_default::graph_type, boost::vertex_index_t>::type>  gvector_type;
2360   gvector_type                                   vector_b(ref_b.begin(), get(boost::vertex_index, graph));
2361   gvector_type                                   vector_x(ref_x.begin(), get(boost::vertex_index, graph));
2362 
2363   ilu_set_solve(*lgraph_p, vector_b, vector_x);
2364   PetscFunctionReturn(0);
2365 }
2366 #endif
2367 
2368 
2369 #undef __FUNCT__
2370 #define __FUNCT__ "MatGetRedundantMatrix_MPIAIJ_interlaced"
2371 PetscErrorCode MatGetRedundantMatrix_MPIAIJ_interlaced(Mat mat,PetscInt nsubcomm,MPI_Comm subcomm,MatReuse reuse,Mat *matredundant)
2372 {
2373   PetscMPIInt    rank,size;
2374   MPI_Comm       comm;
2375   PetscErrorCode ierr;
2376   PetscInt       nsends=0,nrecvs=0,i,rownz_max=0,M=mat->rmap->N,N=mat->cmap->N;
2377   PetscMPIInt    *send_rank= NULL,*recv_rank=NULL,subrank,subsize;
2378   PetscInt       *rowrange = mat->rmap->range;
2379   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2380   Mat            A = aij->A,B=aij->B,C=*matredundant;
2381   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data;
2382   PetscScalar    *sbuf_a;
2383   PetscInt       nzlocal=a->nz+b->nz;
2384   PetscInt       j,cstart=mat->cmap->rstart,cend=mat->cmap->rend,row,nzA,nzB,ncols,*cworkA,*cworkB;
2385   PetscInt       rstart=mat->rmap->rstart,rend=mat->rmap->rend,*bmap=aij->garray;
2386   PetscInt       *cols,ctmp,lwrite,*rptr,l,*sbuf_j;
2387   MatScalar      *aworkA,*aworkB;
2388   PetscScalar    *vals;
2389   PetscMPIInt    tag1,tag2,tag3,imdex;
2390   MPI_Request    *s_waits1=NULL,*s_waits2=NULL,*s_waits3=NULL;
2391   MPI_Request    *r_waits1=NULL,*r_waits2=NULL,*r_waits3=NULL;
2392   MPI_Status     recv_status,*send_status;
2393   PetscInt       *sbuf_nz=NULL,*rbuf_nz=NULL,count;
2394   PetscInt       **rbuf_j=NULL;
2395   PetscScalar    **rbuf_a=NULL;
2396   Mat_Redundant  *redund =NULL;
2397 
2398   PetscFunctionBegin;
2399   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
2400   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2401   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2402   ierr = MPI_Comm_rank(subcomm,&subrank);CHKERRQ(ierr);
2403   ierr = MPI_Comm_size(subcomm,&subsize);CHKERRQ(ierr);
2404 
2405   if (reuse == MAT_REUSE_MATRIX) {
2406     if (M != mat->rmap->N || N != mat->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong global size");
2407     if (subsize == 1) {
2408       Mat_SeqAIJ *c = (Mat_SeqAIJ*)C->data;
2409       redund = c->redundant;
2410     } else {
2411       Mat_MPIAIJ *c = (Mat_MPIAIJ*)C->data;
2412       redund = c->redundant;
2413     }
2414     if (nzlocal != redund->nzlocal) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong nzlocal");
2415 
2416     nsends    = redund->nsends;
2417     nrecvs    = redund->nrecvs;
2418     send_rank = redund->send_rank;
2419     recv_rank = redund->recv_rank;
2420     sbuf_nz   = redund->sbuf_nz;
2421     rbuf_nz   = redund->rbuf_nz;
2422     sbuf_j    = redund->sbuf_j;
2423     sbuf_a    = redund->sbuf_a;
2424     rbuf_j    = redund->rbuf_j;
2425     rbuf_a    = redund->rbuf_a;
2426   }
2427 
2428   if (reuse == MAT_INITIAL_MATRIX) {
2429     PetscInt    nleftover,np_subcomm;
2430 
2431     /* get the destination processors' id send_rank, nsends and nrecvs */
2432     ierr = PetscMalloc2(size,&send_rank,size,&recv_rank);CHKERRQ(ierr);
2433 
2434     np_subcomm = size/nsubcomm;
2435     nleftover  = size - nsubcomm*np_subcomm;
2436 
2437     /* the block of code below is specific to INTERLACED */
2438     /* ------------------------------------------------*/
2439     nsends = 0; nrecvs = 0;
2440     for (i=0; i<size; i++) {
2441       if (subrank == i/nsubcomm && i != rank) { /* my_subrank == other's subrank */
2442         send_rank[nsends++] = i;
2443         recv_rank[nrecvs++] = i;
2444       }
2445     }
2446     if (rank >= size - nleftover) { /* this proc is a leftover processor */
2447       i = size-nleftover-1;
2448       j = 0;
2449       while (j < nsubcomm - nleftover) {
2450         send_rank[nsends++] = i;
2451         i--; j++;
2452       }
2453     }
2454 
2455     if (nleftover && subsize == size/nsubcomm && subrank==subsize-1) { /* this proc recvs from leftover processors */
2456       for (i=0; i<nleftover; i++) {
2457         recv_rank[nrecvs++] = size-nleftover+i;
2458       }
2459     }
2460     /*----------------------------------------------*/
2461 
2462     /* allocate sbuf_j, sbuf_a */
2463     i    = nzlocal + rowrange[rank+1] - rowrange[rank] + 2;
2464     ierr = PetscMalloc1(i,&sbuf_j);CHKERRQ(ierr);
2465     ierr = PetscMalloc1((nzlocal+1),&sbuf_a);CHKERRQ(ierr);
2466     /*
2467     ierr = PetscSynchronizedPrintf(comm,"[%d] nsends %d, nrecvs %d\n",rank,nsends,nrecvs);CHKERRQ(ierr);
2468     ierr = PetscSynchronizedFlush(comm,PETSC_STDOUT);CHKERRQ(ierr);
2469      */
2470   } /* endof if (reuse == MAT_INITIAL_MATRIX) */
2471 
2472   /* copy mat's local entries into the buffers */
2473   if (reuse == MAT_INITIAL_MATRIX) {
2474     rownz_max = 0;
2475     rptr      = sbuf_j;
2476     cols      = sbuf_j + rend-rstart + 1;
2477     vals      = sbuf_a;
2478     rptr[0]   = 0;
2479     for (i=0; i<rend-rstart; i++) {
2480       row    = i + rstart;
2481       nzA    = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i];
2482       ncols  = nzA + nzB;
2483       cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i];
2484       aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i];
2485       /* load the column indices for this row into cols */
2486       lwrite = 0;
2487       for (l=0; l<nzB; l++) {
2488         if ((ctmp = bmap[cworkB[l]]) < cstart) {
2489           vals[lwrite]   = aworkB[l];
2490           cols[lwrite++] = ctmp;
2491         }
2492       }
2493       for (l=0; l<nzA; l++) {
2494         vals[lwrite]   = aworkA[l];
2495         cols[lwrite++] = cstart + cworkA[l];
2496       }
2497       for (l=0; l<nzB; l++) {
2498         if ((ctmp = bmap[cworkB[l]]) >= cend) {
2499           vals[lwrite]   = aworkB[l];
2500           cols[lwrite++] = ctmp;
2501         }
2502       }
2503       vals     += ncols;
2504       cols     += ncols;
2505       rptr[i+1] = rptr[i] + ncols;
2506       if (rownz_max < ncols) rownz_max = ncols;
2507     }
2508     if (rptr[rend-rstart] != a->nz + b->nz) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB, "rptr[%d] %d != %d + %d",rend-rstart,rptr[rend-rstart],a->nz,b->nz);
2509   } else { /* only copy matrix values into sbuf_a */
2510     rptr    = sbuf_j;
2511     vals    = sbuf_a;
2512     rptr[0] = 0;
2513     for (i=0; i<rend-rstart; i++) {
2514       row    = i + rstart;
2515       nzA    = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i];
2516       ncols  = nzA + nzB;
2517       cworkB = b->j + b->i[i];
2518       aworkA = a->a + a->i[i];
2519       aworkB = b->a + b->i[i];
2520       lwrite = 0;
2521       for (l=0; l<nzB; l++) {
2522         if ((ctmp = bmap[cworkB[l]]) < cstart) vals[lwrite++] = aworkB[l];
2523       }
2524       for (l=0; l<nzA; l++) vals[lwrite++] = aworkA[l];
2525       for (l=0; l<nzB; l++) {
2526         if ((ctmp = bmap[cworkB[l]]) >= cend) vals[lwrite++] = aworkB[l];
2527       }
2528       vals     += ncols;
2529       rptr[i+1] = rptr[i] + ncols;
2530     }
2531   } /* endof if (reuse == MAT_INITIAL_MATRIX) */
2532 
2533   /* send nzlocal to others, and recv others' nzlocal */
2534   /*--------------------------------------------------*/
2535   if (reuse == MAT_INITIAL_MATRIX) {
2536     ierr = PetscMalloc2(3*(nsends + nrecvs)+1,&s_waits3,nsends+1,&send_status);CHKERRQ(ierr);
2537 
2538     s_waits2 = s_waits3 + nsends;
2539     s_waits1 = s_waits2 + nsends;
2540     r_waits1 = s_waits1 + nsends;
2541     r_waits2 = r_waits1 + nrecvs;
2542     r_waits3 = r_waits2 + nrecvs;
2543   } else {
2544     ierr = PetscMalloc2(nsends + nrecvs +1,&s_waits3,nsends+1,&send_status);CHKERRQ(ierr);
2545 
2546     r_waits3 = s_waits3 + nsends;
2547   }
2548 
2549   ierr = PetscObjectGetNewTag((PetscObject)mat,&tag3);CHKERRQ(ierr);
2550   if (reuse == MAT_INITIAL_MATRIX) {
2551     /* get new tags to keep the communication clean */
2552     ierr = PetscObjectGetNewTag((PetscObject)mat,&tag1);CHKERRQ(ierr);
2553     ierr = PetscObjectGetNewTag((PetscObject)mat,&tag2);CHKERRQ(ierr);
2554     ierr = PetscMalloc4(nsends,&sbuf_nz,nrecvs,&rbuf_nz,nrecvs,&rbuf_j,nrecvs,&rbuf_a);CHKERRQ(ierr);
2555 
2556     /* post receives of other's nzlocal */
2557     for (i=0; i<nrecvs; i++) {
2558       ierr = MPI_Irecv(rbuf_nz+i,1,MPIU_INT,MPI_ANY_SOURCE,tag1,comm,r_waits1+i);CHKERRQ(ierr);
2559     }
2560     /* send nzlocal to others */
2561     for (i=0; i<nsends; i++) {
2562       sbuf_nz[i] = nzlocal;
2563       ierr       = MPI_Isend(sbuf_nz+i,1,MPIU_INT,send_rank[i],tag1,comm,s_waits1+i);CHKERRQ(ierr);
2564     }
2565     /* wait on receives of nzlocal; allocate space for rbuf_j, rbuf_a */
2566     count = nrecvs;
2567     while (count) {
2568       ierr = MPI_Waitany(nrecvs,r_waits1,&imdex,&recv_status);CHKERRQ(ierr);
2569 
2570       recv_rank[imdex] = recv_status.MPI_SOURCE;
2571       /* allocate rbuf_a and rbuf_j; then post receives of rbuf_j */
2572       ierr = PetscMalloc1((rbuf_nz[imdex]+1),&rbuf_a[imdex]);CHKERRQ(ierr);
2573 
2574       i = rowrange[recv_status.MPI_SOURCE+1] - rowrange[recv_status.MPI_SOURCE]; /* number of expected mat->i */
2575 
2576       rbuf_nz[imdex] += i + 2;
2577 
2578       ierr = PetscMalloc1(rbuf_nz[imdex],&rbuf_j[imdex]);CHKERRQ(ierr);
2579       ierr = MPI_Irecv(rbuf_j[imdex],rbuf_nz[imdex],MPIU_INT,recv_status.MPI_SOURCE,tag2,comm,r_waits2+imdex);CHKERRQ(ierr);
2580       count--;
2581     }
2582     /* wait on sends of nzlocal */
2583     if (nsends) {ierr = MPI_Waitall(nsends,s_waits1,send_status);CHKERRQ(ierr);}
2584   /* send mat->i,j to others, and recv from others */
2585     /*------------------------------------------------*/
2586     for (i=0; i<nsends; i++) {
2587       j    = nzlocal + rowrange[rank+1] - rowrange[rank] + 1;
2588       ierr = MPI_Isend(sbuf_j,j,MPIU_INT,send_rank[i],tag2,comm,s_waits2+i);CHKERRQ(ierr);
2589     }
2590     /* wait on receives of mat->i,j */
2591     /*------------------------------*/
2592     count = nrecvs;
2593     while (count) {
2594       ierr = MPI_Waitany(nrecvs,r_waits2,&imdex,&recv_status);CHKERRQ(ierr);
2595       if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE);
2596       count--;
2597     }
2598     /* wait on sends of mat->i,j */
2599     /*---------------------------*/
2600     if (nsends) {
2601       ierr = MPI_Waitall(nsends,s_waits2,send_status);CHKERRQ(ierr);
2602     }
2603   } /* end of if (reuse == MAT_INITIAL_MATRIX) */
2604 
2605   /* post receives, send and receive mat->a */
2606   /*----------------------------------------*/
2607   for (imdex=0; imdex<nrecvs; imdex++) {
2608     ierr = MPI_Irecv(rbuf_a[imdex],rbuf_nz[imdex],MPIU_SCALAR,recv_rank[imdex],tag3,comm,r_waits3+imdex);CHKERRQ(ierr);
2609   }
2610   for (i=0; i<nsends; i++) {
2611     ierr = MPI_Isend(sbuf_a,nzlocal,MPIU_SCALAR,send_rank[i],tag3,comm,s_waits3+i);CHKERRQ(ierr);
2612   }
2613   count = nrecvs;
2614   while (count) {
2615     ierr = MPI_Waitany(nrecvs,r_waits3,&imdex,&recv_status);CHKERRQ(ierr);
2616     if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE);
2617     count--;
2618   }
2619   if (nsends) {
2620     ierr = MPI_Waitall(nsends,s_waits3,send_status);CHKERRQ(ierr);
2621   }
2622 
2623   ierr = PetscFree2(s_waits3,send_status);CHKERRQ(ierr);
2624 
2625   /* create redundant matrix */
2626   /*-------------------------*/
2627   if (reuse == MAT_INITIAL_MATRIX) {
2628     const PetscInt *range;
2629     PetscInt       rstart_sub,rend_sub,mloc_sub;
2630 
2631     /* compute rownz_max for preallocation */
2632     for (imdex=0; imdex<nrecvs; imdex++) {
2633       j    = rowrange[recv_rank[imdex]+1] - rowrange[recv_rank[imdex]];
2634       rptr = rbuf_j[imdex];
2635       for (i=0; i<j; i++) {
2636         ncols = rptr[i+1] - rptr[i];
2637         if (rownz_max < ncols) rownz_max = ncols;
2638       }
2639     }
2640 
2641     ierr = MatCreate(subcomm,&C);CHKERRQ(ierr);
2642 
2643     /* get local size of redundant matrix
2644        - mloc_sub is chosen for PETSC_SUBCOMM_INTERLACED; it works for other types, but may not be efficient! */
2645     ierr = MatGetOwnershipRanges(mat,&range);CHKERRQ(ierr);
2646     rstart_sub = range[nsubcomm*subrank];
2647     if (subrank+1 < subsize) { /* not the last proc in subcomm */
2648       rend_sub = range[nsubcomm*(subrank+1)];
2649     } else {
2650       rend_sub = mat->rmap->N;
2651     }
2652     mloc_sub = rend_sub - rstart_sub;
2653 
2654     if (M == N) {
2655       ierr = MatSetSizes(C,mloc_sub,mloc_sub,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr);
2656     } else { /* non-square matrix */
2657       ierr = MatSetSizes(C,mloc_sub,PETSC_DECIDE,PETSC_DECIDE,mat->cmap->N);CHKERRQ(ierr);
2658     }
2659     ierr = MatSetBlockSizesFromMats(C,mat,mat);CHKERRQ(ierr);
2660     ierr = MatSetFromOptions(C);CHKERRQ(ierr);
2661     ierr = MatSeqAIJSetPreallocation(C,rownz_max,NULL);CHKERRQ(ierr);
2662     ierr = MatMPIAIJSetPreallocation(C,rownz_max,NULL,rownz_max,NULL);CHKERRQ(ierr);
2663   } else {
2664     C = *matredundant;
2665   }
2666 
2667   /* insert local matrix entries */
2668   rptr = sbuf_j;
2669   cols = sbuf_j + rend-rstart + 1;
2670   vals = sbuf_a;
2671   for (i=0; i<rend-rstart; i++) {
2672     row   = i + rstart;
2673     ncols = rptr[i+1] - rptr[i];
2674     ierr  = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
2675     vals += ncols;
2676     cols += ncols;
2677   }
2678   /* insert received matrix entries */
2679   for (imdex=0; imdex<nrecvs; imdex++) {
2680     rstart = rowrange[recv_rank[imdex]];
2681     rend   = rowrange[recv_rank[imdex]+1];
2682     /* printf("[%d] insert rows %d - %d\n",rank,rstart,rend-1); */
2683     rptr   = rbuf_j[imdex];
2684     cols   = rbuf_j[imdex] + rend-rstart + 1;
2685     vals   = rbuf_a[imdex];
2686     for (i=0; i<rend-rstart; i++) {
2687       row   = i + rstart;
2688       ncols = rptr[i+1] - rptr[i];
2689       ierr  = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
2690       vals += ncols;
2691       cols += ncols;
2692     }
2693   }
2694   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2695   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2696 
2697   if (reuse == MAT_INITIAL_MATRIX) {
2698     *matredundant = C;
2699 
2700     /* create a supporting struct and attach it to C for reuse */
2701     ierr = PetscNewLog(C,&redund);CHKERRQ(ierr);
2702     if (subsize == 1) {
2703       Mat_SeqAIJ *c = (Mat_SeqAIJ*)C->data;
2704       c->redundant = redund;
2705     } else {
2706       Mat_MPIAIJ *c = (Mat_MPIAIJ*)C->data;
2707       c->redundant = redund;
2708     }
2709 
2710     redund->nzlocal   = nzlocal;
2711     redund->nsends    = nsends;
2712     redund->nrecvs    = nrecvs;
2713     redund->send_rank = send_rank;
2714     redund->recv_rank = recv_rank;
2715     redund->sbuf_nz   = sbuf_nz;
2716     redund->rbuf_nz   = rbuf_nz;
2717     redund->sbuf_j    = sbuf_j;
2718     redund->sbuf_a    = sbuf_a;
2719     redund->rbuf_j    = rbuf_j;
2720     redund->rbuf_a    = rbuf_a;
2721     redund->psubcomm  = NULL;
2722   }
2723   PetscFunctionReturn(0);
2724 }
2725 
2726 #undef __FUNCT__
2727 #define __FUNCT__ "MatGetRedundantMatrix_MPIAIJ"
2728 PetscErrorCode MatGetRedundantMatrix_MPIAIJ(Mat mat,PetscInt nsubcomm,MPI_Comm subcomm,MatReuse reuse,Mat *matredundant)
2729 {
2730   PetscErrorCode ierr;
2731   MPI_Comm       comm;
2732   PetscMPIInt    size,subsize;
2733   PetscInt       mloc_sub,rstart,rend,M=mat->rmap->N,N=mat->cmap->N;
2734   Mat_Redundant  *redund=NULL;
2735   PetscSubcomm   psubcomm=NULL;
2736   MPI_Comm       subcomm_in=subcomm;
2737   Mat            *matseq;
2738   IS             isrow,iscol;
2739 
2740   PetscFunctionBegin;
2741   if (subcomm_in == MPI_COMM_NULL) { /* user does not provide subcomm */
2742     if (reuse ==  MAT_INITIAL_MATRIX) {
2743       /* create psubcomm, then get subcomm */
2744       ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
2745       ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2746       if (nsubcomm < 1 || nsubcomm > size) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"nsubcomm must be between 1 and %D",size);
2747 
2748       ierr = PetscSubcommCreate(comm,&psubcomm);CHKERRQ(ierr);
2749       ierr = PetscSubcommSetNumber(psubcomm,nsubcomm);CHKERRQ(ierr);
2750       ierr = PetscSubcommSetType(psubcomm,PETSC_SUBCOMM_CONTIGUOUS);CHKERRQ(ierr);
2751       ierr = PetscSubcommSetFromOptions(psubcomm);CHKERRQ(ierr);
2752       subcomm = psubcomm->comm;
2753     } else { /* retrieve psubcomm and subcomm */
2754       ierr = PetscObjectGetComm((PetscObject)(*matredundant),&subcomm);CHKERRQ(ierr);
2755       ierr = MPI_Comm_size(subcomm,&subsize);CHKERRQ(ierr);
2756       if (subsize == 1) {
2757         Mat_SeqAIJ *c = (Mat_SeqAIJ*)(*matredundant)->data;
2758         redund = c->redundant;
2759       } else {
2760         Mat_MPIAIJ *c = (Mat_MPIAIJ*)(*matredundant)->data;
2761         redund = c->redundant;
2762       }
2763       psubcomm = redund->psubcomm;
2764     }
2765     if (psubcomm->type == PETSC_SUBCOMM_INTERLACED) {
2766       ierr = MatGetRedundantMatrix_MPIAIJ_interlaced(mat,nsubcomm,subcomm,reuse,matredundant);CHKERRQ(ierr);
2767       if (reuse ==  MAT_INITIAL_MATRIX) { /* psubcomm is created in this routine, free it in MatDestroy_Redundant() */
2768         ierr = MPI_Comm_size(psubcomm->comm,&subsize);CHKERRQ(ierr);
2769         if (subsize == 1) {
2770           Mat_SeqAIJ *c = (Mat_SeqAIJ*)(*matredundant)->data;
2771           c->redundant->psubcomm = psubcomm;
2772         } else {
2773           Mat_MPIAIJ *c = (Mat_MPIAIJ*)(*matredundant)->data;
2774           c->redundant->psubcomm = psubcomm;
2775         }
2776       }
2777       PetscFunctionReturn(0);
2778     }
2779   }
2780 
2781   /* use MPI subcomm via MatGetSubMatrices(); use subcomm_in or psubcomm->comm (psubcomm->type != INTERLACED) */
2782   ierr = MPI_Comm_size(subcomm,&subsize);CHKERRQ(ierr);
2783   if (reuse == MAT_INITIAL_MATRIX) {
2784     /* create a local sequential matrix matseq[0] */
2785     mloc_sub = PETSC_DECIDE;
2786     ierr = PetscSplitOwnership(subcomm,&mloc_sub,&M);CHKERRQ(ierr);
2787     ierr = MPI_Scan(&mloc_sub,&rend,1,MPIU_INT,MPI_SUM,subcomm);CHKERRQ(ierr);
2788     rstart = rend - mloc_sub;
2789     ierr = ISCreateStride(PETSC_COMM_SELF,mloc_sub,rstart,1,&isrow);CHKERRQ(ierr);
2790     ierr = ISCreateStride(PETSC_COMM_SELF,N,0,1,&iscol);CHKERRQ(ierr);
2791   } else { /* reuse == MAT_REUSE_MATRIX */
2792     if (subsize == 1) {
2793       Mat_SeqAIJ *c = (Mat_SeqAIJ*)(*matredundant)->data;
2794       redund = c->redundant;
2795     } else {
2796       Mat_MPIAIJ *c = (Mat_MPIAIJ*)(*matredundant)->data;
2797       redund = c->redundant;
2798     }
2799 
2800     isrow  = redund->isrow;
2801     iscol  = redund->iscol;
2802     matseq = redund->matseq;
2803   }
2804   ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,reuse,&matseq);CHKERRQ(ierr);
2805   ierr = MatCreateMPIAIJConcatenateSeqAIJ(subcomm,matseq[0],PETSC_DECIDE,reuse,matredundant);CHKERRQ(ierr);
2806 
2807   if (reuse == MAT_INITIAL_MATRIX) {
2808     /* create a supporting struct and attach it to C for reuse */
2809     ierr = PetscNewLog(*matredundant,&redund);CHKERRQ(ierr);
2810     if (subsize == 1) {
2811       Mat_SeqAIJ *c = (Mat_SeqAIJ*)(*matredundant)->data;
2812       c->redundant = redund;
2813     } else {
2814       Mat_MPIAIJ *c = (Mat_MPIAIJ*)(*matredundant)->data;
2815       c->redundant = redund;
2816     }
2817     redund->isrow    = isrow;
2818     redund->iscol    = iscol;
2819     redund->matseq   = matseq;
2820     redund->psubcomm = psubcomm;
2821   }
2822   PetscFunctionReturn(0);
2823 }
2824 
2825 #undef __FUNCT__
2826 #define __FUNCT__ "MatGetRowMaxAbs_MPIAIJ"
2827 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2828 {
2829   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2830   PetscErrorCode ierr;
2831   PetscInt       i,*idxb = 0;
2832   PetscScalar    *va,*vb;
2833   Vec            vtmp;
2834 
2835   PetscFunctionBegin;
2836   ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr);
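  /* Strategy: take the row-wise max of |entries| over the diagonal block into v and over the
     off-diagonal block into a local work vector, then merge the two below; off-diagonal column
     indices are mapped back to global numbering through a->garray. */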
2837   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2838   if (idx) {
2839     for (i=0; i<A->rmap->n; i++) {
2840       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2841     }
2842   }
2843 
2844   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
2845   if (idx) {
2846     ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr);
2847   }
2848   ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
2849   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
2850 
2851   for (i=0; i<A->rmap->n; i++) {
2852     if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
2853       va[i] = vb[i];
2854       if (idx) idx[i] = a->garray[idxb[i]];
2855     }
2856   }
2857 
2858   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2859   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
2860   ierr = PetscFree(idxb);CHKERRQ(ierr);
2861   ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
2862   PetscFunctionReturn(0);
2863 }
2864 
2865 #undef __FUNCT__
2866 #define __FUNCT__ "MatGetRowMinAbs_MPIAIJ"
2867 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2868 {
2869   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2870   PetscErrorCode ierr;
2871   PetscInt       i,*idxb = 0;
2872   PetscScalar    *va,*vb;
2873   Vec            vtmp;
2874 
2875   PetscFunctionBegin;
2876   ierr = MatGetRowMinAbs(a->A,v,idx);CHKERRQ(ierr);
2877   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2878   if (idx) {
2879     for (i=0; i<A->rmap->n; i++) { /* loop over the local rows, as in MatGetRowMaxAbs_MPIAIJ() */
2880       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2881     }
2882   }
2883 
2884   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
2885   if (idx) {
2886     ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr);
2887   }
2888   ierr = MatGetRowMinAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
2889   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
2890 
2891   for (i=0; i<A->rmap->n; i++) {
2892     if (PetscAbsScalar(va[i]) > PetscAbsScalar(vb[i])) {
2893       va[i] = vb[i];
2894       if (idx) idx[i] = a->garray[idxb[i]];
2895     }
2896   }
2897 
2898   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2899   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
2900   ierr = PetscFree(idxb);CHKERRQ(ierr);
2901   ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
2902   PetscFunctionReturn(0);
2903 }
2904 
2905 #undef __FUNCT__
2906 #define __FUNCT__ "MatGetRowMin_MPIAIJ"
2907 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2908 {
2909   Mat_MPIAIJ     *mat   = (Mat_MPIAIJ*) A->data;
2910   PetscInt       n      = A->rmap->n;
2911   PetscInt       cstart = A->cmap->rstart;
2912   PetscInt       *cmap  = mat->garray;
2913   PetscInt       *diagIdx, *offdiagIdx;
2914   Vec            diagV, offdiagV;
2915   PetscScalar    *a, *diagA, *offdiagA;
2916   PetscInt       r;
2917   PetscErrorCode ierr;
2918 
2919   PetscFunctionBegin;
2920   ierr = PetscMalloc2(n,&diagIdx,n,&offdiagIdx);CHKERRQ(ierr);
2921   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr); /* sequential work vectors, as in MatGetRowMax_MPIAIJ() */
2922   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr);
2923   ierr = MatGetRowMin(mat->A, diagV,    diagIdx);CHKERRQ(ierr);
2924   ierr = MatGetRowMin(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
2925   ierr = VecGetArray(v,        &a);CHKERRQ(ierr);
2926   ierr = VecGetArray(diagV,    &diagA);CHKERRQ(ierr);
2927   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2928   for (r = 0; r < n; ++r) {
2929     if (PetscAbsScalar(diagA[r]) <= PetscAbsScalar(offdiagA[r])) {
2930       a[r]   = diagA[r];
2931       idx[r] = cstart + diagIdx[r];
2932     } else {
2933       a[r]   = offdiagA[r];
2934       idx[r] = cmap[offdiagIdx[r]];
2935     }
2936   }
2937   ierr = VecRestoreArray(v,        &a);CHKERRQ(ierr);
2938   ierr = VecRestoreArray(diagV,    &diagA);CHKERRQ(ierr);
2939   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2940   ierr = VecDestroy(&diagV);CHKERRQ(ierr);
2941   ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
2942   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
2943   PetscFunctionReturn(0);
2944 }
2945 
2946 #undef __FUNCT__
2947 #define __FUNCT__ "MatGetRowMax_MPIAIJ"
2948 PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2949 {
2950   Mat_MPIAIJ     *mat   = (Mat_MPIAIJ*) A->data;
2951   PetscInt       n      = A->rmap->n;
2952   PetscInt       cstart = A->cmap->rstart;
2953   PetscInt       *cmap  = mat->garray;
2954   PetscInt       *diagIdx, *offdiagIdx;
2955   Vec            diagV, offdiagV;
2956   PetscScalar    *a, *diagA, *offdiagA;
2957   PetscInt       r;
2958   PetscErrorCode ierr;
2959 
2960   PetscFunctionBegin;
2961   ierr = PetscMalloc2(n,&diagIdx,n,&offdiagIdx);CHKERRQ(ierr);
2962   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr);
2963   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr);
2964   ierr = MatGetRowMax(mat->A, diagV,    diagIdx);CHKERRQ(ierr);
2965   ierr = MatGetRowMax(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
2966   ierr = VecGetArray(v,        &a);CHKERRQ(ierr);
2967   ierr = VecGetArray(diagV,    &diagA);CHKERRQ(ierr);
2968   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2969   for (r = 0; r < n; ++r) {
2970     if (PetscAbsScalar(diagA[r]) >= PetscAbsScalar(offdiagA[r])) {
2971       a[r]   = diagA[r];
2972       idx[r] = cstart + diagIdx[r];
2973     } else {
2974       a[r]   = offdiagA[r];
2975       idx[r] = cmap[offdiagIdx[r]];
2976     }
2977   }
2978   ierr = VecRestoreArray(v,        &a);CHKERRQ(ierr);
2979   ierr = VecRestoreArray(diagV,    &diagA);CHKERRQ(ierr);
2980   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2981   ierr = VecDestroy(&diagV);CHKERRQ(ierr);
2982   ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
2983   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
2984   PetscFunctionReturn(0);
2985 }
2986 
2987 #undef __FUNCT__
2988 #define __FUNCT__ "MatGetSeqNonzeroStructure_MPIAIJ"
2989 PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat,Mat *newmat)
2990 {
2991   PetscErrorCode ierr;
2992   Mat            *dummy;
2993 
2994   PetscFunctionBegin;
2995   ierr    = MatGetSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy);CHKERRQ(ierr);
2996   *newmat = *dummy;
2997   ierr    = PetscFree(dummy);CHKERRQ(ierr);
2998   PetscFunctionReturn(0);
2999 }
3000 
3001 #undef __FUNCT__
3002 #define __FUNCT__ "MatInvertBlockDiagonal_MPIAIJ"
3003 PetscErrorCode  MatInvertBlockDiagonal_MPIAIJ(Mat A,const PetscScalar **values)
3004 {
3005   Mat_MPIAIJ     *a = (Mat_MPIAIJ*) A->data;
3006   PetscErrorCode ierr;
3007 
3008   PetscFunctionBegin;
3009   ierr = MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr);
3010   PetscFunctionReturn(0);
3011 }
3012 
3013 #undef __FUNCT__
3014 #define __FUNCT__ "MatSetRandom_MPIAIJ"
3015 static PetscErrorCode  MatSetRandom_MPIAIJ(Mat x,PetscRandom rctx)
3016 {
3017   PetscErrorCode ierr;
3018   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)x->data;
3019 
3020   PetscFunctionBegin;
3021   ierr = MatSetRandom(aij->A,rctx);CHKERRQ(ierr);
3022   ierr = MatSetRandom(aij->B,rctx);CHKERRQ(ierr);
3023   ierr = MatAssemblyBegin(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3024   ierr = MatAssemblyEnd(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3025   PetscFunctionReturn(0);
3026 }
3027 
3028 /* -------------------------------------------------------------------*/
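/* Operation table for MATMPIAIJ: the numbered comments mark the slot index within
   struct _MatOps, and a 0 entry means the operation is not implemented for this type. */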
3029 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
3030                                        MatGetRow_MPIAIJ,
3031                                        MatRestoreRow_MPIAIJ,
3032                                        MatMult_MPIAIJ,
3033                                 /* 4*/ MatMultAdd_MPIAIJ,
3034                                        MatMultTranspose_MPIAIJ,
3035                                        MatMultTransposeAdd_MPIAIJ,
3036 #if defined(PETSC_HAVE_PBGL)
3037                                        MatSolve_MPIAIJ,
3038 #else
3039                                        0,
3040 #endif
3041                                        0,
3042                                        0,
3043                                 /*10*/ 0,
3044                                        0,
3045                                        0,
3046                                        MatSOR_MPIAIJ,
3047                                        MatTranspose_MPIAIJ,
3048                                 /*15*/ MatGetInfo_MPIAIJ,
3049                                        MatEqual_MPIAIJ,
3050                                        MatGetDiagonal_MPIAIJ,
3051                                        MatDiagonalScale_MPIAIJ,
3052                                        MatNorm_MPIAIJ,
3053                                 /*20*/ MatAssemblyBegin_MPIAIJ,
3054                                        MatAssemblyEnd_MPIAIJ,
3055                                        MatSetOption_MPIAIJ,
3056                                        MatZeroEntries_MPIAIJ,
3057                                 /*24*/ MatZeroRows_MPIAIJ,
3058                                        0,
3059 #if defined(PETSC_HAVE_PBGL)
3060                                        0,
3061 #else
3062                                        0,
3063 #endif
3064                                        0,
3065                                        0,
3066                                 /*29*/ MatSetUp_MPIAIJ,
3067 #if defined(PETSC_HAVE_PBGL)
3068                                        0,
3069 #else
3070                                        0,
3071 #endif
3072                                        0,
3073                                        0,
3074                                        0,
3075                                 /*34*/ MatDuplicate_MPIAIJ,
3076                                        0,
3077                                        0,
3078                                        0,
3079                                        0,
3080                                 /*39*/ MatAXPY_MPIAIJ,
3081                                        MatGetSubMatrices_MPIAIJ,
3082                                        MatIncreaseOverlap_MPIAIJ,
3083                                        MatGetValues_MPIAIJ,
3084                                        MatCopy_MPIAIJ,
3085                                 /*44*/ MatGetRowMax_MPIAIJ,
3086                                        MatScale_MPIAIJ,
3087                                        0,
3088                                        0,
3089                                        MatZeroRowsColumns_MPIAIJ,
3090                                 /*49*/ MatSetRandom_MPIAIJ,
3091                                        0,
3092                                        0,
3093                                        0,
3094                                        0,
3095                                 /*54*/ MatFDColoringCreate_MPIXAIJ,
3096                                        0,
3097                                        MatSetUnfactored_MPIAIJ,
3098                                        MatPermute_MPIAIJ,
3099                                        0,
3100                                 /*59*/ MatGetSubMatrix_MPIAIJ,
3101                                        MatDestroy_MPIAIJ,
3102                                        MatView_MPIAIJ,
3103                                        0,
3104                                        MatMatMatMult_MPIAIJ_MPIAIJ_MPIAIJ,
3105                                 /*64*/ MatMatMatMultSymbolic_MPIAIJ_MPIAIJ_MPIAIJ,
3106                                        MatMatMatMultNumeric_MPIAIJ_MPIAIJ_MPIAIJ,
3107                                        0,
3108                                        0,
3109                                        0,
3110                                 /*69*/ MatGetRowMaxAbs_MPIAIJ,
3111                                        MatGetRowMinAbs_MPIAIJ,
3112                                        0,
3113                                        MatSetColoring_MPIAIJ,
3114                                        0,
3115                                        MatSetValuesAdifor_MPIAIJ,
3116                                 /*75*/ MatFDColoringApply_AIJ,
3117                                        0,
3118                                        0,
3119                                        0,
3120                                        MatFindZeroDiagonals_MPIAIJ,
3121                                 /*80*/ 0,
3122                                        0,
3123                                        0,
3124                                 /*83*/ MatLoad_MPIAIJ,
3125                                        0,
3126                                        0,
3127                                        0,
3128                                        0,
3129                                        0,
3130                                 /*89*/ MatMatMult_MPIAIJ_MPIAIJ,
3131                                        MatMatMultSymbolic_MPIAIJ_MPIAIJ,
3132                                        MatMatMultNumeric_MPIAIJ_MPIAIJ,
3133                                        MatPtAP_MPIAIJ_MPIAIJ,
3134                                        MatPtAPSymbolic_MPIAIJ_MPIAIJ,
3135                                 /*94*/ MatPtAPNumeric_MPIAIJ_MPIAIJ,
3136                                        0,
3137                                        0,
3138                                        0,
3139                                        0,
3140                                 /*99*/ 0,
3141                                        0,
3142                                        0,
3143                                        MatConjugate_MPIAIJ,
3144                                        0,
3145                                 /*104*/MatSetValuesRow_MPIAIJ,
3146                                        MatRealPart_MPIAIJ,
3147                                        MatImaginaryPart_MPIAIJ,
3148                                        0,
3149                                        0,
3150                                 /*109*/0,
3151                                        MatGetRedundantMatrix_MPIAIJ,
3152                                        MatGetRowMin_MPIAIJ,
3153                                        0,
3154                                        0,
3155                                 /*114*/MatGetSeqNonzeroStructure_MPIAIJ,
3156                                        0,
3157                                        0,
3158                                        0,
3159                                        0,
3160                                 /*119*/0,
3161                                        0,
3162                                        0,
3163                                        0,
3164                                        MatGetMultiProcBlock_MPIAIJ,
3165                                 /*124*/MatFindNonzeroRows_MPIAIJ,
3166                                        MatGetColumnNorms_MPIAIJ,
3167                                        MatInvertBlockDiagonal_MPIAIJ,
3168                                        0,
3169                                        MatGetSubMatricesParallel_MPIAIJ,
3170                                 /*129*/0,
3171                                        MatTransposeMatMult_MPIAIJ_MPIAIJ,
3172                                        MatTransposeMatMultSymbolic_MPIAIJ_MPIAIJ,
3173                                        MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ,
3174                                        0,
3175                                 /*134*/0,
3176                                        0,
3177                                        0,
3178                                        0,
3179                                        0,
3180                                 /*139*/0,
3181                                        0,
3182                                        0,
3183                                        MatFDColoringSetUp_MPIXAIJ
3184 };
3185 
3186 /* ----------------------------------------------------------------------------------------*/
3187 
3188 #undef __FUNCT__
3189 #define __FUNCT__ "MatStoreValues_MPIAIJ"
3190 PetscErrorCode  MatStoreValues_MPIAIJ(Mat mat)
3191 {
3192   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
3193   PetscErrorCode ierr;
3194 
3195   PetscFunctionBegin;
3196   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
3197   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
3198   PetscFunctionReturn(0);
3199 }
3200 
3201 #undef __FUNCT__
3202 #define __FUNCT__ "MatRetrieveValues_MPIAIJ"
3203 PetscErrorCode  MatRetrieveValues_MPIAIJ(Mat mat)
3204 {
3205   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
3206   PetscErrorCode ierr;
3207 
3208   PetscFunctionBegin;
3209   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
3210   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
3211   PetscFunctionReturn(0);
3212 }
3213 
3214 #undef __FUNCT__
3215 #define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ"
3216 PetscErrorCode  MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
3217 {
3218   Mat_MPIAIJ     *b;
3219   PetscErrorCode ierr;
3220 
3221   PetscFunctionBegin;
3222   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3223   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3224   b = (Mat_MPIAIJ*)B->data;
3225 
3226   if (!B->preallocated) {
3227     /* Explicitly create 2 MATSEQAIJ matrices. */
3228     ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
3229     ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr);
3230     ierr = MatSetBlockSizesFromMats(b->A,B,B);CHKERRQ(ierr);
3231     ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr);
3232     ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->A);CHKERRQ(ierr);
3233     ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
3234     ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr);
3235     ierr = MatSetBlockSizesFromMats(b->B,B,B);CHKERRQ(ierr);
3236     ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr);
3237     ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->B);CHKERRQ(ierr);
3238   }
3239 
3240   ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr);
3241   ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr);
3242   B->preallocated = PETSC_TRUE;
3243   PetscFunctionReturn(0);
3244 }
3245 
3246 #undef __FUNCT__
3247 #define __FUNCT__ "MatDuplicate_MPIAIJ"
3248 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
3249 {
3250   Mat            mat;
3251   Mat_MPIAIJ     *a,*oldmat = (Mat_MPIAIJ*)matin->data;
3252   PetscErrorCode ierr;
3253 
3254   PetscFunctionBegin;
3255   *newmat = 0;
3256   ierr    = MatCreate(PetscObjectComm((PetscObject)matin),&mat);CHKERRQ(ierr);
3257   ierr    = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr);
3258   ierr    = MatSetBlockSizesFromMats(mat,matin,matin);CHKERRQ(ierr);
3259   ierr    = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr);
3260   ierr    = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
3261   a       = (Mat_MPIAIJ*)mat->data;
3262 
3263   mat->factortype   = matin->factortype;
3264   mat->assembled    = PETSC_TRUE;
3265   mat->insertmode   = NOT_SET_VALUES;
3266   mat->preallocated = PETSC_TRUE;
3267 
3268   a->size         = oldmat->size;
3269   a->rank         = oldmat->rank;
3270   a->donotstash   = oldmat->donotstash;
3271   a->roworiented  = oldmat->roworiented;
3272   a->rowindices   = 0;
3273   a->rowvalues    = 0;
3274   a->getrowactive = PETSC_FALSE;
3275 
3276   ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr);
3277   ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr);
3278 
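  /* colmap (the global-to-local column map for the off-diagonal block B) and garray (the
     corresponding local-to-global column list) are copied so the duplicate can address its
     off-diagonal entries independently of the original matrix. */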
3279   if (oldmat->colmap) {
3280 #if defined(PETSC_USE_CTABLE)
3281     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
3282 #else
3283     ierr = PetscMalloc1((mat->cmap->N),&a->colmap);CHKERRQ(ierr);
3284     ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
3285     ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
3286 #endif
3287   } else a->colmap = 0;
3288   if (oldmat->garray) {
3289     PetscInt len;
3290     len  = oldmat->B->cmap->n;
3291     ierr = PetscMalloc1((len+1),&a->garray);CHKERRQ(ierr);
3292     ierr = PetscLogObjectMemory((PetscObject)mat,len*sizeof(PetscInt));CHKERRQ(ierr);
3293     if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); }
3294   } else a->garray = 0;
3295 
3296   ierr    = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
3297   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->lvec);CHKERRQ(ierr);
3298   ierr    = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
3299   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx);CHKERRQ(ierr);
3300   ierr    = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
3301   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr);
3302   ierr    = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
3303   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->B);CHKERRQ(ierr);
3304   ierr    = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr);
3305   *newmat = mat;
3306   PetscFunctionReturn(0);
3307 }
3308 
3309 
3310 
3311 #undef __FUNCT__
3312 #define __FUNCT__ "MatLoad_MPIAIJ"
3313 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer)
3314 {
3315   PetscScalar    *vals,*svals;
3316   MPI_Comm       comm;
3317   PetscErrorCode ierr;
3318   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
3319   PetscInt       i,nz,j,rstart,rend,mmax,maxnz = 0,grows,gcols;
3320   PetscInt       header[4],*rowlengths = 0,M,N,m,*cols;
3321   PetscInt       *ourlens = NULL,*procsnz = NULL,*offlens = NULL,jj,*mycols,*smycols;
3322   PetscInt       cend,cstart,n,*rowners,sizesset=1;
3323   int            fd;
3324   PetscInt       bs = newMat->rmap->bs;
3325 
3326   PetscFunctionBegin;
3327   ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr);
3328   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3329   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3330   if (!rank) {
3331     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
3332     ierr = PetscBinaryRead(fd,(char*)header,4,PETSC_INT);CHKERRQ(ierr);
3333     if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
3334   }
3335 
3336   ierr = PetscOptionsBegin(comm,NULL,"Options for loading MPIAIJ matrix","Mat");CHKERRQ(ierr);
3337   ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,NULL);CHKERRQ(ierr);
3338   ierr = PetscOptionsEnd();CHKERRQ(ierr);
3339   if (bs < 0) bs = 1;
3340 
3341   if (newMat->rmap->n < 0 && newMat->rmap->N < 0 && newMat->cmap->n < 0 && newMat->cmap->N < 0) sizesset = 0;
3342 
3343   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
3344   M    = header[1]; N = header[2];
3345   /* If global rows/cols are set to PETSC_DECIDE, set it to the sizes given in the file */
3346   if (sizesset && newMat->rmap->N < 0) newMat->rmap->N = M;
3347   if (sizesset && newMat->cmap->N < 0) newMat->cmap->N = N;
3348 
3349   /* If global sizes are set, check if they are consistent with that given in the file */
3350   if (sizesset) {
3351     ierr = MatGetSize(newMat,&grows,&gcols);CHKERRQ(ierr);
3352   }
3353   if (sizesset && newMat->rmap->N != grows) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows:Matrix in file has (%d) and input matrix has (%d)",M,grows);
3354   if (sizesset && newMat->cmap->N != gcols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols:Matrix in file has (%d) and input matrix has (%d)",N,gcols);
3355 
3356   /* determine ownership of all (block) rows */
3357   if (M%bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows (%d) and block size (%d)",M,bs);
3358   if (newMat->rmap->n < 0) m = bs*((M/bs)/size + (((M/bs) % size) > rank));    /* PETSC_DECIDE */
3359   else m = newMat->rmap->n; /* Set by user */
3360 
3361   ierr = PetscMalloc1((size+1),&rowners);CHKERRQ(ierr);
3362   ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
3363 
3364   /* First process needs enough room for process with most rows */
3365   if (!rank) {
3366     mmax = rowners[1];
3367     for (i=2; i<=size; i++) {
3368       mmax = PetscMax(mmax, rowners[i]);
3369     }
3370   } else mmax = -1;             /* unused, but compilers complain */
3371 
3372   rowners[0] = 0;
3373   for (i=2; i<=size; i++) {
3374     rowners[i] += rowners[i-1];
3375   }
3376   rstart = rowners[rank];
3377   rend   = rowners[rank+1];
3378 
3379   /* distribute row lengths to all processors */
3380   ierr = PetscMalloc2(m,&ourlens,m,&offlens);CHKERRQ(ierr);
3381   if (!rank) {
3382     ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr);
3383     ierr = PetscMalloc1(mmax,&rowlengths);CHKERRQ(ierr);
3384     ierr = PetscCalloc1(size,&procsnz);CHKERRQ(ierr);
3385     for (j=0; j<m; j++) {
3386       procsnz[0] += ourlens[j];
3387     }
3388     for (i=1; i<size; i++) {
3389       ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr);
3390       /* calculate the number of nonzeros on each processor */
3391       for (j=0; j<rowners[i+1]-rowners[i]; j++) {
3392         procsnz[i] += rowlengths[j];
3393       }
3394       ierr = MPIULong_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
3395     }
3396     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
3397   } else {
3398     ierr = MPIULong_Recv(ourlens,m,MPIU_INT,0,tag,comm);CHKERRQ(ierr);
3399   }
3400 
3401   if (!rank) {
3402     /* determine max buffer needed and allocate it */
3403     maxnz = 0;
3404     for (i=0; i<size; i++) {
3405       maxnz = PetscMax(maxnz,procsnz[i]);
3406     }
3407     ierr = PetscMalloc1(maxnz,&cols);CHKERRQ(ierr);
3408 
3409     /* read in my part of the matrix column indices  */
3410     nz   = procsnz[0];
3411     ierr = PetscMalloc1(nz,&mycols);CHKERRQ(ierr);
3412     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
3413 
3414     /* read in everyone else's rows and ship them off */
3415     for (i=1; i<size; i++) {
3416       nz   = procsnz[i];
3417       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
3418       ierr = MPIULong_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
3419     }
3420     ierr = PetscFree(cols);CHKERRQ(ierr);
3421   } else {
3422     /* determine buffer space needed for message */
3423     nz = 0;
3424     for (i=0; i<m; i++) {
3425       nz += ourlens[i];
3426     }
3427     ierr = PetscMalloc1(nz,&mycols);CHKERRQ(ierr);
3428 
3429     /* receive message of column indices*/
3430     ierr = MPIULong_Recv(mycols,nz,MPIU_INT,0,tag,comm);CHKERRQ(ierr);
3431   }
3432 
3433   /* determine column ownership if matrix is not square */
3434   if (N != M) {
3435     if (newMat->cmap->n < 0) n = N/size + ((N % size) > rank);
3436     else n = newMat->cmap->n;
3437     ierr   = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3438     cstart = cend - n;
3439   } else {
3440     cstart = rstart;
3441     cend   = rend;
3442     n      = cend - cstart;
3443   }
3444 
3445   /* loop over local rows, determining number of off diagonal entries */
3446   ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr);
3447   jj   = 0;
3448   for (i=0; i<m; i++) {
3449     for (j=0; j<ourlens[i]; j++) {
3450       if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++;
3451       jj++;
3452     }
3453   }
3454 
3455   for (i=0; i<m; i++) {
3456     ourlens[i] -= offlens[i];
3457   }
3458   if (!sizesset) {
3459     ierr = MatSetSizes(newMat,m,n,M,N);CHKERRQ(ierr);
3460   }
3461 
3462   if (bs > 1) {ierr = MatSetBlockSize(newMat,bs);CHKERRQ(ierr);}
3463 
3464   ierr = MatMPIAIJSetPreallocation(newMat,0,ourlens,0,offlens);CHKERRQ(ierr);
3465 
3466   for (i=0; i<m; i++) {
3467     ourlens[i] += offlens[i];
3468   }
3469 
3470   if (!rank) {
3471     ierr = PetscMalloc1((maxnz+1),&vals);CHKERRQ(ierr);
3472 
3473     /* read in my part of the matrix numerical values  */
3474     nz   = procsnz[0];
3475     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3476 
3477     /* insert into matrix */
3478     jj      = rstart;
3479     smycols = mycols;
3480     svals   = vals;
3481     for (i=0; i<m; i++) {
3482       ierr     = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
3483       smycols += ourlens[i];
3484       svals   += ourlens[i];
3485       jj++;
3486     }
3487 
3488     /* read in other processors and ship out */
3489     for (i=1; i<size; i++) {
3490       nz   = procsnz[i];
3491       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3492       ierr = MPIULong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr);
3493     }
3494     ierr = PetscFree(procsnz);CHKERRQ(ierr);
3495   } else {
3496     /* receive numeric values */
3497     ierr = PetscMalloc1((nz+1),&vals);CHKERRQ(ierr);
3498 
3499     /* receive message of values*/
3500     ierr = MPIULong_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr);
3501 
3502     /* insert into matrix */
3503     jj      = rstart;
3504     smycols = mycols;
3505     svals   = vals;
3506     for (i=0; i<m; i++) {
3507       ierr     = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
3508       smycols += ourlens[i];
3509       svals   += ourlens[i];
3510       jj++;
3511     }
3512   }
3513   ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr);
3514   ierr = PetscFree(vals);CHKERRQ(ierr);
3515   ierr = PetscFree(mycols);CHKERRQ(ierr);
3516   ierr = PetscFree(rowners);CHKERRQ(ierr);
3517   ierr = MatAssemblyBegin(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3518   ierr = MatAssemblyEnd(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3519   PetscFunctionReturn(0);
3520 }
3521 
3522 #undef __FUNCT__
3523 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ"
3524 PetscErrorCode MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat)
3525 {
3526   PetscErrorCode ierr;
3527   IS             iscol_local;
3528   PetscInt       csize;
3529 
3530   PetscFunctionBegin;
3531   ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
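  /* On MAT_INITIAL_MATRIX the (parallel) column index set is gathered onto every process and
     cached on the new matrix under the name "ISAllGather", so that a later MAT_REUSE_MATRIX
     call can retrieve the same gathered index set instead of recomputing it. */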
3532   if (call == MAT_REUSE_MATRIX) {
3533     ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr);
3534     if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3535   } else {
3536     PetscInt cbs;
3537     ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr);
3538     ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
3539     ierr = ISSetBlockSize(iscol_local,cbs);CHKERRQ(ierr);
3540   }
3541   ierr = MatGetSubMatrix_MPIAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr);
3542   if (call == MAT_INITIAL_MATRIX) {
3543     ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
3544     ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
3545   }
3546   PetscFunctionReturn(0);
3547 }
3548 
3549 extern PetscErrorCode MatGetSubMatrices_MPIAIJ_Local(Mat,PetscInt,const IS[],const IS[],MatReuse,PetscBool*,Mat*);
3550 #undef __FUNCT__
3551 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ_Private"
3552 /*
3553     Not great since it makes two copies of the submatrix: first a sequential AIJ copy
3554   on each process, and then the final result obtained by concatenating the local
3555   matrices.  Writing it directly would be much like MatGetSubMatrices_MPIAIJ().
3556 
3557   Note: This requires a sequential iscol with all indices.
3558 */
3559 PetscErrorCode MatGetSubMatrix_MPIAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat)
3560 {
3561   PetscErrorCode ierr;
3562   PetscMPIInt    rank,size;
3563   PetscInt       i,m,n,rstart,row,rend,nz,*cwork,j,bs,cbs;
3564   PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal,ncol;
3565   PetscBool      allcolumns, colflag;
3566   Mat            M,Mreuse;
3567   MatScalar      *vwork,*aa;
3568   MPI_Comm       comm;
3569   Mat_SeqAIJ     *aij;
3570 
3571   PetscFunctionBegin;
3572   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3573   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3574   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3575 
3576   ierr = ISIdentity(iscol,&colflag);CHKERRQ(ierr);
3577   ierr = ISGetLocalSize(iscol,&ncol);CHKERRQ(ierr);
3578   if (colflag && ncol == mat->cmap->N) {
3579     allcolumns = PETSC_TRUE;
3580   } else {
3581     allcolumns = PETSC_FALSE;
3582   }
3583   if (call ==  MAT_REUSE_MATRIX) {
3584     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Mreuse);CHKERRQ(ierr);
3585     if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3586     ierr = MatGetSubMatrices_MPIAIJ_Local(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&allcolumns,&Mreuse);CHKERRQ(ierr);
3587   } else {
3588     ierr = MatGetSubMatrices_MPIAIJ_Local(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&allcolumns,&Mreuse);CHKERRQ(ierr);
3589   }
3590 
3591   /*
3592       m - number of local rows
3593       n - number of columns (same on all processors)
3594       rstart - first row in new global matrix generated
3595   */
3596   ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
3597   ierr = MatGetBlockSizes(Mreuse,&bs,&cbs);CHKERRQ(ierr);
3598   if (call == MAT_INITIAL_MATRIX) {
3599     aij = (Mat_SeqAIJ*)(Mreuse)->data;
3600     ii  = aij->i;
3601     jj  = aij->j;
3602 
3603     /*
3604         Determine the number of non-zeros in the diagonal and off-diagonal
3605         portions of the matrix in order to do correct preallocation
3606     */
3607 
3608     /* first get start and end of "diagonal" columns */
3609     if (csize == PETSC_DECIDE) {
3610       ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
3611       if (mglobal == n) { /* square matrix */
3612         nlocal = m;
3613       } else {
3614         nlocal = n/size + ((n % size) > rank);
3615       }
3616     } else {
3617       nlocal = csize;
3618     }
3619     ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3620     rstart = rend - nlocal;
3621     if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n);
3622 
3623     /* next, compute all the lengths */
3624     ierr  = PetscMalloc1((2*m+1),&dlens);CHKERRQ(ierr);
3625     olens = dlens + m;
3626     for (i=0; i<m; i++) {
3627       jend = ii[i+1] - ii[i];
3628       olen = 0;
3629       dlen = 0;
3630       for (j=0; j<jend; j++) {
3631         if (*jj < rstart || *jj >= rend) olen++;
3632         else dlen++;
3633         jj++;
3634       }
3635       olens[i] = olen;
3636       dlens[i] = dlen;
3637     }
3638     ierr = MatCreate(comm,&M);CHKERRQ(ierr);
3639     ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr);
3640     ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr);
3641     ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr);
3642     ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr);
3643     ierr = PetscFree(dlens);CHKERRQ(ierr);
3644   } else {
3645     PetscInt ml,nl;
3646 
3647     M    = *newmat;
3648     ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
3649     if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
3650     ierr = MatZeroEntries(M);CHKERRQ(ierr);
3651     /*
3652          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
3653        rather than the slower MatSetValues().
3654     */
3655     M->was_assembled = PETSC_TRUE;
3656     M->assembled     = PETSC_FALSE;
3657   }
3658   ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
3659   aij  = (Mat_SeqAIJ*)(Mreuse)->data;
3660   ii   = aij->i;
3661   jj   = aij->j;
3662   aa   = aij->a;
3663   for (i=0; i<m; i++) {
3664     row   = rstart + i;
3665     nz    = ii[i+1] - ii[i];
3666     cwork = jj;     jj += nz;
3667     vwork = aa;     aa += nz;
3668     ierr  = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
3669   }
3670 
3671   ierr    = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3672   ierr    = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3673   *newmat = M;
3674 
3675   /* save submatrix used in processor for next request */
3676   if (call ==  MAT_INITIAL_MATRIX) {
3677     ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
3678     ierr = MatDestroy(&Mreuse);CHKERRQ(ierr);
3679   }
3680   PetscFunctionReturn(0);
3681 }
3682 
3683 #undef __FUNCT__
3684 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR_MPIAIJ"
3685 PetscErrorCode  MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[])
3686 {
3687   PetscInt       m,cstart, cend,j,nnz,i,d;
3688   PetscInt       *d_nnz,*o_nnz,nnz_max = 0,rstart,ii;
3689   const PetscInt *JJ;
3690   PetscScalar    *values;
3691   PetscErrorCode ierr;
3692 
3693   PetscFunctionBegin;
3694   if (Ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0 it is %D",Ii[0]);
3695 
3696   ierr   = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3697   ierr   = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3698   m      = B->rmap->n;
3699   cstart = B->cmap->rstart;
3700   cend   = B->cmap->rend;
3701   rstart = B->rmap->rstart;
3702 
3703   ierr = PetscMalloc2(m,&d_nnz,m,&o_nnz);CHKERRQ(ierr);
3704 
3705 #if defined(PETSC_USE_DEBUG)
3706   for (i=0; i<m; i++) {
3707     nnz = Ii[i+1]- Ii[i];
3708     JJ  = J + Ii[i];
3709     if (nnz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative %D number of columns",i,nnz);
3710     if (nnz && (JJ[0] < 0)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index %D",i,JJ[0]);
3711     if (nnz && (JJ[nnz-1] >= B->cmap->N)) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N);
3712   }
3713 #endif
3714 
3715   for (i=0; i<m; i++) {
3716     nnz     = Ii[i+1]- Ii[i];
3717     JJ      = J + Ii[i];
3718     nnz_max = PetscMax(nnz_max,nnz);
3719     d       = 0;
3720     for (j=0; j<nnz; j++) {
3721       if (cstart <= JJ[j] && JJ[j] < cend) d++;
3722     }
3723     d_nnz[i] = d;
3724     o_nnz[i] = nnz - d;
3725   }
3726   ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
3727   ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);
3728 
3729   if (v) values = (PetscScalar*)v;
3730   else {
3731     ierr = PetscCalloc1((nnz_max+1),&values);CHKERRQ(ierr);
3732   }
3733 
3734   for (i=0; i<m; i++) {
3735     ii   = i + rstart;
3736     nnz  = Ii[i+1]- Ii[i];
3737     ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ? Ii[i] : 0),INSERT_VALUES);CHKERRQ(ierr);
3738   }
3739   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3740   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3741 
3742   if (!v) {
3743     ierr = PetscFree(values);CHKERRQ(ierr);
3744   }
3745   ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3746   PetscFunctionReturn(0);
3747 }
3748 
3749 #undef __FUNCT__
3750 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR"
3751 /*@
3752    MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format
3753    (the default parallel PETSc format).
3754 
3755    Collective on MPI_Comm
3756 
3757    Input Parameters:
3758 +  B - the matrix
3759 .  i - the indices into j for the start of each local row (starts with zero)
3760 .  j - the column indices for each local row (starts with zero)
3761 -  v - optional values in the matrix
3762 
3763    Level: developer
3764 
3765    Notes:
3766        The i, j, and v arrays ARE copied by this routine into the internal format used by PETSc;
3767      thus you CANNOT change the matrix entries by changing the values of v[] after you have
3768      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3769 
3770        The i and j indices are 0 based, and i indices are indices corresponding to the local j array.
3771 
3772        The format used for the sparse matrix input is equivalent to a
3773     row-major ordering, i.e., for the following matrix the expected input data is
3774     as shown:
3775 
3776         1 0 0
3777         2 0 3     P0
3778        -------
3779         4 5 6     P1
3780 
3781      Process0 [P0]: rows_owned=[0,1]
3782         i =  {0,1,3}  [size = nrow+1  = 2+1]
3783         j =  {0,0,2}  [size = nz = 3]
3784         v =  {1,2,3}  [size = nz = 3]
3785 
3786      Process1 [P1]: rows_owned=[2]
3787         i =  {0,3}    [size = nrow+1  = 1+1]
3788         j =  {0,1,2}  [size = nz = 3]
3789         v =  {4,5,6}  [size = nz = 3]
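
     As a minimal sketch of the corresponding call (the arrays i, j, v and the error flag
     ierr below are hypothetical names, and the matrix B is assumed to already have its
     type and local sizes set), Process0 [P0] would do:

        PetscInt    i[] = {0,1,3};
        PetscInt    j[] = {0,0,2};
        PetscScalar v[] = {1,2,3};
        ierr = MatMPIAIJSetPreallocationCSR(B,i,j,v);CHKERRQ(ierr);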
3790 
3791 .keywords: matrix, aij, compressed row, sparse, parallel
3792 
3793 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateAIJ(), MPIAIJ,
3794           MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays()
3795 @*/
3796 PetscErrorCode  MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
3797 {
3798   PetscErrorCode ierr;
3799 
3800   PetscFunctionBegin;
3801   ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocationCSR_C",(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,i,j,v));CHKERRQ(ierr);
3802   PetscFunctionReturn(0);
3803 }
3804 
3805 #undef __FUNCT__
3806 #define __FUNCT__ "MatMPIAIJSetPreallocation"
3807 /*@C
3808    MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format
3809    (the default parallel PETSc format).  For good matrix assembly performance
3810    the user should preallocate the matrix storage by setting the parameters
3811    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3812    performance can be increased by more than a factor of 50.
3813 
3814    Collective on MPI_Comm
3815 
3816    Input Parameters:
3817 +  B - the matrix
3818 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
3819            (same value is used for all local rows)
3820 .  d_nnz - array containing the number of nonzeros in the various rows of the
3821            DIAGONAL portion of the local submatrix (possibly different for each row)
3822            or NULL, if d_nz is used to specify the nonzero structure.
3823            The size of this array is equal to the number of local rows, i.e 'm'.
3824            For matrices that will be factored, you must leave room for (and set)
3825            the diagonal entry even if it is zero.
3826 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
3827            submatrix (same value is used for all local rows).
3828 -  o_nnz - array containing the number of nonzeros in the various rows of the
3829            OFF-DIAGONAL portion of the local submatrix (possibly different for
3830            each row) or NULL, if o_nz is used to specify the nonzero
3831            structure. The size of this array is equal to the number
3832            of local rows, i.e 'm'.
3833 
3834    If the *_nnz parameter is given then the *_nz parameter is ignored
3835 
3836    The AIJ format (also called the Yale sparse matrix format or
3837    compressed row storage (CSR)), is fully compatible with standard Fortran 77
3838    storage.  The stored row and column indices begin with zero.
3839    See Users-Manual: ch_mat for details.
3840 
3841    The parallel matrix is partitioned such that the first m0 rows belong to
3842    process 0, the next m1 rows belong to process 1, the next m2 rows belong
3843    to process 2, etc., where m0,m1,m2,... are the input parameter 'm'.
3844 
3845    The DIAGONAL portion of the local submatrix of a processor can be defined
3846    as the submatrix which is obtained by extracting the part corresponding to
3847    the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the
3848    first row that belongs to the processor, r2 is the last row belonging to
3849    this processor, and c1-c2 is the range of indices of the local part of a
3850    vector suitable for applying the matrix to.  This is an m x n matrix.  In the
3851    common case of a square matrix, the row and column ranges are the same and
3852    the DIAGONAL part is also square. The remaining portion of the local
3853    submatrix (m x (N-n)) constitutes the OFF-DIAGONAL portion.
3854 
3855    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
3856 
3857    You can call MatGetInfo() to get information on how effective the preallocation was;
3858    for example the fields mallocs, nz_allocated, nz_used, and nz_unneeded.
3859    You can also run with the option -info and look for messages with the string
3860    malloc in them to see if additional memory allocation was needed.
3861 
3862    Example usage:
3863 
3864    Consider the following 8x8 matrix with 34 non-zero values, that is
3865    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
3866    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
3867    as follows:
3868 
3869 .vb
3870             1  2  0  |  0  3  0  |  0  4
3871     Proc0   0  5  6  |  7  0  0  |  8  0
3872             9  0 10  | 11  0  0  | 12  0
3873     -------------------------------------
3874            13  0 14  | 15 16 17  |  0  0
3875     Proc1   0 18  0  | 19 20 21  |  0  0
3876             0  0  0  | 22 23  0  | 24  0
3877     -------------------------------------
3878     Proc2  25 26 27  |  0  0 28  | 29  0
3879            30  0  0  | 31 32 33  |  0 34
3880 .ve
3881 
3882    This can be represented as a collection of submatrices as:
3883 
3884 .vb
3885       A B C
3886       D E F
3887       G H I
3888 .ve
3889 
3890    Where the submatrices A,B,C are owned by proc0, D,E,F are
3891    owned by proc1, G,H,I are owned by proc2.
3892 
3893    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3894    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3895    The 'M','N' parameters are 8,8, and have the same values on all procs.
3896 
3897    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
3898    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
3899    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
3900    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
3901    part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
3902    matrix, and [DF] as another SeqAIJ matrix.
3903 
3904    When d_nz, o_nz parameters are specified, d_nz storage elements are
3905    allocated for every row of the local diagonal submatrix, and o_nz
3906    storage locations are allocated for every row of the OFF-DIAGONAL submat.
3907    One way to choose d_nz and o_nz is to use the maximum number of nonzeros per
3908    local row in the local DIAGONAL and OFF-DIAGONAL submatrices, respectively.
3909    In this case, the values of d_nz,o_nz are:
3910 .vb
3911      proc0 : d_nz = 2, o_nz = 2
3912      proc1 : d_nz = 3, o_nz = 2
3913      proc2 : d_nz = 1, o_nz = 4
3914 .ve
3915    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
3916    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
3917    for proc2, i.e., we are using 12+15+10=37 storage locations to store
3918    34 values.
3919 
3920    When d_nnz, o_nnz parameters are specified, the storage is specified
3921    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
3922    In the above case the values for d_nnz,o_nnz are:
3923 .vb
3924      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
3925      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
3926      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
3927 .ve
3928    Here the space allocated is the sum of all the above values, i.e., 34, and
3929    hence the preallocation is perfect.
3930 
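   As a minimal sketch, proc0 in the example above could set its per-row preallocation
   as follows (the names comm, B, d_nnz0, and o_nnz0 are illustrative only):
.vb
     Mat      B;
     PetscInt d_nnz0[3] = {2,2,2}, o_nnz0[3] = {2,2,2};
     MatCreate(comm,&B);
     MatSetSizes(B,3,3,8,8);
     MatSetType(B,MATMPIAIJ);
     MatMPIAIJSetPreallocation(B,0,d_nnz0,0,o_nnz0);
.ve
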
3931    Level: intermediate
3932 
3933 .keywords: matrix, aij, compressed row, sparse, parallel
3934 
3935 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateAIJ(), MatMPIAIJSetPreallocationCSR(),
3936           MPIAIJ, MatGetInfo(), PetscSplitOwnership()
3937 @*/
3938 PetscErrorCode  MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
3939 {
3940   PetscErrorCode ierr;
3941 
3942   PetscFunctionBegin;
3943   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
3944   PetscValidType(B,1);
3945   ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocation_C",(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr);
3946   PetscFunctionReturn(0);
3947 }
3948 
3949 #undef __FUNCT__
3950 #define __FUNCT__ "MatCreateMPIAIJWithArrays"
3951 /*@
3952      MatCreateMPIAIJWithArrays - creates an MPI AIJ matrix using arrays that contain the local rows
3953          in standard CSR format.
3954 
3955    Collective on MPI_Comm
3956 
3957    Input Parameters:
3958 +  comm - MPI communicator
3959 .  m - number of local rows (Cannot be PETSC_DECIDE)
3960 .  n - This value should be the same as the local size used in creating the
3961        x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have it
3962        calculated if N is given). For square matrices n is almost always m.
3963 .  M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given)
3964 .  N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given)
3965 .   i - row indices
3966 .   j - column indices
3967 -   a - matrix values
3968 
3969    Output Parameter:
3970 .   mat - the matrix
3971 
3972    Level: intermediate
3973 
3974    Notes:
3975        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
3976      thus you CANNOT change the matrix entries by changing the values of a[] after you have
3977      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3978 
3979        The i and j indices are 0 based, and the i indices are offsets into the local j (and a) arrays.
3980 
3981        The format used for the sparse matrix input is equivalent to a
3982     row-major ordering, i.e., for the following matrix the expected input data is
3983     as shown:
3984 
3985         1 0 0
3986         2 0 3     P0
3987        -------
3988         4 5 6     P1
3989 
3990      Process0 [P0]: rows_owned=[0,1]
3991         i =  {0,1,3}  [size = nrow+1  = 2+1]
3992         j =  {0,0,2}  [size = nz = 3]
3993         v =  {1,2,3}  [size = nz = 3]
3994 
3995      Process1 [P1]: rows_owned=[2]
3996         i =  {0,3}    [size = nrow+1  = 1+1]
3997         j =  {0,1,2}  [size = nz = 3]
3998         v =  {4,5,6}  [size = nz = 3]
3999 
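       Example usage (a minimal calling sketch for the layout above; comm, i, j, and a refer
    to each process's own CSR data and are shown only for illustration; m is 2 on P0 and
    1 on P1):
.vb
       Mat A;
       MatCreateMPIAIJWithArrays(comm,m,PETSC_DECIDE,PETSC_DETERMINE,3,i,j,a,&A);
.ve
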
4000 .keywords: matrix, aij, compressed row, sparse, parallel
4001 
4002 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
4003           MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays()
4004 @*/
4005 PetscErrorCode  MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
4006 {
4007   PetscErrorCode ierr;
4008 
4009   PetscFunctionBegin;
4010   if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
4011   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
4012   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
4013   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
4014   /* ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); */
4015   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
4016   ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr);
4017   PetscFunctionReturn(0);
4018 }
4019 
4020 #undef __FUNCT__
4021 #define __FUNCT__ "MatCreateAIJ"
4022 /*@C
4023    MatCreateAIJ - Creates a sparse parallel matrix in AIJ format
4024    (the default parallel PETSc format).  For good matrix assembly performance
4025    the user should preallocate the matrix storage by setting the parameters
4026    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
4027    performance can be increased by more than a factor of 50.
4028 
4029    Collective on MPI_Comm
4030 
4031    Input Parameters:
4032 +  comm - MPI communicator
4033 .  m - number of local rows (or PETSC_DECIDE to have it calculated if M is given)
4034            This value should be the same as the local size used in creating the
4035            y vector for the matrix-vector product y = Ax.
4036 .  n - This value should be the same as the local size used in creating the
4037        x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have it
4038        calculated if N is given). For square matrices n is almost always m.
4039 .  M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given)
4040 .  N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given)
4041 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
4042            (same value is used for all local rows)
4043 .  d_nnz - array containing the number of nonzeros in the various rows of the
4044            DIAGONAL portion of the local submatrix (possibly different for each row)
4045            or NULL, if d_nz is used to specify the nonzero structure.
4046            The size of this array is equal to the number of local rows, i.e 'm'.
4047 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
4048            submatrix (same value is used for all local rows).
4049 -  o_nnz - array containing the number of nonzeros in the various rows of the
4050            OFF-DIAGONAL portion of the local submatrix (possibly different for
4051            each row) or NULL, if o_nz is used to specify the nonzero
4052            structure. The size of this array is equal to the number
4053            of local rows, i.e 'm'.
4054 
4055    Output Parameter:
4056 .  A - the matrix
4057 
4058    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
4059    MatXXXXSetPreallocation() paradigm instead of this routine directly.
4060    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation()]
4061 
4062    Notes:
4063    If the *_nnz parameter is given then the *_nz parameter is ignored
4064 
4065    m,n,M,N parameters specify the size of the matrix, and its partitioning across
4066    processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
4067    storage requirements for this matrix.
4068 
4069    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one
4070    processor then it must be used on all processors that share the object for
4071    that argument.
4072 
4073    The user MUST specify either the local or global matrix dimensions
4074    (possibly both).
4075 
4076    The parallel matrix is partitioned across processors such that the
4077    first m0 rows belong to process 0, the next m1 rows belong to
4078    process 1, the next m2 rows belong to process 2, etc., where
4079    m0,m1,m2,... are the input parameter 'm', i.e., each processor stores
4080    values corresponding to an [m x N] submatrix.
4081 
4082    The columns are logically partitioned with the n0 columns belonging
4083    to 0th partition, the next n1 columns belonging to the next
4084    partition, etc., where n0,n1,n2,... are the input parameter 'n'.
4085 
4086    The DIAGONAL portion of the local submatrix on any given processor
4087    is the submatrix formed by the rows and columns m,n owned by
4088    the given processor, i.e., the diagonal submatrix on
4089    process 0 is [m0 x n0], the diagonal submatrix on process 1 is [m1 x n1],
4090    etc. The remaining portion of the local submatrix [m x (N-n)]
4091    constitutes the OFF-DIAGONAL portion. The example below better
4092    illustrates this concept.
4093 
4094    For a square global matrix we define each processor's diagonal portion
4095    to be its local rows and the corresponding columns (a square submatrix);
4096    each processor's off-diagonal portion encompasses the remainder of the
4097    local matrix (a rectangular submatrix).
4098 
4099    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
4100 
4101    When calling this routine with a single process communicator, a matrix of
4102    type SEQAIJ is returned.  If a matrix of type MPIAIJ is desired for this
4103    type of communicator, use the construction mechanism:
4104      MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatSetSizes(A, m,n,M,N); MatMPIAIJSetPreallocation(A,...);
4105 
4106    By default, this format uses inodes (identical nodes) when possible.
4107    We search for consecutive rows with the same nonzero structure, thereby
4108    reusing matrix information to achieve increased efficiency.
4109 
4110    Options Database Keys:
4111 +  -mat_no_inode  - Do not use inodes
4112 .  -mat_inode_limit <limit> - Sets inode limit (max limit=5)
4113 -  -mat_aij_oneindex - Internally use indexing starting at 1
4114         rather than 0.  Note that when calling MatSetValues(),
4115         the user still MUST index entries starting at 0!
4116 
4117 
4118    Example usage:
4119 
4120    Consider the following 8x8 matrix with 34 non-zero values, that is
4121    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
4122    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
4123    as follows:
4124 
4125 .vb
4126             1  2  0  |  0  3  0  |  0  4
4127     Proc0   0  5  6  |  7  0  0  |  8  0
4128             9  0 10  | 11  0  0  | 12  0
4129     -------------------------------------
4130            13  0 14  | 15 16 17  |  0  0
4131     Proc1   0 18  0  | 19 20 21  |  0  0
4132             0  0  0  | 22 23  0  | 24  0
4133     -------------------------------------
4134     Proc2  25 26 27  |  0  0 28  | 29  0
4135            30  0  0  | 31 32 33  |  0 34
4136 .ve
4137 
4138    This can be represented as a collection of submatrices as:
4139 
4140 .vb
4141       A B C
4142       D E F
4143       G H I
4144 .ve
4145 
4146    Where the submatrices A,B,C are owned by proc0, D,E,F are
4147    owned by proc1, G,H,I are owned by proc2.
4148 
4149    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4150    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4151    The 'M','N' parameters are 8,8, and have the same values on all procs.
4152 
4153    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
4154    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
4155    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
4156    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
4157    part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
4158    matrix, and [DF] as another SeqAIJ matrix.
4159 
4160    When d_nz, o_nz parameters are specified, d_nz storage elements are
4161    allocated for every row of the local diagonal submatrix, and o_nz
4162    storage locations are allocated for every row of the OFF-DIAGONAL submat.
4163    One way to choose d_nz and o_nz is to use the maximum number of nonzeros per
4164    local row in the local DIAGONAL and OFF-DIAGONAL submatrices, respectively.
4165    In this case, the values of d_nz,o_nz are:
4166 .vb
4167      proc0 : d_nz = 2, o_nz = 2
4168      proc1 : d_nz = 3, o_nz = 2
4169      proc2 : d_nz = 1, o_nz = 4
4170 .ve
4171    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
4172    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
4173    for proc2, i.e., we are using 12+15+10=37 storage locations to store
4174    34 values.
4175 
4176    When d_nnz, o_nnz parameters are specified, the storage is specified
4177    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
4178    In the above case the values for d_nnz,o_nnz are:
4179 .vb
4180      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
4181      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
4182      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
4183 .ve
4184    Here the space allocated is the sum of all the above values, i.e., 34, and
4185    hence the preallocation is perfect.
4186 
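   As a minimal sketch, proc0 in the example above could create its share of the matrix
   with per-row preallocation as follows (comm, A, d_nnz0, and o_nnz0 are illustrative only):
.vb
     Mat      A;
     PetscInt d_nnz0[3] = {2,2,2}, o_nnz0[3] = {2,2,2};
     MatCreateAIJ(comm,3,3,8,8,0,d_nnz0,0,o_nnz0,&A);
.ve
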
4187    Level: intermediate
4188 
4189 .keywords: matrix, aij, compressed row, sparse, parallel
4190 
4191 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
4192           MPIAIJ, MatCreateMPIAIJWithArrays()
4193 @*/
4194 PetscErrorCode  MatCreateAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
4195 {
4196   PetscErrorCode ierr;
4197   PetscMPIInt    size;
4198 
4199   PetscFunctionBegin;
4200   ierr = MatCreate(comm,A);CHKERRQ(ierr);
4201   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
4202   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4203   if (size > 1) {
4204     ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr);
4205     ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
4206   } else {
4207     ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr);
4208     ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr);
4209   }
4210   PetscFunctionReturn(0);
4211 }
4212 
4213 #undef __FUNCT__
4214 #define __FUNCT__ "MatMPIAIJGetSeqAIJ"
4215 PetscErrorCode  MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,const PetscInt *colmap[])
4216 {
4217   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
4218 
4219   PetscFunctionBegin;
4220   if (Ad)     *Ad     = a->A;
4221   if (Ao)     *Ao     = a->B;
4222   if (colmap) *colmap = a->garray;
4223   PetscFunctionReturn(0);
4224 }
4225 
4226 #undef __FUNCT__
4227 #define __FUNCT__ "MatSetColoring_MPIAIJ"
4228 PetscErrorCode MatSetColoring_MPIAIJ(Mat A,ISColoring coloring)
4229 {
4230   PetscErrorCode ierr;
4231   PetscInt       i;
4232   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
4233 
4234   PetscFunctionBegin;
4235   if (coloring->ctype == IS_COLORING_GLOBAL) {
4236     ISColoringValue *allcolors,*colors;
4237     ISColoring      ocoloring;
4238 
4239     /* set coloring for diagonal portion */
4240     ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr);
4241 
4242     /* set coloring for off-diagonal portion */
4243     ierr = ISAllGatherColors(PetscObjectComm((PetscObject)A),coloring->n,coloring->colors,NULL,&allcolors);CHKERRQ(ierr);
4244     ierr = PetscMalloc1((a->B->cmap->n+1),&colors);CHKERRQ(ierr);
4245     for (i=0; i<a->B->cmap->n; i++) {
4246       colors[i] = allcolors[a->garray[i]];
4247     }
4248     ierr = PetscFree(allcolors);CHKERRQ(ierr);
4249     ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
4250     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
4251     ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr);
4252   } else if (coloring->ctype == IS_COLORING_GHOSTED) {
4253     ISColoringValue *colors;
4254     PetscInt        *larray;
4255     ISColoring      ocoloring;
4256 
4257     /* set coloring for diagonal portion */
4258     ierr = PetscMalloc1((a->A->cmap->n+1),&larray);CHKERRQ(ierr);
4259     for (i=0; i<a->A->cmap->n; i++) {
4260       larray[i] = i + A->cmap->rstart;
4261     }
4262     ierr = ISGlobalToLocalMappingApply(A->cmap->mapping,IS_GTOLM_MASK,a->A->cmap->n,larray,NULL,larray);CHKERRQ(ierr);
4263     ierr = PetscMalloc1((a->A->cmap->n+1),&colors);CHKERRQ(ierr);
4264     for (i=0; i<a->A->cmap->n; i++) {
4265       colors[i] = coloring->colors[larray[i]];
4266     }
4267     ierr = PetscFree(larray);CHKERRQ(ierr);
4268     ierr = ISColoringCreate(PETSC_COMM_SELF,coloring->n,a->A->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
4269     ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr);
4270     ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr);
4271 
4272     /* set coloring for off-diagonal portion */
4273     ierr = PetscMalloc1((a->B->cmap->n+1),&larray);CHKERRQ(ierr);
4274     ierr = ISGlobalToLocalMappingApply(A->cmap->mapping,IS_GTOLM_MASK,a->B->cmap->n,a->garray,NULL,larray);CHKERRQ(ierr);
4275     ierr = PetscMalloc1((a->B->cmap->n+1),&colors);CHKERRQ(ierr);
4276     for (i=0; i<a->B->cmap->n; i++) {
4277       colors[i] = coloring->colors[larray[i]];
4278     }
4279     ierr = PetscFree(larray);CHKERRQ(ierr);
4280     ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
4281     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
4282     ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr);
4283   } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support for ISColoringType %d",(int)coloring->ctype);
4284   PetscFunctionReturn(0);
4285 }
4286 
4287 #undef __FUNCT__
4288 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ"
4289 PetscErrorCode MatSetValuesAdifor_MPIAIJ(Mat A,PetscInt nl,void *advalues)
4290 {
4291   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
4292   PetscErrorCode ierr;
4293 
4294   PetscFunctionBegin;
4295   ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr);
4296   ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr);
4297   PetscFunctionReturn(0);
4298 }
4299 
4300 #undef __FUNCT__
4301 #define __FUNCT__ "MatCreateMPIAIJConcatenateSeqAIJSymbolic"
4302 PetscErrorCode  MatCreateMPIAIJConcatenateSeqAIJSymbolic(MPI_Comm comm,Mat inmat,PetscInt n,Mat *outmat)
4303 {
4304   PetscErrorCode ierr;
4305   PetscInt       m,N,i,rstart,nnz,*dnz,*onz,sum,bs,cbs;
4306   PetscInt       *indx;
4307 
4308   PetscFunctionBegin;
4309   /* This routine will ONLY return MPIAIJ type matrix */
4310   ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
4311   ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr);
4312   if (n == PETSC_DECIDE) {
4313     ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr);
4314   }
4315   /* Check sum(n) = N */
4316   ierr = MPI_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
4317   if (sum != N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns != global columns %d",N);
4318 
4319   ierr    = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
4320   rstart -= m;
4321 
4322   ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
4323   for (i=0; i<m; i++) {
4324     ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr);
4325     ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr);
4326     ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr);
4327   }
4328 
4329   ierr = MatCreate(comm,outmat);CHKERRQ(ierr);
4330   ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
4331   ierr = MatSetBlockSizes(*outmat,bs,cbs);CHKERRQ(ierr);
4332   ierr = MatSetType(*outmat,MATMPIAIJ);CHKERRQ(ierr);
4333   ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr);
4334   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
4335   PetscFunctionReturn(0);
4336 }
4337 
4338 #undef __FUNCT__
4339 #define __FUNCT__ "MatCreateMPIAIJConcatenateSeqAIJNumeric"
4340 PetscErrorCode  MatCreateMPIAIJConcatenateSeqAIJNumeric(MPI_Comm comm,Mat inmat,PetscInt n,Mat outmat)
4341 {
4342   PetscErrorCode ierr;
4343   PetscInt       m,N,i,rstart,nnz,Ii;
4344   PetscInt       *indx;
4345   PetscScalar    *values;
4346 
4347   PetscFunctionBegin;
4348   ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
4349   ierr = MatGetOwnershipRange(outmat,&rstart,NULL);CHKERRQ(ierr);
4350   for (i=0; i<m; i++) {
4351     ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
4352     Ii   = i + rstart;
4353     ierr = MatSetValues(outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
4354     ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
4355   }
4356   ierr = MatAssemblyBegin(outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4357   ierr = MatAssemblyEnd(outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4358   PetscFunctionReturn(0);
4359 }
4360 
4361 #undef __FUNCT__
4362 #define __FUNCT__ "MatCreateMPIAIJConcatenateSeqAIJ"
4363 /*@
4364       MatCreateMPIAIJConcatenateSeqAIJ - Creates a single large PETSc matrix by concatenating sequential
4365                  matrices from each processor
4366 
4367     Collective on MPI_Comm
4368 
4369    Input Parameters:
4370 +    comm - the communicator the parallel matrix will live on
4371 .    inmat - the input sequential matrix (one per process)
4372 .    n - number of local columns (or PETSC_DECIDE)
4373 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4374 
4375    Output Parameter:
4376 .    outmat - the parallel matrix generated
4377 
4378     Level: advanced
4379 
4380    Notes: The number of columns of the matrix in EACH processor MUST be the same.
4381 
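   Example usage (a sketch only; inmat is each process's own sequential matrix and the
   names comm and C are illustrative):
.vb
     Mat C;
     MatCreateMPIAIJConcatenateSeqAIJ(comm,inmat,PETSC_DECIDE,MAT_INITIAL_MATRIX,&C);
.ve
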
4382 @*/
4383 PetscErrorCode  MatCreateMPIAIJConcatenateSeqAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
4384 {
4385   PetscErrorCode ierr;
4386   PetscMPIInt    size;
4387 
4388   PetscFunctionBegin;
4389   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4390   ierr = PetscLogEventBegin(MAT_Merge,inmat,0,0,0);CHKERRQ(ierr);
4391   if (size == 1) {
4392     if (scall == MAT_INITIAL_MATRIX) {
4393       ierr = MatDuplicate(inmat,MAT_COPY_VALUES,outmat);CHKERRQ(ierr);
4394     } else {
4395       ierr = MatCopy(inmat,*outmat,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
4396     }
4397   } else {
4398     if (scall == MAT_INITIAL_MATRIX) {
4399       ierr = MatCreateMPIAIJConcatenateSeqAIJSymbolic(comm,inmat,n,outmat);CHKERRQ(ierr);
4400     }
4401     ierr = MatCreateMPIAIJConcatenateSeqAIJNumeric(comm,inmat,n,*outmat);CHKERRQ(ierr);
4402   }
4403   ierr = PetscLogEventEnd(MAT_Merge,inmat,0,0,0);CHKERRQ(ierr);
4404   PetscFunctionReturn(0);
4405 }
4406 
4407 #undef __FUNCT__
4408 #define __FUNCT__ "MatFileSplit"
4409 PetscErrorCode MatFileSplit(Mat A,char *outfile)
4410 {
4411   PetscErrorCode    ierr;
4412   PetscMPIInt       rank;
4413   PetscInt          m,N,i,rstart,nnz;
4414   size_t            len;
4415   const PetscInt    *indx;
4416   PetscViewer       out;
4417   char              *name;
4418   Mat               B;
4419   const PetscScalar *values;
4420 
4421   PetscFunctionBegin;
4422   ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr);
4423   ierr = MatGetSize(A,0,&N);CHKERRQ(ierr);
4424   /* Should this be the type of the diagonal block of A? */
4425   ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr);
4426   ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr);
4427   ierr = MatSetBlockSizesFromMats(B,A,A);CHKERRQ(ierr);
4428   ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr);
4429   ierr = MatSeqAIJSetPreallocation(B,0,NULL);CHKERRQ(ierr);
4430   ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr);
4431   for (i=0; i<m; i++) {
4432     ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
4433     ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
4434     ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
4435   }
4436   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4437   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4438 
4439   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRQ(ierr);
4440   ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr);
4441   ierr = PetscMalloc1((len+5),&name);CHKERRQ(ierr);
4442   sprintf(name,"%s.%d",outfile,rank);
4443   ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr);
4444   ierr = PetscFree(name);CHKERRQ(ierr);
4445   ierr = MatView(B,out);CHKERRQ(ierr);
4446   ierr = PetscViewerDestroy(&out);CHKERRQ(ierr);
4447   ierr = MatDestroy(&B);CHKERRQ(ierr);
4448   PetscFunctionReturn(0);
4449 }
4450 
4451 extern PetscErrorCode MatDestroy_MPIAIJ(Mat);
4452 #undef __FUNCT__
4453 #define __FUNCT__ "MatDestroy_MPIAIJ_SeqsToMPI"
4454 PetscErrorCode  MatDestroy_MPIAIJ_SeqsToMPI(Mat A)
4455 {
4456   PetscErrorCode      ierr;
4457   Mat_Merge_SeqsToMPI *merge;
4458   PetscContainer      container;
4459 
4460   PetscFunctionBegin;
4461   ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr);
4462   if (container) {
4463     ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr);
4464     ierr = PetscFree(merge->id_r);CHKERRQ(ierr);
4465     ierr = PetscFree(merge->len_s);CHKERRQ(ierr);
4466     ierr = PetscFree(merge->len_r);CHKERRQ(ierr);
4467     ierr = PetscFree(merge->bi);CHKERRQ(ierr);
4468     ierr = PetscFree(merge->bj);CHKERRQ(ierr);
4469     ierr = PetscFree(merge->buf_ri[0]);CHKERRQ(ierr);
4470     ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr);
4471     ierr = PetscFree(merge->buf_rj[0]);CHKERRQ(ierr);
4472     ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr);
4473     ierr = PetscFree(merge->coi);CHKERRQ(ierr);
4474     ierr = PetscFree(merge->coj);CHKERRQ(ierr);
4475     ierr = PetscFree(merge->owners_co);CHKERRQ(ierr);
4476     ierr = PetscLayoutDestroy(&merge->rowmap);CHKERRQ(ierr);
4477     ierr = PetscFree(merge);CHKERRQ(ierr);
4478     ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr);
4479   }
4480   ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr);
4481   PetscFunctionReturn(0);
4482 }
4483 
4484 #include <../src/mat/utils/freespace.h>
4485 #include <petscbt.h>
4486 
4487 #undef __FUNCT__
4488 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJNumeric"
4489 PetscErrorCode  MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat,Mat mpimat)
4490 {
4491   PetscErrorCode      ierr;
4492   MPI_Comm            comm;
4493   Mat_SeqAIJ          *a  =(Mat_SeqAIJ*)seqmat->data;
4494   PetscMPIInt         size,rank,taga,*len_s;
4495   PetscInt            N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj;
4496   PetscInt            proc,m;
4497   PetscInt            **buf_ri,**buf_rj;
4498   PetscInt            k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj;
4499   PetscInt            nrows,**buf_ri_k,**nextrow,**nextai;
4500   MPI_Request         *s_waits,*r_waits;
4501   MPI_Status          *status;
4502   MatScalar           *aa=a->a;
4503   MatScalar           **abuf_r,*ba_i;
4504   Mat_Merge_SeqsToMPI *merge;
4505   PetscContainer      container;
4506 
4507   PetscFunctionBegin;
4508   ierr = PetscObjectGetComm((PetscObject)mpimat,&comm);CHKERRQ(ierr);
4509   ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
4510 
4511   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4512   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4513 
4514   ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr);
4515   ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr);
4516 
4517   bi     = merge->bi;
4518   bj     = merge->bj;
4519   buf_ri = merge->buf_ri;
4520   buf_rj = merge->buf_rj;
4521 
4522   ierr   = PetscMalloc1(size,&status);CHKERRQ(ierr);
4523   owners = merge->rowmap->range;
4524   len_s  = merge->len_s;
4525 
4526   /* send and recv matrix values */
4527   /*-----------------------------*/
4528   ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr);
4529   ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr);
4530 
4531   ierr = PetscMalloc1((merge->nsend+1),&s_waits);CHKERRQ(ierr);
4532   for (proc=0,k=0; proc<size; proc++) {
4533     if (!len_s[proc]) continue;
4534     i    = owners[proc];
4535     ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr);
4536     k++;
4537   }
4538 
4539   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);}
4540   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);}
4541   ierr = PetscFree(status);CHKERRQ(ierr);
4542 
4543   ierr = PetscFree(s_waits);CHKERRQ(ierr);
4544   ierr = PetscFree(r_waits);CHKERRQ(ierr);
4545 
4546   /* insert mat values of mpimat */
4547   /*----------------------------*/
4548   ierr = PetscMalloc1(N,&ba_i);CHKERRQ(ierr);
4549   ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr);
4550 
4551   for (k=0; k<merge->nrecv; k++) {
4552     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
4553     nrows       = *(buf_ri_k[k]);
4554     nextrow[k]  = buf_ri_k[k]+1;  /* next row number of k-th recved i-structure */
4555     nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th recved i-structure  */
4556   }
4557 
4558   /* set values of ba */
4559   m = merge->rowmap->n;
4560   for (i=0; i<m; i++) {
4561     arow = owners[rank] + i;
4562     bj_i = bj+bi[i];  /* col indices of the i-th row of mpimat */
4563     bnzi = bi[i+1] - bi[i];
4564     ierr = PetscMemzero(ba_i,bnzi*sizeof(PetscScalar));CHKERRQ(ierr);
4565 
4566     /* add local non-zero vals of this proc's seqmat into ba */
4567     anzi   = ai[arow+1] - ai[arow];
4568     aj     = a->j + ai[arow];
4569     aa     = a->a + ai[arow];
4570     nextaj = 0;
4571     for (j=0; nextaj<anzi; j++) {
4572       if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
4573         ba_i[j] += aa[nextaj++];
4574       }
4575     }
4576 
4577     /* add received vals into ba */
4578     for (k=0; k<merge->nrecv; k++) { /* k-th received message */
4579       /* i-th row */
4580       if (i == *nextrow[k]) {
4581         anzi   = *(nextai[k]+1) - *nextai[k];
4582         aj     = buf_rj[k] + *(nextai[k]);
4583         aa     = abuf_r[k] + *(nextai[k]);
4584         nextaj = 0;
4585         for (j=0; nextaj<anzi; j++) {
4586           if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
4587             ba_i[j] += aa[nextaj++];
4588           }
4589         }
4590         nextrow[k]++; nextai[k]++;
4591       }
4592     }
4593     ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr);
4594   }
4595   ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4596   ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4597 
4598   ierr = PetscFree(abuf_r[0]);CHKERRQ(ierr);
4599   ierr = PetscFree(abuf_r);CHKERRQ(ierr);
4600   ierr = PetscFree(ba_i);CHKERRQ(ierr);
4601   ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
4602   ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
4603   PetscFunctionReturn(0);
4604 }
4605 
4606 extern PetscErrorCode  MatDestroy_MPIAIJ_SeqsToMPI(Mat);
4607 
4608 #undef __FUNCT__
4609 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJSymbolic"
4610 PetscErrorCode  MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat)
4611 {
4612   PetscErrorCode      ierr;
4613   Mat                 B_mpi;
4614   Mat_SeqAIJ          *a=(Mat_SeqAIJ*)seqmat->data;
4615   PetscMPIInt         size,rank,tagi,tagj,*len_s,*len_si,*len_ri;
4616   PetscInt            **buf_rj,**buf_ri,**buf_ri_k;
4617   PetscInt            M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j;
4618   PetscInt            len,proc,*dnz,*onz,bs,cbs;
4619   PetscInt            k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0;
4620   PetscInt            nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai;
4621   MPI_Request         *si_waits,*sj_waits,*ri_waits,*rj_waits;
4622   MPI_Status          *status;
4623   PetscFreeSpaceList  free_space=NULL,current_space=NULL;
4624   PetscBT             lnkbt;
4625   Mat_Merge_SeqsToMPI *merge;
4626   PetscContainer      container;
4627 
4628   PetscFunctionBegin;
4629   ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4630 
4631   /* make sure it is a PETSc comm */
4632   ierr = PetscCommDuplicate(comm,&comm,NULL);CHKERRQ(ierr);
4633   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4634   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4635 
4636   ierr = PetscNew(&merge);CHKERRQ(ierr);
4637   ierr = PetscMalloc1(size,&status);CHKERRQ(ierr);
4638 
4639   /* determine row ownership */
4640   /*---------------------------------------------------------*/
4641   ierr = PetscLayoutCreate(comm,&merge->rowmap);CHKERRQ(ierr);
4642   ierr = PetscLayoutSetLocalSize(merge->rowmap,m);CHKERRQ(ierr);
4643   ierr = PetscLayoutSetSize(merge->rowmap,M);CHKERRQ(ierr);
4644   ierr = PetscLayoutSetBlockSize(merge->rowmap,1);CHKERRQ(ierr);
4645   ierr = PetscLayoutSetUp(merge->rowmap);CHKERRQ(ierr);
4646   ierr = PetscMalloc1(size,&len_si);CHKERRQ(ierr);
4647   ierr = PetscMalloc1(size,&merge->len_s);CHKERRQ(ierr);
4648 
4649   m      = merge->rowmap->n;
4650   owners = merge->rowmap->range;
4651 
4652   /* determine the number of messages to send, their lengths */
4653   /*---------------------------------------------------------*/
4654   len_s = merge->len_s;
4655 
4656   len          = 0; /* length of buf_si[] */
4657   merge->nsend = 0;
4658   for (proc=0; proc<size; proc++) {
4659     len_si[proc] = 0;
4660     if (proc == rank) {
4661       len_s[proc] = 0;
4662     } else {
4663       len_si[proc] = owners[proc+1] - owners[proc] + 1;
4664       len_s[proc]  = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */
4665     }
4666     if (len_s[proc]) {
4667       merge->nsend++;
4668       nrows = 0;
4669       for (i=owners[proc]; i<owners[proc+1]; i++) {
4670         if (ai[i+1] > ai[i]) nrows++;
4671       }
4672       len_si[proc] = 2*(nrows+1);
4673       len         += len_si[proc];
4674     }
4675   }
4676 
4677   /* determine the number and length of messages to receive for ij-structure */
4678   /*-------------------------------------------------------------------------*/
4679   ierr = PetscGatherNumberOfMessages(comm,NULL,len_s,&merge->nrecv);CHKERRQ(ierr);
4680   ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr);
4681 
4682   /* post the Irecv of j-structure */
4683   /*-------------------------------*/
4684   ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr);
4685   ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr);
4686 
4687   /* post the Isend of j-structure */
4688   /*--------------------------------*/
4689   ierr = PetscMalloc2(merge->nsend,&si_waits,merge->nsend,&sj_waits);CHKERRQ(ierr);
4690 
4691   for (proc=0, k=0; proc<size; proc++) {
4692     if (!len_s[proc]) continue;
4693     i    = owners[proc];
4694     ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr);
4695     k++;
4696   }
4697 
4698   /* receives and sends of j-structure are complete */
4699   /*------------------------------------------------*/
4700   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);}
4701   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);}
4702 
4703   /* send and recv i-structure */
4704   /*---------------------------*/
4705   ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr);
4706   ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr);
4707 
4708   ierr   = PetscMalloc1((len+1),&buf_s);CHKERRQ(ierr);
4709   buf_si = buf_s;  /* points to the beginning of k-th msg to be sent */
4710   for (proc=0,k=0; proc<size; proc++) {
4711     if (!len_s[proc]) continue;
4712     /* form outgoing message for i-structure:
4713          buf_si[0]:                 nrows to be sent
4714                [1:nrows]:           row index (global)
4715                [nrows+1:2*nrows+1]: i-structure index
4716     */
4717     /*-------------------------------------------*/
4718     nrows       = len_si[proc]/2 - 1;
4719     buf_si_i    = buf_si + nrows+1;
4720     buf_si[0]   = nrows;
4721     buf_si_i[0] = 0;
4722     nrows       = 0;
4723     for (i=owners[proc]; i<owners[proc+1]; i++) {
4724       anzi = ai[i+1] - ai[i];
4725       if (anzi) {
4726         buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */
4727         buf_si[nrows+1]   = i-owners[proc]; /* local row index */
4728         nrows++;
4729       }
4730     }
4731     ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr);
4732     k++;
4733     buf_si += len_si[proc];
4734   }
4735 
4736   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);}
4737   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);}
4738 
4739   ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr);
4740   for (i=0; i<merge->nrecv; i++) {
4741     ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr);
4742   }
4743 
4744   ierr = PetscFree(len_si);CHKERRQ(ierr);
4745   ierr = PetscFree(len_ri);CHKERRQ(ierr);
4746   ierr = PetscFree(rj_waits);CHKERRQ(ierr);
4747   ierr = PetscFree2(si_waits,sj_waits);CHKERRQ(ierr);
4748   ierr = PetscFree(ri_waits);CHKERRQ(ierr);
4749   ierr = PetscFree(buf_s);CHKERRQ(ierr);
4750   ierr = PetscFree(status);CHKERRQ(ierr);
4751 
4752   /* compute a local seq matrix in each processor */
4753   /*----------------------------------------------*/
4754   /* allocate bi array and free space for accumulating nonzero column info */
4755   ierr  = PetscMalloc1((m+1),&bi);CHKERRQ(ierr);
4756   bi[0] = 0;
4757 
4758   /* create and initialize a linked list */
4759   nlnk = N+1;
4760   ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4761 
4762   /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */
4763   len  = ai[owners[rank+1]] - ai[owners[rank]];
4764   ierr = PetscFreeSpaceGet((PetscInt)(2*len+1),&free_space);CHKERRQ(ierr);
4765 
4766   current_space = free_space;
4767 
4768   /* determine symbolic info for each local row */
4769   ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr);
4770 
4771   for (k=0; k<merge->nrecv; k++) {
4772     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
4773     nrows       = *buf_ri_k[k];
4774     nextrow[k]  = buf_ri_k[k] + 1;  /* next row number of k-th recved i-structure */
4775     nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th recved i-structure  */
4776   }
4777 
4778   ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
4779   len  = 0;
4780   for (i=0; i<m; i++) {
4781     bnzi = 0;
4782     /* add local non-zero cols of this proc's seqmat into lnk */
4783     arow  = owners[rank] + i;
4784     anzi  = ai[arow+1] - ai[arow];
4785     aj    = a->j + ai[arow];
4786     ierr  = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4787     bnzi += nlnk;
4788     /* add received col data into lnk */
4789     for (k=0; k<merge->nrecv; k++) { /* k-th received message */
4790       if (i == *nextrow[k]) { /* i-th row */
4791         anzi  = *(nextai[k]+1) - *nextai[k];
4792         aj    = buf_rj[k] + *nextai[k];
4793         ierr  = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4794         bnzi += nlnk;
4795         nextrow[k]++; nextai[k]++;
4796       }
4797     }
4798     if (len < bnzi) len = bnzi;  /* =max(bnzi) */
4799 
4800     /* if free space is not available, make more free space */
4801     if (current_space->local_remaining<bnzi) {
4802       ierr = PetscFreeSpaceGet(bnzi+current_space->total_array_size,&current_space);CHKERRQ(ierr);
4803       nspacedouble++;
4804     }
4805     /* copy data into free space, then initialize lnk */
4806     ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
4807     ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr);
4808 
4809     current_space->array           += bnzi;
4810     current_space->local_used      += bnzi;
4811     current_space->local_remaining -= bnzi;
4812 
4813     bi[i+1] = bi[i] + bnzi;
4814   }
4815 
4816   ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
4817 
4818   ierr = PetscMalloc1((bi[m]+1),&bj);CHKERRQ(ierr);
4819   ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr);
4820   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
4821 
4822   /* create symbolic parallel matrix B_mpi */
4823   /*---------------------------------------*/
4824   ierr = MatGetBlockSizes(seqmat,&bs,&cbs);CHKERRQ(ierr);
4825   ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr);
4826   if (n==PETSC_DECIDE) {
4827     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr);
4828   } else {
4829     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
4830   }
4831   ierr = MatSetBlockSizes(B_mpi,bs,cbs);CHKERRQ(ierr);
4832   ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr);
4833   ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr);
4834   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
4835   ierr = MatSetOption(B_mpi,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
4836 
4837   /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */
4838   B_mpi->assembled    = PETSC_FALSE;
4839   B_mpi->ops->destroy = MatDestroy_MPIAIJ_SeqsToMPI;
4840   merge->bi           = bi;
4841   merge->bj           = bj;
4842   merge->buf_ri       = buf_ri;
4843   merge->buf_rj       = buf_rj;
4844   merge->coi          = NULL;
4845   merge->coj          = NULL;
4846   merge->owners_co    = NULL;
4847 
4848   ierr = PetscCommDestroy(&comm);CHKERRQ(ierr);
4849 
4850   /* attach the supporting struct to B_mpi for reuse */
4851   ierr    = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
4852   ierr    = PetscContainerSetPointer(container,merge);CHKERRQ(ierr);
4853   ierr    = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr);
4854   ierr    = PetscContainerDestroy(&container);CHKERRQ(ierr);
4855   *mpimat = B_mpi;
4856 
4857   ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4858   PetscFunctionReturn(0);
4859 }
4860 
4861 #undef __FUNCT__
4862 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJ"
4863 /*@C
4864       MatCreateMPIAIJSumSeqAIJ - Creates a MPIAIJ matrix by adding sequential
4865                  matrices from each processor
4866 
4867     Collective on MPI_Comm
4868 
4869    Input Parameters:
4870 +    comm - the communicator the parallel matrix will live on
4871 .    seqmat - the input sequential matrix (one per process)
4872 .    m - number of local rows (or PETSC_DECIDE)
4873 .    n - number of local columns (or PETSC_DECIDE)
4874 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4875 
4876    Output Parameter:
4877 .    mpimat - the parallel matrix generated
4878 
4879     Level: advanced
4880 
4881    Notes:
4882      The dimensions of the sequential matrix in each processor MUST be the same.
4883      The input seqmat is included into the container "Mat_Merge_SeqsToMPI", and will be
4884      destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat.
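
     Example usage (a sketch only; seqmat is this process's sequential contribution and the
     names comm and C are illustrative):
.vb
     Mat C;
     MatCreateMPIAIJSumSeqAIJ(comm,seqmat,PETSC_DECIDE,PETSC_DECIDE,MAT_INITIAL_MATRIX,&C);
.ve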
4885 @*/
4886 PetscErrorCode  MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat)
4887 {
4888   PetscErrorCode ierr;
4889   PetscMPIInt    size;
4890 
4891   PetscFunctionBegin;
4892   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4893   if (size == 1) {
4894     ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4895     if (scall == MAT_INITIAL_MATRIX) {
4896       ierr = MatDuplicate(seqmat,MAT_COPY_VALUES,mpimat);CHKERRQ(ierr);
4897     } else {
4898       ierr = MatCopy(seqmat,*mpimat,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
4899     }
4900     ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4901     PetscFunctionReturn(0);
4902   }
4903   ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4904   if (scall == MAT_INITIAL_MATRIX) {
4905     ierr = MatCreateMPIAIJSumSeqAIJSymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr);
4906   }
4907   ierr = MatCreateMPIAIJSumSeqAIJNumeric(seqmat,*mpimat);CHKERRQ(ierr);
4908   ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4909   PetscFunctionReturn(0);
4910 }
4911 
4912 #undef __FUNCT__
4913 #define __FUNCT__ "MatMPIAIJGetLocalMat"
4914 /*@
4915      MatMPIAIJGetLocalMat - Creates a SeqAIJ matrix from an MPIAIJ matrix by taking all its local rows and putting them into a sequential matrix with
4916           mlocal rows and n columns, where mlocal is the row count obtained with MatGetLocalSize() and n is the global column count obtained
4917           with MatGetSize().
4918 
4919     Not Collective
4920 
4921    Input Parameters:
4922 +    A - the matrix
4923 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4924 
4925    Output Parameter:
4926 .    A_loc - the local sequential matrix generated
4927 
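    Example usage (a sketch only; A is an existing MPIAIJ matrix and Aloc is an illustrative name):
.vb
     Mat Aloc;
     MatMPIAIJGetLocalMat(A,MAT_INITIAL_MATRIX,&Aloc);
     /* ... use Aloc; after the values of A change, refresh it with ... */
     MatMPIAIJGetLocalMat(A,MAT_REUSE_MATRIX,&Aloc);
.ve
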
4928     Level: developer
4929 
4930 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMatCondensed()
4931 
4932 @*/
4933 PetscErrorCode  MatMPIAIJGetLocalMat(Mat A,MatReuse scall,Mat *A_loc)
4934 {
4935   PetscErrorCode ierr;
4936   Mat_MPIAIJ     *mpimat=(Mat_MPIAIJ*)A->data;
4937   Mat_SeqAIJ     *mat,*a,*b;
4938   PetscInt       *ai,*aj,*bi,*bj,*cmap=mpimat->garray;
4939   MatScalar      *aa,*ba,*cam;
4940   PetscScalar    *ca;
4941   PetscInt       am=A->rmap->n,i,j,k,cstart=A->cmap->rstart;
4942   PetscInt       *ci,*cj,col,ncols_d,ncols_o,jo;
4943   PetscBool      match;
4944   MPI_Comm       comm;
4945   PetscMPIInt    size;
4946 
4947   PetscFunctionBegin;
4948   ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr);
4949   if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MPIAIJ matrix as input");
4950   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
4951   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4952   if (size == 1 && scall == MAT_REUSE_MATRIX) PetscFunctionReturn(0);
4953 
4954   ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
4955   a = (Mat_SeqAIJ*)(mpimat->A)->data;
4956   b = (Mat_SeqAIJ*)(mpimat->B)->data;
4957   ai = a->i; aj = a->j; bi = b->i; bj = b->j;
4958   aa = a->a; ba = b->a;
4959   if (scall == MAT_INITIAL_MATRIX) {
4960     if (size == 1) {
4961       ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ai,aj,aa,A_loc);CHKERRQ(ierr);
4962       PetscFunctionReturn(0);
4963     }
4964 
4965     ierr  = PetscMalloc1((1+am),&ci);CHKERRQ(ierr);
4966     ci[0] = 0;
4967     for (i=0; i<am; i++) {
4968       ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]);
4969     }
4970     ierr = PetscMalloc1((1+ci[am]),&cj);CHKERRQ(ierr);
4971     ierr = PetscMalloc1((1+ci[am]),&ca);CHKERRQ(ierr);
4972     k    = 0;
4973     for (i=0; i<am; i++) {
4974       ncols_o = bi[i+1] - bi[i];
4975       ncols_d = ai[i+1] - ai[i];
4976       /* off-diagonal portion of A */
4977       for (jo=0; jo<ncols_o; jo++) {
4978         col = cmap[*bj];
4979         if (col >= cstart) break;
4980         cj[k]   = col; bj++;
4981         ca[k++] = *ba++;
4982       }
4983       /* diagonal portion of A */
4984       for (j=0; j<ncols_d; j++) {
4985         cj[k]   = cstart + *aj++;
4986         ca[k++] = *aa++;
4987       }
4988       /* off-diagonal portion of A */
4989       for (j=jo; j<ncols_o; j++) {
4990         cj[k]   = cmap[*bj++];
4991         ca[k++] = *ba++;
4992       }
4993     }
4994     /* put together the new matrix */
4995     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr);
4996     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
4997     /* Since these are PETSc arrays, change flags to free them as necessary. */
4998     mat          = (Mat_SeqAIJ*)(*A_loc)->data;
4999     mat->free_a  = PETSC_TRUE;
5000     mat->free_ij = PETSC_TRUE;
5001     mat->nonew   = 0;
5002   } else if (scall == MAT_REUSE_MATRIX) {
5003     mat=(Mat_SeqAIJ*)(*A_loc)->data;
5004     ci = mat->i; cj = mat->j; cam = mat->a;
5005     for (i=0; i<am; i++) {
5006       /* off-diagonal portion of A */
5007       ncols_o = bi[i+1] - bi[i];
5008       for (jo=0; jo<ncols_o; jo++) {
5009         col = cmap[*bj];
5010         if (col >= cstart) break;
5011         *cam++ = *ba++; bj++;
5012       }
5013       /* diagonal portion of A */
5014       ncols_d = ai[i+1] - ai[i];
5015       for (j=0; j<ncols_d; j++) *cam++ = *aa++;
5016       /* off-diagonal portion of A */
5017       for (j=jo; j<ncols_o; j++) {
5018         *cam++ = *ba++; bj++;
5019       }
5020     }
5021   } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
5022   ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
5023   PetscFunctionReturn(0);
5024 }
5025 
5026 #undef __FUNCT__
5027 #define __FUNCT__ "MatMPIAIJGetLocalMatCondensed"
5028 /*@C
5029      MatMPIAIJGetLocalMatCondensed - Creates a SeqAIJ matrix from an MPIAIJ matrix by taking all its local rows and NON-ZERO columns
5030 
5031     Not Collective
5032 
5033    Input Parameters:
5034 +    A - the matrix
5035 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5036 -    row, col - index sets of rows and columns to extract (or NULL)
5037 
5038    Output Parameter:
5039 .    A_loc - the local sequential matrix generated
5040 
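    Example usage (a sketch only; passing NULL for row and col selects all local rows and all
    nonzero columns; A is an existing MPIAIJ matrix and Aloc is an illustrative name):
.vb
     Mat Aloc;
     MatMPIAIJGetLocalMatCondensed(A,MAT_INITIAL_MATRIX,NULL,NULL,&Aloc);
.ve
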
5041     Level: developer
5042 
5043 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMat()
5044 
5045 @*/
5046 PetscErrorCode  MatMPIAIJGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc)
5047 {
5048   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
5049   PetscErrorCode ierr;
5050   PetscInt       i,start,end,ncols,nzA,nzB,*cmap,imark,*idx;
5051   IS             isrowa,iscola;
5052   Mat            *aloc;
5053   PetscBool      match;
5054 
5055   PetscFunctionBegin;
5056   ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr);
5057   if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MPIAIJ matrix as input");
5058   ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
5059   if (!row) {
5060     start = A->rmap->rstart; end = A->rmap->rend;
5061     ierr  = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr);
5062   } else {
5063     isrowa = *row;
5064   }
5065   if (!col) {
5066     start = A->cmap->rstart;
5067     cmap  = a->garray;
5068     nzA   = a->A->cmap->n;
5069     nzB   = a->B->cmap->n;
5070     ierr  = PetscMalloc1((nzA+nzB), &idx);CHKERRQ(ierr);
5071     ncols = 0;
5072     for (i=0; i<nzB; i++) {
5073       if (cmap[i] < start) idx[ncols++] = cmap[i];
5074       else break;
5075     }
5076     imark = i;
5077     for (i=0; i<nzA; i++) idx[ncols++] = start + i;
5078     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i];
5079     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&iscola);CHKERRQ(ierr);
5080   } else {
5081     iscola = *col;
5082   }
5083   if (scall != MAT_INITIAL_MATRIX) {
5084     ierr    = PetscMalloc(sizeof(Mat),&aloc);CHKERRQ(ierr);
5085     aloc[0] = *A_loc;
5086   }
5087   ierr   = MatGetSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr);
5088   *A_loc = aloc[0];
5089   ierr   = PetscFree(aloc);CHKERRQ(ierr);
5090   if (!row) {
5091     ierr = ISDestroy(&isrowa);CHKERRQ(ierr);
5092   }
5093   if (!col) {
5094     ierr = ISDestroy(&iscola);CHKERRQ(ierr);
5095   }
5096   ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
5097   PetscFunctionReturn(0);
5098 }
5099 
5100 #undef __FUNCT__
5101 #define __FUNCT__ "MatGetBrowsOfAcols"
5102 /*@C
5103     MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns of the local part of A
5104 
5105     Collective on Mat
5106 
5107    Input Parameters:
5108 +    A,B - the matrices in mpiaij format
5109 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5110 -    rowb, colb - index sets of rows and columns of B to extract (or NULL)
5111 
5112    Output Parameter:
5113 +    rowb, colb - index sets of rows and columns of B to extract
5114 -    B_seq - the sequential matrix generated
5115 
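    Example usage (a sketch only; A and B are existing MPIAIJ matrices; the names isrow, iscol,
    and Bseq are illustrative and are created by the call when MAT_INITIAL_MATRIX is used):
.vb
     IS  isrow,iscol;
     Mat Bseq;
     MatGetBrowsOfAcols(A,B,MAT_INITIAL_MATRIX,&isrow,&iscol,&Bseq);
.ve
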
5116     Level: developer
5117 
5118 @*/
5119 PetscErrorCode  MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,Mat *B_seq)
5120 {
5121   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
5122   PetscErrorCode ierr;
5123   PetscInt       *idx,i,start,ncols,nzA,nzB,*cmap,imark;
5124   IS             isrowb,iscolb;
5125   Mat            *bseq=NULL;
5126 
5127   PetscFunctionBegin;
5128   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
5129     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
5130   }
5131   ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
5132 
5133   if (scall == MAT_INITIAL_MATRIX) {
5134     start = A->cmap->rstart;
5135     cmap  = a->garray;
5136     nzA   = a->A->cmap->n;
5137     nzB   = a->B->cmap->n;
5138     ierr  = PetscMalloc1((nzA+nzB), &idx);CHKERRQ(ierr);
5139     ncols = 0;
5140     for (i=0; i<nzB; i++) {  /* row < local row index */
5141       if (cmap[i] < start) idx[ncols++] = cmap[i];
5142       else break;
5143     }
5144     imark = i;
5145     for (i=0; i<nzA; i++) idx[ncols++] = start + i;  /* local rows */
5146     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */
5147     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&isrowb);CHKERRQ(ierr);
5148     ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr);
5149   } else {
5150     if (!rowb || !colb) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX");
5151     isrowb  = *rowb; iscolb = *colb;
5152     ierr    = PetscMalloc(sizeof(Mat),&bseq);CHKERRQ(ierr);
5153     bseq[0] = *B_seq;
5154   }
5155   ierr   = MatGetSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr);
5156   *B_seq = bseq[0];
5157   ierr   = PetscFree(bseq);CHKERRQ(ierr);
5158   if (!rowb) {
5159     ierr = ISDestroy(&isrowb);CHKERRQ(ierr);
5160   } else {
5161     *rowb = isrowb;
5162   }
5163   if (!colb) {
5164     ierr = ISDestroy(&iscolb);CHKERRQ(ierr);
5165   } else {
5166     *colb = iscolb;
5167   }
5168   ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
5169   PetscFunctionReturn(0);
5170 }
5171 
5172 #undef __FUNCT__
5173 #define __FUNCT__ "MatGetBrowsOfAoCols_MPIAIJ"
5174 /*
5175     MatGetBrowsOfAoCols_MPIAIJ - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns
5176     of the OFF-DIAGONAL portion of the local part of A
5177 
5178     Collective on Mat
5179 
5180    Input Parameters:
5181 +    A,B - the matrices in mpiaij format
5182 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5183 
5184    Output Parameters:
5185 +    startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or NULL)
5186 .    startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or NULL)
5187 .    bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or NULL)
5188 -    B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N
5189 
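    Example usage (a minimal sketch of the intended calling pattern in a parallel run; the
    startsj/bufa buffers need only be kept if the caller intends to pass MAT_REUSE_MATRIX later):
.vb
    PetscInt  *startsj_s = NULL,*startsj_r = NULL;
    MatScalar *bufa = NULL;
    Mat       B_oth;
    ierr = MatGetBrowsOfAoCols_MPIAIJ(A,B,MAT_INITIAL_MATRIX,&startsj_s,&startsj_r,&bufa,&B_oth);CHKERRQ(ierr);
    /* ... later, after the values (but not the nonzero pattern) of B change ... */
    ierr = MatGetBrowsOfAoCols_MPIAIJ(A,B,MAT_REUSE_MATRIX,&startsj_s,&startsj_r,&bufa,&B_oth);CHKERRQ(ierr);
    ierr = PetscFree2(startsj_s,startsj_r);CHKERRQ(ierr);
    ierr = PetscFree(bufa);CHKERRQ(ierr);
    ierr = MatDestroy(&B_oth);CHKERRQ(ierr);
.ve
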
5190     Level: developer
5191 
5192 */
5193 PetscErrorCode  MatGetBrowsOfAoCols_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscInt **startsj_s,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth)
5194 {
5195   VecScatter_MPI_General *gen_to,*gen_from;
5196   PetscErrorCode         ierr;
5197   Mat_MPIAIJ             *a=(Mat_MPIAIJ*)A->data;
5198   Mat_SeqAIJ             *b_oth;
5199   VecScatter             ctx =a->Mvctx;
5200   MPI_Comm               comm;
5201   PetscMPIInt            *rprocs,*sprocs,tag=((PetscObject)ctx)->tag,rank;
5202   PetscInt               *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap->n,row,*b_othi,*b_othj;
5203   PetscScalar            *rvalues,*svalues;
5204   MatScalar              *b_otha,*bufa,*bufA;
5205   PetscInt               i,j,k,l,ll,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len;
5206   MPI_Request            *rwaits = NULL,*swaits = NULL;
5207   MPI_Status             *sstatus,rstatus;
5208   PetscMPIInt            jj,size;
5209   PetscInt               *cols,sbs,rbs;
5210   PetscScalar            *vals;
5211 
5212   PetscFunctionBegin;
5213   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
5214   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
5215   if (size == 1) PetscFunctionReturn(0);
5216 
5217   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
5218     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
5219   }
5220   ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
5221   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
5222 
5223   gen_to   = (VecScatter_MPI_General*)ctx->todata;
5224   gen_from = (VecScatter_MPI_General*)ctx->fromdata;
5225   rvalues  = gen_from->values; /* holds the length of receiving row */
5226   svalues  = gen_to->values;   /* holds the length of sending row */
5227   nrecvs   = gen_from->n;
5228   nsends   = gen_to->n;
5229 
5230   ierr    = PetscMalloc2(nrecvs,&rwaits,nsends,&swaits);CHKERRQ(ierr);
5231   srow    = gen_to->indices;    /* local row index to be sent */
5232   sstarts = gen_to->starts;
5233   sprocs  = gen_to->procs;
5234   sstatus = gen_to->sstatus;
5235   sbs     = gen_to->bs;
5236   rstarts = gen_from->starts;
5237   rprocs  = gen_from->procs;
5238   rbs     = gen_from->bs;
5239 
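  /* The needed rows of B are gathered in three exchange phases that reuse the communication
     pattern of A's matrix-vector scatter (a->Mvctx): first the row lengths (i-array), then the
     column indices (j-array), and finally the numerical values (a-array). With MAT_REUSE_MATRIX
     only the a-array phase is repeated, using the buffers saved from the initial call. */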
5240   if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX;
5241   if (scall == MAT_INITIAL_MATRIX) {
5242     /* i-array */
5243     /*---------*/
5244     /*  post receives */
5245     for (i=0; i<nrecvs; i++) {
5246       rowlen = (PetscInt*)rvalues + rstarts[i]*rbs;
5247       nrows  = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */
5248       ierr   = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5249     }
5250 
5251     /* pack the outgoing message */
5252     ierr = PetscMalloc2(nsends+1,&sstartsj,nrecvs+1,&rstartsj);CHKERRQ(ierr);
5253 
5254     sstartsj[0] = 0;
5255     rstartsj[0] = 0;
5256     len         = 0; /* total length of j or a array to be sent */
5257     k           = 0;
5258     for (i=0; i<nsends; i++) {
5259       rowlen = (PetscInt*)svalues + sstarts[i]*sbs;
5260       nrows  = sstarts[i+1]-sstarts[i]; /* num of block rows */
5261       for (j=0; j<nrows; j++) {
5262         row = srow[k] + B->rmap->range[rank]; /* global row idx */
5263         for (l=0; l<sbs; l++) {
5264           ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr); /* rowlength */
5265 
5266           rowlen[j*sbs+l] = ncols;
5267 
5268           len += ncols;
5269           ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr);
5270         }
5271         k++;
5272       }
5273       ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5274 
5275       sstartsj[i+1] = len;  /* starting point of (i+1)-th outgoing msg in bufj and bufa */
5276     }
5277     /* recvs and sends of i-array are completed */
5278     i = nrecvs;
5279     while (i--) {
5280       ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5281     }
5282     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
5283 
5284     /* allocate buffers for sending j and a arrays */
5285     ierr = PetscMalloc1((len+1),&bufj);CHKERRQ(ierr);
5286     ierr = PetscMalloc1((len+1),&bufa);CHKERRQ(ierr);
5287 
5288     /* create i-array of B_oth */
5289     ierr = PetscMalloc1((aBn+2),&b_othi);CHKERRQ(ierr);
5290 
5291     b_othi[0] = 0;
5292     len       = 0; /* total length of j or a array to be received */
5293     k         = 0;
5294     for (i=0; i<nrecvs; i++) {
5295       rowlen = (PetscInt*)rvalues + rstarts[i]*rbs;
5296       nrows  = rbs*(rstarts[i+1]-rstarts[i]); /* num of rows to be received */
5297       for (j=0; j<nrows; j++) {
5298         b_othi[k+1] = b_othi[k] + rowlen[j];
5299         len        += rowlen[j]; k++;
5300       }
5301       rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */
5302     }
5303 
5304     /* allocate space for j and a arrays of B_oth */
5305     ierr = PetscMalloc1((b_othi[aBn]+1),&b_othj);CHKERRQ(ierr);
5306     ierr = PetscMalloc1((b_othi[aBn]+1),&b_otha);CHKERRQ(ierr);
5307 
5308     /* j-array */
5309     /*---------*/
5310     /*  post receives of j-array */
5311     for (i=0; i<nrecvs; i++) {
5312       nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
5313       ierr  = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5314     }
5315 
5316     /* pack the outgoing message j-array */
5317     k = 0;
5318     for (i=0; i<nsends; i++) {
5319       nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5320       bufJ  = bufj+sstartsj[i];
5321       for (j=0; j<nrows; j++) {
5322         row = srow[k++] + B->rmap->range[rank];  /* global row idx */
5323         for (ll=0; ll<sbs; ll++) {
5324           ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr);
5325           for (l=0; l<ncols; l++) {
5326             *bufJ++ = cols[l];
5327           }
5328           ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr);
5329         }
5330       }
5331       ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5332     }
5333 
5334     /* recvs and sends of j-array are completed */
5335     i = nrecvs;
5336     while (i--) {
5337       ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5338     }
5339     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
5340   } else if (scall == MAT_REUSE_MATRIX) {
5341     sstartsj = *startsj_s;
5342     rstartsj = *startsj_r;
5343     bufa     = *bufa_ptr;
5344     b_oth    = (Mat_SeqAIJ*)(*B_oth)->data;
5345     b_otha   = b_oth->a;
5346   } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Invalid MatReuse value: must be MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX");
5347 
5348   /* a-array */
5349   /*---------*/
5350   /*  post receives of a-array */
5351   for (i=0; i<nrecvs; i++) {
5352     nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
5353     ierr  = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5354   }
5355 
5356   /* pack the outgoing message a-array */
5357   k = 0;
5358   for (i=0; i<nsends; i++) {
5359     nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5360     bufA  = bufa+sstartsj[i];
5361     for (j=0; j<nrows; j++) {
5362       row = srow[k++] + B->rmap->range[rank];  /* global row idx */
5363       for (ll=0; ll<sbs; ll++) {
5364         ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr);
5365         for (l=0; l<ncols; l++) {
5366           *bufA++ = vals[l];
5367         }
5368         ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr);
5369       }
5370     }
5371     ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5372   }
5373   /* recvs and sends of a-array are completed */
5374   i = nrecvs;
5375   while (i--) {
5376     ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5377   }
5378   if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
5379   ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr);
5380 
5381   if (scall == MAT_INITIAL_MATRIX) {
5382     /* put together the new matrix */
5383     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr);
5384 
5385     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
5386     /* Since these are PETSc arrays, change flags to free them as necessary. */
5387     b_oth          = (Mat_SeqAIJ*)(*B_oth)->data;
5388     b_oth->free_a  = PETSC_TRUE;
5389     b_oth->free_ij = PETSC_TRUE;
5390     b_oth->nonew   = 0;
5391 
5392     ierr = PetscFree(bufj);CHKERRQ(ierr);
5393     if (!startsj_s || !bufa_ptr) {
5394       ierr = PetscFree2(sstartsj,rstartsj);CHKERRQ(ierr);
5395       ierr = PetscFree(bufa);CHKERRQ(ierr); /* the send buffer is not saved for reuse, so free it here */
5396     } else {
5397       *startsj_s = sstartsj;
5398       *startsj_r = rstartsj;
5399       *bufa_ptr  = bufa;
5400     }
5401   }
5402   ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
5403   PetscFunctionReturn(0);
5404 }
5405 
5406 #undef __FUNCT__
5407 #define __FUNCT__ "MatGetCommunicationStructs"
5408 /*@C
5409   MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication.
5410 
5411   Not Collective
5412 
5413   Input Parameter:
5414 . A - The matrix in mpiaij format
5415 
5416   Output Parameters:
5417 + lvec - The local vector holding off-process values from the argument to a matrix-vector product
5418 . colmap - A map from global column index to local index into lvec
5419 - multScatter - A scatter from the argument of a matrix-vector product to lvec
5420 
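  Example usage (a minimal sketch; colmap is a PetscTable only when PETSc is configured with ctable support, otherwise a PetscInt array):
.vb
  Vec        lvec;
  VecScatter Mvctx;
#if defined(PETSC_USE_CTABLE)
  PetscTable colmap;
#else
  PetscInt   *colmap;
#endif
  ierr = MatGetCommunicationStructs(A,&lvec,&colmap,&Mvctx);CHKERRQ(ierr);
  /* these are references to the matrix's internal objects; do not destroy them */
.ve
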
5421   Level: developer
5422 
5423 @*/
5424 #if defined(PETSC_USE_CTABLE)
5425 PetscErrorCode  MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter)
5426 #else
5427 PetscErrorCode  MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter)
5428 #endif
5429 {
5430   Mat_MPIAIJ *a;
5431 
5432   PetscFunctionBegin;
5433   PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
5434   PetscValidPointer(lvec, 2);
5435   PetscValidPointer(colmap, 3);
5436   PetscValidPointer(multScatter, 4);
5437   a = (Mat_MPIAIJ*) A->data;
5438   if (lvec) *lvec = a->lvec;
5439   if (colmap) *colmap = a->colmap;
5440   if (multScatter) *multScatter = a->Mvctx;
5441   PetscFunctionReturn(0);
5442 }
5443 
5444 PETSC_EXTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat,MatType,MatReuse,Mat*);
5445 PETSC_EXTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJPERM(Mat,MatType,MatReuse,Mat*);
5446 PETSC_EXTERN PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat,MatType,MatReuse,Mat*);
5447 
5448 #undef __FUNCT__
5449 #define __FUNCT__ "MatMatMultNumeric_MPIDense_MPIAIJ"
5450 /*
5451     Computes (B'*A')' since computing A*B directly (dense times sparse) is untenable
5452 
5453                n                       p                          p
5454         (              )       (              )         (                  )
5455       m (      A       )  *  n (       B      )   =   m (         C        )
5456         (              )       (              )         (                  )
5457 
5458 */
5459 PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C)
5460 {
5461   PetscErrorCode ierr;
5462   Mat            At,Bt,Ct;
5463 
5464   PetscFunctionBegin;
5465   ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr);
5466   ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr);
5467   ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,1.0,&Ct);CHKERRQ(ierr);
5468   ierr = MatDestroy(&At);CHKERRQ(ierr);
5469   ierr = MatDestroy(&Bt);CHKERRQ(ierr);
5470   ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr);
5471   ierr = MatDestroy(&Ct);CHKERRQ(ierr);
5472   PetscFunctionReturn(0);
5473 }
5474 
5475 #undef __FUNCT__
5476 #define __FUNCT__ "MatMatMultSymbolic_MPIDense_MPIAIJ"
5477 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat *C)
5478 {
5479   PetscErrorCode ierr;
5480   PetscInt       m=A->rmap->n,n=B->cmap->n;
5481   Mat            Cmat;
5482 
5483   PetscFunctionBegin;
5484   if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %D != B->rmap->n %D",A->cmap->n,B->rmap->n);
5485   ierr = MatCreate(PetscObjectComm((PetscObject)A),&Cmat);CHKERRQ(ierr);
5486   ierr = MatSetSizes(Cmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
5487   ierr = MatSetBlockSizesFromMats(Cmat,A,B);CHKERRQ(ierr);
5488   ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr);
5489   ierr = MatMPIDenseSetPreallocation(Cmat,NULL);CHKERRQ(ierr);
5490   ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5491   ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5492 
5493   Cmat->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIAIJ;
5494 
5495   *C = Cmat;
5496   PetscFunctionReturn(0);
5497 }
5498 
5499 /* ----------------------------------------------------------------*/
5500 #undef __FUNCT__
5501 #define __FUNCT__ "MatMatMult_MPIDense_MPIAIJ"
5502 PetscErrorCode MatMatMult_MPIDense_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C)
5503 {
5504   PetscErrorCode ierr;
5505 
5506   PetscFunctionBegin;
5507   if (scall == MAT_INITIAL_MATRIX) {
5508     ierr = PetscLogEventBegin(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr);
5509     ierr = MatMatMultSymbolic_MPIDense_MPIAIJ(A,B,fill,C);CHKERRQ(ierr);
5510     ierr = PetscLogEventEnd(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr);
5511   }
5512   ierr = PetscLogEventBegin(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr);
5513   ierr = MatMatMultNumeric_MPIDense_MPIAIJ(A,B,*C);CHKERRQ(ierr);
5514   ierr = PetscLogEventEnd(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr);
5515   PetscFunctionReturn(0);
5516 }
5517 
5518 #if defined(PETSC_HAVE_MUMPS)
5519 PETSC_EXTERN PetscErrorCode MatGetFactor_aij_mumps(Mat,MatFactorType,Mat*);
5520 #endif
5521 #if defined(PETSC_HAVE_PASTIX)
5522 PETSC_EXTERN PetscErrorCode MatGetFactor_mpiaij_pastix(Mat,MatFactorType,Mat*);
5523 #endif
5524 #if defined(PETSC_HAVE_SUPERLU_DIST)
5525 PETSC_EXTERN PetscErrorCode MatGetFactor_mpiaij_superlu_dist(Mat,MatFactorType,Mat*);
5526 #endif
5527 #if defined(PETSC_HAVE_CLIQUE)
5528 PETSC_EXTERN PetscErrorCode MatGetFactor_aij_clique(Mat,MatFactorType,Mat*);
5529 #endif
5530 
5531 /*MC
5532    MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.
5533 
5534    Options Database Keys:
5535 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions()
5536 
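   Example usage (a minimal sketch of the usual creation sequence; the sizes and the
   preallocation parameters d_nz, d_nnz, o_nz, o_nnz are chosen by the application):
.vb
   Mat A;
   ierr = MatCreate(comm,&A);CHKERRQ(ierr);
   ierr = MatSetSizes(A,m,n,M,N);CHKERRQ(ierr);
   ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
   ierr = MatMPIAIJSetPreallocation(A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
   /* ... MatSetValues(), MatAssemblyBegin()/MatAssemblyEnd(), use the matrix ... */
   ierr = MatDestroy(&A);CHKERRQ(ierr);
.ve
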
5537   Level: beginner
5538 
5539 .seealso: MatCreateAIJ()
5540 M*/
5541 
5542 #undef __FUNCT__
5543 #define __FUNCT__ "MatCreate_MPIAIJ"
5544 PETSC_EXTERN PetscErrorCode MatCreate_MPIAIJ(Mat B)
5545 {
5546   Mat_MPIAIJ     *b;
5547   PetscErrorCode ierr;
5548   PetscMPIInt    size;
5549 
5550   PetscFunctionBegin;
5551   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRQ(ierr);
5552 
5553   ierr          = PetscNewLog(B,&b);CHKERRQ(ierr);
5554   B->data       = (void*)b;
5555   ierr          = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
5556   B->assembled  = PETSC_FALSE;
5557   B->insertmode = NOT_SET_VALUES;
5558   b->size       = size;
5559 
5560   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank);CHKERRQ(ierr);
5561 
5562   /* build cache for off array entries formed */
5563   ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash);CHKERRQ(ierr);
5564 
5565   b->donotstash  = PETSC_FALSE;
5566   b->colmap      = 0;
5567   b->garray      = 0;
5568   b->roworiented = PETSC_TRUE;
5569 
5570   /* stuff used for matrix vector multiply */
5571   b->lvec  = NULL;
5572   b->Mvctx = NULL;
5573 
5574   /* stuff for MatGetRow() */
5575   b->rowindices   = 0;
5576   b->rowvalues    = 0;
5577   b->getrowactive = PETSC_FALSE;
5578 
5579   /* flexible pointer used in CUSP/CUSPARSE classes */
5580   b->spptr = NULL;
5581 
5582 #if defined(PETSC_HAVE_MUMPS)
5583   ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_mumps_C",MatGetFactor_aij_mumps);CHKERRQ(ierr);
5584 #endif
5585 #if defined(PETSC_HAVE_PASTIX)
5586   ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_pastix_C",MatGetFactor_mpiaij_pastix);CHKERRQ(ierr);
5587 #endif
5588 #if defined(PETSC_HAVE_SUPERLU_DIST)
5589   ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_superlu_dist_C",MatGetFactor_mpiaij_superlu_dist);CHKERRQ(ierr);
5590 #endif
5591 #if defined(PETSC_HAVE_CLIQUE)
5592   ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_clique_C",MatGetFactor_aij_clique);CHKERRQ(ierr);
5593 #endif
5594   ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPIAIJ);CHKERRQ(ierr);
5595   ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPIAIJ);CHKERRQ(ierr);
5596   ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetDiagonalBlock_C",MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr);
5597   ierr = PetscObjectComposeFunction((PetscObject)B,"MatIsTranspose_C",MatIsTranspose_MPIAIJ);CHKERRQ(ierr);
5598   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr);
5599   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr);
5600   ierr = PetscObjectComposeFunction((PetscObject)B,"MatDiagonalScaleLocal_C",MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr);
5601   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijperm_C",MatConvert_MPIAIJ_MPIAIJPERM);CHKERRQ(ierr);
5602   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijcrl_C",MatConvert_MPIAIJ_MPIAIJCRL);CHKERRQ(ierr);
5603   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C",MatConvert_MPIAIJ_MPISBAIJ);CHKERRQ(ierr);
5604   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMult_mpidense_mpiaij_C",MatMatMult_MPIDense_MPIAIJ);CHKERRQ(ierr);
5605   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultSymbolic_mpidense_mpiaij_C",MatMatMultSymbolic_MPIDense_MPIAIJ);CHKERRQ(ierr);
5606   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultNumeric_mpidense_mpiaij_C",MatMatMultNumeric_MPIDense_MPIAIJ);CHKERRQ(ierr);
5607   ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr);
5608   PetscFunctionReturn(0);
5609 }
5610 
5611 #undef __FUNCT__
5612 #define __FUNCT__ "MatCreateMPIAIJWithSplitArrays"
5613 /*@C
5614      MatCreateMPIAIJWithSplitArrays - creates an MPIAIJ matrix using arrays that contain the "diagonal"
5615          and "off-diagonal" part of the matrix in CSR format.
5616 
5617    Collective on MPI_Comm
5618 
5619    Input Parameters:
5620 +  comm - MPI communicator
5621 .  m - number of local rows (Cannot be PETSC_DECIDE)
5622 .  n - This value should be the same as the local size used in creating the
5623        x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have it
5624        calculated if N is given). For square matrices n is almost always m.
5625 .  M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given)
5626 .  N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given)
5627 .   i - row indices for "diagonal" portion of matrix
5628 .   j - column indices, local to the "diagonal" block (i.e., in the range [0,n))
5629 .   a - matrix values
5630 .   oi - row indices for "off-diagonal" portion of matrix
5631 .   oj - column indices for the "off-diagonal" portion, given as global column indices
5632 -   oa - matrix values
5633 
5634    Output Parameter:
5635 .   mat - the matrix
5636 
5637    Level: advanced
5638 
5639    Notes:
5640        The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc. The user
5641        must free the arrays once the matrix has been destroyed and not before.
5642 
5643        The i and j indices are 0 based
5644 
5645        See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix
5646 
5647        This sets local rows and cannot be used to set off-processor values.
5648 
5649        Use of this routine is discouraged because it is inflexible and cumbersome to use. It is extremely rare that a
5650        legacy application natively assembles into exactly this split format. The code to do so is nontrivial and does
5651        not easily support in-place reassembly. It is recommended to use MatSetValues() (or a variant thereof) because
5652        the resulting assembly is easier to implement, will work with any matrix format, and the user does not have to
5653        keep track of the underlying array. Use MatSetOption(A,MAT_IGNORE_OFF_PROC_ENTRIES,PETSC_TRUE) to disable all
5654        communication if it is known that only local entries will be set.
5655 
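       Example usage (a minimal sketch; the CSR arrays are built by the application and, as noted
       above, must remain valid until the matrix is destroyed):
.vb
       /* i,j,a describe the "diagonal" block (j holds local column indices in [0,n));
          oi,oj,oa describe the "off-diagonal" block (oj holds global column indices) */
       Mat A;
       ierr = MatCreateMPIAIJWithSplitArrays(comm,m,n,PETSC_DETERMINE,PETSC_DETERMINE,i,j,a,oi,oj,oa,&A);CHKERRQ(ierr);
       /* ... use A ... */
       ierr = MatDestroy(&A);CHKERRQ(ierr);
       /* only now may i,j,a,oi,oj,oa be freed */
.ve
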
5656 .keywords: matrix, aij, compressed row, sparse, parallel
5657 
5658 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
5659           MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithArrays()
5660 @*/
5661 PetscErrorCode  MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat)
5662 {
5663   PetscErrorCode ierr;
5664   Mat_MPIAIJ     *maij;
5665 
5666   PetscFunctionBegin;
5667   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
5668   if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
5669   if (oi[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0");
5670   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
5671   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
5672   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
5673   maij = (Mat_MPIAIJ*) (*mat)->data;
5674 
5675   (*mat)->preallocated = PETSC_TRUE;
5676 
5677   ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr);
5678   ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr);
5679 
5680   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr);
5681   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr);
5682 
5683   ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5684   ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5685   ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5686   ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5687 
5688   ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5689   ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5690   ierr = MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
5691   PetscFunctionReturn(0);
5692 }
5693 
5694 /*
5695     Special version for direct calls from Fortran
5696 */
5697 #include <petsc-private/fortranimpl.h>
5698 
5699 #if defined(PETSC_HAVE_FORTRAN_CAPS)
5700 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ
5701 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
5702 #define matsetvaluesmpiaij_ matsetvaluesmpiaij
5703 #endif
5704 
5705 /* Redefine these macros so that they can be used in a void function: errors abort instead of returning an error code */
5706 #undef CHKERRQ
5707 #define CHKERRQ(ierr) CHKERRABORT(PETSC_COMM_WORLD,ierr)
5708 #undef SETERRQ2
5709 #define SETERRQ2(comm,ierr,b,c,d) CHKERRABORT(comm,ierr)
5710 #undef SETERRQ3
5711 #define SETERRQ3(comm,ierr,b,c,d,e) CHKERRABORT(comm,ierr)
5712 #undef SETERRQ
5713 #define SETERRQ(c,ierr,b) CHKERRABORT(c,ierr)
5714 
5715 #undef __FUNCT__
5716 #define __FUNCT__ "matsetvaluesmpiaij_"
5717 PETSC_EXTERN void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr)
5718 {
5719   Mat            mat  = *mmat;
5720   PetscInt       m    = *mm, n = *mn;
5721   InsertMode     addv = *maddv;
5722   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
5723   PetscScalar    value;
5724   PetscErrorCode ierr;
5725 
5726   MatCheckPreallocated(mat,1);
5727   if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv;
5728 
5729 #if defined(PETSC_USE_DEBUG)
5730   else if (mat->insertmode != addv) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
5731 #endif
5732   {
5733     PetscInt  i,j,rstart  = mat->rmap->rstart,rend = mat->rmap->rend;
5734     PetscInt  cstart      = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
5735     PetscBool roworiented = aij->roworiented;
5736 
5737     /* Some Variables required in the macro */
5738     Mat        A                 = aij->A;
5739     Mat_SeqAIJ *a                = (Mat_SeqAIJ*)A->data;
5740     PetscInt   *aimax            = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
5741     MatScalar  *aa               = a->a;
5742     PetscBool  ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES)) ? PETSC_TRUE : PETSC_FALSE);
5743     Mat        B                 = aij->B;
5744     Mat_SeqAIJ *b                = (Mat_SeqAIJ*)B->data;
5745     PetscInt   *bimax            = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
5746     MatScalar  *ba               = b->a;
5747 
5748     PetscInt  *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
5749     PetscInt  nonew = a->nonew;
5750     MatScalar *ap1,*ap2;
5751 
5752     PetscFunctionBegin;
5753     for (i=0; i<m; i++) {
5754       if (im[i] < 0) continue;
5755 #if defined(PETSC_USE_DEBUG)
5756       if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
5757 #endif
5758       if (im[i] >= rstart && im[i] < rend) {
5759         row      = im[i] - rstart;
5760         lastcol1 = -1;
5761         rp1      = aj + ai[row];
5762         ap1      = aa + ai[row];
5763         rmax1    = aimax[row];
5764         nrow1    = ailen[row];
5765         low1     = 0;
5766         high1    = nrow1;
5767         lastcol2 = -1;
5768         rp2      = bj + bi[row];
5769         ap2      = ba + bi[row];
5770         rmax2    = bimax[row];
5771         nrow2    = bilen[row];
5772         low2     = 0;
5773         high2    = nrow2;
5774 
5775         for (j=0; j<n; j++) {
5776           if (roworiented) value = v[i*n+j];
5777           else value = v[i+j*m];
5778           if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
5779           if (in[j] >= cstart && in[j] < cend) {
5780             col = in[j] - cstart;
5781             MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
5782           } else if (in[j] < 0) continue;
5783 #if defined(PETSC_USE_DEBUG)
5784           else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
5785 #endif
5786           else {
5787             if (mat->was_assembled) {
5788               if (!aij->colmap) {
5789                 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
5790               }
5791 #if defined(PETSC_USE_CTABLE)
5792               ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
5793               col--;
5794 #else
5795               col = aij->colmap[in[j]] - 1;
5796 #endif
5797               if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
5798                 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
5799                 col  =  in[j];
5800                 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
5801                 B     = aij->B;
5802                 b     = (Mat_SeqAIJ*)B->data;
5803                 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
5804                 rp2   = bj + bi[row];
5805                 ap2   = ba + bi[row];
5806                 rmax2 = bimax[row];
5807                 nrow2 = bilen[row];
5808                 low2  = 0;
5809                 high2 = nrow2;
5810                 bm    = aij->B->rmap->n;
5811                 ba    = b->a;
5812               }
5813             } else col = in[j];
5814             MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
5815           }
5816         }
5817       } else if (!aij->donotstash) {
5818         if (roworiented) {
5819           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
5820         } else {
5821           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
5822         }
5823       }
5824     }
5825   }
5826   PetscFunctionReturnVoid();
5827 }
5828 
5829