
#include <../src/mat/impls/aij/mpi/mpiaij.h>   /*I "petscmat.h" I*/
#include <petsc-private/vecimpl.h>
#include <petscblaslapack.h>
#include <petscsf.h>

/*MC
   MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices.

   This matrix type is identical to MATSEQAIJ when constructed with a single process communicator,
   and MATMPIAIJ otherwise.  As a result, for single process communicators,
   MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
   for communicators controlling multiple processes.  It is recommended that you call both of
   the above preallocation routines for simplicity.

   Options Database Keys:
. -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions()

  Developer Notes: Subclasses include MATAIJCUSP, MATAIJCUSPARSE, MATAIJPERM, and MATAIJCRL; the type also
   automatically switches over to use inodes when enough exist.

  Level: beginner

.seealso: MatCreateAIJ(), MatCreateSeqAIJ(), MATSEQAIJ, MATMPIAIJ
M*/
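
/*
   A minimal usage sketch (an illustration, not code from this file; M, N, nz, dnz,
   and onz are placeholder values): create an AIJ matrix and call both preallocation
   routines so the same code runs correctly on one process or many.

     Mat A;
     ierr = MatCreate(comm,&A);CHKERRQ(ierr);
     ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,M,N);CHKERRQ(ierr);
     ierr = MatSetType(A,MATAIJ);CHKERRQ(ierr);
     ierr = MatSeqAIJSetPreallocation(A,nz,NULL);CHKERRQ(ierr);            used on 1 process
     ierr = MatMPIAIJSetPreallocation(A,dnz,NULL,onz,NULL);CHKERRQ(ierr);  used on >1 process
*/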

/*MC
   MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices.

   This matrix type is identical to MATSEQAIJCRL when constructed with a single process communicator,
   and MATMPIAIJCRL otherwise.  As a result, for single process communicators,
   MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
   for communicators controlling multiple processes.  It is recommended that you call both of
   the above preallocation routines for simplicity.

   Options Database Keys:
. -mat_type aijcrl - sets the matrix type to "aijcrl" during a call to MatSetFromOptions()

  Level: beginner

.seealso: MatCreateMPIAIJCRL(), MATSEQAIJCRL, MATMPIAIJCRL
M*/

#undef __FUNCT__
#define __FUNCT__ "MatFindNonzeroRows_MPIAIJ"
PetscErrorCode MatFindNonzeroRows_MPIAIJ(Mat M,IS *keptrows)
{
  PetscErrorCode  ierr;
  Mat_MPIAIJ      *mat = (Mat_MPIAIJ*)M->data;
  Mat_SeqAIJ      *a   = (Mat_SeqAIJ*)mat->A->data;
  Mat_SeqAIJ      *b   = (Mat_SeqAIJ*)mat->B->data;
  const PetscInt  *ia,*ib;
  const MatScalar *aa,*bb;
  PetscInt        na,nb,i,j,*rows,cnt=0,n0rows;
  PetscInt        m = M->rmap->n,rstart = M->rmap->rstart;

  PetscFunctionBegin;
  *keptrows = 0;
  ia        = a->i;
  ib        = b->i;
  for (i=0; i<m; i++) {
    na = ia[i+1] - ia[i];
    nb = ib[i+1] - ib[i];
    if (!na && !nb) {
      cnt++;
      goto ok1;
    }
    aa = a->a + ia[i];
    for (j=0; j<na; j++) {
      if (aa[j] != 0.0) goto ok1;
    }
    bb = b->a + ib[i];
    for (j=0; j<nb; j++) {
      if (bb[j] != 0.0) goto ok1;
    }
    cnt++;
ok1:;
  }
  ierr = MPI_Allreduce(&cnt,&n0rows,1,MPIU_INT,MPIU_SUM,PetscObjectComm((PetscObject)M));CHKERRQ(ierr);
  if (!n0rows) PetscFunctionReturn(0);
  ierr = PetscMalloc1((M->rmap->n-cnt),&rows);CHKERRQ(ierr);
  cnt  = 0;
  for (i=0; i<m; i++) {
    na = ia[i+1] - ia[i];
    nb = ib[i+1] - ib[i];
    if (!na && !nb) continue;
    aa = a->a + ia[i];
    for (j=0; j<na; j++) {
      if (aa[j] != 0.0) {
        rows[cnt++] = rstart + i;
        goto ok2;
      }
    }
    bb = b->a + ib[i];
    for (j=0; j<nb; j++) {
      if (bb[j] != 0.0) {
        rows[cnt++] = rstart + i;
        goto ok2;
      }
    }
ok2:;
  }
  ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),cnt,rows,PETSC_OWN_POINTER,keptrows);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatFindZeroDiagonals_MPIAIJ"
PetscErrorCode MatFindZeroDiagonals_MPIAIJ(Mat M,IS *zrows)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)M->data;
  PetscErrorCode ierr;
  PetscInt       i,rstart,nrows,*rows;

  PetscFunctionBegin;
  *zrows = NULL;
  ierr   = MatFindZeroDiagonals_SeqAIJ_Private(aij->A,&nrows,&rows);CHKERRQ(ierr);
  ierr   = MatGetOwnershipRange(M,&rstart,NULL);CHKERRQ(ierr);
  for (i=0; i<nrows; i++) rows[i] += rstart;
  ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),nrows,rows,PETSC_OWN_POINTER,zrows);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetColumnNorms_MPIAIJ"
PetscErrorCode MatGetColumnNorms_MPIAIJ(Mat A,NormType type,PetscReal *norms)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)A->data;
  PetscInt       i,n,*garray = aij->garray;
  Mat_SeqAIJ     *a_aij = (Mat_SeqAIJ*) aij->A->data;
  Mat_SeqAIJ     *b_aij = (Mat_SeqAIJ*) aij->B->data;
  PetscReal      *work;

  PetscFunctionBegin;
  ierr = MatGetSize(A,NULL,&n);CHKERRQ(ierr);
  ierr = PetscCalloc1(n,&work);CHKERRQ(ierr);
  if (type == NORM_2) {
    for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
      work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]*a_aij->a[i]);
    }
    for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
      work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]*b_aij->a[i]);
    }
  } else if (type == NORM_1) {
    for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
      work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]);
    }
    for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
      work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]);
    }
  } else if (type == NORM_INFINITY) {
    for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
      work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]);
    }
    for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
      work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]),work[garray[b_aij->j[i]]]);
    }
  } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Unknown NormType");
  if (type == NORM_INFINITY) {
    ierr = MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
  } else {
    ierr = MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
  }
  ierr = PetscFree(work);CHKERRQ(ierr);
  if (type == NORM_2) {
    for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatDistribute_MPIAIJ"
/*
    Distributes a SeqAIJ matrix across a set of processes. Code stolen from
    MatLoad_MPIAIJ(). Horrible lack of reuse. Should be a routine for each matrix type.

    Only for square matrices

    Used by a preconditioner, hence PETSC_EXTERN
*/
PETSC_EXTERN PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat)
{
  PetscMPIInt    rank,size;
  PetscInt       *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz = 0,*gmataj,cnt,row,*ld,bses[2];
  PetscErrorCode ierr;
  Mat            mat;
  Mat_SeqAIJ     *gmata;
  PetscMPIInt    tag;
  MPI_Status     status;
  PetscBool      aij;
  MatScalar      *gmataa,*ao,*ad,*gmataarestore=0;

  PetscFunctionBegin;
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  if (!rank) {
    ierr = PetscObjectTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);CHKERRQ(ierr);
    if (!aij) SETERRQ1(PetscObjectComm((PetscObject)gmat),PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name);
  }
  if (reuse == MAT_INITIAL_MATRIX) {
    ierr = MatCreate(comm,&mat);CHKERRQ(ierr);
    ierr = MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
    ierr = MatGetBlockSizes(gmat,&bses[0],&bses[1]);CHKERRQ(ierr);
    ierr = MPI_Bcast(bses,2,MPIU_INT,0,comm);CHKERRQ(ierr);
    ierr = MatSetBlockSizes(mat,bses[0],bses[1]);CHKERRQ(ierr);
    ierr = MatSetType(mat,MATAIJ);CHKERRQ(ierr);
    ierr = PetscMalloc1((size+1),&rowners);CHKERRQ(ierr);
    ierr = PetscMalloc2(m,&dlens,m,&olens);CHKERRQ(ierr);
    ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);

    rowners[0] = 0;
    for (i=2; i<=size; i++) rowners[i] += rowners[i-1];
    rstart = rowners[rank];
    rend   = rowners[rank+1];
    ierr   = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
    if (!rank) {
      gmata = (Mat_SeqAIJ*) gmat->data;
      /* send row lengths to all processors */
      for (i=0; i<m; i++) dlens[i] = gmata->ilen[i];
      for (i=1; i<size; i++) {
        ierr = MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
      }
      /* determine the number of diagonal and off-diagonal entries in each row */
      ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
      ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr);
      jj   = 0;
      for (i=0; i<m; i++) {
        for (j=0; j<dlens[i]; j++) {
          if (gmata->j[jj] < rstart) ld[i]++;
          if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++;
          jj++;
        }
      }
      /* send column indices to other processes */
      for (i=1; i<size; i++) {
        nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
        ierr = MPI_Send(&nz,1,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
        ierr = MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
      }

      /* send numerical values to other processes */
      for (i=1; i<size; i++) {
        nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
        ierr = MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
      }
      gmataa = gmata->a;
      gmataj = gmata->j;

    } else {
      /* receive row lengths */
      ierr = MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
      /* receive column indices */
      ierr = MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
      ierr = PetscMalloc2(nz,&gmataa,nz,&gmataj);CHKERRQ(ierr);
      ierr = MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
      /* determine the number of diagonal and off-diagonal entries in each row */
      ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
      ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr);
      jj   = 0;
      for (i=0; i<m; i++) {
        for (j=0; j<dlens[i]; j++) {
          if (gmataj[jj] < rstart) ld[i]++;
          if (gmataj[jj] < rstart || gmataj[jj] >= rend) olens[i]++;
          jj++;
        }
      }
      /* receive numerical values */
      ierr = PetscMemzero(gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr);
      ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
    }
    /* set preallocation */
    for (i=0; i<m; i++) {
      dlens[i] -= olens[i];
    }
    ierr = MatSeqAIJSetPreallocation(mat,0,dlens);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);CHKERRQ(ierr);

    for (i=0; i<m; i++) {
      dlens[i] += olens[i];
    }
    cnt = 0;
    for (i=0; i<m; i++) {
      row  = rstart + i;
      ierr = MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);CHKERRQ(ierr);
      cnt += dlens[i];
    }
    if (rank) {
      ierr = PetscFree2(gmataa,gmataj);CHKERRQ(ierr);
    }
    ierr = PetscFree2(dlens,olens);CHKERRQ(ierr);
    ierr = PetscFree(rowners);CHKERRQ(ierr);

    ((Mat_MPIAIJ*)(mat->data))->ld = ld;

    *inmat = mat;
  } else {   /* column indices are already set; only need to move over numerical values from process 0 */
    Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data;
    Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data;
    mat  = *inmat;
    ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
    if (!rank) {
      /* send numerical values to other processes */
      gmata  = (Mat_SeqAIJ*) gmat->data;
      ierr   = MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);CHKERRQ(ierr);
      gmataa = gmata->a;
      for (i=1; i<size; i++) {
        nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
        ierr = MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
      }
      nz = gmata->i[rowners[1]]-gmata->i[rowners[0]];
    } else {
      /* receive numerical values from process 0 */
      nz   = Ad->nz + Ao->nz;
      ierr = PetscMalloc1(nz,&gmataa);CHKERRQ(ierr); gmataarestore = gmataa;
      ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
    }
    /* transfer numerical values into the diagonal A and off-diagonal B parts of mat */
    ld = ((Mat_MPIAIJ*)(mat->data))->ld;
    ad = Ad->a;
    ao = Ao->a;
    if (mat->rmap->n) {
      i  = 0;
      nz = ld[i];                                   ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
      nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
    }
    for (i=1; i<mat->rmap->n; i++) {
      nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
      nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
    }
    i--;
    if (mat->rmap->n) {
      nz = Ao->i[i+1] - Ao->i[i] - ld[i];           ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr);
    }
    if (rank) {
      ierr = PetscFree(gmataarestore);CHKERRQ(ierr);
    }
  }
  ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
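
/*
   A minimal usage sketch (an illustration only; gseq, mlocal, and M are hypothetical
   names, and gseq must be a valid Mat on every process even though only the copy on
   process 0 is read, since MatGetBlockSizes() above is called on all ranks):

     Mat      Adist;
     PetscInt mlocal = PETSC_DECIDE;
     ierr = PetscSplitOwnership(comm,&mlocal,&M);CHKERRQ(ierr);
     ierr = MatDistribute_MPIAIJ(comm,gseq,mlocal,MAT_INITIAL_MATRIX,&Adist);CHKERRQ(ierr);
*/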

/*
  Local utility routine that creates a mapping from the global column
number to the local number in the off-diagonal part of the local
storage of the matrix.  When PETSC_USE_CTABLE is used this is scalable at
a slightly higher hash table cost; without it it is not scalable (each processor
has an order N integer array) but is fast to access.
*/
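
/*
   For example (mirroring the lookups in MatSetValues_MPIAIJ() below), the local
   column lcol for a global column gcol of the off-diagonal block is found with
   (gcol and lcol are illustrative names):

#if defined(PETSC_USE_CTABLE)
     ierr = PetscTableFind(aij->colmap,gcol+1,&lcol);CHKERRQ(ierr); lcol--;
#else
     lcol = aij->colmap[gcol] - 1;
#endif

   Entries are stored shifted by one so that a result of zero means "not present".
*/
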
#undef __FUNCT__
#define __FUNCT__ "MatCreateColmap_MPIAIJ_Private"
PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       n = aij->B->cmap->n,i;

  PetscFunctionBegin;
  if (!aij->garray) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"MPIAIJ Matrix was assembled but is missing garray");
#if defined(PETSC_USE_CTABLE)
  ierr = PetscTableCreate(n,mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr);
  for (i=0; i<n; i++) {
    ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1,INSERT_VALUES);CHKERRQ(ierr);
  }
#else
  ierr = PetscCalloc1((mat->cmap->N+1),&aij->colmap);CHKERRQ(ierr);
  ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N+1)*sizeof(PetscInt));CHKERRQ(ierr);
  for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
#endif
  PetscFunctionReturn(0);
}

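/*
   The two macros below insert a single value into one row of the diagonal (A) or
   off-diagonal (B) SeqAIJ block: a binary search narrows the candidate window to
   at most five entries, a short linear scan locates the column, and if the column
   is not present (and neither ignorezeroentries nor nonew suppresses the insert)
   all later entries of the row are shifted up to make room for the new nonzero.
*/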
#define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \
{ \
    if (col <= lastcol1)  low1 = 0;     \
    else                 high1 = nrow1; \
    lastcol1 = col;\
    while (high1-low1 > 5) { \
      t = (low1+high1)/2; \
      if (rp1[t] > col) high1 = t; \
      else              low1  = t; \
    } \
    for (_i=low1; _i<high1; _i++) { \
      if (rp1[_i] > col) break; \
      if (rp1[_i] == col) { \
        if (addv == ADD_VALUES) ap1[_i] += value;   \
        else                    ap1[_i] = value; \
        goto a_noinsert; \
      } \
    }  \
    if (value == 0.0 && ignorezeroentries) {low1 = 0; high1 = nrow1; goto a_noinsert;} \
    if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;}                \
    if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
    MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \
    N = nrow1++ - 1; a->nz++; high1++; \
    /* shift up all the later entries in this row */ \
    for (ii=N; ii>=_i; ii--) { \
      rp1[ii+1] = rp1[ii]; \
      ap1[ii+1] = ap1[ii]; \
    } \
    rp1[_i] = col;  \
    ap1[_i] = value;  \
    A->nonzerostate++;\
    a_noinsert: ; \
    ailen[row] = nrow1; \
}

#define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \
  { \
    if (col <= lastcol2) low2 = 0;                        \
    else high2 = nrow2;                                   \
    lastcol2 = col;                                       \
    while (high2-low2 > 5) {                              \
      t = (low2+high2)/2;                                 \
      if (rp2[t] > col) high2 = t;                        \
      else             low2  = t;                         \
    }                                                     \
    for (_i=low2; _i<high2; _i++) {                       \
      if (rp2[_i] > col) break;                           \
      if (rp2[_i] == col) {                               \
        if (addv == ADD_VALUES) ap2[_i] += value;         \
        else                    ap2[_i] = value;          \
        goto b_noinsert;                                  \
      }                                                   \
    }                                                     \
    if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
    if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;}                        \
    if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
    MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \
    N = nrow2++ - 1; b->nz++; high2++;                    \
    /* shift up all the later entries in this row */      \
    for (ii=N; ii>=_i; ii--) {                            \
      rp2[ii+1] = rp2[ii];                                \
      ap2[ii+1] = ap2[ii];                                \
    }                                                     \
    rp2[_i] = col;                                        \
    ap2[_i] = value;                                      \
    B->nonzerostate++;                                    \
    b_noinsert: ;                                         \
    bilen[row] = nrow2;                                   \
  }

#undef __FUNCT__
#define __FUNCT__ "MatSetValuesRow_MPIAIJ"
PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[])
{
  Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)A->data;
  Mat_SeqAIJ     *a   = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data;
  PetscErrorCode ierr;
  PetscInt       l,*garray = mat->garray,diag;

  PetscFunctionBegin;
  /* code only works for square matrices A */

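  /*
     v holds the entire local row in global column order: first the off-diagonal
     entries left of the diagonal block, then the diagonal-block entries, then
     the off-diagonal entries to the right; the three memcpys below scatter
     these pieces into the B, A, and B storage respectively.
  */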
  /* find size of row to the left of the diagonal part */
  ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr);
  row  = row - diag;
  for (l=0; l<b->i[row+1]-b->i[row]; l++) {
    if (garray[b->j[b->i[row]+l]] > diag) break;
  }
  ierr = PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));CHKERRQ(ierr);

  /* diagonal part */
  ierr = PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));CHKERRQ(ierr);

  /* right of diagonal part */
  ierr = PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetValues_MPIAIJ"
PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscScalar    value;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart  = mat->rmap->rstart,rend = mat->rmap->rend;
  PetscInt       cstart      = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
  PetscBool      roworiented = aij->roworiented;

  /* Some variables required in the macros */
  Mat        A                 = aij->A;
  Mat_SeqAIJ *a                = (Mat_SeqAIJ*)A->data;
  PetscInt   *aimax            = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
  MatScalar  *aa               = a->a;
  PetscBool  ignorezeroentries = a->ignorezeroentries;
  Mat        B                 = aij->B;
  Mat_SeqAIJ *b                = (Mat_SeqAIJ*)B->data;
  PetscInt   *bimax            = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
  MatScalar  *ba               = b->a;

  PetscInt  *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
  PetscInt  nonew;
  MatScalar *ap1,*ap2;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (im[i] < 0) continue;
#if defined(PETSC_USE_DEBUG)
    if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
#endif
    if (im[i] >= rstart && im[i] < rend) {
      row      = im[i] - rstart;
      lastcol1 = -1;
      rp1      = aj + ai[row];
      ap1      = aa + ai[row];
      rmax1    = aimax[row];
      nrow1    = ailen[row];
      low1     = 0;
      high1    = nrow1;
      lastcol2 = -1;
      rp2      = bj + bi[row];
      ap2      = ba + bi[row];
      rmax2    = bimax[row];
      nrow2    = bilen[row];
      low2     = 0;
      high2    = nrow2;

      for (j=0; j<n; j++) {
        if (roworiented) value = v[i*n+j];
        else             value = v[i+j*m];
        if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
        if (in[j] >= cstart && in[j] < cend) {
          col   = in[j] - cstart;
          nonew = a->nonew;
          MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
        } else if (in[j] < 0) continue;
#if defined(PETSC_USE_DEBUG)
        else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
#endif
        else {
          if (mat->was_assembled) {
            if (!aij->colmap) {
              ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
            }
#if defined(PETSC_USE_CTABLE)
            ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
            col--;
#else
            col = aij->colmap[in[j]] - 1;
#endif
            if (col < 0 && !((Mat_SeqAIJ*)(aij->B->data))->nonew) {
              ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
              col  = in[j];
              /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
              B     = aij->B;
              b     = (Mat_SeqAIJ*)B->data;
              bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a;
              rp2   = bj + bi[row];
              ap2   = ba + bi[row];
              rmax2 = bimax[row];
              nrow2 = bilen[row];
              low2  = 0;
              high2 = nrow2;
              bm    = aij->B->rmap->n;
              ba    = b->a;
            } else if (col < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", im[i], in[j]);
          } else col = in[j];
          nonew = b->nonew;
          MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
        }
      }
    } else {
      if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
      if (!aij->donotstash) {
        mat->assembled = PETSC_FALSE;
        if (roworiented) {
          ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetValues_MPIAIJ"
PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
  PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]); */
    if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      for (j=0; j<n; j++) {
        if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */
        if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
        if (idxn[j] >= cstart && idxn[j] < cend) {
          col  = idxn[j] - cstart;
          ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
        } else {
          if (!aij->colmap) {
            ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
          }
#if defined(PETSC_USE_CTABLE)
          ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
          col--;
#else
          col = aij->colmap[idxn[j]] - 1;
#endif
          if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
          else {
            ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
          }
        }
      }
    } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
  }
  PetscFunctionReturn(0);
}

extern PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat,Vec,Vec);

#undef __FUNCT__
#define __FUNCT__ "MatAssemblyBegin_MPIAIJ"
PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       nstash,reallocs;
  InsertMode     addv;

  PetscFunctionBegin;
  if (aij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0);

  /* make sure all processors are either in INSERT_VALUES or ADD_VALUES mode; the two
     modes have distinct bit patterns, so the bitwise OR across processes equals
     ADD_VALUES|INSERT_VALUES exactly when both modes were used somewhere */
  ierr = MPI_Allreduce((PetscEnum*)&mat->insertmode,(PetscEnum*)&addv,1,MPIU_ENUM,MPI_BOR,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
  if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
  mat->insertmode = addv; /* in case this processor had no cache */

  ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
  ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatAssemblyEnd_MPIAIJ"
PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  Mat_SeqAIJ     *a   = (Mat_SeqAIJ*)aij->A->data;
  PetscErrorCode ierr;
  PetscMPIInt    n;
  PetscInt       i,j,rstart,ncols,flg;
  PetscInt       *row,*col;
  PetscBool      other_disassembled;
  PetscScalar    *val;
  InsertMode     addv = mat->insertmode;

  /* do not use 'b = (Mat_SeqAIJ*)aij->B->data' as B can be reset in disassembly */

  PetscFunctionBegin;
  if (!aij->donotstash && !mat->nooffprocentries) {
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n; ) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) {
          if (row[j] != rstart) break;
        }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        /* Now assemble all these values with a single function call */
        ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);

        i = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
  }
  ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);

  /* determine if any processor has disassembled; if so we must
     also disassemble ourselves, in order that we may reassemble. */
  /*
     if the nonzero structure of submatrix B cannot change then we know that
     no processor disassembled, thus we can skip this stuff
  */
  if (!((Mat_SeqAIJ*)aij->B->data)->nonew) {
    ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    if (mat->was_assembled && !other_disassembled) {
      ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
    }
  }
  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
  }
  ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);

  ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);

  aij->rowvalues = 0;

  /* used by MatAXPY() */
  a->xtoy = 0; ((Mat_SeqAIJ*)aij->B->data)->xtoy = 0;   /* b->xtoy = 0 */
  a->XtoY = 0; ((Mat_SeqAIJ*)aij->B->data)->XtoY = 0;   /* b->XtoY = 0 */

  ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
  if (a->inode.size) mat->ops->multdiagonalblock = MatMultDiagonalBlock_MPIAIJ;

  /* if no new nonzero locations are allowed in the matrix then only set the matrix state the first time through */
  if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
    PetscObjectState state = aij->A->nonzerostate + aij->B->nonzerostate;
    ierr = MPI_Allreduce(&state,&mat->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatZeroEntries_MPIAIJ"
PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
{
  Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
  ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatZeroRows_MPIAIJ"
PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
{
  Mat_MPIAIJ    *mat    = (Mat_MPIAIJ *) A->data;
  PetscInt      *owners = A->rmap->range;
  PetscInt       n      = A->rmap->n;
  PetscMPIInt    size   = mat->size;
  PetscSF        sf;
  PetscInt      *lrows;
  PetscSFNode   *rrows;
  PetscInt       lastidx = -1, r, p = 0, len = 0;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* Create an SF where the leaves are the input rows and the roots are the owned rows:
     each requested global row is addressed to the (rank, local row) pair that owns it,
     so reducing the row numbers onto the roots marks the local rows to be zeroed */
  ierr = PetscMalloc1(n, &lrows);CHKERRQ(ierr);
  for (r = 0; r < n; ++r) lrows[r] = -1;
  ierr = PetscMalloc1(N, &rrows);CHKERRQ(ierr);
  for (r = 0; r < N; ++r) {
    const PetscInt idx   = rows[r];
    PetscBool      found = PETSC_FALSE;
    /* Trick for efficient searching for sorted rows */
    if (lastidx > idx) p = 0;
    lastidx = idx;
    for (; p < size; ++p) {
      if (idx >= owners[p] && idx < owners[p+1]) {
        rrows[r].rank  = p;
        rrows[r].index = rows[r] - owners[p];
        found = PETSC_TRUE;
        break;
      }
    }
    if (!found) SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row %d not found in matrix distribution", idx);
  }
  ierr = PetscSFCreate(PetscObjectComm((PetscObject) A), &sf);CHKERRQ(ierr);
  ierr = PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER);CHKERRQ(ierr);
  /* Collect flags for rows to be zeroed */
  ierr = PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
  ierr = PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
  ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
  /* Compress and put in row numbers */
  for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r;
  /* fix right-hand side if needed */
  if (x && b) {
    const PetscScalar *xx;
    PetscScalar       *bb;

    ierr = VecGetArrayRead(x, &xx);CHKERRQ(ierr);
    ierr = VecGetArray(b, &bb);CHKERRQ(ierr);
    for (r = 0; r < len; ++r) bb[lrows[r]] = diag*xx[lrows[r]];
    ierr = VecRestoreArrayRead(x, &xx);CHKERRQ(ierr);
    ierr = VecRestoreArray(b, &bb);CHKERRQ(ierr);
  }
  /* Must zero mat->B before mat->A because the (diag) case below may put values into mat->B */
  ierr = MatZeroRows(mat->B, len, lrows, 0.0, 0,0);CHKERRQ(ierr);
  if ((diag != 0.0) && (mat->A->rmap->N == mat->A->cmap->N)) {
    ierr = MatZeroRows(mat->A, len, lrows, diag, NULL, NULL);CHKERRQ(ierr);
  } else if (diag != 0.0) {
    ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
    if (((Mat_SeqAIJ *) mat->A->data)->nonew) SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "MatZeroRows() on rectangular matrices cannot be used with the Mat options\nMAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
    for (r = 0; r < len; ++r) {
      const PetscInt row = lrows[r] + A->rmap->rstart;
      ierr = MatSetValues(A, 1, &row, 1, &row, &diag, INSERT_VALUES);CHKERRQ(ierr);
    }
    ierr = MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  } else {
    ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
  }
  ierr = PetscFree(lrows);CHKERRQ(ierr);

  /* only change the matrix nonzero state if the pattern was allowed to change */
  if (!((Mat_SeqAIJ*)(mat->A->data))->keepnonzeropattern) {
    PetscObjectState state = mat->A->nonzerostate + mat->B->nonzerostate;
    ierr = MPI_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatZeroRowsColumns_MPIAIJ"
PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
{
  Mat_MPIAIJ        *l = (Mat_MPIAIJ*)A->data;
  PetscErrorCode    ierr;
  PetscMPIInt       size = l->size,n = A->rmap->n,lastidx = -1;
  PetscInt          i,j,r,m,p = 0,len = 0;
  PetscInt          *lrows,*owners = A->rmap->range;
  PetscSFNode       *rrows;
  PetscSF           sf;
  const PetscScalar *xx;
  PetscScalar       *bb,*mask;
  Vec               xmask,lmask;
  Mat_SeqAIJ        *aij = (Mat_SeqAIJ*)l->B->data;
  const PetscInt    *aj, *ii,*ridx;
  PetscScalar       *aa;
#if defined(PETSC_DEBUG)
  PetscBool found = PETSC_FALSE;
#endif

  PetscFunctionBegin;
  /* Create SF where leaves are input rows and roots are owned rows */
  ierr = PetscMalloc1(n, &lrows);CHKERRQ(ierr);
  for (r = 0; r < n; ++r) lrows[r] = -1;
  ierr = PetscMalloc1(N, &rrows);CHKERRQ(ierr);
  for (r = 0; r < N; ++r) {
    const PetscInt idx   = rows[r];
    PetscBool      found = PETSC_FALSE;
    /* Trick for efficient searching for sorted rows */
    if (lastidx > idx) p = 0;
    lastidx = idx;
    for (; p < size; ++p) {
      if (idx >= owners[p] && idx < owners[p+1]) {
        rrows[r].rank  = p;
        rrows[r].index = rows[r] - owners[p];
        found = PETSC_TRUE;
        break;
      }
    }
    if (!found) SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row %d not found in matrix distribution", idx);
  }
  ierr = PetscSFCreate(PetscObjectComm((PetscObject) A), &sf);CHKERRQ(ierr);
  ierr = PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER);CHKERRQ(ierr);
  /* Collect flags for rows to be zeroed */
  ierr = PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
  ierr = PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
  ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
  /* Compress and put in row numbers */
  for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r;
  /* zero diagonal part of matrix */
  ierr = MatZeroRowsColumns(l->A,len,lrows,diag,x,b);CHKERRQ(ierr);
  /* handle off-diagonal part of matrix */
  ierr = MatGetVecs(A,&xmask,NULL);CHKERRQ(ierr);
  ierr = VecDuplicate(l->lvec,&lmask);CHKERRQ(ierr);
  ierr = VecGetArray(xmask,&bb);CHKERRQ(ierr);
  for (i=0; i<len; i++) bb[lrows[i]] = 1;
  ierr = VecRestoreArray(xmask,&bb);CHKERRQ(ierr);
  ierr = VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecDestroy(&xmask);CHKERRQ(ierr);
  if (x) {
    ierr = VecScatterBegin(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = VecScatterEnd(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = VecGetArrayRead(l->lvec,&xx);CHKERRQ(ierr);
    ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
  }
  ierr = VecGetArray(lmask,&mask);CHKERRQ(ierr);
  /* remove zeroed rows of off-diagonal matrix */
  ii = aij->i;
  for (i=0; i<len; i++) {
    ierr = PetscMemzero(aij->a + ii[lrows[i]],(ii[lrows[i]+1] - ii[lrows[i]])*sizeof(PetscScalar));CHKERRQ(ierr);
  }
  /* loop over all elements of the off-process part of the matrix, zeroing removed columns */
  if (aij->compressedrow.use) {
    m    = aij->compressedrow.nrows;
    ii   = aij->compressedrow.i;
    ridx = aij->compressedrow.rindex;
    for (i=0; i<m; i++) {
      n  = ii[i+1] - ii[i];
      aj = aij->j + ii[i];
      aa = aij->a + ii[i];

      for (j=0; j<n; j++) {
        if (PetscAbsScalar(mask[*aj])) {
          if (b) bb[*ridx] -= *aa*xx[*aj];
          *aa = 0.0;
        }
        aa++;
        aj++;
      }
      ridx++;
    }
  } else { /* do not use compressed row format */
    m = l->B->rmap->n;
    for (i=0; i<m; i++) {
      n  = ii[i+1] - ii[i];
      aj = aij->j + ii[i];
      aa = aij->a + ii[i];
      for (j=0; j<n; j++) {
        if (PetscAbsScalar(mask[*aj])) {
          if (b) bb[i] -= *aa*xx[*aj];
          *aa = 0.0;
        }
        aa++;
        aj++;
      }
    }
  }
  if (x) {
    ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
    ierr = VecRestoreArrayRead(l->lvec,&xx);CHKERRQ(ierr);
  }
  ierr = VecRestoreArray(lmask,&mask);CHKERRQ(ierr);
  ierr = VecDestroy(&lmask);CHKERRQ(ierr);
  ierr = PetscFree(lrows);CHKERRQ(ierr);

  /* only change the matrix nonzero state if the pattern was allowed to change */
  if (!((Mat_SeqAIJ*)(l->A->data))->keepnonzeropattern) {
    PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate;
    ierr = MPI_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMult_MPIAIJ"
PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       nt;

  PetscFunctionBegin;
  ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
  if (nt != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt);
  ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultDiagonalBlock_MPIAIJ"
PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatMultDiagonalBlock(a->A,bb,xx);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultAdd_MPIAIJ"
PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultTranspose_MPIAIJ"
PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscBool      merged;

  PetscFunctionBegin;
  ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr);
  /* do nondiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  if (!merged) {
    /* send it on its way */
    ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
    /* do local part */
    ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
    /* receive remote parts: note this assumes the values are not actually
       added into yy until the next line */
    ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  } else {
    /* do local part */
    ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
    /* send it on its way */
    ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
    /* values were actually received in the Begin() above, but we still need to call this no-op End() */
    ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatIsTranspose_MPIAIJ"
PetscErrorCode  MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscBool  *f)
{
  MPI_Comm       comm;
  Mat_MPIAIJ     *Aij = (Mat_MPIAIJ*) Amat->data, *Bij;
  Mat            Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs;
  IS             Me,Notme;
  PetscErrorCode ierr;
  PetscInt       M,N,first,last,*notme,i;
  PetscMPIInt    size;

  PetscFunctionBegin;
  /* Easy test: symmetric diagonal block */
  Bij  = (Mat_MPIAIJ*) Bmat->data; Bdia = Bij->A;
  ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr);
  if (!*f) PetscFunctionReturn(0);
  ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  if (size == 1) PetscFunctionReturn(0);

  /* Hard test: off-diagonal block. This takes a MatGetSubMatrix. */
  ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr);
  ierr = PetscMalloc1((N-last+first),&notme);CHKERRQ(ierr);
  for (i=0; i<first; i++) notme[i] = i;
  for (i=last; i<M; i++) notme[i-last+first] = i;
  ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,PETSC_COPY_VALUES,&Notme);CHKERRQ(ierr);
  ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr);
  ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr);
  Aoff = Aoffs[0];
  ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr);
  Boff = Boffs[0];
  ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr);
  ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr);
  ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr);
  ierr = ISDestroy(&Me);CHKERRQ(ierr);
  ierr = ISDestroy(&Notme);CHKERRQ(ierr);
  ierr = PetscFree(notme);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultTransposeAdd_MPIAIJ"
PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* do nondiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  /* send it on its way */
  ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  /* do local part */
  ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  /* receive remote parts */
  ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
  This only works correctly for square matrices where the subblock A->A is the
   diagonal block
*/
#undef __FUNCT__
#define __FUNCT__ "MatGetDiagonal_MPIAIJ"
PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;

  PetscFunctionBegin;
  if (A->rmap->N != A->cmap->N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
  if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
  ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatScale_MPIAIJ"
PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatScale(a->A,aa);CHKERRQ(ierr);
  ierr = MatScale(a->B,aa);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatDestroy_MPIAIJ"
PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
#if defined(PETSC_USE_LOG)
  PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N);
#endif
  ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
  ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
  ierr = MatDestroy(&aij->A);CHKERRQ(ierr);
  ierr = MatDestroy(&aij->B);CHKERRQ(ierr);
#if defined(PETSC_USE_CTABLE)
  ierr = PetscTableDestroy(&aij->colmap);CHKERRQ(ierr);
#else
  ierr = PetscFree(aij->colmap);CHKERRQ(ierr);
#endif
  ierr = PetscFree(aij->garray);CHKERRQ(ierr);
  ierr = VecDestroy(&aij->lvec);CHKERRQ(ierr);
  ierr = VecScatterDestroy(&aij->Mvctx);CHKERRQ(ierr);
  ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
  ierr = PetscFree(aij->ld);CHKERRQ(ierr);
  ierr = PetscFree(mat->data);CHKERRQ(ierr);

  ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C",NULL);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

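/*
   The binary viewer below writes the matrix in three passes (row lengths, then
   column indices, then numerical values); in each pass process 0 interleaves
   writing its own data with receiving and writing every other process's data,
   with the PetscViewerFlowControl calls limiting how many messages are in
   flight at once.
*/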
1152 #undef __FUNCT__
1153 #define __FUNCT__ "MatView_MPIAIJ_Binary"
1154 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
1155 {
1156   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1157   Mat_SeqAIJ     *A   = (Mat_SeqAIJ*)aij->A->data;
1158   Mat_SeqAIJ     *B   = (Mat_SeqAIJ*)aij->B->data;
1159   PetscErrorCode ierr;
1160   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
1161   int            fd;
1162   PetscInt       nz,header[4],*row_lengths,*range=0,rlen,i;
1163   PetscInt       nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap->rstart,rnz = 0;
1164   PetscScalar    *column_values;
1165   PetscInt       message_count,flowcontrolcount;
1166   FILE           *file;
1167 
1168   PetscFunctionBegin;
1169   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
1170   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);CHKERRQ(ierr);
1171   nz   = A->nz + B->nz;
1172   if (!rank) {
1173     header[0] = MAT_FILE_CLASSID;
1174     header[1] = mat->rmap->N;
1175     header[2] = mat->cmap->N;
1176 
1177     ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1178     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1179     ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1180     /* get largest number of rows any processor has */
1181     rlen  = mat->rmap->n;
1182     range = mat->rmap->range;
1183     for (i=1; i<size; i++) rlen = PetscMax(rlen,range[i+1] - range[i]);
1184   } else {
1185     ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1186     rlen = mat->rmap->n;
1187   }
1188 
1189   /* load up the local row counts */
1190   ierr = PetscMalloc1((rlen+1),&row_lengths);CHKERRQ(ierr);
1191   for (i=0; i<mat->rmap->n; i++) row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
1192 
1193   /* store the row lengths to the file */
1194   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1195   if (!rank) {
1196     ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap->n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1197     for (i=1; i<size; i++) {
1198       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1199       rlen = range[i+1] - range[i];
1200       ierr = MPIULong_Recv(row_lengths,rlen,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1201       ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1202     }
1203     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1204   } else {
1205     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1206     ierr = MPIULong_Send(row_lengths,mat->rmap->n,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1207     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1208   }
1209   ierr = PetscFree(row_lengths);CHKERRQ(ierr);
1210 
1211   /* load up the local column indices */
1212   nzmax = nz; /* th processor needs space a largest processor needs */
1213   ierr  = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1214   ierr  = PetscMalloc1((nzmax+1),&column_indices);CHKERRQ(ierr);
1215   cnt   = 0;
1216   for (i=0; i<mat->rmap->n; i++) {
1217     for (j=B->i[i]; j<B->i[i+1]; j++) {
1218       if ((col = garray[B->j[j]]) > cstart) break;
1219       column_indices[cnt++] = col;
1220     }
1221     for (k=A->i[i]; k<A->i[i+1]; k++) column_indices[cnt++] = A->j[k] + cstart;
1222     for (; j<B->i[i+1]; j++) column_indices[cnt++] = garray[B->j[j]];
1223   }
1224   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
1225 
1226   /* store the column indices to the file */
1227   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1228   if (!rank) {
1229     MPI_Status status;
1230     ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1231     for (i=1; i<size; i++) {
1232       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1233       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
1234       if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax);
1235       ierr = MPIULong_Recv(column_indices,rnz,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1236       ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1237     }
1238     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1239   } else {
1240     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1241     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1242     ierr = MPIULong_Send(column_indices,nz,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1243     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1244   }
1245   ierr = PetscFree(column_indices);CHKERRQ(ierr);
1246 
1247   /* load up the local column values */
1248   ierr = PetscMalloc1((nzmax+1),&column_values);CHKERRQ(ierr);
1249   cnt  = 0;
1250   for (i=0; i<mat->rmap->n; i++) {
1251     for (j=B->i[i]; j<B->i[i+1]; j++) {
1252       if (garray[B->j[j]] > cstart) break;
1253       column_values[cnt++] = B->a[j];
1254     }
1255     for (k=A->i[i]; k<A->i[i+1]; k++) column_values[cnt++] = A->a[k];
1256     for (; j<B->i[i+1]; j++) column_values[cnt++] = B->a[j];
1257   }
1258   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
1259 
1260   /* store the column values to the file */
1261   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1262   if (!rank) {
1263     MPI_Status status;
1264     ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1265     for (i=1; i<size; i++) {
1266       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1267       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
1268       if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: rnz = %D nzmax = %D",rnz,nzmax);
1269       ierr = MPIULong_Recv(column_values,rnz,MPIU_SCALAR,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1270       ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1271     }
1272     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1273   } else {
1274     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1275     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1276     ierr = MPIULong_Send(column_values,nz,MPIU_SCALAR,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1277     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1278   }
1279   ierr = PetscFree(column_values);CHKERRQ(ierr);
1280 
1281   ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr);
1282   if (file) fprintf(file,"-matload_block_size %d\n",(int)PetscAbs(mat->rmap->bs));
1283   PetscFunctionReturn(0);
1284 }
1285 
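/*
   Summary of what MatView_MPIAIJ_Binary streams to the binary viewer above, in order:
   the per-row nonzero counts, then the global column indices (each row's off-diagonal
   entries below cstart first, then the diagonal block, then the remaining off-diagonal
   entries, so every row comes out globally sorted), then the matching values.  All data
   is funneled through rank 0 under flow control, and a "-matload_block_size" hint is
   appended to the .info file at the end.
*/
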
1286 #include <petscdraw.h>
1287 #undef __FUNCT__
1288 #define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket"
1289 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
1290 {
1291   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
1292   PetscErrorCode    ierr;
1293   PetscMPIInt       rank = aij->rank,size = aij->size;
1294   PetscBool         isdraw,iascii,isbinary;
1295   PetscViewer       sviewer;
1296   PetscViewerFormat format;
1297 
1298   PetscFunctionBegin;
1299   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1300   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1301   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1302   if (iascii) {
1303     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1304     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1305       MatInfo   info;
1306       PetscBool inodes;
1307 
1308       ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
1309       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
1310       ierr = MatInodeGetInodeSizes(aij->A,NULL,(PetscInt**)&inodes,NULL);CHKERRQ(ierr); /* the cast abuses inodes as a flag that is nonzero iff the inode array exists */
1311       ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_TRUE);CHKERRQ(ierr);
1312       if (!inodes) {
1313         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, not using I-node routines\n",
1314                                                   rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
1315       } else {
1316         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, using I-node routines\n",
1317                                                   rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
1318       }
1319       ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
1320       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D\n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1321       ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
1322       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D\n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1323       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1324       ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_FALSE);CHKERRQ(ierr);
1325       ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
1326       ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr);
1327       PetscFunctionReturn(0);
1328     } else if (format == PETSC_VIEWER_ASCII_INFO) {
1329       PetscInt inodecount,inodelimit,*inodes;
1330       ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr);
1331       if (inodes) {
1332         ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr);
1333       } else {
1334         ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr);
1335       }
1336       PetscFunctionReturn(0);
1337     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1338       PetscFunctionReturn(0);
1339     }
1340   } else if (isbinary) {
1341     if (size == 1) {
1342       ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1343       ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
1344     } else {
1345       ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr);
1346     }
1347     PetscFunctionReturn(0);
1348   } else if (isdraw) {
1349     PetscDraw draw;
1350     PetscBool isnull;
1351     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
1352     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
1353   }
1354 
1355   {
1356     /* assemble the entire matrix onto first processor. */
1357     Mat        A;
1358     Mat_SeqAIJ *Aloc;
1359     PetscInt   M = mat->rmap->N,N = mat->cmap->N,m,*ai,*aj,row,*cols,i,*ct;
1360     MatScalar  *a;
1361 
1362     ierr = MatCreate(PetscObjectComm((PetscObject)mat),&A);CHKERRQ(ierr);
1363     if (!rank) {
1364       ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
1365     } else {
1366       ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
1367     }
1368     /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */
1369     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
1370     ierr = MatMPIAIJSetPreallocation(A,0,NULL,0,NULL);CHKERRQ(ierr);
1371     ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
1372     ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)A);CHKERRQ(ierr);
1373 
1374     /* copy over the A part */
1375     Aloc = (Mat_SeqAIJ*)aij->A->data;
1376     m    = aij->A->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1377     row  = mat->rmap->rstart;
1378     for (i=0; i<ai[m]; i++) aj[i] += mat->cmap->rstart;
1379     for (i=0; i<m; i++) {
1380       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
1381       row++;
1382       a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
1383     }
1384     aj = Aloc->j;
1385     for (i=0; i<ai[m]; i++) aj[i] -= mat->cmap->rstart;
1386 
1387     /* copy over the B part */
1388     Aloc = (Mat_SeqAIJ*)aij->B->data;
1389     m    = aij->B->rmap->n;  ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1390     row  = mat->rmap->rstart;
1391     ierr = PetscMalloc1((ai[m]+1),&cols);CHKERRQ(ierr);
1392     ct   = cols; /* save the allocated pointer, since cols is advanced in the loop below */
1393     for (i=0; i<ai[m]; i++) cols[i] = aij->garray[aj[i]];
1394     for (i=0; i<m; i++) {
1395       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
1396       row++;
1397       a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
1398     }
1399     ierr = PetscFree(ct);CHKERRQ(ierr);
1400     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1401     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1402     /*
1403        Everyone has to call MatView to draw the matrix, since the graphics waits are
1404        synchronized across all processes that share the PetscDraw object
1405     */
1406     ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
1407     if (!rank) {
1408       ierr = MatView_SeqAIJ(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
1409     }
1410     ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
1411     ierr = MatDestroy(&A);CHKERRQ(ierr);
1412   }
1413   PetscFunctionReturn(0);
1414 }
1415 
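/*
   A minimal usage sketch for the viewer paths above (assumes A is an assembled MATMPIAIJ
   matrix; the viewer and format choices here are just one possibility):

     PetscViewer viewer = PETSC_VIEWER_STDOUT_WORLD;
     ierr = PetscViewerSetFormat(viewer,PETSC_VIEWER_ASCII_INFO_DETAIL);CHKERRQ(ierr);
     ierr = MatView(A,viewer);CHKERRQ(ierr);  (prints per-rank nz/memory plus the VecScatter info)

   With a PETSCVIEWERDRAW viewer every rank must participate in the call, since the whole
   matrix is gathered onto rank 0 before MatView_SeqAIJ draws it.
*/
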
1416 #undef __FUNCT__
1417 #define __FUNCT__ "MatView_MPIAIJ"
1418 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer)
1419 {
1420   PetscErrorCode ierr;
1421   PetscBool      iascii,isdraw,issocket,isbinary;
1422 
1423   PetscFunctionBegin;
1424   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1425   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1426   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1427   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
1428   if (iascii || isdraw || isbinary || issocket) {
1429     ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
1430   }
1431   PetscFunctionReturn(0);
1432 }
1433 
1434 #undef __FUNCT__
1435 #define __FUNCT__ "MatSOR_MPIAIJ"
1436 PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
1437 {
1438   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1439   PetscErrorCode ierr;
1440   Vec            bb1 = 0;
1441   PetscBool      hasop;
1442 
1443   PetscFunctionBegin;
1444   if (flag == SOR_APPLY_UPPER) {
1445     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1446     PetscFunctionReturn(0);
1447   }
1448 
1449   if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) {
1450     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
1451   }
1452 
1453   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
1454     if (flag & SOR_ZERO_INITIAL_GUESS) {
1455       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1456       its--;
1457     }
1458 
1459     while (its--) {
1460       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1461       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1462 
1463       /* update rhs: bb1 = bb - B*x */
1464       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1465       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1466 
1467       /* local sweep */
1468       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1469     }
1470   } else if (flag & SOR_LOCAL_FORWARD_SWEEP) {
1471     if (flag & SOR_ZERO_INITIAL_GUESS) {
1472       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1473       its--;
1474     }
1475     while (its--) {
1476       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1477       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1478 
1479       /* update rhs: bb1 = bb - B*x */
1480       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1481       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1482 
1483       /* local sweep */
1484       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1485     }
1486   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) {
1487     if (flag & SOR_ZERO_INITIAL_GUESS) {
1488       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1489       its--;
1490     }
1491     while (its--) {
1492       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1493       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1494 
1495       /* update rhs: bb1 = bb - B*x */
1496       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1497       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1498 
1499       /* local sweep */
1500       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1501     }
1502   } else if (flag & SOR_EISENSTAT) {
1503     Vec xx1;
1504 
1505     ierr = VecDuplicate(bb,&xx1);CHKERRQ(ierr);
1506     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr);
1507 
1508     ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1509     ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1510     if (!mat->diag) {
1511       ierr = MatGetVecs(matin,&mat->diag,NULL);CHKERRQ(ierr);
1512       ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr);
1513     }
1514     ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr);
1515     if (hasop) {
1516       ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr);
1517     } else {
1518       ierr = VecPointwiseMult(bb1,mat->diag,xx);CHKERRQ(ierr);
1519     }
1520     ierr = VecAYPX(bb1,(omega-2.0)/omega,bb);CHKERRQ(ierr);
1521 
1522     ierr = MatMultAdd(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr);
1523 
1524     /* local sweep */
1525     ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr);
1526     ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr);
1527     ierr = VecDestroy(&xx1);CHKERRQ(ierr);
1528   } else SETERRQ(PetscObjectComm((PetscObject)matin),PETSC_ERR_SUP,"Parallel SOR not supported");
1529 
1530   ierr = VecDestroy(&bb1);CHKERRQ(ierr);
1531   PetscFunctionReturn(0);
1532 }
1533 
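/*
   A note on the local sweeps above: with x split into its local part and the scattered
   off-process part lvec, each outer iteration updates the right-hand side as

       bb1 = bb - B*lvec        (coded as lvec = -lvec; bb1 = B*lvec + bb)

   and then applies lits SOR sweeps of the diagonal block A to bb1, i.e. a block Jacobi
   outer iteration with SOR as the local solver.  In the Eisenstat branch the right-hand
   side is instead bb1 = bb + ((omega-2)/omega)*D*x + B*lvec, with D the matrix diagonal.
*/
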
1534 #undef __FUNCT__
1535 #define __FUNCT__ "MatPermute_MPIAIJ"
1536 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B)
1537 {
1538   Mat            aA,aB,Aperm;
1539   const PetscInt *rwant,*cwant,*gcols,*ai,*bi,*aj,*bj;
1540   PetscScalar    *aa,*ba;
1541   PetscInt       i,j,m,n,ng,anz,bnz,*dnnz,*onnz,*tdnnz,*tonnz,*rdest,*cdest,*work,*gcdest;
1542   PetscSF        rowsf,sf;
1543   IS             parcolp = NULL;
1544   PetscBool      done;
1545   PetscErrorCode ierr;
1546 
1547   PetscFunctionBegin;
1548   ierr = MatGetLocalSize(A,&m,&n);CHKERRQ(ierr);
1549   ierr = ISGetIndices(rowp,&rwant);CHKERRQ(ierr);
1550   ierr = ISGetIndices(colp,&cwant);CHKERRQ(ierr);
1551   ierr = PetscMalloc3(PetscMax(m,n),&work,m,&rdest,n,&cdest);CHKERRQ(ierr);
1552 
1553   /* Invert row permutation to find out where my rows should go */
1554   ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&rowsf);CHKERRQ(ierr);
1555   ierr = PetscSFSetGraphLayout(rowsf,A->rmap,A->rmap->n,NULL,PETSC_OWN_POINTER,rwant);CHKERRQ(ierr);
1556   ierr = PetscSFSetFromOptions(rowsf);CHKERRQ(ierr);
1557   for (i=0; i<m; i++) work[i] = A->rmap->rstart + i;
1558   ierr = PetscSFReduceBegin(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr);
1559   ierr = PetscSFReduceEnd(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr);
1560 
1561   /* Invert column permutation to find out where my columns should go */
1562   ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
1563   ierr = PetscSFSetGraphLayout(sf,A->cmap,A->cmap->n,NULL,PETSC_OWN_POINTER,cwant);CHKERRQ(ierr);
1564   ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
1565   for (i=0; i<n; i++) work[i] = A->cmap->rstart + i;
1566   ierr = PetscSFReduceBegin(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr);
1567   ierr = PetscSFReduceEnd(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr);
1568   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
1569 
1570   ierr = ISRestoreIndices(rowp,&rwant);CHKERRQ(ierr);
1571   ierr = ISRestoreIndices(colp,&cwant);CHKERRQ(ierr);
1572   ierr = MatMPIAIJGetSeqAIJ(A,&aA,&aB,&gcols);CHKERRQ(ierr);
1573 
1574   /* Find out where my gcols should go */
1575   ierr = MatGetSize(aB,NULL,&ng);CHKERRQ(ierr);
1576   ierr = PetscMalloc1(ng,&gcdest);CHKERRQ(ierr);
1577   ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
1578   ierr = PetscSFSetGraphLayout(sf,A->cmap,ng,NULL,PETSC_OWN_POINTER,gcols);CHKERRQ(ierr);
1579   ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
1580   ierr = PetscSFBcastBegin(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr);
1581   ierr = PetscSFBcastEnd(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr);
1582   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
1583 
1584   ierr = PetscCalloc4(m,&dnnz,m,&onnz,m,&tdnnz,m,&tonnz);CHKERRQ(ierr);
1585   ierr = MatGetRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr);
1586   ierr = MatGetRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr);
1587   for (i=0; i<m; i++) {
1588     PetscInt row = rdest[i],rowner;
1589     ierr = PetscLayoutFindOwner(A->rmap,row,&rowner);CHKERRQ(ierr);
1590     for (j=ai[i]; j<ai[i+1]; j++) {
1591       PetscInt cowner,col = cdest[aj[j]];
1592       ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr); /* Could build an index for the columns to eliminate this search */
1593       if (rowner == cowner) dnnz[i]++;
1594       else onnz[i]++;
1595     }
1596     for (j=bi[i]; j<bi[i+1]; j++) {
1597       PetscInt cowner,col = gcdest[bj[j]];
1598       ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr);
1599       if (rowner == cowner) dnnz[i]++;
1600       else onnz[i]++;
1601     }
1602   }
1603   ierr = PetscSFBcastBegin(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr);
1604   ierr = PetscSFBcastEnd(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr);
1605   ierr = PetscSFBcastBegin(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr);
1606   ierr = PetscSFBcastEnd(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr);
1607   ierr = PetscSFDestroy(&rowsf);CHKERRQ(ierr);
1608 
1609   ierr = MatCreateAIJ(PetscObjectComm((PetscObject)A),A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N,0,tdnnz,0,tonnz,&Aperm);CHKERRQ(ierr);
1610   ierr = MatSeqAIJGetArray(aA,&aa);CHKERRQ(ierr);
1611   ierr = MatSeqAIJGetArray(aB,&ba);CHKERRQ(ierr);
1612   for (i=0; i<m; i++) {
1613     PetscInt *acols = dnnz,*bcols = onnz; /* Repurpose now-unneeded arrays */
1614     PetscInt j0,rowlen;
1615     rowlen = ai[i+1] - ai[i];
1616     for (j0=j=0; j<rowlen; j0=j) { /* rowlen can exceed m, the length of the scratch arrays, so set the values in batches of at most m */
1617       for ( ; j<PetscMin(rowlen,j0+m); j++) acols[j-j0] = cdest[aj[ai[i]+j]];
1618       ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,acols,aa+ai[i]+j0,INSERT_VALUES);CHKERRQ(ierr);
1619     }
1620     rowlen = bi[i+1] - bi[i];
1621     for (j0=j=0; j<rowlen; j0=j) {
1622       for ( ; j<PetscMin(rowlen,j0+m); j++) bcols[j-j0] = gcdest[bj[bi[i]+j]];
1623       ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,bcols,ba+bi[i]+j0,INSERT_VALUES);CHKERRQ(ierr);
1624     }
1625   }
1626   ierr = MatAssemblyBegin(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1627   ierr = MatAssemblyEnd(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1628   ierr = MatRestoreRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr);
1629   ierr = MatRestoreRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr);
1630   ierr = MatSeqAIJRestoreArray(aA,&aa);CHKERRQ(ierr);
1631   ierr = MatSeqAIJRestoreArray(aB,&ba);CHKERRQ(ierr);
1632   ierr = PetscFree4(dnnz,onnz,tdnnz,tonnz);CHKERRQ(ierr);
1633   ierr = PetscFree3(work,rdest,cdest);CHKERRQ(ierr);
1634   ierr = PetscFree(gcdest);CHKERRQ(ierr);
1635   if (parcolp) {ierr = ISDestroy(&colp);CHKERRQ(ierr);}
1636   *B = Aperm;
1637   PetscFunctionReturn(0);
1638 }
1639 
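/*
   A minimal usage sketch for the routine above (hypothetical index sets; rowp and colp
   must be parallel ISs conforming to A's row and column layouts):

     IS  rowp,colp;   set up elsewhere
     Mat B;
     ierr = MatPermute(A,rowp,colp,&B);CHKERRQ(ierr);  (B gets A's rows/columns reordered)
     ...
     ierr = MatDestroy(&B);CHKERRQ(ierr);
*/
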
1640 #undef __FUNCT__
1641 #define __FUNCT__ "MatGetInfo_MPIAIJ"
1642 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1643 {
1644   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1645   Mat            A    = mat->A,B = mat->B;
1646   PetscErrorCode ierr;
1647   PetscReal      isend[5],irecv[5];
1648 
1649   PetscFunctionBegin;
1650   info->block_size = 1.0;
1651   ierr             = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1652 
1653   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1654   isend[3] = info->memory;  isend[4] = info->mallocs;
1655 
1656   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1657 
1658   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1659   isend[3] += info->memory;  isend[4] += info->mallocs;
1660   if (flag == MAT_LOCAL) {
1661     info->nz_used      = isend[0];
1662     info->nz_allocated = isend[1];
1663     info->nz_unneeded  = isend[2];
1664     info->memory       = isend[3];
1665     info->mallocs      = isend[4];
1666   } else if (flag == MAT_GLOBAL_MAX) {
1667     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);
1668 
1669     info->nz_used      = irecv[0];
1670     info->nz_allocated = irecv[1];
1671     info->nz_unneeded  = irecv[2];
1672     info->memory       = irecv[3];
1673     info->mallocs      = irecv[4];
1674   } else if (flag == MAT_GLOBAL_SUM) {
1675     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);
1676 
1677     info->nz_used      = irecv[0];
1678     info->nz_allocated = irecv[1];
1679     info->nz_unneeded  = irecv[2];
1680     info->memory       = irecv[3];
1681     info->mallocs      = irecv[4];
1682   }
1683   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1684   info->fill_ratio_needed = 0;
1685   info->factor_mallocs    = 0;
1686   PetscFunctionReturn(0);
1687 }
1688 
1689 #undef __FUNCT__
1690 #define __FUNCT__ "MatSetOption_MPIAIJ"
1691 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscBool flg)
1692 {
1693   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1694   PetscErrorCode ierr;
1695 
1696   PetscFunctionBegin;
1697   switch (op) {
1698   case MAT_NEW_NONZERO_LOCATIONS:
1699   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1700   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1701   case MAT_KEEP_NONZERO_PATTERN:
1702   case MAT_NEW_NONZERO_LOCATION_ERR:
1703   case MAT_USE_INODES:
1704   case MAT_IGNORE_ZERO_ENTRIES:
1705     MatCheckPreallocated(A,1);
1706     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1707     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1708     break;
1709   case MAT_ROW_ORIENTED:
1710     a->roworiented = flg;
1711 
1712     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1713     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1714     break;
1715   case MAT_NEW_DIAGONALS:
1716     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1717     break;
1718   case MAT_IGNORE_OFF_PROC_ENTRIES:
1719     a->donotstash = flg;
1720     break;
1721   case MAT_SPD:
1722     A->spd_set = PETSC_TRUE;
1723     A->spd     = flg;
1724     if (flg) {
1725       A->symmetric                  = PETSC_TRUE;
1726       A->structurally_symmetric     = PETSC_TRUE;
1727       A->symmetric_set              = PETSC_TRUE;
1728       A->structurally_symmetric_set = PETSC_TRUE;
1729     }
1730     break;
1731   case MAT_SYMMETRIC:
1732     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1733     break;
1734   case MAT_STRUCTURALLY_SYMMETRIC:
1735     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1736     break;
1737   case MAT_HERMITIAN:
1738     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1739     break;
1740   case MAT_SYMMETRY_ETERNAL:
1741     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1742     break;
1743   default:
1744     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op);
1745   }
1746   PetscFunctionReturn(0);
1747 }
1748 
1749 #undef __FUNCT__
1750 #define __FUNCT__ "MatGetRow_MPIAIJ"
1751 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1752 {
1753   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1754   PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
1755   PetscErrorCode ierr;
1756   PetscInt       i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart;
1757   PetscInt       nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend;
1758   PetscInt       *cmap,*idx_p;
1759 
1760   PetscFunctionBegin;
1761   if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
1762   mat->getrowactive = PETSC_TRUE;
1763 
1764   if (!mat->rowvalues && (idx || v)) {
1765     /*
1766         allocate enough space to hold information from the longest row.
1767     */
1768     Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data;
1769     PetscInt   max = 1,tmp;
1770     for (i=0; i<matin->rmap->n; i++) {
1771       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1772       if (max < tmp) max = tmp;
1773     }
1774     ierr = PetscMalloc2(max,&mat->rowvalues,max,&mat->rowindices);CHKERRQ(ierr);
1775   }
1776 
1777   if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows");
1778   lrow = row - rstart;
1779 
1780   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1781   if (!v)   {pvA = 0; pvB = 0;}
1782   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1783   ierr  = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1784   ierr  = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1785   nztot = nzA + nzB;
1786 
1787   cmap = mat->garray;
1788   if (v  || idx) {
1789     if (nztot) {
1790       /* Sort by increasing column numbers, assuming A and B already sorted */
1791       PetscInt imark = -1;
1792       if (v) {
1793         *v = v_p = mat->rowvalues;
1794         for (i=0; i<nzB; i++) {
1795           if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i];
1796           else break;
1797         }
1798         imark = i;
1799         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1800         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1801       }
1802       if (idx) {
1803         *idx = idx_p = mat->rowindices;
1804         if (imark > -1) {
1805           for (i=0; i<imark; i++) {
1806             idx_p[i] = cmap[cworkB[i]];
1807           }
1808         } else {
1809           for (i=0; i<nzB; i++) {
1810             if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]];
1811             else break;
1812           }
1813           imark = i;
1814         }
1815         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart + cworkA[i];
1816         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]];
1817       }
1818     } else {
1819       if (idx) *idx = 0;
1820       if (v)   *v   = 0;
1821     }
1822   }
1823   *nz  = nztot;
1824   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1825   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1826   PetscFunctionReturn(0);
1827 }
1828 
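/*
   A minimal usage sketch for MatGetRow_MPIAIJ above: only locally owned rows may be
   requested, and each MatGetRow() must be paired with MatRestoreRow() before the next call:

     PetscInt          row,rstart,rend,ncols;
     const PetscInt    *cols;
     const PetscScalar *vals;
     ierr = MatGetOwnershipRange(mat,&rstart,&rend);CHKERRQ(ierr);
     for (row=rstart; row<rend; row++) {
       ierr = MatGetRow(mat,row,&ncols,&cols,&vals);CHKERRQ(ierr);
       ... cols[] holds global column indices, sorted increasing ...
       ierr = MatRestoreRow(mat,row,&ncols,&cols,&vals);CHKERRQ(ierr);
     }
*/
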
1829 #undef __FUNCT__
1830 #define __FUNCT__ "MatRestoreRow_MPIAIJ"
1831 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1832 {
1833   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1834 
1835   PetscFunctionBegin;
1836   if (!aij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
1837   aij->getrowactive = PETSC_FALSE;
1838   PetscFunctionReturn(0);
1839 }
1840 
1841 #undef __FUNCT__
1842 #define __FUNCT__ "MatNorm_MPIAIJ"
1843 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm)
1844 {
1845   Mat_MPIAIJ     *aij  = (Mat_MPIAIJ*)mat->data;
1846   Mat_SeqAIJ     *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data;
1847   PetscErrorCode ierr;
1848   PetscInt       i,j,cstart = mat->cmap->rstart;
1849   PetscReal      sum = 0.0;
1850   MatScalar      *v;
1851 
1852   PetscFunctionBegin;
1853   if (aij->size == 1) {
1854     ierr =  MatNorm(aij->A,type,norm);CHKERRQ(ierr);
1855   } else {
1856     if (type == NORM_FROBENIUS) {
1857       v = amat->a;
1858       for (i=0; i<amat->nz; i++) {
1859         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1860       }
1861       v = bmat->a;
1862       for (i=0; i<bmat->nz; i++) {
1863         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1864       }
1865       ierr  = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1866       *norm = PetscSqrtReal(*norm);
1867     } else if (type == NORM_1) { /* max column norm */
1868       PetscReal *tmp,*tmp2;
1869       PetscInt  *jj,*garray = aij->garray;
1870       ierr  = PetscCalloc1((mat->cmap->N+1),&tmp);CHKERRQ(ierr);
1871       ierr  = PetscMalloc1((mat->cmap->N+1),&tmp2);CHKERRQ(ierr);
1872       *norm = 0.0;
1873       v     = amat->a; jj = amat->j;
1874       for (j=0; j<amat->nz; j++) {
1875         tmp[cstart + *jj++] += PetscAbsScalar(*v);  v++;
1876       }
1877       v = bmat->a; jj = bmat->j;
1878       for (j=0; j<bmat->nz; j++) {
1879         tmp[garray[*jj++]] += PetscAbsScalar(*v); v++;
1880       }
1881       ierr = MPI_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1882       for (j=0; j<mat->cmap->N; j++) {
1883         if (tmp2[j] > *norm) *norm = tmp2[j];
1884       }
1885       ierr = PetscFree(tmp);CHKERRQ(ierr);
1886       ierr = PetscFree(tmp2);CHKERRQ(ierr);
1887     } else if (type == NORM_INFINITY) { /* max row norm */
1888       PetscReal ntemp = 0.0;
1889       for (j=0; j<aij->A->rmap->n; j++) {
1890         v   = amat->a + amat->i[j];
1891         sum = 0.0;
1892         for (i=0; i<amat->i[j+1]-amat->i[j]; i++) {
1893           sum += PetscAbsScalar(*v); v++;
1894         }
1895         v = bmat->a + bmat->i[j];
1896         for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) {
1897           sum += PetscAbsScalar(*v); v++;
1898         }
1899         if (sum > ntemp) ntemp = sum;
1900       }
1901       ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1902     } else SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"No support for two norm");
1903   }
1904   PetscFunctionReturn(0);
1905 }
1906 
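/*
   The cases above compute, with the local contributions reduced across the communicator:
     NORM_FROBENIUS: sqrt( sum_ij |a_ij|^2 )   (MPIU_SUM reduction of the local sums of squares)
     NORM_1:         max_j sum_i |a_ij|        (max column sum; a length-N column accumulator is summed)
     NORM_INFINITY:  max_i sum_j |a_ij|        (max row sum; MPIU_MAX reduction of the local row maxima)
   NORM_2 is not supported for this matrix type.
*/
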
1907 #undef __FUNCT__
1908 #define __FUNCT__ "MatTranspose_MPIAIJ"
1909 PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout)
1910 {
1911   Mat_MPIAIJ     *a   = (Mat_MPIAIJ*)A->data;
1912   Mat_SeqAIJ     *Aloc=(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data;
1913   PetscErrorCode ierr;
1914   PetscInt       M      = A->rmap->N,N = A->cmap->N,ma,na,mb,nb,*ai,*aj,*bi,*bj,row,*cols,*cols_tmp,i;
1915   PetscInt       cstart = A->cmap->rstart,ncol;
1916   Mat            B;
1917   MatScalar      *array;
1918 
1919   PetscFunctionBegin;
1920   if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
1921 
1922   ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n; nb = a->B->cmap->n;
1923   ai = Aloc->i; aj = Aloc->j;
1924   bi = Bloc->i; bj = Bloc->j;
1925   if (reuse == MAT_INITIAL_MATRIX || *matout == A) {
1926     PetscInt             *d_nnz,*g_nnz,*o_nnz;
1927     PetscSFNode          *oloc;
1928     PETSC_UNUSED PetscSF sf;
1929 
1930     ierr = PetscMalloc4(na,&d_nnz,na,&o_nnz,nb,&g_nnz,nb,&oloc);CHKERRQ(ierr);
1931     /* compute d_nnz for preallocation */
1932     ierr = PetscMemzero(d_nnz,na*sizeof(PetscInt));CHKERRQ(ierr);
1933     for (i=0; i<ai[ma]; i++) {
1934       d_nnz[aj[i]]++;
1935       aj[i] += cstart; /* global col index to be used by MatSetValues() */
1936     }
1937     /* compute local off-diagonal contributions */
1938     ierr = PetscMemzero(g_nnz,nb*sizeof(PetscInt));CHKERRQ(ierr);
1939     for (i=0; i<bi[ma]; i++) g_nnz[bj[i]]++;
1940     /* map those to global */
1941     ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
1942     ierr = PetscSFSetGraphLayout(sf,A->cmap,nb,NULL,PETSC_USE_POINTER,a->garray);CHKERRQ(ierr);
1943     ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
1944     ierr = PetscMemzero(o_nnz,na*sizeof(PetscInt));CHKERRQ(ierr);
1945     ierr = PetscSFReduceBegin(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr);
1946     ierr = PetscSFReduceEnd(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr);
1947     ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
1948 
1949     ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
1950     ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
1951     ierr = MatSetBlockSizes(B,PetscAbs(A->cmap->bs),PetscAbs(A->rmap->bs));CHKERRQ(ierr);
1952     ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
1953     ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
1954     ierr = PetscFree4(d_nnz,o_nnz,g_nnz,oloc);CHKERRQ(ierr);
1955   } else {
1956     B    = *matout;
1957     ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
1958     for (i=0; i<ai[ma]; i++) aj[i] += cstart; /* global col index to be used by MatSetValues() */
1959   }
1960 
1961   /* copy over the A part */
1962   array = Aloc->a;
1963   row   = A->rmap->rstart;
1964   for (i=0; i<ma; i++) {
1965     ncol = ai[i+1]-ai[i];
1966     ierr = MatSetValues(B,ncol,aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1967     row++;
1968     array += ncol; aj += ncol;
1969   }
1970   aj = Aloc->j;
1971   for (i=0; i<ai[ma]; i++) aj[i] -= cstart; /* restore the local col indices */
1972 
1973   /* copy over the B part */
1974   ierr  = PetscCalloc1(bi[mb],&cols);CHKERRQ(ierr);
1975   array = Bloc->a;
1976   row   = A->rmap->rstart;
1977   for (i=0; i<bi[mb]; i++) cols[i] = a->garray[bj[i]];
1978   cols_tmp = cols;
1979   for (i=0; i<mb; i++) {
1980     ncol = bi[i+1]-bi[i];
1981     ierr = MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1982     row++;
1983     array += ncol; cols_tmp += ncol;
1984   }
1985   ierr = PetscFree(cols);CHKERRQ(ierr);
1986 
1987   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1988   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1989   if (reuse == MAT_INITIAL_MATRIX || *matout != A) {
1990     *matout = B;
1991   } else {
1992     ierr = MatHeaderMerge(A,B);CHKERRQ(ierr);
1993   }
1994   PetscFunctionReturn(0);
1995 }
1996 
1997 #undef __FUNCT__
1998 #define __FUNCT__ "MatDiagonalScale_MPIAIJ"
1999 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
2000 {
2001   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2002   Mat            a    = aij->A,b = aij->B;
2003   PetscErrorCode ierr;
2004   PetscInt       s1,s2,s3;
2005 
2006   PetscFunctionBegin;
2007   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
2008   if (rr) {
2009     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
2010     if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
2011     /* Overlap communication with computation. */
2012     ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2013   }
2014   if (ll) {
2015     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
2016     if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
2017     ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr);
2018   }
2019   /* scale the diagonal block */
2020   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
2021 
2022   if (rr) {
2023     /* Do a scatter end and then right scale the off-diagonal block */
2024     ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2025     ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr);
2026   }
2027   PetscFunctionReturn(0);
2028 }
2029 
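/*
   The routine above computes mat = diag(ll) * mat * diag(rr) while overlapping communication
   with computation: the scatter of rr into lvec is started first, the left scaling of B and
   the full scaling of A proceed during the communication, and only the right scaling of B
   (which needs the scattered off-process entries of rr) waits for the scatter to finish.
*/
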
2030 #undef __FUNCT__
2031 #define __FUNCT__ "MatSetUnfactored_MPIAIJ"
2032 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A)
2033 {
2034   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2035   PetscErrorCode ierr;
2036 
2037   PetscFunctionBegin;
2038   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
2039   PetscFunctionReturn(0);
2040 }
2041 
2042 #undef __FUNCT__
2043 #define __FUNCT__ "MatEqual_MPIAIJ"
2044 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscBool  *flag)
2045 {
2046   Mat_MPIAIJ     *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data;
2047   Mat            a,b,c,d;
2048   PetscBool      flg;
2049   PetscErrorCode ierr;
2050 
2051   PetscFunctionBegin;
2052   a = matA->A; b = matA->B;
2053   c = matB->A; d = matB->B;
2054 
2055   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
2056   if (flg) {
2057     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
2058   }
2059   ierr = MPI_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
2060   PetscFunctionReturn(0);
2061 }
2062 
2063 #undef __FUNCT__
2064 #define __FUNCT__ "MatCopy_MPIAIJ"
2065 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
2066 {
2067   PetscErrorCode ierr;
2068   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2069   Mat_MPIAIJ     *b = (Mat_MPIAIJ*)B->data;
2070 
2071   PetscFunctionBegin;
2072   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
2073   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
2074     /* because of the column compression in the off-processor part of the matrix a->B,
2075        the number of columns in a->B and b->B may be different, hence we cannot call
2076        MatCopy() directly on the two parts. If need be, we could provide a more
2077        efficient copy than MatCopy_Basic() by first uncompressing the a->B matrices
2078        and then copying the submatrices */
2079     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
2080   } else {
2081     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
2082     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
2083   }
2084   PetscFunctionReturn(0);
2085 }
2086 
2087 #undef __FUNCT__
2088 #define __FUNCT__ "MatSetUp_MPIAIJ"
2089 PetscErrorCode MatSetUp_MPIAIJ(Mat A)
2090 {
2091   PetscErrorCode ierr;
2092 
2093   PetscFunctionBegin;
2094   ierr =  MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
2095   PetscFunctionReturn(0);
2096 }
2097 
2098 #undef __FUNCT__
2099 #define __FUNCT__ "MatAXPYGetPreallocation_MPIAIJ"
2100 /* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */
2101 static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y,const PetscInt *yltog,Mat X,const PetscInt *xltog,PetscInt *nnz)
2102 {
2103   PetscInt       i,m=Y->rmap->N;
2104   Mat_SeqAIJ     *x  = (Mat_SeqAIJ*)X->data;
2105   Mat_SeqAIJ     *y  = (Mat_SeqAIJ*)Y->data;
2106   const PetscInt *xi = x->i,*yi = y->i;
2107 
2108   PetscFunctionBegin;
2109   /* Set the number of nonzeros in the new matrix */
2110   for (i=0; i<m; i++) {
2111     PetscInt       j,k,nzx = xi[i+1] - xi[i],nzy = yi[i+1] - yi[i];
2112     const PetscInt *xj = x->j+xi[i],*yj = y->j+yi[i];
2113     nnz[i] = 0;
2114     for (j=0,k=0; j<nzx; j++) {                   /* Point in X */
2115       for (; k<nzy && yltog[yj[k]]<xltog[xj[j]]; k++) nnz[i]++; /* Catch up to X */
2116       if (k<nzy && yltog[yj[k]]==xltog[xj[j]]) k++;             /* Skip duplicate */
2117       nnz[i]++;
2118     }
2119     for (; k<nzy; k++) nnz[i]++;
2120   }
2121   PetscFunctionReturn(0);
2122 }
2123 
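/*
   A worked example of the merge count above: if, in some row, X has global columns {1,3,7}
   and Y has {2,3,8}, the two sorted lists are merged with the duplicate 3 counted once,
   giving nnz = 5 ({1,2,3,7,8}).  The xltog/yltog maps translate the compressed local column
   indices of the off-diagonal blocks to global indices before comparison.
*/
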
2124 #undef __FUNCT__
2125 #define __FUNCT__ "MatAXPY_MPIAIJ"
2126 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
2127 {
2128   PetscErrorCode ierr;
2129   PetscInt       i;
2130   Mat_MPIAIJ     *xx = (Mat_MPIAIJ*)X->data,*yy = (Mat_MPIAIJ*)Y->data;
2131   PetscBLASInt   bnz,one=1;
2132   Mat_SeqAIJ     *x,*y;
2133 
2134   PetscFunctionBegin;
2135   if (str == SAME_NONZERO_PATTERN) {
2136     PetscScalar alpha = a;
2137     x    = (Mat_SeqAIJ*)xx->A->data;
2138     ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr);
2139     y    = (Mat_SeqAIJ*)yy->A->data;
2140     PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
2141     x    = (Mat_SeqAIJ*)xx->B->data;
2142     y    = (Mat_SeqAIJ*)yy->B->data;
2143     ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr);
2144     PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
2145     ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr);
2146   } else if (str == SUBSET_NONZERO_PATTERN) {
2147     ierr = MatAXPY_SeqAIJ(yy->A,a,xx->A,str);CHKERRQ(ierr);
2148 
2149     x = (Mat_SeqAIJ*)xx->B->data;
2150     y = (Mat_SeqAIJ*)yy->B->data;
2151     if (y->xtoy && y->XtoY != xx->B) {
2152       ierr = PetscFree(y->xtoy);CHKERRQ(ierr);
2153       ierr = MatDestroy(&y->XtoY);CHKERRQ(ierr);
2154     }
2155     if (!y->xtoy) { /* get xtoy */
2156       ierr    = MatAXPYGetxtoy_Private(xx->B->rmap->n,x->i,x->j,xx->garray,y->i,y->j,yy->garray,&y->xtoy);CHKERRQ(ierr);
2157       y->XtoY = xx->B;
2158       ierr    = PetscObjectReference((PetscObject)xx->B);CHKERRQ(ierr);
2159     }
2160     for (i=0; i<x->nz; i++) y->a[y->xtoy[i]] += a*(x->a[i]);
2161     ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr);
2162   } else {
2163     Mat      B;
2164     PetscInt *nnz_d,*nnz_o;
2165     ierr = PetscMalloc1(yy->A->rmap->N,&nnz_d);CHKERRQ(ierr);
2166     ierr = PetscMalloc1(yy->B->rmap->N,&nnz_o);CHKERRQ(ierr);
2167     ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr);
2168     ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr);
2169     ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr);
2170     ierr = MatSetBlockSizesFromMats(B,Y,Y);CHKERRQ(ierr);
2171     ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr);
2172     ierr = MatAXPYGetPreallocation_SeqAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr);
2173     ierr = MatAXPYGetPreallocation_MPIAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr);
2174     ierr = MatMPIAIJSetPreallocation(B,0,nnz_d,0,nnz_o);CHKERRQ(ierr);
2175     ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr);
2176     ierr = MatHeaderReplace(Y,B);CHKERRQ(ierr);
2177     ierr = PetscFree(nnz_d);CHKERRQ(ierr);
2178     ierr = PetscFree(nnz_o);CHKERRQ(ierr);
2179   }
2180   PetscFunctionReturn(0);
2181 }
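
/*
   Y = a*X + Y above falls into three cases: with SAME_NONZERO_PATTERN the raw value arrays
   of the diagonal and off-diagonal blocks are combined directly with BLAS axpy; with
   SUBSET_NONZERO_PATTERN the diagonal block goes through MatAXPY_SeqAIJ and the off-diagonal
   block through a cached xtoy index map; otherwise a new matrix with the merged nonzero
   pattern is preallocated, filled, and swapped into Y via MatHeaderReplace().
*/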
2182 
2183 extern PetscErrorCode  MatConjugate_SeqAIJ(Mat);
2184 
2185 #undef __FUNCT__
2186 #define __FUNCT__ "MatConjugate_MPIAIJ"
2187 PetscErrorCode  MatConjugate_MPIAIJ(Mat mat)
2188 {
2189 #if defined(PETSC_USE_COMPLEX)
2190   PetscErrorCode ierr;
2191   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2192 
2193   PetscFunctionBegin;
2194   ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr);
2195   ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr);
2196 #else
2197   PetscFunctionBegin;
2198 #endif
2199   PetscFunctionReturn(0);
2200 }
2201 
2202 #undef __FUNCT__
2203 #define __FUNCT__ "MatRealPart_MPIAIJ"
2204 PetscErrorCode MatRealPart_MPIAIJ(Mat A)
2205 {
2206   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2207   PetscErrorCode ierr;
2208 
2209   PetscFunctionBegin;
2210   ierr = MatRealPart(a->A);CHKERRQ(ierr);
2211   ierr = MatRealPart(a->B);CHKERRQ(ierr);
2212   PetscFunctionReturn(0);
2213 }
2214 
2215 #undef __FUNCT__
2216 #define __FUNCT__ "MatImaginaryPart_MPIAIJ"
2217 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A)
2218 {
2219   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2220   PetscErrorCode ierr;
2221 
2222   PetscFunctionBegin;
2223   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
2224   ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
2225   PetscFunctionReturn(0);
2226 }
2227 
2228 #if defined(PETSC_HAVE_PBGL)
2229 
2230 #include <boost/parallel/mpi/bsp_process_group.hpp>
2231 #include <boost/graph/distributed/ilu_default_graph.hpp>
2232 #include <boost/graph/distributed/ilu_0_block.hpp>
2233 #include <boost/graph/distributed/ilu_preconditioner.hpp>
2234 #include <boost/graph/distributed/petsc/interface.hpp>
2235 #include <boost/multi_array.hpp>
2236 #include <boost/parallel/distributed_property_map.hpp>
2237 
2238 #undef __FUNCT__
2239 #define __FUNCT__ "MatILUFactorSymbolic_MPIAIJ"
2240 /*
2241   This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu>
2242 */
2243 PetscErrorCode MatILUFactorSymbolic_MPIAIJ(Mat fact,Mat A, IS isrow, IS iscol, const MatFactorInfo *info)
2244 {
2245   namespace petsc = boost::distributed::petsc;
2246 
2247   namespace graph_dist = boost::graph::distributed;
2248   using boost::graph::distributed::ilu_default::process_group_type;
2249   using boost::graph::ilu_permuted;
2250 
2251   PetscBool      row_identity, col_identity;
2252   PetscContainer c;
2253   PetscInt       m, n, M, N;
2254   PetscErrorCode ierr;
2255 
2256   PetscFunctionBegin;
2257   if (info->levels != 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only levels = 0 supported for parallel ILU");
2258   ierr = ISIdentity(isrow, &row_identity);CHKERRQ(ierr);
2259   ierr = ISIdentity(iscol, &col_identity);CHKERRQ(ierr);
2260   if (!row_identity || !col_identity) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for parallel ILU");
2261 
2262   process_group_type pg;
2263   typedef graph_dist::ilu_default::ilu_level_graph_type lgraph_type;
2264   lgraph_type  *lgraph_p   = new lgraph_type(petsc::num_global_vertices(A), pg, petsc::matrix_distribution(A, pg));
2265   lgraph_type& level_graph = *lgraph_p;
2266   graph_dist::ilu_default::graph_type&            graph(level_graph.graph);
2267 
2268   petsc::read_matrix(A, graph, get(boost::edge_weight, graph));
2269   ilu_permuted(level_graph);
2270 
2271   /* put together the new matrix */
2272   ierr = MatCreate(PetscObjectComm((PetscObject)A), fact);CHKERRQ(ierr);
2273   ierr = MatGetLocalSize(A, &m, &n);CHKERRQ(ierr);
2274   ierr = MatGetSize(A, &M, &N);CHKERRQ(ierr);
2275   ierr = MatSetSizes(fact, m, n, M, N);CHKERRQ(ierr);
2276   ierr = MatSetBlockSizesFromMats(fact,A,A);CHKERRQ(ierr);
2277   ierr = MatSetType(fact, ((PetscObject)A)->type_name);CHKERRQ(ierr);
2278   ierr = MatAssemblyBegin(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2279   ierr = MatAssemblyEnd(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2280 
2281   ierr = PetscContainerCreate(PetscObjectComm((PetscObject)A), &c);CHKERRQ(ierr);
2282   ierr = PetscContainerSetPointer(c, lgraph_p);CHKERRQ(ierr);
2283   ierr = PetscObjectCompose((PetscObject) (fact), "graph", (PetscObject) c);CHKERRQ(ierr);
2284   ierr = PetscContainerDestroy(&c);CHKERRQ(ierr);
2285   PetscFunctionReturn(0);
2286 }
2287 
2288 #undef __FUNCT__
2289 #define __FUNCT__ "MatLUFactorNumeric_MPIAIJ"
2290 PetscErrorCode MatLUFactorNumeric_MPIAIJ(Mat B,Mat A, const MatFactorInfo *info)
2291 {
2292   PetscFunctionBegin;
2293   PetscFunctionReturn(0);
2294 }
2295 
2296 #undef __FUNCT__
2297 #define __FUNCT__ "MatSolve_MPIAIJ"
2298 /*
2299   This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu>
2300 */
2301 PetscErrorCode MatSolve_MPIAIJ(Mat A, Vec b, Vec x)
2302 {
2303   namespace graph_dist = boost::graph::distributed;
2304 
2305   typedef graph_dist::ilu_default::ilu_level_graph_type lgraph_type;
2306   lgraph_type    *lgraph_p;
2307   PetscContainer c;
2308   PetscErrorCode ierr;
2309 
2310   PetscFunctionBegin;
2311   ierr = PetscObjectQuery((PetscObject) A, "graph", (PetscObject*) &c);CHKERRQ(ierr);
2312   ierr = PetscContainerGetPointer(c, (void**) &lgraph_p);CHKERRQ(ierr);
2313   ierr = VecCopy(b, x);CHKERRQ(ierr);
2314 
2315   PetscScalar *array_x;
2316   ierr = VecGetArray(x, &array_x);CHKERRQ(ierr);
2317   PetscInt sx;
2318   ierr = VecGetSize(x, &sx);CHKERRQ(ierr);
2319 
2320   PetscScalar *array_b;
2321   ierr = VecGetArray(b, &array_b);CHKERRQ(ierr);
2322   PetscInt sb;
2323   ierr = VecGetSize(b, &sb);CHKERRQ(ierr);
2324 
2325   lgraph_type& level_graph = *lgraph_p;
2326   graph_dist::ilu_default::graph_type&            graph(level_graph.graph);
2327 
2328   typedef boost::multi_array_ref<PetscScalar, 1> array_ref_type;
2329   array_ref_type                                 ref_b(array_b, boost::extents[num_vertices(graph)]);
2330   array_ref_type                                 ref_x(array_x, boost::extents[num_vertices(graph)]);
2331 
2332   typedef boost::iterator_property_map<array_ref_type::iterator,
2333                                        boost::property_map<graph_dist::ilu_default::graph_type, boost::vertex_index_t>::type>  gvector_type;
2334   gvector_type                                   vector_b(ref_b.begin(), get(boost::vertex_index, graph));
2335   gvector_type                                   vector_x(ref_x.begin(), get(boost::vertex_index, graph));
2336 
2337   ilu_set_solve(*lgraph_p, vector_b, vector_x);
2338   PetscFunctionReturn(0);
2339 }
2340 #endif
2341 
2342 #undef __FUNCT__
2343 #define __FUNCT__ "MatDestroy_MatRedundant"
2344 PetscErrorCode MatDestroy_MatRedundant(Mat A)
2345 {
2346   PetscErrorCode ierr;
2347   Mat_Redundant  *redund;
2348   PetscInt       i;
2349   PetscMPIInt    size;
2350 
2351   PetscFunctionBegin;
2352   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRQ(ierr);
2353   if (size == 1) {
2354     Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
2355     redund = a->redundant;
2356   } else {
2357     Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
2358     redund = a->redundant;
2359   }
2360   if (redund) {
2361     if (redund->matseq) { /* via MatGetSubMatrices() */
2362       ierr = ISDestroy(&redund->isrow);CHKERRQ(ierr);
2363       ierr = ISDestroy(&redund->iscol);CHKERRQ(ierr);
2364       ierr = MatDestroy(&redund->matseq[0]);CHKERRQ(ierr);
2365       ierr = PetscFree(redund->matseq);CHKERRQ(ierr);
2366     } else {
2367       ierr = PetscFree2(redund->send_rank,redund->recv_rank);CHKERRQ(ierr);
2368       ierr = PetscFree(redund->sbuf_j);CHKERRQ(ierr);
2369       ierr = PetscFree(redund->sbuf_a);CHKERRQ(ierr);
2370       for (i=0; i<redund->nrecvs; i++) {
2371         ierr = PetscFree(redund->rbuf_j[i]);CHKERRQ(ierr);
2372         ierr = PetscFree(redund->rbuf_a[i]);CHKERRQ(ierr);
2373       }
2374       ierr = PetscFree4(redund->sbuf_nz,redund->rbuf_nz,redund->rbuf_j,redund->rbuf_a);CHKERRQ(ierr);
2375     }
2376 
2377     if (redund->psubcomm) {
2378       ierr = PetscSubcommDestroy(&redund->psubcomm);CHKERRQ(ierr);
2379     }
2380     ierr = redund->Destroy(A);CHKERRQ(ierr);
2381     ierr = PetscFree(redund);CHKERRQ(ierr);
2382   }
2383   PetscFunctionReturn(0);
2384 }
2385 
2386 #undef __FUNCT__
2387 #define __FUNCT__ "MatGetRedundantMatrix_MPIAIJ_interlaced"
2388 PetscErrorCode MatGetRedundantMatrix_MPIAIJ_interlaced(Mat mat,PetscInt nsubcomm,MPI_Comm subcomm,MatReuse reuse,Mat *matredundant)
2389 {
2390   PetscMPIInt    rank,size;
2391   MPI_Comm       comm;
2392   PetscErrorCode ierr;
2393   PetscInt       nsends=0,nrecvs=0,i,rownz_max=0,M=mat->rmap->N,N=mat->cmap->N;
2394   PetscMPIInt    *send_rank= NULL,*recv_rank=NULL,subrank,subsize;
2395   PetscInt       *rowrange = mat->rmap->range;
2396   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2397   Mat            A = aij->A,B=aij->B,C=*matredundant;
2398   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data;
2399   PetscScalar    *sbuf_a;
2400   PetscInt       nzlocal=a->nz+b->nz;
2401   PetscInt       j,cstart=mat->cmap->rstart,cend=mat->cmap->rend,row,nzA,nzB,ncols,*cworkA,*cworkB;
2402   PetscInt       rstart=mat->rmap->rstart,rend=mat->rmap->rend,*bmap=aij->garray;
2403   PetscInt       *cols,ctmp,lwrite,*rptr,l,*sbuf_j;
2404   MatScalar      *aworkA,*aworkB;
2405   PetscScalar    *vals;
2406   PetscMPIInt    tag1,tag2,tag3,imdex;
2407   MPI_Request    *s_waits1=NULL,*s_waits2=NULL,*s_waits3=NULL;
2408   MPI_Request    *r_waits1=NULL,*r_waits2=NULL,*r_waits3=NULL;
2409   MPI_Status     recv_status,*send_status;
2410   PetscInt       *sbuf_nz=NULL,*rbuf_nz=NULL,count;
2411   PetscInt       **rbuf_j=NULL;
2412   PetscScalar    **rbuf_a=NULL;
2413   Mat_Redundant  *redund =NULL;
2414 
2415   PetscFunctionBegin;
2416   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
2417   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2418   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2419   ierr = MPI_Comm_rank(subcomm,&subrank);CHKERRQ(ierr);
2420   ierr = MPI_Comm_size(subcomm,&subsize);CHKERRQ(ierr);
2421 
2422   if (reuse == MAT_REUSE_MATRIX) {
2423     if (C->rmap->N != M || C->cmap->N != N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong global size");
2424     if (subsize == 1) {
2425       Mat_SeqAIJ *c = (Mat_SeqAIJ*)C->data;
2426       redund = c->redundant;
2427     } else {
2428       Mat_MPIAIJ *c = (Mat_MPIAIJ*)C->data;
2429       redund = c->redundant;
2430     }
2431     if (nzlocal != redund->nzlocal) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong nzlocal");
2432 
2433     nsends    = redund->nsends;
2434     nrecvs    = redund->nrecvs;
2435     send_rank = redund->send_rank;
2436     recv_rank = redund->recv_rank;
2437     sbuf_nz   = redund->sbuf_nz;
2438     rbuf_nz   = redund->rbuf_nz;
2439     sbuf_j    = redund->sbuf_j;
2440     sbuf_a    = redund->sbuf_a;
2441     rbuf_j    = redund->rbuf_j;
2442     rbuf_a    = redund->rbuf_a;
2443   }
2444 
2445   if (reuse == MAT_INITIAL_MATRIX) {
2446     PetscInt    nleftover,np_subcomm;
2447 
2448     /* get the destination processors' id send_rank, nsends and nrecvs */
2449     ierr = PetscMalloc2(size,&send_rank,size,&recv_rank);CHKERRQ(ierr);
2450 
2451     np_subcomm = size/nsubcomm;
2452     nleftover  = size - nsubcomm*np_subcomm;
2453 
2454     /* the block of code below is specific to INTERLACED */
2455     /* ------------------------------------------------*/
2456     nsends = 0; nrecvs = 0;
2457     for (i=0; i<size; i++) {
2458       if (subrank == i/nsubcomm && i != rank) { /* my_subrank == other's subrank */
2459         send_rank[nsends++] = i;
2460         recv_rank[nrecvs++] = i;
2461       }
2462     }
2463     if (rank >= size - nleftover) { /* this proc is a leftover processor */
2464       i = size-nleftover-1;
2465       j = 0;
2466       while (j < nsubcomm - nleftover) {
2467         send_rank[nsends++] = i;
2468         i--; j++;
2469       }
2470     }
2471 
2472     if (nleftover && subsize == size/nsubcomm && subrank==subsize-1) { /* this proc recvs from leftover processors */
2473       for (i=0; i<nleftover; i++) {
2474         recv_rank[nrecvs++] = size-nleftover+i;
2475       }
2476     }
2477     /*----------------------------------------------*/
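    /*
       A worked example of the interlaced pairing above, assuming size = 4 and nsubcomm = 2
       (no leftover ranks): subrank = rank/nsubcomm, so ranks {0,1} share subrank 0 and ranks
       {2,3} share subrank 1.  Each rank sends its local rows to, and receives from, the peers
       holding the same subrank in the other subcommunicator, so every subcommunicator ends up
       with a full copy of the matrix.
    */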
2478 
2479     /* allocate sbuf_j, sbuf_a */
2480     i    = nzlocal + rowrange[rank+1] - rowrange[rank] + 2;
2481     ierr = PetscMalloc1(i,&sbuf_j);CHKERRQ(ierr);
2482     ierr = PetscMalloc1((nzlocal+1),&sbuf_a);CHKERRQ(ierr);
2483     /*
2484     ierr = PetscSynchronizedPrintf(comm,"[%d] nsends %d, nrecvs %d\n",rank,nsends,nrecvs);CHKERRQ(ierr);
2485     ierr = PetscSynchronizedFlush(comm,PETSC_STDOUT);CHKERRQ(ierr);
2486      */
2487   } /* end of if (reuse == MAT_INITIAL_MATRIX) */
2488 
2489   /* copy mat's local entries into the buffers */
2490   if (reuse == MAT_INITIAL_MATRIX) {
2491     rownz_max = 0;
2492     rptr      = sbuf_j;
2493     cols      = sbuf_j + rend-rstart + 1;
2494     vals      = sbuf_a;
2495     rptr[0]   = 0;
2496     for (i=0; i<rend-rstart; i++) {
2497       row    = i + rstart;
2498       nzA    = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i];
2499       ncols  = nzA + nzB;
2500       cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i];
2501       aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i];
2502       /* load the column indices for this row into cols */
2503       lwrite = 0;
2504       for (l=0; l<nzB; l++) {
2505         if ((ctmp = bmap[cworkB[l]]) < cstart) {
2506           vals[lwrite]   = aworkB[l];
2507           cols[lwrite++] = ctmp;
2508         }
2509       }
2510       for (l=0; l<nzA; l++) {
2511         vals[lwrite]   = aworkA[l];
2512         cols[lwrite++] = cstart + cworkA[l];
2513       }
2514       for (l=0; l<nzB; l++) {
2515         if ((ctmp = bmap[cworkB[l]]) >= cend) {
2516           vals[lwrite]   = aworkB[l];
2517           cols[lwrite++] = ctmp;
2518         }
2519       }
2520       vals     += ncols;
2521       cols     += ncols;
2522       rptr[i+1] = rptr[i] + ncols;
2523       if (rownz_max < ncols) rownz_max = ncols;
2524     }
2525     if (rptr[rend-rstart] != a->nz + b->nz) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB, "rptr[%D] %D != %D + %D",rend-rstart,rptr[rend-rstart],a->nz,b->nz);
2526   } else { /* only copy matrix values into sbuf_a */
2527     rptr    = sbuf_j;
2528     vals    = sbuf_a;
2529     rptr[0] = 0;
2530     for (i=0; i<rend-rstart; i++) {
2531       row    = i + rstart;
2532       nzA    = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i];
2533       ncols  = nzA + nzB;
2534       cworkB = b->j + b->i[i];
2535       aworkA = a->a + a->i[i];
2536       aworkB = b->a + b->i[i];
2537       lwrite = 0;
2538       for (l=0; l<nzB; l++) {
2539         if ((ctmp = bmap[cworkB[l]]) < cstart) vals[lwrite++] = aworkB[l];
2540       }
2541       for (l=0; l<nzA; l++) vals[lwrite++] = aworkA[l];
2542       for (l=0; l<nzB; l++) {
2543         if ((ctmp = bmap[cworkB[l]]) >= cend) vals[lwrite++] = aworkB[l];
2544       }
2545       vals     += ncols;
2546       rptr[i+1] = rptr[i] + ncols;
2547     }
2548   } /* endof if (reuse == MAT_INITIAL_MATRIX) */
2549 
2550   /* send nzlocal to others, and recv others' nzlocal */
2551   /*--------------------------------------------------*/
2552   if (reuse == MAT_INITIAL_MATRIX) {
2553     ierr = PetscMalloc2(3*(nsends + nrecvs)+1,&s_waits3,nsends+1,&send_status);CHKERRQ(ierr);
2554 
2555     s_waits2 = s_waits3 + nsends;
2556     s_waits1 = s_waits2 + nsends;
2557     r_waits1 = s_waits1 + nsends;
2558     r_waits2 = r_waits1 + nrecvs;
2559     r_waits3 = r_waits2 + nrecvs;
2560   } else {
2561     ierr = PetscMalloc2(nsends + nrecvs +1,&s_waits3,nsends+1,&send_status);CHKERRQ(ierr);
2562 
2563     r_waits3 = s_waits3 + nsends;
2564   }
2565 
2566   ierr = PetscObjectGetNewTag((PetscObject)mat,&tag3);CHKERRQ(ierr);
2567   if (reuse == MAT_INITIAL_MATRIX) {
2568     /* get new tags to keep the communication clean */
2569     ierr = PetscObjectGetNewTag((PetscObject)mat,&tag1);CHKERRQ(ierr);
2570     ierr = PetscObjectGetNewTag((PetscObject)mat,&tag2);CHKERRQ(ierr);
2571     ierr = PetscMalloc4(nsends,&sbuf_nz,nrecvs,&rbuf_nz,nrecvs,&rbuf_j,nrecvs,&rbuf_a);CHKERRQ(ierr);
2572 
2573     /* post receives of other's nzlocal */
2574     for (i=0; i<nrecvs; i++) {
2575       ierr = MPI_Irecv(rbuf_nz+i,1,MPIU_INT,MPI_ANY_SOURCE,tag1,comm,r_waits1+i);CHKERRQ(ierr);
2576     }
2577     /* send nzlocal to others */
2578     for (i=0; i<nsends; i++) {
2579       sbuf_nz[i] = nzlocal;
2580       ierr       = MPI_Isend(sbuf_nz+i,1,MPIU_INT,send_rank[i],tag1,comm,s_waits1+i);CHKERRQ(ierr);
2581     }
2582     /* wait on receives of nzlocal; allocate space for rbuf_j, rbuf_a */
2583     count = nrecvs;
2584     while (count) {
2585       ierr = MPI_Waitany(nrecvs,r_waits1,&imdex,&recv_status);CHKERRQ(ierr);
2586 
2587       recv_rank[imdex] = recv_status.MPI_SOURCE;
2588       /* allocate rbuf_a and rbuf_j; then post receives of rbuf_j */
2589       ierr = PetscMalloc1((rbuf_nz[imdex]+1),&rbuf_a[imdex]);CHKERRQ(ierr);
2590 
2591       i = rowrange[recv_status.MPI_SOURCE+1] - rowrange[recv_status.MPI_SOURCE]; /* number of expected mat->i */
2592 
2593       rbuf_nz[imdex] += i + 2;
2594 
2595       ierr = PetscMalloc1(rbuf_nz[imdex],&rbuf_j[imdex]);CHKERRQ(ierr);
2596       ierr = MPI_Irecv(rbuf_j[imdex],rbuf_nz[imdex],MPIU_INT,recv_status.MPI_SOURCE,tag2,comm,r_waits2+imdex);CHKERRQ(ierr);
2597       count--;
2598     }
2599     /* wait on sends of nzlocal */
2600     if (nsends) {ierr = MPI_Waitall(nsends,s_waits1,send_status);CHKERRQ(ierr);}
2601     /* send mat->i,j to others, and recv from others */
2602     /*------------------------------------------------*/
2603     for (i=0; i<nsends; i++) {
2604       j    = nzlocal + rowrange[rank+1] - rowrange[rank] + 1;
2605       ierr = MPI_Isend(sbuf_j,j,MPIU_INT,send_rank[i],tag2,comm,s_waits2+i);CHKERRQ(ierr);
2606     }
2607     /* wait on receives of mat->i,j */
2608     /*------------------------------*/
2609     count = nrecvs;
2610     while (count) {
2611       ierr = MPI_Waitany(nrecvs,r_waits2,&imdex,&recv_status);CHKERRQ(ierr);
2612       if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE);
2613       count--;
2614     }
2615     /* wait on sends of mat->i,j */
2616     /*---------------------------*/
2617     if (nsends) {
2618       ierr = MPI_Waitall(nsends,s_waits2,send_status);CHKERRQ(ierr);
2619     }
2620   } /* endof if (reuse == MAT_INITIAL_MATRIX) */
2621 
2622   /* post receives, send and receive mat->a */
2623   /*----------------------------------------*/
2624   for (imdex=0; imdex<nrecvs; imdex++) {
2625     ierr = MPI_Irecv(rbuf_a[imdex],rbuf_nz[imdex],MPIU_SCALAR,recv_rank[imdex],tag3,comm,r_waits3+imdex);CHKERRQ(ierr);
2626   }
2627   for (i=0; i<nsends; i++) {
2628     ierr = MPI_Isend(sbuf_a,nzlocal,MPIU_SCALAR,send_rank[i],tag3,comm,s_waits3+i);CHKERRQ(ierr);
2629   }
2630   count = nrecvs;
2631   while (count) {
2632     ierr = MPI_Waitany(nrecvs,r_waits3,&imdex,&recv_status);CHKERRQ(ierr);
2633     if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE);
2634     count--;
2635   }
2636   if (nsends) {
2637     ierr = MPI_Waitall(nsends,s_waits3,send_status);CHKERRQ(ierr);
2638   }
2639 
2640   ierr = PetscFree2(s_waits3,send_status);CHKERRQ(ierr);
2641 
2642   /* create redundant matrix */
2643   /*-------------------------*/
2644   if (reuse == MAT_INITIAL_MATRIX) {
2645     const PetscInt *range;
2646     PetscInt       rstart_sub,rend_sub,mloc_sub;
2647 
2648     /* compute rownz_max for preallocation */
2649     for (imdex=0; imdex<nrecvs; imdex++) {
2650       j    = rowrange[recv_rank[imdex]+1] - rowrange[recv_rank[imdex]];
2651       rptr = rbuf_j[imdex];
2652       for (i=0; i<j; i++) {
2653         ncols = rptr[i+1] - rptr[i];
2654         if (rownz_max < ncols) rownz_max = ncols;
2655       }
2656     }
2657 
2658     ierr = MatCreate(subcomm,&C);CHKERRQ(ierr);
2659 
2660     /* get local size of redundant matrix
2661        - mloc_sub is chosen for PETSC_SUBCOMM_INTERLACED, works for other types, but may not be efficient! */
2662     ierr = MatGetOwnershipRanges(mat,&range);CHKERRQ(ierr);
2663     rstart_sub = range[nsubcomm*subrank];
2664     if (subrank+1 < subsize) { /* not the last proc in subcomm */
2665       rend_sub = range[nsubcomm*(subrank+1)];
2666     } else {
2667       rend_sub = mat->rmap->N;
2668     }
2669     mloc_sub = rend_sub - rstart_sub;
2670 
2671     if (M == N) {
2672       ierr = MatSetSizes(C,mloc_sub,mloc_sub,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr);
2673     } else { /* non-square matrix */
2674       ierr = MatSetSizes(C,mloc_sub,PETSC_DECIDE,PETSC_DECIDE,mat->cmap->N);CHKERRQ(ierr);
2675     }
2676     ierr = MatSetBlockSizesFromMats(C,mat,mat);CHKERRQ(ierr);
2677     ierr = MatSetFromOptions(C);CHKERRQ(ierr);
2678     ierr = MatSeqAIJSetPreallocation(C,rownz_max,NULL);CHKERRQ(ierr);
2679     ierr = MatMPIAIJSetPreallocation(C,rownz_max,NULL,rownz_max,NULL);CHKERRQ(ierr);
2680   } else {
2681     C = *matredundant;
2682   }
2683 
2684   /* insert local matrix entries */
2685   rptr = sbuf_j;
2686   cols = sbuf_j + rend-rstart + 1;
2687   vals = sbuf_a;
2688   for (i=0; i<rend-rstart; i++) {
2689     row   = i + rstart;
2690     ncols = rptr[i+1] - rptr[i];
2691     ierr  = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
2692     vals += ncols;
2693     cols += ncols;
2694   }
2695   /* insert received matrix entries */
2696   for (imdex=0; imdex<nrecvs; imdex++) {
2697     rstart = rowrange[recv_rank[imdex]];
2698     rend   = rowrange[recv_rank[imdex]+1];
2699     /* printf("[%d] insert rows %d - %d\n",rank,rstart,rend-1); */
2700     rptr   = rbuf_j[imdex];
2701     cols   = rbuf_j[imdex] + rend-rstart + 1;
2702     vals   = rbuf_a[imdex];
2703     for (i=0; i<rend-rstart; i++) {
2704       row   = i + rstart;
2705       ncols = rptr[i+1] - rptr[i];
2706       ierr  = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
2707       vals += ncols;
2708       cols += ncols;
2709     }
2710   }
2711   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2712   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2713 
2714   if (reuse == MAT_INITIAL_MATRIX) {
2715     *matredundant = C;
2716 
2717     /* create a supporting struct and attach it to C for reuse */
2718     ierr = PetscNewLog(C,&redund);CHKERRQ(ierr);
2719     if (subsize == 1) {
2720       Mat_SeqAIJ *c = (Mat_SeqAIJ*)C->data;
2721       c->redundant = redund;
2722     } else {
2723       Mat_MPIAIJ *c = (Mat_MPIAIJ*)C->data;
2724       c->redundant = redund;
2725     }
2726 
2727     redund->nzlocal   = nzlocal;
2728     redund->nsends    = nsends;
2729     redund->nrecvs    = nrecvs;
2730     redund->send_rank = send_rank;
2731     redund->recv_rank = recv_rank;
2732     redund->sbuf_nz   = sbuf_nz;
2733     redund->rbuf_nz   = rbuf_nz;
2734     redund->sbuf_j    = sbuf_j;
2735     redund->sbuf_a    = sbuf_a;
2736     redund->rbuf_j    = rbuf_j;
2737     redund->rbuf_a    = rbuf_a;
2738     redund->psubcomm  = NULL;
2739 
2740     redund->Destroy = C->ops->destroy;
2741     C->ops->destroy = MatDestroy_MatRedundant;
2742   }
2743   PetscFunctionReturn(0);
2744 }
2745 
2746 #undef __FUNCT__
2747 #define __FUNCT__ "MatGetRedundantMatrix_MPIAIJ"
2748 PetscErrorCode MatGetRedundantMatrix_MPIAIJ(Mat mat,PetscInt nsubcomm,MPI_Comm subcomm,MatReuse reuse,Mat *matredundant)
2749 {
2750   PetscErrorCode ierr;
2751   MPI_Comm       comm;
2752   PetscMPIInt    size,subsize;
2753   PetscInt       mloc_sub,rstart,rend,M=mat->rmap->N,N=mat->cmap->N;
2754   Mat_Redundant  *redund=NULL;
2755   PetscSubcomm   psubcomm=NULL;
2756   MPI_Comm       subcomm_in=subcomm;
2757   Mat            *matseq;
2758   IS             isrow,iscol;
2759 
2760   PetscFunctionBegin;
2761   if (subcomm_in == MPI_COMM_NULL) { /* user does not provide subcomm */
2762     if (reuse ==  MAT_INITIAL_MATRIX) {
2763       /* create psubcomm, then get subcomm */
2764       ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
2765       ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2766       if (nsubcomm < 1 || nsubcomm > size) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"nsubcomm must be between 1 and %D",size);
2767 
2768       ierr = PetscSubcommCreate(comm,&psubcomm);CHKERRQ(ierr);
2769       ierr = PetscSubcommSetNumber(psubcomm,nsubcomm);CHKERRQ(ierr);
2770       ierr = PetscSubcommSetType(psubcomm,PETSC_SUBCOMM_CONTIGUOUS);CHKERRQ(ierr);
2771       ierr = PetscSubcommSetFromOptions(psubcomm);CHKERRQ(ierr);
2772       subcomm = psubcomm->comm;
2773     } else { /* retrieve psubcomm and subcomm */
2774       ierr = PetscObjectGetComm((PetscObject)(*matredundant),&subcomm);CHKERRQ(ierr);
2775       ierr = MPI_Comm_size(subcomm,&subsize);CHKERRQ(ierr);
2776       if (subsize == 1) {
2777         Mat_SeqAIJ *c = (Mat_SeqAIJ*)(*matredundant)->data;
2778         redund = c->redundant;
2779       } else {
2780         Mat_MPIAIJ *c = (Mat_MPIAIJ*)(*matredundant)->data;
2781         redund = c->redundant;
2782       }
2783       psubcomm = redund->psubcomm;
2784     }
2785     if (psubcomm->type == PETSC_SUBCOMM_INTERLACED) {
2786       ierr = MatGetRedundantMatrix_MPIAIJ_interlaced(mat,nsubcomm,subcomm,reuse,matredundant);CHKERRQ(ierr);
2787       if (reuse ==  MAT_INITIAL_MATRIX) { /* psubcomm is created in this routine, free it in MatDestroy_MatRedundant() */
2788         ierr = MPI_Comm_size(psubcomm->comm,&subsize);CHKERRQ(ierr);
2789         if (subsize == 1) {
2790           Mat_SeqAIJ *c = (Mat_SeqAIJ*)(*matredundant)->data;
2791           c->redundant->psubcomm = psubcomm;
2792         } else {
2793           Mat_MPIAIJ *c = (Mat_MPIAIJ*)(*matredundant)->data;
2794           c->redundant->psubcomm = psubcomm;
2795         }
2796       }
2797       PetscFunctionReturn(0);
2798     }
2799   }
2800 
2801   /* use MPI subcomm via MatGetSubMatrices(); use subcomm_in or psubcomm->comm (psubcomm->type != INTERLACED) */
2802   ierr = MPI_Comm_size(subcomm,&subsize);CHKERRQ(ierr);
2803   if (reuse == MAT_INITIAL_MATRIX) {
2804     /* create a local sequential matrix matseq[0] */
2805     mloc_sub = PETSC_DECIDE;
2806     ierr = PetscSplitOwnership(subcomm,&mloc_sub,&M);CHKERRQ(ierr);
2807     ierr = MPI_Scan(&mloc_sub,&rend,1,MPIU_INT,MPI_SUM,subcomm);CHKERRQ(ierr);
2808     rstart = rend - mloc_sub;
2809     ierr = ISCreateStride(PETSC_COMM_SELF,mloc_sub,rstart,1,&isrow);CHKERRQ(ierr);
2810     ierr = ISCreateStride(PETSC_COMM_SELF,N,0,1,&iscol);CHKERRQ(ierr);
2811   } else { /* reuse == MAT_REUSE_MATRIX */
2812     if (subsize == 1) {
2813       Mat_SeqAIJ *c = (Mat_SeqAIJ*)(*matredundant)->data;
2814       redund = c->redundant;
2815     } else {
2816       Mat_MPIAIJ *c = (Mat_MPIAIJ*)(*matredundant)->data;
2817       redund = c->redundant;
2818     }
2819 
2820     isrow  = redund->isrow;
2821     iscol  = redund->iscol;
2822     matseq = redund->matseq;
2823   }
2824   ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,reuse,&matseq);CHKERRQ(ierr);
2825   ierr = MatCreateMPIAIJConcatenateSeqAIJ(subcomm,matseq[0],PETSC_DECIDE,reuse,matredundant);CHKERRQ(ierr);
2826 
2827   if (reuse == MAT_INITIAL_MATRIX) {
2828     /* create a supporting struct and attach it to C for reuse */
2829     ierr = PetscNewLog(*matredundant,&redund);CHKERRQ(ierr);
2830     if (subsize == 1) {
2831       Mat_SeqAIJ *c = (Mat_SeqAIJ*)(*matredundant)->data;
2832       c->redundant = redund;
2833     } else {
2834       Mat_MPIAIJ *c = (Mat_MPIAIJ*)(*matredundant)->data;
2835       c->redundant = redund;
2836     }
2837     redund->isrow    = isrow;
2838     redund->iscol    = iscol;
2839     redund->matseq   = matseq;
2840     redund->psubcomm = psubcomm;
2841     redund->Destroy               = (*matredundant)->ops->destroy;
2842     (*matredundant)->ops->destroy = MatDestroy_MatRedundant;
2843   }
2844   PetscFunctionReturn(0);
2845 }
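
/*
   A minimal usage sketch (names A and nsubcomm are illustrative) of the public
   entry point MatGetRedundantMatrix(), which dispatches here for MPIAIJ:

     Mat Ared;
     ierr = MatGetRedundantMatrix(A,nsubcomm,MPI_COMM_NULL,MAT_INITIAL_MATRIX,&Ared);CHKERRQ(ierr);
     ... change the numerical values of A, keeping its nonzero pattern ...
     ierr = MatGetRedundantMatrix(A,nsubcomm,MPI_COMM_NULL,MAT_REUSE_MATRIX,&Ared);CHKERRQ(ierr);
     ierr = MatDestroy(&Ared);CHKERRQ(ierr);
*/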
2846 
2847 #undef __FUNCT__
2848 #define __FUNCT__ "MatGetRowMaxAbs_MPIAIJ"
2849 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2850 {
2851   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2852   PetscErrorCode ierr;
2853   PetscInt       i,*idxb = 0;
2854   PetscScalar    *va,*vb;
2855   Vec            vtmp;
2856 
2857   PetscFunctionBegin;
2858   ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr);
2859   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2860   if (idx) {
2861     for (i=0; i<A->rmap->n; i++) {
2862       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2863     }
2864   }
2865 
2866   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
2867   if (idx) {
2868     ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr);
2869   }
2870   ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
2871   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
2872 
2873   for (i=0; i<A->rmap->n; i++) {
2874     if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
2875       va[i] = vb[i];
2876       if (idx) idx[i] = a->garray[idxb[i]];
2877     }
2878   }
2879 
2880   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2881   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
2882   ierr = PetscFree(idxb);CHKERRQ(ierr);
2883   ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
2884   PetscFunctionReturn(0);
2885 }
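
/*
   A usage sketch of the MatGetRowMaxAbs() interface implemented above; the
   matrix A and its local row count m are assumed to exist:

     Vec      rowmax;
     PetscInt *loc;
     ierr = MatGetVecs(A,NULL,&rowmax);CHKERRQ(ierr);     left vector: one entry per local row
     ierr = PetscMalloc1(m,&loc);CHKERRQ(ierr);
     ierr = MatGetRowMaxAbs(A,rowmax,loc);CHKERRQ(ierr);  loc[i] = global column of row i's max
     ierr = PetscFree(loc);CHKERRQ(ierr);
     ierr = VecDestroy(&rowmax);CHKERRQ(ierr);

   MatGetRowMinAbs() below is used the same way.
*/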
2886 
2887 #undef __FUNCT__
2888 #define __FUNCT__ "MatGetRowMinAbs_MPIAIJ"
2889 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2890 {
2891   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2892   PetscErrorCode ierr;
2893   PetscInt       i,*idxb = 0;
2894   PetscScalar    *va,*vb;
2895   Vec            vtmp;
2896 
2897   PetscFunctionBegin;
2898   ierr = MatGetRowMinAbs(a->A,v,idx);CHKERRQ(ierr);
2899   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2900   if (idx) {
2901     for (i=0; i<A->rmap->n; i++) {
2902       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2903     }
2904   }
2905 
2906   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
2907   if (idx) {
2908     ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr);
2909   }
2910   ierr = MatGetRowMinAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
2911   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
2912 
2913   for (i=0; i<A->rmap->n; i++) {
2914     if (PetscAbsScalar(va[i]) > PetscAbsScalar(vb[i])) {
2915       va[i] = vb[i];
2916       if (idx) idx[i] = a->garray[idxb[i]];
2917     }
2918   }
2919 
2920   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2921   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
2922   ierr = PetscFree(idxb);CHKERRQ(ierr);
2923   ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
2924   PetscFunctionReturn(0);
2925 }
2926 
2927 #undef __FUNCT__
2928 #define __FUNCT__ "MatGetRowMin_MPIAIJ"
2929 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2930 {
2931   Mat_MPIAIJ     *mat   = (Mat_MPIAIJ*) A->data;
2932   PetscInt       n      = A->rmap->n;
2933   PetscInt       cstart = A->cmap->rstart;
2934   PetscInt       *cmap  = mat->garray;
2935   PetscInt       *diagIdx, *offdiagIdx;
2936   Vec            diagV, offdiagV;
2937   PetscScalar    *a, *diagA, *offdiagA;
2938   PetscInt       r;
2939   PetscErrorCode ierr;
2940 
2941   PetscFunctionBegin;
2942   ierr = PetscMalloc2(n,&diagIdx,n,&offdiagIdx);CHKERRQ(ierr);
2943   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr);
2944   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr);
2945   ierr = MatGetRowMin(mat->A, diagV,    diagIdx);CHKERRQ(ierr);
2946   ierr = MatGetRowMin(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
2947   ierr = VecGetArray(v,        &a);CHKERRQ(ierr);
2948   ierr = VecGetArray(diagV,    &diagA);CHKERRQ(ierr);
2949   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2950   for (r = 0; r < n; ++r) {
2951     if (PetscAbsScalar(diagA[r]) <= PetscAbsScalar(offdiagA[r])) {
2952       a[r]   = diagA[r];
2953       idx[r] = cstart + diagIdx[r];
2954     } else {
2955       a[r]   = offdiagA[r];
2956       idx[r] = cmap[offdiagIdx[r]];
2957     }
2958   }
2959   ierr = VecRestoreArray(v,        &a);CHKERRQ(ierr);
2960   ierr = VecRestoreArray(diagV,    &diagA);CHKERRQ(ierr);
2961   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2962   ierr = VecDestroy(&diagV);CHKERRQ(ierr);
2963   ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
2964   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
2965   PetscFunctionReturn(0);
2966 }
2967 
2968 #undef __FUNCT__
2969 #define __FUNCT__ "MatGetRowMax_MPIAIJ"
2970 PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2971 {
2972   Mat_MPIAIJ     *mat   = (Mat_MPIAIJ*) A->data;
2973   PetscInt       n      = A->rmap->n;
2974   PetscInt       cstart = A->cmap->rstart;
2975   PetscInt       *cmap  = mat->garray;
2976   PetscInt       *diagIdx, *offdiagIdx;
2977   Vec            diagV, offdiagV;
2978   PetscScalar    *a, *diagA, *offdiagA;
2979   PetscInt       r;
2980   PetscErrorCode ierr;
2981 
2982   PetscFunctionBegin;
2983   ierr = PetscMalloc2(n,&diagIdx,n,&offdiagIdx);CHKERRQ(ierr);
2984   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr);
2985   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr);
2986   ierr = MatGetRowMax(mat->A, diagV,    diagIdx);CHKERRQ(ierr);
2987   ierr = MatGetRowMax(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
2988   ierr = VecGetArray(v,        &a);CHKERRQ(ierr);
2989   ierr = VecGetArray(diagV,    &diagA);CHKERRQ(ierr);
2990   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2991   for (r = 0; r < n; ++r) {
2992     if (PetscAbsScalar(diagA[r]) >= PetscAbsScalar(offdiagA[r])) {
2993       a[r]   = diagA[r];
2994       idx[r] = cstart + diagIdx[r];
2995     } else {
2996       a[r]   = offdiagA[r];
2997       idx[r] = cmap[offdiagIdx[r]];
2998     }
2999   }
3000   ierr = VecRestoreArray(v,        &a);CHKERRQ(ierr);
3001   ierr = VecRestoreArray(diagV,    &diagA);CHKERRQ(ierr);
3002   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
3003   ierr = VecDestroy(&diagV);CHKERRQ(ierr);
3004   ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
3005   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
3006   PetscFunctionReturn(0);
3007 }
3008 
3009 #undef __FUNCT__
3010 #define __FUNCT__ "MatGetSeqNonzeroStructure_MPIAIJ"
3011 PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat,Mat *newmat)
3012 {
3013   PetscErrorCode ierr;
3014   Mat            *dummy;
3015 
3016   PetscFunctionBegin;
3017   ierr    = MatGetSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy);CHKERRQ(ierr);
3018   *newmat = *dummy;
3019   ierr    = PetscFree(dummy);CHKERRQ(ierr);
3020   PetscFunctionReturn(0);
3021 }
3022 
3023 #undef __FUNCT__
3024 #define __FUNCT__ "MatInvertBlockDiagonal_MPIAIJ"
3025 PetscErrorCode  MatInvertBlockDiagonal_MPIAIJ(Mat A,const PetscScalar **values)
3026 {
3027   Mat_MPIAIJ     *a = (Mat_MPIAIJ*) A->data;
3028   PetscErrorCode ierr;
3029 
3030   PetscFunctionBegin;
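  /* the requested block diagonal lies entirely within the local diagonal
     block a->A, so delegating to the SeqAIJ implementation suffices */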
3031   ierr = MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr);
3032   PetscFunctionReturn(0);
3033 }
3034 
3035 #undef __FUNCT__
3036 #define __FUNCT__ "MatSetRandom_MPIAIJ"
3037 static PetscErrorCode  MatSetRandom_MPIAIJ(Mat x,PetscRandom rctx)
3038 {
3039   PetscErrorCode ierr;
3040   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)x->data;
3041 
3042   PetscFunctionBegin;
3043   ierr = MatSetRandom(aij->A,rctx);CHKERRQ(ierr);
3044   ierr = MatSetRandom(aij->B,rctx);CHKERRQ(ierr);
3045   ierr = MatAssemblyBegin(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3046   ierr = MatAssemblyEnd(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3047   PetscFunctionReturn(0);
3048 }
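
/*
   A usage sketch for filling an already preallocated MPIAIJ matrix A with
   random entries through the public MatSetRandom():

     PetscRandom rctx;
     ierr = PetscRandomCreate(PetscObjectComm((PetscObject)A),&rctx);CHKERRQ(ierr);
     ierr = PetscRandomSetFromOptions(rctx);CHKERRQ(ierr);
     ierr = MatSetRandom(A,rctx);CHKERRQ(ierr);
     ierr = PetscRandomDestroy(&rctx);CHKERRQ(ierr);
*/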
3049 
3050 /* -------------------------------------------------------------------*/
3051 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
3052                                        MatGetRow_MPIAIJ,
3053                                        MatRestoreRow_MPIAIJ,
3054                                        MatMult_MPIAIJ,
3055                                 /* 4*/ MatMultAdd_MPIAIJ,
3056                                        MatMultTranspose_MPIAIJ,
3057                                        MatMultTransposeAdd_MPIAIJ,
3058 #if defined(PETSC_HAVE_PBGL)
3059                                        MatSolve_MPIAIJ,
3060 #else
3061                                        0,
3062 #endif
3063                                        0,
3064                                        0,
3065                                 /*10*/ 0,
3066                                        0,
3067                                        0,
3068                                        MatSOR_MPIAIJ,
3069                                        MatTranspose_MPIAIJ,
3070                                 /*15*/ MatGetInfo_MPIAIJ,
3071                                        MatEqual_MPIAIJ,
3072                                        MatGetDiagonal_MPIAIJ,
3073                                        MatDiagonalScale_MPIAIJ,
3074                                        MatNorm_MPIAIJ,
3075                                 /*20*/ MatAssemblyBegin_MPIAIJ,
3076                                        MatAssemblyEnd_MPIAIJ,
3077                                        MatSetOption_MPIAIJ,
3078                                        MatZeroEntries_MPIAIJ,
3079                                 /*24*/ MatZeroRows_MPIAIJ,
3080                                        0,
3081 #if defined(PETSC_HAVE_PBGL)
3082                                        0,
3083 #else
3084                                        0,
3085 #endif
3086                                        0,
3087                                        0,
3088                                 /*29*/ MatSetUp_MPIAIJ,
3089 #if defined(PETSC_HAVE_PBGL)
3090                                        0,
3091 #else
3092                                        0,
3093 #endif
3094                                        0,
3095                                        0,
3096                                        0,
3097                                 /*34*/ MatDuplicate_MPIAIJ,
3098                                        0,
3099                                        0,
3100                                        0,
3101                                        0,
3102                                 /*39*/ MatAXPY_MPIAIJ,
3103                                        MatGetSubMatrices_MPIAIJ,
3104                                        MatIncreaseOverlap_MPIAIJ,
3105                                        MatGetValues_MPIAIJ,
3106                                        MatCopy_MPIAIJ,
3107                                 /*44*/ MatGetRowMax_MPIAIJ,
3108                                        MatScale_MPIAIJ,
3109                                        0,
3110                                        0,
3111                                        MatZeroRowsColumns_MPIAIJ,
3112                                 /*49*/ MatSetRandom_MPIAIJ,
3113                                        0,
3114                                        0,
3115                                        0,
3116                                        0,
3117                                 /*54*/ MatFDColoringCreate_MPIXAIJ,
3118                                        0,
3119                                        MatSetUnfactored_MPIAIJ,
3120                                        MatPermute_MPIAIJ,
3121                                        0,
3122                                 /*59*/ MatGetSubMatrix_MPIAIJ,
3123                                        MatDestroy_MPIAIJ,
3124                                        MatView_MPIAIJ,
3125                                        0,
3126                                        MatMatMatMult_MPIAIJ_MPIAIJ_MPIAIJ,
3127                                 /*64*/ MatMatMatMultSymbolic_MPIAIJ_MPIAIJ_MPIAIJ,
3128                                        MatMatMatMultNumeric_MPIAIJ_MPIAIJ_MPIAIJ,
3129                                        0,
3130                                        0,
3131                                        0,
3132                                 /*69*/ MatGetRowMaxAbs_MPIAIJ,
3133                                        MatGetRowMinAbs_MPIAIJ,
3134                                        0,
3135                                        MatSetColoring_MPIAIJ,
3136                                        0,
3137                                        MatSetValuesAdifor_MPIAIJ,
3138                                 /*75*/ MatFDColoringApply_AIJ,
3139                                        0,
3140                                        0,
3141                                        0,
3142                                        MatFindZeroDiagonals_MPIAIJ,
3143                                 /*80*/ 0,
3144                                        0,
3145                                        0,
3146                                 /*83*/ MatLoad_MPIAIJ,
3147                                        0,
3148                                        0,
3149                                        0,
3150                                        0,
3151                                        0,
3152                                 /*89*/ MatMatMult_MPIAIJ_MPIAIJ,
3153                                        MatMatMultSymbolic_MPIAIJ_MPIAIJ,
3154                                        MatMatMultNumeric_MPIAIJ_MPIAIJ,
3155                                        MatPtAP_MPIAIJ_MPIAIJ,
3156                                        MatPtAPSymbolic_MPIAIJ_MPIAIJ,
3157                                 /*94*/ MatPtAPNumeric_MPIAIJ_MPIAIJ,
3158                                        0,
3159                                        0,
3160                                        0,
3161                                        0,
3162                                 /*99*/ 0,
3163                                        0,
3164                                        0,
3165                                        MatConjugate_MPIAIJ,
3166                                        0,
3167                                 /*104*/MatSetValuesRow_MPIAIJ,
3168                                        MatRealPart_MPIAIJ,
3169                                        MatImaginaryPart_MPIAIJ,
3170                                        0,
3171                                        0,
3172                                 /*109*/0,
3173                                        MatGetRedundantMatrix_MPIAIJ,
3174                                        MatGetRowMin_MPIAIJ,
3175                                        0,
3176                                        0,
3177                                 /*114*/MatGetSeqNonzeroStructure_MPIAIJ,
3178                                        0,
3179                                        0,
3180                                        0,
3181                                        0,
3182                                 /*119*/0,
3183                                        0,
3184                                        0,
3185                                        0,
3186                                        MatGetMultiProcBlock_MPIAIJ,
3187                                 /*124*/MatFindNonzeroRows_MPIAIJ,
3188                                        MatGetColumnNorms_MPIAIJ,
3189                                        MatInvertBlockDiagonal_MPIAIJ,
3190                                        0,
3191                                        MatGetSubMatricesParallel_MPIAIJ,
3192                                 /*129*/0,
3193                                        MatTransposeMatMult_MPIAIJ_MPIAIJ,
3194                                        MatTransposeMatMultSymbolic_MPIAIJ_MPIAIJ,
3195                                        MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ,
3196                                        0,
3197                                 /*134*/0,
3198                                        0,
3199                                        0,
3200                                        0,
3201                                        0,
3202                                 /*139*/0,
3203                                        0,
3204                                        0,
3205                                        MatFDColoringSetUp_MPIXAIJ
3206 };
3207 
3208 /* ----------------------------------------------------------------------------------------*/
3209 
3210 #undef __FUNCT__
3211 #define __FUNCT__ "MatStoreValues_MPIAIJ"
3212 PetscErrorCode  MatStoreValues_MPIAIJ(Mat mat)
3213 {
3214   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
3215   PetscErrorCode ierr;
3216 
3217   PetscFunctionBegin;
3218   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
3219   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
3220   PetscFunctionReturn(0);
3221 }
3222 
3223 #undef __FUNCT__
3224 #define __FUNCT__ "MatRetrieveValues_MPIAIJ"
3225 PetscErrorCode  MatRetrieveValues_MPIAIJ(Mat mat)
3226 {
3227   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
3228   PetscErrorCode ierr;
3229 
3230   PetscFunctionBegin;
3231   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
3232   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
3233   PetscFunctionReturn(0);
3234 }
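
/*
   The store/retrieve pair above is typically used to reapply the same values
   after an operation such as MatZeroRows() overwrites them; a sketch:

     ierr = MatSetOption(mat,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);CHKERRQ(ierr);
     ierr = MatStoreValues(mat);CHKERRQ(ierr);
     ... modify the values of mat, e.g. with MatZeroRows() ...
     ierr = MatRetrieveValues(mat);CHKERRQ(ierr);
*/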
3235 
3236 #undef __FUNCT__
3237 #define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ"
3238 PetscErrorCode  MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
3239 {
3240   Mat_MPIAIJ     *b;
3241   PetscErrorCode ierr;
3242 
3243   PetscFunctionBegin;
3244   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3245   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3246   b = (Mat_MPIAIJ*)B->data;
3247 
3248   if (!B->preallocated) {
3249     /* Explicitly create 2 MATSEQAIJ matrices. */
3250     ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
3251     ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr);
3252     ierr = MatSetBlockSizesFromMats(b->A,B,B);CHKERRQ(ierr);
3253     ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr);
3254     ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->A);CHKERRQ(ierr);
3255     ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
3256     ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr);
3257     ierr = MatSetBlockSizesFromMats(b->B,B,B);CHKERRQ(ierr);
3258     ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr);
3259     ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->B);CHKERRQ(ierr);
3260   }
3261 
3262   ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr);
3263   ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr);
3264   B->preallocated = PETSC_TRUE;
3265   PetscFunctionReturn(0);
3266 }
3267 
3268 #undef __FUNCT__
3269 #define __FUNCT__ "MatDuplicate_MPIAIJ"
3270 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
3271 {
3272   Mat            mat;
3273   Mat_MPIAIJ     *a,*oldmat = (Mat_MPIAIJ*)matin->data;
3274   PetscErrorCode ierr;
3275 
3276   PetscFunctionBegin;
3277   *newmat = 0;
3278   ierr    = MatCreate(PetscObjectComm((PetscObject)matin),&mat);CHKERRQ(ierr);
3279   ierr    = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr);
3280   ierr    = MatSetBlockSizesFromMats(mat,matin,matin);CHKERRQ(ierr);
3281   ierr    = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr);
3282   ierr    = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
3283   a       = (Mat_MPIAIJ*)mat->data;
3284 
3285   mat->factortype   = matin->factortype;
3286   mat->assembled    = PETSC_TRUE;
3287   mat->insertmode   = NOT_SET_VALUES;
3288   mat->preallocated = PETSC_TRUE;
3289 
3290   a->size         = oldmat->size;
3291   a->rank         = oldmat->rank;
3292   a->donotstash   = oldmat->donotstash;
3293   a->roworiented  = oldmat->roworiented;
3294   a->rowindices   = 0;
3295   a->rowvalues    = 0;
3296   a->getrowactive = PETSC_FALSE;
3297 
3298   ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr);
3299   ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr);
3300 
3301   if (oldmat->colmap) {
3302 #if defined(PETSC_USE_CTABLE)
3303     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
3304 #else
3305     ierr = PetscMalloc1((mat->cmap->N),&a->colmap);CHKERRQ(ierr);
3306     ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
3307     ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
3308 #endif
3309   } else a->colmap = 0;
3310   if (oldmat->garray) {
3311     PetscInt len;
3312     len  = oldmat->B->cmap->n;
3313     ierr = PetscMalloc1((len+1),&a->garray);CHKERRQ(ierr);
3314     ierr = PetscLogObjectMemory((PetscObject)mat,len*sizeof(PetscInt));CHKERRQ(ierr);
3315     if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); }
3316   } else a->garray = 0;
3317 
3318   ierr    = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
3319   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->lvec);CHKERRQ(ierr);
3320   ierr    = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
3321   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx);CHKERRQ(ierr);
3322   ierr    = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
3323   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr);
3324   ierr    = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
3325   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->B);CHKERRQ(ierr);
3326   ierr    = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr);
3327   *newmat = mat;
3328   PetscFunctionReturn(0);
3329 }
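
/*
   A usage sketch of the public MatDuplicate() implemented above, for an
   assembled MPIAIJ matrix A:

     Mat Acopy;
     ierr = MatDuplicate(A,MAT_COPY_VALUES,&Acopy);CHKERRQ(ierr);
     ierr = MatDestroy(&Acopy);CHKERRQ(ierr);

   MAT_DO_NOT_COPY_VALUES or MAT_SHARE_NONZERO_PATTERN may be passed instead
   of MAT_COPY_VALUES when the values are not needed.
*/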
3330 
3331 
3332 
3333 #undef __FUNCT__
3334 #define __FUNCT__ "MatLoad_MPIAIJ"
3335 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer)
3336 {
3337   PetscScalar    *vals,*svals;
3338   MPI_Comm       comm;
3339   PetscErrorCode ierr;
3340   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
3341   PetscInt       i,nz,j,rstart,rend,mmax,maxnz = 0,grows,gcols;
3342   PetscInt       header[4],*rowlengths = 0,M,N,m,*cols;
3343   PetscInt       *ourlens = NULL,*procsnz = NULL,*offlens = NULL,jj,*mycols,*smycols;
3344   PetscInt       cend,cstart,n,*rowners,sizesset=1;
3345   int            fd;
3346   PetscInt       bs = 1;
3347 
3348   PetscFunctionBegin;
3349   ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr);
3350   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3351   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3352   if (!rank) {
3353     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
3354     ierr = PetscBinaryRead(fd,(char*)header,4,PETSC_INT);CHKERRQ(ierr);
3355     if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not a matrix object");
3356   }
3357 
3358   ierr = PetscOptionsBegin(comm,NULL,"Options for loading MPIAIJ matrix","Mat");CHKERRQ(ierr);
3359   ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,NULL);CHKERRQ(ierr);
3360   ierr = PetscOptionsEnd();CHKERRQ(ierr);
3361 
3362   if (newMat->rmap->n < 0 && newMat->rmap->N < 0 && newMat->cmap->n < 0 && newMat->cmap->N < 0) sizesset = 0;
3363 
3364   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
3365   M    = header[1]; N = header[2];
3366   /* If global rows/cols are set to PETSC_DECIDE, set it to the sizes given in the file */
3367   if (sizesset && newMat->rmap->N < 0) newMat->rmap->N = M;
3368   if (sizesset && newMat->cmap->N < 0) newMat->cmap->N = N;
3369 
3370   /* If global sizes are set, check if they are consistent with that given in the file */
3371   if (sizesset) {
3372     ierr = MatGetSize(newMat,&grows,&gcols);CHKERRQ(ierr);
3373   }
3374   if (sizesset && newMat->rmap->N != grows) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows:Matrix in file has (%d) and input matrix has (%d)",M,grows);
3375   if (sizesset && newMat->cmap->N != gcols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols:Matrix in file has (%d) and input matrix has (%d)",N,gcols);
3376 
3377   /* determine ownership of all (block) rows */
3378   if (M%bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows (%d) and block size (%d)",M,bs);
3379   if (newMat->rmap->n < 0) m = bs*((M/bs)/size + (((M/bs) % size) > rank));    /* PETSC_DECIDE */
3380   else m = newMat->rmap->n; /* Set by user */
3381 
3382   ierr = PetscMalloc1((size+1),&rowners);CHKERRQ(ierr);
3383   ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
3384 
3385   /* First process needs enough room for process with most rows */
3386   if (!rank) {
3387     mmax = rowners[1];
3388     for (i=2; i<=size; i++) {
3389       mmax = PetscMax(mmax, rowners[i]);
3390     }
3391   } else mmax = -1;             /* unused, but compilers complain */
3392 
3393   rowners[0] = 0;
3394   for (i=2; i<=size; i++) {
3395     rowners[i] += rowners[i-1];
3396   }
3397   rstart = rowners[rank];
3398   rend   = rowners[rank+1];
3399 
3400   /* distribute row lengths to all processors */
3401   ierr = PetscMalloc2(m,&ourlens,m,&offlens);CHKERRQ(ierr);
3402   if (!rank) {
3403     ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr);
3404     ierr = PetscMalloc1(mmax,&rowlengths);CHKERRQ(ierr);
3405     ierr = PetscCalloc1(size,&procsnz);CHKERRQ(ierr);
3406     for (j=0; j<m; j++) {
3407       procsnz[0] += ourlens[j];
3408     }
3409     for (i=1; i<size; i++) {
3410       ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr);
3411       /* calculate the number of nonzeros on each processor */
3412       for (j=0; j<rowners[i+1]-rowners[i]; j++) {
3413         procsnz[i] += rowlengths[j];
3414       }
3415       ierr = MPIULong_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
3416     }
3417     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
3418   } else {
3419     ierr = MPIULong_Recv(ourlens,m,MPIU_INT,0,tag,comm);CHKERRQ(ierr);
3420   }
3421 
3422   if (!rank) {
3423     /* determine max buffer needed and allocate it */
3424     maxnz = 0;
3425     for (i=0; i<size; i++) {
3426       maxnz = PetscMax(maxnz,procsnz[i]);
3427     }
3428     ierr = PetscMalloc1(maxnz,&cols);CHKERRQ(ierr);
3429 
3430     /* read in my part of the matrix column indices  */
3431     nz   = procsnz[0];
3432     ierr = PetscMalloc1(nz,&mycols);CHKERRQ(ierr);
3433     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
3434 
3435     /* read in everyone else's and ship them off */
3436     for (i=1; i<size; i++) {
3437       nz   = procsnz[i];
3438       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
3439       ierr = MPIULong_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
3440     }
3441     ierr = PetscFree(cols);CHKERRQ(ierr);
3442   } else {
3443     /* determine buffer space needed for message */
3444     nz = 0;
3445     for (i=0; i<m; i++) {
3446       nz += ourlens[i];
3447     }
3448     ierr = PetscMalloc1(nz,&mycols);CHKERRQ(ierr);
3449 
3450     /* receive message of column indices*/
3451     ierr = MPIULong_Recv(mycols,nz,MPIU_INT,0,tag,comm);CHKERRQ(ierr);
3452   }
3453 
3454   /* determine column ownership if matrix is not square */
3455   if (N != M) {
3456     if (newMat->cmap->n < 0) n = N/size + ((N % size) > rank);
3457     else n = newMat->cmap->n;
3458     ierr   = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3459     cstart = cend - n;
3460   } else {
3461     cstart = rstart;
3462     cend   = rend;
3463     n      = cend - cstart;
3464   }
3465 
3466   /* loop over local rows, determining number of off diagonal entries */
3467   ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr);
3468   jj   = 0;
3469   for (i=0; i<m; i++) {
3470     for (j=0; j<ourlens[i]; j++) {
3471       if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++;
3472       jj++;
3473     }
3474   }
3475 
3476   for (i=0; i<m; i++) {
3477     ourlens[i] -= offlens[i];
3478   }
3479   if (!sizesset) {
3480     ierr = MatSetSizes(newMat,m,n,M,N);CHKERRQ(ierr);
3481   }
3482 
3483   if (bs > 1) {ierr = MatSetBlockSize(newMat,bs);CHKERRQ(ierr);}
3484 
3485   ierr = MatMPIAIJSetPreallocation(newMat,0,ourlens,0,offlens);CHKERRQ(ierr);
3486 
3487   for (i=0; i<m; i++) {
3488     ourlens[i] += offlens[i];
3489   }
3490 
3491   if (!rank) {
3492     ierr = PetscMalloc1((maxnz+1),&vals);CHKERRQ(ierr);
3493 
3494     /* read in my part of the matrix numerical values  */
3495     nz   = procsnz[0];
3496     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3497 
3498     /* insert into matrix */
3499     jj      = rstart;
3500     smycols = mycols;
3501     svals   = vals;
3502     for (i=0; i<m; i++) {
3503       ierr     = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
3504       smycols += ourlens[i];
3505       svals   += ourlens[i];
3506       jj++;
3507     }
3508 
3509     /* read in other processors and ship out */
3510     for (i=1; i<size; i++) {
3511       nz   = procsnz[i];
3512       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3513       ierr = MPIULong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr);
3514     }
3515     ierr = PetscFree(procsnz);CHKERRQ(ierr);
3516   } else {
3517     /* receive numeric values */
3518     ierr = PetscMalloc1((nz+1),&vals);CHKERRQ(ierr);
3519 
3520     /* receive message of values*/
3521     ierr = MPIULong_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr);
3522 
3523     /* insert into matrix */
3524     jj      = rstart;
3525     smycols = mycols;
3526     svals   = vals;
3527     for (i=0; i<m; i++) {
3528       ierr     = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
3529       smycols += ourlens[i];
3530       svals   += ourlens[i];
3531       jj++;
3532     }
3533   }
3534   ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr);
3535   ierr = PetscFree(vals);CHKERRQ(ierr);
3536   ierr = PetscFree(mycols);CHKERRQ(ierr);
3537   ierr = PetscFree(rowners);CHKERRQ(ierr);
3538   ierr = MatAssemblyBegin(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3539   ierr = MatAssemblyEnd(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3540   PetscFunctionReturn(0);
3541 }
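
/*
   A usage sketch for loading an MPIAIJ matrix from a PETSc binary file (the
   file name "matrix.dat" is illustrative):

     PetscViewer viewer;
     Mat         A;
     ierr = PetscViewerBinaryOpen(comm,"matrix.dat",FILE_MODE_READ,&viewer);CHKERRQ(ierr);
     ierr = MatCreate(comm,&A);CHKERRQ(ierr);
     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
     ierr = MatLoad(A,viewer);CHKERRQ(ierr);
     ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr);
*/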
3542 
3543 #undef __FUNCT__
3544 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ"
3545 PetscErrorCode MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat)
3546 {
3547   PetscErrorCode ierr;
3548   IS             iscol_local;
3549   PetscInt       csize;
3550 
3551   PetscFunctionBegin;
3552   ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
3553   if (call == MAT_REUSE_MATRIX) {
3554     ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr);
3555     if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3556   } else {
3557     PetscInt cbs;
3558     ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr);
3559     ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
3560     ierr = ISSetBlockSize(iscol_local,cbs);CHKERRQ(ierr);
3561   }
3562   ierr = MatGetSubMatrix_MPIAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr);
3563   if (call == MAT_INITIAL_MATRIX) {
3564     ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
3565     ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
3566   }
3567   PetscFunctionReturn(0);
3568 }
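
/*
   A usage sketch of the public MatGetSubMatrix() implemented above; nrowloc,
   rfirst, ncolloc, cfirst are illustrative, with each process listing the
   global rows it wants and the number of columns it will own locally:

     IS  isrow,iscol;
     Mat S;
     ierr = ISCreateStride(PetscObjectComm((PetscObject)A),nrowloc,rfirst,1,&isrow);CHKERRQ(ierr);
     ierr = ISCreateStride(PetscObjectComm((PetscObject)A),ncolloc,cfirst,1,&iscol);CHKERRQ(ierr);
     ierr = MatGetSubMatrix(A,isrow,iscol,MAT_INITIAL_MATRIX,&S);CHKERRQ(ierr);
     ierr = ISDestroy(&isrow);CHKERRQ(ierr);
     ierr = ISDestroy(&iscol);CHKERRQ(ierr);
*/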
3569 
3570 extern PetscErrorCode MatGetSubMatrices_MPIAIJ_Local(Mat,PetscInt,const IS[],const IS[],MatReuse,PetscBool*,Mat*);
3571 #undef __FUNCT__
3572 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ_Private"
3573 /*
3574     Not great since it makes two copies of the submatrix: first a SeqAIJ
3575   locally, and then the end result by concatenating the local matrices.
3576   Writing it directly would be much like MatGetSubMatrices_MPIAIJ().
3577 
3578   Note: This requires a sequential iscol with all indices.
3579 */
3580 PetscErrorCode MatGetSubMatrix_MPIAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat)
3581 {
3582   PetscErrorCode ierr;
3583   PetscMPIInt    rank,size;
3584   PetscInt       i,m,n,rstart,row,rend,nz,*cwork,j,bs,cbs;
3585   PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal,ncol;
3586   PetscBool      allcolumns, colflag;
3587   Mat            M,Mreuse;
3588   MatScalar      *vwork,*aa;
3589   MPI_Comm       comm;
3590   Mat_SeqAIJ     *aij;
3591 
3592   PetscFunctionBegin;
3593   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3594   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3595   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3596 
3597   ierr = ISIdentity(iscol,&colflag);CHKERRQ(ierr);
3598   ierr = ISGetLocalSize(iscol,&ncol);CHKERRQ(ierr);
3599   if (colflag && ncol == mat->cmap->N) {
3600     allcolumns = PETSC_TRUE;
3601   } else {
3602     allcolumns = PETSC_FALSE;
3603   }
3604   if (call ==  MAT_REUSE_MATRIX) {
3605     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Mreuse);CHKERRQ(ierr);
3606     if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3607     ierr = MatGetSubMatrices_MPIAIJ_Local(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&allcolumns,&Mreuse);CHKERRQ(ierr);
3608   } else {
3609     ierr = MatGetSubMatrices_MPIAIJ_Local(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&allcolumns,&Mreuse);CHKERRQ(ierr);
3610   }
3611 
3612   /*
3613       m - number of local rows
3614       n - number of columns (same on all processors)
3615       rstart - first row in new global matrix generated
3616   */
3617   ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
3618   ierr = MatGetBlockSizes(Mreuse,&bs,&cbs);CHKERRQ(ierr);
3619   if (call == MAT_INITIAL_MATRIX) {
3620     aij = (Mat_SeqAIJ*)(Mreuse)->data;
3621     ii  = aij->i;
3622     jj  = aij->j;
3623 
3624     /*
3625         Determine the number of non-zeros in the diagonal and off-diagonal
3626         portions of the matrix in order to do correct preallocation
3627     */
3628 
3629     /* first get start and end of "diagonal" columns */
3630     if (csize == PETSC_DECIDE) {
3631       ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
3632       if (mglobal == n) { /* square matrix */
3633         nlocal = m;
3634       } else {
3635         nlocal = n/size + ((n % size) > rank);
3636       }
3637     } else {
3638       nlocal = csize;
3639     }
3640     ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3641     rstart = rend - nlocal;
3642     if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n);
3643 
3644     /* next, compute all the lengths */
3645     ierr  = PetscMalloc1((2*m+1),&dlens);CHKERRQ(ierr);
3646     olens = dlens + m;
3647     for (i=0; i<m; i++) {
3648       jend = ii[i+1] - ii[i];
3649       olen = 0;
3650       dlen = 0;
3651       for (j=0; j<jend; j++) {
3652         if (*jj < rstart || *jj >= rend) olen++;
3653         else dlen++;
3654         jj++;
3655       }
3656       olens[i] = olen;
3657       dlens[i] = dlen;
3658     }
3659     ierr = MatCreate(comm,&M);CHKERRQ(ierr);
3660     ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr);
3661     ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr);
3662     ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr);
3663     ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr);
3664     ierr = PetscFree(dlens);CHKERRQ(ierr);
3665   } else {
3666     PetscInt ml,nl;
3667 
3668     M    = *newmat;
3669     ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
3670     if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
3671     ierr = MatZeroEntries(M);CHKERRQ(ierr);
3672     /*
3673          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
3674        rather than the slower MatSetValues().
3675     */
3676     M->was_assembled = PETSC_TRUE;
3677     M->assembled     = PETSC_FALSE;
3678   }
3679   ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
3680   aij  = (Mat_SeqAIJ*)(Mreuse)->data;
3681   ii   = aij->i;
3682   jj   = aij->j;
3683   aa   = aij->a;
3684   for (i=0; i<m; i++) {
3685     row   = rstart + i;
3686     nz    = ii[i+1] - ii[i];
3687     cwork = jj;     jj += nz;
3688     vwork = aa;     aa += nz;
3689     ierr  = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
3690   }
3691 
3692   ierr    = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3693   ierr    = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3694   *newmat = M;
3695 
3696   /* save submatrix used in processor for next request */
3697   if (call ==  MAT_INITIAL_MATRIX) {
3698     ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
3699     ierr = MatDestroy(&Mreuse);CHKERRQ(ierr);
3700   }
3701   PetscFunctionReturn(0);
3702 }
3703 
3704 #undef __FUNCT__
3705 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR_MPIAIJ"
3706 PetscErrorCode  MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[])
3707 {
3708   PetscInt       m,cstart, cend,j,nnz,i,d;
3709   PetscInt       *d_nnz,*o_nnz,nnz_max = 0,rstart,ii;
3710   const PetscInt *JJ;
3711   PetscScalar    *values;
3712   PetscErrorCode ierr;
3713 
3714   PetscFunctionBegin;
3715   if (Ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0, it is %D",Ii[0]);
3716 
3717   ierr   = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3718   ierr   = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3719   m      = B->rmap->n;
3720   cstart = B->cmap->rstart;
3721   cend   = B->cmap->rend;
3722   rstart = B->rmap->rstart;
3723 
3724   ierr = PetscMalloc2(m,&d_nnz,m,&o_nnz);CHKERRQ(ierr);
3725 
3726 #if defined(PETSC_USE_DEBUG)
3727   for (i=0; i<m; i++) {
3728     nnz = Ii[i+1]- Ii[i];
3729     JJ  = J + Ii[i];
3730     if (nnz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative %D number of columns",i,nnz);
3731     if (nnz && (JJ[0] < 0)) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index",i);
3732     if (nnz && (JJ[nnz-1] >= B->cmap->N)) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N);
3733   }
3734 #endif
3735 
3736   for (i=0; i<m; i++) {
3737     nnz     = Ii[i+1]- Ii[i];
3738     JJ      = J + Ii[i];
3739     nnz_max = PetscMax(nnz_max,nnz);
3740     d       = 0;
3741     for (j=0; j<nnz; j++) {
3742       if (cstart <= JJ[j] && JJ[j] < cend) d++;
3743     }
3744     d_nnz[i] = d;
3745     o_nnz[i] = nnz - d;
3746   }
3747   ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
3748   ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);
3749 
3750   if (v) values = (PetscScalar*)v;
3751   else {
3752     ierr = PetscCalloc1((nnz_max+1),&values);CHKERRQ(ierr);
3753   }
3754 
3755   for (i=0; i<m; i++) {
3756     ii   = i + rstart;
3757     nnz  = Ii[i+1]- Ii[i];
3758     ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ? Ii[i] : 0),INSERT_VALUES);CHKERRQ(ierr);
3759   }
3760   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3761   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3762 
3763   if (!v) {
3764     ierr = PetscFree(values);CHKERRQ(ierr);
3765   }
3766   ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3767   PetscFunctionReturn(0);
3768 }
3769 
3770 #undef __FUNCT__
3771 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR"
3772 /*@
3773    MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format
3774    (the default parallel PETSc format).
3775 
3776    Collective on MPI_Comm
3777 
3778    Input Parameters:
3779 +  B - the matrix
3780 .  i - the indices into j for the start of each local row (starts with zero)
3781 .  j - the column indices for each local row (starts with zero)
3782 -  v - optional values in the matrix
3783 
3784    Level: developer
3785 
3786    Notes:
3787        The i, j, and v arrays ARE copied by this routine into the internal format used by PETSc;
3788      thus you CANNOT change the matrix entries by changing the values of v[] after you have
3789      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3790 
3791        The i and j indices are 0 based, and i indices are indices corresponding to the local j array.
3792 
3793        The format which is used for the sparse matrix input, is equivalent to a
3794     row-major ordering.. i.e for the following matrix, the input data expected is
3795     as shown:
3796 
3797         1 0 0
3798         2 0 3     P0
3799        -------
3800         4 5 6     P1
3801 
3802      Process0 [P0]: rows_owned=[0,1]
3803         i =  {0,1,3}  [size = nrow+1  = 2+1]
3804         j =  {0,0,2}  [size = nz = 3]
3805         v =  {1,2,3}  [size = nz = 3]
3806 
3807      Process1 [P1]: rows_owned=[2]
3808         i =  {0,3}    [size = nrow+1  = 1+1]
3809         j =  {0,1,2}  [size = nz = 3]
3810         v =  {4,5,6}  [size = nz = 3]
3811 
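     With the row layout of B matching this division (2 local rows on P0, 1 on
     P1), each process then makes the same call with its own local arrays;
     a sketch:

        ierr = MatMPIAIJSetPreallocationCSR(B,i,j,v);CHKERRQ(ierr);
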
3812 .keywords: matrix, aij, compressed row, sparse, parallel
3813 
3814 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateAIJ(), MPIAIJ,
3815           MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays()
3816 @*/
3817 PetscErrorCode  MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
3818 {
3819   PetscErrorCode ierr;
3820 
3821   PetscFunctionBegin;
3822   ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocationCSR_C",(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,i,j,v));CHKERRQ(ierr);
3823   PetscFunctionReturn(0);
3824 }
3825 
3826 #undef __FUNCT__
3827 #define __FUNCT__ "MatMPIAIJSetPreallocation"
3828 /*@C
3829    MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format
3830    (the default parallel PETSc format).  For good matrix assembly performance
3831    the user should preallocate the matrix storage by setting the parameters
3832    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3833    performance can be increased by more than a factor of 50.
3834 
3835    Collective on MPI_Comm
3836 
3837    Input Parameters:
3838 +  A - the matrix
3839 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
3840            (same value is used for all local rows)
3841 .  d_nnz - array containing the number of nonzeros in the various rows of the
3842            DIAGONAL portion of the local submatrix (possibly different for each row)
3843            or NULL, if d_nz is used to specify the nonzero structure.
3844            The size of this array is equal to the number of local rows, i.e 'm'.
3845            For matrices that will be factored, you must leave room for (and set)
3846            the diagonal entry even if it is zero.
3847 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
3848            submatrix (same value is used for all local rows).
3849 -  o_nnz - array containing the number of nonzeros in the various rows of the
3850            OFF-DIAGONAL portion of the local submatrix (possibly different for
3851            each row) or NULL, if o_nz is used to specify the nonzero
3852            structure. The size of this array is equal to the number
           of local rows, i.e. 'm'.
3854 
   If the *_nnz parameter is given, then the *_nz parameter is ignored.
3856 
   The AIJ format (also called the Yale sparse matrix format or
   compressed row storage (CSR)) is fully compatible with standard Fortran 77
3859    storage.  The stored row and column indices begin with zero.
3860    See Users-Manual: ch_mat for details.
3861 
   The parallel matrix is partitioned such that the first m0 rows belong to
   process 0, the next m1 rows belong to process 1, the next m2 rows belong
   to process 2, etc., where m0,m1,m2,... are the input parameter 'm'.
3865 
   The DIAGONAL portion of the local submatrix of a processor can be defined
   as the submatrix which is obtained by extracting the part corresponding to
   the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the
   first row that belongs to the processor, r2 is the last row belonging to
   this processor, and c1-c2 is the range of indices of the local part of a
   vector suitable for applying the matrix to.  This is an mxn matrix.  In the
   common case of a square matrix, the row and column ranges are the same and
   the DIAGONAL part is also square. The remaining portion of the local
   submatrix (mxN) constitutes the OFF-DIAGONAL portion.
3875 
   If o_nnz and d_nnz are specified, then o_nz and d_nz are ignored.
3877 
   You can call MatGetInfo() to get information on how effective the preallocation was,
   for example the fields mallocs, nz_allocated, nz_used, and nz_unneeded.
3880    You can also run with the option -info and look for messages with the string
3881    malloc in them to see if additional memory allocation was needed.
3882 
3883    Example usage:
3884 
3885    Consider the following 8x8 matrix with 34 non-zero values, that is
   assembled across 3 processors. Let's assume that proc0 owns 3 rows,
3887    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
3888    as follows:
3889 
3890 .vb
3891             1  2  0  |  0  3  0  |  0  4
3892     Proc0   0  5  6  |  7  0  0  |  8  0
3893             9  0 10  | 11  0  0  | 12  0
3894     -------------------------------------
3895            13  0 14  | 15 16 17  |  0  0
3896     Proc1   0 18  0  | 19 20 21  |  0  0
3897             0  0  0  | 22 23  0  | 24  0
3898     -------------------------------------
3899     Proc2  25 26 27  |  0  0 28  | 29  0
3900            30  0  0  | 31 32 33  |  0 34
3901 .ve
3902 
3903    This can be represented as a collection of submatrices as:
3904 
3905 .vb
3906       A B C
3907       D E F
3908       G H I
3909 .ve
3910 
3911    Where the submatrices A,B,C are owned by proc0, D,E,F are
3912    owned by proc1, G,H,I are owned by proc2.
3913 
3914    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3915    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3916    The 'M','N' parameters are 8,8, and have the same values on all procs.
3917 
3918    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
3919    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
3920    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
   Internally, each processor stores the DIAGONAL part and the OFF-DIAGONAL
   part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
   matrix, and [DF] as another SeqAIJ matrix.
3924 
3925    When d_nz, o_nz parameters are specified, d_nz storage elements are
3926    allocated for every row of the local diagonal submatrix, and o_nz
3927    storage locations are allocated for every row of the OFF-DIAGONAL submat.
   One way to choose d_nz and o_nz is to use the maximum number of nonzeros per
   local row for each of the local DIAGONAL and OFF-DIAGONAL submatrices.
3930    In this case, the values of d_nz,o_nz are:
3931 .vb
     proc0 : d_nz = 2, o_nz = 2
     proc1 : d_nz = 3, o_nz = 2
     proc2 : d_nz = 1, o_nz = 4
3935 .ve
   We are allocating m*(d_nz+o_nz) storage locations for every proc. This
   translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, and 2*(1+4)=10
   for proc2, i.e. we are using 12+15+10=37 storage locations to store
   34 values.
3940 
3941    When d_nnz, o_nnz parameters are specified, the storage is specified
   for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
3943    In the above case the values for d_nnz,o_nnz are:
3944 .vb
3945      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
3946      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
3947      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
3948 .ve
   Here the space allocated is the sum of all the above values, i.e. 34, and
   hence the preallocation is perfect.
3951 
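   For instance, a minimal sketch of the call proc1 above might make, assuming the Mat B
   has already been created, sized, and given an AIJ type:

.vb
     PetscInt d_nnz[] = {3,3,2};   /* nonzeros per local row, DIAGONAL block */
     PetscInt o_nnz[] = {2,1,1};   /* nonzeros per local row, OFF-DIAGONAL block */
     ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
.ve
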
3952    Level: intermediate
3953 
3954 .keywords: matrix, aij, compressed row, sparse, parallel
3955 
3956 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateAIJ(), MatMPIAIJSetPreallocationCSR(),
3957           MPIAIJ, MatGetInfo(), PetscSplitOwnership()
3958 @*/
3959 PetscErrorCode  MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
3960 {
3961   PetscErrorCode ierr;
3962 
3963   PetscFunctionBegin;
3964   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
3965   PetscValidType(B,1);
3966   ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocation_C",(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr);
3967   PetscFunctionReturn(0);
3968 }
3969 
3970 #undef __FUNCT__
3971 #define __FUNCT__ "MatCreateMPIAIJWithArrays"
3972 /*@
     MatCreateMPIAIJWithArrays - creates an MPI AIJ matrix using arrays that contain the local
         rows in standard CSR format.
3975 
3976    Collective on MPI_Comm
3977 
3978    Input Parameters:
3979 +  comm - MPI communicator
3980 .  m - number of local rows (Cannot be PETSC_DECIDE)
.  n - This value should be the same as the local size used in creating the
       x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have it
       calculated if N is given). For square matrices n is almost always m.
.  M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given)
.  N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given)
3986 .   i - row indices
3987 .   j - column indices
3988 -   a - matrix values
3989 
3990    Output Parameter:
3991 .   mat - the matrix
3992 
3993    Level: intermediate
3994 
3995    Notes:
3996        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
3997      thus you CANNOT change the matrix entries by changing the values of a[] after you have
3998      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3999 
       The i and j indices are 0-based, and the entries of i are offsets into the local j array.
4001 
       The format used for the sparse matrix input is equivalent to a
    row-major ordering, i.e. for the following matrix, the input data expected is
4004     as shown:
4005 
4006         1 0 0
4007         2 0 3     P0
4008        -------
4009         4 5 6     P1
4010 
4011      Process0 [P0]: rows_owned=[0,1]
4012         i =  {0,1,3}  [size = nrow+1  = 2+1]
        j =  {0,0,2}  [size = nz = 3]
        v =  {1,2,3}  [size = nz = 3]
4015 
4016      Process1 [P1]: rows_owned=[2]
4017         i =  {0,3}    [size = nrow+1  = 1+1]
        j =  {0,1,2}  [size = nz = 3]
        v =  {4,5,6}  [size = nz = 3]
4020 
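   For instance, a minimal sketch of the collective call process 0 above might make, assuming
   comm holds the two processes of the example:

.vb
     PetscInt    i[] = {0,1,3};
     PetscInt    j[] = {0,0,2};
     PetscScalar v[] = {1,2,3};
     Mat         A;
     ierr = MatCreateMPIAIJWithArrays(comm,2,PETSC_DECIDE,PETSC_DETERMINE,3,i,j,v,&A);CHKERRQ(ierr);
.ve
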
4021 .keywords: matrix, aij, compressed row, sparse, parallel
4022 
4023 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
4024           MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays()
4025 @*/
4026 PetscErrorCode  MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
4027 {
4028   PetscErrorCode ierr;
4029 
4030   PetscFunctionBegin;
4031   if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
4032   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
4033   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
4034   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
4035   /* ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); */
4036   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
4037   ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr);
4038   PetscFunctionReturn(0);
4039 }
4040 
4041 #undef __FUNCT__
4042 #define __FUNCT__ "MatCreateAIJ"
4043 /*@C
4044    MatCreateAIJ - Creates a sparse parallel matrix in AIJ format
4045    (the default parallel PETSc format).  For good matrix assembly performance
4046    the user should preallocate the matrix storage by setting the parameters
4047    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
4048    performance can be increased by more than a factor of 50.
4049 
4050    Collective on MPI_Comm
4051 
4052    Input Parameters:
4053 +  comm - MPI communicator
4054 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
4055            This value should be the same as the local size used in creating the
4056            y vector for the matrix-vector product y = Ax.
.  n - This value should be the same as the local size used in creating the
       x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have it
       calculated if N is given). For square matrices n is almost always m.
.  M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given)
.  N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given)
4062 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
4063            (same value is used for all local rows)
4064 .  d_nnz - array containing the number of nonzeros in the various rows of the
4065            DIAGONAL portion of the local submatrix (possibly different for each row)
4066            or NULL, if d_nz is used to specify the nonzero structure.
4067            The size of this array is equal to the number of local rows, i.e 'm'.
4068 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
4069            submatrix (same value is used for all local rows).
4070 -  o_nnz - array containing the number of nonzeros in the various rows of the
4071            OFF-DIAGONAL portion of the local submatrix (possibly different for
4072            each row) or NULL, if o_nz is used to specify the nonzero
4073            structure. The size of this array is equal to the number
4074            of local rows, i.e 'm'.
4075 
4076    Output Parameter:
4077 .  A - the matrix
4078 
   It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
   MatXXXXSetPreallocation() paradigm instead of this routine directly.
   [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation()]
4082 
4083    Notes:
   If the *_nnz parameter is given, then the *_nz parameter is ignored.
4085 
4086    m,n,M,N parameters specify the size of the matrix, and its partitioning across
4087    processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
4088    storage requirements for this matrix.
4089 
   If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one
   processor then it must be used on all processors that share the object for
   that argument.
4093 
4094    The user MUST specify either the local or global matrix dimensions
4095    (possibly both).
4096 
   The parallel matrix is partitioned across processors such that the
   first m0 rows belong to process 0, the next m1 rows belong to
   process 1, the next m2 rows belong to process 2, etc., where
   m0,m1,m2,... are the input parameter 'm', i.e. each processor stores
   values corresponding to an [m x N] submatrix.
4102 
   The columns are logically partitioned with the n0 columns belonging
   to the 0th partition, the next n1 columns belonging to the next
   partition etc., where n0,n1,n2,... are the input parameter 'n'.
4106 
   The DIAGONAL portion of the local submatrix on any given processor
   is the submatrix corresponding to the rows and columns m,n owned by
   that processor, i.e. the diagonal matrix on
   process 0 is [m0 x n0], the diagonal matrix on process 1 is [m1 x n1],
   etc. The remaining portion of the local submatrix [m x (N-n)]
   constitutes the OFF-DIAGONAL portion. The example below better
   illustrates this concept.
4114 
4115    For a square global matrix we define each processor's diagonal portion
4116    to be its local rows and the corresponding columns (a square submatrix);
4117    each processor's off-diagonal portion encompasses the remainder of the
4118    local matrix (a rectangular submatrix).
4119 
4120    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
4121 
4122    When calling this routine with a single process communicator, a matrix of
4123    type SEQAIJ is returned.  If a matrix of type MPIAIJ is desired for this
4124    type of communicator, use the construction mechanism:
4125      MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatSetSizes(A, m,n,M,N); MatMPIAIJSetPreallocation(A,...);
4126 
4127    By default, this format uses inodes (identical nodes) when possible.
4128    We search for consecutive rows with the same nonzero structure, thereby
4129    reusing matrix information to achieve increased efficiency.
4130 
4131    Options Database Keys:
4132 +  -mat_no_inode  - Do not use inodes
4133 .  -mat_inode_limit <limit> - Sets inode limit (max limit=5)
4134 -  -mat_aij_oneindex - Internally use indexing starting at 1
4135         rather than 0.  Note that when calling MatSetValues(),
4136         the user still MUST index entries starting at 0!
4137 
4138 
4139    Example usage:
4140 
4141    Consider the following 8x8 matrix with 34 non-zero values, that is
   assembled across 3 processors. Let's assume that proc0 owns 3 rows,
4143    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
4144    as follows:
4145 
4146 .vb
4147             1  2  0  |  0  3  0  |  0  4
4148     Proc0   0  5  6  |  7  0  0  |  8  0
4149             9  0 10  | 11  0  0  | 12  0
4150     -------------------------------------
4151            13  0 14  | 15 16 17  |  0  0
4152     Proc1   0 18  0  | 19 20 21  |  0  0
4153             0  0  0  | 22 23  0  | 24  0
4154     -------------------------------------
4155     Proc2  25 26 27  |  0  0 28  | 29  0
4156            30  0  0  | 31 32 33  |  0 34
4157 .ve
4158 
4159    This can be represented as a collection of submatrices as:
4160 
4161 .vb
4162       A B C
4163       D E F
4164       G H I
4165 .ve
4166 
4167    Where the submatrices A,B,C are owned by proc0, D,E,F are
4168    owned by proc1, G,H,I are owned by proc2.
4169 
4170    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4171    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4172    The 'M','N' parameters are 8,8, and have the same values on all procs.
4173 
4174    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
4175    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
4176    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
   Internally, each processor stores the DIAGONAL part and the OFF-DIAGONAL
   part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
   matrix, and [DF] as another SeqAIJ matrix.
4180 
4181    When d_nz, o_nz parameters are specified, d_nz storage elements are
4182    allocated for every row of the local diagonal submatrix, and o_nz
4183    storage locations are allocated for every row of the OFF-DIAGONAL submat.
   One way to choose d_nz and o_nz is to use the maximum number of nonzeros per
   local row for each of the local DIAGONAL and OFF-DIAGONAL submatrices.
4186    In this case, the values of d_nz,o_nz are:
4187 .vb
     proc0 : d_nz = 2, o_nz = 2
     proc1 : d_nz = 3, o_nz = 2
     proc2 : d_nz = 1, o_nz = 4
4191 .ve
   We are allocating m*(d_nz+o_nz) storage locations for every proc. This
   translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, and 2*(1+4)=10
   for proc2, i.e. we are using 12+15+10=37 storage locations to store
   34 values.
4196 
4197    When d_nnz, o_nnz parameters are specified, the storage is specified
   for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
4199    In the above case the values for d_nnz,o_nnz are:
4200 .vb
4201      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
4202      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
4203      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
4204 .ve
   Here the space allocated is the sum of all the above values, i.e. 34, and
   hence the preallocation is perfect.
4207 
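   For instance, a minimal sketch of the collective call proc1 above might make, assuming
   comm holds the three processes (each process passes its own local sizes and nnz arrays):

.vb
     PetscInt d_nnz[] = {3,3,2}, o_nnz[] = {2,1,1};
     Mat      A;
     ierr = MatCreateAIJ(comm,3,3,8,8,0,d_nnz,0,o_nnz,&A);CHKERRQ(ierr);
.ve
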
4208    Level: intermediate
4209 
4210 .keywords: matrix, aij, compressed row, sparse, parallel
4211 
4212 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
4213           MPIAIJ, MatCreateMPIAIJWithArrays()
4214 @*/
4215 PetscErrorCode  MatCreateAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
4216 {
4217   PetscErrorCode ierr;
4218   PetscMPIInt    size;
4219 
4220   PetscFunctionBegin;
4221   ierr = MatCreate(comm,A);CHKERRQ(ierr);
4222   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
4223   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4224   if (size > 1) {
4225     ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr);
4226     ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
4227   } else {
4228     ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr);
4229     ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr);
4230   }
4231   PetscFunctionReturn(0);
4232 }
4233 
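/* MatMPIAIJGetSeqAIJ - returns the diagonal (Ad) and off-diagonal (Ao) SeqAIJ blocks of an
   MPIAIJ matrix, along with the garray map from Ao's local column numbering to the global
   column numbering */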
4234 #undef __FUNCT__
4235 #define __FUNCT__ "MatMPIAIJGetSeqAIJ"
4236 PetscErrorCode  MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,const PetscInt *colmap[])
4237 {
4238   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
4239 
4240   PetscFunctionBegin;
4241   *Ad     = a->A;
4242   *Ao     = a->B;
4243   *colmap = a->garray;
4244   PetscFunctionReturn(0);
4245 }
4246 
4247 #undef __FUNCT__
4248 #define __FUNCT__ "MatSetColoring_MPIAIJ"
4249 PetscErrorCode MatSetColoring_MPIAIJ(Mat A,ISColoring coloring)
4250 {
4251   PetscErrorCode ierr;
4252   PetscInt       i;
4253   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
4254 
4255   PetscFunctionBegin;
4256   if (coloring->ctype == IS_COLORING_GLOBAL) {
4257     ISColoringValue *allcolors,*colors;
4258     ISColoring      ocoloring;
4259 
4260     /* set coloring for diagonal portion */
4261     ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr);
4262 
4263     /* set coloring for off-diagonal portion */
4264     ierr = ISAllGatherColors(PetscObjectComm((PetscObject)A),coloring->n,coloring->colors,NULL,&allcolors);CHKERRQ(ierr);
4265     ierr = PetscMalloc1((a->B->cmap->n+1),&colors);CHKERRQ(ierr);
4266     for (i=0; i<a->B->cmap->n; i++) {
4267       colors[i] = allcolors[a->garray[i]];
4268     }
4269     ierr = PetscFree(allcolors);CHKERRQ(ierr);
4270     ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
4271     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
4272     ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr);
4273   } else if (coloring->ctype == IS_COLORING_GHOSTED) {
4274     ISColoringValue *colors;
4275     PetscInt        *larray;
4276     ISColoring      ocoloring;
4277 
4278     /* set coloring for diagonal portion */
4279     ierr = PetscMalloc1((a->A->cmap->n+1),&larray);CHKERRQ(ierr);
4280     for (i=0; i<a->A->cmap->n; i++) {
4281       larray[i] = i + A->cmap->rstart;
4282     }
4283     ierr = ISGlobalToLocalMappingApply(A->cmap->mapping,IS_GTOLM_MASK,a->A->cmap->n,larray,NULL,larray);CHKERRQ(ierr);
4284     ierr = PetscMalloc1((a->A->cmap->n+1),&colors);CHKERRQ(ierr);
4285     for (i=0; i<a->A->cmap->n; i++) {
4286       colors[i] = coloring->colors[larray[i]];
4287     }
4288     ierr = PetscFree(larray);CHKERRQ(ierr);
4289     ierr = ISColoringCreate(PETSC_COMM_SELF,coloring->n,a->A->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
4290     ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr);
4291     ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr);
4292 
4293     /* set coloring for off-diagonal portion */
4294     ierr = PetscMalloc1((a->B->cmap->n+1),&larray);CHKERRQ(ierr);
4295     ierr = ISGlobalToLocalMappingApply(A->cmap->mapping,IS_GTOLM_MASK,a->B->cmap->n,a->garray,NULL,larray);CHKERRQ(ierr);
4296     ierr = PetscMalloc1((a->B->cmap->n+1),&colors);CHKERRQ(ierr);
4297     for (i=0; i<a->B->cmap->n; i++) {
4298       colors[i] = coloring->colors[larray[i]];
4299     }
4300     ierr = PetscFree(larray);CHKERRQ(ierr);
4301     ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
4302     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
4303     ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr);
4304   } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support ISColoringType %d",(int)coloring->ctype);
4305   PetscFunctionReturn(0);
4306 }
4307 
4308 #undef __FUNCT__
4309 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ"
4310 PetscErrorCode MatSetValuesAdifor_MPIAIJ(Mat A,PetscInt nl,void *advalues)
4311 {
4312   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
4313   PetscErrorCode ierr;
4314 
4315   PetscFunctionBegin;
4316   ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr);
4317   ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr);
4318   PetscFunctionReturn(0);
4319 }
4320 
4321 #undef __FUNCT__
4322 #define __FUNCT__ "MatCreateMPIAIJConcatenateSeqAIJSymbolic"
4323 PetscErrorCode  MatCreateMPIAIJConcatenateSeqAIJSymbolic(MPI_Comm comm,Mat inmat,PetscInt n,Mat *outmat)
4324 {
4325   PetscErrorCode ierr;
4326   PetscInt       m,N,i,rstart,nnz,*dnz,*onz,sum,bs,cbs;
4327   PetscInt       *indx;
4328 
4329   PetscFunctionBegin;
  /* This routine will ONLY return an MPIAIJ type matrix */
4331   ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
4332   ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr);
4333   if (n == PETSC_DECIDE) {
4334     ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr);
4335   }
4336   /* Check sum(n) = N */
4337   ierr = MPI_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
4338   if (sum != N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns != global columns %d",N);
4339 
4340   ierr    = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
4341   rstart -= m;
4342 
4343   ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
4344   for (i=0; i<m; i++) {
4345     ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr);
4346     ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr);
4347     ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr);
4348   }
4349 
4350   ierr = MatCreate(comm,outmat);CHKERRQ(ierr);
4351   ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
4352   ierr = MatSetBlockSizes(*outmat,bs,cbs);CHKERRQ(ierr);
4353   ierr = MatSetType(*outmat,MATMPIAIJ);CHKERRQ(ierr);
4354   ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr);
4355   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
4356   PetscFunctionReturn(0);
4357 }
4358 
4359 #undef __FUNCT__
4360 #define __FUNCT__ "MatCreateMPIAIJConcatenateSeqAIJNumeric"
4361 PetscErrorCode  MatCreateMPIAIJConcatenateSeqAIJNumeric(MPI_Comm comm,Mat inmat,PetscInt n,Mat outmat)
4362 {
4363   PetscErrorCode ierr;
4364   PetscInt       m,N,i,rstart,nnz,Ii;
4365   PetscInt       *indx;
4366   PetscScalar    *values;
4367 
4368   PetscFunctionBegin;
4369   ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
4370   ierr = MatGetOwnershipRange(outmat,&rstart,NULL);CHKERRQ(ierr);
4371   for (i=0; i<m; i++) {
4372     ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
4373     Ii   = i + rstart;
4374     ierr = MatSetValues(outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
4375     ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
4376   }
4377   ierr = MatAssemblyBegin(outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4378   ierr = MatAssemblyEnd(outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4379   PetscFunctionReturn(0);
4380 }
4381 
4382 #undef __FUNCT__
4383 #define __FUNCT__ "MatCreateMPIAIJConcatenateSeqAIJ"
4384 /*@
4385       MatCreateMPIAIJConcatenateSeqAIJ - Creates a single large PETSc matrix by concatenating sequential
4386                  matrices from each processor
4387 
4388     Collective on MPI_Comm
4389 
4390    Input Parameters:
+    comm - the communicator the parallel matrix will live on
.    inmat - the input sequential matrix
4393 .    n - number of local columns (or PETSC_DECIDE)
4394 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4395 
4396    Output Parameter:
4397 .    outmat - the parallel matrix generated
4398 
4399     Level: advanced
4400 
4401    Notes: The number of columns of the matrix in EACH processor MUST be the same.
4402 
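   For instance, a minimal sketch of stacking each process's sequential matrix into one
   parallel matrix, assuming seqA is a SeqAIJ matrix with the same number of columns on
   every process:

.vb
     Mat C;
     ierr = MatCreateMPIAIJConcatenateSeqAIJ(PETSC_COMM_WORLD,seqA,PETSC_DECIDE,MAT_INITIAL_MATRIX,&C);CHKERRQ(ierr);
.ve
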
4403 @*/
4404 PetscErrorCode  MatCreateMPIAIJConcatenateSeqAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
4405 {
4406   PetscErrorCode ierr;
4407   PetscMPIInt    size;
4408 
4409   PetscFunctionBegin;
4410   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4411   ierr = PetscLogEventBegin(MAT_Merge,inmat,0,0,0);CHKERRQ(ierr);
4412   if (size == 1) {
4413     if (scall == MAT_INITIAL_MATRIX) {
4414       ierr = MatDuplicate(inmat,MAT_COPY_VALUES,outmat);CHKERRQ(ierr);
4415     } else {
4416       ierr = MatCopy(inmat,*outmat,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
4417     }
4418   } else {
4419     if (scall == MAT_INITIAL_MATRIX) {
4420       ierr = MatCreateMPIAIJConcatenateSeqAIJSymbolic(comm,inmat,n,outmat);CHKERRQ(ierr);
4421     }
4422     ierr = MatCreateMPIAIJConcatenateSeqAIJNumeric(comm,inmat,n,*outmat);CHKERRQ(ierr);
4423   }
4424   ierr = PetscLogEventEnd(MAT_Merge,inmat,0,0,0);CHKERRQ(ierr);
4425   PetscFunctionReturn(0);
4426 }
4427 
4428 #undef __FUNCT__
4429 #define __FUNCT__ "MatFileSplit"
4430 PetscErrorCode MatFileSplit(Mat A,char *outfile)
4431 {
4432   PetscErrorCode    ierr;
4433   PetscMPIInt       rank;
4434   PetscInt          m,N,i,rstart,nnz;
4435   size_t            len;
4436   const PetscInt    *indx;
4437   PetscViewer       out;
4438   char              *name;
4439   Mat               B;
4440   const PetscScalar *values;
4441 
4442   PetscFunctionBegin;
4443   ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr);
4444   ierr = MatGetSize(A,0,&N);CHKERRQ(ierr);
4445   /* Should this be the type of the diagonal block of A? */
4446   ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr);
4447   ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr);
4448   ierr = MatSetBlockSizesFromMats(B,A,A);CHKERRQ(ierr);
4449   ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr);
4450   ierr = MatSeqAIJSetPreallocation(B,0,NULL);CHKERRQ(ierr);
4451   ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr);
4452   for (i=0; i<m; i++) {
4453     ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
4454     ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
4455     ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
4456   }
4457   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4458   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4459 
4460   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRQ(ierr);
4461   ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr);
4462   ierr = PetscMalloc1((len+5),&name);CHKERRQ(ierr);
4463   sprintf(name,"%s.%d",outfile,rank);
4464   ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr);
4465   ierr = PetscFree(name);CHKERRQ(ierr);
4466   ierr = MatView(B,out);CHKERRQ(ierr);
4467   ierr = PetscViewerDestroy(&out);CHKERRQ(ierr);
4468   ierr = MatDestroy(&B);CHKERRQ(ierr);
4469   PetscFunctionReturn(0);
4470 }
4471 
4472 extern PetscErrorCode MatDestroy_MPIAIJ(Mat);
4473 #undef __FUNCT__
4474 #define __FUNCT__ "MatDestroy_MPIAIJ_SeqsToMPI"
4475 PetscErrorCode  MatDestroy_MPIAIJ_SeqsToMPI(Mat A)
4476 {
4477   PetscErrorCode      ierr;
4478   Mat_Merge_SeqsToMPI *merge;
4479   PetscContainer      container;
4480 
4481   PetscFunctionBegin;
4482   ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr);
4483   if (container) {
4484     ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr);
4485     ierr = PetscFree(merge->id_r);CHKERRQ(ierr);
4486     ierr = PetscFree(merge->len_s);CHKERRQ(ierr);
4487     ierr = PetscFree(merge->len_r);CHKERRQ(ierr);
4488     ierr = PetscFree(merge->bi);CHKERRQ(ierr);
4489     ierr = PetscFree(merge->bj);CHKERRQ(ierr);
4490     ierr = PetscFree(merge->buf_ri[0]);CHKERRQ(ierr);
4491     ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr);
4492     ierr = PetscFree(merge->buf_rj[0]);CHKERRQ(ierr);
4493     ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr);
4494     ierr = PetscFree(merge->coi);CHKERRQ(ierr);
4495     ierr = PetscFree(merge->coj);CHKERRQ(ierr);
4496     ierr = PetscFree(merge->owners_co);CHKERRQ(ierr);
4497     ierr = PetscLayoutDestroy(&merge->rowmap);CHKERRQ(ierr);
4498     ierr = PetscFree(merge);CHKERRQ(ierr);
4499     ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr);
4500   }
4501   ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr);
4502   PetscFunctionReturn(0);
4503 }
4504 
4505 #include <../src/mat/utils/freespace.h>
4506 #include <petscbt.h>
4507 
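/* MatCreateMPIAIJSumSeqAIJNumeric - fills the parallel matrix created by
   MatCreateMPIAIJSumSeqAIJSymbolic() with numerical values: each process sends the value
   arrays of the rows owned by other processes, then merges its own local values with the
   received ones row by row */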
4508 #undef __FUNCT__
4509 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJNumeric"
4510 PetscErrorCode  MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat,Mat mpimat)
4511 {
4512   PetscErrorCode      ierr;
4513   MPI_Comm            comm;
4514   Mat_SeqAIJ          *a  =(Mat_SeqAIJ*)seqmat->data;
4515   PetscMPIInt         size,rank,taga,*len_s;
4516   PetscInt            N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj;
4517   PetscInt            proc,m;
4518   PetscInt            **buf_ri,**buf_rj;
4519   PetscInt            k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj;
4520   PetscInt            nrows,**buf_ri_k,**nextrow,**nextai;
4521   MPI_Request         *s_waits,*r_waits;
4522   MPI_Status          *status;
4523   MatScalar           *aa=a->a;
4524   MatScalar           **abuf_r,*ba_i;
4525   Mat_Merge_SeqsToMPI *merge;
4526   PetscContainer      container;
4527 
4528   PetscFunctionBegin;
4529   ierr = PetscObjectGetComm((PetscObject)mpimat,&comm);CHKERRQ(ierr);
4530   ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
4531 
4532   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4533   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4534 
4535   ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr);
4536   ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr);
4537 
4538   bi     = merge->bi;
4539   bj     = merge->bj;
4540   buf_ri = merge->buf_ri;
4541   buf_rj = merge->buf_rj;
4542 
4543   ierr   = PetscMalloc1(size,&status);CHKERRQ(ierr);
4544   owners = merge->rowmap->range;
4545   len_s  = merge->len_s;
4546 
4547   /* send and recv matrix values */
4548   /*-----------------------------*/
4549   ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr);
4550   ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr);
4551 
4552   ierr = PetscMalloc1((merge->nsend+1),&s_waits);CHKERRQ(ierr);
4553   for (proc=0,k=0; proc<size; proc++) {
4554     if (!len_s[proc]) continue;
4555     i    = owners[proc];
4556     ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr);
4557     k++;
4558   }
4559 
4560   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);}
4561   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);}
4562   ierr = PetscFree(status);CHKERRQ(ierr);
4563 
4564   ierr = PetscFree(s_waits);CHKERRQ(ierr);
4565   ierr = PetscFree(r_waits);CHKERRQ(ierr);
4566 
4567   /* insert mat values of mpimat */
4568   /*----------------------------*/
4569   ierr = PetscMalloc1(N,&ba_i);CHKERRQ(ierr);
4570   ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr);
4571 
4572   for (k=0; k<merge->nrecv; k++) {
    buf_ri_k[k] = buf_ri[k]; /* beginning of k-th received i-structure */
    nrows       = *(buf_ri_k[k]);
    nextrow[k]  = buf_ri_k[k]+1;  /* next row number of k-th received i-structure */
    nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th received i-structure */
4577   }
4578 
4579   /* set values of ba */
4580   m = merge->rowmap->n;
4581   for (i=0; i<m; i++) {
4582     arow = owners[rank] + i;
4583     bj_i = bj+bi[i];  /* col indices of the i-th row of mpimat */
4584     bnzi = bi[i+1] - bi[i];
4585     ierr = PetscMemzero(ba_i,bnzi*sizeof(PetscScalar));CHKERRQ(ierr);
4586 
4587     /* add local non-zero vals of this proc's seqmat into ba */
4588     anzi   = ai[arow+1] - ai[arow];
4589     aj     = a->j + ai[arow];
4590     aa     = a->a + ai[arow];
4591     nextaj = 0;
4592     for (j=0; nextaj<anzi; j++) {
4593       if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
4594         ba_i[j] += aa[nextaj++];
4595       }
4596     }
4597 
4598     /* add received vals into ba */
4599     for (k=0; k<merge->nrecv; k++) { /* k-th received message */
4600       /* i-th row */
4601       if (i == *nextrow[k]) {
4602         anzi   = *(nextai[k]+1) - *nextai[k];
4603         aj     = buf_rj[k] + *(nextai[k]);
4604         aa     = abuf_r[k] + *(nextai[k]);
4605         nextaj = 0;
4606         for (j=0; nextaj<anzi; j++) {
4607           if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
4608             ba_i[j] += aa[nextaj++];
4609           }
4610         }
4611         nextrow[k]++; nextai[k]++;
4612       }
4613     }
4614     ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr);
4615   }
4616   ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4617   ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4618 
4619   ierr = PetscFree(abuf_r[0]);CHKERRQ(ierr);
4620   ierr = PetscFree(abuf_r);CHKERRQ(ierr);
4621   ierr = PetscFree(ba_i);CHKERRQ(ierr);
4622   ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
4623   ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
4624   PetscFunctionReturn(0);
4625 }
4626 
4627 extern PetscErrorCode  MatDestroy_MPIAIJ_SeqsToMPI(Mat);
4628 
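/* MatCreateMPIAIJSumSeqAIJSymbolic - determines the nonzero structure of the sum of the
   per-process sequential matrices: each process sends the ij-structure of the rows owned
   by other processes, merges the received structure with its own rows using a sorted
   linked list, and preallocates the parallel matrix accordingly */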
4629 #undef __FUNCT__
4630 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJSymbolic"
4631 PetscErrorCode  MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat)
4632 {
4633   PetscErrorCode      ierr;
4634   Mat                 B_mpi;
4635   Mat_SeqAIJ          *a=(Mat_SeqAIJ*)seqmat->data;
4636   PetscMPIInt         size,rank,tagi,tagj,*len_s,*len_si,*len_ri;
4637   PetscInt            **buf_rj,**buf_ri,**buf_ri_k;
4638   PetscInt            M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j;
4639   PetscInt            len,proc,*dnz,*onz,bs,cbs;
4640   PetscInt            k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0;
4641   PetscInt            nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai;
4642   MPI_Request         *si_waits,*sj_waits,*ri_waits,*rj_waits;
4643   MPI_Status          *status;
4644   PetscFreeSpaceList  free_space=NULL,current_space=NULL;
4645   PetscBT             lnkbt;
4646   Mat_Merge_SeqsToMPI *merge;
4647   PetscContainer      container;
4648 
4649   PetscFunctionBegin;
4650   ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4651 
4652   /* make sure it is a PETSc comm */
4653   ierr = PetscCommDuplicate(comm,&comm,NULL);CHKERRQ(ierr);
4654   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4655   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4656 
4657   ierr = PetscNew(&merge);CHKERRQ(ierr);
4658   ierr = PetscMalloc1(size,&status);CHKERRQ(ierr);
4659 
4660   /* determine row ownership */
4661   /*---------------------------------------------------------*/
4662   ierr = PetscLayoutCreate(comm,&merge->rowmap);CHKERRQ(ierr);
4663   ierr = PetscLayoutSetLocalSize(merge->rowmap,m);CHKERRQ(ierr);
4664   ierr = PetscLayoutSetSize(merge->rowmap,M);CHKERRQ(ierr);
4665   ierr = PetscLayoutSetBlockSize(merge->rowmap,1);CHKERRQ(ierr);
4666   ierr = PetscLayoutSetUp(merge->rowmap);CHKERRQ(ierr);
4667   ierr = PetscMalloc1(size,&len_si);CHKERRQ(ierr);
4668   ierr = PetscMalloc1(size,&merge->len_s);CHKERRQ(ierr);
4669 
4670   m      = merge->rowmap->n;
4671   owners = merge->rowmap->range;
4672 
4673   /* determine the number of messages to send, their lengths */
4674   /*---------------------------------------------------------*/
4675   len_s = merge->len_s;
4676 
4677   len          = 0; /* length of buf_si[] */
4678   merge->nsend = 0;
4679   for (proc=0; proc<size; proc++) {
4680     len_si[proc] = 0;
4681     if (proc == rank) {
4682       len_s[proc] = 0;
4683     } else {
4684       len_si[proc] = owners[proc+1] - owners[proc] + 1;
4685       len_s[proc]  = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */
4686     }
4687     if (len_s[proc]) {
4688       merge->nsend++;
4689       nrows = 0;
4690       for (i=owners[proc]; i<owners[proc+1]; i++) {
4691         if (ai[i+1] > ai[i]) nrows++;
4692       }
4693       len_si[proc] = 2*(nrows+1);
4694       len         += len_si[proc];
4695     }
4696   }
4697 
4698   /* determine the number and length of messages to receive for ij-structure */
4699   /*-------------------------------------------------------------------------*/
4700   ierr = PetscGatherNumberOfMessages(comm,NULL,len_s,&merge->nrecv);CHKERRQ(ierr);
4701   ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr);
4702 
4703   /* post the Irecv of j-structure */
4704   /*-------------------------------*/
4705   ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr);
4706   ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr);
4707 
4708   /* post the Isend of j-structure */
4709   /*--------------------------------*/
4710   ierr = PetscMalloc2(merge->nsend,&si_waits,merge->nsend,&sj_waits);CHKERRQ(ierr);
4711 
4712   for (proc=0, k=0; proc<size; proc++) {
4713     if (!len_s[proc]) continue;
4714     i    = owners[proc];
4715     ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr);
4716     k++;
4717   }
4718 
4719   /* receives and sends of j-structure are complete */
4720   /*------------------------------------------------*/
4721   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);}
4722   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);}
4723 
4724   /* send and recv i-structure */
4725   /*---------------------------*/
4726   ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr);
4727   ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr);
4728 
4729   ierr   = PetscMalloc1((len+1),&buf_s);CHKERRQ(ierr);
4730   buf_si = buf_s;  /* points to the beginning of k-th msg to be sent */
4731   for (proc=0,k=0; proc<size; proc++) {
4732     if (!len_s[proc]) continue;
4733     /* form outgoing message for i-structure:
4734          buf_si[0]:                 nrows to be sent
               [1:nrows]:           row index (local to the receiving process)
4736                [nrows+1:2*nrows+1]: i-structure index
4737     */
4738     /*-------------------------------------------*/
4739     nrows       = len_si[proc]/2 - 1;
4740     buf_si_i    = buf_si + nrows+1;
4741     buf_si[0]   = nrows;
4742     buf_si_i[0] = 0;
4743     nrows       = 0;
4744     for (i=owners[proc]; i<owners[proc+1]; i++) {
4745       anzi = ai[i+1] - ai[i];
4746       if (anzi) {
4747         buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */
4748         buf_si[nrows+1]   = i-owners[proc]; /* local row index */
4749         nrows++;
4750       }
4751     }
4752     ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr);
4753     k++;
4754     buf_si += len_si[proc];
4755   }
4756 
4757   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);}
4758   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);}
4759 
4760   ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr);
4761   for (i=0; i<merge->nrecv; i++) {
4762     ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr);
4763   }
4764 
4765   ierr = PetscFree(len_si);CHKERRQ(ierr);
4766   ierr = PetscFree(len_ri);CHKERRQ(ierr);
4767   ierr = PetscFree(rj_waits);CHKERRQ(ierr);
4768   ierr = PetscFree2(si_waits,sj_waits);CHKERRQ(ierr);
4769   ierr = PetscFree(ri_waits);CHKERRQ(ierr);
4770   ierr = PetscFree(buf_s);CHKERRQ(ierr);
4771   ierr = PetscFree(status);CHKERRQ(ierr);
4772 
4773   /* compute a local seq matrix in each processor */
4774   /*----------------------------------------------*/
4775   /* allocate bi array and free space for accumulating nonzero column info */
4776   ierr  = PetscMalloc1((m+1),&bi);CHKERRQ(ierr);
4777   bi[0] = 0;
4778 
4779   /* create and initialize a linked list */
4780   nlnk = N+1;
4781   ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4782 
4783   /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */
4784   len  = ai[owners[rank+1]] - ai[owners[rank]];
4785   ierr = PetscFreeSpaceGet((PetscInt)(2*len+1),&free_space);CHKERRQ(ierr);
4786 
4787   current_space = free_space;
4788 
4789   /* determine symbolic info for each local row */
4790   ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr);
4791 
4792   for (k=0; k<merge->nrecv; k++) {
    buf_ri_k[k] = buf_ri[k]; /* beginning of k-th received i-structure */
    nrows       = *buf_ri_k[k];
    nextrow[k]  = buf_ri_k[k] + 1;  /* next row number of k-th received i-structure */
    nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th received i-structure */
4797   }
4798 
4799   ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
4800   len  = 0;
4801   for (i=0; i<m; i++) {
4802     bnzi = 0;
4803     /* add local non-zero cols of this proc's seqmat into lnk */
4804     arow  = owners[rank] + i;
4805     anzi  = ai[arow+1] - ai[arow];
4806     aj    = a->j + ai[arow];
4807     ierr  = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4808     bnzi += nlnk;
4809     /* add received col data into lnk */
4810     for (k=0; k<merge->nrecv; k++) { /* k-th received message */
4811       if (i == *nextrow[k]) { /* i-th row */
4812         anzi  = *(nextai[k]+1) - *nextai[k];
4813         aj    = buf_rj[k] + *nextai[k];
4814         ierr  = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4815         bnzi += nlnk;
4816         nextrow[k]++; nextai[k]++;
4817       }
4818     }
4819     if (len < bnzi) len = bnzi;  /* =max(bnzi) */
4820 
4821     /* if free space is not available, make more free space */
4822     if (current_space->local_remaining<bnzi) {
4823       ierr = PetscFreeSpaceGet(bnzi+current_space->total_array_size,&current_space);CHKERRQ(ierr);
4824       nspacedouble++;
4825     }
4826     /* copy data into free space, then initialize lnk */
4827     ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
4828     ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr);
4829 
4830     current_space->array           += bnzi;
4831     current_space->local_used      += bnzi;
4832     current_space->local_remaining -= bnzi;
4833 
4834     bi[i+1] = bi[i] + bnzi;
4835   }
4836 
4837   ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
4838 
4839   ierr = PetscMalloc1((bi[m]+1),&bj);CHKERRQ(ierr);
4840   ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr);
4841   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
4842 
4843   /* create symbolic parallel matrix B_mpi */
4844   /*---------------------------------------*/
4845   ierr = MatGetBlockSizes(seqmat,&bs,&cbs);CHKERRQ(ierr);
4846   ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr);
4847   if (n==PETSC_DECIDE) {
4848     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr);
4849   } else {
4850     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
4851   }
4852   ierr = MatSetBlockSizes(B_mpi,bs,cbs);CHKERRQ(ierr);
4853   ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr);
4854   ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr);
4855   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
4856   ierr = MatSetOption(B_mpi,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
4857 
4858   /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */
4859   B_mpi->assembled    = PETSC_FALSE;
4860   B_mpi->ops->destroy = MatDestroy_MPIAIJ_SeqsToMPI;
4861   merge->bi           = bi;
4862   merge->bj           = bj;
4863   merge->buf_ri       = buf_ri;
4864   merge->buf_rj       = buf_rj;
4865   merge->coi          = NULL;
4866   merge->coj          = NULL;
4867   merge->owners_co    = NULL;
4868 
4869   ierr = PetscCommDestroy(&comm);CHKERRQ(ierr);
4870 
4871   /* attach the supporting struct to B_mpi for reuse */
4872   ierr    = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
4873   ierr    = PetscContainerSetPointer(container,merge);CHKERRQ(ierr);
4874   ierr    = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr);
4875   ierr    = PetscContainerDestroy(&container);CHKERRQ(ierr);
4876   *mpimat = B_mpi;
4877 
4878   ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4879   PetscFunctionReturn(0);
4880 }
4881 
4882 #undef __FUNCT__
4883 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJ"
4884 /*@C
4885       MatCreateMPIAIJSumSeqAIJ - Creates a MPIAIJ matrix by adding sequential
4886                  matrices from each processor
4887 
4888     Collective on MPI_Comm
4889 
4890    Input Parameters:
+    comm - the communicator the parallel matrix will live on
.    seqmat - the input sequential matrix
4893 .    m - number of local rows (or PETSC_DECIDE)
4894 .    n - number of local columns (or PETSC_DECIDE)
4895 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4896 
4897    Output Parameter:
4898 .    mpimat - the parallel matrix generated
4899 
4900     Level: advanced
4901 
4902    Notes:
4903      The dimensions of the sequential matrix in each processor MUST be the same.
     The input seqmat is included in the container "Mat_Merge_SeqsToMPI", and will be
4905      destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat.
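
   For instance, a minimal sketch of summing the per-process sequential matrices into one
   parallel matrix, assuming each process's seqmat carries the full global dimensions:

.vb
     Mat C;
     ierr = MatCreateMPIAIJSumSeqAIJ(PETSC_COMM_WORLD,seqmat,PETSC_DECIDE,PETSC_DECIDE,MAT_INITIAL_MATRIX,&C);CHKERRQ(ierr);
.ve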
4906 @*/
4907 PetscErrorCode  MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat)
4908 {
4909   PetscErrorCode ierr;
4910   PetscMPIInt    size;
4911 
4912   PetscFunctionBegin;
4913   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4914   if (size == 1) {
4915     ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4916     if (scall == MAT_INITIAL_MATRIX) {
4917       ierr = MatDuplicate(seqmat,MAT_COPY_VALUES,mpimat);CHKERRQ(ierr);
4918     } else {
4919       ierr = MatCopy(seqmat,*mpimat,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
4920     }
4921     ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4922     PetscFunctionReturn(0);
4923   }
4924   ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4925   if (scall == MAT_INITIAL_MATRIX) {
4926     ierr = MatCreateMPIAIJSumSeqAIJSymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr);
4927   }
4928   ierr = MatCreateMPIAIJSumSeqAIJNumeric(seqmat,*mpimat);CHKERRQ(ierr);
4929   ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4930   PetscFunctionReturn(0);
4931 }
4932 
4933 #undef __FUNCT__
4934 #define __FUNCT__ "MatMPIAIJGetLocalMat"
4935 /*@
     MatMPIAIJGetLocalMat - Creates a SeqAIJ matrix from an MPIAIJ matrix by taking all its local rows and putting them into a sequential matrix with
          mlocal rows and n columns, where mlocal is the row count obtained with MatGetLocalSize() and n is the global column count obtained
          with MatGetSize()
4939 
4940     Not Collective
4941 
4942    Input Parameters:
4943 +    A - the matrix
-    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4945 
4946    Output Parameter:
4947 .    A_loc - the local sequential matrix generated
4948 
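   For instance, a minimal sketch of extracting the local matrix and refreshing it after
   the values of A change (with an unchanged nonzero pattern):

.vb
     Mat Aloc;
     ierr = MatMPIAIJGetLocalMat(A,MAT_INITIAL_MATRIX,&Aloc);CHKERRQ(ierr);
     /* ... update the values of A ... */
     ierr = MatMPIAIJGetLocalMat(A,MAT_REUSE_MATRIX,&Aloc);CHKERRQ(ierr);
.ve
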
4949     Level: developer
4950 
.seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMatCondensed()
4952 
4953 @*/
4954 PetscErrorCode  MatMPIAIJGetLocalMat(Mat A,MatReuse scall,Mat *A_loc)
4955 {
4956   PetscErrorCode ierr;
4957   Mat_MPIAIJ     *mpimat=(Mat_MPIAIJ*)A->data;
4958   Mat_SeqAIJ     *mat,*a,*b;
4959   PetscInt       *ai,*aj,*bi,*bj,*cmap=mpimat->garray;
4960   MatScalar      *aa,*ba,*cam;
4961   PetscScalar    *ca;
4962   PetscInt       am=A->rmap->n,i,j,k,cstart=A->cmap->rstart;
4963   PetscInt       *ci,*cj,col,ncols_d,ncols_o,jo;
4964   PetscBool      match;
4965 
4966   PetscFunctionBegin;
4967   ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr);
4968   if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MPIAIJ matrix as input");
4969   ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
4970   a = (Mat_SeqAIJ*)(mpimat->A)->data;
4971   b = (Mat_SeqAIJ*)(mpimat->B)->data;
4972   ai = a->i; aj = a->j; bi = b->i; bj = b->j;
4973   aa = a->a; ba = b->a;
4974   if (scall == MAT_INITIAL_MATRIX) {
4975     ierr  = PetscMalloc1((1+am),&ci);CHKERRQ(ierr);
4976     ci[0] = 0;
4977     for (i=0; i<am; i++) {
4978       ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]);
4979     }
4980     ierr = PetscMalloc1((1+ci[am]),&cj);CHKERRQ(ierr);
4981     ierr = PetscMalloc1((1+ci[am]),&ca);CHKERRQ(ierr);
4982     k    = 0;
4983     for (i=0; i<am; i++) {
4984       ncols_o = bi[i+1] - bi[i];
4985       ncols_d = ai[i+1] - ai[i];
4986       /* off-diagonal portion of A */
4987       for (jo=0; jo<ncols_o; jo++) {
4988         col = cmap[*bj];
4989         if (col >= cstart) break;
4990         cj[k]   = col; bj++;
4991         ca[k++] = *ba++;
4992       }
4993       /* diagonal portion of A */
4994       for (j=0; j<ncols_d; j++) {
4995         cj[k]   = cstart + *aj++;
4996         ca[k++] = *aa++;
4997       }
4998       /* off-diagonal portion of A */
4999       for (j=jo; j<ncols_o; j++) {
5000         cj[k]   = cmap[*bj++];
5001         ca[k++] = *ba++;
5002       }
5003     }
5004     /* put together the new matrix */
5005     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr);
5006     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
5007     /* Since these are PETSc arrays, change flags to free them as necessary. */
5008     mat          = (Mat_SeqAIJ*)(*A_loc)->data;
5009     mat->free_a  = PETSC_TRUE;
5010     mat->free_ij = PETSC_TRUE;
5011     mat->nonew   = 0;
5012   } else if (scall == MAT_REUSE_MATRIX) {
5013     mat=(Mat_SeqAIJ*)(*A_loc)->data;
5014     ci = mat->i; cj = mat->j; cam = mat->a;
5015     for (i=0; i<am; i++) {
5016       /* off-diagonal portion of A */
5017       ncols_o = bi[i+1] - bi[i];
5018       for (jo=0; jo<ncols_o; jo++) {
5019         col = cmap[*bj];
5020         if (col >= cstart) break;
5021         *cam++ = *ba++; bj++;
5022       }
5023       /* diagonal portion of A */
5024       ncols_d = ai[i+1] - ai[i];
5025       for (j=0; j<ncols_d; j++) *cam++ = *aa++;
5026       /* off-diagonal portion of A */
5027       for (j=jo; j<ncols_o; j++) {
5028         *cam++ = *ba++; bj++;
5029       }
5030     }
5031   } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
5032   ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
5033   PetscFunctionReturn(0);
5034 }
5035 
5036 #undef __FUNCT__
5037 #define __FUNCT__ "MatMPIAIJGetLocalMatCondensed"
5038 /*@C
5039      MatMPIAIJGetLocalMatCondensed - Creates a SeqAIJ matrix from an MPIAIJ matrix by taking all its local rows and NON-ZERO columns
5040 
5041     Not Collective
5042 
5043    Input Parameters:
5044 +    A - the matrix
5045 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5046 -    row, col - index sets of rows and columns to extract (or NULL)
5047 
5048    Output Parameter:
5049 .    A_loc - the local sequential matrix generated
5050 
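   For instance, a minimal sketch of extracting the condensed local matrix with the default
   row and column selections:

.vb
     Mat Aloc;
     ierr = MatMPIAIJGetLocalMatCondensed(A,MAT_INITIAL_MATRIX,NULL,NULL,&Aloc);CHKERRQ(ierr);
.ve
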
5051     Level: developer
5052 
5053 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMat()
5054 
5055 @*/
5056 PetscErrorCode  MatMPIAIJGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc)
5057 {
5058   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
5059   PetscErrorCode ierr;
5060   PetscInt       i,start,end,ncols,nzA,nzB,*cmap,imark,*idx;
5061   IS             isrowa,iscola;
5062   Mat            *aloc;
5063   PetscBool      match;
5064 
5065   PetscFunctionBegin;
5066   ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr);
5067   if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MPIAIJ matrix as input");
5068   ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
5069   if (!row) {
5070     start = A->rmap->rstart; end = A->rmap->rend;
5071     ierr  = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr);
5072   } else {
5073     isrowa = *row;
5074   }
5075   if (!col) {
5076     start = A->cmap->rstart;
5077     cmap  = a->garray;
5078     nzA   = a->A->cmap->n;
5079     nzB   = a->B->cmap->n;
5080     ierr  = PetscMalloc1((nzA+nzB), &idx);CHKERRQ(ierr);
5081     ncols = 0;
5082     for (i=0; i<nzB; i++) {
5083       if (cmap[i] < start) idx[ncols++] = cmap[i];
5084       else break;
5085     }
5086     imark = i;
5087     for (i=0; i<nzA; i++) idx[ncols++] = start + i;
5088     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i];
5089     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&iscola);CHKERRQ(ierr);
5090   } else {
5091     iscola = *col;
5092   }
5093   if (scall != MAT_INITIAL_MATRIX) {
5094     ierr    = PetscMalloc(sizeof(Mat),&aloc);CHKERRQ(ierr);
5095     aloc[0] = *A_loc;
5096   }
5097   ierr   = MatGetSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr);
5098   *A_loc = aloc[0];
5099   ierr   = PetscFree(aloc);CHKERRQ(ierr);
5100   if (!row) {
5101     ierr = ISDestroy(&isrowa);CHKERRQ(ierr);
5102   }
5103   if (!col) {
5104     ierr = ISDestroy(&iscola);CHKERRQ(ierr);
5105   }
5106   ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
5107   PetscFunctionReturn(0);
5108 }
5109 
5110 #undef __FUNCT__
5111 #define __FUNCT__ "MatGetBrowsOfAcols"
5112 /*@C
    MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns of local A
5114 
5115     Collective on Mat
5116 
5117    Input Parameters:
5118 +    A,B - the matrices in mpiaij format
5119 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5120 -    rowb, colb - index sets of rows and columns of B to extract (or NULL)
5121 
5122    Output Parameter:
5123 +    rowb, colb - index sets of rows and columns of B to extract
5124 -    B_seq - the sequential matrix generated
5125 
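   For instance, a minimal sketch of the typical create-then-reuse pattern (the index sets
   returned by the first call must be passed back in on reuse):

.vb
     IS  rowb = NULL,colb = NULL;
     Mat Bseq = NULL;
     ierr = MatGetBrowsOfAcols(A,B,MAT_INITIAL_MATRIX,&rowb,&colb,&Bseq);CHKERRQ(ierr);
     /* ... update the values of B ... */
     ierr = MatGetBrowsOfAcols(A,B,MAT_REUSE_MATRIX,&rowb,&colb,&Bseq);CHKERRQ(ierr);
.ve
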
5126     Level: developer
5127 
5128 @*/
5129 PetscErrorCode  MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,Mat *B_seq)
5130 {
5131   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
5132   PetscErrorCode ierr;
5133   PetscInt       *idx,i,start,ncols,nzA,nzB,*cmap,imark;
5134   IS             isrowb,iscolb;
5135   Mat            *bseq=NULL;
5136 
5137   PetscFunctionBegin;
5138   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
5139     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
5140   }
5141   ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
5142 
5143   if (scall == MAT_INITIAL_MATRIX) {
5144     start = A->cmap->rstart;
5145     cmap  = a->garray;
5146     nzA   = a->A->cmap->n;
5147     nzB   = a->B->cmap->n;
5148     ierr  = PetscMalloc1((nzA+nzB), &idx);CHKERRQ(ierr);
5149     ncols = 0;
5150     for (i=0; i<nzB; i++) {  /* global rows of B below the locally owned block */
5151       if (cmap[i] < start) idx[ncols++] = cmap[i];
5152       else break;
5153     }
5154     imark = i;
5155     for (i=0; i<nzA; i++) idx[ncols++] = start + i;  /* rows of B owned by this process */
5156     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* global rows of B above the locally owned block */
5157     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&isrowb);CHKERRQ(ierr);
5158     ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr);
5159   } else {
5160     if (!rowb || !colb) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX");
5161     isrowb  = *rowb; iscolb = *colb;
5162     ierr    = PetscMalloc(sizeof(Mat),&bseq);CHKERRQ(ierr);
5163     bseq[0] = *B_seq;
5164   }
5165   ierr   = MatGetSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr);
5166   *B_seq = bseq[0];
5167   ierr   = PetscFree(bseq);CHKERRQ(ierr);
5168   if (!rowb) {
5169     ierr = ISDestroy(&isrowb);CHKERRQ(ierr);
5170   } else {
5171     *rowb = isrowb;
5172   }
5173   if (!colb) {
5174     ierr = ISDestroy(&iscolb);CHKERRQ(ierr);
5175   } else {
5176     *colb = iscolb;
5177   }
5178   ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
5179   PetscFunctionReturn(0);
5180 }
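/*
   A minimal usage sketch (illustrative only; assumes A and B are assembled
   MPIAIJ matrices with compatible layouts): the index sets created by the
   MAT_INITIAL_MATRIX call are returned through rowb/colb, must be passed back
   unchanged for MAT_REUSE_MATRIX, and are destroyed by the caller.

     IS  rowb = NULL,colb = NULL;
     Mat B_seq = NULL;
     ierr = MatGetBrowsOfAcols(A,B,MAT_INITIAL_MATRIX,&rowb,&colb,&B_seq);CHKERRQ(ierr);
     ierr = MatGetBrowsOfAcols(A,B,MAT_REUSE_MATRIX,&rowb,&colb,&B_seq);CHKERRQ(ierr);
     ierr = ISDestroy(&rowb);CHKERRQ(ierr);
     ierr = ISDestroy(&colb);CHKERRQ(ierr);
     ierr = MatDestroy(&B_seq);CHKERRQ(ierr);
*/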
5181 
5182 #undef __FUNCT__
5183 #define __FUNCT__ "MatGetBrowsOfAoCols_MPIAIJ"
5184 /*
5185     MatGetBrowsOfAoCols_MPIAIJ - Creates a SeqAIJ matrix containing those rows of B that correspond to the nonzero columns
5186     of the OFF-DIAGONAL portion of the local part of A
5187 
5188     Collective on Mat
5189 
5190    Input Parameters:
5191 +    A,B - the matrices in mpiaij format
5192 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5193 
5194    Output Parameters:
5195 +    startsj_s - starting points in B's sending j-arrays, saved for MAT_REUSE_MATRIX (or NULL)
5196 .    startsj_r - starting points in B's receiving j-arrays, saved for MAT_REUSE_MATRIX (or NULL)
5197 .    bufa_ptr - buffer for sending matrix values, saved for MAT_REUSE_MATRIX (or NULL)
5198 -    B_oth - the sequential matrix generated, of size aBn=a->B->cmap->n by B->cmap->N
5199 
5200     Level: developer
5201 
5202 */
5203 PetscErrorCode  MatGetBrowsOfAoCols_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscInt **startsj_s,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth)
5204 {
5205   VecScatter_MPI_General *gen_to,*gen_from;
5206   PetscErrorCode         ierr;
5207   Mat_MPIAIJ             *a=(Mat_MPIAIJ*)A->data;
5208   Mat_SeqAIJ             *b_oth;
5209   VecScatter             ctx =a->Mvctx;
5210   MPI_Comm               comm;
5211   PetscMPIInt            *rprocs,*sprocs,tag=((PetscObject)ctx)->tag,rank;
5212   PetscInt               *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap->n,row,*b_othi,*b_othj;
5213   PetscScalar            *rvalues,*svalues;
5214   MatScalar              *b_otha,*bufa,*bufA;
5215   PetscInt               i,j,k,l,ll,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len;
5216   MPI_Request            *rwaits = NULL,*swaits = NULL;
5217   MPI_Status             *sstatus,rstatus;
5218   PetscMPIInt            jj;
5219   PetscInt               *cols,sbs,rbs;
5220   PetscScalar            *vals;
5221 
5222   PetscFunctionBegin;
5223   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
5224   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
5225     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
5226   }
5227   ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
5228   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
5229 
5230   gen_to   = (VecScatter_MPI_General*)ctx->todata;
5231   gen_from = (VecScatter_MPI_General*)ctx->fromdata;
5232   rvalues  = gen_from->values; /* reused to hold the lengths of the incoming rows */
5233   svalues  = gen_to->values;   /* reused to hold the lengths of the outgoing rows */
5234   nrecvs   = gen_from->n;
5235   nsends   = gen_to->n;
5236 
5237   ierr    = PetscMalloc2(nrecvs,&rwaits,nsends,&swaits);CHKERRQ(ierr);
5238   srow    = gen_to->indices;    /* local row index to be sent */
5239   sstarts = gen_to->starts;
5240   sprocs  = gen_to->procs;
5241   sstatus = gen_to->sstatus;
5242   sbs     = gen_to->bs;
5243   rstarts = gen_from->starts;
5244   rprocs  = gen_from->procs;
5245   rbs     = gen_from->bs;
5246 
5247   if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX;
5248   if (scall == MAT_INITIAL_MATRIX) {
5249     /* i-array */
5250     /*---------*/
5251     /*  post receives */
5252     for (i=0; i<nrecvs; i++) {
5253       rowlen = (PetscInt*)rvalues + rstarts[i]*rbs;
5254       nrows  = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */
5255       ierr   = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5256     }
5257 
5258     /* pack the outgoing message */
5259     ierr = PetscMalloc2(nsends+1,&sstartsj,nrecvs+1,&rstartsj);CHKERRQ(ierr);
5260 
5261     sstartsj[0] = 0;
5262     rstartsj[0] = 0;
5263     len         = 0; /* total length of j or a array to be sent */
5264     k           = 0;
5265     for (i=0; i<nsends; i++) {
5266       rowlen = (PetscInt*)svalues + sstarts[i]*sbs;
5267       nrows  = sstarts[i+1]-sstarts[i]; /* num of block rows */
5268       for (j=0; j<nrows; j++) {
5269         row = srow[k] + B->rmap->range[rank]; /* global row idx */
5270         for (l=0; l<sbs; l++) {
5271           ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr); /* rowlength */
5272 
5273           rowlen[j*sbs+l] = ncols;
5274 
5275           len += ncols;
5276           ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr);
5277         }
5278         k++;
5279       }
5280       ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5281 
5282       sstartsj[i+1] = len;  /* starting point of (i+1)-th outgoing msg in bufj and bufa */
5283     }
5284     /* recvs and sends of i-array are completed */
5285     i = nrecvs;
5286     while (i--) {
5287       ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5288     }
5289     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
5290 
5291     /* allocate buffers for sending j and a arrays */
5292     ierr = PetscMalloc1((len+1),&bufj);CHKERRQ(ierr);
5293     ierr = PetscMalloc1((len+1),&bufa);CHKERRQ(ierr);
5294 
5295     /* create i-array of B_oth */
5296     ierr = PetscMalloc1((aBn+2),&b_othi);CHKERRQ(ierr);
5297 
5298     b_othi[0] = 0;
5299     len       = 0; /* total length of j or a array to be received */
5300     k         = 0;
5301     for (i=0; i<nrecvs; i++) {
5302       rowlen = (PetscInt*)rvalues + rstarts[i]*rbs;
5303       nrows  = rbs*(rstarts[i+1]-rstarts[i]); /* num of rows to be received */
5304       for (j=0; j<nrows; j++) {
5305         b_othi[k+1] = b_othi[k] + rowlen[j];
5306         len        += rowlen[j]; k++;
5307       }
5308       rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */
5309     }
5310 
5311     /* allocate space for j and a arrays of B_oth */
5312     ierr = PetscMalloc1((b_othi[aBn]+1),&b_othj);CHKERRQ(ierr);
5313     ierr = PetscMalloc1((b_othi[aBn]+1),&b_otha);CHKERRQ(ierr);
5314 
5315     /* j-array */
5316     /*---------*/
5317     /*  post receives of j-array */
5318     for (i=0; i<nrecvs; i++) {
5319       nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
5320       ierr  = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5321     }
5322 
5323     /* pack the outgoing message j-array */
5324     k = 0;
5325     for (i=0; i<nsends; i++) {
5326       nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5327       bufJ  = bufj+sstartsj[i];
5328       for (j=0; j<nrows; j++) {
5329         row = srow[k++] + B->rmap->range[rank];  /* global row idx */
5330         for (ll=0; ll<sbs; ll++) {
5331           ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr);
5332           for (l=0; l<ncols; l++) {
5333             *bufJ++ = cols[l];
5334           }
5335           ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr);
5336         }
5337       }
5338       ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5339     }
5340 
5341     /* recvs and sends of j-array are completed */
5342     i = nrecvs;
5343     while (i--) {
5344       ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5345     }
5346     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
5347   } else if (scall == MAT_REUSE_MATRIX) {
5348     sstartsj = *startsj_s;
5349     rstartsj = *startsj_r;
5350     bufa     = *bufa_ptr;
5351     b_oth    = (Mat_SeqAIJ*)(*B_oth)->data;
5352     b_otha   = b_oth->a;
5353   } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatReuse scall must be MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX");
5354 
5355   /* a-array */
5356   /*---------*/
5357   /*  post receives of a-array */
5358   for (i=0; i<nrecvs; i++) {
5359     nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
5360     ierr  = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5361   }
5362 
5363   /* pack the outgoing message a-array */
5364   k = 0;
5365   for (i=0; i<nsends; i++) {
5366     nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5367     bufA  = bufa+sstartsj[i];
5368     for (j=0; j<nrows; j++) {
5369       row = srow[k++] + B->rmap->range[rank];  /* global row idx */
5370       for (ll=0; ll<sbs; ll++) {
5371         ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr);
5372         for (l=0; l<ncols; l++) {
5373           *bufA++ = vals[l];
5374         }
5375         ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr);
5376       }
5377     }
5378     ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5379   }
5380   /* recvs and sends of a-array are completed */
5381   i = nrecvs;
5382   while (i--) {
5383     ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5384   }
5385   if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
5386   ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr);
5387 
5388   if (scall == MAT_INITIAL_MATRIX) {
5389     /* put together the new matrix */
5390     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr);
5391 
5392     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
5393     /* Since these are PETSc arrays, change flags to free them as necessary. */
5394     b_oth          = (Mat_SeqAIJ*)(*B_oth)->data;
5395     b_oth->free_a  = PETSC_TRUE;
5396     b_oth->free_ij = PETSC_TRUE;
5397     b_oth->nonew   = 0;
5398 
5399     ierr = PetscFree(bufj);CHKERRQ(ierr);
5400     if (!startsj_s || !bufa_ptr) {
5401       ierr = PetscFree2(sstartsj,rstartsj);CHKERRQ(ierr);
5402       ierr = PetscFree(bufa);CHKERRQ(ierr); /* bufa was allocated above and is not returned to the caller in this branch */
5403     } else {
5404       *startsj_s = sstartsj;
5405       *startsj_r = rstartsj;
5406       *bufa_ptr  = bufa;
5407     }
5408   }
5409   ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
5410   PetscFunctionReturn(0);
5411 }
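/*
   A minimal usage sketch (illustrative only; developer-level, assumes A and B
   are assembled MPIAIJ matrices with compatible layouts): the communication
   buffers produced by the MAT_INITIAL_MATRIX call are handed back on the
   MAT_REUSE_MATRIX call so that only the numerical values travel again; the
   caller owns and eventually frees them.

     PetscInt  *startsj_s = NULL,*startsj_r = NULL;
     MatScalar *bufa = NULL;
     Mat       B_oth;
     ierr = MatGetBrowsOfAoCols_MPIAIJ(A,B,MAT_INITIAL_MATRIX,&startsj_s,&startsj_r,&bufa,&B_oth);CHKERRQ(ierr);
     ierr = MatGetBrowsOfAoCols_MPIAIJ(A,B,MAT_REUSE_MATRIX,&startsj_s,&startsj_r,&bufa,&B_oth);CHKERRQ(ierr);
     ierr = PetscFree2(startsj_s,startsj_r);CHKERRQ(ierr);
     ierr = PetscFree(bufa);CHKERRQ(ierr);
     ierr = MatDestroy(&B_oth);CHKERRQ(ierr);
*/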
5412 
5413 #undef __FUNCT__
5414 #define __FUNCT__ "MatGetCommunicationStructs"
5415 /*@C
5416   MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication.
5417 
5418   Not Collective
5419 
5420   Input Parameter:
5421 . A - The matrix in mpiaij format
5422 
5423   Output Parameters:
5424 + lvec - The local vector holding off-process values from the argument to a matrix-vector product
5425 . colmap - A map from global column index to local index into lvec
5426 - multScatter - A scatter from the argument of a matrix-vector product to lvec
5427 
5428   Level: developer
5429 
5430 @*/
5431 #if defined(PETSC_USE_CTABLE)
5432 PetscErrorCode  MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter)
5433 #else
5434 PetscErrorCode  MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter)
5435 #endif
5436 {
5437   Mat_MPIAIJ *a;
5438 
5439   PetscFunctionBegin;
5440   PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
5441   PetscValidPointer(lvec, 2);
5442   PetscValidPointer(colmap, 3);
5443   PetscValidPointer(multScatter, 4);
5444   a = (Mat_MPIAIJ*) A->data;
5445   if (lvec) *lvec = a->lvec;
5446   if (colmap) *colmap = a->colmap;
5447   if (multScatter) *multScatter = a->Mvctx;
5448   PetscFunctionReturn(0);
5449 }
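/*
   A minimal usage sketch (illustrative only; assumes A is an assembled MPIAIJ
   matrix): the objects returned are owned by the matrix and must not be
   destroyed by the caller.

     Vec        lvec;
     VecScatter scatter;
   #if defined(PETSC_USE_CTABLE)
     PetscTable colmap;
   #else
     PetscInt   *colmap;
   #endif
     ierr = MatGetCommunicationStructs(A,&lvec,&colmap,&scatter);CHKERRQ(ierr);
*/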
5450 
5451 PETSC_EXTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat,MatType,MatReuse,Mat*);
5452 PETSC_EXTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJPERM(Mat,MatType,MatReuse,Mat*);
5453 PETSC_EXTERN PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat,MatType,MatReuse,Mat*);
5454 
5455 #undef __FUNCT__
5456 #define __FUNCT__ "MatMatMultNumeric_MPIDense_MPIAIJ"
5457 /*
5458     Computes C = A*B as (B'*A')' since computing A*B directly with a dense A is untenable
5459 
5460                n                       p                          p
5461         (              )       (              )         (                  )
5462       m (      A       )  *  n (       B      )   =   m (         C        )
5463         (              )       (              )         (                  )
5464 
5465 */
5466 PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C)
5467 {
5468   PetscErrorCode ierr;
5469   Mat            At,Bt,Ct;
5470 
5471   PetscFunctionBegin;
5472   ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr);
5473   ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr);
5474   ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,1.0,&Ct);CHKERRQ(ierr);
5475   ierr = MatDestroy(&At);CHKERRQ(ierr);
5476   ierr = MatDestroy(&Bt);CHKERRQ(ierr);
5477   ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr);
5478   ierr = MatDestroy(&Ct);CHKERRQ(ierr);
5479   PetscFunctionReturn(0);
5480 }
5481 
5482 #undef __FUNCT__
5483 #define __FUNCT__ "MatMatMultSymbolic_MPIDense_MPIAIJ"
5484 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat *C)
5485 {
5486   PetscErrorCode ierr;
5487   PetscInt       m=A->rmap->n,n=B->cmap->n;
5488   Mat            Cmat;
5489 
5490   PetscFunctionBegin;
5491   if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %D != B->rmap->n %D",A->cmap->n,B->rmap->n);
5492   ierr = MatCreate(PetscObjectComm((PetscObject)A),&Cmat);CHKERRQ(ierr);
5493   ierr = MatSetSizes(Cmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
5494   ierr = MatSetBlockSizesFromMats(Cmat,A,B);CHKERRQ(ierr);
5495   ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr);
5496   ierr = MatMPIDenseSetPreallocation(Cmat,NULL);CHKERRQ(ierr);
5497   ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5498   ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5499 
5500   Cmat->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIAIJ;
5501 
5502   *C = Cmat;
5503   PetscFunctionReturn(0);
5504 }
5505 
5506 /* ----------------------------------------------------------------*/
5507 #undef __FUNCT__
5508 #define __FUNCT__ "MatMatMult_MPIDense_MPIAIJ"
5509 PetscErrorCode MatMatMult_MPIDense_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C)
5510 {
5511   PetscErrorCode ierr;
5512 
5513   PetscFunctionBegin;
5514   if (scall == MAT_INITIAL_MATRIX) {
5515     ierr = PetscLogEventBegin(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr);
5516     ierr = MatMatMultSymbolic_MPIDense_MPIAIJ(A,B,fill,C);CHKERRQ(ierr);
5517     ierr = PetscLogEventEnd(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr);
5518   }
5519   ierr = PetscLogEventBegin(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr);
5520   ierr = MatMatMultNumeric_MPIDense_MPIAIJ(A,B,*C);CHKERRQ(ierr);
5521   ierr = PetscLogEventEnd(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr);
5522   PetscFunctionReturn(0);
5523 }
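/*
   A minimal usage sketch (illustrative only; assumes A is MATMPIDENSE and B is
   MATMPIAIJ with A->cmap->n == B->rmap->n): a plain MatMatMult() call ends up
   in the symbolic and numeric routines above.

     Mat C;
     ierr = MatMatMult(A,B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&C);CHKERRQ(ierr);
     ierr = MatDestroy(&C);CHKERRQ(ierr);
*/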
5524 
5525 #if defined(PETSC_HAVE_MUMPS)
5526 PETSC_EXTERN PetscErrorCode MatGetFactor_aij_mumps(Mat,MatFactorType,Mat*);
5527 #endif
5528 #if defined(PETSC_HAVE_PASTIX)
5529 PETSC_EXTERN PetscErrorCode MatGetFactor_mpiaij_pastix(Mat,MatFactorType,Mat*);
5530 #endif
5531 #if defined(PETSC_HAVE_SUPERLU_DIST)
5532 PETSC_EXTERN PetscErrorCode MatGetFactor_mpiaij_superlu_dist(Mat,MatFactorType,Mat*);
5533 #endif
5534 #if defined(PETSC_HAVE_CLIQUE)
5535 PETSC_EXTERN PetscErrorCode MatGetFactor_aij_clique(Mat,MatFactorType,Mat*);
5536 #endif
5537 
5538 /*MC
5539    MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.
5540 
5541    Options Database Keys:
5542 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions()
5543 
5544   Level: beginner
5545 
5546 .seealso: MatCreateAIJ()
5547 M*/
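/*
   A minimal creation sketch for this type (illustrative only; the
   preallocation counts 5 and 2 are placeholders that depend on the
   application's sparsity pattern):

     Mat A;
     ierr = MatCreate(comm,&A);CHKERRQ(ierr);
     ierr = MatSetSizes(A,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
     ierr = MatMPIAIJSetPreallocation(A,5,NULL,2,NULL);CHKERRQ(ierr);
*/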
5548 
5549 #undef __FUNCT__
5550 #define __FUNCT__ "MatCreate_MPIAIJ"
5551 PETSC_EXTERN PetscErrorCode MatCreate_MPIAIJ(Mat B)
5552 {
5553   Mat_MPIAIJ     *b;
5554   PetscErrorCode ierr;
5555   PetscMPIInt    size;
5556 
5557   PetscFunctionBegin;
5558   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRQ(ierr);
5559 
5560   ierr          = PetscNewLog(B,&b);CHKERRQ(ierr);
5561   B->data       = (void*)b;
5562   ierr          = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
5563   B->assembled  = PETSC_FALSE;
5564   B->insertmode = NOT_SET_VALUES;
5565   b->size       = size;
5566 
5567   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank);CHKERRQ(ierr);
5568 
5569   /* build cache for off array entries formed */
5570   ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash);CHKERRQ(ierr);
5571 
5572   b->donotstash  = PETSC_FALSE;
5573   b->colmap      = 0;
5574   b->garray      = 0;
5575   b->roworiented = PETSC_TRUE;
5576 
5577   /* stuff used for matrix vector multiply */
5578   b->lvec  = NULL;
5579   b->Mvctx = NULL;
5580 
5581   /* stuff for MatGetRow() */
5582   b->rowindices   = 0;
5583   b->rowvalues    = 0;
5584   b->getrowactive = PETSC_FALSE;
5585 
5586   /* flexible pointer used in CUSP/CUSPARSE classes */
5587   b->spptr = NULL;
5588 
5589 #if defined(PETSC_HAVE_MUMPS)
5590   ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_mumps_C",MatGetFactor_aij_mumps);CHKERRQ(ierr);
5591 #endif
5592 #if defined(PETSC_HAVE_PASTIX)
5593   ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_pastix_C",MatGetFactor_mpiaij_pastix);CHKERRQ(ierr);
5594 #endif
5595 #if defined(PETSC_HAVE_SUPERLU_DIST)
5596   ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_superlu_dist_C",MatGetFactor_mpiaij_superlu_dist);CHKERRQ(ierr);
5597 #endif
5598 #if defined(PETSC_HAVE_CLIQUE)
5599   ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_clique_C",MatGetFactor_aij_clique);CHKERRQ(ierr);
5600 #endif
5601   ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPIAIJ);CHKERRQ(ierr);
5602   ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPIAIJ);CHKERRQ(ierr);
5603   ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetDiagonalBlock_C",MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr);
5604   ierr = PetscObjectComposeFunction((PetscObject)B,"MatIsTranspose_C",MatIsTranspose_MPIAIJ);CHKERRQ(ierr);
5605   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr);
5606   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr);
5607   ierr = PetscObjectComposeFunction((PetscObject)B,"MatDiagonalScaleLocal_C",MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr);
5608   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijperm_C",MatConvert_MPIAIJ_MPIAIJPERM);CHKERRQ(ierr);
5609   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijcrl_C",MatConvert_MPIAIJ_MPIAIJCRL);CHKERRQ(ierr);
5610   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C",MatConvert_MPIAIJ_MPISBAIJ);CHKERRQ(ierr);
5611   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMult_mpidense_mpiaij_C",MatMatMult_MPIDense_MPIAIJ);CHKERRQ(ierr);
5612   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultSymbolic_mpidense_mpiaij_C",MatMatMultSymbolic_MPIDense_MPIAIJ);CHKERRQ(ierr);
5613   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultNumeric_mpidense_mpiaij_C",MatMatMultNumeric_MPIDense_MPIAIJ);CHKERRQ(ierr);
5614   ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr);
5615   PetscFunctionReturn(0);
5616 }
5617 
5618 #undef __FUNCT__
5619 #define __FUNCT__ "MatCreateMPIAIJWithSplitArrays"
5620 /*@
5621      MatCreateMPIAIJWithSplitArrays - creates a MPI AIJ matrix using arrays that contain the "diagonal"
5622          and "off-diagonal" part of the matrix in CSR format.
5623 
5624    Collective on MPI_Comm
5625 
5626    Input Parameters:
5627 +  comm - MPI communicator
5628 .  m - number of local rows (cannot be PETSC_DECIDE)
5629 .  n - number of local columns; this should be the same as the local size used in creating the
5630        x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have it
5631        calculated if N is given). For square matrices n is almost always m.
5632 .  M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given)
5633 .  N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given)
5634 .   i - row indices for "diagonal" portion of matrix
5635 .   j - column indices
5636 .   a - matrix values
5637 .   oi - row indices for "off-diagonal" portion of matrix
5638 .   oj - column indices
5639 -   oa - matrix values
5640 
5641    Output Parameter:
5642 .   mat - the matrix
5643 
5644    Level: advanced
5645 
5646    Notes:
5647        The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc. The user
5648        must free the arrays once the matrix has been destroyed and not before.
5649 
5650        The i and j indices are 0 based
5651 
5652        See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix
5653 
5654        This sets local rows and cannot be used to set off-processor values.
5655 
5656        Use of this routine is discouraged because it is inflexible and cumbersome to use. It is extremely rare that a
5657        legacy application natively assembles into exactly this split format. The code to do so is nontrivial and does
5658        not easily support in-place reassembly. It is recommended to use MatSetValues() (or a variant thereof) because
5659        the resulting assembly is easier to implement, will work with any matrix format, and the user does not have to
5660        keep track of the underlying array. Use MatSetOption(A,MAT_IGNORE_OFF_PROC_ENTRIES,PETSC_TRUE) to disable all
5661        communication if it is known that only local entries will be set.
5662 
5663 .keywords: matrix, aij, compressed row, sparse, parallel
5664 
5665 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
5666           MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithArrays()
5667 @*/
5668 PetscErrorCode  MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat)
5669 {
5670   PetscErrorCode ierr;
5671   Mat_MPIAIJ     *maij;
5672 
5673   PetscFunctionBegin;
5674   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE or negative");
5675   if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
5676   if (oi[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0");
5677   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
5678   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
5679   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
5680   maij = (Mat_MPIAIJ*) (*mat)->data;
5681 
5682   (*mat)->preallocated = PETSC_TRUE;
5683 
5684   ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr);
5685   ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr);
5686 
5687   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr);
5688   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr);
5689 
5690   ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5691   ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5692   ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5693   ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5694 
5695   ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5696   ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5697   ierr = MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
5698   PetscFunctionReturn(0);
5699 }
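/*
   A minimal usage sketch (illustrative only; two processes, one local row and
   one local column each, so the global matrix is 2x2).  As the code above
   shows, maij->A is created with n local columns and maij->B with N global
   columns, so the j indices are local to the diagonal block while the oj
   indices are global; here oj[0] is 1 on rank 0 and 0 on rank 1.  All six
   arrays must remain valid until the matrix is destroyed.

     PetscMPIInt rank;
     PetscInt    ii[2] = {0,1},jj[1] = {0};
     PetscInt    oi[2] = {0,1},oj[1];
     PetscScalar aa[1] = {2.0},oa[1] = {-1.0};
     Mat         A;

     ierr  = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
     oj[0] = rank ? 0 : 1;
     ierr  = MatCreateMPIAIJWithSplitArrays(comm,1,1,PETSC_DETERMINE,PETSC_DETERMINE,ii,jj,aa,oi,oj,oa,&A);CHKERRQ(ierr);
*/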
5700 
5701 /*
5702     Special version for direct calls from Fortran
5703 */
5704 #include <petsc-private/fortranimpl.h>
5705 
5706 #if defined(PETSC_HAVE_FORTRAN_CAPS)
5707 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ
5708 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
5709 #define matsetvaluesmpiaij_ matsetvaluesmpiaij
5710 #endif
5711 
5712 /* Change these macros so they can be used in a void function */
5713 #undef CHKERRQ
5714 #define CHKERRQ(ierr) CHKERRABORT(PETSC_COMM_WORLD,ierr)
5715 #undef SETERRQ2
5716 #define SETERRQ2(comm,ierr,b,c,d) CHKERRABORT(comm,ierr)
5717 #undef SETERRQ3
5718 #define SETERRQ3(comm,ierr,b,c,d,e) CHKERRABORT(comm,ierr)
5719 #undef SETERRQ
5720 #define SETERRQ(c,ierr,b) CHKERRABORT(c,ierr)
5721 
5722 #undef __FUNCT__
5723 #define __FUNCT__ "matsetvaluesmpiaij_"
5724 PETSC_EXTERN void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr)
5725 {
5726   Mat            mat  = *mmat;
5727   PetscInt       m    = *mm, n = *mn;
5728   InsertMode     addv = *maddv;
5729   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
5730   PetscScalar    value;
5731   PetscErrorCode ierr;
5732 
5733   MatCheckPreallocated(mat,1);
5734   if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv;
5735 
5736 #if defined(PETSC_USE_DEBUG)
5737   else if (mat->insertmode != addv) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
5738 #endif
5739   {
5740     PetscInt  i,j,rstart  = mat->rmap->rstart,rend = mat->rmap->rend;
5741     PetscInt  cstart      = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
5742     PetscBool roworiented = aij->roworiented;
5743 
5744     /* Some Variables required in the macro */
5745     Mat        A                 = aij->A;
5746     Mat_SeqAIJ *a                = (Mat_SeqAIJ*)A->data;
5747     PetscInt   *aimax            = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
5748     MatScalar  *aa               = a->a;
5749     PetscBool  ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES)) ? PETSC_TRUE : PETSC_FALSE);
5750     Mat        B                 = aij->B;
5751     Mat_SeqAIJ *b                = (Mat_SeqAIJ*)B->data;
5752     PetscInt   *bimax            = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
5753     MatScalar  *ba               = b->a;
5754 
5755     PetscInt  *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
5756     PetscInt  nonew = a->nonew;
5757     MatScalar *ap1,*ap2;
5758 
5759     PetscFunctionBegin;
5760     for (i=0; i<m; i++) {
5761       if (im[i] < 0) continue;
5762 #if defined(PETSC_USE_DEBUG)
5763       if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
5764 #endif
5765       if (im[i] >= rstart && im[i] < rend) {
5766         row      = im[i] - rstart;
5767         lastcol1 = -1;
5768         rp1      = aj + ai[row];
5769         ap1      = aa + ai[row];
5770         rmax1    = aimax[row];
5771         nrow1    = ailen[row];
5772         low1     = 0;
5773         high1    = nrow1;
5774         lastcol2 = -1;
5775         rp2      = bj + bi[row];
5776         ap2      = ba + bi[row];
5777         rmax2    = bimax[row];
5778         nrow2    = bilen[row];
5779         low2     = 0;
5780         high2    = nrow2;
5781 
5782         for (j=0; j<n; j++) {
5783           if (roworiented) value = v[i*n+j];
5784           else value = v[i+j*m];
5785           if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
5786           if (in[j] >= cstart && in[j] < cend) {
5787             col = in[j] - cstart;
5788             MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
5789           } else if (in[j] < 0) continue;
5790 #if defined(PETSC_USE_DEBUG)
5791           else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
5792 #endif
5793           else {
5794             if (mat->was_assembled) {
5795               if (!aij->colmap) {
5796                 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
5797               }
5798 #if defined(PETSC_USE_CTABLE)
5799               ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
5800               col--;
5801 #else
5802               col = aij->colmap[in[j]] - 1;
5803 #endif
5804               if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
5805                 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
5806                 col  =  in[j];
5807                 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
5808                 B     = aij->B;
5809                 b     = (Mat_SeqAIJ*)B->data;
5810                 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
5811                 rp2   = bj + bi[row];
5812                 ap2   = ba + bi[row];
5813                 rmax2 = bimax[row];
5814                 nrow2 = bilen[row];
5815                 low2  = 0;
5816                 high2 = nrow2;
5817                 bm    = aij->B->rmap->n;
5818                 ba    = b->a;
5819               }
5820             } else col = in[j];
5821             MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
5822           }
5823         }
5824       } else if (!aij->donotstash) {
5825         if (roworiented) {
5826           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
5827         } else {
5828           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
5829         }
5830       }
5831     }
5832   }
5833   PetscFunctionReturnVoid();
5834 }
5835 
5836