xref: /petsc/src/mat/impls/aij/mpi/mpiaij.c (revision 4e6ef68f07cf4e7ffee1b93dbc438d90283d533b)

#include <../src/mat/impls/aij/mpi/mpiaij.h>   /*I "petscmat.h" I*/
#include <petsc-private/vecimpl.h>
#include <petscblaslapack.h>
#include <petscsf.h>

/*MC
   MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices.

   This matrix type is identical to MATSEQAIJ when constructed with a single process communicator,
   and MATMPIAIJ otherwise.  As a result, for single process communicators,
   MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
   for communicators controlling multiple processes.  It is recommended that you call both of
   the above preallocation routines for simplicity.

   Options Database Keys:
. -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions()

   Developer Notes: Subclasses include MATAIJCUSP, MATAIJCUSPARSE, MATAIJPERM, and MATAIJCRL; the type also
   automatically switches over to use inodes when enough of them exist.

   Level: beginner

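   Example usage (a minimal sketch; the communicator comm, the global sizes m and n, and the
   preallocation counts nz, dnz, onz are application-provided values, and error checking is
   omitted for brevity):
.vb
      Mat A;
      MatCreate(comm,&A);
      MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,m,n);
      MatSetType(A,MATAIJ);
      MatSeqAIJSetPreallocation(A,nz,NULL);           /* used when comm has one process */
      MatMPIAIJSetPreallocation(A,dnz,NULL,onz,NULL); /* used when comm has several processes */
.ve
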
.seealso: MatCreateAIJ(), MatCreateSeqAIJ(), MATSEQAIJ, MATMPIAIJ
M*/

/*MC
   MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices.

   This matrix type is identical to MATSEQAIJCRL when constructed with a single process communicator,
   and MATMPIAIJCRL otherwise.  As a result, for single process communicators,
   MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
   for communicators controlling multiple processes.  It is recommended that you call both of
   the above preallocation routines for simplicity.

   Options Database Keys:
. -mat_type aijcrl - sets the matrix type to "aijcrl" during a call to MatSetFromOptions()

   Level: beginner

.seealso: MatCreateMPIAIJCRL(), MATSEQAIJCRL, MATMPIAIJCRL
M*/

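/*
   MatFindNonzeroRows_MPIAIJ makes two passes over the local diagonal (A) and
   off-diagonal (B) blocks: the first pass counts the locally zero rows, and if
   any process found one, the second pass gathers the global indices of the rows
   that contain at least one stored nonzero value into an index set. When no
   process has a zero row, *keptrows is left NULL to signal that all rows are kept.
*/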
#undef __FUNCT__
#define __FUNCT__ "MatFindNonzeroRows_MPIAIJ"
PetscErrorCode MatFindNonzeroRows_MPIAIJ(Mat M,IS *keptrows)
{
  PetscErrorCode  ierr;
  Mat_MPIAIJ      *mat = (Mat_MPIAIJ*)M->data;
  Mat_SeqAIJ      *a   = (Mat_SeqAIJ*)mat->A->data;
  Mat_SeqAIJ      *b   = (Mat_SeqAIJ*)mat->B->data;
  const PetscInt  *ia,*ib;
  const MatScalar *aa,*bb;
  PetscInt        na,nb,i,j,*rows,cnt=0,n0rows;
  PetscInt        m = M->rmap->n,rstart = M->rmap->rstart;

  PetscFunctionBegin;
  *keptrows = 0;
  ia        = a->i;
  ib        = b->i;
  for (i=0; i<m; i++) {
    na = ia[i+1] - ia[i];
    nb = ib[i+1] - ib[i];
    if (!na && !nb) {
      cnt++;
      goto ok1;
    }
    aa = a->a + ia[i];
    for (j=0; j<na; j++) {
      if (aa[j] != 0.0) goto ok1;
    }
    bb = b->a + ib[i];
    for (j=0; j<nb; j++) {
      if (bb[j] != 0.0) goto ok1;
    }
    cnt++;
ok1:;
  }
  ierr = MPI_Allreduce(&cnt,&n0rows,1,MPIU_INT,MPIU_SUM,PetscObjectComm((PetscObject)M));CHKERRQ(ierr);
  if (!n0rows) PetscFunctionReturn(0);
  ierr = PetscMalloc1((M->rmap->n-cnt),&rows);CHKERRQ(ierr);
  cnt  = 0;
  for (i=0; i<m; i++) {
    na = ia[i+1] - ia[i];
    nb = ib[i+1] - ib[i];
    if (!na && !nb) continue;
    aa = a->a + ia[i];
    for (j=0; j<na; j++) {
      if (aa[j] != 0.0) {
        rows[cnt++] = rstart + i;
        goto ok2;
      }
    }
    bb = b->a + ib[i];
    for (j=0; j<nb; j++) {
      if (bb[j] != 0.0) {
        rows[cnt++] = rstart + i;
        goto ok2;
      }
    }
ok2:;
  }
  ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),cnt,rows,PETSC_OWN_POINTER,keptrows);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatFindZeroDiagonals_MPIAIJ"
PetscErrorCode MatFindZeroDiagonals_MPIAIJ(Mat M,IS *zrows)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)M->data;
  PetscErrorCode ierr;
  PetscInt       i,rstart,nrows,*rows;

  PetscFunctionBegin;
  *zrows = NULL;
  ierr   = MatFindZeroDiagonals_SeqAIJ_Private(aij->A,&nrows,&rows);CHKERRQ(ierr);
  ierr   = MatGetOwnershipRange(M,&rstart,NULL);CHKERRQ(ierr);
  for (i=0; i<nrows; i++) rows[i] += rstart;
  ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),nrows,rows,PETSC_OWN_POINTER,zrows);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

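/*
   MatGetColumnNorms_MPIAIJ accumulates the per-column contributions of the local
   diagonal (A) and off-diagonal (B) blocks into a work array of global length n,
   then combines the partial results across all processes with MPI_Allreduce()
   (MPI_MAX for the infinity norm, MPI_SUM for the 1- and 2-norms).
*/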
#undef __FUNCT__
#define __FUNCT__ "MatGetColumnNorms_MPIAIJ"
PetscErrorCode MatGetColumnNorms_MPIAIJ(Mat A,NormType type,PetscReal *norms)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)A->data;
  PetscInt       i,n,*garray = aij->garray;
  Mat_SeqAIJ     *a_aij = (Mat_SeqAIJ*) aij->A->data;
  Mat_SeqAIJ     *b_aij = (Mat_SeqAIJ*) aij->B->data;
  PetscReal      *work;

  PetscFunctionBegin;
  ierr = MatGetSize(A,NULL,&n);CHKERRQ(ierr);
  ierr = PetscCalloc1(n,&work);CHKERRQ(ierr);
  if (type == NORM_2) {
    for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
      work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]*a_aij->a[i]);
    }
    for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
      work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]*b_aij->a[i]);
    }
  } else if (type == NORM_1) {
    for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
      work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]);
    }
    for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
      work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]);
    }
  } else if (type == NORM_INFINITY) {
    for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
      work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]);
    }
    for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
      work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]),work[garray[b_aij->j[i]]]);
    }
  } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Unknown NormType");
  if (type == NORM_INFINITY) {
    ierr = MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
  } else {
    ierr = MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
  }
  ierr = PetscFree(work);CHKERRQ(ierr);
  if (type == NORM_2) {
    for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatDistribute_MPIAIJ"
/*
    Distributes a SeqAIJ matrix across a set of processes. Code stolen from
    MatLoad_MPIAIJ(). Horrible lack of reuse. Should be a routine for each matrix type.

    Only for square matrices

    Used by a preconditioner, hence PETSC_EXTERN
*/
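/*
   A minimal calling sketch (the names here are illustrative assumptions, not part
   of this file: gmat is a MATSEQAIJ matrix valid on every process, with its entries
   significant only on process 0, and m is the desired number of local rows):

      Mat dmat;
      MatDistribute_MPIAIJ(PETSC_COMM_WORLD,gmat,m,MAT_INITIAL_MATRIX,&dmat);
      ...
      MatDistribute_MPIAIJ(PETSC_COMM_WORLD,gmat,m,MAT_REUSE_MATRIX,&dmat);
      MatDestroy(&dmat);
*/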
PETSC_EXTERN PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat)
{
  PetscMPIInt    rank,size;
  PetscInt       *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz = 0,*gmataj,cnt,row,*ld,bses[2];
  PetscErrorCode ierr;
  Mat            mat;
  Mat_SeqAIJ     *gmata;
  PetscMPIInt    tag;
  MPI_Status     status;
  PetscBool      aij;
  MatScalar      *gmataa,*ao,*ad,*gmataarestore=0;

  PetscFunctionBegin;
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  if (!rank) {
    ierr = PetscObjectTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);CHKERRQ(ierr);
    if (!aij) SETERRQ1(PetscObjectComm((PetscObject)gmat),PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name);
  }
  if (reuse == MAT_INITIAL_MATRIX) {
    ierr = MatCreate(comm,&mat);CHKERRQ(ierr);
    ierr = MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
    ierr = MatGetBlockSizes(gmat,&bses[0],&bses[1]);CHKERRQ(ierr);
    ierr = MPI_Bcast(bses,2,MPIU_INT,0,comm);CHKERRQ(ierr);
    ierr = MatSetBlockSizes(mat,bses[0],bses[1]);CHKERRQ(ierr);
    ierr = MatSetType(mat,MATAIJ);CHKERRQ(ierr);
    ierr = PetscMalloc1((size+1),&rowners);CHKERRQ(ierr);
    ierr = PetscMalloc2(m,&dlens,m,&olens);CHKERRQ(ierr);
    ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);

    rowners[0] = 0;
    for (i=2; i<=size; i++) rowners[i] += rowners[i-1];
    rstart = rowners[rank];
    rend   = rowners[rank+1];
    ierr   = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
    if (!rank) {
      gmata = (Mat_SeqAIJ*) gmat->data;
      /* send row lengths to all processors */
      for (i=0; i<m; i++) dlens[i] = gmata->ilen[i];
      for (i=1; i<size; i++) {
        ierr = MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
      }
      /* determine the number of diagonal and off-diagonal nonzeros in each row */
      ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
      ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr);
      jj   = 0;
      for (i=0; i<m; i++) {
        for (j=0; j<dlens[i]; j++) {
          if (gmata->j[jj] < rstart) ld[i]++;
          if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++;
          jj++;
        }
      }
      /* send column indices to other processes */
      for (i=1; i<size; i++) {
        nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
        ierr = MPI_Send(&nz,1,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
        ierr = MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
      }

      /* send numerical values to other processes */
      for (i=1; i<size; i++) {
        nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
        ierr = MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
      }
      gmataa = gmata->a;
      gmataj = gmata->j;

    } else {
      /* receive row lengths */
      ierr = MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
      /* receive column indices */
      ierr = MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
      ierr = PetscMalloc2(nz,&gmataa,nz,&gmataj);CHKERRQ(ierr);
      ierr = MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
      /* determine the number of diagonal and off-diagonal nonzeros in each row */
      ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
      ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr);
      jj   = 0;
      for (i=0; i<m; i++) {
        for (j=0; j<dlens[i]; j++) {
          if (gmataj[jj] < rstart) ld[i]++;
          if (gmataj[jj] < rstart || gmataj[jj] >= rend) olens[i]++;
          jj++;
        }
      }
      /* receive numerical values */
      ierr = PetscMemzero(gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr);
      ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
    }
    /* set preallocation */
    for (i=0; i<m; i++) {
      dlens[i] -= olens[i];
    }
    ierr = MatSeqAIJSetPreallocation(mat,0,dlens);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);CHKERRQ(ierr);

    for (i=0; i<m; i++) {
      dlens[i] += olens[i];
    }
    cnt = 0;
    for (i=0; i<m; i++) {
      row  = rstart + i;
      ierr = MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);CHKERRQ(ierr);
      cnt += dlens[i];
    }
    if (rank) {
      ierr = PetscFree2(gmataa,gmataj);CHKERRQ(ierr);
    }
    ierr = PetscFree2(dlens,olens);CHKERRQ(ierr);
    ierr = PetscFree(rowners);CHKERRQ(ierr);

    ((Mat_MPIAIJ*)(mat->data))->ld = ld;

    *inmat = mat;
  } else {   /* column indices are already set; only need to move over numerical values from process 0 */
    Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data;
    Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data;
    mat  = *inmat;
    ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
    if (!rank) {
      /* send numerical values to other processes */
      gmata  = (Mat_SeqAIJ*) gmat->data;
      ierr   = MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);CHKERRQ(ierr);
      gmataa = gmata->a;
      for (i=1; i<size; i++) {
        nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
        ierr = MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
      }
      nz = gmata->i[rowners[1]]-gmata->i[rowners[0]];
    } else {
      /* receive numerical values from process 0 */
      nz   = Ad->nz + Ao->nz;
      ierr = PetscMalloc1(nz,&gmataa);CHKERRQ(ierr); gmataarestore = gmataa;
      ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
    }
    /* transfer numerical values into the diagonal A and off-diagonal B parts of mat */
    ld = ((Mat_MPIAIJ*)(mat->data))->ld;
    ad = Ad->a;
    ao = Ao->a;
    if (mat->rmap->n) {
      i  = 0;
      nz = ld[i];                                   ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
      nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
    }
    for (i=1; i<mat->rmap->n; i++) {
      nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
      nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
    }
    i--;
    if (mat->rmap->n) {
      nz = Ao->i[i+1] - Ao->i[i] - ld[i];           ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr);
    }
    if (rank) {
      ierr = PetscFree(gmataarestore);CHKERRQ(ierr);
    }
  }
  ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
  Local utility routine that creates a mapping from the global column
number to the local number in the off-diagonal part of the local
storage of the matrix.  When PETSC_USE_CTABLE is defined this is scalable,
at a slightly higher hash-table cost; without it, it is not scalable (each
process stores an order-N integer array) but access is fast.
*/
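/*
   The lookup pattern used elsewhere in this file for both representations is
   (gcol and lcol are illustrative names; both variants produce the local
   off-diagonal column index, or -1 when the global column gcol is not present
   in this process's off-diagonal block):

#if defined(PETSC_USE_CTABLE)
      PetscTableFind(aij->colmap,gcol+1,&lcol); lcol--;
#else
      lcol = aij->colmap[gcol] - 1;
#endif
*/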
#undef __FUNCT__
#define __FUNCT__ "MatCreateColmap_MPIAIJ_Private"
PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       n = aij->B->cmap->n,i;

  PetscFunctionBegin;
  if (!aij->garray) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"MPIAIJ Matrix was assembled but is missing garray");
#if defined(PETSC_USE_CTABLE)
  ierr = PetscTableCreate(n,mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr);
  for (i=0; i<n; i++) {
    ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1,INSERT_VALUES);CHKERRQ(ierr);
  }
#else
  ierr = PetscCalloc1((mat->cmap->N+1),&aij->colmap);CHKERRQ(ierr);
  ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N+1)*sizeof(PetscInt));CHKERRQ(ierr);
  for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
#endif
  PetscFunctionReturn(0);
}

#define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \
  { \
    if (col <= lastcol1)  low1 = 0;     \
    else                 high1 = nrow1; \
    lastcol1 = col;                     \
    while (high1-low1 > 5) {            \
      t = (low1+high1)/2;               \
      if (rp1[t] > col) high1 = t;      \
      else              low1  = t;      \
    }                                   \
    for (_i=low1; _i<high1; _i++) {     \
      if (rp1[_i] > col) break;         \
      if (rp1[_i] == col) {             \
        if (addv == ADD_VALUES) ap1[_i] += value; \
        else                    ap1[_i] = value;  \
        goto a_noinsert;                \
      }                                 \
    }                                   \
    if (value == 0.0 && ignorezeroentries) {low1 = 0; high1 = nrow1; goto a_noinsert;} \
    if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;}                        \
    if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
    MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \
    N = nrow1++ - 1; a->nz++; high1++;  \
    /* shift up all the later entries in this row */ \
    for (ii=N; ii>=_i; ii--) {          \
      rp1[ii+1] = rp1[ii];              \
      ap1[ii+1] = ap1[ii];              \
    }                                   \
    rp1[_i] = col;                      \
    ap1[_i] = value;                    \
    A->nonzerostate++;                  \
    a_noinsert: ;                       \
    ailen[row] = nrow1;                 \
  }

#define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \
  { \
    if (col <= lastcol2) low2 = 0;                        \
    else high2 = nrow2;                                   \
    lastcol2 = col;                                       \
    while (high2-low2 > 5) {                              \
      t = (low2+high2)/2;                                 \
      if (rp2[t] > col) high2 = t;                        \
      else             low2  = t;                         \
    }                                                     \
    for (_i=low2; _i<high2; _i++) {                       \
      if (rp2[_i] > col) break;                           \
      if (rp2[_i] == col) {                               \
        if (addv == ADD_VALUES) ap2[_i] += value;         \
        else                    ap2[_i] = value;          \
        goto b_noinsert;                                  \
      }                                                   \
    }                                                     \
    if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
    if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;}                        \
    if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
    MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \
    N = nrow2++ - 1; b->nz++; high2++;                    \
    /* shift up all the later entries in this row */      \
    for (ii=N; ii>=_i; ii--) {                            \
      rp2[ii+1] = rp2[ii];                                \
      ap2[ii+1] = ap2[ii];                                \
    }                                                     \
    rp2[_i] = col;                                        \
    ap2[_i] = value;                                      \
    B->nonzerostate++;                                    \
    b_noinsert: ;                                         \
    bilen[row] = nrow2;                                   \
  }
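
/*
   Both macros above insert one value into a row of a sequential block (A holds
   the diagonal part, B the off-diagonal part): a short binary search narrows
   the candidate range, a linear scan locates the column, and when the column is
   not yet present a new nonzero is created, shifting the later entries of the
   row up and reallocating if necessary, unless the nonew option forbids new
   nonzeros.
*/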

#undef __FUNCT__
#define __FUNCT__ "MatSetValuesRow_MPIAIJ"
PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[])
{
  Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)A->data;
  Mat_SeqAIJ     *a   = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data;
  PetscErrorCode ierr;
  PetscInt       l,*garray = mat->garray,diag;

  PetscFunctionBegin;
  /* code only works for square matrices A */

  /* find size of row to the left of the diagonal part */
  ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr);
  row  = row - diag;
  for (l=0; l<b->i[row+1]-b->i[row]; l++) {
    if (garray[b->j[b->i[row]+l]] > diag) break;
  }
  ierr = PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));CHKERRQ(ierr);

  /* diagonal part */
  ierr = PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));CHKERRQ(ierr);

  /* right of diagonal part */
  ierr = PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

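/*
   MatSetValues_MPIAIJ inserts values destined for locally owned rows directly
   into the diagonal (A) or off-diagonal (B) block using the macros above;
   values for rows owned by other processes are placed in the stash and are
   communicated later, during MatAssemblyBegin()/MatAssemblyEnd().
*/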
#undef __FUNCT__
#define __FUNCT__ "MatSetValues_MPIAIJ"
PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscScalar    value;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart  = mat->rmap->rstart,rend = mat->rmap->rend;
  PetscInt       cstart      = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
  PetscBool      roworiented = aij->roworiented;

  /* Some Variables required in the macro */
  Mat        A                 = aij->A;
  Mat_SeqAIJ *a                = (Mat_SeqAIJ*)A->data;
  PetscInt   *aimax            = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
  MatScalar  *aa               = a->a;
  PetscBool  ignorezeroentries = a->ignorezeroentries;
  Mat        B                 = aij->B;
  Mat_SeqAIJ *b                = (Mat_SeqAIJ*)B->data;
  PetscInt   *bimax            = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
  MatScalar  *ba               = b->a;

  PetscInt  *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
  PetscInt  nonew;
  MatScalar *ap1,*ap2;

  PetscFunctionBegin;
  if (v) PetscValidScalarPointer(v,6);
  for (i=0; i<m; i++) {
    if (im[i] < 0) continue;
#if defined(PETSC_USE_DEBUG)
    if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
#endif
    if (im[i] >= rstart && im[i] < rend) {
      row      = im[i] - rstart;
      lastcol1 = -1;
      rp1      = aj + ai[row];
      ap1      = aa + ai[row];
      rmax1    = aimax[row];
      nrow1    = ailen[row];
      low1     = 0;
      high1    = nrow1;
      lastcol2 = -1;
      rp2      = bj + bi[row];
      ap2      = ba + bi[row];
      rmax2    = bimax[row];
      nrow2    = bilen[row];
      low2     = 0;
      high2    = nrow2;

      for (j=0; j<n; j++) {
        if (v) {
          if (roworiented) value = v[i*n+j];
          else             value = v[i+j*m];
        } else value = 0.0;
        if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
        if (in[j] >= cstart && in[j] < cend) {
          col   = in[j] - cstart;
          nonew = a->nonew;
          MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
        } else if (in[j] < 0) continue;
#if defined(PETSC_USE_DEBUG)
        else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
#endif
        else {
          if (mat->was_assembled) {
            if (!aij->colmap) {
              ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
            }
#if defined(PETSC_USE_CTABLE)
            ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
            col--;
#else
            col = aij->colmap[in[j]] - 1;
#endif
            if (col < 0 && !((Mat_SeqAIJ*)(aij->B->data))->nonew) {
              ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
              col  = in[j];
              /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
              B     = aij->B;
              b     = (Mat_SeqAIJ*)B->data;
              bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a;
              rp2   = bj + bi[row];
              ap2   = ba + bi[row];
              rmax2 = bimax[row];
              nrow2 = bilen[row];
              low2  = 0;
              high2 = nrow2;
              bm    = aij->B->rmap->n;
              ba    = b->a;
            } else if (col < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", im[i], in[j]);
          } else col = in[j];
          nonew = b->nonew;
          MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
        }
      }
    } else {
      if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
      if (!aij->donotstash) {
        mat->assembled = PETSC_FALSE;
        if (roworiented) {
          ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetValues_MPIAIJ"
PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
  PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]); */
    if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      for (j=0; j<n; j++) {
        if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */
        if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
        if (idxn[j] >= cstart && idxn[j] < cend) {
          col  = idxn[j] - cstart;
          ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
        } else {
          if (!aij->colmap) {
            ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
          }
#if defined(PETSC_USE_CTABLE)
          ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
          col--;
#else
          col = aij->colmap[idxn[j]] - 1;
#endif
          if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
          else {
            ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
          }
        }
      }
    } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
  }
  PetscFunctionReturn(0);
}

extern PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat,Vec,Vec);

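/*
   In MatAssemblyBegin_MPIAIJ the MPI_Allreduce() with MPI_BOR evaluates to
   (ADD_VALUES | INSERT_VALUES) exactly when some processes inserted values
   while others added them, which is flagged as an error; otherwise it starts
   communicating the stashed off-process entries.
*/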
#undef __FUNCT__
#define __FUNCT__ "MatAssemblyBegin_MPIAIJ"
PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       nstash,reallocs;
  InsertMode     addv;

  PetscFunctionBegin;
  if (aij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0);

  /* make sure all processes are using either INSERT_VALUES or ADD_VALUES, not a mixture */
  ierr = MPI_Allreduce((PetscEnum*)&mat->insertmode,(PetscEnum*)&addv,1,MPIU_ENUM,MPI_BOR,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
  if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
  mat->insertmode = addv; /* in case this processor had no cache */

  ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
  ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatAssemblyEnd_MPIAIJ"
PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  Mat_SeqAIJ     *a   = (Mat_SeqAIJ*)aij->A->data;
  PetscErrorCode ierr;
  PetscMPIInt    n;
  PetscInt       i,j,rstart,ncols,flg;
  PetscInt       *row,*col;
  PetscBool      other_disassembled;
  PetscScalar    *val;
  InsertMode     addv = mat->insertmode;

  /* do not use 'b = (Mat_SeqAIJ*)aij->B->data' as B can be reset in disassembly */

  PetscFunctionBegin;
  if (!aij->donotstash && !mat->nooffprocentries) {
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n; ) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) {
          if (row[j] != rstart) break;
        }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        /* Now assemble all these values with a single function call */
        ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);

        i = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
  }
  ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);

  /* determine if any process has disassembled; if so we must
     also disassemble ourselves, in order that we may reassemble */
  /*
     if the nonzero structure of submatrix B cannot change, then no process
     disassembled, so this step can be skipped
  */
  if (!((Mat_SeqAIJ*)aij->B->data)->nonew) {
    ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    if (mat->was_assembled && !other_disassembled) {
      ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
    }
  }
  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
  }
  ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);

  ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);

  aij->rowvalues = 0;

  /* used by MatAXPY() */
  a->xtoy = 0; ((Mat_SeqAIJ*)aij->B->data)->xtoy = 0;   /* b->xtoy = 0 */
  a->XtoY = 0; ((Mat_SeqAIJ*)aij->B->data)->XtoY = 0;   /* b->XtoY = 0 */

  ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
  if (a->inode.size) mat->ops->multdiagonalblock = MatMultDiagonalBlock_MPIAIJ;

  {
    PetscObjectState state = aij->A->nonzerostate + aij->B->nonzerostate;
    ierr = MPI_Allreduce(&state,&mat->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatZeroEntries_MPIAIJ"
PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
{
  Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
  ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

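/*
   MatZeroRows_MPIAIJ receives the rows to zero as global indices that may refer
   to rows owned by other processes; a PetscSF with one leaf per requested row is
   built so that an MPI_LOR reduction marks, on each owner, the locally owned
   rows that must be zeroed.
*/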
#undef __FUNCT__
#define __FUNCT__ "MatZeroRows_MPIAIJ"
PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
{
  Mat_MPIAIJ    *mat    = (Mat_MPIAIJ *) A->data;
  PetscInt      *owners = A->rmap->range;
  PetscInt       n      = A->rmap->n;
  PetscMPIInt    size   = mat->size;
  PetscSF        sf;
  PetscInt      *lrows;
  PetscSFNode   *rrows;
  PetscInt       lastidx = -1, r, p = 0, len = 0;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* Create SF where leaves are input rows and roots are owned rows */
  ierr = PetscMalloc1(n, &lrows);CHKERRQ(ierr);
  for (r = 0; r < n; ++r) lrows[r] = -1;
  ierr = PetscMalloc1(N, &rrows);CHKERRQ(ierr);
  for (r = 0; r < N; ++r) {
    const PetscInt idx   = rows[r];
    PetscBool      found = PETSC_FALSE;
    /* Trick for an efficient search when the rows are sorted */
    if (lastidx > idx) p = 0;
    lastidx = idx;
    for (; p < size; ++p) {
      if (idx >= owners[p] && idx < owners[p+1]) {
        rrows[r].rank  = p;
        rrows[r].index = rows[r] - owners[p];
        found = PETSC_TRUE;
        break;
      }
    }
    if (!found) SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row %d not found in matrix distribution", idx);
  }
  ierr = PetscSFCreate(PetscObjectComm((PetscObject) A), &sf);CHKERRQ(ierr);
  ierr = PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER);CHKERRQ(ierr);
  /* Collect flags for rows to be zeroed */
  ierr = PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
  ierr = PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
  ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
  /* Compress and put in row numbers */
  for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r;
  /* fix right-hand side if needed */
  if (x && b) {
    const PetscScalar *xx;
    PetscScalar       *bb;

    ierr = VecGetArrayRead(x, &xx);CHKERRQ(ierr);
    ierr = VecGetArray(b, &bb);CHKERRQ(ierr);
    for (r = 0; r < len; ++r) bb[lrows[r]] = diag*xx[lrows[r]];
    ierr = VecRestoreArrayRead(x, &xx);CHKERRQ(ierr);
    ierr = VecRestoreArray(b, &bb);CHKERRQ(ierr);
  }
  /* Must zero mat->B before mat->A because the (diag != 0.0) case below may put values into mat->B */
  ierr = MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
  if ((diag != 0.0) && (mat->A->rmap->N == mat->A->cmap->N)) {
    ierr = MatZeroRows(mat->A, len, lrows, diag, NULL, NULL);CHKERRQ(ierr);
  } else if (diag != 0.0) {
    ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
    if (((Mat_SeqAIJ *) mat->A->data)->nonew) SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "MatZeroRows() on rectangular matrices cannot be used with the Mat options\nMAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
    for (r = 0; r < len; ++r) {
      const PetscInt row = lrows[r] + A->rmap->rstart;
      ierr = MatSetValues(A, 1, &row, 1, &row, &diag, INSERT_VALUES);CHKERRQ(ierr);
    }
    ierr = MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  } else {
    ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
  }
  ierr = PetscFree(lrows);CHKERRQ(ierr);
  {
    PetscObjectState state = mat->A->nonzerostate + mat->B->nonzerostate;
    ierr = MPI_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatZeroRowsColumns_MPIAIJ"
PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
{
  Mat_MPIAIJ        *l = (Mat_MPIAIJ*)A->data;
  PetscErrorCode    ierr;
  PetscMPIInt       size = l->size,n = A->rmap->n,lastidx = -1;
  PetscInt          i,j,r,m,p = 0,len = 0;
  PetscInt          *lrows,*owners = A->rmap->range;
  PetscSFNode       *rrows;
  PetscSF           sf;
  const PetscScalar *xx;
  PetscScalar       *bb,*mask;
  Vec               xmask,lmask;
  Mat_SeqAIJ        *aij = (Mat_SeqAIJ*)l->B->data;
  const PetscInt    *aj, *ii,*ridx;
  PetscScalar       *aa;
#if defined(PETSC_DEBUG)
  PetscBool found = PETSC_FALSE;
#endif

  PetscFunctionBegin;
  /* Create SF where leaves are input rows and roots are owned rows */
  ierr = PetscMalloc1(n, &lrows);CHKERRQ(ierr);
  for (r = 0; r < n; ++r) lrows[r] = -1;
  ierr = PetscMalloc1(N, &rrows);CHKERRQ(ierr);
  for (r = 0; r < N; ++r) {
    const PetscInt idx   = rows[r];
    PetscBool      found = PETSC_FALSE;
    /* Trick for an efficient search when the rows are sorted */
    if (lastidx > idx) p = 0;
    lastidx = idx;
    for (; p < size; ++p) {
      if (idx >= owners[p] && idx < owners[p+1]) {
        rrows[r].rank  = p;
        rrows[r].index = rows[r] - owners[p];
        found = PETSC_TRUE;
        break;
      }
    }
    if (!found) SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row %d not found in matrix distribution", idx);
  }
  ierr = PetscSFCreate(PetscObjectComm((PetscObject) A), &sf);CHKERRQ(ierr);
  ierr = PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER);CHKERRQ(ierr);
  /* Collect flags for rows to be zeroed */
  ierr = PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
  ierr = PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
  ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
  /* Compress and put in row numbers */
  for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r;
  /* zero diagonal part of matrix */
  ierr = MatZeroRowsColumns(l->A,len,lrows,diag,x,b);CHKERRQ(ierr);
  /* handle off-diagonal part of matrix */
  ierr = MatGetVecs(A,&xmask,NULL);CHKERRQ(ierr);
  ierr = VecDuplicate(l->lvec,&lmask);CHKERRQ(ierr);
  ierr = VecGetArray(xmask,&bb);CHKERRQ(ierr);
  for (i=0; i<len; i++) bb[lrows[i]] = 1;
  ierr = VecRestoreArray(xmask,&bb);CHKERRQ(ierr);
  ierr = VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecDestroy(&xmask);CHKERRQ(ierr);
  if (x) {
    ierr = VecScatterBegin(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = VecScatterEnd(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = VecGetArrayRead(l->lvec,&xx);CHKERRQ(ierr);
    ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
  }
  ierr = VecGetArray(lmask,&mask);CHKERRQ(ierr);
  /* remove zeroed rows of off-diagonal matrix */
  ii = aij->i;
  for (i=0; i<len; i++) {
    ierr = PetscMemzero(aij->a + ii[lrows[i]],(ii[lrows[i]+1] - ii[lrows[i]])*sizeof(PetscScalar));CHKERRQ(ierr);
  }
  /* loop over all elements of the off-process part of the matrix, zeroing removed columns */
  if (aij->compressedrow.use) {
    m    = aij->compressedrow.nrows;
    ii   = aij->compressedrow.i;
    ridx = aij->compressedrow.rindex;
    for (i=0; i<m; i++) {
      n  = ii[i+1] - ii[i];
      aj = aij->j + ii[i];
      aa = aij->a + ii[i];

      for (j=0; j<n; j++) {
        if (PetscAbsScalar(mask[*aj])) {
          if (b) bb[*ridx] -= *aa*xx[*aj];
          *aa = 0.0;
        }
        aa++;
        aj++;
      }
      ridx++;
    }
  } else { /* do not use compressed row format */
    m = l->B->rmap->n;
    for (i=0; i<m; i++) {
      n  = ii[i+1] - ii[i];
      aj = aij->j + ii[i];
      aa = aij->a + ii[i];
      for (j=0; j<n; j++) {
        if (PetscAbsScalar(mask[*aj])) {
          if (b) bb[i] -= *aa*xx[*aj];
          *aa = 0.0;
        }
        aa++;
        aj++;
      }
    }
  }
  if (x) {
    ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
    ierr = VecRestoreArrayRead(l->lvec,&xx);CHKERRQ(ierr);
  }
  ierr = VecRestoreArray(lmask,&mask);CHKERRQ(ierr);
  ierr = VecDestroy(&lmask);CHKERRQ(ierr);
  ierr = PetscFree(lrows);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

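/*
   MatMult_MPIAIJ overlaps communication with computation: the scatter of the
   needed off-process entries of xx into a->lvec is started, the diagonal block
   is multiplied while those messages are in flight, and the off-diagonal
   contribution is added once the scatter has completed.
*/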
#undef __FUNCT__
#define __FUNCT__ "MatMult_MPIAIJ"
PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       nt;

  PetscFunctionBegin;
  ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
  if (nt != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt);
  ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultDiagonalBlock_MPIAIJ"
PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatMultDiagonalBlock(a->A,bb,xx);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultAdd_MPIAIJ"
PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultTranspose_MPIAIJ"
PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscBool      merged;

  PetscFunctionBegin;
  ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr);
  /* do nondiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  if (!merged) {
    /* send it on its way */
    ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
    /* do local part */
    ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
    /* receive remote parts: this assumes the values are not actually added into yy until the VecScatterEnd() below */
    ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  } else {
    /* do local part */
    ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
    /* send it on its way */
    ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
    /* the values were actually received in the Begin(), but we still need to call this no-op End() */
    ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatIsTranspose_MPIAIJ"
PetscErrorCode  MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscBool  *f)
{
  MPI_Comm       comm;
  Mat_MPIAIJ     *Aij = (Mat_MPIAIJ*) Amat->data, *Bij;
  Mat            Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs;
  IS             Me,Notme;
  PetscErrorCode ierr;
  PetscInt       M,N,first,last,*notme,i;
  PetscMPIInt    size;

  PetscFunctionBegin;
  /* Easy test: symmetric diagonal block */
  Bij  = (Mat_MPIAIJ*) Bmat->data; Bdia = Bij->A;
  ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr);
  if (!*f) PetscFunctionReturn(0);
  ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  if (size == 1) PetscFunctionReturn(0);

  /* Hard test: off-diagonal block. This requires a call to MatGetSubMatrices(). */
  ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr);
  ierr = PetscMalloc1((N-last+first),&notme);CHKERRQ(ierr);
  for (i=0; i<first; i++) notme[i] = i;
  for (i=last; i<M; i++) notme[i-last+first] = i;
  ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,PETSC_COPY_VALUES,&Notme);CHKERRQ(ierr);
  ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr);
  ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr);
  Aoff = Aoffs[0];
  ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr);
  Boff = Boffs[0];
  ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr);
  ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr);
  ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr);
  ierr = ISDestroy(&Me);CHKERRQ(ierr);
  ierr = ISDestroy(&Notme);CHKERRQ(ierr);
  ierr = PetscFree(notme);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultTransposeAdd_MPIAIJ"
PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* do nondiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  /* send it on its way */
  ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  /* do local part */
  ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  /* receive remote parts */
  ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
  This only works correctly for square matrices where the subblock a->A is the
  diagonal block
*/
#undef __FUNCT__
#define __FUNCT__ "MatGetDiagonal_MPIAIJ"
PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;

  PetscFunctionBegin;
  if (A->rmap->N != A->cmap->N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
  if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
  ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatScale_MPIAIJ"
PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatScale(a->A,aa);CHKERRQ(ierr);
  ierr = MatScale(a->B,aa);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatDestroy_MPIAIJ"
PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
#if defined(PETSC_USE_LOG)
  PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N);
#endif
  ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
  ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
  ierr = MatDestroy(&aij->A);CHKERRQ(ierr);
  ierr = MatDestroy(&aij->B);CHKERRQ(ierr);
#if defined(PETSC_USE_CTABLE)
  ierr = PetscTableDestroy(&aij->colmap);CHKERRQ(ierr);
#else
  ierr = PetscFree(aij->colmap);CHKERRQ(ierr);
#endif
  ierr = PetscFree(aij->garray);CHKERRQ(ierr);
  ierr = VecDestroy(&aij->lvec);CHKERRQ(ierr);
  ierr = VecScatterDestroy(&aij->Mvctx);CHKERRQ(ierr);
  ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
  ierr = PetscFree(aij->ld);CHKERRQ(ierr);
  ierr = PetscFree(mat->data);CHKERRQ(ierr);

  ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C",NULL);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

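/*
   MatView_MPIAIJ_Binary writes the file from process 0 only: the other
   processes send their row lengths, column indices, and numerical values to
   process 0 in turn, with the PetscViewerFlowControl*() calls pacing the
   messages so that process 0 is not flooded.
*/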
1146 #undef __FUNCT__
1147 #define __FUNCT__ "MatView_MPIAIJ_Binary"
1148 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
1149 {
1150   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1151   Mat_SeqAIJ     *A   = (Mat_SeqAIJ*)aij->A->data;
1152   Mat_SeqAIJ     *B   = (Mat_SeqAIJ*)aij->B->data;
1153   PetscErrorCode ierr;
1154   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
1155   int            fd;
1156   PetscInt       nz,header[4],*row_lengths,*range=0,rlen,i;
1157   PetscInt       nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap->rstart,rnz = 0;
1158   PetscScalar    *column_values;
1159   PetscInt       message_count,flowcontrolcount;
1160   FILE           *file;
1161 
1162   PetscFunctionBegin;
1163   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
1164   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);CHKERRQ(ierr);
1165   nz   = A->nz + B->nz;
1166   if (!rank) {
1167     header[0] = MAT_FILE_CLASSID;
1168     header[1] = mat->rmap->N;
1169     header[2] = mat->cmap->N;
1170 
1171     ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1172     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1173     ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1174     /* get largest number of rows any processor has */
1175     rlen  = mat->rmap->n;
1176     range = mat->rmap->range;
1177     for (i=1; i<size; i++) rlen = PetscMax(rlen,range[i+1] - range[i]);
1178   } else {
1179     ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1180     rlen = mat->rmap->n;
1181   }
1182 
1183   /* load up the local row counts */
1184   ierr = PetscMalloc1((rlen+1),&row_lengths);CHKERRQ(ierr);
1185   for (i=0; i<mat->rmap->n; i++) row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
1186 
1187   /* store the row lengths to the file */
1188   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1189   if (!rank) {
1190     ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap->n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1191     for (i=1; i<size; i++) {
1192       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1193       rlen = range[i+1] - range[i];
1194       ierr = MPIULong_Recv(row_lengths,rlen,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1195       ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1196     }
1197     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1198   } else {
1199     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1200     ierr = MPIULong_Send(row_lengths,mat->rmap->n,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1201     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1202   }
1203   ierr = PetscFree(row_lengths);CHKERRQ(ierr);
1204 
1205   /* load up the local column indices */
1206   nzmax = nz; /* th processor needs space a largest processor needs */
1207   ierr  = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1208   ierr  = PetscMalloc1((nzmax+1),&column_indices);CHKERRQ(ierr);
1209   cnt   = 0;
1210   for (i=0; i<mat->rmap->n; i++) {
1211     for (j=B->i[i]; j<B->i[i+1]; j++) {
1212       if ((col = garray[B->j[j]]) > cstart) break;
1213       column_indices[cnt++] = col;
1214     }
1215     for (k=A->i[i]; k<A->i[i+1]; k++) column_indices[cnt++] = A->j[k] + cstart;
1216     for (; j<B->i[i+1]; j++) column_indices[cnt++] = garray[B->j[j]];
1217   }
1218   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
1219 
1220   /* store the column indices to the file */
1221   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1222   if (!rank) {
1223     MPI_Status status;
1224     ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1225     for (i=1; i<size; i++) {
1226       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1227       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
1228       if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax);
1229       ierr = MPIULong_Recv(column_indices,rnz,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1230       ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1231     }
1232     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1233   } else {
1234     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1235     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1236     ierr = MPIULong_Send(column_indices,nz,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1237     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1238   }
1239   ierr = PetscFree(column_indices);CHKERRQ(ierr);
1240 
1241   /* load up the local column values */
1242   ierr = PetscMalloc1((nzmax+1),&column_values);CHKERRQ(ierr);
1243   cnt  = 0;
1244   for (i=0; i<mat->rmap->n; i++) {
1245     for (j=B->i[i]; j<B->i[i+1]; j++) {
1246       if (garray[B->j[j]] > cstart) break;
1247       column_values[cnt++] = B->a[j];
1248     }
1249     for (k=A->i[i]; k<A->i[i+1]; k++) column_values[cnt++] = A->a[k];
1250     for (; j<B->i[i+1]; j++) column_values[cnt++] = B->a[j];
1251   }
1252   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
1253 
1254   /* store the column values to the file */
1255   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1256   if (!rank) {
1257     MPI_Status status;
1258     ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1259     for (i=1; i<size; i++) {
1260       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1261       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
1262       if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax);
1263       ierr = MPIULong_Recv(column_values,rnz,MPIU_SCALAR,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1264       ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1265     }
1266     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1267   } else {
1268     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1269     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1270     ierr = MPIULong_Send(column_values,nz,MPIU_SCALAR,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1271     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1272   }
1273   ierr = PetscFree(column_values);CHKERRQ(ierr);
1274 
1275   ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr);
1276   if (file) fprintf(file,"-matload_block_size %d\n",(int)PetscAbs(mat->rmap->bs));
1277   PetscFunctionReturn(0);
1278 }
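
/*
   Usage sketch (illustrative only, not called by the library): the flow-control
   send/receive protocol above is what runs underneath an ordinary MatView() on a
   binary viewer; the file name "matrix.dat" is a hypothetical choice.
*/
#undef __FUNCT__
#define __FUNCT__ "ExampleSaveMatrixBinary"
PETSC_UNUSED static PetscErrorCode ExampleSaveMatrixBinary(Mat A)
{
  PetscErrorCode ierr;
  PetscViewer    viewer;

  PetscFunctionBegin;
  ierr = PetscViewerBinaryOpen(PetscObjectComm((PetscObject)A),"matrix.dat",FILE_MODE_WRITE,&viewer);CHKERRQ(ierr);
  ierr = MatView(A,viewer);CHKERRQ(ierr); /* dispatches to MatView_MPIAIJ_Binary() for parallel AIJ */
  ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}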
1279 
1280 #include <petscdraw.h>
1281 #undef __FUNCT__
1282 #define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket"
1283 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
1284 {
1285   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
1286   PetscErrorCode    ierr;
1287   PetscMPIInt       rank = aij->rank,size = aij->size;
1288   PetscBool         isdraw,iascii,isbinary;
1289   PetscViewer       sviewer;
1290   PetscViewerFormat format;
1291 
1292   PetscFunctionBegin;
1293   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1294   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1295   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1296   if (iascii) {
1297     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1298     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1299       MatInfo   info;
1300       PetscBool inodes;
1301 
1302       ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
1303       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
1304       ierr = MatInodeGetInodeSizes(aij->A,NULL,(PetscInt**)&inodes,NULL);CHKERRQ(ierr);
1305       ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_TRUE);CHKERRQ(ierr);
1306       if (!inodes) {
1307         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, not using I-node routines\n",
1308                                                   rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
1309       } else {
1310         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, using I-node routines\n",
1311                                                   rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
1312       }
1313       ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
1314       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1315       ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
1316       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1317       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1318       ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_FALSE);CHKERRQ(ierr);
1319       ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
1320       ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr);
1321       PetscFunctionReturn(0);
1322     } else if (format == PETSC_VIEWER_ASCII_INFO) {
1323       PetscInt inodecount,inodelimit,*inodes;
1324       ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr);
1325       if (inodes) {
1326         ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr);
1327       } else {
1328         ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr);
1329       }
1330       PetscFunctionReturn(0);
1331     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1332       PetscFunctionReturn(0);
1333     }
1334   } else if (isbinary) {
1335     if (size == 1) {
1336       ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1337       ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
1338     } else {
1339       ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr);
1340     }
1341     PetscFunctionReturn(0);
1342   } else if (isdraw) {
1343     PetscDraw draw;
1344     PetscBool isnull;
1345     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
1346     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
1347   }
1348 
1349   if (size == 1) {
1350     ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1351     ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
1352   } else {
1353     /* assemble the entire matrix onto the first processor */
1354     Mat        A;
1355     Mat_SeqAIJ *Aloc;
1356     PetscInt   M = mat->rmap->N,N = mat->cmap->N,m,*ai,*aj,row,*cols,i,*ct;
1357     MatScalar  *a;
1358 
1359     if (mat->rmap->N > 1024) {
1360       PetscBool flg = PETSC_FALSE;
1361 
1362       ierr = PetscOptionsGetBool(((PetscObject) mat)->prefix, "-mat_ascii_output_large", &flg,NULL);CHKERRQ(ierr);
1363       if (!flg) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_OUTOFRANGE,"ASCII matrix output not allowed for matrices with more than 1024 rows, use binary format instead.\nYou can override this restriction using -mat_ascii_output_large.");
1364     }
1365 
1366     ierr = MatCreate(PetscObjectComm((PetscObject)mat),&A);CHKERRQ(ierr);
1367     if (!rank) {
1368       ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
1369     } else {
1370       ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
1371     }
1372     /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */
1373     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
1374     ierr = MatMPIAIJSetPreallocation(A,0,NULL,0,NULL);CHKERRQ(ierr);
1375     ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
1376     ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)A);CHKERRQ(ierr);
1377 
1378     /* copy over the A part */
1379     Aloc = (Mat_SeqAIJ*)aij->A->data;
1380     m    = aij->A->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1381     row  = mat->rmap->rstart;
1382     for (i=0; i<ai[m]; i++) aj[i] += mat->cmap->rstart;
1383     for (i=0; i<m; i++) {
1384       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
1385       row++;
1386       a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
1387     }
1388     aj = Aloc->j;
1389     for (i=0; i<ai[m]; i++) aj[i] -= mat->cmap->rstart;
1390 
1391     /* copy over the B part */
1392     Aloc = (Mat_SeqAIJ*)aij->B->data;
1393     m    = aij->B->rmap->n;  ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1394     row  = mat->rmap->rstart;
1395     ierr = PetscMalloc1((ai[m]+1),&cols);CHKERRQ(ierr);
1396     ct   = cols;
1397     for (i=0; i<ai[m]; i++) cols[i] = aij->garray[aj[i]];
1398     for (i=0; i<m; i++) {
1399       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
1400       row++;
1401       a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
1402     }
1403     ierr = PetscFree(ct);CHKERRQ(ierr);
1404     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1405     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1406     /*
1407        Everyone has to participate in the viewing, since the graphics waits are
1408        synchronized across all processes that share the PetscDraw object
1409     */
1410     ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
1411     if (!rank) {
1412       ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1413       /* Set the type name to MATMPIAIJ so that the correct type can be printed out by PetscObjectPrintClassNamePrefixType() in MatView_SeqAIJ_ASCII()*/
1414       ierr = PetscStrcpy(((PetscObject)((Mat_MPIAIJ*)(A->data))->A)->type_name,MATMPIAIJ);CHKERRQ(ierr);
1415       ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
1416     }
1417     ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
1418     ierr = MatDestroy(&A);CHKERRQ(ierr);
1419   }
1420   PetscFunctionReturn(0);
1421 }
1422 
1423 #undef __FUNCT__
1424 #define __FUNCT__ "MatView_MPIAIJ"
1425 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer)
1426 {
1427   PetscErrorCode ierr;
1428   PetscBool      iascii,isdraw,issocket,isbinary;
1429 
1430   PetscFunctionBegin;
1431   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1432   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1433   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1434   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
1435   if (iascii || isdraw || isbinary || issocket) {
1436     ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
1437   }
1438   PetscFunctionReturn(0);
1439 }
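
/*
   Usage sketch (illustrative only): the viewer format selects the branch taken in
   MatView_MPIAIJ_ASCIIorDraworSocket(); PETSC_VIEWER_ASCII_INFO prints the I-node
   summary instead of the matrix entries.
*/
#undef __FUNCT__
#define __FUNCT__ "ExampleViewMatrixInfo"
PETSC_UNUSED static PetscErrorCode ExampleViewMatrixInfo(Mat A)
{
  PetscErrorCode ierr;
  PetscViewer    viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)A));

  PetscFunctionBegin;
  ierr = PetscViewerPushFormat(viewer,PETSC_VIEWER_ASCII_INFO);CHKERRQ(ierr);
  ierr = MatView(A,viewer);CHKERRQ(ierr);
  ierr = PetscViewerPopFormat(viewer);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}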
1440 
1441 #undef __FUNCT__
1442 #define __FUNCT__ "MatSOR_MPIAIJ"
1443 PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
1444 {
1445   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1446   PetscErrorCode ierr;
1447   Vec            bb1 = 0;
1448   PetscBool      hasop;
1449 
1450   PetscFunctionBegin;
1451   if (flag == SOR_APPLY_UPPER) {
1452     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1453     PetscFunctionReturn(0);
1454   }
1455 
1456   if (its > 1 || !(flag & SOR_ZERO_INITIAL_GUESS) || flag & SOR_EISENSTAT) {
1457     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
1458   }
1459 
1460   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
1461     if (flag & SOR_ZERO_INITIAL_GUESS) {
1462       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1463       its--;
1464     }
1465 
1466     while (its--) {
1467       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1468       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1469 
1470       /* update rhs: bb1 = bb - B*x */
1471       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1472       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1473 
1474       /* local sweep */
1475       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1476     }
1477   } else if (flag & SOR_LOCAL_FORWARD_SWEEP) {
1478     if (flag & SOR_ZERO_INITIAL_GUESS) {
1479       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1480       its--;
1481     }
1482     while (its--) {
1483       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1484       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1485 
1486       /* update rhs: bb1 = bb - B*x */
1487       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1488       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1489 
1490       /* local sweep */
1491       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1492     }
1493   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) {
1494     if (flag & SOR_ZERO_INITIAL_GUESS) {
1495       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1496       its--;
1497     }
1498     while (its--) {
1499       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1500       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1501 
1502       /* update rhs: bb1 = bb - B*x */
1503       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1504       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1505 
1506       /* local sweep */
1507       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1508     }
1509   } else if (flag & SOR_EISENSTAT) {
1510     Vec xx1;
1511 
1512     ierr = VecDuplicate(bb,&xx1);CHKERRQ(ierr);
1513     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr);
1514 
1515     ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1516     ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1517     if (!mat->diag) {
1518       ierr = MatGetVecs(matin,&mat->diag,NULL);CHKERRQ(ierr);
1519       ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr);
1520     }
1521     ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr);
1522     if (hasop) {
1523       ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr);
1524     } else {
1525       ierr = VecPointwiseMult(bb1,mat->diag,xx);CHKERRQ(ierr);
1526     }
1527     ierr = VecAYPX(bb1,(omega-2.0)/omega,bb);CHKERRQ(ierr);
1528 
1529     ierr = MatMultAdd(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr);
1530 
1531     /* local sweep */
1532     ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr);
1533     ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr);
1534     ierr = VecDestroy(&xx1);CHKERRQ(ierr);
1535   } else SETERRQ(PetscObjectComm((PetscObject)matin),PETSC_ERR_SUP,"Parallel SOR not supported");
1536 
1537   ierr = VecDestroy(&bb1);CHKERRQ(ierr);
1538   PetscFunctionReturn(0);
1539 }
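
/*
   Usage sketch (illustrative only): true parallel SOR is not supported, so callers
   request one of the processor-local variants; a single local symmetric sweep with
   omega = 1 and a zero initial guess looks like this.
*/
#undef __FUNCT__
#define __FUNCT__ "ExampleLocalSORSweep"
PETSC_UNUSED static PetscErrorCode ExampleLocalSORSweep(Mat A,Vec b,Vec x)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatSOR(A,b,1.0,(MatSORType)(SOR_LOCAL_SYMMETRIC_SWEEP | SOR_ZERO_INITIAL_GUESS),0.0,1,1,x);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}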
1540 
1541 #undef __FUNCT__
1542 #define __FUNCT__ "MatPermute_MPIAIJ"
1543 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B)
1544 {
1545   Mat            aA,aB,Aperm;
1546   const PetscInt *rwant,*cwant,*gcols,*ai,*bi,*aj,*bj;
1547   PetscScalar    *aa,*ba;
1548   PetscInt       i,j,m,n,ng,anz,bnz,*dnnz,*onnz,*tdnnz,*tonnz,*rdest,*cdest,*work,*gcdest;
1549   PetscSF        rowsf,sf;
1550   IS             parcolp = NULL;
1551   PetscBool      done;
1552   PetscErrorCode ierr;
1553 
1554   PetscFunctionBegin;
1555   ierr = MatGetLocalSize(A,&m,&n);CHKERRQ(ierr);
1556   ierr = ISGetIndices(rowp,&rwant);CHKERRQ(ierr);
1557   ierr = ISGetIndices(colp,&cwant);CHKERRQ(ierr);
1558   ierr = PetscMalloc3(PetscMax(m,n),&work,m,&rdest,n,&cdest);CHKERRQ(ierr);
1559 
1560   /* Invert row permutation to find out where my rows should go */
1561   ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&rowsf);CHKERRQ(ierr);
1562   ierr = PetscSFSetGraphLayout(rowsf,A->rmap,A->rmap->n,NULL,PETSC_OWN_POINTER,rwant);CHKERRQ(ierr);
1563   ierr = PetscSFSetFromOptions(rowsf);CHKERRQ(ierr);
1564   for (i=0; i<m; i++) work[i] = A->rmap->rstart + i;
1565   ierr = PetscSFReduceBegin(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr);
1566   ierr = PetscSFReduceEnd(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr);
1567 
1568   /* Invert column permutation to find out where my columns should go */
1569   ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
1570   ierr = PetscSFSetGraphLayout(sf,A->cmap,A->cmap->n,NULL,PETSC_OWN_POINTER,cwant);CHKERRQ(ierr);
1571   ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
1572   for (i=0; i<n; i++) work[i] = A->cmap->rstart + i;
1573   ierr = PetscSFReduceBegin(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr);
1574   ierr = PetscSFReduceEnd(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr);
1575   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
1576 
1577   ierr = ISRestoreIndices(rowp,&rwant);CHKERRQ(ierr);
1578   ierr = ISRestoreIndices(colp,&cwant);CHKERRQ(ierr);
1579   ierr = MatMPIAIJGetSeqAIJ(A,&aA,&aB,&gcols);CHKERRQ(ierr);
1580 
1581   /* Find out where my gcols should go */
1582   ierr = MatGetSize(aB,NULL,&ng);CHKERRQ(ierr);
1583   ierr = PetscMalloc1(ng,&gcdest);CHKERRQ(ierr);
1584   ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
1585   ierr = PetscSFSetGraphLayout(sf,A->cmap,ng,NULL,PETSC_OWN_POINTER,gcols);CHKERRQ(ierr);
1586   ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
1587   ierr = PetscSFBcastBegin(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr);
1588   ierr = PetscSFBcastEnd(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr);
1589   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
1590 
1591   ierr = PetscCalloc4(m,&dnnz,m,&onnz,m,&tdnnz,m,&tonnz);CHKERRQ(ierr);
1592   ierr = MatGetRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr);
1593   ierr = MatGetRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr);
1594   for (i=0; i<m; i++) {
1595     PetscInt row = rdest[i],rowner;
1596     ierr = PetscLayoutFindOwner(A->rmap,row,&rowner);CHKERRQ(ierr);
1597     for (j=ai[i]; j<ai[i+1]; j++) {
1598       PetscInt cowner,col = cdest[aj[j]];
1599       ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr); /* Could build an index for the columns to eliminate this search */
1600       if (rowner == cowner) dnnz[i]++;
1601       else onnz[i]++;
1602     }
1603     for (j=bi[i]; j<bi[i+1]; j++) {
1604       PetscInt cowner,col = gcdest[bj[j]];
1605       ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr);
1606       if (rowner == cowner) dnnz[i]++;
1607       else onnz[i]++;
1608     }
1609   }
1610   ierr = PetscSFBcastBegin(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr);
1611   ierr = PetscSFBcastEnd(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr);
1612   ierr = PetscSFBcastBegin(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr);
1613   ierr = PetscSFBcastEnd(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr);
1614   ierr = PetscSFDestroy(&rowsf);CHKERRQ(ierr);
1615 
1616   ierr = MatCreateAIJ(PetscObjectComm((PetscObject)A),A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N,0,tdnnz,0,tonnz,&Aperm);CHKERRQ(ierr);
1617   ierr = MatSeqAIJGetArray(aA,&aa);CHKERRQ(ierr);
1618   ierr = MatSeqAIJGetArray(aB,&ba);CHKERRQ(ierr);
1619   for (i=0; i<m; i++) {
1620     PetscInt *acols = dnnz,*bcols = onnz; /* Repurpose now-unneeded arrays */
1621     PetscInt j0,rowlen;
1622     rowlen = ai[i+1] - ai[i];
1623     for (j0=j=0; j<rowlen; j0=j) { /* rowlen could be larger than number of rows m, so sum in batches */
1624       for ( ; j<PetscMin(rowlen,j0+m); j++) acols[j-j0] = cdest[aj[ai[i]+j]];
1625       ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,acols,aa+ai[i]+j0,INSERT_VALUES);CHKERRQ(ierr);
1626     }
1627     rowlen = bi[i+1] - bi[i];
1628     for (j0=j=0; j<rowlen; j0=j) {
1629       for ( ; j<PetscMin(rowlen,j0+m); j++) bcols[j-j0] = gcdest[bj[bi[i]+j]];
1630       ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,bcols,ba+bi[i]+j0,INSERT_VALUES);CHKERRQ(ierr);
1631     }
1632   }
1633   ierr = MatAssemblyBegin(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1634   ierr = MatAssemblyEnd(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1635   ierr = MatRestoreRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr);
1636   ierr = MatRestoreRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr);
1637   ierr = MatSeqAIJRestoreArray(aA,&aa);CHKERRQ(ierr);
1638   ierr = MatSeqAIJRestoreArray(aB,&ba);CHKERRQ(ierr);
1639   ierr = PetscFree4(dnnz,onnz,tdnnz,tonnz);CHKERRQ(ierr);
1640   ierr = PetscFree3(work,rdest,cdest);CHKERRQ(ierr);
1641   ierr = PetscFree(gcdest);CHKERRQ(ierr);
1642   if (parcolp) {ierr = ISDestroy(&colp);CHKERRQ(ierr);}
1643   *B = Aperm;
1644   PetscFunctionReturn(0);
1645 }
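
/*
   Usage sketch (illustrative only): the index sets passed to MatPermute() list,
   for each local row and column, the global position it should move to; the
   identity permutation built here with ISCreateStride() returns an unchanged copy.
*/
#undef __FUNCT__
#define __FUNCT__ "ExampleIdentityPermute"
PETSC_UNUSED static PetscErrorCode ExampleIdentityPermute(Mat A,Mat *B)
{
  PetscErrorCode ierr;
  PetscInt       rstart,rend,cstart,cend;
  IS             rowp,colp;

  PetscFunctionBegin;
  ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
  ierr = MatGetOwnershipRangeColumn(A,&cstart,&cend);CHKERRQ(ierr);
  ierr = ISCreateStride(PetscObjectComm((PetscObject)A),rend-rstart,rstart,1,&rowp);CHKERRQ(ierr);
  ierr = ISCreateStride(PetscObjectComm((PetscObject)A),cend-cstart,cstart,1,&colp);CHKERRQ(ierr);
  ierr = MatPermute(A,rowp,colp,B);CHKERRQ(ierr);
  ierr = ISDestroy(&rowp);CHKERRQ(ierr);
  ierr = ISDestroy(&colp);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}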
1646 
1647 #undef __FUNCT__
1648 #define __FUNCT__ "MatGetInfo_MPIAIJ"
1649 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1650 {
1651   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1652   Mat            A    = mat->A,B = mat->B;
1653   PetscErrorCode ierr;
1654   PetscReal      isend[5],irecv[5];
1655 
1656   PetscFunctionBegin;
1657   info->block_size = 1.0;
1658   ierr             = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1659 
1660   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1661   isend[3] = info->memory;  isend[4] = info->mallocs;
1662 
1663   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1664 
1665   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1666   isend[3] += info->memory;  isend[4] += info->mallocs;
1667   if (flag == MAT_LOCAL) {
1668     info->nz_used      = isend[0];
1669     info->nz_allocated = isend[1];
1670     info->nz_unneeded  = isend[2];
1671     info->memory       = isend[3];
1672     info->mallocs      = isend[4];
1673   } else if (flag == MAT_GLOBAL_MAX) {
1674     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);
1675 
1676     info->nz_used      = irecv[0];
1677     info->nz_allocated = irecv[1];
1678     info->nz_unneeded  = irecv[2];
1679     info->memory       = irecv[3];
1680     info->mallocs      = irecv[4];
1681   } else if (flag == MAT_GLOBAL_SUM) {
1682     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);
1683 
1684     info->nz_used      = irecv[0];
1685     info->nz_allocated = irecv[1];
1686     info->nz_unneeded  = irecv[2];
1687     info->memory       = irecv[3];
1688     info->mallocs      = irecv[4];
1689   }
1690   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1691   info->fill_ratio_needed = 0;
1692   info->factor_mallocs    = 0;
1693   PetscFunctionReturn(0);
1694 }
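
/*
   Usage sketch (illustrative only): MAT_LOCAL returns the statistics summed over
   the local diagonal and off-diagonal blocks, while MAT_GLOBAL_SUM/MAT_GLOBAL_MAX
   reduce them over the communicator as coded above.
*/
#undef __FUNCT__
#define __FUNCT__ "ExamplePrintGlobalNonzeros"
PETSC_UNUSED static PetscErrorCode ExamplePrintGlobalNonzeros(Mat A)
{
  PetscErrorCode ierr;
  MatInfo        info;

  PetscFunctionBegin;
  ierr = MatGetInfo(A,MAT_GLOBAL_SUM,&info);CHKERRQ(ierr);
  ierr = PetscPrintf(PetscObjectComm((PetscObject)A),"nz used %g, nz allocated %g\n",info.nz_used,info.nz_allocated);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}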
1695 
1696 #undef __FUNCT__
1697 #define __FUNCT__ "MatSetOption_MPIAIJ"
1698 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscBool flg)
1699 {
1700   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1701   PetscErrorCode ierr;
1702 
1703   PetscFunctionBegin;
1704   switch (op) {
1705   case MAT_NEW_NONZERO_LOCATIONS:
1706   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1707   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1708   case MAT_KEEP_NONZERO_PATTERN:
1709   case MAT_NEW_NONZERO_LOCATION_ERR:
1710   case MAT_USE_INODES:
1711   case MAT_IGNORE_ZERO_ENTRIES:
1712     MatCheckPreallocated(A,1);
1713     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1714     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1715     break;
1716   case MAT_ROW_ORIENTED:
1717     a->roworiented = flg;
1718 
1719     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1720     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1721     break;
1722   case MAT_NEW_DIAGONALS:
1723     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1724     break;
1725   case MAT_IGNORE_OFF_PROC_ENTRIES:
1726     a->donotstash = flg;
1727     break;
1728   case MAT_SPD:
1729     A->spd_set = PETSC_TRUE;
1730     A->spd     = flg;
1731     if (flg) {
1732       A->symmetric                  = PETSC_TRUE;
1733       A->structurally_symmetric     = PETSC_TRUE;
1734       A->symmetric_set              = PETSC_TRUE;
1735       A->structurally_symmetric_set = PETSC_TRUE;
1736     }
1737     break;
1738   case MAT_SYMMETRIC:
1739     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1740     break;
1741   case MAT_STRUCTURALLY_SYMMETRIC:
1742     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1743     break;
1744   case MAT_HERMITIAN:
1745     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1746     break;
1747   case MAT_SYMMETRY_ETERNAL:
1748     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1749     break;
1750   default:
1751     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op);
1752   }
1753   PetscFunctionReturn(0);
1754 }
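
/*
   Usage sketch (illustrative only): options affecting local storage are forwarded
   to both the diagonal (A) and off-diagonal (B) blocks above, while
   MAT_IGNORE_OFF_PROC_ENTRIES merely flips the stash flag.
*/
#undef __FUNCT__
#define __FUNCT__ "ExampleSetAssemblyOptions"
PETSC_UNUSED static PetscErrorCode ExampleSetAssemblyOptions(Mat A)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatSetOption(A,MAT_IGNORE_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr);    /* each rank will set only its own rows */
  ierr = MatSetOption(A,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); /* catch preallocation mistakes early */
  PetscFunctionReturn(0);
}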
1755 
1756 #undef __FUNCT__
1757 #define __FUNCT__ "MatGetRow_MPIAIJ"
1758 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1759 {
1760   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1761   PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
1762   PetscErrorCode ierr;
1763   PetscInt       i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart;
1764   PetscInt       nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend;
1765   PetscInt       *cmap,*idx_p;
1766 
1767   PetscFunctionBegin;
1768   if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
1769   mat->getrowactive = PETSC_TRUE;
1770 
1771   if (!mat->rowvalues && (idx || v)) {
1772     /*
1773         allocate enough space to hold information from the longest row.
1774     */
1775     Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data;
1776     PetscInt   max = 1,tmp;
1777     for (i=0; i<matin->rmap->n; i++) {
1778       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1779       if (max < tmp) max = tmp;
1780     }
1781     ierr = PetscMalloc2(max,&mat->rowvalues,max,&mat->rowindices);CHKERRQ(ierr);
1782   }
1783 
1784   if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows");
1785   lrow = row - rstart;
1786 
1787   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1788   if (!v)   {pvA = 0; pvB = 0;}
1789   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1790   ierr  = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1791   ierr  = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1792   nztot = nzA + nzB;
1793 
1794   cmap = mat->garray;
1795   if (v  || idx) {
1796     if (nztot) {
1797       /* Sort by increasing column numbers, assuming A and B already sorted */
1798       PetscInt imark = -1;
1799       if (v) {
1800         *v = v_p = mat->rowvalues;
1801         for (i=0; i<nzB; i++) {
1802           if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i];
1803           else break;
1804         }
1805         imark = i;
1806         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1807         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1808       }
1809       if (idx) {
1810         *idx = idx_p = mat->rowindices;
1811         if (imark > -1) {
1812           for (i=0; i<imark; i++) {
1813             idx_p[i] = cmap[cworkB[i]];
1814           }
1815         } else {
1816           for (i=0; i<nzB; i++) {
1817             if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]];
1818             else break;
1819           }
1820           imark = i;
1821         }
1822         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart + cworkA[i];
1823         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]];
1824       }
1825     } else {
1826       if (idx) *idx = 0;
1827       if (v)   *v   = 0;
1828     }
1829   }
1830   *nz  = nztot;
1831   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1832   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1833   PetscFunctionReturn(0);
1834 }
1835 
1836 #undef __FUNCT__
1837 #define __FUNCT__ "MatRestoreRow_MPIAIJ"
1838 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1839 {
1840   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1841 
1842   PetscFunctionBegin;
1843   if (!aij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
1844   aij->getrowactive = PETSC_FALSE;
1845   PetscFunctionReturn(0);
1846 }
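
/*
   Usage sketch (illustrative only): MatGetRow()/MatRestoreRow() must be paired,
   may only be used on locally owned rows, and return the columns in globally
   sorted order thanks to the merge performed in MatGetRow_MPIAIJ().
*/
#undef __FUNCT__
#define __FUNCT__ "ExampleSumLocalRows"
PETSC_UNUSED static PetscErrorCode ExampleSumLocalRows(Mat A,PetscScalar *total)
{
  PetscErrorCode    ierr;
  PetscInt          row,rstart,rend,ncols,i;
  const PetscInt    *cols;
  const PetscScalar *vals;

  PetscFunctionBegin;
  *total = 0.0;
  ierr   = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
  for (row=rstart; row<rend; row++) {
    ierr = MatGetRow(A,row,&ncols,&cols,&vals);CHKERRQ(ierr);
    for (i=0; i<ncols; i++) *total += vals[i];
    ierr = MatRestoreRow(A,row,&ncols,&cols,&vals);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}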
1847 
1848 #undef __FUNCT__
1849 #define __FUNCT__ "MatNorm_MPIAIJ"
1850 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm)
1851 {
1852   Mat_MPIAIJ     *aij  = (Mat_MPIAIJ*)mat->data;
1853   Mat_SeqAIJ     *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data;
1854   PetscErrorCode ierr;
1855   PetscInt       i,j,cstart = mat->cmap->rstart;
1856   PetscReal      sum = 0.0;
1857   MatScalar      *v;
1858 
1859   PetscFunctionBegin;
1860   if (aij->size == 1) {
1861     ierr =  MatNorm(aij->A,type,norm);CHKERRQ(ierr);
1862   } else {
1863     if (type == NORM_FROBENIUS) {
1864       v = amat->a;
1865       for (i=0; i<amat->nz; i++) {
1866         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1867       }
1868       v = bmat->a;
1869       for (i=0; i<bmat->nz; i++) {
1870         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1871       }
1872       ierr  = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1873       *norm = PetscSqrtReal(*norm);
1874     } else if (type == NORM_1) { /* max column norm */
1875       PetscReal *tmp,*tmp2;
1876       PetscInt  *jj,*garray = aij->garray;
1877       ierr  = PetscCalloc1((mat->cmap->N+1),&tmp);CHKERRQ(ierr);
1878       ierr  = PetscMalloc1((mat->cmap->N+1),&tmp2);CHKERRQ(ierr);
1879       *norm = 0.0;
1880       v     = amat->a; jj = amat->j;
1881       for (j=0; j<amat->nz; j++) {
1882         tmp[cstart + *jj++] += PetscAbsScalar(*v);  v++;
1883       }
1884       v = bmat->a; jj = bmat->j;
1885       for (j=0; j<bmat->nz; j++) {
1886         tmp[garray[*jj++]] += PetscAbsScalar(*v); v++;
1887       }
1888       ierr = MPI_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1889       for (j=0; j<mat->cmap->N; j++) {
1890         if (tmp2[j] > *norm) *norm = tmp2[j];
1891       }
1892       ierr = PetscFree(tmp);CHKERRQ(ierr);
1893       ierr = PetscFree(tmp2);CHKERRQ(ierr);
1894     } else if (type == NORM_INFINITY) { /* max row norm */
1895       PetscReal ntemp = 0.0;
1896       for (j=0; j<aij->A->rmap->n; j++) {
1897         v   = amat->a + amat->i[j];
1898         sum = 0.0;
1899         for (i=0; i<amat->i[j+1]-amat->i[j]; i++) {
1900           sum += PetscAbsScalar(*v); v++;
1901         }
1902         v = bmat->a + bmat->i[j];
1903         for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) {
1904           sum += PetscAbsScalar(*v); v++;
1905         }
1906         if (sum > ntemp) ntemp = sum;
1907       }
1908       ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1909     } else SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"No support for the two norm");
1910   }
1911   PetscFunctionReturn(0);
1912 }
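
/*
   Usage sketch (illustrative only): the three supported norms map onto the three
   branches above; requesting NORM_2 raises the "no support" error.
*/
#undef __FUNCT__
#define __FUNCT__ "ExamplePrintNorms"
PETSC_UNUSED static PetscErrorCode ExamplePrintNorms(Mat A)
{
  PetscErrorCode ierr;
  PetscReal      fro,one,inf;

  PetscFunctionBegin;
  ierr = MatNorm(A,NORM_FROBENIUS,&fro);CHKERRQ(ierr);
  ierr = MatNorm(A,NORM_1,&one);CHKERRQ(ierr);        /* largest column sum */
  ierr = MatNorm(A,NORM_INFINITY,&inf);CHKERRQ(ierr); /* largest row sum */
  ierr = PetscPrintf(PetscObjectComm((PetscObject)A),"Frobenius %g  one-norm %g  inf-norm %g\n",(double)fro,(double)one,(double)inf);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}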
1913 
1914 #undef __FUNCT__
1915 #define __FUNCT__ "MatTranspose_MPIAIJ"
1916 PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout)
1917 {
1918   Mat_MPIAIJ     *a   = (Mat_MPIAIJ*)A->data;
1919   Mat_SeqAIJ     *Aloc=(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data;
1920   PetscErrorCode ierr;
1921   PetscInt       M      = A->rmap->N,N = A->cmap->N,ma,na,mb,nb,*ai,*aj,*bi,*bj,row,*cols,*cols_tmp,i;
1922   PetscInt       cstart = A->cmap->rstart,ncol;
1923   Mat            B;
1924   MatScalar      *array;
1925 
1926   PetscFunctionBegin;
1927   if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_SIZ,"Square matrix only for in-place transpose");
1928 
1929   ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n; nb = a->B->cmap->n;
1930   ai = Aloc->i; aj = Aloc->j;
1931   bi = Bloc->i; bj = Bloc->j;
1932   if (reuse == MAT_INITIAL_MATRIX || *matout == A) {
1933     PetscInt             *d_nnz,*g_nnz,*o_nnz;
1934     PetscSFNode          *oloc;
1935     PETSC_UNUSED PetscSF sf;
1936 
1937     ierr = PetscMalloc4(na,&d_nnz,na,&o_nnz,nb,&g_nnz,nb,&oloc);CHKERRQ(ierr);
1938     /* compute d_nnz for preallocation */
1939     ierr = PetscMemzero(d_nnz,na*sizeof(PetscInt));CHKERRQ(ierr);
1940     for (i=0; i<ai[ma]; i++) {
1941       d_nnz[aj[i]]++;
1942       aj[i] += cstart; /* global col index to be used by MatSetValues() */
1943     }
1944     /* compute local off-diagonal contributions */
1945     ierr = PetscMemzero(g_nnz,nb*sizeof(PetscInt));CHKERRQ(ierr);
1946     for (i=0; i<bi[ma]; i++) g_nnz[bj[i]]++;
1947     /* map those to global */
1948     ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
1949     ierr = PetscSFSetGraphLayout(sf,A->cmap,nb,NULL,PETSC_USE_POINTER,a->garray);CHKERRQ(ierr);
1950     ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
1951     ierr = PetscMemzero(o_nnz,na*sizeof(PetscInt));CHKERRQ(ierr);
1952     ierr = PetscSFReduceBegin(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr);
1953     ierr = PetscSFReduceEnd(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr);
1954     ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
1955 
1956     ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
1957     ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
1958     ierr = MatSetBlockSizes(B,PetscAbs(A->cmap->bs),PetscAbs(A->rmap->bs));CHKERRQ(ierr);
1959     ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
1960     ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
1961     ierr = PetscFree4(d_nnz,o_nnz,g_nnz,oloc);CHKERRQ(ierr);
1962   } else {
1963     B    = *matout;
1964     ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
1965     for (i=0; i<ai[ma]; i++) aj[i] += cstart; /* global col index to be used by MatSetValues() */
1966   }
1967 
1968   /* copy over the A part */
1969   array = Aloc->a;
1970   row   = A->rmap->rstart;
1971   for (i=0; i<ma; i++) {
1972     ncol = ai[i+1]-ai[i];
1973     ierr = MatSetValues(B,ncol,aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1974     row++;
1975     array += ncol; aj += ncol;
1976   }
1977   aj = Aloc->j;
1978   for (i=0; i<ai[ma]; i++) aj[i] -= cstart; /* restore the local column indices */
1979 
1980   /* copy over the B part */
1981   ierr  = PetscCalloc1(bi[mb],&cols);CHKERRQ(ierr);
1982   array = Bloc->a;
1983   row   = A->rmap->rstart;
1984   for (i=0; i<bi[mb]; i++) cols[i] = a->garray[bj[i]];
1985   cols_tmp = cols;
1986   for (i=0; i<mb; i++) {
1987     ncol = bi[i+1]-bi[i];
1988     ierr = MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1989     row++;
1990     array += ncol; cols_tmp += ncol;
1991   }
1992   ierr = PetscFree(cols);CHKERRQ(ierr);
1993 
1994   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1995   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1996   if (reuse == MAT_INITIAL_MATRIX || *matout != A) {
1997     *matout = B;
1998   } else {
1999     ierr = MatHeaderMerge(A,B);CHKERRQ(ierr);
2000   }
2001   PetscFunctionReturn(0);
2002 }
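
/*
   Usage sketch (illustrative only): MAT_INITIAL_MATRIX creates the transpose with
   the preallocation computed above; a later call with MAT_REUSE_MATRIX fills the
   same pattern again, and in-place transposition is restricted to square matrices.
*/
#undef __FUNCT__
#define __FUNCT__ "ExampleTranspose"
PETSC_UNUSED static PetscErrorCode ExampleTranspose(Mat A,Mat *At)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatTranspose(A,MAT_INITIAL_MATRIX,At);CHKERRQ(ierr);
  /* ... if A's values later change but its pattern does not ... */
  ierr = MatTranspose(A,MAT_REUSE_MATRIX,At);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}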
2003 
2004 #undef __FUNCT__
2005 #define __FUNCT__ "MatDiagonalScale_MPIAIJ"
2006 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
2007 {
2008   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2009   Mat            a    = aij->A,b = aij->B;
2010   PetscErrorCode ierr;
2011   PetscInt       s1,s2,s3;
2012 
2013   PetscFunctionBegin;
2014   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
2015   if (rr) {
2016     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
2017     if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
2018     /* Overlap communication with computation. */
2019     ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2020   }
2021   if (ll) {
2022     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
2023     if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
2024     ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr);
2025   }
2026   /* scale the diagonal block */
2027   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
2028 
2029   if (rr) {
2030     /* Do a scatter end and then right scale the off-diagonal block */
2031     ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2032     ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr);
2033   }
2034   PetscFunctionReturn(0);
2035 }
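
/*
   Usage sketch (illustrative only): the left vector must match the row layout and
   the right vector the column layout; MatGetVecs() produces conforming vectors.
   Scaling every row by 2 (a hypothetical choice) looks like this.
*/
#undef __FUNCT__
#define __FUNCT__ "ExampleRowScale"
PETSC_UNUSED static PetscErrorCode ExampleRowScale(Mat A)
{
  PetscErrorCode ierr;
  Vec            l;

  PetscFunctionBegin;
  ierr = MatGetVecs(A,NULL,&l);CHKERRQ(ierr);      /* vector with the row layout */
  ierr = VecSet(l,2.0);CHKERRQ(ierr);
  ierr = MatDiagonalScale(A,l,NULL);CHKERRQ(ierr); /* scale rows only; NULL skips column scaling */
  ierr = VecDestroy(&l);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}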
2036 
2037 #undef __FUNCT__
2038 #define __FUNCT__ "MatSetUnfactored_MPIAIJ"
2039 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A)
2040 {
2041   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2042   PetscErrorCode ierr;
2043 
2044   PetscFunctionBegin;
2045   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
2046   PetscFunctionReturn(0);
2047 }
2048 
2049 #undef __FUNCT__
2050 #define __FUNCT__ "MatEqual_MPIAIJ"
2051 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscBool  *flag)
2052 {
2053   Mat_MPIAIJ     *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data;
2054   Mat            a,b,c,d;
2055   PetscBool      flg;
2056   PetscErrorCode ierr;
2057 
2058   PetscFunctionBegin;
2059   a = matA->A; b = matA->B;
2060   c = matB->A; d = matB->B;
2061 
2062   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
2063   if (flg) {
2064     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
2065   }
2066   ierr = MPI_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
2067   PetscFunctionReturn(0);
2068 }
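
/*
   Usage sketch (illustrative only): the comparison is collective; the local
   results are combined with a logical AND above, so every rank gets the same
   answer and all ranks must participate.
*/
#undef __FUNCT__
#define __FUNCT__ "ExampleCompareMatrices"
PETSC_UNUSED static PetscErrorCode ExampleCompareMatrices(Mat A,Mat B)
{
  PetscErrorCode ierr;
  PetscBool      equal;

  PetscFunctionBegin;
  ierr = MatEqual(A,B,&equal);CHKERRQ(ierr);
  ierr = PetscPrintf(PetscObjectComm((PetscObject)A),"matrices %s\n",equal ? "equal" : "differ");CHKERRQ(ierr);
  PetscFunctionReturn(0);
}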
2069 
2070 #undef __FUNCT__
2071 #define __FUNCT__ "MatCopy_MPIAIJ"
2072 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
2073 {
2074   PetscErrorCode ierr;
2075   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2076   Mat_MPIAIJ     *b = (Mat_MPIAIJ*)B->data;
2077 
2078   PetscFunctionBegin;
2079   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
2080   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
2081     /* because of the column compression in the off-processor part of the matrix a->B,
2082        the number of columns in a->B and b->B may be different, hence we cannot call
2083        MatCopy() directly on the two parts. If need be, we can provide a more
2084        efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices
2085        then copying the submatrices */
2086     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
2087   } else {
2088     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
2089     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
2090   }
2091   PetscFunctionReturn(0);
2092 }
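
/*
   Usage sketch (illustrative only): with SAME_NONZERO_PATTERN the copy above
   proceeds block by block; with a different pattern it falls back to
   MatCopy_Basic(), which re-inserts every entry, so when the destination does
   not exist yet MatDuplicate() is usually the cheaper route.
*/
#undef __FUNCT__
#define __FUNCT__ "ExampleCopyMatrix"
PETSC_UNUSED static PetscErrorCode ExampleCopyMatrix(Mat A,Mat B)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatCopy(A,B,SAME_NONZERO_PATTERN);CHKERRQ(ierr); /* B must already have A's nonzero pattern */
  PetscFunctionReturn(0);
}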
2093 
2094 #undef __FUNCT__
2095 #define __FUNCT__ "MatSetUp_MPIAIJ"
2096 PetscErrorCode MatSetUp_MPIAIJ(Mat A)
2097 {
2098   PetscErrorCode ierr;
2099 
2100   PetscFunctionBegin;
2101   ierr =  MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
2102   PetscFunctionReturn(0);
2103 }
2104 
2105 #undef __FUNCT__
2106 #define __FUNCT__ "MatAXPYGetPreallocation_MPIAIJ"
2107 /* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */
2108 static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y,const PetscInt *yltog,Mat X,const PetscInt *xltog,PetscInt *nnz)
2109 {
2110   PetscInt       i,m=Y->rmap->N;
2111   Mat_SeqAIJ     *x  = (Mat_SeqAIJ*)X->data;
2112   Mat_SeqAIJ     *y  = (Mat_SeqAIJ*)Y->data;
2113   const PetscInt *xi = x->i,*yi = y->i;
2114 
2115   PetscFunctionBegin;
2116   /* Set the number of nonzeros in the new matrix */
2117   for (i=0; i<m; i++) {
2118     PetscInt       j,k,nzx = xi[i+1] - xi[i],nzy = yi[i+1] - yi[i];
2119     const PetscInt *xj = x->j+xi[i],*yj = y->j+yi[i];
2120     nnz[i] = 0;
2121     for (j=0,k=0; j<nzx; j++) {                   /* Point in X */
2122       for (; k<nzy && yltog[yj[k]]<xltog[xj[j]]; k++) nnz[i]++; /* Catch up to X */
2123       if (k<nzy && yltog[yj[k]]==xltog[xj[j]]) k++;             /* Skip duplicate */
2124       nnz[i]++;
2125     }
2126     for (; k<nzy; k++) nnz[i]++;
2127   }
2128   PetscFunctionReturn(0);
2129 }
2130 
2131 #undef __FUNCT__
2132 #define __FUNCT__ "MatAXPY_MPIAIJ"
2133 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
2134 {
2135   PetscErrorCode ierr;
2136   PetscInt       i;
2137   Mat_MPIAIJ     *xx = (Mat_MPIAIJ*)X->data,*yy = (Mat_MPIAIJ*)Y->data;
2138   PetscBLASInt   bnz,one=1;
2139   Mat_SeqAIJ     *x,*y;
2140 
2141   PetscFunctionBegin;
2142   if (str == SAME_NONZERO_PATTERN) {
2143     PetscScalar alpha = a;
2144     x    = (Mat_SeqAIJ*)xx->A->data;
2145     ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr);
2146     y    = (Mat_SeqAIJ*)yy->A->data;
2147     PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
2148     x    = (Mat_SeqAIJ*)xx->B->data;
2149     y    = (Mat_SeqAIJ*)yy->B->data;
2150     ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr);
2151     PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
2152   } else if (str == SUBSET_NONZERO_PATTERN) {
2153     ierr = MatAXPY_SeqAIJ(yy->A,a,xx->A,str);CHKERRQ(ierr);
2154 
2155     x = (Mat_SeqAIJ*)xx->B->data;
2156     y = (Mat_SeqAIJ*)yy->B->data;
2157     if (y->xtoy && y->XtoY != xx->B) {
2158       ierr = PetscFree(y->xtoy);CHKERRQ(ierr);
2159       ierr = MatDestroy(&y->XtoY);CHKERRQ(ierr);
2160     }
2161     if (!y->xtoy) { /* get xtoy */
2162       ierr    = MatAXPYGetxtoy_Private(xx->B->rmap->n,x->i,x->j,xx->garray,y->i,y->j,yy->garray,&y->xtoy);CHKERRQ(ierr);
2163       y->XtoY = xx->B;
2164       ierr    = PetscObjectReference((PetscObject)xx->B);CHKERRQ(ierr);
2165     }
2166     for (i=0; i<x->nz; i++) y->a[y->xtoy[i]] += a*(x->a[i]);
2167   } else {
2168     Mat      B;
2169     PetscInt *nnz_d,*nnz_o;
2170     ierr = PetscMalloc1(yy->A->rmap->N,&nnz_d);CHKERRQ(ierr);
2171     ierr = PetscMalloc1(yy->B->rmap->N,&nnz_o);CHKERRQ(ierr);
2172     ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr);
2173     ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr);
2174     ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr);
2175     ierr = MatSetBlockSizesFromMats(B,Y,Y);CHKERRQ(ierr);
2176     ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr);
2177     ierr = MatAXPYGetPreallocation_SeqAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr);
2178     ierr = MatAXPYGetPreallocation_MPIAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr);
2179     ierr = MatMPIAIJSetPreallocation(B,0,nnz_d,0,nnz_o);CHKERRQ(ierr);
2180     ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr);
2181     ierr = MatHeaderReplace(Y,B);CHKERRQ(ierr);
2182     ierr = PetscFree(nnz_d);CHKERRQ(ierr);
2183     ierr = PetscFree(nnz_o);CHKERRQ(ierr);
2184   }
2185   PetscFunctionReturn(0);
2186 }
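
/*
   Usage sketch (illustrative only): the MatStructure argument picks the branch
   above; SAME_NONZERO_PATTERN reduces to two BLAS axpy calls on the stored
   arrays, while DIFFERENT_NONZERO_PATTERN rebuilds Y with merged preallocation.
*/
#undef __FUNCT__
#define __FUNCT__ "ExampleAXPY"
PETSC_UNUSED static PetscErrorCode ExampleAXPY(Mat Y,Mat X)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatAXPY(Y,2.0,X,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); /* Y <- Y + 2*X */
  PetscFunctionReturn(0);
}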
2187 
2188 extern PetscErrorCode  MatConjugate_SeqAIJ(Mat);
2189 
2190 #undef __FUNCT__
2191 #define __FUNCT__ "MatConjugate_MPIAIJ"
2192 PetscErrorCode  MatConjugate_MPIAIJ(Mat mat)
2193 {
2194 #if defined(PETSC_USE_COMPLEX)
2195   PetscErrorCode ierr;
2196   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2197 
2198   PetscFunctionBegin;
2199   ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr);
2200   ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr);
2201 #else
2202   PetscFunctionBegin;
2203 #endif
2204   PetscFunctionReturn(0);
2205 }
2206 
2207 #undef __FUNCT__
2208 #define __FUNCT__ "MatRealPart_MPIAIJ"
2209 PetscErrorCode MatRealPart_MPIAIJ(Mat A)
2210 {
2211   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2212   PetscErrorCode ierr;
2213 
2214   PetscFunctionBegin;
2215   ierr = MatRealPart(a->A);CHKERRQ(ierr);
2216   ierr = MatRealPart(a->B);CHKERRQ(ierr);
2217   PetscFunctionReturn(0);
2218 }
2219 
2220 #undef __FUNCT__
2221 #define __FUNCT__ "MatImaginaryPart_MPIAIJ"
2222 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A)
2223 {
2224   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2225   PetscErrorCode ierr;
2226 
2227   PetscFunctionBegin;
2228   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
2229   ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
2230   PetscFunctionReturn(0);
2231 }
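
/*
   Usage sketch (illustrative only): the entry-wise operations above forward to
   the sequential blocks and need no communication.
*/
#undef __FUNCT__
#define __FUNCT__ "ExampleEntrywiseOps"
PETSC_UNUSED static PetscErrorCode ExampleEntrywiseOps(Mat A)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatConjugate(A);CHKERRQ(ierr); /* no-op unless PETSc was built with complex scalars */
  ierr = MatRealPart(A);CHKERRQ(ierr);  /* keep only the real part of each stored entry */
  PetscFunctionReturn(0);
}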
2232 
2233 #if defined(PETSC_HAVE_PBGL)
2234 
2235 #include <boost/parallel/mpi/bsp_process_group.hpp>
2236 #include <boost/graph/distributed/ilu_default_graph.hpp>
2237 #include <boost/graph/distributed/ilu_0_block.hpp>
2238 #include <boost/graph/distributed/ilu_preconditioner.hpp>
2239 #include <boost/graph/distributed/petsc/interface.hpp>
2240 #include <boost/multi_array.hpp>
2241 #include <boost/parallel/distributed_property_map.hpp>
2242 
2243 #undef __FUNCT__
2244 #define __FUNCT__ "MatILUFactorSymbolic_MPIAIJ"
2245 /*
2246   This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu>
2247 */
2248 PetscErrorCode MatILUFactorSymbolic_MPIAIJ(Mat fact,Mat A, IS isrow, IS iscol, const MatFactorInfo *info)
2249 {
2250   namespace petsc = boost::distributed::petsc;
2251 
2252   namespace graph_dist = boost::graph::distributed;
2253   using boost::graph::distributed::ilu_default::process_group_type;
2254   using boost::graph::ilu_permuted;
2255 
2256   PetscBool      row_identity, col_identity;
2257   PetscContainer c;
2258   PetscInt       m, n, M, N;
2259   PetscErrorCode ierr;
2260 
2261   PetscFunctionBegin;
2262   if (info->levels != 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only levels = 0 supported for parallel ilu");
2263   ierr = ISIdentity(isrow, &row_identity);CHKERRQ(ierr);
2264   ierr = ISIdentity(iscol, &col_identity);CHKERRQ(ierr);
2265   if (!row_identity || !col_identity) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for parallel ILU");
2266 
2267   process_group_type pg;
2268   typedef graph_dist::ilu_default::ilu_level_graph_type lgraph_type;
2269   lgraph_type  *lgraph_p   = new lgraph_type(petsc::num_global_vertices(A), pg, petsc::matrix_distribution(A, pg));
2270   lgraph_type& level_graph = *lgraph_p;
2271   graph_dist::ilu_default::graph_type&            graph(level_graph.graph);
2272 
2273   petsc::read_matrix(A, graph, get(boost::edge_weight, graph));
2274   ilu_permuted(level_graph);
2275 
2276   /* put together the new matrix */
2277   ierr = MatCreate(PetscObjectComm((PetscObject)A), fact);CHKERRQ(ierr);
2278   ierr = MatGetLocalSize(A, &m, &n);CHKERRQ(ierr);
2279   ierr = MatGetSize(A, &M, &N);CHKERRQ(ierr);
2280   ierr = MatSetSizes(fact, m, n, M, N);CHKERRQ(ierr);
2281   ierr = MatSetBlockSizesFromMats(fact,A,A);CHKERRQ(ierr);
2282   ierr = MatSetType(fact, ((PetscObject)A)->type_name);CHKERRQ(ierr);
2283   ierr = MatAssemblyBegin(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2284   ierr = MatAssemblyEnd(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2285 
2286   ierr = PetscContainerCreate(PetscObjectComm((PetscObject)A), &c);CHKERRQ(ierr);
2287   ierr = PetscContainerSetPointer(c, lgraph_p);CHKERRQ(ierr);
2288   ierr = PetscObjectCompose((PetscObject) (fact), "graph", (PetscObject) c);CHKERRQ(ierr);
2289   ierr = PetscContainerDestroy(&c);CHKERRQ(ierr);
2290   PetscFunctionReturn(0);
2291 }
2292 
2293 #undef __FUNCT__
2294 #define __FUNCT__ "MatLUFactorNumeric_MPIAIJ"
2295 PetscErrorCode MatLUFactorNumeric_MPIAIJ(Mat B,Mat A, const MatFactorInfo *info)
2296 {
2297   PetscFunctionBegin;
2298   PetscFunctionReturn(0);
2299 }
2300 
2301 #undef __FUNCT__
2302 #define __FUNCT__ "MatSolve_MPIAIJ"
2303 /*
2304   This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu>
2305 */
2306 PetscErrorCode MatSolve_MPIAIJ(Mat A, Vec b, Vec x)
2307 {
2308   namespace graph_dist = boost::graph::distributed;
2309 
2310   typedef graph_dist::ilu_default::ilu_level_graph_type lgraph_type;
2311   lgraph_type    *lgraph_p;
2312   PetscContainer c;
2313   PetscErrorCode ierr;
2314 
2315   PetscFunctionBegin;
2316   ierr = PetscObjectQuery((PetscObject) A, "graph", (PetscObject*) &c);CHKERRQ(ierr);
2317   ierr = PetscContainerGetPointer(c, (void**) &lgraph_p);CHKERRQ(ierr);
2318   ierr = VecCopy(b, x);CHKERRQ(ierr);
2319 
2320   PetscScalar *array_x;
2321   ierr = VecGetArray(x, &array_x);CHKERRQ(ierr);
2322   PetscInt sx;
2323   ierr = VecGetSize(x, &sx);CHKERRQ(ierr);
2324 
2325   PetscScalar *array_b;
2326   ierr = VecGetArray(b, &array_b);CHKERRQ(ierr);
2327   PetscInt sb;
2328   ierr = VecGetSize(b, &sb);CHKERRQ(ierr);
2329 
2330   lgraph_type& level_graph = *lgraph_p;
2331   graph_dist::ilu_default::graph_type&            graph(level_graph.graph);
2332 
2333   typedef boost::multi_array_ref<PetscScalar, 1> array_ref_type;
2334   array_ref_type                                 ref_b(array_b, boost::extents[num_vertices(graph)]);
2335   array_ref_type                                 ref_x(array_x, boost::extents[num_vertices(graph)]);
2336 
2337   typedef boost::iterator_property_map<array_ref_type::iterator,
2338                                        boost::property_map<graph_dist::ilu_default::graph_type, boost::vertex_index_t>::type>  gvector_type;
2339   gvector_type                                   vector_b(ref_b.begin(), get(boost::vertex_index, graph));
2340   gvector_type                                   vector_x(ref_x.begin(), get(boost::vertex_index, graph));
2341 
2342   ilu_set_solve(*lgraph_p, vector_b, vector_x);
2343   PetscFunctionReturn(0);
2344 }
2345 #endif
2346 
2347 #undef __FUNCT__
2348 #define __FUNCT__ "MatDestroy_MatRedundant"
2349 PetscErrorCode MatDestroy_MatRedundant(Mat A)
2350 {
2351   PetscErrorCode ierr;
2352   Mat_Redundant  *redund;
2353   PetscInt       i;
2354   PetscMPIInt    size;
2355 
2356   PetscFunctionBegin;
2357   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRQ(ierr);
2358   if (size == 1) {
2359     Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
2360     redund = a->redundant;
2361   } else {
2362     Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
2363     redund = a->redundant;
2364   }
2365   if (redund) {
2366     if (redund->matseq) { /* via MatGetSubMatrices()  */
2367       ierr = ISDestroy(&redund->isrow);CHKERRQ(ierr);
2368       ierr = ISDestroy(&redund->iscol);CHKERRQ(ierr);
2369       ierr = MatDestroy(&redund->matseq[0]);CHKERRQ(ierr);
2370       ierr = PetscFree(redund->matseq);CHKERRQ(ierr);
2371     } else {
2372       ierr = PetscFree2(redund->send_rank,redund->recv_rank);CHKERRQ(ierr);
2373       ierr = PetscFree(redund->sbuf_j);CHKERRQ(ierr);
2374       ierr = PetscFree(redund->sbuf_a);CHKERRQ(ierr);
2375       for (i=0; i<redund->nrecvs; i++) {
2376         ierr = PetscFree(redund->rbuf_j[i]);CHKERRQ(ierr);
2377         ierr = PetscFree(redund->rbuf_a[i]);CHKERRQ(ierr);
2378       }
2379       ierr = PetscFree4(redund->sbuf_nz,redund->rbuf_nz,redund->rbuf_j,redund->rbuf_a);CHKERRQ(ierr);
2380     }
2381 
2382     if (redund->psubcomm) {
2383       ierr = PetscSubcommDestroy(&redund->psubcomm);CHKERRQ(ierr);
2384     }
2385     ierr = redund->Destroy(A);CHKERRQ(ierr);
2386     ierr = PetscFree(redund);CHKERRQ(ierr);
2387   }
2388   PetscFunctionReturn(0);
2389 }
2390 
2391 #undef __FUNCT__
2392 #define __FUNCT__ "MatGetRedundantMatrix_MPIAIJ_interlaced"
2393 PetscErrorCode MatGetRedundantMatrix_MPIAIJ_interlaced(Mat mat,PetscInt nsubcomm,MPI_Comm subcomm,MatReuse reuse,Mat *matredundant)
2394 {
2395   PetscMPIInt    rank,size;
2396   MPI_Comm       comm;
2397   PetscErrorCode ierr;
2398   PetscInt       nsends=0,nrecvs=0,i,rownz_max=0,M=mat->rmap->N,N=mat->cmap->N;
2399   PetscMPIInt    *send_rank= NULL,*recv_rank=NULL,subrank,subsize;
2400   PetscInt       *rowrange = mat->rmap->range;
2401   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2402   Mat            A = aij->A,B=aij->B,C=*matredundant;
2403   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data;
2404   PetscScalar    *sbuf_a;
2405   PetscInt       nzlocal=a->nz+b->nz;
2406   PetscInt       j,cstart=mat->cmap->rstart,cend=mat->cmap->rend,row,nzA,nzB,ncols,*cworkA,*cworkB;
2407   PetscInt       rstart=mat->rmap->rstart,rend=mat->rmap->rend,*bmap=aij->garray;
2408   PetscInt       *cols,ctmp,lwrite,*rptr,l,*sbuf_j;
2409   MatScalar      *aworkA,*aworkB;
2410   PetscScalar    *vals;
2411   PetscMPIInt    tag1,tag2,tag3,imdex;
2412   MPI_Request    *s_waits1=NULL,*s_waits2=NULL,*s_waits3=NULL;
2413   MPI_Request    *r_waits1=NULL,*r_waits2=NULL,*r_waits3=NULL;
2414   MPI_Status     recv_status,*send_status;
2415   PetscInt       *sbuf_nz=NULL,*rbuf_nz=NULL,count;
2416   PetscInt       **rbuf_j=NULL;
2417   PetscScalar    **rbuf_a=NULL;
2418   Mat_Redundant  *redund =NULL;
2419 
2420   PetscFunctionBegin;
2421   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
2422   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2423   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2424   ierr = MPI_Comm_rank(subcomm,&subrank);CHKERRQ(ierr);
2425   ierr = MPI_Comm_size(subcomm,&subsize);CHKERRQ(ierr);
2426 
2427   if (reuse == MAT_REUSE_MATRIX) {
2428     if (M != mat->rmap->N || N != mat->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong global size");
2429     if (subsize == 1) {
2430       Mat_SeqAIJ *c = (Mat_SeqAIJ*)C->data;
2431       redund = c->redundant;
2432     } else {
2433       Mat_MPIAIJ *c = (Mat_MPIAIJ*)C->data;
2434       redund = c->redundant;
2435     }
2436     if (nzlocal != redund->nzlocal) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong nzlocal");
2437 
2438     nsends    = redund->nsends;
2439     nrecvs    = redund->nrecvs;
2440     send_rank = redund->send_rank;
2441     recv_rank = redund->recv_rank;
2442     sbuf_nz   = redund->sbuf_nz;
2443     rbuf_nz   = redund->rbuf_nz;
2444     sbuf_j    = redund->sbuf_j;
2445     sbuf_a    = redund->sbuf_a;
2446     rbuf_j    = redund->rbuf_j;
2447     rbuf_a    = redund->rbuf_a;
2448   }
2449 
2450   if (reuse == MAT_INITIAL_MATRIX) {
2451     PetscInt    nleftover,np_subcomm;
2452 
2453     /* get the destination processors' id send_rank, nsends and nrecvs */
2454     ierr = PetscMalloc2(size,&send_rank,size,&recv_rank);CHKERRQ(ierr);
2455 
2456     np_subcomm = size/nsubcomm;
2457     nleftover  = size - nsubcomm*np_subcomm;
2458 
2459     /* the block of code below is specific to INTERLACED ordering */
2460     /* ------------------------------------------------*/
2461     nsends = 0; nrecvs = 0;
2462     for (i=0; i<size; i++) {
2463       if (subrank == i/nsubcomm && i != rank) { /* my_subrank == other's subrank */
2464         send_rank[nsends++] = i;
2465         recv_rank[nrecvs++] = i;
2466       }
2467     }
2468     if (rank >= size - nleftover) { /* this proc is a leftover processor */
2469       i = size-nleftover-1;
2470       j = 0;
2471       while (j < nsubcomm - nleftover) {
2472         send_rank[nsends++] = i;
2473         i--; j++;
2474       }
2475     }
2476 
2477     if (nleftover && subsize == size/nsubcomm && subrank==subsize-1) { /* this proc recvs from leftover processors */
2478       for (i=0; i<nleftover; i++) {
2479         recv_rank[nrecvs++] = size-nleftover+i;
2480       }
2481     }
2482     /*----------------------------------------------*/
2483 
2484     /* allocate sbuf_j, sbuf_a */
2485     i    = nzlocal + rowrange[rank+1] - rowrange[rank] + 2;
2486     ierr = PetscMalloc1(i,&sbuf_j);CHKERRQ(ierr);
2487     ierr = PetscMalloc1((nzlocal+1),&sbuf_a);CHKERRQ(ierr);
2488     /*
2489     ierr = PetscSynchronizedPrintf(comm,"[%d] nsends %d, nrecvs %d\n",rank,nsends,nrecvs);CHKERRQ(ierr);
2490     ierr = PetscSynchronizedFlush(comm,PETSC_STDOUT);CHKERRQ(ierr);
2491      */
2492   } /* end of if (reuse == MAT_INITIAL_MATRIX) */
2493 
2494   /* copy mat's local entries into the buffers */
2495   if (reuse == MAT_INITIAL_MATRIX) {
2496     rownz_max = 0;
2497     rptr      = sbuf_j;
2498     cols      = sbuf_j + rend-rstart + 1;
2499     vals      = sbuf_a;
2500     rptr[0]   = 0;
2501     for (i=0; i<rend-rstart; i++) {
2502       row    = i + rstart;
2503       nzA    = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i];
2504       ncols  = nzA + nzB;
2505       cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i];
2506       aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i];
2507       /* load the column indices for this row into cols */
2508       lwrite = 0;
2509       for (l=0; l<nzB; l++) {
2510         if ((ctmp = bmap[cworkB[l]]) < cstart) {
2511           vals[lwrite]   = aworkB[l];
2512           cols[lwrite++] = ctmp;
2513         }
2514       }
2515       for (l=0; l<nzA; l++) {
2516         vals[lwrite]   = aworkA[l];
2517         cols[lwrite++] = cstart + cworkA[l];
2518       }
2519       for (l=0; l<nzB; l++) {
2520         if ((ctmp = bmap[cworkB[l]]) >= cend) {
2521           vals[lwrite]   = aworkB[l];
2522           cols[lwrite++] = ctmp;
2523         }
2524       }
2525       vals     += ncols;
2526       cols     += ncols;
2527       rptr[i+1] = rptr[i] + ncols;
2528       if (rownz_max < ncols) rownz_max = ncols;
2529     }
2530     if (rptr[rend-rstart] != a->nz + b->nz) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB,"rptr[%D] %D != %D + %D",rend-rstart,rptr[rend-rstart],a->nz,b->nz);
2531   } else { /* only copy matrix values into sbuf_a */
2532     rptr    = sbuf_j;
2533     vals    = sbuf_a;
2534     rptr[0] = 0;
2535     for (i=0; i<rend-rstart; i++) {
2536       row    = i + rstart;
2537       nzA    = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i];
2538       ncols  = nzA + nzB;
2539       cworkB = b->j + b->i[i];
2540       aworkA = a->a + a->i[i];
2541       aworkB = b->a + b->i[i];
2542       lwrite = 0;
2543       for (l=0; l<nzB; l++) {
2544         if ((ctmp = bmap[cworkB[l]]) < cstart) vals[lwrite++] = aworkB[l];
2545       }
2546       for (l=0; l<nzA; l++) vals[lwrite++] = aworkA[l];
2547       for (l=0; l<nzB; l++) {
2548         if ((ctmp = bmap[cworkB[l]]) >= cend) vals[lwrite++] = aworkB[l];
2549       }
2550       vals     += ncols;
2551       rptr[i+1] = rptr[i] + ncols;
2552     }
2553   } /* end of if (reuse == MAT_INITIAL_MATRIX) */
2554 
2555   /* send nzlocal to others, and recv others' nzlocal */
2556   /*--------------------------------------------------*/
2557   if (reuse == MAT_INITIAL_MATRIX) {
2558     ierr = PetscMalloc2(3*(nsends + nrecvs)+1,&s_waits3,nsends+1,&send_status);CHKERRQ(ierr);
2559 
2560     s_waits2 = s_waits3 + nsends;
2561     s_waits1 = s_waits2 + nsends;
2562     r_waits1 = s_waits1 + nsends;
2563     r_waits2 = r_waits1 + nrecvs;
2564     r_waits3 = r_waits2 + nrecvs;
2565   } else {
2566     ierr = PetscMalloc2(nsends + nrecvs +1,&s_waits3,nsends+1,&send_status);CHKERRQ(ierr);
2567 
2568     r_waits3 = s_waits3 + nsends;
2569   }
2570 
2571   ierr = PetscObjectGetNewTag((PetscObject)mat,&tag3);CHKERRQ(ierr);
2572   if (reuse == MAT_INITIAL_MATRIX) {
2573     /* get new tags to keep the communication clean */
2574     ierr = PetscObjectGetNewTag((PetscObject)mat,&tag1);CHKERRQ(ierr);
2575     ierr = PetscObjectGetNewTag((PetscObject)mat,&tag2);CHKERRQ(ierr);
2576     ierr = PetscMalloc4(nsends,&sbuf_nz,nrecvs,&rbuf_nz,nrecvs,&rbuf_j,nrecvs,&rbuf_a);CHKERRQ(ierr);
2577 
2578     /* post receives of other's nzlocal */
2579     for (i=0; i<nrecvs; i++) {
2580       ierr = MPI_Irecv(rbuf_nz+i,1,MPIU_INT,MPI_ANY_SOURCE,tag1,comm,r_waits1+i);CHKERRQ(ierr);
2581     }
2582     /* send nzlocal to others */
2583     for (i=0; i<nsends; i++) {
2584       sbuf_nz[i] = nzlocal;
2585       ierr       = MPI_Isend(sbuf_nz+i,1,MPIU_INT,send_rank[i],tag1,comm,s_waits1+i);CHKERRQ(ierr);
2586     }
2587     /* wait on receives of nzlocal; allocate space for rbuf_j, rbuf_a */
2588     count = nrecvs;
2589     while (count) {
2590       ierr = MPI_Waitany(nrecvs,r_waits1,&imdex,&recv_status);CHKERRQ(ierr);
2591 
2592       recv_rank[imdex] = recv_status.MPI_SOURCE;
2593       /* allocate rbuf_a and rbuf_j; then post receives of rbuf_j */
2594       ierr = PetscMalloc1((rbuf_nz[imdex]+1),&rbuf_a[imdex]);CHKERRQ(ierr);
2595 
2596       i = rowrange[recv_status.MPI_SOURCE+1] - rowrange[recv_status.MPI_SOURCE]; /* number of expected mat->i */
2597 
2598       rbuf_nz[imdex] += i + 2;
2599 
2600       ierr = PetscMalloc1(rbuf_nz[imdex],&rbuf_j[imdex]);CHKERRQ(ierr);
2601       ierr = MPI_Irecv(rbuf_j[imdex],rbuf_nz[imdex],MPIU_INT,recv_status.MPI_SOURCE,tag2,comm,r_waits2+imdex);CHKERRQ(ierr);
2602       count--;
2603     }
2604     /* wait on sends of nzlocal */
2605     if (nsends) {ierr = MPI_Waitall(nsends,s_waits1,send_status);CHKERRQ(ierr);}
2606     /* send mat->i,j to others, and recv from others */
2607     /*------------------------------------------------*/
2608     for (i=0; i<nsends; i++) {
2609       j    = nzlocal + rowrange[rank+1] - rowrange[rank] + 1;
2610       ierr = MPI_Isend(sbuf_j,j,MPIU_INT,send_rank[i],tag2,comm,s_waits2+i);CHKERRQ(ierr);
2611     }
2612     /* wait on receives of mat->i,j */
2613     /*------------------------------*/
2614     count = nrecvs;
2615     while (count) {
2616       ierr = MPI_Waitany(nrecvs,r_waits2,&imdex,&recv_status);CHKERRQ(ierr);
2617       if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE);
2618       count--;
2619     }
2620     /* wait on sends of mat->i,j */
2621     /*---------------------------*/
2622     if (nsends) {
2623       ierr = MPI_Waitall(nsends,s_waits2,send_status);CHKERRQ(ierr);
2624     }
2625   } /* end of if (reuse == MAT_INITIAL_MATRIX) */
2626 
2627   /* post receives, send and receive mat->a */
2628   /*----------------------------------------*/
2629   for (imdex=0; imdex<nrecvs; imdex++) {
2630     ierr = MPI_Irecv(rbuf_a[imdex],rbuf_nz[imdex],MPIU_SCALAR,recv_rank[imdex],tag3,comm,r_waits3+imdex);CHKERRQ(ierr);
2631   }
2632   for (i=0; i<nsends; i++) {
2633     ierr = MPI_Isend(sbuf_a,nzlocal,MPIU_SCALAR,send_rank[i],tag3,comm,s_waits3+i);CHKERRQ(ierr);
2634   }
2635   count = nrecvs;
2636   while (count) {
2637     ierr = MPI_Waitany(nrecvs,r_waits3,&imdex,&recv_status);CHKERRQ(ierr);
2638     if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE);
2639     count--;
2640   }
2641   if (nsends) {
2642     ierr = MPI_Waitall(nsends,s_waits3,send_status);CHKERRQ(ierr);
2643   }
2644 
2645   ierr = PetscFree2(s_waits3,send_status);CHKERRQ(ierr);
2646 
2647   /* create redundant matrix */
2648   /*-------------------------*/
2649   if (reuse == MAT_INITIAL_MATRIX) {
2650     const PetscInt *range;
2651     PetscInt       rstart_sub,rend_sub,mloc_sub;
2652 
2653     /* compute rownz_max for preallocation */
2654     for (imdex=0; imdex<nrecvs; imdex++) {
2655       j    = rowrange[recv_rank[imdex]+1] - rowrange[recv_rank[imdex]];
2656       rptr = rbuf_j[imdex];
2657       for (i=0; i<j; i++) {
2658         ncols = rptr[i+1] - rptr[i];
2659         if (rownz_max < ncols) rownz_max = ncols;
2660       }
2661     }
2662 
2663     ierr = MatCreate(subcomm,&C);CHKERRQ(ierr);
2664 
2665     /* get local size of redundant matrix
2666        - mloc_sub is chosen for PETSC_SUBCOMM_INTERLACED; it works for other types, but may not be efficient! */
2667     ierr = MatGetOwnershipRanges(mat,&range);CHKERRQ(ierr);
2668     rstart_sub = range[nsubcomm*subrank];
2669     if (subrank+1 < subsize) { /* not the last proc in subcomm */
2670       rend_sub = range[nsubcomm*(subrank+1)];
2671     } else {
2672       rend_sub = mat->rmap->N;
2673     }
2674     mloc_sub = rend_sub - rstart_sub;
2675 
2676     if (M == N) {
2677       ierr = MatSetSizes(C,mloc_sub,mloc_sub,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr);
2678     } else { /* non-square matrix */
2679       ierr = MatSetSizes(C,mloc_sub,PETSC_DECIDE,PETSC_DECIDE,mat->cmap->N);CHKERRQ(ierr);
2680     }
2681     ierr = MatSetBlockSizesFromMats(C,mat,mat);CHKERRQ(ierr);
2682     ierr = MatSetFromOptions(C);CHKERRQ(ierr);
2683     ierr = MatSeqAIJSetPreallocation(C,rownz_max,NULL);CHKERRQ(ierr);
2684     ierr = MatMPIAIJSetPreallocation(C,rownz_max,NULL,rownz_max,NULL);CHKERRQ(ierr);
2685   } else {
2686     C = *matredundant;
2687   }
2688 
2689   /* insert local matrix entries */
2690   rptr = sbuf_j;
2691   cols = sbuf_j + rend-rstart + 1;
2692   vals = sbuf_a;
2693   for (i=0; i<rend-rstart; i++) {
2694     row   = i + rstart;
2695     ncols = rptr[i+1] - rptr[i];
2696     ierr  = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
2697     vals += ncols;
2698     cols += ncols;
2699   }
2700   /* insert received matrix entries */
2701   for (imdex=0; imdex<nrecvs; imdex++) {
2702     rstart = rowrange[recv_rank[imdex]];
2703     rend   = rowrange[recv_rank[imdex]+1];
2704     /* printf("[%d] insert rows %d - %d\n",rank,rstart,rend-1); */
2705     rptr   = rbuf_j[imdex];
2706     cols   = rbuf_j[imdex] + rend-rstart + 1;
2707     vals   = rbuf_a[imdex];
2708     for (i=0; i<rend-rstart; i++) {
2709       row   = i + rstart;
2710       ncols = rptr[i+1] - rptr[i];
2711       ierr  = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
2712       vals += ncols;
2713       cols += ncols;
2714     }
2715   }
2716   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2717   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2718 
2719   if (reuse == MAT_INITIAL_MATRIX) {
2720     *matredundant = C;
2721 
2722     /* create a supporting struct and attach it to C for reuse */
2723     ierr = PetscNewLog(C,&redund);CHKERRQ(ierr);
2724     if (subsize == 1) {
2725       Mat_SeqAIJ *c = (Mat_SeqAIJ*)C->data;
2726       c->redundant = redund;
2727     } else {
2728       Mat_MPIAIJ *c = (Mat_MPIAIJ*)C->data;
2729       c->redundant = redund;
2730     }
2731 
2732     redund->nzlocal   = nzlocal;
2733     redund->nsends    = nsends;
2734     redund->nrecvs    = nrecvs;
2735     redund->send_rank = send_rank;
2736     redund->recv_rank = recv_rank;
2737     redund->sbuf_nz   = sbuf_nz;
2738     redund->rbuf_nz   = rbuf_nz;
2739     redund->sbuf_j    = sbuf_j;
2740     redund->sbuf_a    = sbuf_a;
2741     redund->rbuf_j    = rbuf_j;
2742     redund->rbuf_a    = rbuf_a;
2743     redund->psubcomm  = NULL;
2744 
2745     redund->Destroy = C->ops->destroy;
2746     C->ops->destroy = MatDestroy_MatRedundant;
2747   }
2748   PetscFunctionReturn(0);
2749 }
2750 
2751 #undef __FUNCT__
2752 #define __FUNCT__ "MatGetRedundantMatrix_MPIAIJ"
2753 PetscErrorCode MatGetRedundantMatrix_MPIAIJ(Mat mat,PetscInt nsubcomm,MPI_Comm subcomm,MatReuse reuse,Mat *matredundant)
2754 {
2755   PetscErrorCode ierr;
2756   MPI_Comm       comm;
2757   PetscMPIInt    size,subsize;
2758   PetscInt       mloc_sub,rstart,rend,M=mat->rmap->N,N=mat->cmap->N;
2759   Mat_Redundant  *redund=NULL;
2760   PetscSubcomm   psubcomm=NULL;
2761   MPI_Comm       subcomm_in=subcomm;
2762   Mat            *matseq;
2763   IS             isrow,iscol;
2764 
2765   PetscFunctionBegin;
2766   if (subcomm_in == MPI_COMM_NULL) { /* user does not provide subcomm */
2767     if (reuse ==  MAT_INITIAL_MATRIX) {
2768       /* create psubcomm, then get subcomm */
2769       ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
2770       ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2771       if (nsubcomm < 1 || nsubcomm > size) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"nsubcomm must be between 1 and %D",size);
2772 
2773       ierr = PetscSubcommCreate(comm,&psubcomm);CHKERRQ(ierr);
2774       ierr = PetscSubcommSetNumber(psubcomm,nsubcomm);CHKERRQ(ierr);
2775       ierr = PetscSubcommSetType(psubcomm,PETSC_SUBCOMM_CONTIGUOUS);CHKERRQ(ierr);
2776       ierr = PetscSubcommSetFromOptions(psubcomm);CHKERRQ(ierr);
2777       subcomm = psubcomm->comm;
2778     } else { /* retrieve psubcomm and subcomm */
2779       ierr = PetscObjectGetComm((PetscObject)(*matredundant),&subcomm);CHKERRQ(ierr);
2780       ierr = MPI_Comm_size(subcomm,&subsize);CHKERRQ(ierr);
2781       if (subsize == 1) {
2782         Mat_SeqAIJ *c = (Mat_SeqAIJ*)(*matredundant)->data;
2783         redund = c->redundant;
2784       } else {
2785         Mat_MPIAIJ *c = (Mat_MPIAIJ*)(*matredundant)->data;
2786         redund = c->redundant;
2787       }
2788       psubcomm = redund->psubcomm;
2789     }
2790     if (psubcomm->type == PETSC_SUBCOMM_INTERLACED) {
2791       ierr = MatGetRedundantMatrix_MPIAIJ_interlaced(mat,nsubcomm,subcomm,reuse,matredundant);CHKERRQ(ierr);
2792       if (reuse ==  MAT_INITIAL_MATRIX) { /* psubcomm is created in this routine, free it in MatDestroy_MatRedundant() */
2793         ierr = MPI_Comm_size(psubcomm->comm,&subsize);CHKERRQ(ierr);
2794         if (subsize == 1) {
2795           Mat_SeqAIJ *c = (Mat_SeqAIJ*)(*matredundant)->data;
2796           c->redundant->psubcomm = psubcomm;
2797         } else {
2798           Mat_MPIAIJ *c = (Mat_MPIAIJ*)(*matredundant)->data;
2799           c->redundant->psubcomm = psubcomm;
2800         }
2801       }
2802       PetscFunctionReturn(0);
2803     }
2804   }
2805 
2806   /* use MPI subcomm via MatGetSubMatrices(); use subcomm_in or psubcomm->comm (psubcomm->type != INTERLACED) */
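       /* each process of the subcomm extracts its contiguous share of all M rows (and
          all N columns) of mat as a sequential matrix; the pieces are then concatenated
          into the redundant matrix that lives on the subcomm */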
2807   ierr = MPI_Comm_size(subcomm,&subsize);CHKERRQ(ierr);
2808   if (reuse == MAT_INITIAL_MATRIX) {
2809     /* create a local sequential matrix matseq[0] */
2810     mloc_sub = PETSC_DECIDE;
2811     ierr = PetscSplitOwnership(subcomm,&mloc_sub,&M);CHKERRQ(ierr);
2812     ierr = MPI_Scan(&mloc_sub,&rend,1,MPIU_INT,MPI_SUM,subcomm);CHKERRQ(ierr);
2813     rstart = rend - mloc_sub;
2814     ierr = ISCreateStride(PETSC_COMM_SELF,mloc_sub,rstart,1,&isrow);CHKERRQ(ierr);
2815     ierr = ISCreateStride(PETSC_COMM_SELF,N,0,1,&iscol);CHKERRQ(ierr);
2816   } else { /* reuse == MAT_REUSE_MATRIX */
2817     if (subsize == 1) {
2818       Mat_SeqAIJ *c = (Mat_SeqAIJ*)(*matredundant)->data;
2819       redund = c->redundant;
2820     } else {
2821       Mat_MPIAIJ *c = (Mat_MPIAIJ*)(*matredundant)->data;
2822       redund = c->redundant;
2823     }
2824 
2825     isrow  = redund->isrow;
2826     iscol  = redund->iscol;
2827     matseq = redund->matseq;
2828   }
2829   ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,reuse,&matseq);CHKERRQ(ierr);
2830   ierr = MatCreateMPIAIJConcatenateSeqAIJ(subcomm,matseq[0],PETSC_DECIDE,reuse,matredundant);CHKERRQ(ierr);
2831 
2832   if (reuse == MAT_INITIAL_MATRIX) {
2833     /* create a supporting struct and attach it to C for reuse */
2834     ierr = PetscNewLog(*matredundant,&redund);CHKERRQ(ierr);
2835     if (subsize == 1) {
2836       Mat_SeqAIJ *c = (Mat_SeqAIJ*)(*matredundant)->data;
2837       c->redundant = redund;
2838     } else {
2839       Mat_MPIAIJ *c = (Mat_MPIAIJ*)(*matredundant)->data;
2840       c->redundant = redund;
2841     }
2842     redund->isrow    = isrow;
2843     redund->iscol    = iscol;
2844     redund->matseq   = matseq;
2845     redund->psubcomm = psubcomm;
2846     redund->Destroy               = (*matredundant)->ops->destroy;
2847     (*matredundant)->ops->destroy = MatDestroy_MatRedundant;
2848   }
2849   PetscFunctionReturn(0);
2850 }
2851 
2852 #undef __FUNCT__
2853 #define __FUNCT__ "MatGetRowMaxAbs_MPIAIJ"
2854 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2855 {
2856   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2857   PetscErrorCode ierr;
2858   PetscInt       i,*idxb = 0;
2859   PetscScalar    *va,*vb;
2860   Vec            vtmp;
2861 
2862   PetscFunctionBegin;
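       /* take the largest-magnitude entries of the diagonal block into v first, then those
          of the off-diagonal block into vtmp, and merge the two; off-diagonal column
          indices are translated back to global numbering through garray.  The MinAbs,
          Min and Max variants below follow the same pattern. */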
2863   ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr);
2864   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2865   if (idx) {
2866     for (i=0; i<A->rmap->n; i++) {
2867       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2868     }
2869   }
2870 
2871   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
2872   if (idx) {
2873     ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr);
2874   }
2875   ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
2876   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
2877 
2878   for (i=0; i<A->rmap->n; i++) {
2879     if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
2880       va[i] = vb[i];
2881       if (idx) idx[i] = a->garray[idxb[i]];
2882     }
2883   }
2884 
2885   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2886   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
2887   ierr = PetscFree(idxb);CHKERRQ(ierr);
2888   ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
2889   PetscFunctionReturn(0);
2890 }
2891 
2892 #undef __FUNCT__
2893 #define __FUNCT__ "MatGetRowMinAbs_MPIAIJ"
2894 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2895 {
2896   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2897   PetscErrorCode ierr;
2898   PetscInt       i,*idxb = 0;
2899   PetscScalar    *va,*vb;
2900   Vec            vtmp;
2901 
2902   PetscFunctionBegin;
2903   ierr = MatGetRowMinAbs(a->A,v,idx);CHKERRQ(ierr);
2904   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2905   if (idx) {
2906     for (i=0; i<A->rmap->n; i++) {
2907       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2908     }
2909   }
2910 
2911   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
2912   if (idx) {
2913     ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr);
2914   }
2915   ierr = MatGetRowMinAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
2916   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
2917 
2918   for (i=0; i<A->rmap->n; i++) {
2919     if (PetscAbsScalar(va[i]) > PetscAbsScalar(vb[i])) {
2920       va[i] = vb[i];
2921       if (idx) idx[i] = a->garray[idxb[i]];
2922     }
2923   }
2924 
2925   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2926   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
2927   ierr = PetscFree(idxb);CHKERRQ(ierr);
2928   ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
2929   PetscFunctionReturn(0);
2930 }
2931 
2932 #undef __FUNCT__
2933 #define __FUNCT__ "MatGetRowMin_MPIAIJ"
2934 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2935 {
2936   Mat_MPIAIJ     *mat   = (Mat_MPIAIJ*) A->data;
2937   PetscInt       n      = A->rmap->n;
2938   PetscInt       cstart = A->cmap->rstart;
2939   PetscInt       *cmap  = mat->garray;
2940   PetscInt       *diagIdx, *offdiagIdx;
2941   Vec            diagV, offdiagV;
2942   PetscScalar    *a, *diagA, *offdiagA;
2943   PetscInt       r;
2944   PetscErrorCode ierr;
2945 
2946   PetscFunctionBegin;
2947   ierr = PetscMalloc2(n,&diagIdx,n,&offdiagIdx);CHKERRQ(ierr);
2948   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr);
2949   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr);
2950   ierr = MatGetRowMin(mat->A, diagV,    diagIdx);CHKERRQ(ierr);
2951   ierr = MatGetRowMin(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
2952   ierr = VecGetArray(v,        &a);CHKERRQ(ierr);
2953   ierr = VecGetArray(diagV,    &diagA);CHKERRQ(ierr);
2954   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2955   for (r = 0; r < n; ++r) {
2956     if (PetscAbsScalar(diagA[r]) <= PetscAbsScalar(offdiagA[r])) {
2957       a[r]   = diagA[r];
2958       idx[r] = cstart + diagIdx[r];
2959     } else {
2960       a[r]   = offdiagA[r];
2961       idx[r] = cmap[offdiagIdx[r]];
2962     }
2963   }
2964   ierr = VecRestoreArray(v,        &a);CHKERRQ(ierr);
2965   ierr = VecRestoreArray(diagV,    &diagA);CHKERRQ(ierr);
2966   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2967   ierr = VecDestroy(&diagV);CHKERRQ(ierr);
2968   ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
2969   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
2970   PetscFunctionReturn(0);
2971 }
2972 
2973 #undef __FUNCT__
2974 #define __FUNCT__ "MatGetRowMax_MPIAIJ"
2975 PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2976 {
2977   Mat_MPIAIJ     *mat   = (Mat_MPIAIJ*) A->data;
2978   PetscInt       n      = A->rmap->n;
2979   PetscInt       cstart = A->cmap->rstart;
2980   PetscInt       *cmap  = mat->garray;
2981   PetscInt       *diagIdx, *offdiagIdx;
2982   Vec            diagV, offdiagV;
2983   PetscScalar    *a, *diagA, *offdiagA;
2984   PetscInt       r;
2985   PetscErrorCode ierr;
2986 
2987   PetscFunctionBegin;
2988   ierr = PetscMalloc2(n,&diagIdx,n,&offdiagIdx);CHKERRQ(ierr);
2989   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr);
2990   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr);
2991   ierr = MatGetRowMax(mat->A, diagV,    diagIdx);CHKERRQ(ierr);
2992   ierr = MatGetRowMax(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
2993   ierr = VecGetArray(v,        &a);CHKERRQ(ierr);
2994   ierr = VecGetArray(diagV,    &diagA);CHKERRQ(ierr);
2995   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2996   for (r = 0; r < n; ++r) {
2997     if (PetscAbsScalar(diagA[r]) >= PetscAbsScalar(offdiagA[r])) {
2998       a[r]   = diagA[r];
2999       idx[r] = cstart + diagIdx[r];
3000     } else {
3001       a[r]   = offdiagA[r];
3002       idx[r] = cmap[offdiagIdx[r]];
3003     }
3004   }
3005   ierr = VecRestoreArray(v,        &a);CHKERRQ(ierr);
3006   ierr = VecRestoreArray(diagV,    &diagA);CHKERRQ(ierr);
3007   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
3008   ierr = VecDestroy(&diagV);CHKERRQ(ierr);
3009   ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
3010   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
3011   PetscFunctionReturn(0);
3012 }
3013 
3014 #undef __FUNCT__
3015 #define __FUNCT__ "MatGetSeqNonzeroStructure_MPIAIJ"
3016 PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat,Mat *newmat)
3017 {
3018   PetscErrorCode ierr;
3019   Mat            *dummy;
3020 
3021   PetscFunctionBegin;
3022   ierr    = MatGetSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy);CHKERRQ(ierr);
3023   *newmat = *dummy;
3024   ierr    = PetscFree(dummy);CHKERRQ(ierr);
3025   PetscFunctionReturn(0);
3026 }
3027 
3028 #undef __FUNCT__
3029 #define __FUNCT__ "MatInvertBlockDiagonal_MPIAIJ"
3030 PetscErrorCode  MatInvertBlockDiagonal_MPIAIJ(Mat A,const PetscScalar **values)
3031 {
3032   Mat_MPIAIJ     *a = (Mat_MPIAIJ*) A->data;
3033   PetscErrorCode ierr;
3034 
3035   PetscFunctionBegin;
3036   ierr = MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr);
3037   PetscFunctionReturn(0);
3038 }
3039 
3040 #undef __FUNCT__
3041 #define __FUNCT__ "MatSetRandom_MPIAIJ"
3042 static PetscErrorCode  MatSetRandom_MPIAIJ(Mat x,PetscRandom rctx)
3043 {
3044   PetscErrorCode ierr;
3045   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)x->data;
3046 
3047   PetscFunctionBegin;
3048   ierr = MatSetRandom(aij->A,rctx);CHKERRQ(ierr);
3049   ierr = MatSetRandom(aij->B,rctx);CHKERRQ(ierr);
3050   ierr = MatAssemblyBegin(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3051   ierr = MatAssemblyEnd(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3052   PetscFunctionReturn(0);
3053 }
3054 
3055 /* -------------------------------------------------------------------*/
3056 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
3057                                        MatGetRow_MPIAIJ,
3058                                        MatRestoreRow_MPIAIJ,
3059                                        MatMult_MPIAIJ,
3060                                 /* 4*/ MatMultAdd_MPIAIJ,
3061                                        MatMultTranspose_MPIAIJ,
3062                                        MatMultTransposeAdd_MPIAIJ,
3063 #if defined(PETSC_HAVE_PBGL)
3064                                        MatSolve_MPIAIJ,
3065 #else
3066                                        0,
3067 #endif
3068                                        0,
3069                                        0,
3070                                 /*10*/ 0,
3071                                        0,
3072                                        0,
3073                                        MatSOR_MPIAIJ,
3074                                        MatTranspose_MPIAIJ,
3075                                 /*15*/ MatGetInfo_MPIAIJ,
3076                                        MatEqual_MPIAIJ,
3077                                        MatGetDiagonal_MPIAIJ,
3078                                        MatDiagonalScale_MPIAIJ,
3079                                        MatNorm_MPIAIJ,
3080                                 /*20*/ MatAssemblyBegin_MPIAIJ,
3081                                        MatAssemblyEnd_MPIAIJ,
3082                                        MatSetOption_MPIAIJ,
3083                                        MatZeroEntries_MPIAIJ,
3084                                 /*24*/ MatZeroRows_MPIAIJ,
3085                                        0,
3086 #if defined(PETSC_HAVE_PBGL)
3087                                        0,
3088 #else
3089                                        0,
3090 #endif
3091                                        0,
3092                                        0,
3093                                 /*29*/ MatSetUp_MPIAIJ,
3094 #if defined(PETSC_HAVE_PBGL)
3095                                        0,
3096 #else
3097                                        0,
3098 #endif
3099                                        0,
3100                                        0,
3101                                        0,
3102                                 /*34*/ MatDuplicate_MPIAIJ,
3103                                        0,
3104                                        0,
3105                                        0,
3106                                        0,
3107                                 /*39*/ MatAXPY_MPIAIJ,
3108                                        MatGetSubMatrices_MPIAIJ,
3109                                        MatIncreaseOverlap_MPIAIJ,
3110                                        MatGetValues_MPIAIJ,
3111                                        MatCopy_MPIAIJ,
3112                                 /*44*/ MatGetRowMax_MPIAIJ,
3113                                        MatScale_MPIAIJ,
3114                                        0,
3115                                        0,
3116                                        MatZeroRowsColumns_MPIAIJ,
3117                                 /*49*/ MatSetRandom_MPIAIJ,
3118                                        0,
3119                                        0,
3120                                        0,
3121                                        0,
3122                                 /*54*/ MatFDColoringCreate_MPIXAIJ,
3123                                        0,
3124                                        MatSetUnfactored_MPIAIJ,
3125                                        MatPermute_MPIAIJ,
3126                                        0,
3127                                 /*59*/ MatGetSubMatrix_MPIAIJ,
3128                                        MatDestroy_MPIAIJ,
3129                                        MatView_MPIAIJ,
3130                                        0,
3131                                        MatMatMatMult_MPIAIJ_MPIAIJ_MPIAIJ,
3132                                 /*64*/ MatMatMatMultSymbolic_MPIAIJ_MPIAIJ_MPIAIJ,
3133                                        MatMatMatMultNumeric_MPIAIJ_MPIAIJ_MPIAIJ,
3134                                        0,
3135                                        0,
3136                                        0,
3137                                 /*69*/ MatGetRowMaxAbs_MPIAIJ,
3138                                        MatGetRowMinAbs_MPIAIJ,
3139                                        0,
3140                                        MatSetColoring_MPIAIJ,
3141                                        0,
3142                                        MatSetValuesAdifor_MPIAIJ,
3143                                 /*75*/ MatFDColoringApply_AIJ,
3144                                        0,
3145                                        0,
3146                                        0,
3147                                        MatFindZeroDiagonals_MPIAIJ,
3148                                 /*80*/ 0,
3149                                        0,
3150                                        0,
3151                                 /*83*/ MatLoad_MPIAIJ,
3152                                        0,
3153                                        0,
3154                                        0,
3155                                        0,
3156                                        0,
3157                                 /*89*/ MatMatMult_MPIAIJ_MPIAIJ,
3158                                        MatMatMultSymbolic_MPIAIJ_MPIAIJ,
3159                                        MatMatMultNumeric_MPIAIJ_MPIAIJ,
3160                                        MatPtAP_MPIAIJ_MPIAIJ,
3161                                        MatPtAPSymbolic_MPIAIJ_MPIAIJ,
3162                                 /*94*/ MatPtAPNumeric_MPIAIJ_MPIAIJ,
3163                                        0,
3164                                        0,
3165                                        0,
3166                                        0,
3167                                 /*99*/ 0,
3168                                        0,
3169                                        0,
3170                                        MatConjugate_MPIAIJ,
3171                                        0,
3172                                 /*104*/MatSetValuesRow_MPIAIJ,
3173                                        MatRealPart_MPIAIJ,
3174                                        MatImaginaryPart_MPIAIJ,
3175                                        0,
3176                                        0,
3177                                 /*109*/0,
3178                                        MatGetRedundantMatrix_MPIAIJ,
3179                                        MatGetRowMin_MPIAIJ,
3180                                        0,
3181                                        0,
3182                                 /*114*/MatGetSeqNonzeroStructure_MPIAIJ,
3183                                        0,
3184                                        0,
3185                                        0,
3186                                        0,
3187                                 /*119*/0,
3188                                        0,
3189                                        0,
3190                                        0,
3191                                        MatGetMultiProcBlock_MPIAIJ,
3192                                 /*124*/MatFindNonzeroRows_MPIAIJ,
3193                                        MatGetColumnNorms_MPIAIJ,
3194                                        MatInvertBlockDiagonal_MPIAIJ,
3195                                        0,
3196                                        MatGetSubMatricesParallel_MPIAIJ,
3197                                 /*129*/0,
3198                                        MatTransposeMatMult_MPIAIJ_MPIAIJ,
3199                                        MatTransposeMatMultSymbolic_MPIAIJ_MPIAIJ,
3200                                        MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ,
3201                                        0,
3202                                 /*134*/0,
3203                                        0,
3204                                        0,
3205                                        0,
3206                                        0,
3207                                 /*139*/0,
3208                                        0,
3209                                        0,
3210                                        MatFDColoringSetUp_MPIXAIJ
3211 };
3212 
3213 /* ----------------------------------------------------------------------------------------*/
3214 
3215 #undef __FUNCT__
3216 #define __FUNCT__ "MatStoreValues_MPIAIJ"
3217 PetscErrorCode  MatStoreValues_MPIAIJ(Mat mat)
3218 {
3219   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
3220   PetscErrorCode ierr;
3221 
3222   PetscFunctionBegin;
3223   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
3224   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
3225   PetscFunctionReturn(0);
3226 }
3227 
3228 #undef __FUNCT__
3229 #define __FUNCT__ "MatRetrieveValues_MPIAIJ"
3230 PetscErrorCode  MatRetrieveValues_MPIAIJ(Mat mat)
3231 {
3232   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
3233   PetscErrorCode ierr;
3234 
3235   PetscFunctionBegin;
3236   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
3237   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
3238   PetscFunctionReturn(0);
3239 }
3240 
3241 #undef __FUNCT__
3242 #define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ"
3243 PetscErrorCode  MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
3244 {
3245   Mat_MPIAIJ     *b;
3246   PetscErrorCode ierr;
3247 
3248   PetscFunctionBegin;
3249   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3250   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3251   b = (Mat_MPIAIJ*)B->data;
3252 
3253   if (!B->preallocated) {
3254     /* Explicitly create 2 MATSEQAIJ matrices. */
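         /* b->A holds the diagonal block (local rows by local columns); b->B holds the
            off-diagonal block and is created with the full global column width, to be
            compacted to just the needed ghost columns during assembly */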
3255     ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
3256     ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr);
3257     ierr = MatSetBlockSizesFromMats(b->A,B,B);CHKERRQ(ierr);
3258     ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr);
3259     ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->A);CHKERRQ(ierr);
3260     ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
3261     ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr);
3262     ierr = MatSetBlockSizesFromMats(b->B,B,B);CHKERRQ(ierr);
3263     ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr);
3264     ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->B);CHKERRQ(ierr);
3265   }
3266 
3267   ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr);
3268   ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr);
3269   B->preallocated = PETSC_TRUE;
3270   PetscFunctionReturn(0);
3271 }
3272 
3273 #undef __FUNCT__
3274 #define __FUNCT__ "MatDuplicate_MPIAIJ"
3275 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
3276 {
3277   Mat            mat;
3278   Mat_MPIAIJ     *a,*oldmat = (Mat_MPIAIJ*)matin->data;
3279   PetscErrorCode ierr;
3280 
3281   PetscFunctionBegin;
3282   *newmat = 0;
3283   ierr    = MatCreate(PetscObjectComm((PetscObject)matin),&mat);CHKERRQ(ierr);
3284   ierr    = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr);
3285   ierr    = MatSetBlockSizesFromMats(mat,matin,matin);CHKERRQ(ierr);
3286   ierr    = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr);
3287   ierr    = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
3288   a       = (Mat_MPIAIJ*)mat->data;
3289 
3290   mat->factortype   = matin->factortype;
3291   mat->assembled    = PETSC_TRUE;
3292   mat->insertmode   = NOT_SET_VALUES;
3293   mat->preallocated = PETSC_TRUE;
3294 
3295   a->size         = oldmat->size;
3296   a->rank         = oldmat->rank;
3297   a->donotstash   = oldmat->donotstash;
3298   a->roworiented  = oldmat->roworiented;
3299   a->rowindices   = 0;
3300   a->rowvalues    = 0;
3301   a->getrowactive = PETSC_FALSE;
3302 
3303   ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr);
3304   ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr);
3305 
3306   if (oldmat->colmap) {
3307 #if defined(PETSC_USE_CTABLE)
3308     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
3309 #else
3310     ierr = PetscMalloc1((mat->cmap->N),&a->colmap);CHKERRQ(ierr);
3311     ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
3312     ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
3313 #endif
3314   } else a->colmap = 0;
3315   if (oldmat->garray) {
3316     PetscInt len;
3317     len  = oldmat->B->cmap->n;
3318     ierr = PetscMalloc1((len+1),&a->garray);CHKERRQ(ierr);
3319     ierr = PetscLogObjectMemory((PetscObject)mat,len*sizeof(PetscInt));CHKERRQ(ierr);
3320     if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); }
3321   } else a->garray = 0;
3322 
3323   ierr    = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
3324   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->lvec);CHKERRQ(ierr);
3325   ierr    = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
3326   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx);CHKERRQ(ierr);
3327   ierr    = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
3328   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr);
3329   ierr    = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
3330   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->B);CHKERRQ(ierr);
3331   ierr    = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr);
3332   *newmat = mat;
3333   PetscFunctionReturn(0);
3334 }
3335 
3336 
3337 
3338 #undef __FUNCT__
3339 #define __FUNCT__ "MatLoad_MPIAIJ"
3340 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer)
3341 {
3342   PetscScalar    *vals,*svals;
3343   MPI_Comm       comm;
3344   PetscErrorCode ierr;
3345   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
3346   PetscInt       i,nz,j,rstart,rend,mmax,maxnz = 0,grows,gcols;
3347   PetscInt       header[4],*rowlengths = 0,M,N,m,*cols;
3348   PetscInt       *ourlens = NULL,*procsnz = NULL,*offlens = NULL,jj,*mycols,*smycols;
3349   PetscInt       cend,cstart,n,*rowners,sizesset=1;
3350   int            fd;
3351   PetscInt       bs = 1;
3352 
3353   PetscFunctionBegin;
3354   ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr);
3355   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3356   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3357   if (!rank) {
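         /* binary header layout: header[0] = MAT_FILE_CLASSID, header[1] = global rows,
            header[2] = global columns, header[3] = total number of nonzeros */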
3358     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
3359     ierr = PetscBinaryRead(fd,(char*)header,4,PETSC_INT);CHKERRQ(ierr);
3360     if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not a matrix object");
3361   }
3362 
3363   ierr = PetscOptionsBegin(comm,NULL,"Options for loading MPIAIJ matrix","Mat");CHKERRQ(ierr);
3364   ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,NULL);CHKERRQ(ierr);
3365   ierr = PetscOptionsEnd();CHKERRQ(ierr);
3366 
3367   if (newMat->rmap->n < 0 && newMat->rmap->N < 0 && newMat->cmap->n < 0 && newMat->cmap->N < 0) sizesset = 0;
3368 
3369   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
3370   M    = header[1]; N = header[2];
3371   /* If global rows/cols are set to PETSC_DECIDE, set it to the sizes given in the file */
3372   if (sizesset && newMat->rmap->N < 0) newMat->rmap->N = M;
3373   if (sizesset && newMat->cmap->N < 0) newMat->cmap->N = N;
3374 
3375   /* If global sizes are set, check if they are consistent with that given in the file */
3376   if (sizesset) {
3377     ierr = MatGetSize(newMat,&grows,&gcols);CHKERRQ(ierr);
3378   }
3379   if (sizesset && newMat->rmap->N != grows) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows: Matrix in file has (%d) and input matrix has (%d)",M,grows);
3380   if (sizesset && newMat->cmap->N != gcols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols: Matrix in file has (%d) and input matrix has (%d)",N,gcols);
3381 
3382   /* determine ownership of all (block) rows */
3383   if (M%bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows (%d) and block size (%d)",M,bs);
3384   if (newMat->rmap->n < 0) m = bs*((M/bs)/size + (((M/bs) % size) > rank));    /* PETSC_DECIDE */
3385   else m = newMat->rmap->n; /* Set by user */
3386 
3387   ierr = PetscMalloc1((size+1),&rowners);CHKERRQ(ierr);
3388   ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
3389 
3390   /* First process needs enough room for process with most rows */
3391   if (!rank) {
3392     mmax = rowners[1];
3393     for (i=2; i<=size; i++) {
3394       mmax = PetscMax(mmax, rowners[i]);
3395     }
3396   } else mmax = -1;             /* unused, but compilers complain */
3397 
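       /* turn the gathered per-process row counts into cumulative offsets so that
          rowners[r] is the first global row owned by process r */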
3398   rowners[0] = 0;
3399   for (i=2; i<=size; i++) {
3400     rowners[i] += rowners[i-1];
3401   }
3402   rstart = rowners[rank];
3403   rend   = rowners[rank+1];
3404 
3405   /* distribute row lengths to all processors */
3406   ierr = PetscMalloc2(m,&ourlens,m,&offlens);CHKERRQ(ierr);
3407   if (!rank) {
3408     ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr);
3409     ierr = PetscMalloc1(mmax,&rowlengths);CHKERRQ(ierr);
3410     ierr = PetscCalloc1(size,&procsnz);CHKERRQ(ierr);
3411     for (j=0; j<m; j++) {
3412       procsnz[0] += ourlens[j];
3413     }
3414     for (i=1; i<size; i++) {
3415       ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr);
3416       /* calculate the number of nonzeros on each processor */
3417       for (j=0; j<rowners[i+1]-rowners[i]; j++) {
3418         procsnz[i] += rowlengths[j];
3419       }
3420       ierr = MPIULong_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
3421     }
3422     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
3423   } else {
3424     ierr = MPIULong_Recv(ourlens,m,MPIU_INT,0,tag,comm);CHKERRQ(ierr);
3425   }
3426 
3427   if (!rank) {
3428     /* determine max buffer needed and allocate it */
3429     maxnz = 0;
3430     for (i=0; i<size; i++) {
3431       maxnz = PetscMax(maxnz,procsnz[i]);
3432     }
3433     ierr = PetscMalloc1(maxnz,&cols);CHKERRQ(ierr);
3434 
3435     /* read in my part of the matrix column indices  */
3436     nz   = procsnz[0];
3437     ierr = PetscMalloc1(nz,&mycols);CHKERRQ(ierr);
3438     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
3439 
3440     /* read in everyone else's rows and ship them off */
3441     for (i=1; i<size; i++) {
3442       nz   = procsnz[i];
3443       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
3444       ierr = MPIULong_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
3445     }
3446     ierr = PetscFree(cols);CHKERRQ(ierr);
3447   } else {
3448     /* determine buffer space needed for message */
3449     nz = 0;
3450     for (i=0; i<m; i++) {
3451       nz += ourlens[i];
3452     }
3453     ierr = PetscMalloc1(nz,&mycols);CHKERRQ(ierr);
3454 
3455     /* receive message of column indices*/
3456     ierr = MPIULong_Recv(mycols,nz,MPIU_INT,0,tag,comm);CHKERRQ(ierr);
3457   }
3458 
3459   /* determine column ownership if matrix is not square */
3460   if (N != M) {
3461     if (newMat->cmap->n < 0) n = N/size + ((N % size) > rank);
3462     else n = newMat->cmap->n;
3463     ierr   = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3464     cstart = cend - n;
3465   } else {
3466     cstart = rstart;
3467     cend   = rend;
3468     n      = cend - cstart;
3469   }
3470 
3471   /* loop over local rows, determining number of off diagonal entries */
3472   ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr);
3473   jj   = 0;
3474   for (i=0; i<m; i++) {
3475     for (j=0; j<ourlens[i]; j++) {
3476       if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++;
3477       jj++;
3478     }
3479   }
3480 
3481   for (i=0; i<m; i++) {
3482     ourlens[i] -= offlens[i];
3483   }
3484   if (!sizesset) {
3485     ierr = MatSetSizes(newMat,m,n,M,N);CHKERRQ(ierr);
3486   }
3487 
3488   if (bs > 1) {ierr = MatSetBlockSize(newMat,bs);CHKERRQ(ierr);}
3489 
3490   ierr = MatMPIAIJSetPreallocation(newMat,0,ourlens,0,offlens);CHKERRQ(ierr);
3491 
3492   for (i=0; i<m; i++) {
3493     ourlens[i] += offlens[i];
3494   }
3495 
3496   if (!rank) {
3497     ierr = PetscMalloc1((maxnz+1),&vals);CHKERRQ(ierr);
3498 
3499     /* read in my part of the matrix numerical values  */
3500     nz   = procsnz[0];
3501     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3502 
3503     /* insert into matrix */
3504     jj      = rstart;
3505     smycols = mycols;
3506     svals   = vals;
3507     for (i=0; i<m; i++) {
3508       ierr     = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
3509       smycols += ourlens[i];
3510       svals   += ourlens[i];
3511       jj++;
3512     }
3513 
3514     /* read in other processors and ship out */
3515     for (i=1; i<size; i++) {
3516       nz   = procsnz[i];
3517       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3518       ierr = MPIULong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr);
3519     }
3520     ierr = PetscFree(procsnz);CHKERRQ(ierr);
3521   } else {
3522     /* receive numeric values */
3523     ierr = PetscMalloc1((nz+1),&vals);CHKERRQ(ierr);
3524 
3525     /* receive message of values*/
3526     ierr = MPIULong_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr);
3527 
3528     /* insert into matrix */
3529     jj      = rstart;
3530     smycols = mycols;
3531     svals   = vals;
3532     for (i=0; i<m; i++) {
3533       ierr     = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
3534       smycols += ourlens[i];
3535       svals   += ourlens[i];
3536       jj++;
3537     }
3538   }
3539   ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr);
3540   ierr = PetscFree(vals);CHKERRQ(ierr);
3541   ierr = PetscFree(mycols);CHKERRQ(ierr);
3542   ierr = PetscFree(rowners);CHKERRQ(ierr);
3543   ierr = MatAssemblyBegin(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3544   ierr = MatAssemblyEnd(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3545   PetscFunctionReturn(0);
3546 }
3547 
3548 #undef __FUNCT__
3549 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ"
3550 PetscErrorCode MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat)
3551 {
3552   PetscErrorCode ierr;
3553   IS             iscol_local;
3554   PetscInt       csize;
3555 
3556   PetscFunctionBegin;
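       /* gather the distributed column IS onto every process; for MAT_INITIAL_MATRIX the
          gathered IS is composed with the new matrix under the key "ISAllGather" so that
          a later MAT_REUSE_MATRIX call can retrieve it */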
3557   ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
3558   if (call == MAT_REUSE_MATRIX) {
3559     ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr);
3560     if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3561   } else {
3562     PetscInt cbs;
3563     ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr);
3564     ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
3565     ierr = ISSetBlockSize(iscol_local,cbs);CHKERRQ(ierr);
3566   }
3567   ierr = MatGetSubMatrix_MPIAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr);
3568   if (call == MAT_INITIAL_MATRIX) {
3569     ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
3570     ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
3571   }
3572   PetscFunctionReturn(0);
3573 }
3574 
3575 extern PetscErrorCode MatGetSubMatrices_MPIAIJ_Local(Mat,PetscInt,const IS[],const IS[],MatReuse,PetscBool*,Mat*);
3576 #undef __FUNCT__
3577 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ_Private"
3578 /*
3579     Not great since it makes two copies of the submatrix: first a SeqAIJ copy
3580   locally, and then the end result obtained by concatenating the local matrices.
3581   Writing it directly would be much like MatGetSubMatrices_MPIAIJ().
3582 
3583   Note: This requires a sequential iscol containing all the indices.
3584 */
3585 PetscErrorCode MatGetSubMatrix_MPIAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat)
3586 {
3587   PetscErrorCode ierr;
3588   PetscMPIInt    rank,size;
3589   PetscInt       i,m,n,rstart,row,rend,nz,*cwork,j,bs,cbs;
3590   PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal,ncol;
3591   PetscBool      allcolumns, colflag;
3592   Mat            M,Mreuse;
3593   MatScalar      *vwork,*aa;
3594   MPI_Comm       comm;
3595   Mat_SeqAIJ     *aij;
3596 
3597   PetscFunctionBegin;
3598   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3599   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3600   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3601 
3602   ierr = ISIdentity(iscol,&colflag);CHKERRQ(ierr);
3603   ierr = ISGetLocalSize(iscol,&ncol);CHKERRQ(ierr);
3604   if (colflag && ncol == mat->cmap->N) {
3605     allcolumns = PETSC_TRUE;
3606   } else {
3607     allcolumns = PETSC_FALSE;
3608   }
3609   if (call ==  MAT_REUSE_MATRIX) {
3610     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Mreuse);CHKERRQ(ierr);
3611     if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3612     ierr = MatGetSubMatrices_MPIAIJ_Local(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&allcolumns,&Mreuse);CHKERRQ(ierr);
3613   } else {
3614     ierr = MatGetSubMatrices_MPIAIJ_Local(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&allcolumns,&Mreuse);CHKERRQ(ierr);
3615   }
3616 
3617   /*
3618       m - number of local rows
3619       n - number of columns (same on all processors)
3620       rstart - first row in new global matrix generated
3621   */
3622   ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
3623   ierr = MatGetBlockSizes(Mreuse,&bs,&cbs);CHKERRQ(ierr);
3624   if (call == MAT_INITIAL_MATRIX) {
3625     aij = (Mat_SeqAIJ*)(Mreuse)->data;
3626     ii  = aij->i;
3627     jj  = aij->j;
3628 
3629     /*
3630         Determine the number of non-zeros in the diagonal and off-diagonal
3631         portions of the matrix in order to do correct preallocation
3632     */
3633 
3634     /* first get start and end of "diagonal" columns */
3635     if (csize == PETSC_DECIDE) {
3636       ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
3637       if (mglobal == n) { /* square matrix */
3638         nlocal = m;
3639       } else {
3640         nlocal = n/size + ((n % size) > rank);
3641       }
3642     } else {
3643       nlocal = csize;
3644     }
3645     ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3646     rstart = rend - nlocal;
3647     if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n);
3648 
3649     /* next, compute all the lengths */
3650     ierr  = PetscMalloc1((2*m+1),&dlens);CHKERRQ(ierr);
3651     olens = dlens + m;
3652     for (i=0; i<m; i++) {
3653       jend = ii[i+1] - ii[i];
3654       olen = 0;
3655       dlen = 0;
3656       for (j=0; j<jend; j++) {
3657         if (*jj < rstart || *jj >= rend) olen++;
3658         else dlen++;
3659         jj++;
3660       }
3661       olens[i] = olen;
3662       dlens[i] = dlen;
3663     }
3664     ierr = MatCreate(comm,&M);CHKERRQ(ierr);
3665     ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr);
3666     ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr);
3667     ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr);
3668     ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr);
3669     ierr = PetscFree(dlens);CHKERRQ(ierr);
3670   } else {
3671     PetscInt ml,nl;
3672 
3673     M    = *newmat;
3674     ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
3675     if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
3676     ierr = MatZeroEntries(M);CHKERRQ(ierr);
3677     /*
3678          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
3679        rather than the slower MatSetValues().
3680     */
3681     M->was_assembled = PETSC_TRUE;
3682     M->assembled     = PETSC_FALSE;
3683   }
3684   ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
3685   aij  = (Mat_SeqAIJ*)(Mreuse)->data;
3686   ii   = aij->i;
3687   jj   = aij->j;
3688   aa   = aij->a;
3689   for (i=0; i<m; i++) {
3690     row   = rstart + i;
3691     nz    = ii[i+1] - ii[i];
3692     cwork = jj;     jj += nz;
3693     vwork = aa;     aa += nz;
3694     ierr  = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
3695   }
3696 
3697   ierr    = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3698   ierr    = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3699   *newmat = M;
3700 
3701   /* save submatrix used in processor for next request */
3702   if (call ==  MAT_INITIAL_MATRIX) {
3703     ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
3704     ierr = MatDestroy(&Mreuse);CHKERRQ(ierr);
3705   }
3706   PetscFunctionReturn(0);
3707 }
3708 
3709 #undef __FUNCT__
3710 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR_MPIAIJ"
3711 PetscErrorCode  MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[])
3712 {
3713   PetscInt       m,cstart, cend,j,nnz,i,d;
3714   PetscInt       *d_nnz,*o_nnz,nnz_max = 0,rstart,ii;
3715   const PetscInt *JJ;
3716   PetscScalar    *values;
3717   PetscErrorCode ierr;
3718 
3719   PetscFunctionBegin;
3720   if (Ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0, it is %D",Ii[0]);
3721 
3722   ierr   = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3723   ierr   = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3724   m      = B->rmap->n;
3725   cstart = B->cmap->rstart;
3726   cend   = B->cmap->rend;
3727   rstart = B->rmap->rstart;
3728 
3729   ierr = PetscMalloc2(m,&d_nnz,m,&o_nnz);CHKERRQ(ierr);
3730 
3731 #if defined(PETSC_USE_DEBUG)
3732   for (i=0; i<m; i++) {
3733     nnz = Ii[i+1]- Ii[i];
3734     JJ  = J + Ii[i];
3735     if (nnz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative %D number of columns",i,nnz);
3736     if (nnz && (JJ[0] < 0)) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index",i);
3737     if (nnz && (JJ[nnz-1] >= B->cmap->N)) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N);
3738   }
3739 #endif
3740 
3741   for (i=0; i<m; i++) {
3742     nnz     = Ii[i+1]- Ii[i];
3743     JJ      = J + Ii[i];
3744     nnz_max = PetscMax(nnz_max,nnz);
3745     d       = 0;
3746     for (j=0; j<nnz; j++) {
3747       if (cstart <= JJ[j] && JJ[j] < cend) d++;
3748     }
3749     d_nnz[i] = d;
3750     o_nnz[i] = nnz - d;
3751   }
3752   ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
3753   ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);
3754 
3755   if (v) values = (PetscScalar*)v;
3756   else {
3757     ierr = PetscCalloc1((nnz_max+1),&values);CHKERRQ(ierr);
3758   }
3759 
3760   for (i=0; i<m; i++) {
3761     ii   = i + rstart;
3762     nnz  = Ii[i+1]- Ii[i];
3763     ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ? Ii[i] : 0),INSERT_VALUES);CHKERRQ(ierr);
3764   }
3765   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3766   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3767 
3768   if (!v) {
3769     ierr = PetscFree(values);CHKERRQ(ierr);
3770   }
3771   ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3772   PetscFunctionReturn(0);
3773 }
3774 
3775 #undef __FUNCT__
3776 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR"
3777 /*@
3778    MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format
3779    (the default parallel PETSc format).
3780 
3781    Collective on MPI_Comm
3782 
3783    Input Parameters:
3784 +  B - the matrix
3785 .  i - the indices into j for the start of each local row (starts with zero)
3786 .  j - the column indices for each local row (starts with zero)
3787 -  v - optional values in the matrix
3788 
3789    Level: developer
3790 
3791    Notes:
3792        The i, j, and v arrays ARE copied by this routine into the internal format used by PETSc;
3793      thus you CANNOT change the matrix entries by changing the values of v[] after you have
3794      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3795 
3796        The i and j indices are 0-based, and the i indices are offsets into the local j array.
3797 
3798        The format used for the sparse matrix input is equivalent to a
3799     row-major ordering, i.e., for the following matrix, the input data expected is
3800     as shown:
3801 
3802         1 0 0
3803         2 0 3     P0
3804        -------
3805         4 5 6     P1
3806 
3807      Process0 [P0]: rows_owned=[0,1]
3808         i =  {0,1,3}  [size = nrow+1  = 2+1]
3809         j =  {0,0,2}  [size = nz = 3]
3810         v =  {1,2,3}  [size = nz = 3]
3811 
3812      Process1 [P1]: rows_owned=[2]
3813         i =  {0,3}    [size = nrow+1  = 1+1]
3814         j =  {0,1,2}  [size = nz = 3]
3815         v =  {4,5,6}  [size = nz = 3]
3816 
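          For example, process 0 could build the matrix above as follows (a sketch only;
        error checking omitted):
     
     .vb
          Mat         B;
          PetscInt    i[] = {0,1,3}, j[] = {0,0,2};
          PetscScalar v[] = {1,2,3};
     
          MatCreate(PETSC_COMM_WORLD,&B);
          MatSetSizes(B,2,PETSC_DECIDE,3,3);
          MatSetType(B,MATMPIAIJ);
          MatMPIAIJSetPreallocationCSR(B,i,j,v);
     .ve
     
          Process 1 makes the same calls with its arrays i = {0,3}, j = {0,1,2},
        v = {4,5,6} and local row count 1.
     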
3817 .keywords: matrix, aij, compressed row, sparse, parallel
3818 
3819 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateAIJ(), MPIAIJ,
3820           MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays()
3821 @*/
3822 PetscErrorCode  MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
3823 {
3824   PetscErrorCode ierr;
3825 
3826   PetscFunctionBegin;
3827   ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocationCSR_C",(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,i,j,v));CHKERRQ(ierr);
3828   PetscFunctionReturn(0);
3829 }
3830 
3831 #undef __FUNCT__
3832 #define __FUNCT__ "MatMPIAIJSetPreallocation"
3833 /*@C
3834    MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format
3835    (the default parallel PETSc format).  For good matrix assembly performance
3836    the user should preallocate the matrix storage by setting the parameters
3837    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3838    performance can be increased by more than a factor of 50.
3839 
3840    Collective on MPI_Comm
3841 
3842    Input Parameters:
3843 +  A - the matrix
3844 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
3845            (same value is used for all local rows)
3846 .  d_nnz - array containing the number of nonzeros in the various rows of the
3847            DIAGONAL portion of the local submatrix (possibly different for each row)
3848            or NULL, if d_nz is used to specify the nonzero structure.
3849            The size of this array is equal to the number of local rows, i.e., 'm'.
3850            For matrices that will be factored, you must leave room for (and set)
3851            the diagonal entry even if it is zero.
3852 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
3853            submatrix (same value is used for all local rows).
3854 -  o_nnz - array containing the number of nonzeros in the various rows of the
3855            OFF-DIAGONAL portion of the local submatrix (possibly different for
3856            each row) or NULL, if o_nz is used to specify the nonzero
3857            structure. The size of this array is equal to the number
3858            of local rows, i.e., 'm'.
3859 
3860    If the *_nnz parameter is given then the *_nz parameter is ignored
3861 
3862    The AIJ format (also called the Yale sparse matrix format or
3863    compressed row storage (CSR)) is fully compatible with standard Fortran 77
3864    storage.  The stored row and column indices begin with zero.
3865    See the <A href="../../docs/manual.pdf#nameddest=ch_mat">Mat chapter of the users manual</A> for details.
3866 
3867    The parallel matrix is partitioned such that the first m0 rows belong to
3868    process 0, the next m1 rows belong to process 1, the next m2 rows belong
3869    to process 2, etc., where m0,m1,m2,... are the values of the input parameter 'm'.
3870 
3871    The DIAGONAL portion of the local submatrix of a processor can be defined
3872    as the submatrix which is obtained by extracting the part corresponding to
3873    the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the
3874    first row that belongs to the processor, r2 is the last row belonging to
3875    this processor, and c1-c2 is the range of indices of the local part of a
3876    vector suitable for applying the matrix to.  This is an m x n matrix.  In the
3877    common case of a square matrix, the row and column ranges are the same and
3878    the DIAGONAL part is also square. The remaining portion of the local
3879    submatrix (m x (N-n)) constitutes the OFF-DIAGONAL portion.
3880 
3881    If o_nnz and d_nnz are specified, then o_nz and d_nz are ignored.
3882 
3883    You can call MatGetInfo() to get information on how effective the preallocation was;
3884    for example, the fields mallocs, nz_allocated, nz_used, and nz_unneeded.
3885    You can also run with the option -info and look for messages containing the string
3886    malloc to see if additional memory allocation was needed.
3887 
3888    Example usage:
3889 
3890    Consider the following 8x8 matrix with 34 non-zero values that is
3891    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
3892    proc1 owns 3 rows, and proc2 owns 2 rows. This division can be shown
3893    as follows:
3894 
3895 .vb
3896             1  2  0  |  0  3  0  |  0  4
3897     Proc0   0  5  6  |  7  0  0  |  8  0
3898             9  0 10  | 11  0  0  | 12  0
3899     -------------------------------------
3900            13  0 14  | 15 16 17  |  0  0
3901     Proc1   0 18  0  | 19 20 21  |  0  0
3902             0  0  0  | 22 23  0  | 24  0
3903     -------------------------------------
3904     Proc2  25 26 27  |  0  0 28  | 29  0
3905            30  0  0  | 31 32 33  |  0 34
3906 .ve
3907 
3908    This can be represented as a collection of submatrices as:
3909 
3910 .vb
3911       A B C
3912       D E F
3913       G H I
3914 .ve
3915 
3916    The submatrices A,B,C are owned by proc0, D,E,F are
3917    owned by proc1, and G,H,I are owned by proc2.
3918 
3919    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3920    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3921    The 'M','N' parameters are 8,8, and have the same values on all procs.
3922 
3923    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
3924    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
3925    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
3926    Internally, each processor stores the DIAGONAL part and the OFF-DIAGONAL
3927    part as SeqAIJ matrices; e.g., proc1 will store [E] as one SeqAIJ
3928    matrix, and [DF] as another SeqAIJ matrix.
3929 
3930    When the d_nz, o_nz parameters are specified, d_nz storage elements are
3931    allocated for every row of the local diagonal submatrix, and o_nz
3932    storage locations are allocated for every row of the OFF-DIAGONAL submatrix.
3933    One way to choose d_nz and o_nz is to use the maximum number of nonzeros per
3934    local row in each of the local DIAGONAL and OFF-DIAGONAL submatrices.
3935    In this case, the values of d_nz,o_nz are:
3936 .vb
3937      proc0 : d_nz = 2, o_nz = 2
3938      proc1 : d_nz = 3, o_nz = 2
3939      proc2 : d_nz = 1, o_nz = 4
3940 .ve
3941    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
3942    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, and 2*(1+4)=10
3943    for proc2, i.e., we are using 12+15+10=37 storage locations to store
3944    34 values.
3945 
3946    When d_nnz, o_nnz parameters are specified, the storage is specified
3947    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
3948    In the above case the values for d_nnz,o_nnz are:
3949 .vb
3950      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
3951      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
3952      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
3953 .ve
3954    Here the space allocated is the sum of all the above values, i.e., 34, and
3955    hence the preallocation is perfect.
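
   As a minimal sketch (hypothetical variable names), the corresponding call on proc0 is:

.vb
   PetscInt d_nnz[] = {2,2,2}, o_nnz[] = {2,2,2};

   MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);  /* the *_nz arguments are ignored when *_nnz is given */
.ve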
3956 
3957    Level: intermediate
3958 
3959 .keywords: matrix, aij, compressed row, sparse, parallel
3960 
3961 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateAIJ(), MatMPIAIJSetPreallocationCSR(),
3962           MPIAIJ, MatGetInfo(), PetscSplitOwnership()
3963 @*/
3964 PetscErrorCode  MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
3965 {
3966   PetscErrorCode ierr;
3967 
3968   PetscFunctionBegin;
3969   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
3970   PetscValidType(B,1);
3971   ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocation_C",(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr);
3972   PetscFunctionReturn(0);
3973 }
3974 
3975 #undef __FUNCT__
3976 #define __FUNCT__ "MatCreateMPIAIJWithArrays"
3977 /*@
3978      MatCreateMPIAIJWithArrays - creates a MPI AIJ matrix using arrays that contain in standard
3979          CSR format the local rows.
3980 
3981    Collective on MPI_Comm
3982 
3983    Input Parameters:
3984 +  comm - MPI communicator
3985 .  m - number of local rows (Cannot be PETSC_DECIDE)
3986 .  n - This value should be the same as the local size used in creating the
3987        x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have it
3988        calculated if N is given). For square matrices n is almost always m.
3989 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
3990 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
3991 .   i - row indices
3992 .   j - column indices
3993 -   a - matrix values
3994 
3995    Output Parameter:
3996 .   mat - the matrix
3997 
3998    Level: intermediate
3999 
4000    Notes:
4001        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
4002      thus you CANNOT change the matrix entries by changing the values of a[] after you have
4003      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
4004 
4005        The i and j indices are 0 based, and the entries of i are offsets into the local j array.
4006 
4007        The format used for the sparse matrix input is equivalent to a
4008     row-major ordering, i.e., for the following matrix the expected input data is
4009     as shown:
4010 
4011         1 0 0
4012         2 0 3     P0
4013        -------
4014         4 5 6     P1
4015 
4016      Process0 [P0]: rows_owned=[0,1]
4017         i =  {0,1,3}  [size = nrow+1  = 2+1]
4018         j =  {0,0,2}  [size = nz = 3]
4019         a =  {1,2,3}  [size = nz = 3]
4020 
4021      Process1 [P1]: rows_owned=[2]
4022         i =  {0,3}    [size = nrow+1  = 1+1]
4023         j =  {0,1,2}  [size = nz = 3]
4024         a =  {4,5,6}  [size = nz = 3]
4025 
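   A minimal sketch (hypothetical variable names) of the call made on process 0 above:

.vb
   Mat         mat;
   PetscInt    i0[] = {0,1,3},  j0[] = {0,0,2};
   PetscScalar a0[] = {1.0,2.0,3.0};

   MatCreateMPIAIJWithArrays(PETSC_COMM_WORLD,2,PETSC_DECIDE,PETSC_DETERMINE,3,i0,j0,a0,&mat);
.ve
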
4026 .keywords: matrix, aij, compressed row, sparse, parallel
4027 
4028 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
4029           MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays()
4030 @*/
4031 PetscErrorCode  MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
4032 {
4033   PetscErrorCode ierr;
4034 
4035   PetscFunctionBegin;
4036   if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
4037   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
4038   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
4039   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
4040   /* ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); */
4041   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
4042   ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr);
4043   PetscFunctionReturn(0);
4044 }
4045 
4046 #undef __FUNCT__
4047 #define __FUNCT__ "MatCreateAIJ"
4048 /*@C
4049    MatCreateAIJ - Creates a sparse parallel matrix in AIJ format
4050    (the default parallel PETSc format).  For good matrix assembly performance
4051    the user should preallocate the matrix storage by setting the parameters
4052    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
4053    performance can be increased by more than a factor of 50.
4054 
4055    Collective on MPI_Comm
4056 
4057    Input Parameters:
4058 +  comm - MPI communicator
4059 .  m - number of local rows (or PETSC_DECIDE to have it calculated if M is given)
4060            This value should be the same as the local size used in creating the
4061            y vector for the matrix-vector product y = Ax.
4062 .  n - This value should be the same as the local size used in creating the
4063        x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have it
4064        calculated if N is given). For square matrices n is almost always m.
4065 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
4066 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
4067 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
4068            (same value is used for all local rows)
4069 .  d_nnz - array containing the number of nonzeros in the various rows of the
4070            DIAGONAL portion of the local submatrix (possibly different for each row)
4071            or NULL, if d_nz is used to specify the nonzero structure.
4072            The size of this array is equal to the number of local rows, i.e 'm'.
4073 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
4074            submatrix (same value is used for all local rows).
4075 -  o_nnz - array containing the number of nonzeros in the various rows of the
4076            OFF-DIAGONAL portion of the local submatrix (possibly different for
4077            each row) or NULL, if o_nz is used to specify the nonzero
4078            structure. The size of this array is equal to the number
4079            of local rows, i.e 'm'.
4080 
4081    Output Parameter:
4082 .  A - the matrix
4083 
4084    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
4085    MatXXXXSetPreallocation() paradigm instead of this routine directly.
4086    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation()]
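
   A sketch of that paradigm for an AIJ matrix (since PetscTryMethod() is used, the
   preallocation call that does not match the instantiated type is a no-op):

.vb
   MatCreate(comm,&A);
   MatSetSizes(A,m,n,M,N);
   MatSetType(A,MATAIJ);
   MatSetFromOptions(A);
   MatMPIAIJSetPreallocation(A,d_nz,d_nnz,o_nz,o_nnz);  /* used when A is MPIAIJ */
   MatSeqAIJSetPreallocation(A,d_nz,d_nnz);             /* used when A is SEQAIJ */
.ve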
4087 
4088    Notes:
4089    If the *_nnz parameter is given then the *_nz parameter is ignored
4090 
4091    m,n,M,N parameters specify the size of the matrix, and its partitioning across
4092    processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
4093    storage requirements for this matrix.
4094 
4095    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one
4096    processor then it must be used on all processors that share the object for
4097    that argument.
4098 
4099    The user MUST specify either the local or global matrix dimensions
4100    (possibly both).
4101 
4102    The parallel matrix is partitioned across processors such that the
4103    first m0 rows belong to process 0, the next m1 rows belong to
4104    process 1, the next m2 rows belong to process 2, etc., where
4105    m0,m1,m2,... are the values of the input parameter 'm', i.e., each processor stores
4106    values corresponding to an [m x N] submatrix.
4107 
4108    The columns are logically partitioned with the n0 columns belonging
4109    to the 0th partition, the next n1 columns belonging to the next
4110    partition, etc., where n0,n1,n2,... are the values of the input parameter 'n'.
4111 
4112    The DIAGONAL portion of the local submatrix on any given processor
4113    is the submatrix corresponding to the rows and columns m,n
4114    owned by the given processor, i.e., the diagonal submatrix on
4115    process 0 is [m0 x n0], the diagonal submatrix on process 1 is [m1 x n1],
4116    etc. The remaining portion of the local submatrix [m x (N-n)]
4117    constitutes the OFF-DIAGONAL portion. The example below
4118    illustrates this concept.
4119 
4120    For a square global matrix we define each processor's diagonal portion
4121    to be its local rows and the corresponding columns (a square submatrix);
4122    each processor's off-diagonal portion encompasses the remainder of the
4123    local matrix (a rectangular submatrix).
4124 
4125    If o_nnz and d_nnz are specified, then o_nz and d_nz are ignored.
4126 
4127    When calling this routine with a single process communicator, a matrix of
4128    type SEQAIJ is returned.  If a matrix of type MPIAIJ is desired for this
4129    type of communicator, use the construction mechanism:
4130      MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatSetSizes(A, m,n,M,N); MatMPIAIJSetPreallocation(A,...);
4131 
4132    By default, this format uses inodes (identical nodes) when possible.
4133    We search for consecutive rows with the same nonzero structure, thereby
4134    reusing matrix information to achieve increased efficiency.
4135 
4136    Options Database Keys:
4137 +  -mat_no_inode  - Do not use inodes
4138 .  -mat_inode_limit <limit> - Sets inode limit (max limit=5)
4139 -  -mat_aij_oneindex - Internally use indexing starting at 1
4140         rather than 0.  Note that when calling MatSetValues(),
4141         the user still MUST index entries starting at 0!
4142 
4143 
4144    Example usage:
4145 
4146    Consider the following 8x8 matrix with 34 non-zero values that is
4147    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
4148    proc1 owns 3 rows, and proc2 owns 2 rows. This division can be shown
4149    as follows:
4150 
4151 .vb
4152             1  2  0  |  0  3  0  |  0  4
4153     Proc0   0  5  6  |  7  0  0  |  8  0
4154             9  0 10  | 11  0  0  | 12  0
4155     -------------------------------------
4156            13  0 14  | 15 16 17  |  0  0
4157     Proc1   0 18  0  | 19 20 21  |  0  0
4158             0  0  0  | 22 23  0  | 24  0
4159     -------------------------------------
4160     Proc2  25 26 27  |  0  0 28  | 29  0
4161            30  0  0  | 31 32 33  |  0 34
4162 .ve
4163 
4164    This can be represented as a collection of submatrices as:
4165 
4166 .vb
4167       A B C
4168       D E F
4169       G H I
4170 .ve
4171 
4172    The submatrices A,B,C are owned by proc0, D,E,F are
4173    owned by proc1, and G,H,I are owned by proc2.
4174 
4175    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4176    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4177    The 'M','N' parameters are 8,8, and have the same values on all procs.
4178 
4179    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
4180    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
4181    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
4182    Internally, each processor stores the DIAGONAL part and the OFF-DIAGONAL
4183    part as SeqAIJ matrices; e.g., proc1 will store [E] as one SeqAIJ
4184    matrix, and [DF] as another SeqAIJ matrix.
4185 
4186    When the d_nz, o_nz parameters are specified, d_nz storage elements are
4187    allocated for every row of the local diagonal submatrix, and o_nz
4188    storage locations are allocated for every row of the OFF-DIAGONAL submatrix.
4189    One way to choose d_nz and o_nz is to use the maximum number of nonzeros per
4190    local row in each of the local DIAGONAL and OFF-DIAGONAL submatrices.
4191    In this case, the values of d_nz,o_nz are:
4192 .vb
4193      proc0 : d_nz = 2, o_nz = 2
4194      proc1 : d_nz = 3, o_nz = 2
4195      proc2 : d_nz = 1, o_nz = 4
4196 .ve
4197    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
4198    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, and 2*(1+4)=10
4199    for proc2, i.e., we are using 12+15+10=37 storage locations to store
4200    34 values.
4201 
4202    When d_nnz, o_nnz parameters are specified, the storage is specified
4203    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
4204    In the above case the values for d_nnz,o_nnz are:
4205 .vb
4206      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
4207      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
4208      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
4209 .ve
4210    Here the space allocated is the sum of all the above values, i.e., 34, and
4211    hence the preallocation is perfect.
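
   A minimal sketch (hypothetical variable names) of the corresponding call on proc0:

.vb
   Mat      A;
   PetscInt d_nnz[] = {2,2,2}, o_nnz[] = {2,2,2};

   MatCreateAIJ(PETSC_COMM_WORLD,3,3,8,8,0,d_nnz,0,o_nnz,&A);
.ve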
4212 
4213    Level: intermediate
4214 
4215 .keywords: matrix, aij, compressed row, sparse, parallel
4216 
4217 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
4218           MPIAIJ, MatCreateMPIAIJWithArrays()
4219 @*/
4220 PetscErrorCode  MatCreateAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
4221 {
4222   PetscErrorCode ierr;
4223   PetscMPIInt    size;
4224 
4225   PetscFunctionBegin;
4226   ierr = MatCreate(comm,A);CHKERRQ(ierr);
4227   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
4228   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4229   if (size > 1) {
4230     ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr);
4231     ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
4232   } else {
4233     ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr);
4234     ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr);
4235   }
4236   PetscFunctionReturn(0);
4237 }
4238 
4239 #undef __FUNCT__
4240 #define __FUNCT__ "MatMPIAIJGetSeqAIJ"
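/* Returns the diagonal block Ad, the off-diagonal block Ao, and the map colmap from local
   columns of Ao to global column indices of the MPIAIJ matrix A */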
4241 PetscErrorCode  MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,const PetscInt *colmap[])
4242 {
4243   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
4244 
4245   PetscFunctionBegin;
4246   *Ad     = a->A;
4247   *Ao     = a->B;
4248   *colmap = a->garray;
4249   PetscFunctionReturn(0);
4250 }
4251 
4252 #undef __FUNCT__
4253 #define __FUNCT__ "MatSetColoring_MPIAIJ"
4254 PetscErrorCode MatSetColoring_MPIAIJ(Mat A,ISColoring coloring)
4255 {
4256   PetscErrorCode ierr;
4257   PetscInt       i;
4258   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
4259 
4260   PetscFunctionBegin;
4261   if (coloring->ctype == IS_COLORING_GLOBAL) {
4262     ISColoringValue *allcolors,*colors;
4263     ISColoring      ocoloring;
4264 
4265     /* set coloring for diagonal portion */
4266     ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr);
4267 
4268     /* set coloring for off-diagonal portion */
4269     ierr = ISAllGatherColors(PetscObjectComm((PetscObject)A),coloring->n,coloring->colors,NULL,&allcolors);CHKERRQ(ierr);
4270     ierr = PetscMalloc1((a->B->cmap->n+1),&colors);CHKERRQ(ierr);
4271     for (i=0; i<a->B->cmap->n; i++) {
4272       colors[i] = allcolors[a->garray[i]];
4273     }
4274     ierr = PetscFree(allcolors);CHKERRQ(ierr);
4275     ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
4276     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
4277     ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr);
4278   } else if (coloring->ctype == IS_COLORING_GHOSTED) {
4279     ISColoringValue *colors;
4280     PetscInt        *larray;
4281     ISColoring      ocoloring;
4282 
4283     /* set coloring for diagonal portion */
4284     ierr = PetscMalloc1((a->A->cmap->n+1),&larray);CHKERRQ(ierr);
4285     for (i=0; i<a->A->cmap->n; i++) {
4286       larray[i] = i + A->cmap->rstart;
4287     }
4288     ierr = ISGlobalToLocalMappingApply(A->cmap->mapping,IS_GTOLM_MASK,a->A->cmap->n,larray,NULL,larray);CHKERRQ(ierr);
4289     ierr = PetscMalloc1((a->A->cmap->n+1),&colors);CHKERRQ(ierr);
4290     for (i=0; i<a->A->cmap->n; i++) {
4291       colors[i] = coloring->colors[larray[i]];
4292     }
4293     ierr = PetscFree(larray);CHKERRQ(ierr);
4294     ierr = ISColoringCreate(PETSC_COMM_SELF,coloring->n,a->A->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
4295     ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr);
4296     ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr);
4297 
4298     /* set coloring for off-diagonal portion */
4299     ierr = PetscMalloc1((a->B->cmap->n+1),&larray);CHKERRQ(ierr);
4300     ierr = ISGlobalToLocalMappingApply(A->cmap->mapping,IS_GTOLM_MASK,a->B->cmap->n,a->garray,NULL,larray);CHKERRQ(ierr);
4301     ierr = PetscMalloc1((a->B->cmap->n+1),&colors);CHKERRQ(ierr);
4302     for (i=0; i<a->B->cmap->n; i++) {
4303       colors[i] = coloring->colors[larray[i]];
4304     }
4305     ierr = PetscFree(larray);CHKERRQ(ierr);
4306     ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
4307     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
4308     ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr);
4309   } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support ISColoringType %d",(int)coloring->ctype);
4310   PetscFunctionReturn(0);
4311 }
4312 
4313 #undef __FUNCT__
4314 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ"
4315 PetscErrorCode MatSetValuesAdifor_MPIAIJ(Mat A,PetscInt nl,void *advalues)
4316 {
4317   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
4318   PetscErrorCode ierr;
4319 
4320   PetscFunctionBegin;
4321   ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr);
4322   ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr);
4323   PetscFunctionReturn(0);
4324 }
4325 
4326 #undef __FUNCT__
4327 #define __FUNCT__ "MatCreateMPIAIJConcatenateSeqAIJSymbolic"
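/* Creates the symbolic structure (sizes and preallocation, no values) of the MPIAIJ matrix
   that concatenates the processes' sequential matrices row-wise; the values are inserted
   later by MatCreateMPIAIJConcatenateSeqAIJNumeric() */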
4328 PetscErrorCode  MatCreateMPIAIJConcatenateSeqAIJSymbolic(MPI_Comm comm,Mat inmat,PetscInt n,Mat *outmat)
4329 {
4330   PetscErrorCode ierr;
4331   PetscInt       m,N,i,rstart,nnz,*dnz,*onz,sum,bs,cbs;
4332   PetscInt       *indx;
4333 
4334   PetscFunctionBegin;
4335   /* This routine will ONLY return MPIAIJ type matrix */
4336   ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
4337   ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr);
4338   if (n == PETSC_DECIDE) {
4339     ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr);
4340   }
4341   /* Check sum(n) = N */
4342   ierr = MPI_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
4343   if (sum != N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns != global columns %d",N);
4344 
4345   ierr    = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
4346   rstart -= m;
4347 
4348   ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
4349   for (i=0; i<m; i++) {
4350     ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr);
4351     ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr);
4352     ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr);
4353   }
4354 
4355   ierr = MatCreate(comm,outmat);CHKERRQ(ierr);
4356   ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
4357   ierr = MatSetBlockSizes(*outmat,bs,cbs);CHKERRQ(ierr);
4358   ierr = MatSetType(*outmat,MATMPIAIJ);CHKERRQ(ierr);
4359   ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr);
4360   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
4361   PetscFunctionReturn(0);
4362 }
4363 
4364 #undef __FUNCT__
4365 #define __FUNCT__ "MatCreateMPIAIJConcatenateSeqAIJNumeric"
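/* Inserts the values of this process's sequential matrix into the rows of outmat that this
   process owns, then assembles outmat */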
4366 PetscErrorCode  MatCreateMPIAIJConcatenateSeqAIJNumeric(MPI_Comm comm,Mat inmat,PetscInt n,Mat outmat)
4367 {
4368   PetscErrorCode ierr;
4369   PetscInt       m,N,i,rstart,nnz,Ii;
4370   PetscInt       *indx;
4371   PetscScalar    *values;
4372 
4373   PetscFunctionBegin;
4374   ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
4375   ierr = MatGetOwnershipRange(outmat,&rstart,NULL);CHKERRQ(ierr);
4376   for (i=0; i<m; i++) {
4377     ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
4378     Ii   = i + rstart;
4379     ierr = MatSetValues(outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
4380     ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
4381   }
4382   ierr = MatAssemblyBegin(outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4383   ierr = MatAssemblyEnd(outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4384   PetscFunctionReturn(0);
4385 }
4386 
4387 #undef __FUNCT__
4388 #define __FUNCT__ "MatCreateMPIAIJConcatenateSeqAIJ"
4389 /*@
4390       MatCreateMPIAIJConcatenateSeqAIJ - Creates a single large PETSc matrix by concatenating sequential
4391                  matrices from each processor
4392 
4393     Collective on MPI_Comm
4394 
4395    Input Parameters:
4396 +    comm - the communicator the parallel matrix will live on
4397 .    inmat - the input sequential matrix (one per process)
4398 .    n - number of local columns (or PETSC_DECIDE)
4399 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4400 
4401    Output Parameter:
4402 .    outmat - the parallel matrix generated
4403 
4404     Level: advanced
4405 
4406    Notes: The number of columns of the matrix on EACH process MUST be the same.
4407 
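   A minimal usage sketch (hypothetical variable names; seqmat is assembled separately on
   each process):

.vb
   Mat seqmat,mpimat;

   MatCreateMPIAIJConcatenateSeqAIJ(PETSC_COMM_WORLD,seqmat,PETSC_DECIDE,MAT_INITIAL_MATRIX,&mpimat);
.ve
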
4408 @*/
4409 PetscErrorCode  MatCreateMPIAIJConcatenateSeqAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
4410 {
4411   PetscErrorCode ierr;
4412   PetscMPIInt    size;
4413 
4414   PetscFunctionBegin;
4415   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4416   ierr = PetscLogEventBegin(MAT_Merge,inmat,0,0,0);CHKERRQ(ierr);
4417   if (size == 1) {
4418     if (scall == MAT_INITIAL_MATRIX) {
4419       ierr = MatDuplicate(inmat,MAT_COPY_VALUES,outmat);CHKERRQ(ierr);
4420     } else {
4421       ierr = MatCopy(inmat,*outmat,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
4422     }
4423   } else {
4424     if (scall == MAT_INITIAL_MATRIX) {
4425       ierr = MatCreateMPIAIJConcatenateSeqAIJSymbolic(comm,inmat,n,outmat);CHKERRQ(ierr);
4426     }
4427     ierr = MatCreateMPIAIJConcatenateSeqAIJNumeric(comm,inmat,n,*outmat);CHKERRQ(ierr);
4428   }
4429   ierr = PetscLogEventEnd(MAT_Merge,inmat,0,0,0);CHKERRQ(ierr);
4430   PetscFunctionReturn(0);
4431 }
4432 
4433 #undef __FUNCT__
4434 #define __FUNCT__ "MatFileSplit"
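/* Writes each process's local rows of A, as a sequential matrix, to the binary file
   <outfile>.<rank> */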
4435 PetscErrorCode MatFileSplit(Mat A,char *outfile)
4436 {
4437   PetscErrorCode    ierr;
4438   PetscMPIInt       rank;
4439   PetscInt          m,N,i,rstart,nnz;
4440   size_t            len;
4441   const PetscInt    *indx;
4442   PetscViewer       out;
4443   char              *name;
4444   Mat               B;
4445   const PetscScalar *values;
4446 
4447   PetscFunctionBegin;
4448   ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr);
4449   ierr = MatGetSize(A,0,&N);CHKERRQ(ierr);
4450   /* Should this be the type of the diagonal block of A? */
4451   ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr);
4452   ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr);
4453   ierr = MatSetBlockSizesFromMats(B,A,A);CHKERRQ(ierr);
4454   ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr);
4455   ierr = MatSeqAIJSetPreallocation(B,0,NULL);CHKERRQ(ierr);
4456   ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr);
4457   for (i=0; i<m; i++) {
4458     ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
4459     ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
4460     ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
4461   }
4462   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4463   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4464 
4465   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRQ(ierr);
4466   ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr);
4467   ierr = PetscMalloc1((len+5),&name);CHKERRQ(ierr);
4468   sprintf(name,"%s.%d",outfile,rank);
4469   ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr);
4470   ierr = PetscFree(name);CHKERRQ(ierr);
4471   ierr = MatView(B,out);CHKERRQ(ierr);
4472   ierr = PetscViewerDestroy(&out);CHKERRQ(ierr);
4473   ierr = MatDestroy(&B);CHKERRQ(ierr);
4474   PetscFunctionReturn(0);
4475 }
4476 
4477 extern PetscErrorCode MatDestroy_MPIAIJ(Mat);
4478 #undef __FUNCT__
4479 #define __FUNCT__ "MatDestroy_MPIAIJ_SeqsToMPI"
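/* Destructor for matrices created by MatCreateMPIAIJSumSeqAIJSymbolic(): frees the attached
   Mat_Merge_SeqsToMPI support structure, then calls the standard MPIAIJ destructor */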
4480 PetscErrorCode  MatDestroy_MPIAIJ_SeqsToMPI(Mat A)
4481 {
4482   PetscErrorCode      ierr;
4483   Mat_Merge_SeqsToMPI *merge;
4484   PetscContainer      container;
4485 
4486   PetscFunctionBegin;
4487   ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr);
4488   if (container) {
4489     ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr);
4490     ierr = PetscFree(merge->id_r);CHKERRQ(ierr);
4491     ierr = PetscFree(merge->len_s);CHKERRQ(ierr);
4492     ierr = PetscFree(merge->len_r);CHKERRQ(ierr);
4493     ierr = PetscFree(merge->bi);CHKERRQ(ierr);
4494     ierr = PetscFree(merge->bj);CHKERRQ(ierr);
4495     ierr = PetscFree(merge->buf_ri[0]);CHKERRQ(ierr);
4496     ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr);
4497     ierr = PetscFree(merge->buf_rj[0]);CHKERRQ(ierr);
4498     ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr);
4499     ierr = PetscFree(merge->coi);CHKERRQ(ierr);
4500     ierr = PetscFree(merge->coj);CHKERRQ(ierr);
4501     ierr = PetscFree(merge->owners_co);CHKERRQ(ierr);
4502     ierr = PetscLayoutDestroy(&merge->rowmap);CHKERRQ(ierr);
4503     ierr = PetscFree(merge);CHKERRQ(ierr);
4504     ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr);
4505   }
4506   ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr);
4507   PetscFunctionReturn(0);
4508 }
4509 
4510 #include <../src/mat/utils/freespace.h>
4511 #include <petscbt.h>
4512 
4513 #undef __FUNCT__
4514 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJNumeric"
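/* Numeric phase of MatCreateMPIAIJSumSeqAIJ(): each process sends the entries of the rows of
   its sequential matrix that are owned by other processes, then sums its local entries and
   the received entries into mpimat */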
4515 PetscErrorCode  MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat,Mat mpimat)
4516 {
4517   PetscErrorCode      ierr;
4518   MPI_Comm            comm;
4519   Mat_SeqAIJ          *a  =(Mat_SeqAIJ*)seqmat->data;
4520   PetscMPIInt         size,rank,taga,*len_s;
4521   PetscInt            N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj;
4522   PetscInt            proc,m;
4523   PetscInt            **buf_ri,**buf_rj;
4524   PetscInt            k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj;
4525   PetscInt            nrows,**buf_ri_k,**nextrow,**nextai;
4526   MPI_Request         *s_waits,*r_waits;
4527   MPI_Status          *status;
4528   MatScalar           *aa=a->a;
4529   MatScalar           **abuf_r,*ba_i;
4530   Mat_Merge_SeqsToMPI *merge;
4531   PetscContainer      container;
4532 
4533   PetscFunctionBegin;
4534   ierr = PetscObjectGetComm((PetscObject)mpimat,&comm);CHKERRQ(ierr);
4535   ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
4536 
4537   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4538   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4539 
4540   ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr);
4541   ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr);
4542 
4543   bi     = merge->bi;
4544   bj     = merge->bj;
4545   buf_ri = merge->buf_ri;
4546   buf_rj = merge->buf_rj;
4547 
4548   ierr   = PetscMalloc1(size,&status);CHKERRQ(ierr);
4549   owners = merge->rowmap->range;
4550   len_s  = merge->len_s;
4551 
4552   /* send and recv matrix values */
4553   /*-----------------------------*/
4554   ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr);
4555   ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr);
4556 
4557   ierr = PetscMalloc1((merge->nsend+1),&s_waits);CHKERRQ(ierr);
4558   for (proc=0,k=0; proc<size; proc++) {
4559     if (!len_s[proc]) continue;
4560     i    = owners[proc];
4561     ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr);
4562     k++;
4563   }
4564 
4565   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);}
4566   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);}
4567   ierr = PetscFree(status);CHKERRQ(ierr);
4568 
4569   ierr = PetscFree(s_waits);CHKERRQ(ierr);
4570   ierr = PetscFree(r_waits);CHKERRQ(ierr);
4571 
4572   /* insert mat values of mpimat */
4573   /*----------------------------*/
4574   ierr = PetscMalloc1(N,&ba_i);CHKERRQ(ierr);
4575   ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr);
4576 
4577   for (k=0; k<merge->nrecv; k++) {
4578     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th received i-structure */
4579     nrows       = *(buf_ri_k[k]);
4580     nextrow[k]  = buf_ri_k[k]+1;  /* next row number of k-th received i-structure */
4581     nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th received i-structure */
4582   }
4583 
4584   /* set values of ba */
4585   m = merge->rowmap->n;
4586   for (i=0; i<m; i++) {
4587     arow = owners[rank] + i;
4588     bj_i = bj+bi[i];  /* col indices of the i-th row of mpimat */
4589     bnzi = bi[i+1] - bi[i];
4590     ierr = PetscMemzero(ba_i,bnzi*sizeof(PetscScalar));CHKERRQ(ierr);
4591 
4592     /* add local non-zero vals of this proc's seqmat into ba */
4593     anzi   = ai[arow+1] - ai[arow];
4594     aj     = a->j + ai[arow];
4595     aa     = a->a + ai[arow];
4596     nextaj = 0;
4597     for (j=0; nextaj<anzi; j++) {
4598       if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
4599         ba_i[j] += aa[nextaj++];
4600       }
4601     }
4602 
4603     /* add received vals into ba */
4604     for (k=0; k<merge->nrecv; k++) { /* k-th received message */
4605       /* i-th row */
4606       if (i == *nextrow[k]) {
4607         anzi   = *(nextai[k]+1) - *nextai[k];
4608         aj     = buf_rj[k] + *(nextai[k]);
4609         aa     = abuf_r[k] + *(nextai[k]);
4610         nextaj = 0;
4611         for (j=0; nextaj<anzi; j++) {
4612           if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
4613             ba_i[j] += aa[nextaj++];
4614           }
4615         }
4616         nextrow[k]++; nextai[k]++;
4617       }
4618     }
4619     ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr);
4620   }
4621   ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4622   ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4623 
4624   ierr = PetscFree(abuf_r[0]);CHKERRQ(ierr);
4625   ierr = PetscFree(abuf_r);CHKERRQ(ierr);
4626   ierr = PetscFree(ba_i);CHKERRQ(ierr);
4627   ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
4628   ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
4629   PetscFunctionReturn(0);
4630 }
4631 
4632 extern PetscErrorCode  MatDestroy_MPIAIJ_SeqsToMPI(Mat);
4633 
4634 #undef __FUNCT__
4635 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJSymbolic"
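/* Symbolic phase of MatCreateMPIAIJSumSeqAIJ(): determines row ownership, exchanges the
   i/j structure of off-process rows, merges the column indices of each owned row with a
   sorted linked list, and preallocates the resulting MPIAIJ matrix; the values are set by
   MatCreateMPIAIJSumSeqAIJNumeric() */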
4636 PetscErrorCode  MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat)
4637 {
4638   PetscErrorCode      ierr;
4639   Mat                 B_mpi;
4640   Mat_SeqAIJ          *a=(Mat_SeqAIJ*)seqmat->data;
4641   PetscMPIInt         size,rank,tagi,tagj,*len_s,*len_si,*len_ri;
4642   PetscInt            **buf_rj,**buf_ri,**buf_ri_k;
4643   PetscInt            M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j;
4644   PetscInt            len,proc,*dnz,*onz,bs,cbs;
4645   PetscInt            k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0;
4646   PetscInt            nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai;
4647   MPI_Request         *si_waits,*sj_waits,*ri_waits,*rj_waits;
4648   MPI_Status          *status;
4649   PetscFreeSpaceList  free_space=NULL,current_space=NULL;
4650   PetscBT             lnkbt;
4651   Mat_Merge_SeqsToMPI *merge;
4652   PetscContainer      container;
4653 
4654   PetscFunctionBegin;
4655   ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4656 
4657   /* make sure it is a PETSc comm */
4658   ierr = PetscCommDuplicate(comm,&comm,NULL);CHKERRQ(ierr);
4659   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4660   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4661 
4662   ierr = PetscNew(&merge);CHKERRQ(ierr);
4663   ierr = PetscMalloc1(size,&status);CHKERRQ(ierr);
4664 
4665   /* determine row ownership */
4666   /*---------------------------------------------------------*/
4667   ierr = PetscLayoutCreate(comm,&merge->rowmap);CHKERRQ(ierr);
4668   ierr = PetscLayoutSetLocalSize(merge->rowmap,m);CHKERRQ(ierr);
4669   ierr = PetscLayoutSetSize(merge->rowmap,M);CHKERRQ(ierr);
4670   ierr = PetscLayoutSetBlockSize(merge->rowmap,1);CHKERRQ(ierr);
4671   ierr = PetscLayoutSetUp(merge->rowmap);CHKERRQ(ierr);
4672   ierr = PetscMalloc1(size,&len_si);CHKERRQ(ierr);
4673   ierr = PetscMalloc1(size,&merge->len_s);CHKERRQ(ierr);
4674 
4675   m      = merge->rowmap->n;
4676   owners = merge->rowmap->range;
4677 
4678   /* determine the number of messages to send, their lengths */
4679   /*---------------------------------------------------------*/
4680   len_s = merge->len_s;
4681 
4682   len          = 0; /* length of buf_si[] */
4683   merge->nsend = 0;
4684   for (proc=0; proc<size; proc++) {
4685     len_si[proc] = 0;
4686     if (proc == rank) {
4687       len_s[proc] = 0;
4688     } else {
4689       len_si[proc] = owners[proc+1] - owners[proc] + 1;
4690       len_s[proc]  = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */
4691     }
4692     if (len_s[proc]) {
4693       merge->nsend++;
4694       nrows = 0;
4695       for (i=owners[proc]; i<owners[proc+1]; i++) {
4696         if (ai[i+1] > ai[i]) nrows++;
4697       }
4698       len_si[proc] = 2*(nrows+1);
4699       len         += len_si[proc];
4700     }
4701   }
4702 
4703   /* determine the number and length of messages to receive for ij-structure */
4704   /*-------------------------------------------------------------------------*/
4705   ierr = PetscGatherNumberOfMessages(comm,NULL,len_s,&merge->nrecv);CHKERRQ(ierr);
4706   ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr);
4707 
4708   /* post the Irecv of j-structure */
4709   /*-------------------------------*/
4710   ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr);
4711   ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr);
4712 
4713   /* post the Isend of j-structure */
4714   /*--------------------------------*/
4715   ierr = PetscMalloc2(merge->nsend,&si_waits,merge->nsend,&sj_waits);CHKERRQ(ierr);
4716 
4717   for (proc=0, k=0; proc<size; proc++) {
4718     if (!len_s[proc]) continue;
4719     i    = owners[proc];
4720     ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr);
4721     k++;
4722   }
4723 
4724   /* receives and sends of j-structure are complete */
4725   /*------------------------------------------------*/
4726   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);}
4727   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);}
4728 
4729   /* send and recv i-structure */
4730   /*---------------------------*/
4731   ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr);
4732   ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr);
4733 
4734   ierr   = PetscMalloc1((len+1),&buf_s);CHKERRQ(ierr);
4735   buf_si = buf_s;  /* points to the beginning of k-th msg to be sent */
4736   for (proc=0,k=0; proc<size; proc++) {
4737     if (!len_s[proc]) continue;
4738     /* form outgoing message for i-structure:
4739          buf_si[0]:                 nrows to be sent
4740                [1:nrows]:           row index (global)
4741                [nrows+1:2*nrows+1]: i-structure index
4742     */
4743     /*-------------------------------------------*/
4744     nrows       = len_si[proc]/2 - 1;
4745     buf_si_i    = buf_si + nrows+1;
4746     buf_si[0]   = nrows;
4747     buf_si_i[0] = 0;
4748     nrows       = 0;
4749     for (i=owners[proc]; i<owners[proc+1]; i++) {
4750       anzi = ai[i+1] - ai[i];
4751       if (anzi) {
4752         buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */
4753         buf_si[nrows+1]   = i-owners[proc]; /* local row index */
4754         nrows++;
4755       }
4756     }
4757     ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr);
4758     k++;
4759     buf_si += len_si[proc];
4760   }
4761 
4762   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);}
4763   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);}
4764 
4765   ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr);
4766   for (i=0; i<merge->nrecv; i++) {
4767     ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr);
4768   }
4769 
4770   ierr = PetscFree(len_si);CHKERRQ(ierr);
4771   ierr = PetscFree(len_ri);CHKERRQ(ierr);
4772   ierr = PetscFree(rj_waits);CHKERRQ(ierr);
4773   ierr = PetscFree2(si_waits,sj_waits);CHKERRQ(ierr);
4774   ierr = PetscFree(ri_waits);CHKERRQ(ierr);
4775   ierr = PetscFree(buf_s);CHKERRQ(ierr);
4776   ierr = PetscFree(status);CHKERRQ(ierr);
4777 
4778   /* compute a local seq matrix in each processor */
4779   /*----------------------------------------------*/
4780   /* allocate bi array and free space for accumulating nonzero column info */
4781   ierr  = PetscMalloc1((m+1),&bi);CHKERRQ(ierr);
4782   bi[0] = 0;
4783 
4784   /* create and initialize a linked list */
4785   nlnk = N+1;
4786   ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4787 
4788   /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */
4789   len  = ai[owners[rank+1]] - ai[owners[rank]];
4790   ierr = PetscFreeSpaceGet((PetscInt)(2*len+1),&free_space);CHKERRQ(ierr);
4791 
4792   current_space = free_space;
4793 
4794   /* determine symbolic info for each local row */
4795   ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr);
4796 
4797   for (k=0; k<merge->nrecv; k++) {
4798     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th received i-structure */
4799     nrows       = *buf_ri_k[k];
4800     nextrow[k]  = buf_ri_k[k] + 1;  /* next row number of k-th received i-structure */
4801     nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th received i-structure */
4802   }
4803 
4804   ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
4805   len  = 0;
4806   for (i=0; i<m; i++) {
4807     bnzi = 0;
4808     /* add local non-zero cols of this proc's seqmat into lnk */
4809     arow  = owners[rank] + i;
4810     anzi  = ai[arow+1] - ai[arow];
4811     aj    = a->j + ai[arow];
4812     ierr  = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4813     bnzi += nlnk;
4814     /* add received col data into lnk */
4815     for (k=0; k<merge->nrecv; k++) { /* k-th received message */
4816       if (i == *nextrow[k]) { /* i-th row */
4817         anzi  = *(nextai[k]+1) - *nextai[k];
4818         aj    = buf_rj[k] + *nextai[k];
4819         ierr  = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4820         bnzi += nlnk;
4821         nextrow[k]++; nextai[k]++;
4822       }
4823     }
4824     if (len < bnzi) len = bnzi;  /* =max(bnzi) */
4825 
4826     /* if free space is not available, make more free space */
4827     if (current_space->local_remaining<bnzi) {
4828       ierr = PetscFreeSpaceGet(bnzi+current_space->total_array_size,&current_space);CHKERRQ(ierr);
4829       nspacedouble++;
4830     }
4831     /* copy data into free space, then initialize lnk */
4832     ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
4833     ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr);
4834 
4835     current_space->array           += bnzi;
4836     current_space->local_used      += bnzi;
4837     current_space->local_remaining -= bnzi;
4838 
4839     bi[i+1] = bi[i] + bnzi;
4840   }
4841 
4842   ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
4843 
4844   ierr = PetscMalloc1((bi[m]+1),&bj);CHKERRQ(ierr);
4845   ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr);
4846   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
4847 
4848   /* create symbolic parallel matrix B_mpi */
4849   /*---------------------------------------*/
4850   ierr = MatGetBlockSizes(seqmat,&bs,&cbs);CHKERRQ(ierr);
4851   ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr);
4852   if (n==PETSC_DECIDE) {
4853     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr);
4854   } else {
4855     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
4856   }
4857   ierr = MatSetBlockSizes(B_mpi,bs,cbs);CHKERRQ(ierr);
4858   ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr);
4859   ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr);
4860   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
4861   ierr = MatSetOption(B_mpi,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
4862 
4863   /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */
4864   B_mpi->assembled    = PETSC_FALSE;
4865   B_mpi->ops->destroy = MatDestroy_MPIAIJ_SeqsToMPI;
4866   merge->bi           = bi;
4867   merge->bj           = bj;
4868   merge->buf_ri       = buf_ri;
4869   merge->buf_rj       = buf_rj;
4870   merge->coi          = NULL;
4871   merge->coj          = NULL;
4872   merge->owners_co    = NULL;
4873 
4874   ierr = PetscCommDestroy(&comm);CHKERRQ(ierr);
4875 
4876   /* attach the supporting struct to B_mpi for reuse */
4877   ierr    = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
4878   ierr    = PetscContainerSetPointer(container,merge);CHKERRQ(ierr);
4879   ierr    = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr);
4880   ierr    = PetscContainerDestroy(&container);CHKERRQ(ierr);
4881   *mpimat = B_mpi;
4882 
4883   ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4884   PetscFunctionReturn(0);
4885 }
4886 
4887 #undef __FUNCT__
4888 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJ"
4889 /*@C
4890       MatCreateMPIAIJSumSeqAIJ - Creates a MPIAIJ matrix by adding sequential
4891                  matrices from each processor
4892 
4893     Collective on MPI_Comm
4894 
4895    Input Parameters:
4896 +    comm - the communicator the parallel matrix will live on
4897 .    seqmat - the input sequential matrix (one per process)
4898 .    m - number of local rows (or PETSC_DECIDE)
4899 .    n - number of local columns (or PETSC_DECIDE)
4900 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4901 
4902    Output Parameter:
4903 .    mpimat - the parallel matrix generated
4904 
4905     Level: advanced
4906 
4907    Notes:
4908      The dimensions of the sequential matrix in each processor MUST be the same.
4909      The input seqmat is included in the container "Mat_Merge_SeqsToMPI", and will be
4910      destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat.
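
   A minimal usage sketch (hypothetical variable names); the first call builds the symbolic
   structure, and MAT_REUSE_MATRIX reuses it when only the numerical values of seqmat change:

.vb
   Mat seqmat,mpimat;

   MatCreateMPIAIJSumSeqAIJ(PETSC_COMM_WORLD,seqmat,PETSC_DECIDE,PETSC_DECIDE,MAT_INITIAL_MATRIX,&mpimat);
   /* ... update the values of seqmat, keeping its nonzero pattern ... */
   MatCreateMPIAIJSumSeqAIJ(PETSC_COMM_WORLD,seqmat,PETSC_DECIDE,PETSC_DECIDE,MAT_REUSE_MATRIX,&mpimat);
.ve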
4911 @*/
4912 PetscErrorCode  MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat)
4913 {
4914   PetscErrorCode ierr;
4915   PetscMPIInt    size;
4916 
4917   PetscFunctionBegin;
4918   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4919   if (size == 1) {
4920     ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4921     if (scall == MAT_INITIAL_MATRIX) {
4922       ierr = MatDuplicate(seqmat,MAT_COPY_VALUES,mpimat);CHKERRQ(ierr);
4923     } else {
4924       ierr = MatCopy(seqmat,*mpimat,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
4925     }
4926     ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4927     PetscFunctionReturn(0);
4928   }
4929   ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4930   if (scall == MAT_INITIAL_MATRIX) {
4931     ierr = MatCreateMPIAIJSumSeqAIJSymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr);
4932   }
4933   ierr = MatCreateMPIAIJSumSeqAIJNumeric(seqmat,*mpimat);CHKERRQ(ierr);
4934   ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4935   PetscFunctionReturn(0);
4936 }
4937 
4938 #undef __FUNCT__
4939 #define __FUNCT__ "MatMPIAIJGetLocalMat"
4940 /*@
4941      MatMPIAIJGetLocalMat - Creates a SeqAIJ matrix from an MPIAIJ matrix by taking all its local rows and putting them into a sequential matrix with
4942           mlocal rows and n columns, where mlocal is the row count obtained with MatGetLocalSize() and n is the global column count obtained
4943           with MatGetSize()
4944 
4945     Not Collective
4946 
4947    Input Parameters:
4948 +    A - the matrix
4949 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4950 
4951    Output Parameter:
4952 .    A_loc - the local sequential matrix generated
4953 
4954     Level: developer
4955 
4956 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMatCondensed()
4957 
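   A minimal usage sketch (hypothetical variable names):

.vb
   Mat A_loc;

   MatMPIAIJGetLocalMat(A,MAT_INITIAL_MATRIX,&A_loc);
   /* ... use A_loc ... */
   MatDestroy(&A_loc);
.ve
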
4958 @*/
4959 PetscErrorCode  MatMPIAIJGetLocalMat(Mat A,MatReuse scall,Mat *A_loc)
4960 {
4961   PetscErrorCode ierr;
4962   Mat_MPIAIJ     *mpimat=(Mat_MPIAIJ*)A->data;
4963   Mat_SeqAIJ     *mat,*a,*b;
4964   PetscInt       *ai,*aj,*bi,*bj,*cmap=mpimat->garray;
4965   MatScalar      *aa,*ba,*cam;
4966   PetscScalar    *ca;
4967   PetscInt       am=A->rmap->n,i,j,k,cstart=A->cmap->rstart;
4968   PetscInt       *ci,*cj,col,ncols_d,ncols_o,jo;
4969   PetscBool      match;
4970 
4971   PetscFunctionBegin;
4972   ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr);
4973   if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MPIAIJ matrix as input");
4974   ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
4975   a = (Mat_SeqAIJ*)(mpimat->A)->data;
4976   b = (Mat_SeqAIJ*)(mpimat->B)->data;
4977   ai = a->i; aj = a->j; bi = b->i; bj = b->j;
4978   aa = a->a; ba = b->a;
4979   if (scall == MAT_INITIAL_MATRIX) {
4980     ierr  = PetscMalloc1((1+am),&ci);CHKERRQ(ierr);
4981     ci[0] = 0;
4982     for (i=0; i<am; i++) {
4983       ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]);
4984     }
4985     ierr = PetscMalloc1((1+ci[am]),&cj);CHKERRQ(ierr);
4986     ierr = PetscMalloc1((1+ci[am]),&ca);CHKERRQ(ierr);
4987     k    = 0;
4988     for (i=0; i<am; i++) {
4989       ncols_o = bi[i+1] - bi[i];
4990       ncols_d = ai[i+1] - ai[i];
4991       /* off-diagonal portion of A */
4992       for (jo=0; jo<ncols_o; jo++) {
4993         col = cmap[*bj];
4994         if (col >= cstart) break;
4995         cj[k]   = col; bj++;
4996         ca[k++] = *ba++;
4997       }
4998       /* diagonal portion of A */
4999       for (j=0; j<ncols_d; j++) {
5000         cj[k]   = cstart + *aj++;
5001         ca[k++] = *aa++;
5002       }
5003       /* off-diagonal portion of A */
5004       for (j=jo; j<ncols_o; j++) {
5005         cj[k]   = cmap[*bj++];
5006         ca[k++] = *ba++;
5007       }
5008     }
5009     /* put together the new matrix */
5010     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr);
5011     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
5012     /* Since these are PETSc arrays, change flags to free them as necessary. */
5013     mat          = (Mat_SeqAIJ*)(*A_loc)->data;
5014     mat->free_a  = PETSC_TRUE;
5015     mat->free_ij = PETSC_TRUE;
5016     mat->nonew   = 0;
5017   } else if (scall == MAT_REUSE_MATRIX) {
5018     mat=(Mat_SeqAIJ*)(*A_loc)->data;
5019     ci = mat->i; cj = mat->j; cam = mat->a;
5020     for (i=0; i<am; i++) {
5021       /* off-diagonal portion of A */
5022       ncols_o = bi[i+1] - bi[i];
5023       for (jo=0; jo<ncols_o; jo++) {
5024         col = cmap[*bj];
5025         if (col >= cstart) break;
5026         *cam++ = *ba++; bj++;
5027       }
5028       /* diagonal portion of A */
5029       ncols_d = ai[i+1] - ai[i];
5030       for (j=0; j<ncols_d; j++) *cam++ = *aa++;
5031       /* off-diagonal portion of A */
5032       for (j=jo; j<ncols_o; j++) {
5033         *cam++ = *ba++; bj++;
5034       }
5035     }
5036   } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
5037   ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
5038   PetscFunctionReturn(0);
5039 }
5040 
5041 #undef __FUNCT__
5042 #define __FUNCT__ "MatMPIAIJGetLocalMatCondensed"
5043 /*@C
5044      MatMPIAIJGetLocalMatCondensed - Creates a SeqAIJ matrix from an MPIAIJ matrix by taking all its local rows and NON-ZERO columns
5045 
5046     Not Collective
5047 
5048    Input Parameters:
5049 +    A - the matrix
5050 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5051 -    row, col - index sets of rows and columns to extract (or NULL)
5052 
5053    Output Parameter:
5054 .    A_loc - the local sequential matrix generated
5055 
5056     Level: developer
5057 
5058 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMat()
5059 
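   A minimal usage sketch (hypothetical variable names); pass NULL for row and col to take
   all local rows and all nonzero columns:

.vb
   Mat A_loc;

   MatMPIAIJGetLocalMatCondensed(A,MAT_INITIAL_MATRIX,NULL,NULL,&A_loc);
.ve
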
5060 @*/
5061 PetscErrorCode  MatMPIAIJGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc)
5062 {
5063   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
5064   PetscErrorCode ierr;
5065   PetscInt       i,start,end,ncols,nzA,nzB,*cmap,imark,*idx;
5066   IS             isrowa,iscola;
5067   Mat            *aloc;
5068   PetscBool      match;
5069 
5070   PetscFunctionBegin;
5071   ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr);
5072   if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MPIAIJ matrix as input");
5073   ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
5074   if (!row) {
5075     start = A->rmap->rstart; end = A->rmap->rend;
5076     ierr  = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr);
5077   } else {
5078     isrowa = *row;
5079   }
5080   if (!col) {
5081     start = A->cmap->rstart;
5082     cmap  = a->garray;
5083     nzA   = a->A->cmap->n;
5084     nzB   = a->B->cmap->n;
5085     ierr  = PetscMalloc1((nzA+nzB), &idx);CHKERRQ(ierr);
5086     ncols = 0;
5087     for (i=0; i<nzB; i++) {
5088       if (cmap[i] < start) idx[ncols++] = cmap[i];
5089       else break;
5090     }
5091     imark = i;
5092     for (i=0; i<nzA; i++) idx[ncols++] = start + i;
5093     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i];
5094     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&iscola);CHKERRQ(ierr);
5095   } else {
5096     iscola = *col;
5097   }
5098   if (scall != MAT_INITIAL_MATRIX) {
5099     ierr    = PetscMalloc(sizeof(Mat),&aloc);CHKERRQ(ierr);
5100     aloc[0] = *A_loc;
5101   }
5102   ierr   = MatGetSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr);
5103   *A_loc = aloc[0];
5104   ierr   = PetscFree(aloc);CHKERRQ(ierr);
5105   if (!row) {
5106     ierr = ISDestroy(&isrowa);CHKERRQ(ierr);
5107   }
5108   if (!col) {
5109     ierr = ISDestroy(&iscola);CHKERRQ(ierr);
5110   }
5111   ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
5112   PetscFunctionReturn(0);
5113 }
5114 
5115 #undef __FUNCT__
5116 #define __FUNCT__ "MatGetBrowsOfAcols"
5117 /*@C
5118     MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking rows of B that correspond to nonzero columns of local A
5119 
5120     Collective on Mat
5121 
5122    Input Parameters:
5123 +    A,B - the matrices in mpiaij format
5124 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5125 -    rowb, colb - index sets of rows and columns of B to extract (or NULL)
5126 
5127    Output Parameter:
5128 +    rowb, colb - index sets of rows and columns of B to extract
5129 -    B_seq - the sequential matrix generated
5130 
5131     Level: developer
5132 
5133 @*/
5134 PetscErrorCode  MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,Mat *B_seq)
5135 {
5136   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
5137   PetscErrorCode ierr;
5138   PetscInt       *idx,i,start,ncols,nzA,nzB,*cmap,imark;
5139   IS             isrowb,iscolb;
5140   Mat            *bseq=NULL;
5141 
5142   PetscFunctionBegin;
5143   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
5144     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
5145   }
5146   ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
5147 
5148   if (scall == MAT_INITIAL_MATRIX) {
5149     start = A->cmap->rstart;
5150     cmap  = a->garray;
5151     nzA   = a->A->cmap->n;
5152     nzB   = a->B->cmap->n;
5153     ierr  = PetscMalloc1((nzA+nzB), &idx);CHKERRQ(ierr);
5154     ncols = 0;
5155     for (i=0; i<nzB; i++) {  /* B rows with global index below the local block */
5156       if (cmap[i] < start) idx[ncols++] = cmap[i];
5157       else break;
5158     }
5159     imark = i;
5160     for (i=0; i<nzA; i++) idx[ncols++] = start + i;  /* rows of the local diagonal block */
5161     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* B rows with global index above the local block */
5162     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&isrowb);CHKERRQ(ierr);
5163     ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr);
5164   } else {
5165     if (!rowb || !colb) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX");
5166     isrowb  = *rowb; iscolb = *colb;
5167     ierr    = PetscMalloc(sizeof(Mat),&bseq);CHKERRQ(ierr);
5168     bseq[0] = *B_seq;
5169   }
5170   ierr   = MatGetSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr);
5171   *B_seq = bseq[0];
5172   ierr   = PetscFree(bseq);CHKERRQ(ierr);
5173   if (!rowb) {
5174     ierr = ISDestroy(&isrowb);CHKERRQ(ierr);
5175   } else {
5176     *rowb = isrowb;
5177   }
5178   if (!colb) {
5179     ierr = ISDestroy(&iscolb);CHKERRQ(ierr);
5180   } else {
5181     *colb = iscolb;
5182   }
5183   ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
5184   PetscFunctionReturn(0);
5185 }
5186 
5187 #undef __FUNCT__
5188 #define __FUNCT__ "MatGetBrowsOfAoCols_MPIAIJ"
5189 /*
5190     MatGetBrowsOfAoCols_MPIAIJ - Creates a SeqAIJ matrix by taking the rows of B that correspond to the
5191     nonzero columns of the OFF-DIAGONAL portion of the local part of A
5192 
5193     Collective on Mat
5194 
5195    Input Parameters:
5196 +    A,B - the matrices in mpiaij format
5197 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5198 
5199    Output Parameter:
5200 +    startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or NULL)
5201 .    startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or NULL)
5202 .    bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or NULL)
5203 -    B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N
5204 
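   Algorithm sketch: the routine performs three rounds of nonblocking point-to-point
   communication over the VecScatter context a->Mvctx: (1) exchange row lengths to
   build the i-array of B_oth, (2) exchange column indices (the j-array), and
   (3) exchange numerical values (the a-array). With MAT_REUSE_MATRIX only round (3)
   is performed, using the message layouts and buffer saved in startsj_s, startsj_r
   and bufa_ptr.
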
5205     Level: developer
5206 
5207 */
5208 PetscErrorCode  MatGetBrowsOfAoCols_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscInt **startsj_s,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth)
5209 {
5210   VecScatter_MPI_General *gen_to,*gen_from;
5211   PetscErrorCode         ierr;
5212   Mat_MPIAIJ             *a=(Mat_MPIAIJ*)A->data;
5213   Mat_SeqAIJ             *b_oth;
5214   VecScatter             ctx =a->Mvctx;
5215   MPI_Comm               comm;
5216   PetscMPIInt            *rprocs,*sprocs,tag=((PetscObject)ctx)->tag,rank;
5217   PetscInt               *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap->n,row,*b_othi,*b_othj;
5218   PetscScalar            *rvalues,*svalues;
5219   MatScalar              *b_otha,*bufa,*bufA;
5220   PetscInt               i,j,k,l,ll,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len;
5221   MPI_Request            *rwaits = NULL,*swaits = NULL;
5222   MPI_Status             *sstatus,rstatus;
5223   PetscMPIInt            jj;
5224   PetscInt               *cols,sbs,rbs;
5225   PetscScalar            *vals;
5226 
5227   PetscFunctionBegin;
5228   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
5229   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
5230     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
5231   }
5232   ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
5233   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
5234 
5235   gen_to   = (VecScatter_MPI_General*)ctx->todata;
5236   gen_from = (VecScatter_MPI_General*)ctx->fromdata;
5237   rvalues  = gen_from->values; /* reused as the buffer for incoming row lengths */
5238   svalues  = gen_to->values;   /* reused as the buffer for outgoing row lengths */
5239   nrecvs   = gen_from->n;
5240   nsends   = gen_to->n;
5241 
5242   ierr    = PetscMalloc2(nrecvs,&rwaits,nsends,&swaits);CHKERRQ(ierr);
5243   srow    = gen_to->indices;    /* local row index to be sent */
5244   sstarts = gen_to->starts;
5245   sprocs  = gen_to->procs;
5246   sstatus = gen_to->sstatus;
5247   sbs     = gen_to->bs;
5248   rstarts = gen_from->starts;
5249   rprocs  = gen_from->procs;
5250   rbs     = gen_from->bs;
5251 
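  /* the communication layout can only be reused if the caller saved it, so otherwise rebuild from scratch */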
5252   if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX;
5253   if (scall == MAT_INITIAL_MATRIX) {
5254     /* i-array */
5255     /*---------*/
5256     /*  post receives */
5257     for (i=0; i<nrecvs; i++) {
5258       rowlen = (PetscInt*)rvalues + rstarts[i]*rbs;
5259       nrows  = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */
5260       ierr   = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5261     }
5262 
5263     /* pack the outgoing message */
5264     ierr = PetscMalloc2(nsends+1,&sstartsj,nrecvs+1,&rstartsj);CHKERRQ(ierr);
5265 
5266     sstartsj[0] = 0;
5267     rstartsj[0] = 0;
5268     len         = 0; /* total length of j or a array to be sent */
5269     k           = 0;
5270     for (i=0; i<nsends; i++) {
5271       rowlen = (PetscInt*)svalues + sstarts[i]*sbs;
5272       nrows  = sstarts[i+1]-sstarts[i]; /* num of block rows */
5273       for (j=0; j<nrows; j++) {
5274         row = srow[k] + B->rmap->range[rank]; /* global row idx */
5275         for (l=0; l<sbs; l++) {
5276           ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr); /* rowlength */
5277 
5278           rowlen[j*sbs+l] = ncols;
5279 
5280           len += ncols;
5281           ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr);
5282         }
5283         k++;
5284       }
5285       ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5286 
5287       sstartsj[i+1] = len;  /* starting point of (i+1)-th outgoing msg in bufj and bufa */
5288     }
5289     /* recvs and sends of i-array are completed */
5290     i = nrecvs;
5291     while (i--) {
5292       ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5293     }
5294     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
5295 
5296     /* allocate buffers for sending j and a arrays */
5297     ierr = PetscMalloc1((len+1),&bufj);CHKERRQ(ierr);
5298     ierr = PetscMalloc1((len+1),&bufa);CHKERRQ(ierr);
5299 
5300     /* create i-array of B_oth */
5301     ierr = PetscMalloc1((aBn+2),&b_othi);CHKERRQ(ierr);
5302 
5303     b_othi[0] = 0;
5304     len       = 0; /* total length of j or a array to be received */
5305     k         = 0;
5306     for (i=0; i<nrecvs; i++) {
5307       rowlen = (PetscInt*)rvalues + rstarts[i]*rbs;
5308       nrows  = rbs*(rstarts[i+1]-rstarts[i]); /* num of rows to be received */
5309       for (j=0; j<nrows; j++) {
5310         b_othi[k+1] = b_othi[k] + rowlen[j];
5311         len        += rowlen[j]; k++;
5312       }
5313       rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */
5314     }
5315 
5316     /* allocate space for the j and a arrays of B_oth */
5317     ierr = PetscMalloc1((b_othi[aBn]+1),&b_othj);CHKERRQ(ierr);
5318     ierr = PetscMalloc1((b_othi[aBn]+1),&b_otha);CHKERRQ(ierr);
5319 
5320     /* j-array */
5321     /*---------*/
5322     /*  post receives of j-array */
5323     for (i=0; i<nrecvs; i++) {
5324       nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
5325       ierr  = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5326     }
5327 
5328     /* pack the outgoing message j-array */
5329     k = 0;
5330     for (i=0; i<nsends; i++) {
5331       nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5332       bufJ  = bufj+sstartsj[i];
5333       for (j=0; j<nrows; j++) {
5334         row = srow[k++] + B->rmap->range[rank];  /* global row idx */
5335         for (ll=0; ll<sbs; ll++) {
5336           ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr);
5337           for (l=0; l<ncols; l++) {
5338             *bufJ++ = cols[l];
5339           }
5340           ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr);
5341         }
5342       }
5343       ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5344     }
5345 
5346     /* recvs and sends of j-array are completed */
5347     i = nrecvs;
5348     while (i--) {
5349       ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5350     }
5351     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
5352   } else if (scall == MAT_REUSE_MATRIX) {
5353     sstartsj = *startsj_s;
5354     rstartsj = *startsj_r;
5355     bufa     = *bufa_ptr;
5356     b_oth    = (Mat_SeqAIJ*)(*B_oth)->data;
5357     b_otha   = b_oth->a;
5358   } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE, "Invalid MatReuse value; must be MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX");
5359 
5360   /* a-array */
5361   /*---------*/
5362   /*  post receives of a-array */
5363   for (i=0; i<nrecvs; i++) {
5364     nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
5365     ierr  = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5366   }
5367 
5368   /* pack the outgoing message a-array */
5369   k = 0;
5370   for (i=0; i<nsends; i++) {
5371     nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5372     bufA  = bufa+sstartsj[i];
5373     for (j=0; j<nrows; j++) {
5374       row = srow[k++] + B->rmap->range[rank];  /* global row idx */
5375       for (ll=0; ll<sbs; ll++) {
5376         ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr);
5377         for (l=0; l<ncols; l++) {
5378           *bufA++ = vals[l];
5379         }
5380         ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr);
5381       }
5382     }
5383     ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5384   }
5385   /* recvs and sends of a-array are completed */
5386   i = nrecvs;
5387   while (i--) {
5388     ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5389   }
5390   if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
5391   ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr);
5392 
5393   if (scall == MAT_INITIAL_MATRIX) {
5394     /* put together the new matrix */
5395     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr);
5396 
5397     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
5398     /* Since these are PETSc arrays, change flags to free them as necessary. */
5399     b_oth          = (Mat_SeqAIJ*)(*B_oth)->data;
5400     b_oth->free_a  = PETSC_TRUE;
5401     b_oth->free_ij = PETSC_TRUE;
5402     b_oth->nonew   = 0;
5403 
5404     ierr = PetscFree(bufj);CHKERRQ(ierr);
5405     if (!startsj_s || !bufa_ptr) {
5406       ierr = PetscFree2(sstartsj,rstartsj);CHKERRQ(ierr);
5407       ierr = PetscFree(bufa);CHKERRQ(ierr); /* the send buffer is not being saved, so free it (bufa_ptr may be NULL here) */
5408     } else {
5409       *startsj_s = sstartsj;
5410       *startsj_r = rstartsj;
5411       *bufa_ptr  = bufa;
5412     }
5413   }
5414   ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
5415   PetscFunctionReturn(0);
5416 }
5417 
5418 #undef __FUNCT__
5419 #define __FUNCT__ "MatGetCommunicationStructs"
5420 /*@C
5421   MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication.
5422 
5423   Not Collective
5424 
5425   Input Parameters:
5426 . A - The matrix in mpiaij format
5427 
5428   Output Parameter:
5429 + lvec - The local vector holding off-process values from the argument to a matrix-vector product
5430 . colmap - A map from global column index to local index into lvec
5431 - multScatter - A scatter from the argument of a matrix-vector product to lvec
5432 
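  Example usage (a minimal sketch; note that the type of colmap depends on whether
  PETSc was configured with PETSC_USE_CTABLE):
.vb
    Vec        lvec;
    VecScatter Mvctx;
#if defined(PETSC_USE_CTABLE)
    PetscTable colmap;
#else
    PetscInt   *colmap;
#endif
    MatGetCommunicationStructs(A,&lvec,&colmap,&Mvctx);
.ve
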
5433   Level: developer
5434 
5435 @*/
5436 #if defined(PETSC_USE_CTABLE)
5437 PetscErrorCode  MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter)
5438 #else
5439 PetscErrorCode  MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter)
5440 #endif
5441 {
5442   Mat_MPIAIJ *a;
5443 
5444   PetscFunctionBegin;
5445   PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
5446   PetscValidPointer(lvec, 2);
5447   PetscValidPointer(colmap, 3);
5448   PetscValidPointer(multScatter, 4);
5449   a = (Mat_MPIAIJ*) A->data;
5450   if (lvec) *lvec = a->lvec;
5451   if (colmap) *colmap = a->colmap;
5452   if (multScatter) *multScatter = a->Mvctx;
5453   PetscFunctionReturn(0);
5454 }
5455 
5456 PETSC_EXTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat,MatType,MatReuse,Mat*);
5457 PETSC_EXTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJPERM(Mat,MatType,MatReuse,Mat*);
5458 PETSC_EXTERN PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat,MatType,MatReuse,Mat*);
5459 
5460 #undef __FUNCT__
5461 #define __FUNCT__ "MatMatMultNumeric_MPIDense_MPIAIJ"
5462 /*
5463     Computes C = A*B as (B'*A')', using A*B = ((A*B)')' = (B'*A')', since computing A*B directly is untenable
5464 
5465                n                       p                          p
5466         (              )       (              )         (                  )
5467       m (      A       )  *  n (       B      )   =   m (         C        )
5468         (              )       (              )         (                  )
5469 
5470 */
5471 PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C)
5472 {
5473   PetscErrorCode ierr;
5474   Mat            At,Bt,Ct;
5475 
5476   PetscFunctionBegin;
5477   ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr);
5478   ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr);
5479   ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,1.0,&Ct);CHKERRQ(ierr);
5480   ierr = MatDestroy(&At);CHKERRQ(ierr);
5481   ierr = MatDestroy(&Bt);CHKERRQ(ierr);
5482   ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr);
5483   ierr = MatDestroy(&Ct);CHKERRQ(ierr);
5484   PetscFunctionReturn(0);
5485 }
5486 
5487 #undef __FUNCT__
5488 #define __FUNCT__ "MatMatMultSymbolic_MPIDense_MPIAIJ"
5489 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat *C)
5490 {
5491   PetscErrorCode ierr;
5492   PetscInt       m=A->rmap->n,n=B->cmap->n;
5493   Mat            Cmat;
5494 
5495   PetscFunctionBegin;
5496   if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %D != B->rmap->n %D",A->cmap->n,B->rmap->n);
5497   ierr = MatCreate(PetscObjectComm((PetscObject)A),&Cmat);CHKERRQ(ierr);
5498   ierr = MatSetSizes(Cmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
5499   ierr = MatSetBlockSizesFromMats(Cmat,A,B);CHKERRQ(ierr);
5500   ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr);
5501   ierr = MatMPIDenseSetPreallocation(Cmat,NULL);CHKERRQ(ierr);
5502   ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5503   ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5504 
5505   Cmat->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIAIJ;
5506 
5507   *C = Cmat;
5508   PetscFunctionReturn(0);
5509 }
5510 
5511 /* ----------------------------------------------------------------*/
5512 #undef __FUNCT__
5513 #define __FUNCT__ "MatMatMult_MPIDense_MPIAIJ"
5514 PetscErrorCode MatMatMult_MPIDense_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C)
5515 {
5516   PetscErrorCode ierr;
5517 
5518   PetscFunctionBegin;
5519   if (scall == MAT_INITIAL_MATRIX) {
5520     ierr = PetscLogEventBegin(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr);
5521     ierr = MatMatMultSymbolic_MPIDense_MPIAIJ(A,B,fill,C);CHKERRQ(ierr);
5522     ierr = PetscLogEventEnd(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr);
5523   }
5524   ierr = PetscLogEventBegin(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr);
5525   ierr = MatMatMultNumeric_MPIDense_MPIAIJ(A,B,*C);CHKERRQ(ierr);
5526   ierr = PetscLogEventEnd(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr);
5527   PetscFunctionReturn(0);
5528 }
5529 
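/*
   The MatMatMult kernels above are reached through the public MatMatMult() interface once
   the operations are composed in MatCreate_MPIAIJ() below; a minimal usage sketch, with A
   of type MATMPIDENSE and B of type MATMPIAIJ (error checking omitted):

     Mat C;
     MatMatMult(A,B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&C);
     MatMatMult(A,B,MAT_REUSE_MATRIX,PETSC_DEFAULT,&C);
     MatDestroy(&C);
*/
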
5530 #if defined(PETSC_HAVE_MUMPS)
5531 PETSC_EXTERN PetscErrorCode MatGetFactor_aij_mumps(Mat,MatFactorType,Mat*);
5532 #endif
5533 #if defined(PETSC_HAVE_PASTIX)
5534 PETSC_EXTERN PetscErrorCode MatGetFactor_mpiaij_pastix(Mat,MatFactorType,Mat*);
5535 #endif
5536 #if defined(PETSC_HAVE_SUPERLU_DIST)
5537 PETSC_EXTERN PetscErrorCode MatGetFactor_mpiaij_superlu_dist(Mat,MatFactorType,Mat*);
5538 #endif
5539 #if defined(PETSC_HAVE_CLIQUE)
5540 PETSC_EXTERN PetscErrorCode MatGetFactor_aij_clique(Mat,MatFactorType,Mat*);
5541 #endif
5542 
5543 /*MC
5544    MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.
5545 
5546    Options Database Keys:
5547 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions()
5548 
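  Example usage (a minimal sketch; comm, m, n, M, N and the preallocation estimates
  d_nz and o_nz are application-dependent placeholders, error checking omitted):
.vb
    Mat A;
    MatCreate(comm,&A);
    MatSetSizes(A,m,n,M,N);
    MatSetType(A,MATMPIAIJ);
    MatMPIAIJSetPreallocation(A,d_nz,NULL,o_nz,NULL);
    /* set entries with MatSetValues(), then MatAssemblyBegin()/MatAssemblyEnd() */
.ve
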
5549   Level: beginner
5550 
5551 .seealso: MatCreateAIJ()
5552 M*/
5553 
5554 #undef __FUNCT__
5555 #define __FUNCT__ "MatCreate_MPIAIJ"
5556 PETSC_EXTERN PetscErrorCode MatCreate_MPIAIJ(Mat B)
5557 {
5558   Mat_MPIAIJ     *b;
5559   PetscErrorCode ierr;
5560   PetscMPIInt    size;
5561 
5562   PetscFunctionBegin;
5563   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRQ(ierr);
5564 
5565   ierr          = PetscNewLog(B,&b);CHKERRQ(ierr);
5566   B->data       = (void*)b;
5567   ierr          = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
5568   B->assembled  = PETSC_FALSE;
5569   B->insertmode = NOT_SET_VALUES;
5570   b->size       = size;
5571 
5572   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank);CHKERRQ(ierr);
5573 
5574   /* build cache for off array entries formed */
5575   ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash);CHKERRQ(ierr);
5576 
5577   b->donotstash  = PETSC_FALSE;
5578   b->colmap      = 0;
5579   b->garray      = 0;
5580   b->roworiented = PETSC_TRUE;
5581 
5582   /* stuff used for matrix vector multiply */
5583   b->lvec  = NULL;
5584   b->Mvctx = NULL;
5585 
5586   /* stuff for MatGetRow() */
5587   b->rowindices   = 0;
5588   b->rowvalues    = 0;
5589   b->getrowactive = PETSC_FALSE;
5590 
5591   /* flexible pointer used in CUSP/CUSPARSE classes */
5592   b->spptr = NULL;
5593 
5594 #if defined(PETSC_HAVE_MUMPS)
5595   ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_mumps_C",MatGetFactor_aij_mumps);CHKERRQ(ierr);
5596 #endif
5597 #if defined(PETSC_HAVE_PASTIX)
5598   ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_pastix_C",MatGetFactor_mpiaij_pastix);CHKERRQ(ierr);
5599 #endif
5600 #if defined(PETSC_HAVE_SUPERLU_DIST)
5601   ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_superlu_dist_C",MatGetFactor_mpiaij_superlu_dist);CHKERRQ(ierr);
5602 #endif
5603 #if defined(PETSC_HAVE_CLIQUE)
5604   ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_clique_C",MatGetFactor_aij_clique);CHKERRQ(ierr);
5605 #endif
5606   ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPIAIJ);CHKERRQ(ierr);
5607   ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPIAIJ);CHKERRQ(ierr);
5608   ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetDiagonalBlock_C",MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr);
5609   ierr = PetscObjectComposeFunction((PetscObject)B,"MatIsTranspose_C",MatIsTranspose_MPIAIJ);CHKERRQ(ierr);
5610   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr);
5611   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr);
5612   ierr = PetscObjectComposeFunction((PetscObject)B,"MatDiagonalScaleLocal_C",MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr);
5613   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijperm_C",MatConvert_MPIAIJ_MPIAIJPERM);CHKERRQ(ierr);
5614   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijcrl_C",MatConvert_MPIAIJ_MPIAIJCRL);CHKERRQ(ierr);
5615   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C",MatConvert_MPIAIJ_MPISBAIJ);CHKERRQ(ierr);
5616   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMult_mpidense_mpiaij_C",MatMatMult_MPIDense_MPIAIJ);CHKERRQ(ierr);
5617   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultSymbolic_mpidense_mpiaij_C",MatMatMultSymbolic_MPIDense_MPIAIJ);CHKERRQ(ierr);
5618   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultNumeric_mpidense_mpiaij_C",MatMatMultNumeric_MPIDense_MPIAIJ);CHKERRQ(ierr);
5619   ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr);
5620   PetscFunctionReturn(0);
5621 }
5622 
5623 #undef __FUNCT__
5624 #define __FUNCT__ "MatCreateMPIAIJWithSplitArrays"
5625 /*@
5626      MatCreateMPIAIJWithSplitArrays - creates an MPIAIJ matrix using arrays that contain the "diagonal"
5627          and "off-diagonal" part of the matrix in CSR format.
5628 
5629    Collective on MPI_Comm
5630 
5631    Input Parameters:
5632 +  comm - MPI communicator
5633 .  m - number of local rows (Cannot be PETSC_DECIDE)
5634 .  n - This value should be the same as the local size used in creating the
5635        x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have it
5636        calculated if N is given). For square matrices n is almost always m.
5637 .  M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given)
5638 .  N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given)
5639 .   i - row indices for "diagonal" portion of matrix
5640 .   j - column indices
5641 .   a - matrix values
5642 .   oi - row indices for "off-diagonal" portion of matrix
5643 .   oj - column indices
5644 -   oa - matrix values
5645 
5646    Output Parameter:
5647 .   mat - the matrix
5648 
5649    Level: advanced
5650 
5651    Notes:
5652        The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc. The user
5653        must free the arrays once the matrix has been destroyed and not before.
5654 
5655        The i and j indices are 0 based
5656 
5657        See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix
5658 
5659        This sets local rows and cannot be used to set off-processor values.
5660 
5661        Use of this routine is discouraged because it is inflexible and cumbersome to use. It is extremely rare that a
5662        legacy application natively assembles into exactly this split format. The code to do so is nontrivial and does
5663        not easily support in-place reassembly. It is recommended to use MatSetValues() (or a variant thereof) because
5664        the resulting assembly is easier to implement, will work with any matrix format, and the user does not have to
5665        keep track of the underlying array. Use MatSetOption(A,MAT_IGNORE_OFF_PROC_ENTRIES,PETSC_TRUE) to disable all
5666        communication if it is known that only local entries will be set.
5667 
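   Example usage (a hedged sketch; the CSR arrays are application-dependent placeholders
   and, as noted above, must remain valid until the matrix is destroyed):
.vb
     Mat A;
     /* i,j,a: CSR of the "diagonal" block; oi,oj,oa: CSR of the "off-diagonal" block */
     MatCreateMPIAIJWithSplitArrays(comm,m,n,PETSC_DETERMINE,PETSC_DETERMINE,
                                    i,j,a,oi,oj,oa,&A);
     /* ... use A ... */
     MatDestroy(&A);
     /* only now may the user free i,j,a,oi,oj,oa */
.ve
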
5668 .keywords: matrix, aij, compressed row, sparse, parallel
5669 
5670 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
5671           MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithArrays()
5672 @*/
5673 PetscErrorCode  MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat)
5674 {
5675   PetscErrorCode ierr;
5676   Mat_MPIAIJ     *maij;
5677 
5678   PetscFunctionBegin;
5679   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
5680   if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
5681   if (oi[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0");
5682   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
5683   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
5684   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
5685   maij = (Mat_MPIAIJ*) (*mat)->data;
5686 
5687   (*mat)->preallocated = PETSC_TRUE;
5688 
5689   ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr);
5690   ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr);
5691 
5692   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr);
5693   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr);
5694 
5695   ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5696   ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5697   ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5698   ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5699 
5700   ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5701   ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5702   ierr = MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
5703   PetscFunctionReturn(0);
5704 }
5705 
5706 /*
5707     Special version for direct calls from Fortran, avoiding the overhead of the generated Fortran interface
5708 */
5709 #include <petsc-private/fortranimpl.h>
5710 
5711 #if defined(PETSC_HAVE_FORTRAN_CAPS)
5712 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ
5713 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
5714 #define matsetvaluesmpiaij_ matsetvaluesmpiaij
5715 #endif
5716 
5717 /* Change these macros so they can be used in a void function: on error they abort rather than return an error code */
5718 #undef CHKERRQ
5719 #define CHKERRQ(ierr) CHKERRABORT(PETSC_COMM_WORLD,ierr)
5720 #undef SETERRQ2
5721 #define SETERRQ2(comm,ierr,b,c,d) CHKERRABORT(comm,ierr)
5722 #undef SETERRQ3
5723 #define SETERRQ3(comm,ierr,b,c,d,e) CHKERRABORT(comm,ierr)
5724 #undef SETERRQ
5725 #define SETERRQ(c,ierr,b) CHKERRABORT(c,ierr)
5726 
5727 #undef __FUNCT__
5728 #define __FUNCT__ "matsetvaluesmpiaij_"
5729 PETSC_EXTERN void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr)
5730 {
5731   Mat            mat  = *mmat;
5732   PetscInt       m    = *mm, n = *mn;
5733   InsertMode     addv = *maddv;
5734   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
5735   PetscScalar    value;
5736   PetscErrorCode ierr;
5737 
5738   MatCheckPreallocated(mat,1);
5739   if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv;
5740 
5741 #if defined(PETSC_USE_DEBUG)
5742   else if (mat->insertmode != addv) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
5743 #endif
5744   {
5745     PetscInt  i,j,rstart  = mat->rmap->rstart,rend = mat->rmap->rend;
5746     PetscInt  cstart      = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
5747     PetscBool roworiented = aij->roworiented;
5748 
5749     /* Some variables required by the MatSetValues_SeqAIJ_*_Private() macros */
5750     Mat        A                 = aij->A;
5751     Mat_SeqAIJ *a                = (Mat_SeqAIJ*)A->data;
5752     PetscInt   *aimax            = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
5753     MatScalar  *aa               = a->a;
5754     PetscBool  ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES)) ? PETSC_TRUE : PETSC_FALSE);
5755     Mat        B                 = aij->B;
5756     Mat_SeqAIJ *b                = (Mat_SeqAIJ*)B->data;
5757     PetscInt   *bimax            = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
5758     MatScalar  *ba               = b->a;
5759 
5760     PetscInt  *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
5761     PetscInt  nonew = a->nonew;
5762     MatScalar *ap1,*ap2;
5763 
5764     PetscFunctionBegin;
5765     for (i=0; i<m; i++) {
5766       if (im[i] < 0) continue;
5767 #if defined(PETSC_USE_DEBUG)
5768       if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
5769 #endif
5770       if (im[i] >= rstart && im[i] < rend) {
5771         row      = im[i] - rstart;
5772         lastcol1 = -1;
5773         rp1      = aj + ai[row];
5774         ap1      = aa + ai[row];
5775         rmax1    = aimax[row];
5776         nrow1    = ailen[row];
5777         low1     = 0;
5778         high1    = nrow1;
5779         lastcol2 = -1;
5780         rp2      = bj + bi[row];
5781         ap2      = ba + bi[row];
5782         rmax2    = bimax[row];
5783         nrow2    = bilen[row];
5784         low2     = 0;
5785         high2    = nrow2;
5786 
5787         for (j=0; j<n; j++) {
5788           if (roworiented) value = v[i*n+j];
5789           else value = v[i+j*m];
5790           if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
5791           if (in[j] >= cstart && in[j] < cend) {
5792             col = in[j] - cstart;
5793             MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
5794           } else if (in[j] < 0) continue;
5795 #if defined(PETSC_USE_DEBUG)
5796           else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
5797 #endif
5798           else {
5799             if (mat->was_assembled) {
5800               if (!aij->colmap) {
5801                 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
5802               }
5803 #if defined(PETSC_USE_CTABLE)
5804               ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
5805               col--;
5806 #else
5807               col = aij->colmap[in[j]] - 1;
5808 #endif
5809               if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
5810                 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
5811                 col  =  in[j];
5812                 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
5813                 B     = aij->B;
5814                 b     = (Mat_SeqAIJ*)B->data;
5815                 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
5816                 rp2   = bj + bi[row];
5817                 ap2   = ba + bi[row];
5818                 rmax2 = bimax[row];
5819                 nrow2 = bilen[row];
5820                 low2  = 0;
5821                 high2 = nrow2;
5822                 bm    = aij->B->rmap->n;
5823                 ba    = b->a;
5824               }
5825             } else col = in[j];
5826             MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
5827           }
5828         }
5829       } else if (!aij->donotstash) {
5830         if (roworiented) {
5831           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
5832         } else {
5833           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
5834         }
5835       }
5836     }
5837   }
5838   PetscFunctionReturnVoid();
5839 }
5840 
5841