xref: /petsc/src/mat/impls/aij/mpi/mpiaij.c (revision ffad99011bdf8bdff5e8540ef3c49b4fd8d6e6bb)
1 
2 #include <../src/mat/impls/aij/mpi/mpiaij.h>   /*I "petscmat.h" I*/
3 #include <petsc-private/vecimpl.h>
4 #include <petscblaslapack.h>
5 #include <petscsf.h>
6 
7 /*MC
8    MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices.
9 
10    This matrix type is identical to MATSEQAIJ when constructed with a single process communicator,
11    and MATMPIAIJ otherwise.  As a result, for single process communicators,
   MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
   for communicators controlling multiple processes.  It is recommended that you call both of
   the above preallocation routines for simplicity.
15 
16    Options Database Keys:
17 . -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions()
18 
  Developer Notes: Subclasses include MATAIJCUSP, MATAIJCUSPARSE, MATAIJPERM, and MATAIJCRL. The type also
   automatically switches over to use inode routines when enough inodes exist.
21 
22   Level: beginner
23 
.seealso: MatCreateAIJ(), MatCreateSeqAIJ(), MATSEQAIJ, MATMPIAIJ
25 M*/
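
/*
   A minimal usage sketch (illustrative only, not part of the library source): create an AIJ
   matrix and, as recommended above, call both preallocation routines so the same code runs
   with one or many processes.  The per-row nonzero estimates (5 diagonal, 2 off-diagonal)
   are assumptions for the example, not values taken from this file.

     Mat A;
     ierr = MatCreate(comm,&A);CHKERRQ(ierr);
     ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,M,N);CHKERRQ(ierr);
     ierr = MatSetType(A,MATAIJ);CHKERRQ(ierr);
     ierr = MatSetFromOptions(A);CHKERRQ(ierr);
     ierr = MatSeqAIJSetPreallocation(A,5,NULL);CHKERRQ(ierr);
     ierr = MatMPIAIJSetPreallocation(A,5,NULL,2,NULL);CHKERRQ(ierr);
*/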
26 
27 /*MC
28    MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices.
29 
30    This matrix type is identical to MATSEQAIJCRL when constructed with a single process communicator,
31    and MATMPIAIJCRL otherwise.  As a result, for single process communicators,
32    MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
   for communicators controlling multiple processes.  It is recommended that you call both of
   the above preallocation routines for simplicity.
35 
36    Options Database Keys:
37 . -mat_type aijcrl - sets the matrix type to "aijcrl" during a call to MatSetFromOptions()
38 
39   Level: beginner
40 
.seealso: MatCreateMPIAIJCRL(), MATSEQAIJCRL, MATMPIAIJCRL
42 M*/
43 
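/*
   MatFindNonzeroRows_MPIAIJ - builds, in keptrows, an index set of the locally owned rows
   that contain at least one stored nonzero value in either the diagonal (A) or off-diagonal
   (B) block; keptrows is left NULL when no process has a completely zero row.
*/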
44 #undef __FUNCT__
45 #define __FUNCT__ "MatFindNonzeroRows_MPIAIJ"
46 PetscErrorCode MatFindNonzeroRows_MPIAIJ(Mat M,IS *keptrows)
47 {
48   PetscErrorCode  ierr;
49   Mat_MPIAIJ      *mat = (Mat_MPIAIJ*)M->data;
50   Mat_SeqAIJ      *a   = (Mat_SeqAIJ*)mat->A->data;
51   Mat_SeqAIJ      *b   = (Mat_SeqAIJ*)mat->B->data;
52   const PetscInt  *ia,*ib;
53   const MatScalar *aa,*bb;
54   PetscInt        na,nb,i,j,*rows,cnt=0,n0rows;
55   PetscInt        m = M->rmap->n,rstart = M->rmap->rstart;
56 
57   PetscFunctionBegin;
58   *keptrows = 0;
59   ia        = a->i;
60   ib        = b->i;
61   for (i=0; i<m; i++) {
62     na = ia[i+1] - ia[i];
63     nb = ib[i+1] - ib[i];
64     if (!na && !nb) {
65       cnt++;
66       goto ok1;
67     }
68     aa = a->a + ia[i];
69     for (j=0; j<na; j++) {
70       if (aa[j] != 0.0) goto ok1;
71     }
72     bb = b->a + ib[i];
73     for (j=0; j <nb; j++) {
74       if (bb[j] != 0.0) goto ok1;
75     }
76     cnt++;
77 ok1:;
78   }
79   ierr = MPI_Allreduce(&cnt,&n0rows,1,MPIU_INT,MPIU_SUM,PetscObjectComm((PetscObject)M));CHKERRQ(ierr);
80   if (!n0rows) PetscFunctionReturn(0);
81   ierr = PetscMalloc1((M->rmap->n-cnt),&rows);CHKERRQ(ierr);
82   cnt  = 0;
83   for (i=0; i<m; i++) {
84     na = ia[i+1] - ia[i];
85     nb = ib[i+1] - ib[i];
86     if (!na && !nb) continue;
87     aa = a->a + ia[i];
88     for (j=0; j<na;j++) {
89       if (aa[j] != 0.0) {
90         rows[cnt++] = rstart + i;
91         goto ok2;
92       }
93     }
94     bb = b->a + ib[i];
95     for (j=0; j<nb; j++) {
96       if (bb[j] != 0.0) {
97         rows[cnt++] = rstart + i;
98         goto ok2;
99       }
100     }
101 ok2:;
102   }
103   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),cnt,rows,PETSC_OWN_POINTER,keptrows);CHKERRQ(ierr);
104   PetscFunctionReturn(0);
105 }
106 
107 #undef __FUNCT__
108 #define __FUNCT__ "MatFindZeroDiagonals_MPIAIJ"
109 PetscErrorCode MatFindZeroDiagonals_MPIAIJ(Mat M,IS *zrows)
110 {
111   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)M->data;
112   PetscErrorCode ierr;
113   PetscInt       i,rstart,nrows,*rows;
114 
115   PetscFunctionBegin;
116   *zrows = NULL;
117   ierr   = MatFindZeroDiagonals_SeqAIJ_Private(aij->A,&nrows,&rows);CHKERRQ(ierr);
118   ierr   = MatGetOwnershipRange(M,&rstart,NULL);CHKERRQ(ierr);
119   for (i=0; i<nrows; i++) rows[i] += rstart;
120   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),nrows,rows,PETSC_OWN_POINTER,zrows);CHKERRQ(ierr);
121   PetscFunctionReturn(0);
122 }
123 
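/*
   MatGetColumnNorms_MPIAIJ - computes the requested norm of every global column by
   accumulating local contributions into a work array of global length n and combining the
   arrays with MPI_Allreduce (MAX for NORM_INFINITY, SUM otherwise); garray translates
   off-diagonal local column numbers back to global ones.  Note that the O(N) work array
   means this is not scalable in the number of columns.
*/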
124 #undef __FUNCT__
125 #define __FUNCT__ "MatGetColumnNorms_MPIAIJ"
126 PetscErrorCode MatGetColumnNorms_MPIAIJ(Mat A,NormType type,PetscReal *norms)
127 {
128   PetscErrorCode ierr;
129   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)A->data;
130   PetscInt       i,n,*garray = aij->garray;
131   Mat_SeqAIJ     *a_aij = (Mat_SeqAIJ*) aij->A->data;
132   Mat_SeqAIJ     *b_aij = (Mat_SeqAIJ*) aij->B->data;
133   PetscReal      *work;
134 
135   PetscFunctionBegin;
136   ierr = MatGetSize(A,NULL,&n);CHKERRQ(ierr);
137   ierr = PetscCalloc1(n,&work);CHKERRQ(ierr);
138   if (type == NORM_2) {
139     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
140       work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]*a_aij->a[i]);
141     }
142     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
143       work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]*b_aij->a[i]);
144     }
145   } else if (type == NORM_1) {
146     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
147       work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]);
148     }
149     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
150       work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]);
151     }
152   } else if (type == NORM_INFINITY) {
153     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
154       work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]);
155     }
156     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
157       work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]),work[garray[b_aij->j[i]]]);
158     }
159 
160   } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Unknown NormType");
161   if (type == NORM_INFINITY) {
162     ierr = MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
163   } else {
164     ierr = MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
165   }
166   ierr = PetscFree(work);CHKERRQ(ierr);
167   if (type == NORM_2) {
168     for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]);
169   }
170   PetscFunctionReturn(0);
171 }
172 
173 #undef __FUNCT__
174 #define __FUNCT__ "MatDistribute_MPIAIJ"
175 /*
176     Distributes a SeqAIJ matrix across a set of processes. Code stolen from
177     MatLoad_MPIAIJ(). Horrible lack of reuse. Should be a routine for each matrix type.
178 
179     Only for square matrices
180 
181     Used by a preconditioner, hence PETSC_EXTERN
182 */
183 PETSC_EXTERN PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat)
184 {
185   PetscMPIInt    rank,size;
186   PetscInt       *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz = 0,*gmataj,cnt,row,*ld,bses[2];
187   PetscErrorCode ierr;
188   Mat            mat;
189   Mat_SeqAIJ     *gmata;
190   PetscMPIInt    tag;
191   MPI_Status     status;
192   PetscBool      aij;
193   MatScalar      *gmataa,*ao,*ad,*gmataarestore=0;
194 
195   PetscFunctionBegin;
196   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
197   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
198   if (!rank) {
199     ierr = PetscObjectTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);CHKERRQ(ierr);
200     if (!aij) SETERRQ1(PetscObjectComm((PetscObject)gmat),PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name);
201   }
202   if (reuse == MAT_INITIAL_MATRIX) {
203     ierr = MatCreate(comm,&mat);CHKERRQ(ierr);
204     ierr = MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
205     if (!rank) {
206       bses[0] = gmat->rmap->bs;
207       bses[1] = gmat->cmap->bs;
208     }
209     ierr = MPI_Bcast(bses,2,MPIU_INT,0,comm);CHKERRQ(ierr);
210     ierr = MatSetBlockSizes(mat,bses[0],bses[1]);CHKERRQ(ierr);
211     ierr = MatSetType(mat,MATAIJ);CHKERRQ(ierr);
212     ierr = PetscMalloc1((size+1),&rowners);CHKERRQ(ierr);
213     ierr = PetscMalloc2(m,&dlens,m,&olens);CHKERRQ(ierr);
214     ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
215 
216     rowners[0] = 0;
217     for (i=2; i<=size; i++) rowners[i] += rowners[i-1];
218     rstart = rowners[rank];
219     rend   = rowners[rank+1];
220     ierr   = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
221     if (!rank) {
222       gmata = (Mat_SeqAIJ*) gmat->data;
223       /* send row lengths to all processors */
224       for (i=0; i<m; i++) dlens[i] = gmata->ilen[i];
225       for (i=1; i<size; i++) {
226         ierr = MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
227       }
      /* determine the number of diagonal and off-diagonal entries in each row */
229       ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
230       ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr);
231       jj   = 0;
232       for (i=0; i<m; i++) {
233         for (j=0; j<dlens[i]; j++) {
234           if (gmata->j[jj] < rstart) ld[i]++;
235           if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++;
236           jj++;
237         }
238       }
239       /* send column indices to other processes */
240       for (i=1; i<size; i++) {
241         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
242         ierr = MPI_Send(&nz,1,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
243         ierr = MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
244       }
245 
246       /* send numerical values to other processes */
247       for (i=1; i<size; i++) {
248         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
249         ierr = MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
250       }
251       gmataa = gmata->a;
252       gmataj = gmata->j;
253 
254     } else {
255       /* receive row lengths */
256       ierr = MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
257       /* receive column indices */
258       ierr = MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
259       ierr = PetscMalloc2(nz,&gmataa,nz,&gmataj);CHKERRQ(ierr);
260       ierr = MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
      /* determine the number of diagonal and off-diagonal entries in each row */
262       ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
263       ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr);
264       jj   = 0;
265       for (i=0; i<m; i++) {
266         for (j=0; j<dlens[i]; j++) {
267           if (gmataj[jj] < rstart) ld[i]++;
268           if (gmataj[jj] < rstart || gmataj[jj] >= rend) olens[i]++;
269           jj++;
270         }
271       }
272       /* receive numerical values */
273       ierr = PetscMemzero(gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr);
274       ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
275     }
276     /* set preallocation */
277     for (i=0; i<m; i++) {
278       dlens[i] -= olens[i];
279     }
280     ierr = MatSeqAIJSetPreallocation(mat,0,dlens);CHKERRQ(ierr);
281     ierr = MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);CHKERRQ(ierr);
282 
283     for (i=0; i<m; i++) {
284       dlens[i] += olens[i];
285     }
286     cnt = 0;
287     for (i=0; i<m; i++) {
288       row  = rstart + i;
289       ierr = MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);CHKERRQ(ierr);
290       cnt += dlens[i];
291     }
292     if (rank) {
293       ierr = PetscFree2(gmataa,gmataj);CHKERRQ(ierr);
294     }
295     ierr = PetscFree2(dlens,olens);CHKERRQ(ierr);
296     ierr = PetscFree(rowners);CHKERRQ(ierr);
297 
298     ((Mat_MPIAIJ*)(mat->data))->ld = ld;
299 
300     *inmat = mat;
301   } else {   /* column indices are already set; only need to move over numerical values from process 0 */
302     Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data;
303     Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data;
304     mat  = *inmat;
305     ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
306     if (!rank) {
307       /* send numerical values to other processes */
308       gmata  = (Mat_SeqAIJ*) gmat->data;
309       ierr   = MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);CHKERRQ(ierr);
310       gmataa = gmata->a;
311       for (i=1; i<size; i++) {
312         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
313         ierr = MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
314       }
315       nz = gmata->i[rowners[1]]-gmata->i[rowners[0]];
316     } else {
317       /* receive numerical values from process 0*/
318       nz   = Ad->nz + Ao->nz;
319       ierr = PetscMalloc1(nz,&gmataa);CHKERRQ(ierr); gmataarestore = gmataa;
320       ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
321     }
322     /* transfer numerical values into the diagonal A and off diagonal B parts of mat */
323     ld = ((Mat_MPIAIJ*)(mat->data))->ld;
324     ad = Ad->a;
325     ao = Ao->a;
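    /*
       Each incoming row arrives as [B entries left of the diagonal block | A entries | B entries
       right of it], and ld[i] records how many B entries lie to the left; the copies below peel
       these pieces apart, fusing the right piece of row i-1 with the left piece of row i in the loop.
    */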
326     if (mat->rmap->n) {
327       i  = 0;
328       nz = ld[i];                                   ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
329       nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
330     }
331     for (i=1; i<mat->rmap->n; i++) {
332       nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
333       nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
334     }
335     i--;
336     if (mat->rmap->n) {
337       nz = Ao->i[i+1] - Ao->i[i] - ld[i];           ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr);
338     }
339     if (rank) {
340       ierr = PetscFree(gmataarestore);CHKERRQ(ierr);
341     }
342   }
343   ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
344   ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
345   PetscFunctionReturn(0);
346 }
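
/*
   Hedged usage sketch (names are illustrative): rank 0 holds a square SeqAIJ matrix gmat,
   which is only examined on rank 0, and every rank asks for m local rows of the result.

     Mat dmat;
     ierr = MatDistribute_MPIAIJ(comm,gmat,m,MAT_INITIAL_MATRIX,&dmat);CHKERRQ(ierr);
*/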
347 
/*
  Local utility routine that creates a mapping from the global column
  number to the local number in the off-diagonal part of the local
  storage of the matrix.  When PETSC_USE_CTABLE is defined this is scalable at
  a slightly higher hash-table cost; without it, it is not scalable (each process
  stores an order-N integer array) but is fast to access.
*/
355 #undef __FUNCT__
356 #define __FUNCT__ "MatCreateColmap_MPIAIJ_Private"
357 PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat)
358 {
359   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
360   PetscErrorCode ierr;
361   PetscInt       n = aij->B->cmap->n,i;
362 
363   PetscFunctionBegin;
364   if (!aij->garray) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"MPIAIJ Matrix was assembled but is missing garray");
365 #if defined(PETSC_USE_CTABLE)
366   ierr = PetscTableCreate(n,mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr);
367   for (i=0; i<n; i++) {
368     ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1,INSERT_VALUES);CHKERRQ(ierr);
369   }
370 #else
371   ierr = PetscCalloc1((mat->cmap->N+1),&aij->colmap);CHKERRQ(ierr);
372   ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N+1)*sizeof(PetscInt));CHKERRQ(ierr);
373   for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
374 #endif
375   PetscFunctionReturn(0);
376 }
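
/*
   Lookup sketch (illustrative only): translating a global column gcol into a local
   off-diagonal column lcol using the colmap built above.  Entries are stored shifted
   by one so that lcol ends up -1 when gcol is not present in the off-diagonal part.

   #if defined(PETSC_USE_CTABLE)
     ierr = PetscTableFind(aij->colmap,gcol+1,&lcol);CHKERRQ(ierr);
     lcol--;
   #else
     lcol = aij->colmap[gcol] - 1;
   #endif
*/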
377 
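/*
   The two macros below insert a single value into the diagonal (A) or off-diagonal (B)
   sequential block: they narrow the sorted row by bisection, scan for the column, add or
   overwrite in place when it is found, and otherwise shift the tail of the row up to make
   room, reallocating through MatSeqXAIJReallocateAIJ() when the row is full.
*/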
378 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \
379 { \
380     if (col <= lastcol1)  low1 = 0;     \
381     else                 high1 = nrow1; \
382     lastcol1 = col;\
383     while (high1-low1 > 5) { \
384       t = (low1+high1)/2; \
385       if (rp1[t] > col) high1 = t; \
386       else              low1  = t; \
387     } \
388       for (_i=low1; _i<high1; _i++) { \
389         if (rp1[_i] > col) break; \
390         if (rp1[_i] == col) { \
391           if (addv == ADD_VALUES) ap1[_i] += value;   \
392           else                    ap1[_i] = value; \
393           goto a_noinsert; \
394         } \
395       }  \
396       if (value == 0.0 && ignorezeroentries) {low1 = 0; high1 = nrow1;goto a_noinsert;} \
397       if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;}                \
398       if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
399       MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \
400       N = nrow1++ - 1; a->nz++; high1++; \
401       /* shift up all the later entries in this row */ \
402       for (ii=N; ii>=_i; ii--) { \
403         rp1[ii+1] = rp1[ii]; \
404         ap1[ii+1] = ap1[ii]; \
405       } \
406       rp1[_i] = col;  \
407       ap1[_i] = value;  \
408       A->nonzerostate++;\
409       a_noinsert: ; \
410       ailen[row] = nrow1; \
411 }
412 
413 
414 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \
415   { \
416     if (col <= lastcol2) low2 = 0;                        \
417     else high2 = nrow2;                                   \
418     lastcol2 = col;                                       \
419     while (high2-low2 > 5) {                              \
420       t = (low2+high2)/2;                                 \
421       if (rp2[t] > col) high2 = t;                        \
422       else             low2  = t;                         \
423     }                                                     \
424     for (_i=low2; _i<high2; _i++) {                       \
425       if (rp2[_i] > col) break;                           \
426       if (rp2[_i] == col) {                               \
427         if (addv == ADD_VALUES) ap2[_i] += value;         \
428         else                    ap2[_i] = value;          \
429         goto b_noinsert;                                  \
430       }                                                   \
431     }                                                     \
432     if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
433     if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;}                        \
434     if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
435     MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \
436     N = nrow2++ - 1; b->nz++; high2++;                    \
437     /* shift up all the later entries in this row */      \
438     for (ii=N; ii>=_i; ii--) {                            \
439       rp2[ii+1] = rp2[ii];                                \
440       ap2[ii+1] = ap2[ii];                                \
441     }                                                     \
442     rp2[_i] = col;                                        \
443     ap2[_i] = value;                                      \
444     B->nonzerostate++;                                    \
445     b_noinsert: ;                                         \
446     bilen[row] = nrow2;                                   \
447   }
448 
449 #undef __FUNCT__
450 #define __FUNCT__ "MatSetValuesRow_MPIAIJ"
451 PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[])
452 {
453   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)A->data;
454   Mat_SeqAIJ     *a   = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data;
455   PetscErrorCode ierr;
456   PetscInt       l,*garray = mat->garray,diag;
457 
458   PetscFunctionBegin;
459   /* code only works for square matrices A */
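  /* v is assumed to hold the entire local row in increasing global column order:
     [off-diagonal entries left of the diagonal block | diagonal-block entries | off-diagonal entries to the right] */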
460 
461   /* find size of row to the left of the diagonal part */
462   ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr);
463   row  = row - diag;
464   for (l=0; l<b->i[row+1]-b->i[row]; l++) {
465     if (garray[b->j[b->i[row]+l]] > diag) break;
466   }
467   ierr = PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));CHKERRQ(ierr);
468 
469   /* diagonal part */
470   ierr = PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));CHKERRQ(ierr);
471 
472   /* right of diagonal part */
473   ierr = PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));CHKERRQ(ierr);
474   PetscFunctionReturn(0);
475 }
476 
477 #undef __FUNCT__
478 #define __FUNCT__ "MatSetValues_MPIAIJ"
479 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
480 {
481   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
482   PetscScalar    value;
483   PetscErrorCode ierr;
484   PetscInt       i,j,rstart  = mat->rmap->rstart,rend = mat->rmap->rend;
485   PetscInt       cstart      = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
486   PetscBool      roworiented = aij->roworiented;
487 
  /* Some variables required by the MatSetValues_SeqAIJ_{A,B}_Private() macros */
489   Mat        A                 = aij->A;
490   Mat_SeqAIJ *a                = (Mat_SeqAIJ*)A->data;
491   PetscInt   *aimax            = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
492   MatScalar  *aa               = a->a;
493   PetscBool  ignorezeroentries = a->ignorezeroentries;
494   Mat        B                 = aij->B;
495   Mat_SeqAIJ *b                = (Mat_SeqAIJ*)B->data;
496   PetscInt   *bimax            = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
497   MatScalar  *ba               = b->a;
498 
499   PetscInt  *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
500   PetscInt  nonew;
501   MatScalar *ap1,*ap2;
502 
503   PetscFunctionBegin;
504   if (v) PetscValidScalarPointer(v,6);
505   for (i=0; i<m; i++) {
506     if (im[i] < 0) continue;
507 #if defined(PETSC_USE_DEBUG)
508     if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
509 #endif
510     if (im[i] >= rstart && im[i] < rend) {
511       row      = im[i] - rstart;
512       lastcol1 = -1;
513       rp1      = aj + ai[row];
514       ap1      = aa + ai[row];
515       rmax1    = aimax[row];
516       nrow1    = ailen[row];
517       low1     = 0;
518       high1    = nrow1;
519       lastcol2 = -1;
520       rp2      = bj + bi[row];
521       ap2      = ba + bi[row];
522       rmax2    = bimax[row];
523       nrow2    = bilen[row];
524       low2     = 0;
525       high2    = nrow2;
526 
527       for (j=0; j<n; j++) {
528         if (v) {
529           if (roworiented) value = v[i*n+j];
530           else             value = v[i+j*m];
531         } else value = 0.0;
532         if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
533         if (in[j] >= cstart && in[j] < cend) {
534           col   = in[j] - cstart;
535           nonew = a->nonew;
536           MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
537         } else if (in[j] < 0) continue;
538 #if defined(PETSC_USE_DEBUG)
539         else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
540 #endif
541         else {
542           if (mat->was_assembled) {
543             if (!aij->colmap) {
544               ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
545             }
546 #if defined(PETSC_USE_CTABLE)
547             ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
548             col--;
549 #else
550             col = aij->colmap[in[j]] - 1;
551 #endif
552             if (col < 0 && !((Mat_SeqAIJ*)(aij->B->data))->nonew) {
553               ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
554               col  =  in[j];
555               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
556               B     = aij->B;
557               b     = (Mat_SeqAIJ*)B->data;
558               bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a;
559               rp2   = bj + bi[row];
560               ap2   = ba + bi[row];
561               rmax2 = bimax[row];
562               nrow2 = bilen[row];
563               low2  = 0;
564               high2 = nrow2;
565               bm    = aij->B->rmap->n;
566               ba    = b->a;
567             } else if (col < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", im[i], in[j]);
568           } else col = in[j];
569           nonew = b->nonew;
570           MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
571         }
572       }
573     } else {
574       if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
575       if (!aij->donotstash) {
576         mat->assembled = PETSC_FALSE;
577         if (roworiented) {
578           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
579         } else {
580           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
581         }
582       }
583     }
584   }
585   PetscFunctionReturn(0);
586 }
587 
588 #undef __FUNCT__
589 #define __FUNCT__ "MatGetValues_MPIAIJ"
590 PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
591 {
592   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
593   PetscErrorCode ierr;
594   PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
595   PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
596 
597   PetscFunctionBegin;
598   for (i=0; i<m; i++) {
599     if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/
600     if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
601     if (idxm[i] >= rstart && idxm[i] < rend) {
602       row = idxm[i] - rstart;
603       for (j=0; j<n; j++) {
604         if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */
605         if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
606         if (idxn[j] >= cstart && idxn[j] < cend) {
607           col  = idxn[j] - cstart;
608           ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
609         } else {
610           if (!aij->colmap) {
611             ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
612           }
613 #if defined(PETSC_USE_CTABLE)
614           ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
615           col--;
616 #else
617           col = aij->colmap[idxn[j]] - 1;
618 #endif
619           if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
620           else {
621             ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
622           }
623         }
624       }
625     } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
626   }
627   PetscFunctionReturn(0);
628 }
629 
630 extern PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat,Vec,Vec);
631 
632 #undef __FUNCT__
633 #define __FUNCT__ "MatAssemblyBegin_MPIAIJ"
634 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
635 {
636   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
637   PetscErrorCode ierr;
638   PetscInt       nstash,reallocs;
639   InsertMode     addv;
640 
641   PetscFunctionBegin;
642   if (aij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0);
643 
  /* make sure all processes are in the same InsertMode: the BOR below yields ADD_VALUES|INSERT_VALUES exactly when some inserted while others added */
645   ierr = MPI_Allreduce((PetscEnum*)&mat->insertmode,(PetscEnum*)&addv,1,MPIU_ENUM,MPI_BOR,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
646   if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
647   mat->insertmode = addv; /* in case this processor had no cache */
648 
649   ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
650   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
651   ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
652   PetscFunctionReturn(0);
653 }
654 
655 #undef __FUNCT__
656 #define __FUNCT__ "MatAssemblyEnd_MPIAIJ"
657 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
658 {
659   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
660   Mat_SeqAIJ     *a   = (Mat_SeqAIJ*)aij->A->data;
661   PetscErrorCode ierr;
662   PetscMPIInt    n;
663   PetscInt       i,j,rstart,ncols,flg;
664   PetscInt       *row,*col;
665   PetscBool      other_disassembled;
666   PetscScalar    *val;
667   InsertMode     addv = mat->insertmode;
668 
669   /* do not use 'b = (Mat_SeqAIJ*)aij->B->data' as B can be reset in disassembly */
670 
671   PetscFunctionBegin;
672   if (!aij->donotstash && !mat->nooffprocentries) {
673     while (1) {
674       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
675       if (!flg) break;
676 
677       for (i=0; i<n; ) {
678         /* Now identify the consecutive vals belonging to the same row */
679         for (j=i,rstart=row[j]; j<n; j++) {
680           if (row[j] != rstart) break;
681         }
682         if (j < n) ncols = j-i;
683         else       ncols = n-i;
684         /* Now assemble all these values with a single function call */
685         ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
686 
687         i = j;
688       }
689     }
690     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
691   }
692   ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
693   ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);
694 
  /* determine if any process has disassembled; if so we must
     also disassemble ourselves, in order that we may reassemble */
  /*
     if the nonzero structure of submatrix B cannot change, then no process
     disassembled and we can skip this step
  */
701   if (!((Mat_SeqAIJ*)aij->B->data)->nonew) {
702     ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
703     if (mat->was_assembled && !other_disassembled) {
704       ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
705     }
706   }
707   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
708     ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
709   }
710   ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr);
711   ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
712   ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);
713 
714   ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
715 
716   aij->rowvalues = 0;
717 
718   /* used by MatAXPY() */
719   a->xtoy = 0; ((Mat_SeqAIJ*)aij->B->data)->xtoy = 0;   /* b->xtoy = 0 */
720   a->XtoY = 0; ((Mat_SeqAIJ*)aij->B->data)->XtoY = 0;   /* b->XtoY = 0 */
721 
722   ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
723   if (a->inode.size) mat->ops->multdiagonalblock = MatMultDiagonalBlock_MPIAIJ;
724 
725   {
726     PetscObjectState state = aij->A->nonzerostate + aij->B->nonzerostate;
727     ierr = MPI_Allreduce(&state,&mat->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
728   }
729   PetscFunctionReturn(0);
730 }
731 
732 #undef __FUNCT__
733 #define __FUNCT__ "MatZeroEntries_MPIAIJ"
734 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
735 {
736   Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
737   PetscErrorCode ierr;
738 
739   PetscFunctionBegin;
740   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
741   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
742   PetscFunctionReturn(0);
743 }
744 
745 #undef __FUNCT__
746 #define __FUNCT__ "MatZeroRows_MPIAIJ"
747 PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
748 {
749   Mat_MPIAIJ    *mat    = (Mat_MPIAIJ *) A->data;
750   PetscInt      *owners = A->rmap->range;
751   PetscInt       n      = A->rmap->n;
752   PetscMPIInt    size   = mat->size;
753   PetscSF        sf;
754   PetscInt      *lrows;
755   PetscSFNode   *rrows;
756   PetscInt       lastidx = -1, r, p = 0, len = 0;
757   PetscErrorCode ierr;
758 
759   PetscFunctionBegin;
760   /* Create SF where leaves are input rows and roots are owned rows */
761   ierr = PetscMalloc1(n, &lrows);CHKERRQ(ierr);
762   for (r = 0; r < n; ++r) lrows[r] = -1;
763   ierr = PetscMalloc1(N, &rrows);CHKERRQ(ierr);
764   for (r = 0; r < N; ++r) {
765     const PetscInt idx   = rows[r];
766     PetscBool      found = PETSC_FALSE;
767     /* Trick for efficient searching for sorted rows */
768     if (lastidx > idx) p = 0;
769     lastidx = idx;
770     for (; p < size; ++p) {
771       if (idx >= owners[p] && idx < owners[p+1]) {
772         rrows[r].rank  = p;
773         rrows[r].index = rows[r] - owners[p];
774         found = PETSC_TRUE;
775         break;
776       }
777     }
    if (!found) SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row %D not found in matrix distribution", idx);
779   }
780   ierr = PetscSFCreate(PetscObjectComm((PetscObject) A), &sf);CHKERRQ(ierr);
781   ierr = PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER);CHKERRQ(ierr);
  /* Collect flags for rows to be zeroed: an owned row referenced by any process becomes nonnegative, untouched rows stay -1 */
783   ierr = PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
784   ierr = PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
785   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
786   /* Compress and put in row numbers */
787   for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r;
788   /* fix right hand side if needed */
789   if (x && b) {
790     const PetscScalar *xx;
791     PetscScalar       *bb;
792 
793     ierr = VecGetArrayRead(x, &xx);CHKERRQ(ierr);
794     ierr = VecGetArray(b, &bb);CHKERRQ(ierr);
795     for (r = 0; r < len; ++r) bb[lrows[r]] = diag*xx[lrows[r]];
796     ierr = VecRestoreArrayRead(x, &xx);CHKERRQ(ierr);
797     ierr = VecRestoreArray(b, &bb);CHKERRQ(ierr);
798   }
  /* Must zero mat->B before mat->A because the (diag != 0.0) case below may put values into mat->B */
800   ierr = MatZeroRows(mat->B, len, lrows, 0.0, 0,0);CHKERRQ(ierr);
801   if ((diag != 0.0) && (mat->A->rmap->N == mat->A->cmap->N)) {
802     ierr = MatZeroRows(mat->A, len, lrows, diag, NULL, NULL);CHKERRQ(ierr);
803   } else if (diag != 0.0) {
804     ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
805     if (((Mat_SeqAIJ *) mat->A->data)->nonew) SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "MatZeroRows() on rectangular matrices cannot be used with the Mat options\nMAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
806     for (r = 0; r < len; ++r) {
807       const PetscInt row = lrows[r] + A->rmap->rstart;
808       ierr = MatSetValues(A, 1, &row, 1, &row, &diag, INSERT_VALUES);CHKERRQ(ierr);
809     }
810     ierr = MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
811     ierr = MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
812   } else {
813     ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
814   }
815   ierr = PetscFree(lrows);CHKERRQ(ierr);
816   {
817     PetscObjectState state = mat->A->nonzerostate + mat->B->nonzerostate;
818     ierr = MPI_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
819   }
820   PetscFunctionReturn(0);
821 }
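
/*
   Hedged usage sketch (values are illustrative): zero two locally owned global rows, put
   1.0 on their diagonal, and adjust the right-hand side b so the solution keeps the
   boundary values already stored in x.

     PetscInt zrows[2] = {rstart,rstart+1};
     ierr = MatZeroRows(A,2,zrows,1.0,x,b);CHKERRQ(ierr);
*/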
822 
823 #undef __FUNCT__
824 #define __FUNCT__ "MatZeroRowsColumns_MPIAIJ"
825 PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
826 {
827   Mat_MPIAIJ        *l = (Mat_MPIAIJ*)A->data;
828   PetscErrorCode    ierr;
  PetscMPIInt       size = l->size;
  PetscInt          n = A->rmap->n,lastidx = -1;
  PetscInt          i,j,r,m,p = 0,len = 0;
831   PetscInt          *lrows,*owners = A->rmap->range;
832   PetscSFNode       *rrows;
833   PetscSF           sf;
834   const PetscScalar *xx;
835   PetscScalar       *bb,*mask;
836   Vec               xmask,lmask;
837   Mat_SeqAIJ        *aij = (Mat_SeqAIJ*)l->B->data;
838   const PetscInt    *aj, *ii,*ridx;
839   PetscScalar       *aa;
843 
844   PetscFunctionBegin;
845   /* Create SF where leaves are input rows and roots are owned rows */
846   ierr = PetscMalloc1(n, &lrows);CHKERRQ(ierr);
847   for (r = 0; r < n; ++r) lrows[r] = -1;
848   ierr = PetscMalloc1(N, &rrows);CHKERRQ(ierr);
849   for (r = 0; r < N; ++r) {
850     const PetscInt idx   = rows[r];
851     PetscBool      found = PETSC_FALSE;
852     /* Trick for efficient searching for sorted rows */
853     if (lastidx > idx) p = 0;
854     lastidx = idx;
855     for (; p < size; ++p) {
856       if (idx >= owners[p] && idx < owners[p+1]) {
857         rrows[r].rank  = p;
858         rrows[r].index = rows[r] - owners[p];
859         found = PETSC_TRUE;
860         break;
861       }
862     }
    if (!found) SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row %D not found in matrix distribution", idx);
864   }
865   ierr = PetscSFCreate(PetscObjectComm((PetscObject) A), &sf);CHKERRQ(ierr);
866   ierr = PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER);CHKERRQ(ierr);
  /* Collect flags for rows to be zeroed: an owned row referenced by any process becomes nonnegative, untouched rows stay -1 */
868   ierr = PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
869   ierr = PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
870   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
871   /* Compress and put in row numbers */
872   for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r;
873   /* zero diagonal part of matrix */
874   ierr = MatZeroRowsColumns(l->A,len,lrows,diag,x,b);CHKERRQ(ierr);
  /* handle off-diagonal part of matrix: build a ghosted mask vector with 1 in every ghost position that corresponds to a zeroed column */
876   ierr = MatGetVecs(A,&xmask,NULL);CHKERRQ(ierr);
877   ierr = VecDuplicate(l->lvec,&lmask);CHKERRQ(ierr);
878   ierr = VecGetArray(xmask,&bb);CHKERRQ(ierr);
879   for (i=0; i<len; i++) bb[lrows[i]] = 1;
880   ierr = VecRestoreArray(xmask,&bb);CHKERRQ(ierr);
881   ierr = VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
882   ierr = VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
883   ierr = VecDestroy(&xmask);CHKERRQ(ierr);
884   if (x) {
885     ierr = VecScatterBegin(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
886     ierr = VecScatterEnd(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
887     ierr = VecGetArrayRead(l->lvec,&xx);CHKERRQ(ierr);
888     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
889   }
890   ierr = VecGetArray(lmask,&mask);CHKERRQ(ierr);
891   /* remove zeroed rows of off diagonal matrix */
892   ii = aij->i;
893   for (i=0; i<len; i++) {
894     ierr = PetscMemzero(aij->a + ii[lrows[i]],(ii[lrows[i]+1] - ii[lrows[i]])*sizeof(PetscScalar));CHKERRQ(ierr);
895   }
896   /* loop over all elements of off process part of matrix zeroing removed columns*/
897   if (aij->compressedrow.use) {
898     m    = aij->compressedrow.nrows;
899     ii   = aij->compressedrow.i;
900     ridx = aij->compressedrow.rindex;
901     for (i=0; i<m; i++) {
902       n  = ii[i+1] - ii[i];
903       aj = aij->j + ii[i];
904       aa = aij->a + ii[i];
905 
906       for (j=0; j<n; j++) {
907         if (PetscAbsScalar(mask[*aj])) {
908           if (b) bb[*ridx] -= *aa*xx[*aj];
909           *aa = 0.0;
910         }
911         aa++;
912         aj++;
913       }
914       ridx++;
915     }
916   } else { /* do not use compressed row format */
917     m = l->B->rmap->n;
918     for (i=0; i<m; i++) {
919       n  = ii[i+1] - ii[i];
920       aj = aij->j + ii[i];
921       aa = aij->a + ii[i];
922       for (j=0; j<n; j++) {
923         if (PetscAbsScalar(mask[*aj])) {
924           if (b) bb[i] -= *aa*xx[*aj];
925           *aa = 0.0;
926         }
927         aa++;
928         aj++;
929       }
930     }
931   }
932   if (x) {
933     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
934     ierr = VecRestoreArrayRead(l->lvec,&xx);CHKERRQ(ierr);
935   }
936   ierr = VecRestoreArray(lmask,&mask);CHKERRQ(ierr);
937   ierr = VecDestroy(&lmask);CHKERRQ(ierr);
938   ierr = PetscFree(lrows);CHKERRQ(ierr);
939   PetscFunctionReturn(0);
940 }
941 
942 #undef __FUNCT__
943 #define __FUNCT__ "MatMult_MPIAIJ"
944 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
945 {
946   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
947   PetscErrorCode ierr;
948   PetscInt       nt;
949 
950   PetscFunctionBegin;
951   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
952   if (nt != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt);
953   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
954   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
955   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
956   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
957   PetscFunctionReturn(0);
958 }
959 
960 #undef __FUNCT__
961 #define __FUNCT__ "MatMultDiagonalBlock_MPIAIJ"
962 PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx)
963 {
964   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
965   PetscErrorCode ierr;
966 
967   PetscFunctionBegin;
968   ierr = MatMultDiagonalBlock(a->A,bb,xx);CHKERRQ(ierr);
969   PetscFunctionReturn(0);
970 }
971 
972 #undef __FUNCT__
973 #define __FUNCT__ "MatMultAdd_MPIAIJ"
974 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
975 {
976   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
977   PetscErrorCode ierr;
978 
979   PetscFunctionBegin;
980   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
981   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
982   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
983   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
984   PetscFunctionReturn(0);
985 }
986 
987 #undef __FUNCT__
988 #define __FUNCT__ "MatMultTranspose_MPIAIJ"
989 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
990 {
991   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
992   PetscErrorCode ierr;
993   PetscBool      merged;
994 
995   PetscFunctionBegin;
996   ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr);
997   /* do nondiagonal part */
998   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
999   if (!merged) {
1000     /* send it on its way */
1001     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1002     /* do local part */
1003     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
    /* receive remote parts: note this assumes the values are not actually */
    /* added into yy until the VecScatterEnd() below */
1006     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1007   } else {
1008     /* do local part */
1009     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
1010     /* send it on its way */
1011     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
    /* the values were actually received in the Begin(), but the End() must still be called as a no-op */
1013     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1014   }
1015   PetscFunctionReturn(0);
1016 }
1017 
1018 #undef __FUNCT__
1019 #define __FUNCT__ "MatIsTranspose_MPIAIJ"
1020 PetscErrorCode  MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscBool  *f)
1021 {
1022   MPI_Comm       comm;
1023   Mat_MPIAIJ     *Aij = (Mat_MPIAIJ*) Amat->data, *Bij;
1024   Mat            Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs;
1025   IS             Me,Notme;
1026   PetscErrorCode ierr;
1027   PetscInt       M,N,first,last,*notme,i;
1028   PetscMPIInt    size;
1029 
1030   PetscFunctionBegin;
  /* Easy test: check that the diagonal blocks are transposes of each other */
1032   Bij  = (Mat_MPIAIJ*) Bmat->data; Bdia = Bij->A;
1033   ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr);
1034   if (!*f) PetscFunctionReturn(0);
1035   ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr);
1036   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1037   if (size == 1) PetscFunctionReturn(0);
1038 
  /* Hard test: off-diagonal blocks. This requires MatGetSubMatrices(). */
1040   ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr);
1041   ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr);
1042   ierr = PetscMalloc1((N-last+first),&notme);CHKERRQ(ierr);
1043   for (i=0; i<first; i++) notme[i] = i;
1044   for (i=last; i<M; i++) notme[i-last+first] = i;
1045   ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,PETSC_COPY_VALUES,&Notme);CHKERRQ(ierr);
1046   ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr);
1047   ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr);
1048   Aoff = Aoffs[0];
1049   ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr);
1050   Boff = Boffs[0];
1051   ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr);
1052   ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr);
1053   ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr);
1054   ierr = ISDestroy(&Me);CHKERRQ(ierr);
1055   ierr = ISDestroy(&Notme);CHKERRQ(ierr);
1056   ierr = PetscFree(notme);CHKERRQ(ierr);
1057   PetscFunctionReturn(0);
1058 }
1059 
1060 #undef __FUNCT__
1061 #define __FUNCT__ "MatMultTransposeAdd_MPIAIJ"
1062 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1063 {
1064   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1065   PetscErrorCode ierr;
1066 
1067   PetscFunctionBegin;
1068   /* do nondiagonal part */
1069   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1070   /* send it on its way */
1071   ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1072   /* do local part */
1073   ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1074   /* receive remote parts */
1075   ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1076   PetscFunctionReturn(0);
1077 }
1078 
1079 /*
1080   This only works correctly for square matrices where the subblock A->A is the
1081    diagonal block
1082 */
1083 #undef __FUNCT__
1084 #define __FUNCT__ "MatGetDiagonal_MPIAIJ"
1085 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v)
1086 {
1087   PetscErrorCode ierr;
1088   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1089 
1090   PetscFunctionBegin;
1091   if (A->rmap->N != A->cmap->N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
1092   if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
1093   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
1094   PetscFunctionReturn(0);
1095 }
1096 
1097 #undef __FUNCT__
1098 #define __FUNCT__ "MatScale_MPIAIJ"
1099 PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa)
1100 {
1101   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1102   PetscErrorCode ierr;
1103 
1104   PetscFunctionBegin;
1105   ierr = MatScale(a->A,aa);CHKERRQ(ierr);
1106   ierr = MatScale(a->B,aa);CHKERRQ(ierr);
1107   PetscFunctionReturn(0);
1108 }
1109 
1110 #undef __FUNCT__
1111 #define __FUNCT__ "MatDestroy_MPIAIJ"
1112 PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
1113 {
1114   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1115   PetscErrorCode ierr;
1116 
1117   PetscFunctionBegin;
1118 #if defined(PETSC_USE_LOG)
1119   PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N);
1120 #endif
1121   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
1122   ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
1123   ierr = MatDestroy(&aij->A);CHKERRQ(ierr);
1124   ierr = MatDestroy(&aij->B);CHKERRQ(ierr);
1125 #if defined(PETSC_USE_CTABLE)
1126   ierr = PetscTableDestroy(&aij->colmap);CHKERRQ(ierr);
1127 #else
1128   ierr = PetscFree(aij->colmap);CHKERRQ(ierr);
1129 #endif
1130   ierr = PetscFree(aij->garray);CHKERRQ(ierr);
1131   ierr = VecDestroy(&aij->lvec);CHKERRQ(ierr);
1132   ierr = VecScatterDestroy(&aij->Mvctx);CHKERRQ(ierr);
1133   ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
1134   ierr = PetscFree(aij->ld);CHKERRQ(ierr);
1135   ierr = PetscFree(mat->data);CHKERRQ(ierr);
1136 
1137   ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
1138   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL);CHKERRQ(ierr);
1139   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL);CHKERRQ(ierr);
1140   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C",NULL);CHKERRQ(ierr);
1141   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C",NULL);CHKERRQ(ierr);
1142   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C",NULL);CHKERRQ(ierr);
1143   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C",NULL);CHKERRQ(ierr);
1144   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C",NULL);CHKERRQ(ierr);
1145   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C",NULL);CHKERRQ(ierr);
1146   PetscFunctionReturn(0);
1147 }
1148 
1149 #undef __FUNCT__
1150 #define __FUNCT__ "MatView_MPIAIJ_Binary"
1151 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
1152 {
1153   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1154   Mat_SeqAIJ     *A   = (Mat_SeqAIJ*)aij->A->data;
1155   Mat_SeqAIJ     *B   = (Mat_SeqAIJ*)aij->B->data;
1156   PetscErrorCode ierr;
1157   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
1158   int            fd;
1159   PetscInt       nz,header[4],*row_lengths,*range=0,rlen,i;
1160   PetscInt       nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap->rstart,rnz = 0;
1161   PetscScalar    *column_values;
1162   PetscInt       message_count,flowcontrolcount;
1163   FILE           *file;
1164 
1165   PetscFunctionBegin;
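  /* Process 0 writes the header and its own data, then, paced by the PetscViewerFlowControl
     routines, receives and writes each other process's row lengths, column indices, and
     numerical values in rank order; the other processes send their data when stepped. */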
1166   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
1167   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);CHKERRQ(ierr);
1168   nz   = A->nz + B->nz;
1169   if (!rank) {
1170     header[0] = MAT_FILE_CLASSID;
1171     header[1] = mat->rmap->N;
1172     header[2] = mat->cmap->N;
1173 
1174     ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1175     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1176     ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1177     /* get largest number of rows any processor has */
1178     rlen  = mat->rmap->n;
1179     range = mat->rmap->range;
1180     for (i=1; i<size; i++) rlen = PetscMax(rlen,range[i+1] - range[i]);
1181   } else {
1182     ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1183     rlen = mat->rmap->n;
1184   }
1185 
1186   /* load up the local row counts */
1187   ierr = PetscMalloc1((rlen+1),&row_lengths);CHKERRQ(ierr);
1188   for (i=0; i<mat->rmap->n; i++) row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
1189 
1190   /* store the row lengths to the file */
1191   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1192   if (!rank) {
1193     ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap->n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1194     for (i=1; i<size; i++) {
1195       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1196       rlen = range[i+1] - range[i];
1197       ierr = MPIULong_Recv(row_lengths,rlen,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1198       ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1199     }
1200     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1201   } else {
1202     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1203     ierr = MPIULong_Send(row_lengths,mat->rmap->n,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1204     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1205   }
1206   ierr = PetscFree(row_lengths);CHKERRQ(ierr);
1207 
1208   /* load up the local column indices */
  nzmax = nz; /* process 0 needs as much space as the largest nonzero count on any process */
1210   ierr  = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1211   ierr  = PetscMalloc1((nzmax+1),&column_indices);CHKERRQ(ierr);
1212   cnt   = 0;
1213   for (i=0; i<mat->rmap->n; i++) {
1214     for (j=B->i[i]; j<B->i[i+1]; j++) {
1215       if ((col = garray[B->j[j]]) > cstart) break;
1216       column_indices[cnt++] = col;
1217     }
1218     for (k=A->i[i]; k<A->i[i+1]; k++) column_indices[cnt++] = A->j[k] + cstart;
1219     for (; j<B->i[i+1]; j++) column_indices[cnt++] = garray[B->j[j]];
1220   }
  if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
1222 
1223   /* store the column indices to the file */
1224   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1225   if (!rank) {
1226     MPI_Status status;
1227     ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1228     for (i=1; i<size; i++) {
1229       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1230       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
      if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: rnz = %D nzmax = %D",rnz,nzmax);
1232       ierr = MPIULong_Recv(column_indices,rnz,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1233       ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1234     }
1235     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1236   } else {
1237     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1238     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1239     ierr = MPIULong_Send(column_indices,nz,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1240     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1241   }
1242   ierr = PetscFree(column_indices);CHKERRQ(ierr);
1243 
1244   /* load up the local column values */
1245   ierr = PetscMalloc1((nzmax+1),&column_values);CHKERRQ(ierr);
1246   cnt  = 0;
1247   for (i=0; i<mat->rmap->n; i++) {
1248     for (j=B->i[i]; j<B->i[i+1]; j++) {
1249       if (garray[B->j[j]] > cstart) break;
1250       column_values[cnt++] = B->a[j];
1251     }
1252     for (k=A->i[i]; k<A->i[i+1]; k++) column_values[cnt++] = A->a[k];
1253     for (; j<B->i[i+1]; j++) column_values[cnt++] = B->a[j];
1254   }
1255   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
1256 
1257   /* store the column values to the file */
1258   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1259   if (!rank) {
1260     MPI_Status status;
1261     ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1262     for (i=1; i<size; i++) {
1263       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1264       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
      if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: rnz = %D nzmax = %D",rnz,nzmax);
1266       ierr = MPIULong_Recv(column_values,rnz,MPIU_SCALAR,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1267       ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1268     }
1269     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1270   } else {
1271     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1272     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1273     ierr = MPIULong_Send(column_values,nz,MPIU_SCALAR,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1274     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1275   }
1276   ierr = PetscFree(column_values);CHKERRQ(ierr);
1277 
1278   ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr);
1279   if (file) fprintf(file,"-matload_block_size %d\n",(int)mat->rmap->bs);
1280   PetscFunctionReturn(0);
1281 }
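
/*
   Usage sketch (hypothetical; not part of this file): the binary path above is
   normally reached by viewing a parallel AIJ matrix through a binary viewer.
   Error handling follows the usual ierr/CHKERRQ pattern:

       PetscViewer viewer;
       ierr = PetscViewerBinaryOpen(PETSC_COMM_WORLD,"matrix.dat",FILE_MODE_WRITE,&viewer);CHKERRQ(ierr);
       ierr = MatView(A,viewer);CHKERRQ(ierr);
       ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr);

   The file can later be read back with MatLoad(); the -matload_block_size hint
   written to the info file above is consumed at that point.
*/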
1282 
1283 #include <petscdraw.h>
1284 #undef __FUNCT__
1285 #define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket"
1286 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
1287 {
1288   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
1289   PetscErrorCode    ierr;
1290   PetscMPIInt       rank = aij->rank,size = aij->size;
1291   PetscBool         isdraw,iascii,isbinary;
1292   PetscViewer       sviewer;
1293   PetscViewerFormat format;
1294 
1295   PetscFunctionBegin;
1296   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1297   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1298   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1299   if (iascii) {
1300     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1301     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1302       MatInfo   info;
1303       PetscInt  *inodes;
1304 
1305       ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
1306       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
1307       ierr = MatInodeGetInodeSizes(aij->A,NULL,&inodes,NULL);CHKERRQ(ierr);
1308       ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_TRUE);CHKERRQ(ierr);
1309       if (!inodes) {
1310         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, not using I-node routines\n",
1311                                                   rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
1312       } else {
1313         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, using I-node routines\n",
1314                                                   rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
1315       }
1316       ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
1317       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1318       ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
1319       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1320       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1321       ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_FALSE);CHKERRQ(ierr);
1322       ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
1323       ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr);
1324       PetscFunctionReturn(0);
1325     } else if (format == PETSC_VIEWER_ASCII_INFO) {
1326       PetscInt inodecount,inodelimit,*inodes;
1327       ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr);
1328       if (inodes) {
1329         ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr);
1330       } else {
1331         ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr);
1332       }
1333       PetscFunctionReturn(0);
1334     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1335       PetscFunctionReturn(0);
1336     }
1337   } else if (isbinary) {
1338     if (size == 1) {
1339       ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1340       ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
1341     } else {
1342       ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr);
1343     }
1344     PetscFunctionReturn(0);
1345   } else if (isdraw) {
1346     PetscDraw draw;
1347     PetscBool isnull;
1348     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
1349     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
1350   }
1351 
1352   if (size == 1) {
1353     ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1354     ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
1355   } else {
1356     /* assemble the entire matrix onto first processor. */
1357     Mat        A;
1358     Mat_SeqAIJ *Aloc;
1359     PetscInt   M = mat->rmap->N,N = mat->cmap->N,m,*ai,*aj,row,*cols,i,*ct;
1360     MatScalar  *a;
1361 
1362     if (mat->rmap->N > 1024) {
1363       PetscBool flg = PETSC_FALSE;
1364 
1365       ierr = PetscOptionsGetBool(((PetscObject) mat)->prefix, "-mat_ascii_output_large", &flg,NULL);CHKERRQ(ierr);
1366       if (!flg) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_OUTOFRANGE,"ASCII matrix output not allowed for matrices with more than 1024 rows, use binary format instead.\nYou can override this restriction using -mat_ascii_output_large.");
1367     }
1368 
1369     ierr = MatCreate(PetscObjectComm((PetscObject)mat),&A);CHKERRQ(ierr);
1370     if (!rank) {
1371       ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
1372     } else {
1373       ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
1374     }
1375     /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */
1376     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
1377     ierr = MatMPIAIJSetPreallocation(A,0,NULL,0,NULL);CHKERRQ(ierr);
1378     ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
1379     ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)A);CHKERRQ(ierr);
1380 
1381     /* copy over the A part */
1382     Aloc = (Mat_SeqAIJ*)aij->A->data;
1383     m    = aij->A->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1384     row  = mat->rmap->rstart;
1385     for (i=0; i<ai[m]; i++) aj[i] += mat->cmap->rstart;
1386     for (i=0; i<m; i++) {
1387       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
1388       row++;
1389       a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
1390     }
1391     aj = Aloc->j;
1392     for (i=0; i<ai[m]; i++) aj[i] -= mat->cmap->rstart;
1393 
1394     /* copy over the B part */
1395     Aloc = (Mat_SeqAIJ*)aij->B->data;
1396     m    = aij->B->rmap->n;  ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1397     row  = mat->rmap->rstart;
1398     ierr = PetscMalloc1((ai[m]+1),&cols);CHKERRQ(ierr);
1399     ct   = cols;
1400     for (i=0; i<ai[m]; i++) cols[i] = aij->garray[aj[i]];
1401     for (i=0; i<m; i++) {
1402       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
1403       row++;
1404       a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
1405     }
1406     ierr = PetscFree(ct);CHKERRQ(ierr);
1407     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1408     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1409     /*
1410        Everyone has to call to draw the matrix since the graphics waits are
1411        synchronized across all processors that share the PetscDraw object
1412     */
1413     ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
1414     if (!rank) {
1415       ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1416       /* Set the type name to MATMPIAIJ so that the correct type can be printed out by PetscObjectPrintClassNamePrefixType() in MatView_SeqAIJ_ASCII()*/
1417       ierr = PetscStrcpy(((PetscObject)((Mat_MPIAIJ*)(A->data))->A)->type_name,MATMPIAIJ);CHKERRQ(ierr);
1418       ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
1419     }
1420     ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
1421     ierr = MatDestroy(&A);CHKERRQ(ierr);
1422   }
1423   PetscFunctionReturn(0);
1424 }
1425 
1426 #undef __FUNCT__
1427 #define __FUNCT__ "MatView_MPIAIJ"
1428 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer)
1429 {
1430   PetscErrorCode ierr;
1431   PetscBool      iascii,isdraw,issocket,isbinary;
1432 
1433   PetscFunctionBegin;
1434   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1435   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1436   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1437   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
1438   if (iascii || isdraw || isbinary || issocket) {
1439     ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
1440   }
1441   PetscFunctionReturn(0);
1442 }
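
/*
   Usage sketch (hypothetical): the PETSC_VIEWER_ASCII_INFO_DETAIL branch above
   can be exercised by pushing that format onto a standard-output viewer before
   calling MatView():

       ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_WORLD,PETSC_VIEWER_ASCII_INFO_DETAIL);CHKERRQ(ierr);
       ierr = MatView(A,PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr);
*/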
1443 
1444 #undef __FUNCT__
1445 #define __FUNCT__ "MatSOR_MPIAIJ"
1446 PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
1447 {
1448   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1449   PetscErrorCode ierr;
1450   Vec            bb1 = 0;
1451   PetscBool      hasop;
1452 
1453   PetscFunctionBegin;
1454   if (flag == SOR_APPLY_UPPER) {
1455     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1456     PetscFunctionReturn(0);
1457   }
1458 
1459   if (its > 1 || !(flag & SOR_ZERO_INITIAL_GUESS) || flag & SOR_EISENSTAT) {
1460     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
1461   }
1462 
1463   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
1464     if (flag & SOR_ZERO_INITIAL_GUESS) {
1465       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1466       its--;
1467     }
1468 
1469     while (its--) {
1470       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1471       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1472 
1473       /* update rhs: bb1 = bb - B*x */
1474       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1475       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1476 
1477       /* local sweep */
1478       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1479     }
1480   } else if (flag & SOR_LOCAL_FORWARD_SWEEP) {
1481     if (flag & SOR_ZERO_INITIAL_GUESS) {
1482       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1483       its--;
1484     }
1485     while (its--) {
1486       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1487       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1488 
1489       /* update rhs: bb1 = bb - B*x */
1490       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1491       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1492 
1493       /* local sweep */
1494       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1495     }
1496   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) {
1497     if (flag & SOR_ZERO_INITIAL_GUESS) {
1498       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1499       its--;
1500     }
1501     while (its--) {
1502       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1503       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1504 
1505       /* update rhs: bb1 = bb - B*x */
1506       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1507       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1508 
1509       /* local sweep */
1510       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1511     }
1512   } else if (flag & SOR_EISENSTAT) {
1513     Vec xx1;
1514 
1515     ierr = VecDuplicate(bb,&xx1);CHKERRQ(ierr);
1516     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr);
1517 
1518     ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1519     ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1520     if (!mat->diag) {
1521       ierr = MatGetVecs(matin,&mat->diag,NULL);CHKERRQ(ierr);
1522       ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr);
1523     }
1524     ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr);
1525     if (hasop) {
1526       ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr);
1527     } else {
1528       ierr = VecPointwiseMult(bb1,mat->diag,xx);CHKERRQ(ierr);
1529     }
1530     ierr = VecAYPX(bb1,(omega-2.0)/omega,bb);CHKERRQ(ierr);
1531 
1532     ierr = MatMultAdd(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr);
1533 
1534     /* local sweep */
1535     ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr);
1536     ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr);
1537     ierr = VecDestroy(&xx1);CHKERRQ(ierr);
1538   } else SETERRQ(PetscObjectComm((PetscObject)matin),PETSC_ERR_SUP,"Parallel SOR not supported");
1539 
1540   ierr = VecDestroy(&bb1);CHKERRQ(ierr);
1541   PetscFunctionReturn(0);
1542 }
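
/*
   Usage sketch (hypothetical): MatSOR() is normally driven by PCSOR, but it can
   be called directly. Only the processor-local sweeps are supported in
   parallel, which is why the final branch above raises PETSC_ERR_SUP:

       ierr = MatSOR(A,b,1.0,(MatSORType)(SOR_LOCAL_SYMMETRIC_SWEEP | SOR_ZERO_INITIAL_GUESS),0.0,1,1,x);CHKERRQ(ierr);
*/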
1543 
1544 #undef __FUNCT__
1545 #define __FUNCT__ "MatPermute_MPIAIJ"
1546 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B)
1547 {
1548   Mat            aA,aB,Aperm;
1549   const PetscInt *rwant,*cwant,*gcols,*ai,*bi,*aj,*bj;
1550   PetscScalar    *aa,*ba;
1551   PetscInt       i,j,m,n,ng,anz,bnz,*dnnz,*onnz,*tdnnz,*tonnz,*rdest,*cdest,*work,*gcdest;
1552   PetscSF        rowsf,sf;
1553   IS             parcolp = NULL;
1554   PetscBool      done;
1555   PetscErrorCode ierr;
1556 
1557   PetscFunctionBegin;
1558   ierr = MatGetLocalSize(A,&m,&n);CHKERRQ(ierr);
1559   ierr = ISGetIndices(rowp,&rwant);CHKERRQ(ierr);
1560   ierr = ISGetIndices(colp,&cwant);CHKERRQ(ierr);
1561   ierr = PetscMalloc3(PetscMax(m,n),&work,m,&rdest,n,&cdest);CHKERRQ(ierr);
1562 
1563   /* Invert row permutation to find out where my rows should go */
1564   ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&rowsf);CHKERRQ(ierr);
1565   ierr = PetscSFSetGraphLayout(rowsf,A->rmap,A->rmap->n,NULL,PETSC_OWN_POINTER,rwant);CHKERRQ(ierr);
1566   ierr = PetscSFSetFromOptions(rowsf);CHKERRQ(ierr);
1567   for (i=0; i<m; i++) work[i] = A->rmap->rstart + i;
1568   ierr = PetscSFReduceBegin(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr);
1569   ierr = PetscSFReduceEnd(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr);
1570 
1571   /* Invert column permutation to find out where my columns should go */
1572   ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
1573   ierr = PetscSFSetGraphLayout(sf,A->cmap,A->cmap->n,NULL,PETSC_OWN_POINTER,cwant);CHKERRQ(ierr);
1574   ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
1575   for (i=0; i<n; i++) work[i] = A->cmap->rstart + i;
1576   ierr = PetscSFReduceBegin(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr);
1577   ierr = PetscSFReduceEnd(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr);
1578   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
1579 
1580   ierr = ISRestoreIndices(rowp,&rwant);CHKERRQ(ierr);
1581   ierr = ISRestoreIndices(colp,&cwant);CHKERRQ(ierr);
1582   ierr = MatMPIAIJGetSeqAIJ(A,&aA,&aB,&gcols);CHKERRQ(ierr);
1583 
1584   /* Find out where my gcols should go */
1585   ierr = MatGetSize(aB,NULL,&ng);CHKERRQ(ierr);
1586   ierr = PetscMalloc1(ng,&gcdest);CHKERRQ(ierr);
1587   ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
1588   ierr = PetscSFSetGraphLayout(sf,A->cmap,ng,NULL,PETSC_OWN_POINTER,gcols);CHKERRQ(ierr);
1589   ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
1590   ierr = PetscSFBcastBegin(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr);
1591   ierr = PetscSFBcastEnd(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr);
1592   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
1593 
1594   ierr = PetscCalloc4(m,&dnnz,m,&onnz,m,&tdnnz,m,&tonnz);CHKERRQ(ierr);
1595   ierr = MatGetRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr);
1596   ierr = MatGetRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr);
1597   for (i=0; i<m; i++) {
1598     PetscInt row = rdest[i],rowner;
1599     ierr = PetscLayoutFindOwner(A->rmap,row,&rowner);CHKERRQ(ierr);
1600     for (j=ai[i]; j<ai[i+1]; j++) {
1601       PetscInt cowner,col = cdest[aj[j]];
1602       ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr); /* Could build an index for the columns to eliminate this search */
1603       if (rowner == cowner) dnnz[i]++;
1604       else onnz[i]++;
1605     }
1606     for (j=bi[i]; j<bi[i+1]; j++) {
1607       PetscInt cowner,col = gcdest[bj[j]];
1608       ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr);
1609       if (rowner == cowner) dnnz[i]++;
1610       else onnz[i]++;
1611     }
1612   }
1613   ierr = PetscSFBcastBegin(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr);
1614   ierr = PetscSFBcastEnd(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr);
1615   ierr = PetscSFBcastBegin(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr);
1616   ierr = PetscSFBcastEnd(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr);
1617   ierr = PetscSFDestroy(&rowsf);CHKERRQ(ierr);
1618 
1619   ierr = MatCreateAIJ(PetscObjectComm((PetscObject)A),A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N,0,tdnnz,0,tonnz,&Aperm);CHKERRQ(ierr);
1620   ierr = MatSeqAIJGetArray(aA,&aa);CHKERRQ(ierr);
1621   ierr = MatSeqAIJGetArray(aB,&ba);CHKERRQ(ierr);
1622   for (i=0; i<m; i++) {
1623     PetscInt *acols = dnnz,*bcols = onnz; /* Repurpose now-unneeded arrays */
1624     PetscInt j0,rowlen;
1625     rowlen = ai[i+1] - ai[i];
1626     for (j0=j=0; j<rowlen; j0=j) { /* rowlen could be larger than number of rows m, so sum in batches */
1627       for ( ; j<PetscMin(rowlen,j0+m); j++) acols[j-j0] = cdest[aj[ai[i]+j]];
1628       ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,acols,aa+ai[i]+j0,INSERT_VALUES);CHKERRQ(ierr);
1629     }
1630     rowlen = bi[i+1] - bi[i];
1631     for (j0=j=0; j<rowlen; j0=j) {
1632       for ( ; j<PetscMin(rowlen,j0+m); j++) bcols[j-j0] = gcdest[bj[bi[i]+j]];
1633       ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,bcols,ba+bi[i]+j0,INSERT_VALUES);CHKERRQ(ierr);
1634     }
1635   }
1636   ierr = MatAssemblyBegin(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1637   ierr = MatAssemblyEnd(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1638   ierr = MatRestoreRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr);
1639   ierr = MatRestoreRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr);
1640   ierr = MatSeqAIJRestoreArray(aA,&aa);CHKERRQ(ierr);
1641   ierr = MatSeqAIJRestoreArray(aB,&ba);CHKERRQ(ierr);
1642   ierr = PetscFree4(dnnz,onnz,tdnnz,tonnz);CHKERRQ(ierr);
1643   ierr = PetscFree3(work,rdest,cdest);CHKERRQ(ierr);
1644   ierr = PetscFree(gcdest);CHKERRQ(ierr);
1645   if (parcolp) {ierr = ISDestroy(&colp);CHKERRQ(ierr);}
1646   *B = Aperm;
1647   PetscFunctionReturn(0);
1648 }
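
/*
   Usage sketch (hypothetical): the permutations handed to MatPermute() are
   often produced by MatGetOrdering(); ordering support varies by matrix type,
   so this is illustrative only:

       IS  rowperm,colperm;
       Mat Aperm;
       ierr = MatGetOrdering(A,MATORDERINGNATURAL,&rowperm,&colperm);CHKERRQ(ierr);
       ierr = MatPermute(A,rowperm,colperm,&Aperm);CHKERRQ(ierr);
       ierr = ISDestroy(&rowperm);CHKERRQ(ierr);
       ierr = ISDestroy(&colperm);CHKERRQ(ierr);
*/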
1649 
1650 #undef __FUNCT__
1651 #define __FUNCT__ "MatGetInfo_MPIAIJ"
1652 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1653 {
1654   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1655   Mat            A    = mat->A,B = mat->B;
1656   PetscErrorCode ierr;
1657   PetscReal      isend[5],irecv[5];
1658 
1659   PetscFunctionBegin;
1660   info->block_size = 1.0;
1661   ierr             = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1662 
1663   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1664   isend[3] = info->memory;  isend[4] = info->mallocs;
1665 
1666   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1667 
1668   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1669   isend[3] += info->memory;  isend[4] += info->mallocs;
1670   if (flag == MAT_LOCAL) {
1671     info->nz_used      = isend[0];
1672     info->nz_allocated = isend[1];
1673     info->nz_unneeded  = isend[2];
1674     info->memory       = isend[3];
1675     info->mallocs      = isend[4];
1676   } else if (flag == MAT_GLOBAL_MAX) {
1677     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);
1678 
1679     info->nz_used      = irecv[0];
1680     info->nz_allocated = irecv[1];
1681     info->nz_unneeded  = irecv[2];
1682     info->memory       = irecv[3];
1683     info->mallocs      = irecv[4];
1684   } else if (flag == MAT_GLOBAL_SUM) {
1685     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);
1686 
1687     info->nz_used      = irecv[0];
1688     info->nz_allocated = irecv[1];
1689     info->nz_unneeded  = irecv[2];
1690     info->memory       = irecv[3];
1691     info->mallocs      = irecv[4];
1692   }
1693   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1694   info->fill_ratio_needed = 0;
1695   info->factor_mallocs    = 0;
1696   PetscFunctionReturn(0);
1697 }
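
/*
   Usage sketch (hypothetical): retrieving the global statistics accumulated by
   the reductions above. The MatInfo fields are PetscLogDoubles, hence %g:

       MatInfo info;
       ierr = MatGetInfo(A,MAT_GLOBAL_SUM,&info);CHKERRQ(ierr);
       ierr = PetscPrintf(PETSC_COMM_WORLD,"nz used %g, nz allocated %g\n",info.nz_used,info.nz_allocated);CHKERRQ(ierr);
*/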
1698 
1699 #undef __FUNCT__
1700 #define __FUNCT__ "MatSetOption_MPIAIJ"
1701 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscBool flg)
1702 {
1703   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1704   PetscErrorCode ierr;
1705 
1706   PetscFunctionBegin;
1707   switch (op) {
1708   case MAT_NEW_NONZERO_LOCATIONS:
1709   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1710   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1711   case MAT_KEEP_NONZERO_PATTERN:
1712   case MAT_NEW_NONZERO_LOCATION_ERR:
1713   case MAT_USE_INODES:
1714   case MAT_IGNORE_ZERO_ENTRIES:
1715     MatCheckPreallocated(A,1);
1716     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1717     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1718     break;
1719   case MAT_ROW_ORIENTED:
1720     a->roworiented = flg;
1721 
1722     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1723     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1724     break;
1725   case MAT_NEW_DIAGONALS:
1726     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1727     break;
1728   case MAT_IGNORE_OFF_PROC_ENTRIES:
1729     a->donotstash = flg;
1730     break;
1731   case MAT_SPD:
1732     A->spd_set = PETSC_TRUE;
1733     A->spd     = flg;
1734     if (flg) {
1735       A->symmetric                  = PETSC_TRUE;
1736       A->structurally_symmetric     = PETSC_TRUE;
1737       A->symmetric_set              = PETSC_TRUE;
1738       A->structurally_symmetric_set = PETSC_TRUE;
1739     }
1740     break;
1741   case MAT_SYMMETRIC:
1742     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1743     break;
1744   case MAT_STRUCTURALLY_SYMMETRIC:
1745     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1746     break;
1747   case MAT_HERMITIAN:
1748     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1749     break;
1750   case MAT_SYMMETRY_ETERNAL:
1751     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1752     break;
1753   default:
1754     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op);
1755   }
1756   PetscFunctionReturn(0);
1757 }
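
/*
   Usage sketch (hypothetical): options set on the parallel matrix are forwarded
   to the sequential diagonal (and, where meaningful, off-diagonal) blocks:

       ierr = MatSetOption(A,MAT_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr);
       ierr = MatSetOption(A,MAT_IGNORE_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr);
*/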
1758 
1759 #undef __FUNCT__
1760 #define __FUNCT__ "MatGetRow_MPIAIJ"
1761 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1762 {
1763   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1764   PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
1765   PetscErrorCode ierr;
1766   PetscInt       i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart;
1767   PetscInt       nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend;
1768   PetscInt       *cmap,*idx_p;
1769 
1770   PetscFunctionBegin;
1771   if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
1772   mat->getrowactive = PETSC_TRUE;
1773 
1774   if (!mat->rowvalues && (idx || v)) {
1775     /*
1776         allocate enough space to hold information from the longest row.
1777     */
1778     Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data;
1779     PetscInt   max = 1,tmp;
1780     for (i=0; i<matin->rmap->n; i++) {
1781       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1782       if (max < tmp) max = tmp;
1783     }
1784     ierr = PetscMalloc2(max,&mat->rowvalues,max,&mat->rowindices);CHKERRQ(ierr);
1785   }
1786 
1787   if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows");
1788   lrow = row - rstart;
1789 
1790   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1791   if (!v)   {pvA = 0; pvB = 0;}
1792   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1793   ierr  = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1794   ierr  = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1795   nztot = nzA + nzB;
1796 
1797   cmap = mat->garray;
1798   if (v  || idx) {
1799     if (nztot) {
1800       /* Sort by increasing column numbers, assuming A and B already sorted */
1801       PetscInt imark = -1;
1802       if (v) {
1803         *v = v_p = mat->rowvalues;
1804         for (i=0; i<nzB; i++) {
1805           if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i];
1806           else break;
1807         }
1808         imark = i;
1809         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1810         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1811       }
1812       if (idx) {
1813         *idx = idx_p = mat->rowindices;
1814         if (imark > -1) {
1815           for (i=0; i<imark; i++) {
1816             idx_p[i] = cmap[cworkB[i]];
1817           }
1818         } else {
1819           for (i=0; i<nzB; i++) {
1820             if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]];
1821             else break;
1822           }
1823           imark = i;
1824         }
1825         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart + cworkA[i];
1826         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]];
1827       }
1828     } else {
1829       if (idx) *idx = 0;
1830       if (v)   *v   = 0;
1831     }
1832   }
1833   *nz  = nztot;
1834   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1835   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1836   PetscFunctionReturn(0);
1837 }
1838 
1839 #undef __FUNCT__
1840 #define __FUNCT__ "MatRestoreRow_MPIAIJ"
1841 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1842 {
1843   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1844 
1845   PetscFunctionBegin;
1846   if (!aij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
1847   aij->getrowactive = PETSC_FALSE;
1848   PetscFunctionReturn(0);
1849 }
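
/*
   Usage sketch (hypothetical): MatGetRow()/MatRestoreRow() may only be called
   on locally owned rows, and only one row may be active at a time (enforced by
   the getrowactive flag above):

       PetscInt          row,rstart,rend,ncols;
       const PetscInt    *cols;
       const PetscScalar *vals;
       ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
       for (row=rstart; row<rend; row++) {
         ierr = MatGetRow(A,row,&ncols,&cols,&vals);CHKERRQ(ierr);
         ...                                        use cols[] and vals[] here
         ierr = MatRestoreRow(A,row,&ncols,&cols,&vals);CHKERRQ(ierr);
       }
*/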
1850 
1851 #undef __FUNCT__
1852 #define __FUNCT__ "MatNorm_MPIAIJ"
1853 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm)
1854 {
1855   Mat_MPIAIJ     *aij  = (Mat_MPIAIJ*)mat->data;
1856   Mat_SeqAIJ     *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data;
1857   PetscErrorCode ierr;
1858   PetscInt       i,j,cstart = mat->cmap->rstart;
1859   PetscReal      sum = 0.0;
1860   MatScalar      *v;
1861 
1862   PetscFunctionBegin;
1863   if (aij->size == 1) {
1864     ierr =  MatNorm(aij->A,type,norm);CHKERRQ(ierr);
1865   } else {
1866     if (type == NORM_FROBENIUS) {
1867       v = amat->a;
1868       for (i=0; i<amat->nz; i++) {
1869         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1870       }
1871       v = bmat->a;
1872       for (i=0; i<bmat->nz; i++) {
1873         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1874       }
1875       ierr  = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1876       *norm = PetscSqrtReal(*norm);
1877     } else if (type == NORM_1) { /* max column norm */
1878       PetscReal *tmp,*tmp2;
1879       PetscInt  *jj,*garray = aij->garray;
1880       ierr  = PetscCalloc1((mat->cmap->N+1),&tmp);CHKERRQ(ierr);
1881       ierr  = PetscMalloc1((mat->cmap->N+1),&tmp2);CHKERRQ(ierr);
1882       *norm = 0.0;
1883       v     = amat->a; jj = amat->j;
1884       for (j=0; j<amat->nz; j++) {
1885         tmp[cstart + *jj++] += PetscAbsScalar(*v);  v++;
1886       }
1887       v = bmat->a; jj = bmat->j;
1888       for (j=0; j<bmat->nz; j++) {
1889         tmp[garray[*jj++]] += PetscAbsScalar(*v); v++;
1890       }
1891       ierr = MPI_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1892       for (j=0; j<mat->cmap->N; j++) {
1893         if (tmp2[j] > *norm) *norm = tmp2[j];
1894       }
1895       ierr = PetscFree(tmp);CHKERRQ(ierr);
1896       ierr = PetscFree(tmp2);CHKERRQ(ierr);
1897     } else if (type == NORM_INFINITY) { /* max row norm */
1898       PetscReal ntemp = 0.0;
1899       for (j=0; j<aij->A->rmap->n; j++) {
1900         v   = amat->a + amat->i[j];
1901         sum = 0.0;
1902         for (i=0; i<amat->i[j+1]-amat->i[j]; i++) {
1903           sum += PetscAbsScalar(*v); v++;
1904         }
1905         v = bmat->a + bmat->i[j];
1906         for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) {
1907           sum += PetscAbsScalar(*v); v++;
1908         }
1909         if (sum > ntemp) ntemp = sum;
1910       }
1911       ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1912     } else SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"No support for two norm");
1913   }
1914   PetscFunctionReturn(0);
1915 }
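
/*
   Usage sketch (hypothetical): NORM_1, NORM_FROBENIUS and NORM_INFINITY are
   supported above; NORM_2 deliberately is not:

       PetscReal nrm;
       ierr = MatNorm(A,NORM_FROBENIUS,&nrm);CHKERRQ(ierr);
*/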
1916 
1917 #undef __FUNCT__
1918 #define __FUNCT__ "MatTranspose_MPIAIJ"
1919 PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout)
1920 {
1921   Mat_MPIAIJ     *a   = (Mat_MPIAIJ*)A->data;
1922   Mat_SeqAIJ     *Aloc=(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data;
1923   PetscErrorCode ierr;
1924   PetscInt       M      = A->rmap->N,N = A->cmap->N,ma,na,mb,nb,*ai,*aj,*bi,*bj,row,*cols,*cols_tmp,i;
1925   PetscInt       cstart = A->cmap->rstart,ncol;
1926   Mat            B;
1927   MatScalar      *array;
1928 
1929   PetscFunctionBegin;
1930   if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
1931 
1932   ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n; nb = a->B->cmap->n;
1933   ai = Aloc->i; aj = Aloc->j;
1934   bi = Bloc->i; bj = Bloc->j;
1935   if (reuse == MAT_INITIAL_MATRIX || *matout == A) {
1936     PetscInt             *d_nnz,*g_nnz,*o_nnz;
1937     PetscSFNode          *oloc;
1938     PETSC_UNUSED PetscSF sf;
1939 
1940     ierr = PetscMalloc4(na,&d_nnz,na,&o_nnz,nb,&g_nnz,nb,&oloc);CHKERRQ(ierr);
1941     /* compute d_nnz for preallocation */
1942     ierr = PetscMemzero(d_nnz,na*sizeof(PetscInt));CHKERRQ(ierr);
1943     for (i=0; i<ai[ma]; i++) {
1944       d_nnz[aj[i]]++;
1945       aj[i] += cstart; /* global col index to be used by MatSetValues() */
1946     }
1947     /* compute local off-diagonal contributions */
1948     ierr = PetscMemzero(g_nnz,nb*sizeof(PetscInt));CHKERRQ(ierr);
1949     for (i=0; i<bi[ma]; i++) g_nnz[bj[i]]++;
1950     /* map those to global */
1951     ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
1952     ierr = PetscSFSetGraphLayout(sf,A->cmap,nb,NULL,PETSC_USE_POINTER,a->garray);CHKERRQ(ierr);
1953     ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
1954     ierr = PetscMemzero(o_nnz,na*sizeof(PetscInt));CHKERRQ(ierr);
1955     ierr = PetscSFReduceBegin(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr);
1956     ierr = PetscSFReduceEnd(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr);
1957     ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
1958 
1959     ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
1960     ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
1961     ierr = MatSetBlockSizes(B,A->cmap->bs,A->rmap->bs);CHKERRQ(ierr);
1962     ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
1963     ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
1964     ierr = PetscFree4(d_nnz,o_nnz,g_nnz,oloc);CHKERRQ(ierr);
1965   } else {
1966     B    = *matout;
1967     ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
1968     for (i=0; i<ai[ma]; i++) aj[i] += cstart; /* global col index to be used by MatSetValues() */
1969   }
1970 
1971   /* copy over the A part */
1972   array = Aloc->a;
1973   row   = A->rmap->rstart;
1974   for (i=0; i<ma; i++) {
1975     ncol = ai[i+1]-ai[i];
1976     ierr = MatSetValues(B,ncol,aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1977     row++;
1978     array += ncol; aj += ncol;
1979   }
1980   aj = Aloc->j;
1981   for (i=0; i<ai[ma]; i++) aj[i] -= cstart; /* resume local col index */
1982 
1983   /* copy over the B part */
1984   ierr  = PetscCalloc1(bi[mb],&cols);CHKERRQ(ierr);
1985   array = Bloc->a;
1986   row   = A->rmap->rstart;
1987   for (i=0; i<bi[mb]; i++) cols[i] = a->garray[bj[i]];
1988   cols_tmp = cols;
1989   for (i=0; i<mb; i++) {
1990     ncol = bi[i+1]-bi[i];
1991     ierr = MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1992     row++;
1993     array += ncol; cols_tmp += ncol;
1994   }
1995   ierr = PetscFree(cols);CHKERRQ(ierr);
1996 
1997   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1998   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1999   if (reuse == MAT_INITIAL_MATRIX || *matout != A) {
2000     *matout = B;
2001   } else {
2002     ierr = MatHeaderMerge(A,B);CHKERRQ(ierr);
2003   }
2004   PetscFunctionReturn(0);
2005 }
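
/*
   Usage sketch (hypothetical): both out-of-place and, for square matrices,
   in-place transposes reach the routine above:

       Mat At;
       ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr);
       ierr = MatTranspose(A,MAT_REUSE_MATRIX,&A);CHKERRQ(ierr);     in-place, square only
*/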
2006 
2007 #undef __FUNCT__
2008 #define __FUNCT__ "MatDiagonalScale_MPIAIJ"
2009 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
2010 {
2011   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2012   Mat            a    = aij->A,b = aij->B;
2013   PetscErrorCode ierr;
2014   PetscInt       s1,s2,s3;
2015 
2016   PetscFunctionBegin;
2017   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
2018   if (rr) {
2019     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
2020     if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
2021     /* Overlap communication with computation. */
2022     ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2023   }
2024   if (ll) {
2025     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
2026     if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
2027     ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr);
2028   }
2029   /* scale the diagonal block */
2030   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
2031 
2032   if (rr) {
2033     /* Do a scatter end and then right scale the off-diagonal block */
2034     ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2035     ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr);
2036   }
2037   PetscFunctionReturn(0);
2038 }
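
/*
   Usage sketch (hypothetical): B = diag(ll)*A*diag(rr); ll must conform to the
   local row layout and rr to the local column layout, and either may be NULL
   to skip that side of the scaling:

       ierr = MatDiagonalScale(A,ll,rr);CHKERRQ(ierr);
*/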
2039 
2040 #undef __FUNCT__
2041 #define __FUNCT__ "MatSetUnfactored_MPIAIJ"
2042 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A)
2043 {
2044   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2045   PetscErrorCode ierr;
2046 
2047   PetscFunctionBegin;
2048   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
2049   PetscFunctionReturn(0);
2050 }
2051 
2052 #undef __FUNCT__
2053 #define __FUNCT__ "MatEqual_MPIAIJ"
2054 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscBool  *flag)
2055 {
2056   Mat_MPIAIJ     *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data;
2057   Mat            a,b,c,d;
2058   PetscBool      flg;
2059   PetscErrorCode ierr;
2060 
2061   PetscFunctionBegin;
2062   a = matA->A; b = matA->B;
2063   c = matB->A; d = matB->B;
2064 
2065   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
2066   if (flg) {
2067     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
2068   }
2069   ierr = MPI_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
2070   PetscFunctionReturn(0);
2071 }
2072 
2073 #undef __FUNCT__
2074 #define __FUNCT__ "MatCopy_MPIAIJ"
2075 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
2076 {
2077   PetscErrorCode ierr;
2078   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2079   Mat_MPIAIJ     *b = (Mat_MPIAIJ*)B->data;
2080 
2081   PetscFunctionBegin;
2082   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
2083   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
2084     /* Because of the column compression in the off-process part of the matrix a->B,
2085        the number of columns in a->B and b->B may differ, so we cannot call MatCopy()
2086        directly on the two parts. If need be, a copy more efficient than MatCopy_Basic()
2087        could be provided by first uncompressing the a->B matrices and then copying the
2088        submatrices */
2089     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
2090   } else {
2091     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
2092     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
2093   }
2094   PetscFunctionReturn(0);
2095 }
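
/*
   Usage sketch (hypothetical): the fast block-wise path above requires the two
   matrices to share a nonzero pattern; otherwise MatCopy_Basic() is used:

       ierr = MatCopy(A,B,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
*/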
2096 
2097 #undef __FUNCT__
2098 #define __FUNCT__ "MatSetUp_MPIAIJ"
2099 PetscErrorCode MatSetUp_MPIAIJ(Mat A)
2100 {
2101   PetscErrorCode ierr;
2102 
2103   PetscFunctionBegin;
2104   ierr =  MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
2105   PetscFunctionReturn(0);
2106 }
2107 
2108 #undef __FUNCT__
2109 #define __FUNCT__ "MatAXPYGetPreallocation_MPIAIJ"
2110 /* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */
2111 static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y,const PetscInt *yltog,Mat X,const PetscInt *xltog,PetscInt *nnz)
2112 {
2113   PetscInt       i,m=Y->rmap->N;
2114   Mat_SeqAIJ     *x  = (Mat_SeqAIJ*)X->data;
2115   Mat_SeqAIJ     *y  = (Mat_SeqAIJ*)Y->data;
2116   const PetscInt *xi = x->i,*yi = y->i;
2117 
2118   PetscFunctionBegin;
2119   /* Set the number of nonzeros in the new matrix */
2120   for (i=0; i<m; i++) {
2121     PetscInt       j,k,nzx = xi[i+1] - xi[i],nzy = yi[i+1] - yi[i];
2122     const PetscInt *xj = x->j+xi[i],*yj = y->j+yi[i];
2123     nnz[i] = 0;
2124     for (j=0,k=0; j<nzx; j++) {                   /* Point in X */
2125       for (; k<nzy && yltog[yj[k]]<xltog[xj[j]]; k++) nnz[i]++; /* Catch up to X */
2126       if (k<nzy && yltog[yj[k]]==xltog[xj[j]]) k++;             /* Skip duplicate */
2127       nnz[i]++;
2128     }
2129     for (; k<nzy; k++) nnz[i]++;
2130   }
2131   PetscFunctionReturn(0);
2132 }
2133 
2134 #undef __FUNCT__
2135 #define __FUNCT__ "MatAXPY_MPIAIJ"
2136 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
2137 {
2138   PetscErrorCode ierr;
2139   PetscInt       i;
2140   Mat_MPIAIJ     *xx = (Mat_MPIAIJ*)X->data,*yy = (Mat_MPIAIJ*)Y->data;
2141   PetscBLASInt   bnz,one=1;
2142   Mat_SeqAIJ     *x,*y;
2143 
2144   PetscFunctionBegin;
2145   if (str == SAME_NONZERO_PATTERN) {
2146     PetscScalar alpha = a;
2147     x    = (Mat_SeqAIJ*)xx->A->data;
2148     ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr);
2149     y    = (Mat_SeqAIJ*)yy->A->data;
2150     PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
2151     x    = (Mat_SeqAIJ*)xx->B->data;
2152     y    = (Mat_SeqAIJ*)yy->B->data;
2153     ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr);
2154     PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
2155   } else if (str == SUBSET_NONZERO_PATTERN) {
2156     ierr = MatAXPY_SeqAIJ(yy->A,a,xx->A,str);CHKERRQ(ierr);
2157 
2158     x = (Mat_SeqAIJ*)xx->B->data;
2159     y = (Mat_SeqAIJ*)yy->B->data;
2160     if (y->xtoy && y->XtoY != xx->B) {
2161       ierr = PetscFree(y->xtoy);CHKERRQ(ierr);
2162       ierr = MatDestroy(&y->XtoY);CHKERRQ(ierr);
2163     }
2164     if (!y->xtoy) { /* get xtoy */
2165       ierr    = MatAXPYGetxtoy_Private(xx->B->rmap->n,x->i,x->j,xx->garray,y->i,y->j,yy->garray,&y->xtoy);CHKERRQ(ierr);
2166       y->XtoY = xx->B;
2167       ierr    = PetscObjectReference((PetscObject)xx->B);CHKERRQ(ierr);
2168     }
2169     for (i=0; i<x->nz; i++) y->a[y->xtoy[i]] += a*(x->a[i]);
2170   } else {
2171     Mat      B;
2172     PetscInt *nnz_d,*nnz_o;
2173     ierr = PetscMalloc1(yy->A->rmap->N,&nnz_d);CHKERRQ(ierr);
2174     ierr = PetscMalloc1(yy->B->rmap->N,&nnz_o);CHKERRQ(ierr);
2175     ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr);
2176     ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr);
2177     ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr);
2178     ierr = MatSetBlockSizes(B,Y->rmap->bs,Y->cmap->bs);CHKERRQ(ierr);
2179     ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr);
2180     ierr = MatAXPYGetPreallocation_SeqAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr);
2181     ierr = MatAXPYGetPreallocation_MPIAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr);
2182     ierr = MatMPIAIJSetPreallocation(B,0,nnz_d,0,nnz_o);CHKERRQ(ierr);
2183     ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr);
2184     ierr = MatHeaderReplace(Y,B);CHKERRQ(ierr);
2185     ierr = PetscFree(nnz_d);CHKERRQ(ierr);
2186     ierr = PetscFree(nnz_o);CHKERRQ(ierr);
2187   }
2188   PetscFunctionReturn(0);
2189 }
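
/*
   Usage sketch (hypothetical): Y <- a*X + Y; SAME_NONZERO_PATTERN is cheapest
   since it reduces to the two BLAS axpy calls above:

       ierr = MatAXPY(Y,2.0,X,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
*/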
2190 
2191 extern PetscErrorCode  MatConjugate_SeqAIJ(Mat);
2192 
2193 #undef __FUNCT__
2194 #define __FUNCT__ "MatConjugate_MPIAIJ"
2195 PetscErrorCode  MatConjugate_MPIAIJ(Mat mat)
2196 {
2197 #if defined(PETSC_USE_COMPLEX)
2198   PetscErrorCode ierr;
2199   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2200 
2201   PetscFunctionBegin;
2202   ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr);
2203   ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr);
2204 #else
2205   PetscFunctionBegin;
2206 #endif
2207   PetscFunctionReturn(0);
2208 }
2209 
2210 #undef __FUNCT__
2211 #define __FUNCT__ "MatRealPart_MPIAIJ"
2212 PetscErrorCode MatRealPart_MPIAIJ(Mat A)
2213 {
2214   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2215   PetscErrorCode ierr;
2216 
2217   PetscFunctionBegin;
2218   ierr = MatRealPart(a->A);CHKERRQ(ierr);
2219   ierr = MatRealPart(a->B);CHKERRQ(ierr);
2220   PetscFunctionReturn(0);
2221 }
2222 
2223 #undef __FUNCT__
2224 #define __FUNCT__ "MatImaginaryPart_MPIAIJ"
2225 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A)
2226 {
2227   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2228   PetscErrorCode ierr;
2229 
2230   PetscFunctionBegin;
2231   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
2232   ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
2233   PetscFunctionReturn(0);
2234 }
2235 
2236 #if defined(PETSC_HAVE_PBGL)
2237 
2238 #include <boost/parallel/mpi/bsp_process_group.hpp>
2239 #include <boost/graph/distributed/ilu_default_graph.hpp>
2240 #include <boost/graph/distributed/ilu_0_block.hpp>
2241 #include <boost/graph/distributed/ilu_preconditioner.hpp>
2242 #include <boost/graph/distributed/petsc/interface.hpp>
2243 #include <boost/multi_array.hpp>
2244 #include <boost/parallel/distributed_property_map.hpp>
2245 
2246 #undef __FUNCT__
2247 #define __FUNCT__ "MatILUFactorSymbolic_MPIAIJ"
2248 /*
2249   This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu>
2250 */
2251 PetscErrorCode MatILUFactorSymbolic_MPIAIJ(Mat fact,Mat A, IS isrow, IS iscol, const MatFactorInfo *info)
2252 {
2253   namespace petsc = boost::distributed::petsc;
2254 
2255   namespace graph_dist = boost::graph::distributed;
2256   using boost::graph::distributed::ilu_default::process_group_type;
2257   using boost::graph::ilu_permuted;
2258 
2259   PetscBool      row_identity, col_identity;
2260   PetscContainer c;
2261   PetscInt       m, n, M, N;
2262   PetscErrorCode ierr;
2263 
2264   PetscFunctionBegin;
2265   if (info->levels != 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only levels = 0 supported for parallel ilu");
2266   ierr = ISIdentity(isrow, &row_identity);CHKERRQ(ierr);
2267   ierr = ISIdentity(iscol, &col_identity);CHKERRQ(ierr);
2268   if (!row_identity || !col_identity) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for parallel ILU");
2269 
2270   process_group_type pg;
2271   typedef graph_dist::ilu_default::ilu_level_graph_type lgraph_type;
2272   lgraph_type  *lgraph_p   = new lgraph_type(petsc::num_global_vertices(A), pg, petsc::matrix_distribution(A, pg));
2273   lgraph_type& level_graph = *lgraph_p;
2274   graph_dist::ilu_default::graph_type&            graph(level_graph.graph);
2275 
2276   petsc::read_matrix(A, graph, get(boost::edge_weight, graph));
2277   ilu_permuted(level_graph);
2278 
2279   /* put together the new matrix */
2280   ierr = MatCreate(PetscObjectComm((PetscObject)A), fact);CHKERRQ(ierr);
2281   ierr = MatGetLocalSize(A, &m, &n);CHKERRQ(ierr);
2282   ierr = MatGetSize(A, &M, &N);CHKERRQ(ierr);
2283   ierr = MatSetSizes(fact, m, n, M, N);CHKERRQ(ierr);
2284   ierr = MatSetBlockSizes(fact,A->rmap->bs,A->cmap->bs);CHKERRQ(ierr);
2285   ierr = MatSetType(fact, ((PetscObject)A)->type_name);CHKERRQ(ierr);
2286   ierr = MatAssemblyBegin(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2287   ierr = MatAssemblyEnd(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2288 
2289   ierr = PetscContainerCreate(PetscObjectComm((PetscObject)A), &c);CHKERRQ(ierr);
2290   ierr = PetscContainerSetPointer(c, lgraph_p);CHKERRQ(ierr);
2291   ierr = PetscObjectCompose((PetscObject) (fact), "graph", (PetscObject) c);CHKERRQ(ierr);
2292   ierr = PetscContainerDestroy(&c);CHKERRQ(ierr);
2293   PetscFunctionReturn(0);
2294 }
2295 
2296 #undef __FUNCT__
2297 #define __FUNCT__ "MatLUFactorNumeric_MPIAIJ"
2298 PetscErrorCode MatLUFactorNumeric_MPIAIJ(Mat B,Mat A, const MatFactorInfo *info)
2299 {
2300   PetscFunctionBegin;
2301   PetscFunctionReturn(0);
2302 }
2303 
2304 #undef __FUNCT__
2305 #define __FUNCT__ "MatSolve_MPIAIJ"
2306 /*
2307   This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu>
2308 */
2309 PetscErrorCode MatSolve_MPIAIJ(Mat A, Vec b, Vec x)
2310 {
2311   namespace graph_dist = boost::graph::distributed;
2312 
2313   typedef graph_dist::ilu_default::ilu_level_graph_type lgraph_type;
2314   lgraph_type    *lgraph_p;
2315   PetscContainer c;
2316   PetscErrorCode ierr;
2317 
2318   PetscFunctionBegin;
2319   ierr = PetscObjectQuery((PetscObject) A, "graph", (PetscObject*) &c);CHKERRQ(ierr);
2320   ierr = PetscContainerGetPointer(c, (void**) &lgraph_p);CHKERRQ(ierr);
2321   ierr = VecCopy(b, x);CHKERRQ(ierr);
2322 
2323   PetscScalar *array_x;
2324   ierr = VecGetArray(x, &array_x);CHKERRQ(ierr);
2325   PetscInt sx;
2326   ierr = VecGetSize(x, &sx);CHKERRQ(ierr);
2327 
2328   PetscScalar *array_b;
2329   ierr = VecGetArray(b, &array_b);CHKERRQ(ierr);
2330   PetscInt sb;
2331   ierr = VecGetSize(b, &sb);CHKERRQ(ierr);
2332 
2333   lgraph_type& level_graph = *lgraph_p;
2334   graph_dist::ilu_default::graph_type&            graph(level_graph.graph);
2335 
2336   typedef boost::multi_array_ref<PetscScalar, 1> array_ref_type;
2337   array_ref_type                                 ref_b(array_b, boost::extents[num_vertices(graph)]);
2338   array_ref_type                                 ref_x(array_x, boost::extents[num_vertices(graph)]);
2339 
2340   typedef boost::iterator_property_map<array_ref_type::iterator,
2341                                        boost::property_map<graph_dist::ilu_default::graph_type, boost::vertex_index_t>::type>  gvector_type;
2342   gvector_type                                   vector_b(ref_b.begin(), get(boost::vertex_index, graph));
2343   gvector_type                                   vector_x(ref_x.begin(), get(boost::vertex_index, graph));
2344 
2345   ilu_set_solve(*lgraph_p, vector_b, vector_x);
2346   PetscFunctionReturn(0);
2347 }
2348 #endif
2349 
2350 #undef __FUNCT__
2351 #define __FUNCT__ "MatDestroy_MatRedundant"
2352 PetscErrorCode MatDestroy_MatRedundant(Mat A)
2353 {
2354   PetscErrorCode ierr;
2355   Mat_Redundant  *redund;
2356   PetscInt       i;
2357   PetscMPIInt    size;
2358 
2359   PetscFunctionBegin;
2360   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRQ(ierr);
2361   if (size == 1) {
2362     Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
2363     redund = a->redundant;
2364   } else {
2365     Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
2366     redund = a->redundant;
2367   }
2368   if (redund) {
2369     if (redund->matseq) { /* via MatGetSubMatrices()  */
2370       ierr = ISDestroy(&redund->isrow);CHKERRQ(ierr);
2371       ierr = ISDestroy(&redund->iscol);CHKERRQ(ierr);
2372       ierr = MatDestroy(&redund->matseq[0]);CHKERRQ(ierr);
2373       ierr = PetscFree(redund->matseq);CHKERRQ(ierr);
2374     } else {
2375       ierr = PetscFree2(redund->send_rank,redund->recv_rank);CHKERRQ(ierr);
2376       ierr = PetscFree(redund->sbuf_j);CHKERRQ(ierr);
2377       ierr = PetscFree(redund->sbuf_a);CHKERRQ(ierr);
2378       for (i=0; i<redund->nrecvs; i++) {
2379         ierr = PetscFree(redund->rbuf_j[i]);CHKERRQ(ierr);
2380         ierr = PetscFree(redund->rbuf_a[i]);CHKERRQ(ierr);
2381       }
2382       ierr = PetscFree4(redund->sbuf_nz,redund->rbuf_nz,redund->rbuf_j,redund->rbuf_a);CHKERRQ(ierr);
2383     }
2384 
2385     if (redund->psubcomm) {
2386       ierr = PetscSubcommDestroy(&redund->psubcomm);CHKERRQ(ierr);
2387     }
2388     ierr = redund->Destroy(A);CHKERRQ(ierr);
2389     ierr = PetscFree(redund);CHKERRQ(ierr);
2390   }
2391   PetscFunctionReturn(0);
2392 }
2393 
2394 #undef __FUNCT__
2395 #define __FUNCT__ "MatGetRedundantMatrix_MPIAIJ_interlaced"
2396 PetscErrorCode MatGetRedundantMatrix_MPIAIJ_interlaced(Mat mat,PetscInt nsubcomm,MPI_Comm subcomm,MatReuse reuse,Mat *matredundant)
2397 {
2398   PetscMPIInt    rank,size;
2399   MPI_Comm       comm;
2400   PetscErrorCode ierr;
2401   PetscInt       nsends=0,nrecvs=0,i,rownz_max=0,M=mat->rmap->N,N=mat->cmap->N;
2402   PetscMPIInt    *send_rank= NULL,*recv_rank=NULL,subrank,subsize;
2403   PetscInt       *rowrange = mat->rmap->range;
2404   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2405   Mat            A = aij->A,B=aij->B,C=*matredundant;
2406   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data;
2407   PetscScalar    *sbuf_a;
2408   PetscInt       nzlocal=a->nz+b->nz;
2409   PetscInt       j,cstart=mat->cmap->rstart,cend=mat->cmap->rend,row,nzA,nzB,ncols,*cworkA,*cworkB;
2410   PetscInt       rstart=mat->rmap->rstart,rend=mat->rmap->rend,*bmap=aij->garray;
2411   PetscInt       *cols,ctmp,lwrite,*rptr,l,*sbuf_j;
2412   MatScalar      *aworkA,*aworkB;
2413   PetscScalar    *vals;
2414   PetscMPIInt    tag1,tag2,tag3,imdex;
2415   MPI_Request    *s_waits1=NULL,*s_waits2=NULL,*s_waits3=NULL;
2416   MPI_Request    *r_waits1=NULL,*r_waits2=NULL,*r_waits3=NULL;
2417   MPI_Status     recv_status,*send_status;
2418   PetscInt       *sbuf_nz=NULL,*rbuf_nz=NULL,count;
2419   PetscInt       **rbuf_j=NULL;
2420   PetscScalar    **rbuf_a=NULL;
2421   Mat_Redundant  *redund =NULL;
2422 
2423   PetscFunctionBegin;
2424   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
2425   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2426   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2427   ierr = MPI_Comm_rank(subcomm,&subrank);CHKERRQ(ierr);
2428   ierr = MPI_Comm_size(subcomm,&subsize);CHKERRQ(ierr);
2429 
2430   if (reuse == MAT_REUSE_MATRIX) {
2431     if (M != mat->rmap->N || N != mat->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong global size");
2432     if (subsize == 1) {
2433       Mat_SeqAIJ *c = (Mat_SeqAIJ*)C->data;
2434       redund = c->redundant;
2435     } else {
2436       Mat_MPIAIJ *c = (Mat_MPIAIJ*)C->data;
2437       redund = c->redundant;
2438     }
2439     if (nzlocal != redund->nzlocal) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong nzlocal");
2440 
2441     nsends    = redund->nsends;
2442     nrecvs    = redund->nrecvs;
2443     send_rank = redund->send_rank;
2444     recv_rank = redund->recv_rank;
2445     sbuf_nz   = redund->sbuf_nz;
2446     rbuf_nz   = redund->rbuf_nz;
2447     sbuf_j    = redund->sbuf_j;
2448     sbuf_a    = redund->sbuf_a;
2449     rbuf_j    = redund->rbuf_j;
2450     rbuf_a    = redund->rbuf_a;
2451   }
2452 
2453   if (reuse == MAT_INITIAL_MATRIX) {
2454     PetscInt    nleftover,np_subcomm;
2455 
2456     /* get the destination processors' id send_rank, nsends and nrecvs */
2457     ierr = PetscMalloc2(size,&send_rank,size,&recv_rank);CHKERRQ(ierr);
2458 
2459     np_subcomm = size/nsubcomm;
2460     nleftover  = size - nsubcomm*np_subcomm;
2461 
2462     /* block of codes below is specific for INTERLACED */
2463     /* ------------------------------------------------*/
2464     nsends = 0; nrecvs = 0;
2465     for (i=0; i<size; i++) {
2466       if (subrank == i/nsubcomm && i != rank) { /* my_subrank == other's subrank */
2467         send_rank[nsends++] = i;
2468         recv_rank[nrecvs++] = i;
2469       }
2470     }
2471     if (rank >= size - nleftover) { /* this proc is a leftover processor */
2472       i = size-nleftover-1;
2473       j = 0;
2474       while (j < nsubcomm - nleftover) {
2475         send_rank[nsends++] = i;
2476         i--; j++;
2477       }
2478     }
2479 
2480     if (nleftover && subsize == size/nsubcomm && subrank==subsize-1) { /* this proc recvs from leftover processors */
2481       for (i=0; i<nleftover; i++) {
2482         recv_rank[nrecvs++] = size-nleftover+i;
2483       }
2484     }
2485     /*----------------------------------------------*/
2486 
2487     /* allocate sbuf_j, sbuf_a */
2488     i    = nzlocal + rowrange[rank+1] - rowrange[rank] + 2;
2489     ierr = PetscMalloc1(i,&sbuf_j);CHKERRQ(ierr);
2490     ierr = PetscMalloc1((nzlocal+1),&sbuf_a);CHKERRQ(ierr);
2491     /*
2492     ierr = PetscSynchronizedPrintf(comm,"[%d] nsends %d, nrecvs %d\n",rank,nsends,nrecvs);CHKERRQ(ierr);
2493     ierr = PetscSynchronizedFlush(comm,PETSC_STDOUT);CHKERRQ(ierr);
2494      */
2495   } /* end of if (reuse == MAT_INITIAL_MATRIX) */
2496 
2497   /* copy mat's local entries into the buffers */
2498   if (reuse == MAT_INITIAL_MATRIX) {
2499     rownz_max = 0;
2500     rptr      = sbuf_j;
2501     cols      = sbuf_j + rend-rstart + 1;
2502     vals      = sbuf_a;
2503     rptr[0]   = 0;
2504     for (i=0; i<rend-rstart; i++) {
2505       row    = i + rstart;
2506       nzA    = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i];
2507       ncols  = nzA + nzB;
2508       cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i];
2509       aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i];
2510       /* load the column indices for this row into cols */
2511       lwrite = 0;
2512       for (l=0; l<nzB; l++) {
2513         if ((ctmp = bmap[cworkB[l]]) < cstart) {
2514           vals[lwrite]   = aworkB[l];
2515           cols[lwrite++] = ctmp;
2516         }
2517       }
2518       for (l=0; l<nzA; l++) {
2519         vals[lwrite]   = aworkA[l];
2520         cols[lwrite++] = cstart + cworkA[l];
2521       }
2522       for (l=0; l<nzB; l++) {
2523         if ((ctmp = bmap[cworkB[l]]) >= cend) {
2524           vals[lwrite]   = aworkB[l];
2525           cols[lwrite++] = ctmp;
2526         }
2527       }
2528       vals     += ncols;
2529       cols     += ncols;
2530       rptr[i+1] = rptr[i] + ncols;
2531       if (rownz_max < ncols) rownz_max = ncols;
2532     }
2533     if (rptr[rend-rstart] != a->nz + b->nz) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB, "rptr[%D] %D != %D + %D",rend-rstart,rptr[rend-rstart],a->nz,b->nz);
2534   } else { /* only copy matrix values into sbuf_a */
2535     rptr    = sbuf_j;
2536     vals    = sbuf_a;
2537     rptr[0] = 0;
2538     for (i=0; i<rend-rstart; i++) {
2539       row    = i + rstart;
2540       nzA    = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i];
2541       ncols  = nzA + nzB;
2542       cworkB = b->j + b->i[i];
2543       aworkA = a->a + a->i[i];
2544       aworkB = b->a + b->i[i];
2545       lwrite = 0;
2546       for (l=0; l<nzB; l++) {
2547         if ((ctmp = bmap[cworkB[l]]) < cstart) vals[lwrite++] = aworkB[l];
2548       }
2549       for (l=0; l<nzA; l++) vals[lwrite++] = aworkA[l];
2550       for (l=0; l<nzB; l++) {
2551         if ((ctmp = bmap[cworkB[l]]) >= cend) vals[lwrite++] = aworkB[l];
2552       }
2553       vals     += ncols;
2554       rptr[i+1] = rptr[i] + ncols;
2555     }
2556   } /* end of if (reuse == MAT_INITIAL_MATRIX) ... else */
2557 
2558   /* send nzlocal to others, and recv others' nzlocal */
2559   /*--------------------------------------------------*/
2560   if (reuse == MAT_INITIAL_MATRIX) {
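    /* a single allocation backs all six request arrays, partitioned as
       [s_waits3 | s_waits2 | s_waits1 | r_waits1 | r_waits2 | r_waits3] */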
2561     ierr = PetscMalloc2(3*(nsends + nrecvs)+1,&s_waits3,nsends+1,&send_status);CHKERRQ(ierr);
2562 
2563     s_waits2 = s_waits3 + nsends;
2564     s_waits1 = s_waits2 + nsends;
2565     r_waits1 = s_waits1 + nsends;
2566     r_waits2 = r_waits1 + nrecvs;
2567     r_waits3 = r_waits2 + nrecvs;
2568   } else {
2569     ierr = PetscMalloc2(nsends + nrecvs +1,&s_waits3,nsends+1,&send_status);CHKERRQ(ierr);
2570 
2571     r_waits3 = s_waits3 + nsends;
2572   }
2573 
2574   ierr = PetscObjectGetNewTag((PetscObject)mat,&tag3);CHKERRQ(ierr);
2575   if (reuse == MAT_INITIAL_MATRIX) {
2576     /* get new tags to keep the communication clean */
2577     ierr = PetscObjectGetNewTag((PetscObject)mat,&tag1);CHKERRQ(ierr);
2578     ierr = PetscObjectGetNewTag((PetscObject)mat,&tag2);CHKERRQ(ierr);
2579     ierr = PetscMalloc4(nsends,&sbuf_nz,nrecvs,&rbuf_nz,nrecvs,&rbuf_j,nrecvs,&rbuf_a);CHKERRQ(ierr);
2580 
2581     /* post receives of other's nzlocal */
2582     for (i=0; i<nrecvs; i++) {
2583       ierr = MPI_Irecv(rbuf_nz+i,1,MPIU_INT,MPI_ANY_SOURCE,tag1,comm,r_waits1+i);CHKERRQ(ierr);
2584     }
2585     /* send nzlocal to others */
2586     for (i=0; i<nsends; i++) {
2587       sbuf_nz[i] = nzlocal;
2588       ierr       = MPI_Isend(sbuf_nz+i,1,MPIU_INT,send_rank[i],tag1,comm,s_waits1+i);CHKERRQ(ierr);
2589     }
2590     /* wait on receives of nzlocal; allocate space for rbuf_j, rbuf_a */
2591     count = nrecvs;
2592     while (count) {
2593       ierr = MPI_Waitany(nrecvs,r_waits1,&imdex,&recv_status);CHKERRQ(ierr);
2594 
2595       recv_rank[imdex] = recv_status.MPI_SOURCE;
2596       /* allocate rbuf_a and rbuf_j; then post receives of rbuf_j */
2597       ierr = PetscMalloc1((rbuf_nz[imdex]+1),&rbuf_a[imdex]);CHKERRQ(ierr);
2598 
2599       i = rowrange[recv_status.MPI_SOURCE+1] - rowrange[recv_status.MPI_SOURCE]; /* number of expected mat->i */
2600 
2601       rbuf_nz[imdex] += i + 2;
2602 
2603       ierr = PetscMalloc1(rbuf_nz[imdex],&rbuf_j[imdex]);CHKERRQ(ierr);
2604       ierr = MPI_Irecv(rbuf_j[imdex],rbuf_nz[imdex],MPIU_INT,recv_status.MPI_SOURCE,tag2,comm,r_waits2+imdex);CHKERRQ(ierr);
2605       count--;
2606     }
2607     /* wait on sends of nzlocal */
2608     if (nsends) {ierr = MPI_Waitall(nsends,s_waits1,send_status);CHKERRQ(ierr);}
2609     /* send mat->i,j to others, and recv theirs */
2610     /*------------------------------------------------*/
2611     for (i=0; i<nsends; i++) {
2612       j    = nzlocal + rowrange[rank+1] - rowrange[rank] + 1;
2613       ierr = MPI_Isend(sbuf_j,j,MPIU_INT,send_rank[i],tag2,comm,s_waits2+i);CHKERRQ(ierr);
2614     }
2615     /* wait on receives of mat->i,j */
2616     /*------------------------------*/
2617     count = nrecvs;
2618     while (count) {
2619       ierr = MPI_Waitany(nrecvs,r_waits2,&imdex,&recv_status);CHKERRQ(ierr);
2620       if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE);
2621       count--;
2622     }
2623     /* wait on sends of mat->i,j */
2624     /*---------------------------*/
2625     if (nsends) {
2626       ierr = MPI_Waitall(nsends,s_waits2,send_status);CHKERRQ(ierr);
2627     }
2628   } /* end of if (reuse == MAT_INITIAL_MATRIX) */
2629 
2630   /* post receives, send and receive mat->a */
2631   /*----------------------------------------*/
2632   for (imdex=0; imdex<nrecvs; imdex++) {
2633     ierr = MPI_Irecv(rbuf_a[imdex],rbuf_nz[imdex],MPIU_SCALAR,recv_rank[imdex],tag3,comm,r_waits3+imdex);CHKERRQ(ierr);
2634   }
2635   for (i=0; i<nsends; i++) {
2636     ierr = MPI_Isend(sbuf_a,nzlocal,MPIU_SCALAR,send_rank[i],tag3,comm,s_waits3+i);CHKERRQ(ierr);
2637   }
2638   count = nrecvs;
2639   while (count) {
2640     ierr = MPI_Waitany(nrecvs,r_waits3,&imdex,&recv_status);CHKERRQ(ierr);
2641     if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE);
2642     count--;
2643   }
2644   if (nsends) {
2645     ierr = MPI_Waitall(nsends,s_waits3,send_status);CHKERRQ(ierr);
2646   }
2647 
2648   ierr = PetscFree2(s_waits3,send_status);CHKERRQ(ierr);
2649 
2650   /* create redundant matrix */
2651   /*-------------------------*/
2652   if (reuse == MAT_INITIAL_MATRIX) {
2653     const PetscInt *range;
2654     PetscInt       rstart_sub,rend_sub,mloc_sub;
2655 
2656     /* compute rownz_max for preallocation */
2657     for (imdex=0; imdex<nrecvs; imdex++) {
2658       j    = rowrange[recv_rank[imdex]+1] - rowrange[recv_rank[imdex]];
2659       rptr = rbuf_j[imdex];
2660       for (i=0; i<j; i++) {
2661         ncols = rptr[i+1] - rptr[i];
2662         if (rownz_max < ncols) rownz_max = ncols;
2663       }
2664     }
2665 
2666     ierr = MatCreate(subcomm,&C);CHKERRQ(ierr);
2667 
2668     /* get local size of redundant matrix
2669        - mloc_sub is chosen for PETSC_SUBCOMM_INTERLACED; it works for other types but may not be efficient! */
2670     ierr = MatGetOwnershipRanges(mat,&range);CHKERRQ(ierr);
2671     rstart_sub = range[nsubcomm*subrank];
2672     if (subrank+1 < subsize) { /* not the last proc in subcomm */
2673       rend_sub = range[nsubcomm*(subrank+1)];
2674     } else {
2675       rend_sub = mat->rmap->N;
2676     }
2677     mloc_sub = rend_sub - rstart_sub;
2678 
2679     if (M == N) {
2680       ierr = MatSetSizes(C,mloc_sub,mloc_sub,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr);
2681     } else { /* non-square matrix */
2682       ierr = MatSetSizes(C,mloc_sub,PETSC_DECIDE,PETSC_DECIDE,mat->cmap->N);CHKERRQ(ierr);
2683     }
2684     ierr = MatSetBlockSizes(C,mat->rmap->bs,mat->cmap->bs);CHKERRQ(ierr);
2685     ierr = MatSetFromOptions(C);CHKERRQ(ierr);
2686     ierr = MatSeqAIJSetPreallocation(C,rownz_max,NULL);CHKERRQ(ierr);
2687     ierr = MatMPIAIJSetPreallocation(C,rownz_max,NULL,rownz_max,NULL);CHKERRQ(ierr);
2688   } else {
2689     C = *matredundant;
2690   }
2691 
2692   /* insert local matrix entries */
2693   rptr = sbuf_j;
2694   cols = sbuf_j + rend-rstart + 1;
2695   vals = sbuf_a;
2696   for (i=0; i<rend-rstart; i++) {
2697     row   = i + rstart;
2698     ncols = rptr[i+1] - rptr[i];
2699     ierr  = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
2700     vals += ncols;
2701     cols += ncols;
2702   }
2703   /* insert received matrix entries */
2704   for (imdex=0; imdex<nrecvs; imdex++) {
2705     rstart = rowrange[recv_rank[imdex]];
2706     rend   = rowrange[recv_rank[imdex]+1];
2707     /* printf("[%d] insert rows %d - %d\n",rank,rstart,rend-1); */
2708     rptr   = rbuf_j[imdex];
2709     cols   = rbuf_j[imdex] + rend-rstart + 1;
2710     vals   = rbuf_a[imdex];
2711     for (i=0; i<rend-rstart; i++) {
2712       row   = i + rstart;
2713       ncols = rptr[i+1] - rptr[i];
2714       ierr  = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
2715       vals += ncols;
2716       cols += ncols;
2717     }
2718   }
2719   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2720   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2721 
2722   if (reuse == MAT_INITIAL_MATRIX) {
2723     *matredundant = C;
2724 
2725     /* create a supporting struct and attach it to C for reuse */
2726     ierr = PetscNewLog(C,&redund);CHKERRQ(ierr);
2727     if (subsize == 1) {
2728       Mat_SeqAIJ *c = (Mat_SeqAIJ*)C->data;
2729       c->redundant = redund;
2730     } else {
2731       Mat_MPIAIJ *c = (Mat_MPIAIJ*)C->data;
2732       c->redundant = redund;
2733     }
2734 
2735     redund->nzlocal   = nzlocal;
2736     redund->nsends    = nsends;
2737     redund->nrecvs    = nrecvs;
2738     redund->send_rank = send_rank;
2739     redund->recv_rank = recv_rank;
2740     redund->sbuf_nz   = sbuf_nz;
2741     redund->rbuf_nz   = rbuf_nz;
2742     redund->sbuf_j    = sbuf_j;
2743     redund->sbuf_a    = sbuf_a;
2744     redund->rbuf_j    = rbuf_j;
2745     redund->rbuf_a    = rbuf_a;
2746     redund->psubcomm  = NULL;
2747 
2748     redund->Destroy = C->ops->destroy;
2749     C->ops->destroy = MatDestroy_MatRedundant;
2750   }
2751   PetscFunctionReturn(0);
2752 }
2753 
2754 #undef __FUNCT__
2755 #define __FUNCT__ "MatGetRedundantMatrix_MPIAIJ"
2756 PetscErrorCode MatGetRedundantMatrix_MPIAIJ(Mat mat,PetscInt nsubcomm,MPI_Comm subcomm,MatReuse reuse,Mat *matredundant)
2757 {
2758   PetscErrorCode ierr;
2759   MPI_Comm       comm;
2760   PetscMPIInt    size,subsize;
2761   PetscInt       mloc_sub,rstart,rend,M=mat->rmap->N,N=mat->cmap->N;
2762   Mat_Redundant  *redund=NULL;
2763   PetscSubcomm   psubcomm=NULL;
2764   MPI_Comm       subcomm_in=subcomm;
2765   Mat            *matseq;
2766   IS             isrow,iscol;
2767 
2768   PetscFunctionBegin;
2769   if (subcomm_in == MPI_COMM_NULL) { /* user does not provide subcomm */
2770     if (reuse ==  MAT_INITIAL_MATRIX) {
2771       /* create psubcomm, then get subcomm */
2772       ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
2773       ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2774       if (nsubcomm < 1 || nsubcomm > size) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"nsubcomm must be between 1 and %D",size);
2775 
2776       ierr = PetscSubcommCreate(comm,&psubcomm);CHKERRQ(ierr);
2777       ierr = PetscSubcommSetNumber(psubcomm,nsubcomm);CHKERRQ(ierr);
2778       ierr = PetscSubcommSetType(psubcomm,PETSC_SUBCOMM_CONTIGUOUS);CHKERRQ(ierr);
2779       ierr = PetscSubcommSetFromOptions(psubcomm);CHKERRQ(ierr);
2780       subcomm = psubcomm->comm;
2781     } else { /* retrieve psubcomm and subcomm */
2782       ierr = PetscObjectGetComm((PetscObject)(*matredundant),&subcomm);CHKERRQ(ierr);
2783       ierr = MPI_Comm_size(subcomm,&subsize);CHKERRQ(ierr);
2784       if (subsize == 1) {
2785         Mat_SeqAIJ *c = (Mat_SeqAIJ*)(*matredundant)->data;
2786         redund = c->redundant;
2787       } else {
2788         Mat_MPIAIJ *c = (Mat_MPIAIJ*)(*matredundant)->data;
2789         redund = c->redundant;
2790       }
2791       psubcomm = redund->psubcomm;
2792     }
2793     if (psubcomm->type == PETSC_SUBCOMM_INTERLACED) {
2794       ierr = MatGetRedundantMatrix_MPIAIJ_interlaced(mat,nsubcomm,subcomm,reuse,matredundant);CHKERRQ(ierr);
2795       if (reuse ==  MAT_INITIAL_MATRIX) { /* psubcomm is created in this routine, free it in MatDestroy_MatRedundant() */
2796         ierr = MPI_Comm_size(psubcomm->comm,&subsize);CHKERRQ(ierr);
2797         if (subsize == 1) {
2798           Mat_SeqAIJ *c = (Mat_SeqAIJ*)(*matredundant)->data;
2799           c->redundant->psubcomm = psubcomm;
2800         } else {
2801           Mat_MPIAIJ *c = (Mat_MPIAIJ*)(*matredundant)->data;
2802           c->redundant->psubcomm = psubcomm;
2803         }
2804       }
2805       PetscFunctionReturn(0);
2806     }
2807   }
2808 
2809   /* use MPI subcomm via MatGetSubMatrices(); use subcomm_in or psubcomm->comm (psubcomm->type != INTERLACED) */
2810   ierr = MPI_Comm_size(subcomm,&subsize);CHKERRQ(ierr);
2811   if (reuse == MAT_INITIAL_MATRIX) {
2812     /* create a local sequential matrix matseq[0] */
2813     mloc_sub = PETSC_DECIDE;
2814     ierr = PetscSplitOwnership(subcomm,&mloc_sub,&M);CHKERRQ(ierr);
2815     ierr = MPI_Scan(&mloc_sub,&rend,1,MPIU_INT,MPI_SUM,subcomm);CHKERRQ(ierr);
2816     rstart = rend - mloc_sub;
2817     ierr = ISCreateStride(PETSC_COMM_SELF,mloc_sub,rstart,1,&isrow);CHKERRQ(ierr);
2818     ierr = ISCreateStride(PETSC_COMM_SELF,N,0,1,&iscol);CHKERRQ(ierr);
2819   } else { /* reuse == MAT_REUSE_MATRIX */
2820     if (subsize == 1) {
2821       Mat_SeqAIJ *c = (Mat_SeqAIJ*)(*matredundant)->data;
2822       redund = c->redundant;
2823     } else {
2824       Mat_MPIAIJ *c = (Mat_MPIAIJ*)(*matredundant)->data;
2825       redund = c->redundant;
2826     }
2827 
2828     isrow  = redund->isrow;
2829     iscol  = redund->iscol;
2830     matseq = redund->matseq;
2831   }
2832   ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,reuse,&matseq);CHKERRQ(ierr);
2833   ierr = MatCreateMPIAIJConcatenateSeqAIJ(subcomm,matseq[0],PETSC_DECIDE,reuse,matredundant);CHKERRQ(ierr);
2834 
2835   if (reuse == MAT_INITIAL_MATRIX) {
2836     /* create a supporting struct and attach it to C for reuse */
2837     ierr = PetscNewLog(*matredundant,&redund);CHKERRQ(ierr);
2838     if (subsize == 1) {
2839       Mat_SeqAIJ *c = (Mat_SeqAIJ*)(*matredundant)->data;
2840       c->redundant = redund;
2841     } else {
2842       Mat_MPIAIJ *c = (Mat_MPIAIJ*)(*matredundant)->data;
2843       c->redundant = redund;
2844     }
2845     redund->isrow    = isrow;
2846     redund->iscol    = iscol;
2847     redund->matseq   = matseq;
2848     redund->psubcomm = psubcomm;
2849     redund->Destroy               = (*matredundant)->ops->destroy;
2850     (*matredundant)->ops->destroy = MatDestroy_MatRedundant;
2851   }
2852   PetscFunctionReturn(0);
2853 }
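
/*
   A sketch of how the redundant-matrix machinery above is typically driven
   through the public interface (error checking abbreviated; nsubcomm is the
   number of redundant copies the caller wants):

     Mat C;
     ierr = MatGetRedundantMatrix(A,nsubcomm,MPI_COMM_NULL,MAT_INITIAL_MATRIX,&C);CHKERRQ(ierr);
     ...  use C on each subcommunicator ...
     ierr = MatGetRedundantMatrix(A,nsubcomm,MPI_COMM_NULL,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr);
     ierr = MatDestroy(&C);CHKERRQ(ierr);
*/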
2854 
2855 #undef __FUNCT__
2856 #define __FUNCT__ "MatGetRowMaxAbs_MPIAIJ"
2857 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2858 {
2859   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2860   PetscErrorCode ierr;
2861   PetscInt       i,*idxb = 0;
2862   PetscScalar    *va,*vb;
2863   Vec            vtmp;
2864 
2865   PetscFunctionBegin;
2866   ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr);
2867   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2868   if (idx) {
2869     for (i=0; i<A->rmap->n; i++) {
2870       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2871     }
2872   }
2873 
2874   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
2875   if (idx) {
2876     ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr);
2877   }
2878   ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
2879   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
2880 
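  /* merge: keep whichever of the diagonal-block and off-diagonal-block entries
     has the larger magnitude; local column indices of the off-diagonal block
     are mapped back to global indices through garray */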
2881   for (i=0; i<A->rmap->n; i++) {
2882     if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
2883       va[i] = vb[i];
2884       if (idx) idx[i] = a->garray[idxb[i]];
2885     }
2886   }
2887 
2888   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2889   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
2890   ierr = PetscFree(idxb);CHKERRQ(ierr);
2891   ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
2892   PetscFunctionReturn(0);
2893 }
2894 
2895 #undef __FUNCT__
2896 #define __FUNCT__ "MatGetRowMinAbs_MPIAIJ"
2897 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2898 {
2899   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2900   PetscErrorCode ierr;
2901   PetscInt       i,*idxb = 0;
2902   PetscScalar    *va,*vb;
2903   Vec            vtmp;
2904 
2905   PetscFunctionBegin;
2906   ierr = MatGetRowMinAbs(a->A,v,idx);CHKERRQ(ierr);
2907   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2908   if (idx) {
2909     for (i=0; i<A->rmap->n; i++) {
2910       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2911     }
2912   }
2913 
2914   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
2915   if (idx) {
2916     ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr);
2917   }
2918   ierr = MatGetRowMinAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
2919   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
2920 
2921   for (i=0; i<A->rmap->n; i++) {
2922     if (PetscAbsScalar(va[i]) > PetscAbsScalar(vb[i])) {
2923       va[i] = vb[i];
2924       if (idx) idx[i] = a->garray[idxb[i]];
2925     }
2926   }
2927 
2928   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2929   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
2930   ierr = PetscFree(idxb);CHKERRQ(ierr);
2931   ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
2932   PetscFunctionReturn(0);
2933 }
2934 
2935 #undef __FUNCT__
2936 #define __FUNCT__ "MatGetRowMin_MPIAIJ"
2937 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2938 {
2939   Mat_MPIAIJ     *mat   = (Mat_MPIAIJ*) A->data;
2940   PetscInt       n      = A->rmap->n;
2941   PetscInt       cstart = A->cmap->rstart;
2942   PetscInt       *cmap  = mat->garray;
2943   PetscInt       *diagIdx, *offdiagIdx;
2944   Vec            diagV, offdiagV;
2945   PetscScalar    *a, *diagA, *offdiagA;
2946   PetscInt       r;
2947   PetscErrorCode ierr;
2948 
2949   PetscFunctionBegin;
2950   ierr = PetscMalloc2(n,&diagIdx,n,&offdiagIdx);CHKERRQ(ierr);
2951   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr);
2952   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr);
2953   ierr = MatGetRowMin(mat->A, diagV,    diagIdx);CHKERRQ(ierr);
2954   ierr = MatGetRowMin(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
2955   ierr = VecGetArray(v,        &a);CHKERRQ(ierr);
2956   ierr = VecGetArray(diagV,    &diagA);CHKERRQ(ierr);
2957   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2958   for (r = 0; r < n; ++r) {
2959     if (PetscAbsScalar(diagA[r]) <= PetscAbsScalar(offdiagA[r])) {
2960       a[r]   = diagA[r];
2961       idx[r] = cstart + diagIdx[r];
2962     } else {
2963       a[r]   = offdiagA[r];
2964       idx[r] = cmap[offdiagIdx[r]];
2965     }
2966   }
2967   ierr = VecRestoreArray(v,        &a);CHKERRQ(ierr);
2968   ierr = VecRestoreArray(diagV,    &diagA);CHKERRQ(ierr);
2969   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2970   ierr = VecDestroy(&diagV);CHKERRQ(ierr);
2971   ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
2972   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
2973   PetscFunctionReturn(0);
2974 }
2975 
2976 #undef __FUNCT__
2977 #define __FUNCT__ "MatGetRowMax_MPIAIJ"
2978 PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2979 {
2980   Mat_MPIAIJ     *mat   = (Mat_MPIAIJ*) A->data;
2981   PetscInt       n      = A->rmap->n;
2982   PetscInt       cstart = A->cmap->rstart;
2983   PetscInt       *cmap  = mat->garray;
2984   PetscInt       *diagIdx, *offdiagIdx;
2985   Vec            diagV, offdiagV;
2986   PetscScalar    *a, *diagA, *offdiagA;
2987   PetscInt       r;
2988   PetscErrorCode ierr;
2989 
2990   PetscFunctionBegin;
2991   ierr = PetscMalloc2(n,&diagIdx,n,&offdiagIdx);CHKERRQ(ierr);
2992   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr);
2993   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr);
2994   ierr = MatGetRowMax(mat->A, diagV,    diagIdx);CHKERRQ(ierr);
2995   ierr = MatGetRowMax(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
2996   ierr = VecGetArray(v,        &a);CHKERRQ(ierr);
2997   ierr = VecGetArray(diagV,    &diagA);CHKERRQ(ierr);
2998   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2999   for (r = 0; r < n; ++r) {
3000     if (PetscAbsScalar(diagA[r]) >= PetscAbsScalar(offdiagA[r])) {
3001       a[r]   = diagA[r];
3002       idx[r] = cstart + diagIdx[r];
3003     } else {
3004       a[r]   = offdiagA[r];
3005       idx[r] = cmap[offdiagIdx[r]];
3006     }
3007   }
3008   ierr = VecRestoreArray(v,        &a);CHKERRQ(ierr);
3009   ierr = VecRestoreArray(diagV,    &diagA);CHKERRQ(ierr);
3010   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
3011   ierr = VecDestroy(&diagV);CHKERRQ(ierr);
3012   ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
3013   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
3014   PetscFunctionReturn(0);
3015 }
3016 
3017 #undef __FUNCT__
3018 #define __FUNCT__ "MatGetSeqNonzeroStructure_MPIAIJ"
3019 PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat,Mat *newmat)
3020 {
3021   PetscErrorCode ierr;
3022   Mat            *dummy;
3023 
3024   PetscFunctionBegin;
3025   ierr    = MatGetSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy);CHKERRQ(ierr);
3026   *newmat = *dummy;
3027   ierr    = PetscFree(dummy);CHKERRQ(ierr);
3028   PetscFunctionReturn(0);
3029 }
3030 
3031 #undef __FUNCT__
3032 #define __FUNCT__ "MatInvertBlockDiagonal_MPIAIJ"
3033 PetscErrorCode  MatInvertBlockDiagonal_MPIAIJ(Mat A,const PetscScalar **values)
3034 {
3035   Mat_MPIAIJ     *a = (Mat_MPIAIJ*) A->data;
3036   PetscErrorCode ierr;
3037 
3038   PetscFunctionBegin;
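  /* the (point-)block diagonal lies entirely within the local diagonal block a->A */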
3039   ierr = MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr);
3040   PetscFunctionReturn(0);
3041 }
3042 
3043 #undef __FUNCT__
3044 #define __FUNCT__ "MatSetRandom_MPIAIJ"
3045 static PetscErrorCode  MatSetRandom_MPIAIJ(Mat x,PetscRandom rctx)
3046 {
3047   PetscErrorCode ierr;
3048   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)x->data;
3049 
3050   PetscFunctionBegin;
3051   ierr = MatSetRandom(aij->A,rctx);CHKERRQ(ierr);
3052   ierr = MatSetRandom(aij->B,rctx);CHKERRQ(ierr);
3053   ierr = MatAssemblyBegin(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3054   ierr = MatAssemblyEnd(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3055   PetscFunctionReturn(0);
3056 }
3057 
3058 /* -------------------------------------------------------------------*/
3059 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
3060                                        MatGetRow_MPIAIJ,
3061                                        MatRestoreRow_MPIAIJ,
3062                                        MatMult_MPIAIJ,
3063                                 /* 4*/ MatMultAdd_MPIAIJ,
3064                                        MatMultTranspose_MPIAIJ,
3065                                        MatMultTransposeAdd_MPIAIJ,
3066 #if defined(PETSC_HAVE_PBGL)
3067                                        MatSolve_MPIAIJ,
3068 #else
3069                                        0,
3070 #endif
3071                                        0,
3072                                        0,
3073                                 /*10*/ 0,
3074                                        0,
3075                                        0,
3076                                        MatSOR_MPIAIJ,
3077                                        MatTranspose_MPIAIJ,
3078                                 /*15*/ MatGetInfo_MPIAIJ,
3079                                        MatEqual_MPIAIJ,
3080                                        MatGetDiagonal_MPIAIJ,
3081                                        MatDiagonalScale_MPIAIJ,
3082                                        MatNorm_MPIAIJ,
3083                                 /*20*/ MatAssemblyBegin_MPIAIJ,
3084                                        MatAssemblyEnd_MPIAIJ,
3085                                        MatSetOption_MPIAIJ,
3086                                        MatZeroEntries_MPIAIJ,
3087                                 /*24*/ MatZeroRows_MPIAIJ,
3088                                        0,
3089 #if defined(PETSC_HAVE_PBGL)
3090                                        0,
3091 #else
3092                                        0,
3093 #endif
3094                                        0,
3095                                        0,
3096                                 /*29*/ MatSetUp_MPIAIJ,
3097 #if defined(PETSC_HAVE_PBGL)
3098                                        0,
3099 #else
3100                                        0,
3101 #endif
3102                                        0,
3103                                        0,
3104                                        0,
3105                                 /*34*/ MatDuplicate_MPIAIJ,
3106                                        0,
3107                                        0,
3108                                        0,
3109                                        0,
3110                                 /*39*/ MatAXPY_MPIAIJ,
3111                                        MatGetSubMatrices_MPIAIJ,
3112                                        MatIncreaseOverlap_MPIAIJ,
3113                                        MatGetValues_MPIAIJ,
3114                                        MatCopy_MPIAIJ,
3115                                 /*44*/ MatGetRowMax_MPIAIJ,
3116                                        MatScale_MPIAIJ,
3117                                        0,
3118                                        0,
3119                                        MatZeroRowsColumns_MPIAIJ,
3120                                 /*49*/ MatSetRandom_MPIAIJ,
3121                                        0,
3122                                        0,
3123                                        0,
3124                                        0,
3125                                 /*54*/ MatFDColoringCreate_MPIXAIJ,
3126                                        0,
3127                                        MatSetUnfactored_MPIAIJ,
3128                                        MatPermute_MPIAIJ,
3129                                        0,
3130                                 /*59*/ MatGetSubMatrix_MPIAIJ,
3131                                        MatDestroy_MPIAIJ,
3132                                        MatView_MPIAIJ,
3133                                        0,
3134                                        MatMatMatMult_MPIAIJ_MPIAIJ_MPIAIJ,
3135                                 /*64*/ MatMatMatMultSymbolic_MPIAIJ_MPIAIJ_MPIAIJ,
3136                                        MatMatMatMultNumeric_MPIAIJ_MPIAIJ_MPIAIJ,
3137                                        0,
3138                                        0,
3139                                        0,
3140                                 /*69*/ MatGetRowMaxAbs_MPIAIJ,
3141                                        MatGetRowMinAbs_MPIAIJ,
3142                                        0,
3143                                        MatSetColoring_MPIAIJ,
3144                                        0,
3145                                        MatSetValuesAdifor_MPIAIJ,
3146                                 /*75*/ MatFDColoringApply_AIJ,
3147                                        0,
3148                                        0,
3149                                        0,
3150                                        MatFindZeroDiagonals_MPIAIJ,
3151                                 /*80*/ 0,
3152                                        0,
3153                                        0,
3154                                 /*83*/ MatLoad_MPIAIJ,
3155                                        0,
3156                                        0,
3157                                        0,
3158                                        0,
3159                                        0,
3160                                 /*89*/ MatMatMult_MPIAIJ_MPIAIJ,
3161                                        MatMatMultSymbolic_MPIAIJ_MPIAIJ,
3162                                        MatMatMultNumeric_MPIAIJ_MPIAIJ,
3163                                        MatPtAP_MPIAIJ_MPIAIJ,
3164                                        MatPtAPSymbolic_MPIAIJ_MPIAIJ,
3165                                 /*94*/ MatPtAPNumeric_MPIAIJ_MPIAIJ,
3166                                        0,
3167                                        0,
3168                                        0,
3169                                        0,
3170                                 /*99*/ 0,
3171                                        0,
3172                                        0,
3173                                        MatConjugate_MPIAIJ,
3174                                        0,
3175                                 /*104*/MatSetValuesRow_MPIAIJ,
3176                                        MatRealPart_MPIAIJ,
3177                                        MatImaginaryPart_MPIAIJ,
3178                                        0,
3179                                        0,
3180                                 /*109*/0,
3181                                        MatGetRedundantMatrix_MPIAIJ,
3182                                        MatGetRowMin_MPIAIJ,
3183                                        0,
3184                                        0,
3185                                 /*114*/MatGetSeqNonzeroStructure_MPIAIJ,
3186                                        0,
3187                                        0,
3188                                        0,
3189                                        0,
3190                                 /*119*/0,
3191                                        0,
3192                                        0,
3193                                        0,
3194                                        MatGetMultiProcBlock_MPIAIJ,
3195                                 /*124*/MatFindNonzeroRows_MPIAIJ,
3196                                        MatGetColumnNorms_MPIAIJ,
3197                                        MatInvertBlockDiagonal_MPIAIJ,
3198                                        0,
3199                                        MatGetSubMatricesParallel_MPIAIJ,
3200                                 /*129*/0,
3201                                        MatTransposeMatMult_MPIAIJ_MPIAIJ,
3202                                        MatTransposeMatMultSymbolic_MPIAIJ_MPIAIJ,
3203                                        MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ,
3204                                        0,
3205                                 /*134*/0,
3206                                        0,
3207                                        0,
3208                                        0,
3209                                        0,
3210                                 /*139*/0,
3211                                        0,
3212                                        0,
3213                                        MatFDColoringSetUp_MPIXAIJ
3214 };
3215 
3216 /* ----------------------------------------------------------------------------------------*/
3217 
3218 #undef __FUNCT__
3219 #define __FUNCT__ "MatStoreValues_MPIAIJ"
3220 PetscErrorCode  MatStoreValues_MPIAIJ(Mat mat)
3221 {
3222   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
3223   PetscErrorCode ierr;
3224 
3225   PetscFunctionBegin;
3226   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
3227   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
3228   PetscFunctionReturn(0);
3229 }
3230 
3231 #undef __FUNCT__
3232 #define __FUNCT__ "MatRetrieveValues_MPIAIJ"
3233 PetscErrorCode  MatRetrieveValues_MPIAIJ(Mat mat)
3234 {
3235   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
3236   PetscErrorCode ierr;
3237 
3238   PetscFunctionBegin;
3239   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
3240   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
3241   PetscFunctionReturn(0);
3242 }
3243 
3244 #undef __FUNCT__
3245 #define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ"
3246 PetscErrorCode  MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
3247 {
3248   Mat_MPIAIJ     *b;
3249   PetscErrorCode ierr;
3250 
3251   PetscFunctionBegin;
3252   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3253   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3254   b = (Mat_MPIAIJ*)B->data;
3255 
3256   if (!B->preallocated) {
3257     /* Explicitly create 2 MATSEQAIJ matrices. */
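    /* b->A is the square diagonal block (B->cmap->n local columns); b->B holds
       the off-diagonal block and is created with the full global column count,
       to be compacted to its actual nonzero columns during assembly. */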
3258     ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
3259     ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr);
3260     ierr = MatSetBlockSizes(b->A,B->rmap->bs,B->cmap->bs);CHKERRQ(ierr);
3261     ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr);
3262     ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->A);CHKERRQ(ierr);
3263     ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
3264     ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr);
3265     ierr = MatSetBlockSizes(b->B,B->rmap->bs,B->cmap->bs);CHKERRQ(ierr);
3266     ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr);
3267     ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->B);CHKERRQ(ierr);
3268   }
3269 
3270   ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr);
3271   ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr);
3272   B->preallocated = PETSC_TRUE;
3273   PetscFunctionReturn(0);
3274 }
3275 
3276 #undef __FUNCT__
3277 #define __FUNCT__ "MatDuplicate_MPIAIJ"
3278 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
3279 {
3280   Mat            mat;
3281   Mat_MPIAIJ     *a,*oldmat = (Mat_MPIAIJ*)matin->data;
3282   PetscErrorCode ierr;
3283 
3284   PetscFunctionBegin;
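  /* duplicate both sequential blocks, the scatter context, and the column maps
     so that the new matrix shares no mutable state with matin */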
3285   *newmat = 0;
3286   ierr    = MatCreate(PetscObjectComm((PetscObject)matin),&mat);CHKERRQ(ierr);
3287   ierr    = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr);
3288   ierr    = MatSetBlockSizes(mat,matin->rmap->bs,matin->cmap->bs);CHKERRQ(ierr);
3289   ierr    = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr);
3290   ierr    = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
3291   a       = (Mat_MPIAIJ*)mat->data;
3292 
3293   mat->factortype   = matin->factortype;
3294   mat->rmap->bs     = matin->rmap->bs;
3295   mat->cmap->bs     = matin->cmap->bs;
3296   mat->assembled    = PETSC_TRUE;
3297   mat->insertmode   = NOT_SET_VALUES;
3298   mat->preallocated = PETSC_TRUE;
3299 
3300   a->size         = oldmat->size;
3301   a->rank         = oldmat->rank;
3302   a->donotstash   = oldmat->donotstash;
3303   a->roworiented  = oldmat->roworiented;
3304   a->rowindices   = 0;
3305   a->rowvalues    = 0;
3306   a->getrowactive = PETSC_FALSE;
3307 
3308   ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr);
3309   ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr);
3310 
3311   if (oldmat->colmap) {
3312 #if defined(PETSC_USE_CTABLE)
3313     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
3314 #else
3315     ierr = PetscMalloc1((mat->cmap->N),&a->colmap);CHKERRQ(ierr);
3316     ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
3317     ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
3318 #endif
3319   } else a->colmap = 0;
3320   if (oldmat->garray) {
3321     PetscInt len;
3322     len  = oldmat->B->cmap->n;
3323     ierr = PetscMalloc1((len+1),&a->garray);CHKERRQ(ierr);
3324     ierr = PetscLogObjectMemory((PetscObject)mat,len*sizeof(PetscInt));CHKERRQ(ierr);
3325     if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); }
3326   } else a->garray = 0;
3327 
3328   ierr    = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
3329   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->lvec);CHKERRQ(ierr);
3330   ierr    = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
3331   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx);CHKERRQ(ierr);
3332   ierr    = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
3333   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr);
3334   ierr    = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
3335   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->B);CHKERRQ(ierr);
3336   ierr    = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr);
3337   *newmat = mat;
3338   PetscFunctionReturn(0);
3339 }
3340 
3341 
3342 
3343 #undef __FUNCT__
3344 #define __FUNCT__ "MatLoad_MPIAIJ"
3345 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer)
3346 {
3347   PetscScalar    *vals,*svals;
3348   MPI_Comm       comm;
3349   PetscErrorCode ierr;
3350   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
3351   PetscInt       i,nz,j,rstart,rend,mmax,maxnz = 0,grows,gcols;
3352   PetscInt       header[4],*rowlengths = 0,M,N,m,*cols;
3353   PetscInt       *ourlens = NULL,*procsnz = NULL,*offlens = NULL,jj,*mycols,*smycols;
3354   PetscInt       cend,cstart,n,*rowners,sizesset=1;
3355   int            fd;
3356   PetscInt       bs = 1;
3357 
3358   PetscFunctionBegin;
3359   ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr);
3360   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3361   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3362   if (!rank) {
3363     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
3364     ierr = PetscBinaryRead(fd,(char*)header,4,PETSC_INT);CHKERRQ(ierr);
3365     if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not a matrix object");
3366   }
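
  /* Loading protocol: rank 0 reads the header, row lengths, column indices and
     values from the binary file and ships each slice to its owning rank with
     MPIULong_Send(); every rank then inserts its own rows with MatSetValues_MPIAIJ(). */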
3367 
3368   ierr = PetscOptionsBegin(comm,NULL,"Options for loading MPIAIJ matrix","Mat");CHKERRQ(ierr);
3369   ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,NULL);CHKERRQ(ierr);
3370   ierr = PetscOptionsEnd();CHKERRQ(ierr);
3371 
3372   if (newMat->rmap->n < 0 && newMat->rmap->N < 0 && newMat->cmap->n < 0 && newMat->cmap->N < 0) sizesset = 0;
3373 
3374   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
3375   M    = header[1]; N = header[2];
3376   /* If global rows/cols are set to PETSC_DECIDE, set it to the sizes given in the file */
3377   if (sizesset && newMat->rmap->N < 0) newMat->rmap->N = M;
3378   if (sizesset && newMat->cmap->N < 0) newMat->cmap->N = N;
3379 
3380   /* If global sizes are set, check if they are consistent with that given in the file */
3381   if (sizesset) {
3382     ierr = MatGetSize(newMat,&grows,&gcols);CHKERRQ(ierr);
3383   }
3384   if (sizesset && newMat->rmap->N != grows) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows:Matrix in file has (%d) and input matrix has (%d)",M,grows);
3385   if (sizesset && newMat->cmap->N != gcols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols:Matrix in file has (%d) and input matrix has (%d)",N,gcols);
3386 
3387   /* determine ownership of all (block) rows */
3388   if (M%bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows (%d) and block size (%d)",M,bs);
3389   if (newMat->rmap->n < 0) m = bs*((M/bs)/size + (((M/bs) % size) > rank));    /* PETSC_DECIDE */
3390   else m = newMat->rmap->n; /* Set by user */
3391 
3392   ierr = PetscMalloc1((size+1),&rowners);CHKERRQ(ierr);
3393   ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
3394 
3395   /* First process needs enough room for process with most rows */
3396   if (!rank) {
3397     mmax = rowners[1];
3398     for (i=2; i<=size; i++) {
3399       mmax = PetscMax(mmax, rowners[i]);
3400     }
3401   } else mmax = -1;             /* unused, but compilers complain */
3402 
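  /* convert the per-rank row counts gathered into rowners[1..size] into a
     prefix sum, so that rowners[r] is the first global row owned by rank r */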
3403   rowners[0] = 0;
3404   for (i=2; i<=size; i++) {
3405     rowners[i] += rowners[i-1];
3406   }
3407   rstart = rowners[rank];
3408   rend   = rowners[rank+1];
3409 
3410   /* distribute row lengths to all processors */
3411   ierr = PetscMalloc2(m,&ourlens,m,&offlens);CHKERRQ(ierr);
3412   if (!rank) {
3413     ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr);
3414     ierr = PetscMalloc1(mmax,&rowlengths);CHKERRQ(ierr);
3415     ierr = PetscCalloc1(size,&procsnz);CHKERRQ(ierr);
3416     for (j=0; j<m; j++) {
3417       procsnz[0] += ourlens[j];
3418     }
3419     for (i=1; i<size; i++) {
3420       ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr);
3421       /* calculate the number of nonzeros on each processor */
3422       for (j=0; j<rowners[i+1]-rowners[i]; j++) {
3423         procsnz[i] += rowlengths[j];
3424       }
3425       ierr = MPIULong_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
3426     }
3427     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
3428   } else {
3429     ierr = MPIULong_Recv(ourlens,m,MPIU_INT,0,tag,comm);CHKERRQ(ierr);
3430   }
3431 
3432   if (!rank) {
3433     /* determine max buffer needed and allocate it */
3434     maxnz = 0;
3435     for (i=0; i<size; i++) {
3436       maxnz = PetscMax(maxnz,procsnz[i]);
3437     }
3438     ierr = PetscMalloc1(maxnz,&cols);CHKERRQ(ierr);
3439 
3440     /* read in my part of the matrix column indices  */
3441     nz   = procsnz[0];
3442     ierr = PetscMalloc1(nz,&mycols);CHKERRQ(ierr);
3443     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
3444 
3445     /* read in everyone else's and ship it off */
3446     for (i=1; i<size; i++) {
3447       nz   = procsnz[i];
3448       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
3449       ierr = MPIULong_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
3450     }
3451     ierr = PetscFree(cols);CHKERRQ(ierr);
3452   } else {
3453     /* determine buffer space needed for message */
3454     nz = 0;
3455     for (i=0; i<m; i++) {
3456       nz += ourlens[i];
3457     }
3458     ierr = PetscMalloc1(nz,&mycols);CHKERRQ(ierr);
3459 
3460     /* receive message of column indices */
3461     ierr = MPIULong_Recv(mycols,nz,MPIU_INT,0,tag,comm);CHKERRQ(ierr);
3462   }
3463 
3464   /* determine column ownership if matrix is not square */
3465   if (N != M) {
3466     if (newMat->cmap->n < 0) n = N/size + ((N % size) > rank);
3467     else n = newMat->cmap->n;
3468     ierr   = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3469     cstart = cend - n;
3470   } else {
3471     cstart = rstart;
3472     cend   = rend;
3473     n      = cend - cstart;
3474   }
3475 
3476   /* loop over local rows, determining number of off diagonal entries */
3477   ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr);
3478   jj   = 0;
3479   for (i=0; i<m; i++) {
3480     for (j=0; j<ourlens[i]; j++) {
3481       if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++;
3482       jj++;
3483     }
3484   }
3485 
3486   for (i=0; i<m; i++) {
3487     ourlens[i] -= offlens[i];
3488   }
3489   if (!sizesset) {
3490     ierr = MatSetSizes(newMat,m,n,M,N);CHKERRQ(ierr);
3491   }
3492 
3493   if (bs > 1) {ierr = MatSetBlockSize(newMat,bs);CHKERRQ(ierr);}
3494 
3495   ierr = MatMPIAIJSetPreallocation(newMat,0,ourlens,0,offlens);CHKERRQ(ierr);
3496 
3497   for (i=0; i<m; i++) {
3498     ourlens[i] += offlens[i];
3499   }
3500 
3501   if (!rank) {
3502     ierr = PetscMalloc1((maxnz+1),&vals);CHKERRQ(ierr);
3503 
3504     /* read in my part of the matrix numerical values  */
3505     nz   = procsnz[0];
3506     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3507 
3508     /* insert into matrix */
3509     jj      = rstart;
3510     smycols = mycols;
3511     svals   = vals;
3512     for (i=0; i<m; i++) {
3513       ierr     = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
3514       smycols += ourlens[i];
3515       svals   += ourlens[i];
3516       jj++;
3517     }
3518 
3519     /* read in other processors and ship out */
3520     for (i=1; i<size; i++) {
3521       nz   = procsnz[i];
3522       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3523       ierr = MPIULong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr);
3524     }
3525     ierr = PetscFree(procsnz);CHKERRQ(ierr);
3526   } else {
3527     /* receive numeric values */
3528     ierr = PetscMalloc1((nz+1),&vals);CHKERRQ(ierr);
3529 
3530     /* receive message of values */
3531     ierr = MPIULong_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr);
3532 
3533     /* insert into matrix */
3534     jj      = rstart;
3535     smycols = mycols;
3536     svals   = vals;
3537     for (i=0; i<m; i++) {
3538       ierr     = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
3539       smycols += ourlens[i];
3540       svals   += ourlens[i];
3541       jj++;
3542     }
3543   }
3544   ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr);
3545   ierr = PetscFree(vals);CHKERRQ(ierr);
3546   ierr = PetscFree(mycols);CHKERRQ(ierr);
3547   ierr = PetscFree(rowners);CHKERRQ(ierr);
3548   ierr = MatAssemblyBegin(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3549   ierr = MatAssemblyEnd(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3550   PetscFunctionReturn(0);
3551 }
3552 
3553 #undef __FUNCT__
3554 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ"
3555 PetscErrorCode MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat)
3556 {
3557   PetscErrorCode ierr;
3558   IS             iscol_local;
3559   PetscInt       csize;
3560 
3561   PetscFunctionBegin;
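  /* gather the distributed iscol onto every process; the gathered IS is cached
     on the new matrix under the name "ISAllGather" so that a MAT_REUSE_MATRIX
     call can retrieve it */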
3562   ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
3563   if (call == MAT_REUSE_MATRIX) {
3564     ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr);
3565     if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3566   } else {
3567     PetscInt cbs;
3568     ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr);
3569     ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
3570     ierr = ISSetBlockSize(iscol_local,cbs);CHKERRQ(ierr);
3571   }
3572   ierr = MatGetSubMatrix_MPIAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr);
3573   if (call == MAT_INITIAL_MATRIX) {
3574     ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
3575     ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
3576   }
3577   PetscFunctionReturn(0);
3578 }
3579 
3580 extern PetscErrorCode MatGetSubMatrices_MPIAIJ_Local(Mat,PetscInt,const IS[],const IS[],MatReuse,PetscBool*,Mat*);
3581 #undef __FUNCT__
3582 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ_Private"
3583 /*
3584     Not great since it makes two copies of the submatrix: first a SeqAIJ
3585   locally, then the end result by concatenating the local matrices.
3586   Writing it directly would look much like MatGetSubMatrices_MPIAIJ().
3587 
3588   Note: This requires a sequential iscol with all indices.
3589 */
3590 PetscErrorCode MatGetSubMatrix_MPIAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat)
3591 {
3592   PetscErrorCode ierr;
3593   PetscMPIInt    rank,size;
3594   PetscInt       i,m,n,rstart,row,rend,nz,*cwork,j,bs,cbs;
3595   PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal,ncol;
3596   PetscBool      allcolumns, colflag;
3597   Mat            M,Mreuse;
3598   MatScalar      *vwork,*aa;
3599   MPI_Comm       comm;
3600   Mat_SeqAIJ     *aij;
3601 
3602   PetscFunctionBegin;
3603   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3604   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3605   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3606 
3607   ierr = ISIdentity(iscol,&colflag);CHKERRQ(ierr);
3608   ierr = ISGetLocalSize(iscol,&ncol);CHKERRQ(ierr);
3609   if (colflag && ncol == mat->cmap->N) {
3610     allcolumns = PETSC_TRUE;
3611   } else {
3612     allcolumns = PETSC_FALSE;
3613   }
3614   if (call ==  MAT_REUSE_MATRIX) {
3615     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Mreuse);CHKERRQ(ierr);
3616     if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3617     ierr = MatGetSubMatrices_MPIAIJ_Local(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&allcolumns,&Mreuse);CHKERRQ(ierr);
3618   } else {
3619     ierr = MatGetSubMatrices_MPIAIJ_Local(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&allcolumns,&Mreuse);CHKERRQ(ierr);
3620   }
3621 
3622   /*
3623       m - number of local rows
3624       n - number of columns (same on all processors)
3625       rstart - first row in new global matrix generated
3626   */
3627   ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
3628   ierr = MatGetBlockSizes(Mreuse,&bs,&cbs);CHKERRQ(ierr);
3629   if (call == MAT_INITIAL_MATRIX) {
3630     aij = (Mat_SeqAIJ*)(Mreuse)->data;
3631     ii  = aij->i;
3632     jj  = aij->j;
3633 
3634     /*
3635         Determine the number of non-zeros in the diagonal and off-diagonal
3636         portions of the matrix in order to do correct preallocation
3637     */
3638 
3639     /* first get start and end of "diagonal" columns */
3640     if (csize == PETSC_DECIDE) {
3641       ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
3642       if (mglobal == n) { /* square matrix */
3643         nlocal = m;
3644       } else {
3645         nlocal = n/size + ((n % size) > rank);
3646       }
3647     } else {
3648       nlocal = csize;
3649     }
3650     ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3651     rstart = rend - nlocal;
3652     if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n);
3653 
3654     /* next, compute all the lengths */
3655     ierr  = PetscMalloc1((2*m+1),&dlens);CHKERRQ(ierr);
3656     olens = dlens + m;
3657     for (i=0; i<m; i++) {
3658       jend = ii[i+1] - ii[i];
3659       olen = 0;
3660       dlen = 0;
3661       for (j=0; j<jend; j++) {
3662         if (*jj < rstart || *jj >= rend) olen++;
3663         else dlen++;
3664         jj++;
3665       }
3666       olens[i] = olen;
3667       dlens[i] = dlen;
3668     }
3669     ierr = MatCreate(comm,&M);CHKERRQ(ierr);
3670     ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr);
3671     ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr);
3672     ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr);
3673     ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr);
3674     ierr = PetscFree(dlens);CHKERRQ(ierr);
3675   } else {
3676     PetscInt ml,nl;
3677 
3678     M    = *newmat;
3679     ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
3680     if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
3681     ierr = MatZeroEntries(M);CHKERRQ(ierr);
3682     /*
3683          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
3684        rather than the slower MatSetValues().
3685     */
3686     M->was_assembled = PETSC_TRUE;
3687     M->assembled     = PETSC_FALSE;
3688   }
3689   ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
3690   aij  = (Mat_SeqAIJ*)(Mreuse)->data;
3691   ii   = aij->i;
3692   jj   = aij->j;
3693   aa   = aij->a;
3694   for (i=0; i<m; i++) {
3695     row   = rstart + i;
3696     nz    = ii[i+1] - ii[i];
3697     cwork = jj;     jj += nz;
3698     vwork = aa;     aa += nz;
3699     ierr  = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
3700   }
3701 
3702   ierr    = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3703   ierr    = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3704   *newmat = M;
3705 
3706   /* save submatrix used in processor for next request */
3707   if (call ==  MAT_INITIAL_MATRIX) {
3708     ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
3709     ierr = MatDestroy(&Mreuse);CHKERRQ(ierr);
3710   }
3711   PetscFunctionReturn(0);
3712 }
3713 
3714 #undef __FUNCT__
3715 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR_MPIAIJ"
3716 PetscErrorCode  MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[])
3717 {
3718   PetscInt       m,cstart, cend,j,nnz,i,d;
3719   PetscInt       *d_nnz,*o_nnz,nnz_max = 0,rstart,ii;
3720   const PetscInt *JJ;
3721   PetscScalar    *values;
3722   PetscErrorCode ierr;
3723 
3724   PetscFunctionBegin;
3725   if (Ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0 it is %D",Ii[0]);
3726 
3727   ierr   = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3728   ierr   = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3729   m      = B->rmap->n;
3730   cstart = B->cmap->rstart;
3731   cend   = B->cmap->rend;
3732   rstart = B->rmap->rstart;
3733 
3734   ierr = PetscMalloc2(m,&d_nnz,m,&o_nnz);CHKERRQ(ierr);
3735 
3736 #if defined(PETSC_USE_DEBUG)
3737   for (i=0; i<m; i++) {
3738     nnz = Ii[i+1]- Ii[i];
3739     JJ  = J + Ii[i];
3740     if (nnz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative %D number of columns",i,nnz);
3741     if (nnz && (JJ[0] < 0)) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index",i);
3742     if (nnz && (JJ[nnz-1] >= B->cmap->N)) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N);
3743   }
3744 #endif
3745 
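  /* count, for every local row, how many columns fall inside [cstart,cend)
     (diagonal block) and how many fall outside it (off-diagonal block) */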
3746   for (i=0; i<m; i++) {
3747     nnz     = Ii[i+1]- Ii[i];
3748     JJ      = J + Ii[i];
3749     nnz_max = PetscMax(nnz_max,nnz);
3750     d       = 0;
3751     for (j=0; j<nnz; j++) {
3752       if (cstart <= JJ[j] && JJ[j] < cend) d++;
3753     }
3754     d_nnz[i] = d;
3755     o_nnz[i] = nnz - d;
3756   }
3757   ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
3758   ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);
3759 
3760   if (v) values = (PetscScalar*)v;
3761   else {
3762     ierr = PetscCalloc1((nnz_max+1),&values);CHKERRQ(ierr);
3763   }
3764 
3765   for (i=0; i<m; i++) {
3766     ii   = i + rstart;
3767     nnz  = Ii[i+1]- Ii[i];
3768     ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ? Ii[i] : 0),INSERT_VALUES);CHKERRQ(ierr);
3769   }
3770   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3771   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3772 
3773   if (!v) {
3774     ierr = PetscFree(values);CHKERRQ(ierr);
3775   }
3776   ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3777   PetscFunctionReturn(0);
3778 }
3779 
3780 #undef __FUNCT__
3781 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR"
3782 /*@
3783    MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format
3784    (the default parallel PETSc format).
3785 
3786    Collective on MPI_Comm
3787 
3788    Input Parameters:
3789 +  B - the matrix
3790 .  i - the indices into j for the start of each local row (starts with zero)
3791 .  j - the column indices for each local row (starts with zero)
3792 -  v - optional values in the matrix
3793 
3794    Level: developer
3795 
3796    Notes:
3797        The i, j, and v arrays ARE copied by this routine into the internal format used by PETSc;
3798      thus you CANNOT change the matrix entries by changing the values of v[] after you have
3799      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3800 
3801        The i and j indices are 0-based, and the i indices are offsets into the local j array.
3802 
3803        The format which is used for the sparse matrix input is equivalent to a
3804     row-major ordering, i.e., for the following matrix, the input data expected is
3805     as shown:
3806 
3807         1 0 0
3808         2 0 3     P0
3809        -------
3810         4 5 6     P1
3811 
3812      Process0 [P0]: rows_owned=[0,1]
3813         i =  {0,1,3}  [size = nrow+1  = 2+1]
3814         j =  {0,0,2}  [size = nz = 3]
3815         v =  {1,2,3}  [size = nz = 3]
3816 
3817      Process1 [P1]: rows_owned=[2]
3818         i =  {0,3}    [size = nrow+1  = 1+1]
3819         j =  {0,1,2}  [size = nz = 3]
3820         v =  {4,5,6}  [size = nz = 3]
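
     A minimal calling sketch for the 3x3 example above (error checking omitted;
     comm and nlocal are illustrative, with nlocal = 2 on P0 and 1 on P1; since v
     is provided, the call also inserts the values and assembles B):

.vb
      Mat B;
      MatCreate(comm,&B);
      MatSetSizes(B,nlocal,PETSC_DECIDE,3,3);
      MatSetType(B,MATMPIAIJ);
      MatMPIAIJSetPreallocationCSR(B,i,j,v);
.ve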
3821 
3822 .keywords: matrix, aij, compressed row, sparse, parallel
3823 
3824 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateAIJ(), MPIAIJ,
3825           MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays()
3826 @*/
3827 PetscErrorCode  MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
3828 {
3829   PetscErrorCode ierr;
3830 
3831   PetscFunctionBegin;
3832   ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocationCSR_C",(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,i,j,v));CHKERRQ(ierr);
3833   PetscFunctionReturn(0);
3834 }
3835 
3836 #undef __FUNCT__
3837 #define __FUNCT__ "MatMPIAIJSetPreallocation"
3838 /*@C
3839    MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format
3840    (the default parallel PETSc format).  For good matrix assembly performance
3841    the user should preallocate the matrix storage by setting the parameters
3842    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3843    performance can be increased by more than a factor of 50.
3844 
3845    Collective on MPI_Comm
3846 
3847    Input Parameters:
3848 +  A - the matrix
3849 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
3850            (same value is used for all local rows)
3851 .  d_nnz - array containing the number of nonzeros in the various rows of the
3852            DIAGONAL portion of the local submatrix (possibly different for each row)
3853            or NULL, if d_nz is used to specify the nonzero structure.
3854            The size of this array is equal to the number of local rows, i.e 'm'.
3855            For matrices that will be factored, you must leave room for (and set)
3856            the diagonal entry even if it is zero.
3857 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
3858            submatrix (same value is used for all local rows).
3859 -  o_nnz - array containing the number of nonzeros in the various rows of the
3860            OFF-DIAGONAL portion of the local submatrix (possibly different for
3861            each row) or NULL, if o_nz is used to specify the nonzero
3862            structure. The size of this array is equal to the number
3863           of local rows, i.e. 'm'.
3864 
3865    If the *_nnz parameter is given then the *_nz parameter is ignored
3866 
3867    The AIJ format (also called the Yale sparse matrix format or
3868    compressed row storage (CSR)), is fully compatible with standard Fortran 77
3869    storage.  The stored row and column indices begin with zero.
3870    See the <A href="../../docs/manual.pdf#nameddest=ch_mat">Mat chapter of the users manual</A> for details.
3871 
3872    The parallel matrix is partitioned such that the first m0 rows belong to
3873    process 0, the next m1 rows belong to process 1, the next m2 rows belong
3874    to process 2, etc., where m0,m1,m2,... are the input parameter 'm'.
3875 
3876    The DIAGONAL portion of the local submatrix of a processor can be defined
3877    as the submatrix obtained by extracting the part corresponding to
3878    the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the
3879    first row that belongs to the processor, r2 is the last row belonging to
3880    this processor, and c1-c2 is the range of indices of the local part of a
3881    vector suitable for applying the matrix to.  This is an m x n matrix.  In the
3882    common case of a square matrix, the row and column ranges are the same and
3883    the DIAGONAL part is also square. The remaining portion of the local
3884    submatrix (m x (N-n)) constitutes the OFF-DIAGONAL portion.
3885 
3886    If d_nnz and o_nnz are specified, then d_nz and o_nz are ignored.
3887 
3888    You can call MatGetInfo() to get information on how effective the preallocation was;
3889    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
3890    You can also run with the option -info and look for messages with the string
3891    malloc in them to see if additional memory allocation was needed.
3892 
3893    Example usage:
3894 
3895    Consider the following 8x8 matrix with 34 non-zero values, that is
3896    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
3897    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
3898    as follows:
3899 
3900 .vb
3901             1  2  0  |  0  3  0  |  0  4
3902     Proc0   0  5  6  |  7  0  0  |  8  0
3903             9  0 10  | 11  0  0  | 12  0
3904     -------------------------------------
3905            13  0 14  | 15 16 17  |  0  0
3906     Proc1   0 18  0  | 19 20 21  |  0  0
3907             0  0  0  | 22 23  0  | 24  0
3908     -------------------------------------
3909     Proc2  25 26 27  |  0  0 28  | 29  0
3910            30  0  0  | 31 32 33  |  0 34
3911 .ve
3912 
3913    This can be represented as a collection of submatrices as:
3914 
3915 .vb
3916       A B C
3917       D E F
3918       G H I
3919 .ve
3920 
3921    Where the submatrices A,B,C are owned by proc0, D,E,F are
3922    owned by proc1, G,H,I are owned by proc2.
3923 
3924    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3925    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3926    The 'M','N' parameters are 8,8, and have the same values on all procs.
3927 
3928    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
3929    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
3930    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
3931    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
3932    part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
3933    matrix, and [DF] as another SeqAIJ matrix.
3934 
3935    When d_nz, o_nz parameters are specified, d_nz storage elements are
3936    allocated for every row of the local diagonal submatrix, and o_nz
3937    storage locations are allocated for every row of the OFF-DIAGONAL submat.
3938    One way to choose d_nz and o_nz is to use the maximum number of nonzeros over
3939    the local rows for each of the local DIAGONAL and OFF-DIAGONAL submatrices.
3940    In this case, the values of d_nz,o_nz are:
3941 .vb
3942      proc0 : d_nz = 2, o_nz = 2
3943      proc1 : d_nz = 3, o_nz = 2
3944      proc2 : d_nz = 1, o_nz = 4
3945 .ve
3946    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
3947    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
3948    for proc2, i.e. we are using 12+15+10=37 storage locations to store
3949    34 values.
3950 
3951    When d_nnz, o_nnz parameters are specified, the storage is specified
3952    for every row, corresponding to both the DIAGONAL and OFF-DIAGONAL submatrices.
3953    In the above case the values for d_nnz,o_nnz are:
3954 .vb
3955      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
3956      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
3957      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
3958 .ve
3959    Here the space allocated is the sum of all the above values, i.e. 34, and
3960    hence pre-allocation is perfect.
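
   A minimal sketch (hedged: assumes B was already created with MatCreate(),
   MatSetSizes(), and MatSetType() as usual) of the per-row variant on proc0 above:
.vb
   PetscInt d_nnz[] = {2,2,2};   /* nonzeros per row, DIAGONAL block */
   PetscInt o_nnz[] = {2,2,2};   /* nonzeros per row, OFF-DIAGONAL block */
   MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);
.ve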
3961 
3962    Level: intermediate
3963 
3964 .keywords: matrix, aij, compressed row, sparse, parallel
3965 
3966 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateAIJ(), MatMPIAIJSetPreallocationCSR(),
3967           MPIAIJ, MatGetInfo(), PetscSplitOwnership()
3968 @*/
3969 PetscErrorCode  MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
3970 {
3971   PetscErrorCode ierr;
3972 
3973   PetscFunctionBegin;
3974   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
3975   PetscValidType(B,1);
3976   ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocation_C",(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr);
3977   PetscFunctionReturn(0);
3978 }
3979 
3980 #undef __FUNCT__
3981 #define __FUNCT__ "MatCreateMPIAIJWithArrays"
3982 /*@
3983      MatCreateMPIAIJWithArrays - creates an MPIAIJ matrix using arrays that contain the local
3984          rows in standard CSR format.
3985 
3986    Collective on MPI_Comm
3987 
3988    Input Parameters:
3989 +  comm - MPI communicator
3990 .  m - number of local rows (Cannot be PETSC_DECIDE)
3991 .  n - This value should be the same as the local size used in creating the
3992        x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have
3993        it calculated if N is given). For square matrices n is almost always m.
3994 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
3995 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
3996 .   i - row indices
3997 .   j - column indices
3998 -   a - matrix values
3999 
4000    Output Parameter:
4001 .   mat - the matrix
4002 
4003    Level: intermediate
4004 
4005    Notes:
4006        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
4007      thus you CANNOT change the matrix entries by changing the values of a[] after you have
4008      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
4009 
4010       The i and j indices are 0 based, and the entries of i are offsets into the local j array.
4011 
4012       The format used for the sparse matrix input is equivalent to a
4013     row-major ordering, i.e. for the following matrix, the input data expected is
4014     as shown:
4015 
4016         1 0 0
4017         2 0 3     P0
4018        -------
4019         4 5 6     P1
4020 
4021      Process0 [P0]: rows_owned=[0,1]
4022         i =  {0,1,3}  [size = nrow+1  = 2+1]
4023         j =  {0,0,2}  [size = nz = 3]
4024         v =  {1,2,3}  [size = nz = 3]
4025 
4026      Process1 [P1]: rows_owned=[2]
4027         i =  {0,3}    [size = nrow+1  = 1+1]
4028         j =  {0,1,2}  [size = nz = 3]
4029         v =  {4,5,6}  [size = nz = 3]
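
     A minimal sketch (hedged: error checking omitted) of the call on process 0
     above; process 1 makes the same collective call with its own arrays:
.vb
   Mat         mat;
   PetscInt    i[] = {0,1,3};
   PetscInt    j[] = {0,0,2};
   PetscScalar a[] = {1.0,2.0,3.0};
   MatCreateMPIAIJWithArrays(PETSC_COMM_WORLD,2,PETSC_DECIDE,PETSC_DETERMINE,3,i,j,a,&mat);
.ve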
4030 
4031 .keywords: matrix, aij, compressed row, sparse, parallel
4032 
4033 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
4034           MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays()
4035 @*/
4036 PetscErrorCode  MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
4037 {
4038   PetscErrorCode ierr;
4039 
4040   PetscFunctionBegin;
4041   if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
4042   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
4043   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
4044   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
4045   /* ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); */
4046   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
4047   ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr);
4048   PetscFunctionReturn(0);
4049 }
4050 
4051 #undef __FUNCT__
4052 #define __FUNCT__ "MatCreateAIJ"
4053 /*@C
4054    MatCreateAIJ - Creates a sparse parallel matrix in AIJ format
4055    (the default parallel PETSc format).  For good matrix assembly performance
4056    the user should preallocate the matrix storage by setting the parameters
4057    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
4058    performance can be increased by more than a factor of 50.
4059 
4060    Collective on MPI_Comm
4061 
4062    Input Parameters:
4063 +  comm - MPI communicator
4064 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
4065            This value should be the same as the local size used in creating the
4066            y vector for the matrix-vector product y = Ax.
4067 .  n - This value should be the same as the local size used in creating the
4068        x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have
4069        it calculated if N is given). For square matrices n is almost always m.
4070 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
4071 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
4072 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
4073            (same value is used for all local rows)
4074 .  d_nnz - array containing the number of nonzeros in the various rows of the
4075            DIAGONAL portion of the local submatrix (possibly different for each row)
4076            or NULL, if d_nz is used to specify the nonzero structure.
4077           The size of this array is equal to the number of local rows, i.e. 'm'.
4078 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
4079            submatrix (same value is used for all local rows).
4080 -  o_nnz - array containing the number of nonzeros in the various rows of the
4081            OFF-DIAGONAL portion of the local submatrix (possibly different for
4082            each row) or NULL, if o_nz is used to specify the nonzero
4083            structure. The size of this array is equal to the number
4084           of local rows, i.e. 'm'.
4085 
4086    Output Parameter:
4087 .  A - the matrix
4088 
4089    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
4090    MatXXXXSetPreallocation() paradigm instead of this routine directly.
4091    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
4092 
4093    Notes:
4094    If the *_nnz parameter is given then the *_nz parameter is ignored
4095 
4096    m,n,M,N parameters specify the size of the matrix, and its partitioning across
4097    processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
4098    storage requirements for this matrix.
4099 
4100    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one
4101    processor then it must be used on all processors that share the object for
4102    that argument.
4103 
4104    The user MUST specify either the local or global matrix dimensions
4105    (possibly both).
4106 
4107    The parallel matrix is partitioned across processors such that the
4108    first m0 rows belong to process 0, the next m1 rows belong to
4109    process 1, the next m2 rows belong to process 2, etc., where
4110    m0,m1,m2,... are the input parameter 'm', i.e. each processor stores
4111    values corresponding to an [m x N] submatrix.
4112 
4113    The columns are logically partitioned with the n0 columns belonging
4114    to 0th partition, the next n1 columns belonging to the next
4115    partition, etc., where n0,n1,n2,... are the input parameter 'n'.
4116 
4117    The DIAGONAL portion of the local submatrix on any given processor
4118    is the submatrix corresponding to the rows and columns m,n owned by
4119    the given processor, i.e. the diagonal submatrix on
4120    process 0 is [m0 x n0], the diagonal submatrix on process 1 is [m1 x n1],
4121    etc. The remaining portion of the local submatrix [m x (N-n)]
4122    constitutes the OFF-DIAGONAL portion. The example below better
4123    illustrates this concept.
4124 
4125    For a square global matrix we define each processor's diagonal portion
4126    to be its local rows and the corresponding columns (a square submatrix);
4127    each processor's off-diagonal portion encompasses the remainder of the
4128    local matrix (a rectangular submatrix).
4129 
4130    If d_nnz and o_nnz are specified, then d_nz and o_nz are ignored.
4131 
4132    When calling this routine with a single process communicator, a matrix of
4133    type SEQAIJ is returned.  If a matrix of type MPIAIJ is desired for this
4134    type of communicator, use the construction mechanism:
4135      MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatSetSizes(A, m,n,M,N); MatMPIAIJSetPreallocation(A,...);
4136 
4137    By default, this format uses inodes (identical nodes) when possible.
4138    We search for consecutive rows with the same nonzero structure, thereby
4139    reusing matrix information to achieve increased efficiency.
4140 
4141    Options Database Keys:
4142 +  -mat_no_inode  - Do not use inodes
4143 .  -mat_inode_limit <limit> - Sets inode limit (max limit=5)
4144 -  -mat_aij_oneindex - Internally use indexing starting at 1
4145         rather than 0.  Note that when calling MatSetValues(),
4146         the user still MUST index entries starting at 0!
4147 
4148 
4149    Example usage:
4150 
4151    Consider the following 8x8 matrix with 34 non-zero values, that is
4152    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
4153    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
4154    as follows:
4155 
4156 .vb
4157             1  2  0  |  0  3  0  |  0  4
4158     Proc0   0  5  6  |  7  0  0  |  8  0
4159             9  0 10  | 11  0  0  | 12  0
4160     -------------------------------------
4161            13  0 14  | 15 16 17  |  0  0
4162     Proc1   0 18  0  | 19 20 21  |  0  0
4163             0  0  0  | 22 23  0  | 24  0
4164     -------------------------------------
4165     Proc2  25 26 27  |  0  0 28  | 29  0
4166            30  0  0  | 31 32 33  |  0 34
4167 .ve
4168 
4169    This can be represented as a collection of submatrices as:
4170 
4171 .vb
4172       A B C
4173       D E F
4174       G H I
4175 .ve
4176 
4177    Where the submatrices A,B,C are owned by proc0, D,E,F are
4178    owned by proc1, G,H,I are owned by proc2.
4179 
4180    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4181    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4182    The 'M','N' parameters are 8,8, and have the same values on all procs.
4183 
4184    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
4185    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
4186    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
4187    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
4188    part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
4189    matrix, and [DF] as another SeqAIJ matrix.
4190 
4191    When d_nz, o_nz parameters are specified, d_nz storage elements are
4192    allocated for every row of the local diagonal submatrix, and o_nz
4193    storage locations are allocated for every row of the OFF-DIAGONAL submat.
4194    One way to choose d_nz and o_nz is to use the maximum number of nonzeros over
4195    the local rows for each of the local DIAGONAL and OFF-DIAGONAL submatrices.
4196    In this case, the values of d_nz,o_nz are:
4197 .vb
4198      proc0 : d_nz = 2, o_nz = 2
4199      proc1 : d_nz = 3, o_nz = 2
4200      proc2 : d_nz = 1, o_nz = 4
4201 .ve
4202    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
4203    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
4204    for proc2, i.e. we are using 12+15+10=37 storage locations to store
4205    34 values.
4206 
4207    When d_nnz, o_nnz parameters are specified, the storage is specified
4208    for every row, corresponding to both the DIAGONAL and OFF-DIAGONAL submatrices.
4209    In the above case the values for d_nnz,o_nnz are:
4210 .vb
4211      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
4212      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
4213      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
4214 .ve
4215    Here the space allocated is the sum of all the above values, i.e. 34, and
4216    hence pre-allocation is perfect.
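
   A minimal sketch (hedged: every process makes the same collective call with
   its own local sizes and nnz arrays) of the call on proc0 above:
.vb
   Mat      A;
   PetscInt d_nnz[] = {2,2,2};
   PetscInt o_nnz[] = {2,2,2};
   MatCreateAIJ(PETSC_COMM_WORLD,3,3,8,8,0,d_nnz,0,o_nnz,&A);
.ve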
4217 
4218    Level: intermediate
4219 
4220 .keywords: matrix, aij, compressed row, sparse, parallel
4221 
4222 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
4223           MPIAIJ, MatCreateMPIAIJWithArrays()
4224 @*/
4225 PetscErrorCode  MatCreateAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
4226 {
4227   PetscErrorCode ierr;
4228   PetscMPIInt    size;
4229 
4230   PetscFunctionBegin;
4231   ierr = MatCreate(comm,A);CHKERRQ(ierr);
4232   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
4233   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4234   if (size > 1) {
4235     ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr);
4236     ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
4237   } else {
4238     ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr);
4239     ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr);
4240   }
4241   PetscFunctionReturn(0);
4242 }
4243 
4244 #undef __FUNCT__
4245 #define __FUNCT__ "MatMPIAIJGetSeqAIJ"
4246 PetscErrorCode  MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,const PetscInt *colmap[])
4247 {
4248   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
4249 
4250   PetscFunctionBegin;
4251   *Ad     = a->A;
4252   *Ao     = a->B;
4253   *colmap = a->garray;
4254   PetscFunctionReturn(0);
4255 }
4256 
4257 #undef __FUNCT__
4258 #define __FUNCT__ "MatSetColoring_MPIAIJ"
4259 PetscErrorCode MatSetColoring_MPIAIJ(Mat A,ISColoring coloring)
4260 {
4261   PetscErrorCode ierr;
4262   PetscInt       i;
4263   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
4264 
4265   PetscFunctionBegin;
4266   if (coloring->ctype == IS_COLORING_GLOBAL) {
4267     ISColoringValue *allcolors,*colors;
4268     ISColoring      ocoloring;
4269 
4270     /* set coloring for diagonal portion */
4271     ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr);
4272 
4273     /* set coloring for off-diagonal portion */
4274     ierr = ISAllGatherColors(PetscObjectComm((PetscObject)A),coloring->n,coloring->colors,NULL,&allcolors);CHKERRQ(ierr);
4275     ierr = PetscMalloc1((a->B->cmap->n+1),&colors);CHKERRQ(ierr);
4276     for (i=0; i<a->B->cmap->n; i++) {
4277       colors[i] = allcolors[a->garray[i]];
4278     }
4279     ierr = PetscFree(allcolors);CHKERRQ(ierr);
4280     ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
4281     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
4282     ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr);
4283   } else if (coloring->ctype == IS_COLORING_GHOSTED) {
4284     ISColoringValue *colors;
4285     PetscInt        *larray;
4286     ISColoring      ocoloring;
4287 
4288     /* set coloring for diagonal portion */
4289     ierr = PetscMalloc1((a->A->cmap->n+1),&larray);CHKERRQ(ierr);
4290     for (i=0; i<a->A->cmap->n; i++) {
4291       larray[i] = i + A->cmap->rstart;
4292     }
4293     ierr = ISGlobalToLocalMappingApply(A->cmap->mapping,IS_GTOLM_MASK,a->A->cmap->n,larray,NULL,larray);CHKERRQ(ierr);
4294     ierr = PetscMalloc1((a->A->cmap->n+1),&colors);CHKERRQ(ierr);
4295     for (i=0; i<a->A->cmap->n; i++) {
4296       colors[i] = coloring->colors[larray[i]];
4297     }
4298     ierr = PetscFree(larray);CHKERRQ(ierr);
4299     ierr = ISColoringCreate(PETSC_COMM_SELF,coloring->n,a->A->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
4300     ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr);
4301     ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr);
4302 
4303     /* set coloring for off-diagonal portion */
4304     ierr = PetscMalloc1((a->B->cmap->n+1),&larray);CHKERRQ(ierr);
4305     ierr = ISGlobalToLocalMappingApply(A->cmap->mapping,IS_GTOLM_MASK,a->B->cmap->n,a->garray,NULL,larray);CHKERRQ(ierr);
4306     ierr = PetscMalloc1((a->B->cmap->n+1),&colors);CHKERRQ(ierr);
4307     for (i=0; i<a->B->cmap->n; i++) {
4308       colors[i] = coloring->colors[larray[i]];
4309     }
4310     ierr = PetscFree(larray);CHKERRQ(ierr);
4311     ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
4312     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
4313     ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr);
4314   } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support ISColoringType %d",(int)coloring->ctype);
4315   PetscFunctionReturn(0);
4316 }
4317 
4318 #undef __FUNCT__
4319 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ"
4320 PetscErrorCode MatSetValuesAdifor_MPIAIJ(Mat A,PetscInt nl,void *advalues)
4321 {
4322   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
4323   PetscErrorCode ierr;
4324 
4325   PetscFunctionBegin;
4326   ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr);
4327   ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr);
4328   PetscFunctionReturn(0);
4329 }
4330 
4331 #undef __FUNCT__
4332 #define __FUNCT__ "MatCreateMPIAIJConcatenateSeqAIJSymbolic"
4333 PetscErrorCode  MatCreateMPIAIJConcatenateSeqAIJSymbolic(MPI_Comm comm,Mat inmat,PetscInt n,Mat *outmat)
4334 {
4335   PetscErrorCode ierr;
4336   PetscInt       m,N,i,rstart,nnz,*dnz,*onz,sum,bs,cbs;
4337   PetscInt       *indx;
4338 
4339   PetscFunctionBegin;
4340   /* This routine will ONLY return MPIAIJ type matrix */
4341   ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
4342   ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr);
4343   if (n == PETSC_DECIDE) {
4344     ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr);
4345   }
4346   /* Check sum(n) = N */
4347   ierr = MPI_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
4348   if (sum != N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns != global columns %d",N);
4349 
4350   ierr    = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
4351   rstart -= m;
4352 
4353   ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
4354   for (i=0; i<m; i++) {
4355     ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr);
4356     ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr);
4357     ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr);
4358   }
4359 
4360   ierr = MatCreate(comm,outmat);CHKERRQ(ierr);
4361   ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
4362   ierr = MatSetBlockSizes(*outmat,bs,cbs);CHKERRQ(ierr);
4363   ierr = MatSetType(*outmat,MATMPIAIJ);CHKERRQ(ierr);
4364   ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr);
4365   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
4366   PetscFunctionReturn(0);
4367 }
4368 
4369 #undef __FUNCT__
4370 #define __FUNCT__ "MatCreateMPIAIJConcatenateSeqAIJNumeric"
4371 PetscErrorCode  MatCreateMPIAIJConcatenateSeqAIJNumeric(MPI_Comm comm,Mat inmat,PetscInt n,Mat outmat)
4372 {
4373   PetscErrorCode ierr;
4374   PetscInt       m,N,i,rstart,nnz,Ii;
4375   PetscInt       *indx;
4376   PetscScalar    *values;
4377 
4378   PetscFunctionBegin;
4379   ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
4380   ierr = MatGetOwnershipRange(outmat,&rstart,NULL);CHKERRQ(ierr);
4381   for (i=0; i<m; i++) {
4382     ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
4383     Ii   = i + rstart;
4384     ierr = MatSetValues(outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
4385     ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
4386   }
4387   ierr = MatAssemblyBegin(outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4388   ierr = MatAssemblyEnd(outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4389   PetscFunctionReturn(0);
4390 }
4391 
4392 #undef __FUNCT__
4393 #define __FUNCT__ "MatCreateMPIAIJConcatenateSeqAIJ"
4394 /*@
4395       MatCreateMPIAIJConcatenateSeqAIJ - Creates a single large PETSc matrix by concatenating sequential
4396                  matrices from each processor
4397 
4398     Collective on MPI_Comm
4399 
4400    Input Parameters:
4401 +    comm - the communicator the parallel matrix will live on
4402 .    inmat - the input sequential matrix (one per process)
4403 .    n - number of local columns (or PETSC_DECIDE)
4404 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4405 
4406    Output Parameter:
4407 .    outmat - the parallel matrix generated
4408 
4409     Level: advanced
4410 
4411    Notes: The number of columns of the matrix in EACH processor MUST be the same.
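
   A minimal usage sketch (hedged: each process passes its own sequential
   matrix, and the call is collective on comm):
.vb
   Mat outmat;
   MatCreateMPIAIJConcatenateSeqAIJ(comm,inmat,PETSC_DECIDE,MAT_INITIAL_MATRIX,&outmat);
   /* later, with new values but the same nonzero pattern in inmat: */
   MatCreateMPIAIJConcatenateSeqAIJ(comm,inmat,PETSC_DECIDE,MAT_REUSE_MATRIX,&outmat);
.ve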
4412 
4413 @*/
4414 PetscErrorCode  MatCreateMPIAIJConcatenateSeqAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
4415 {
4416   PetscErrorCode ierr;
4417   PetscMPIInt    size;
4418 
4419   PetscFunctionBegin;
4420   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4421   ierr = PetscLogEventBegin(MAT_Merge,inmat,0,0,0);CHKERRQ(ierr);
4422   if (size == 1) {
4423     if (scall == MAT_INITIAL_MATRIX) {
4424       ierr = MatDuplicate(inmat,MAT_COPY_VALUES,outmat);CHKERRQ(ierr);
4425     } else {
4426       ierr = MatCopy(inmat,*outmat,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
4427     }
4428   } else {
4429     if (scall == MAT_INITIAL_MATRIX) {
4430       ierr = MatCreateMPIAIJConcatenateSeqAIJSymbolic(comm,inmat,n,outmat);CHKERRQ(ierr);
4431     }
4432     ierr = MatCreateMPIAIJConcatenateSeqAIJNumeric(comm,inmat,n,*outmat);CHKERRQ(ierr);
4433   }
4434   ierr = PetscLogEventEnd(MAT_Merge,inmat,0,0,0);CHKERRQ(ierr);
4435   PetscFunctionReturn(0);
4436 }
4437 
4438 #undef __FUNCT__
4439 #define __FUNCT__ "MatFileSplit"
4440 PetscErrorCode MatFileSplit(Mat A,char *outfile)
4441 {
4442   PetscErrorCode    ierr;
4443   PetscMPIInt       rank;
4444   PetscInt          m,N,i,rstart,nnz;
4445   size_t            len;
4446   const PetscInt    *indx;
4447   PetscViewer       out;
4448   char              *name;
4449   Mat               B;
4450   const PetscScalar *values;
4451 
4452   PetscFunctionBegin;
4453   ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr);
4454   ierr = MatGetSize(A,0,&N);CHKERRQ(ierr);
4455   /* Should this be the type of the diagonal block of A? */
4456   ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr);
4457   ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr);
4458   ierr = MatSetBlockSizes(B,A->rmap->bs,A->cmap->bs);CHKERRQ(ierr);
4459   ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr);
4460   ierr = MatSeqAIJSetPreallocation(B,0,NULL);CHKERRQ(ierr);
4461   ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr);
4462   for (i=0; i<m; i++) {
4463     ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
4464     ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
4465     ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
4466   }
4467   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4468   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4469 
4470   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRQ(ierr);
4471   ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr);
4472   ierr = PetscMalloc1((len+5),&name);CHKERRQ(ierr);
4473   sprintf(name,"%s.%d",outfile,rank);
4474   ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr);
4475   ierr = PetscFree(name);CHKERRQ(ierr);
4476   ierr = MatView(B,out);CHKERRQ(ierr);
4477   ierr = PetscViewerDestroy(&out);CHKERRQ(ierr);
4478   ierr = MatDestroy(&B);CHKERRQ(ierr);
4479   PetscFunctionReturn(0);
4480 }
4481 
4482 extern PetscErrorCode MatDestroy_MPIAIJ(Mat);
4483 #undef __FUNCT__
4484 #define __FUNCT__ "MatDestroy_MPIAIJ_SeqsToMPI"
4485 PetscErrorCode  MatDestroy_MPIAIJ_SeqsToMPI(Mat A)
4486 {
4487   PetscErrorCode      ierr;
4488   Mat_Merge_SeqsToMPI *merge;
4489   PetscContainer      container;
4490 
4491   PetscFunctionBegin;
4492   ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr);
4493   if (container) {
4494     ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr);
4495     ierr = PetscFree(merge->id_r);CHKERRQ(ierr);
4496     ierr = PetscFree(merge->len_s);CHKERRQ(ierr);
4497     ierr = PetscFree(merge->len_r);CHKERRQ(ierr);
4498     ierr = PetscFree(merge->bi);CHKERRQ(ierr);
4499     ierr = PetscFree(merge->bj);CHKERRQ(ierr);
4500     ierr = PetscFree(merge->buf_ri[0]);CHKERRQ(ierr);
4501     ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr);
4502     ierr = PetscFree(merge->buf_rj[0]);CHKERRQ(ierr);
4503     ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr);
4504     ierr = PetscFree(merge->coi);CHKERRQ(ierr);
4505     ierr = PetscFree(merge->coj);CHKERRQ(ierr);
4506     ierr = PetscFree(merge->owners_co);CHKERRQ(ierr);
4507     ierr = PetscLayoutDestroy(&merge->rowmap);CHKERRQ(ierr);
4508     ierr = PetscFree(merge);CHKERRQ(ierr);
4509     ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr);
4510   }
4511   ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr);
4512   PetscFunctionReturn(0);
4513 }
4514 
4515 #include <../src/mat/utils/freespace.h>
4516 #include <petscbt.h>
4517 
4518 #undef __FUNCT__
4519 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJNumeric"
4520 PetscErrorCode  MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat,Mat mpimat)
4521 {
4522   PetscErrorCode      ierr;
4523   MPI_Comm            comm;
4524   Mat_SeqAIJ          *a  =(Mat_SeqAIJ*)seqmat->data;
4525   PetscMPIInt         size,rank,taga,*len_s;
4526   PetscInt            N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj;
4527   PetscInt            proc,m;
4528   PetscInt            **buf_ri,**buf_rj;
4529   PetscInt            k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj;
4530   PetscInt            nrows,**buf_ri_k,**nextrow,**nextai;
4531   MPI_Request         *s_waits,*r_waits;
4532   MPI_Status          *status;
4533   MatScalar           *aa=a->a;
4534   MatScalar           **abuf_r,*ba_i;
4535   Mat_Merge_SeqsToMPI *merge;
4536   PetscContainer      container;
4537 
4538   PetscFunctionBegin;
4539   ierr = PetscObjectGetComm((PetscObject)mpimat,&comm);CHKERRQ(ierr);
4540   ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
4541 
4542   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4543   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4544 
4545   ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr);
4546   ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr);
4547 
4548   bi     = merge->bi;
4549   bj     = merge->bj;
4550   buf_ri = merge->buf_ri;
4551   buf_rj = merge->buf_rj;
4552 
4553   ierr   = PetscMalloc1(size,&status);CHKERRQ(ierr);
4554   owners = merge->rowmap->range;
4555   len_s  = merge->len_s;
4556 
4557   /* send and recv matrix values */
4558   /*-----------------------------*/
4559   ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr);
4560   ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr);
4561 
4562   ierr = PetscMalloc1((merge->nsend+1),&s_waits);CHKERRQ(ierr);
4563   for (proc=0,k=0; proc<size; proc++) {
4564     if (!len_s[proc]) continue;
4565     i    = owners[proc];
4566     ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr);
4567     k++;
4568   }
4569 
4570   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);}
4571   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);}
4572   ierr = PetscFree(status);CHKERRQ(ierr);
4573 
4574   ierr = PetscFree(s_waits);CHKERRQ(ierr);
4575   ierr = PetscFree(r_waits);CHKERRQ(ierr);
4576 
4577   /* insert mat values of mpimat */
4578   /*----------------------------*/
4579   ierr = PetscMalloc1(N,&ba_i);CHKERRQ(ierr);
4580   ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr);
4581 
4582   for (k=0; k<merge->nrecv; k++) {
4583     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th received i-structure */
4584     nrows       = *(buf_ri_k[k]);
4585     nextrow[k]  = buf_ri_k[k]+1;  /* next row number of k-th received i-structure */
4586     nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of the k-th received i-structure */
4587   }
4588 
4589   /* set values of ba */
4590   m = merge->rowmap->n;
4591   for (i=0; i<m; i++) {
4592     arow = owners[rank] + i;
4593     bj_i = bj+bi[i];  /* col indices of the i-th row of mpimat */
4594     bnzi = bi[i+1] - bi[i];
4595     ierr = PetscMemzero(ba_i,bnzi*sizeof(PetscScalar));CHKERRQ(ierr);
4596 
4597     /* add local non-zero vals of this proc's seqmat into ba */
4598     anzi   = ai[arow+1] - ai[arow];
4599     aj     = a->j + ai[arow];
4600     aa     = a->a + ai[arow];
4601     nextaj = 0;
4602     for (j=0; nextaj<anzi; j++) {
4603       if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
4604         ba_i[j] += aa[nextaj++];
4605       }
4606     }
4607 
4608     /* add received vals into ba */
4609     for (k=0; k<merge->nrecv; k++) { /* k-th received message */
4610       /* i-th row */
4611       if (i == *nextrow[k]) {
4612         anzi   = *(nextai[k]+1) - *nextai[k];
4613         aj     = buf_rj[k] + *(nextai[k]);
4614         aa     = abuf_r[k] + *(nextai[k]);
4615         nextaj = 0;
4616         for (j=0; nextaj<anzi; j++) {
4617           if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
4618             ba_i[j] += aa[nextaj++];
4619           }
4620         }
4621         nextrow[k]++; nextai[k]++;
4622       }
4623     }
4624     ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr);
4625   }
4626   ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4627   ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4628 
4629   ierr = PetscFree(abuf_r[0]);CHKERRQ(ierr);
4630   ierr = PetscFree(abuf_r);CHKERRQ(ierr);
4631   ierr = PetscFree(ba_i);CHKERRQ(ierr);
4632   ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
4633   ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
4634   PetscFunctionReturn(0);
4635 }
4636 
4637 extern PetscErrorCode  MatDestroy_MPIAIJ_SeqsToMPI(Mat);
4638 
4639 #undef __FUNCT__
4640 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJSymbolic"
4641 PetscErrorCode  MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat)
4642 {
4643   PetscErrorCode      ierr;
4644   Mat                 B_mpi;
4645   Mat_SeqAIJ          *a=(Mat_SeqAIJ*)seqmat->data;
4646   PetscMPIInt         size,rank,tagi,tagj,*len_s,*len_si,*len_ri;
4647   PetscInt            **buf_rj,**buf_ri,**buf_ri_k;
4648   PetscInt            M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j;
4649   PetscInt            len,proc,*dnz,*onz,bs,cbs;
4650   PetscInt            k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0;
4651   PetscInt            nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai;
4652   MPI_Request         *si_waits,*sj_waits,*ri_waits,*rj_waits;
4653   MPI_Status          *status;
4654   PetscFreeSpaceList  free_space=NULL,current_space=NULL;
4655   PetscBT             lnkbt;
4656   Mat_Merge_SeqsToMPI *merge;
4657   PetscContainer      container;
4658 
4659   PetscFunctionBegin;
4660   ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4661 
4662   /* make sure it is a PETSc comm */
4663   ierr = PetscCommDuplicate(comm,&comm,NULL);CHKERRQ(ierr);
4664   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4665   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4666 
4667   ierr = PetscNew(&merge);CHKERRQ(ierr);
4668   ierr = PetscMalloc1(size,&status);CHKERRQ(ierr);
4669 
4670   /* determine row ownership */
4671   /*---------------------------------------------------------*/
4672   ierr = PetscLayoutCreate(comm,&merge->rowmap);CHKERRQ(ierr);
4673   ierr = PetscLayoutSetLocalSize(merge->rowmap,m);CHKERRQ(ierr);
4674   ierr = PetscLayoutSetSize(merge->rowmap,M);CHKERRQ(ierr);
4675   ierr = PetscLayoutSetBlockSize(merge->rowmap,1);CHKERRQ(ierr);
4676   ierr = PetscLayoutSetUp(merge->rowmap);CHKERRQ(ierr);
4677   ierr = PetscMalloc1(size,&len_si);CHKERRQ(ierr);
4678   ierr = PetscMalloc1(size,&merge->len_s);CHKERRQ(ierr);
4679 
4680   m      = merge->rowmap->n;
4681   owners = merge->rowmap->range;
4682 
4683   /* determine the number of messages to send, their lengths */
4684   /*---------------------------------------------------------*/
4685   len_s = merge->len_s;
4686 
4687   len          = 0; /* length of buf_si[] */
4688   merge->nsend = 0;
4689   for (proc=0; proc<size; proc++) {
4690     len_si[proc] = 0;
4691     if (proc == rank) {
4692       len_s[proc] = 0;
4693     } else {
4694       len_si[proc] = owners[proc+1] - owners[proc] + 1;
4695       len_s[proc]  = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */
4696     }
4697     if (len_s[proc]) {
4698       merge->nsend++;
4699       nrows = 0;
4700       for (i=owners[proc]; i<owners[proc+1]; i++) {
4701         if (ai[i+1] > ai[i]) nrows++;
4702       }
4703       len_si[proc] = 2*(nrows+1);
4704       len         += len_si[proc];
4705     }
4706   }
4707 
4708   /* determine the number and length of messages to receive for ij-structure */
4709   /*-------------------------------------------------------------------------*/
4710   ierr = PetscGatherNumberOfMessages(comm,NULL,len_s,&merge->nrecv);CHKERRQ(ierr);
4711   ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr);
4712 
4713   /* post the Irecv of j-structure */
4714   /*-------------------------------*/
4715   ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr);
4716   ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr);
4717 
4718   /* post the Isend of j-structure */
4719   /*--------------------------------*/
4720   ierr = PetscMalloc2(merge->nsend,&si_waits,merge->nsend,&sj_waits);CHKERRQ(ierr);
4721 
4722   for (proc=0, k=0; proc<size; proc++) {
4723     if (!len_s[proc]) continue;
4724     i    = owners[proc];
4725     ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr);
4726     k++;
4727   }
4728 
4729   /* receives and sends of j-structure are complete */
4730   /*------------------------------------------------*/
4731   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);}
4732   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);}
4733 
4734   /* send and recv i-structure */
4735   /*---------------------------*/
4736   ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr);
4737   ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr);
4738 
4739   ierr   = PetscMalloc1((len+1),&buf_s);CHKERRQ(ierr);
4740   buf_si = buf_s;  /* points to the beginning of k-th msg to be sent */
4741   for (proc=0,k=0; proc<size; proc++) {
4742     if (!len_s[proc]) continue;
4743     /* form outgoing message for i-structure:
4744          buf_si[0]:                 nrows to be sent
4745                [1:nrows]:           row index (global)
4746                [nrows+1:2*nrows+1]: i-structure index
4747     */
4748     /*-------------------------------------------*/
4749     nrows       = len_si[proc]/2 - 1;
4750     buf_si_i    = buf_si + nrows+1;
4751     buf_si[0]   = nrows;
4752     buf_si_i[0] = 0;
4753     nrows       = 0;
4754     for (i=owners[proc]; i<owners[proc+1]; i++) {
4755       anzi = ai[i+1] - ai[i];
4756       if (anzi) {
4757         buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */
4758         buf_si[nrows+1]   = i-owners[proc]; /* local row index */
4759         nrows++;
4760       }
4761     }
4762     ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr);
4763     k++;
4764     buf_si += len_si[proc];
4765   }
4766 
4767   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);}
4768   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);}
4769 
4770   ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr);
4771   for (i=0; i<merge->nrecv; i++) {
4772     ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr);
4773   }
4774 
4775   ierr = PetscFree(len_si);CHKERRQ(ierr);
4776   ierr = PetscFree(len_ri);CHKERRQ(ierr);
4777   ierr = PetscFree(rj_waits);CHKERRQ(ierr);
4778   ierr = PetscFree2(si_waits,sj_waits);CHKERRQ(ierr);
4779   ierr = PetscFree(ri_waits);CHKERRQ(ierr);
4780   ierr = PetscFree(buf_s);CHKERRQ(ierr);
4781   ierr = PetscFree(status);CHKERRQ(ierr);
4782 
4783   /* compute a local seq matrix in each processor */
4784   /*----------------------------------------------*/
4785   /* allocate bi array and free space for accumulating nonzero column info */
4786   ierr  = PetscMalloc1((m+1),&bi);CHKERRQ(ierr);
4787   bi[0] = 0;
4788 
4789   /* create and initialize a linked list */
4790   nlnk = N+1;
4791   ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4792 
4793   /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */
4794   len  = ai[owners[rank+1]] - ai[owners[rank]];
4795   ierr = PetscFreeSpaceGet((PetscInt)(2*len+1),&free_space);CHKERRQ(ierr);
4796 
4797   current_space = free_space;
4798 
4799   /* determine symbolic info for each local row */
4800   ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr);
4801 
4802   for (k=0; k<merge->nrecv; k++) {
4803     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th received i-structure */
4804     nrows       = *buf_ri_k[k];
4805     nextrow[k]  = buf_ri_k[k] + 1;  /* next row number of k-th received i-structure */
4806     nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of the k-th received i-structure */
4807   }
4808 
4809   ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
4810   len  = 0;
4811   for (i=0; i<m; i++) {
4812     bnzi = 0;
4813     /* add local non-zero cols of this proc's seqmat into lnk */
4814     arow  = owners[rank] + i;
4815     anzi  = ai[arow+1] - ai[arow];
4816     aj    = a->j + ai[arow];
4817     ierr  = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4818     bnzi += nlnk;
4819     /* add received col data into lnk */
4820     for (k=0; k<merge->nrecv; k++) { /* k-th received message */
4821       if (i == *nextrow[k]) { /* i-th row */
4822         anzi  = *(nextai[k]+1) - *nextai[k];
4823         aj    = buf_rj[k] + *nextai[k];
4824         ierr  = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4825         bnzi += nlnk;
4826         nextrow[k]++; nextai[k]++;
4827       }
4828     }
4829     if (len < bnzi) len = bnzi;  /* =max(bnzi) */
4830 
4831     /* if free space is not available, make more free space */
4832     if (current_space->local_remaining<bnzi) {
4833       ierr = PetscFreeSpaceGet(bnzi+current_space->total_array_size,&current_space);CHKERRQ(ierr);
4834       nspacedouble++;
4835     }
4836     /* copy data into free space, then initialize lnk */
4837     ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
4838     ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr);
4839 
4840     current_space->array           += bnzi;
4841     current_space->local_used      += bnzi;
4842     current_space->local_remaining -= bnzi;
4843 
4844     bi[i+1] = bi[i] + bnzi;
4845   }
4846 
4847   ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
4848 
4849   ierr = PetscMalloc1((bi[m]+1),&bj);CHKERRQ(ierr);
4850   ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr);
4851   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
4852 
4853   /* create symbolic parallel matrix B_mpi */
4854   /*---------------------------------------*/
4855   ierr = MatGetBlockSizes(seqmat,&bs,&cbs);CHKERRQ(ierr);
4856   ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr);
4857   if (n==PETSC_DECIDE) {
4858     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr);
4859   } else {
4860     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
4861   }
4862   ierr = MatSetBlockSizes(B_mpi,bs,cbs);CHKERRQ(ierr);
4863   ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr);
4864   ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr);
4865   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
4866   ierr = MatSetOption(B_mpi,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
4867 
4868   /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */
4869   B_mpi->assembled    = PETSC_FALSE;
4870   B_mpi->ops->destroy = MatDestroy_MPIAIJ_SeqsToMPI;
4871   merge->bi           = bi;
4872   merge->bj           = bj;
4873   merge->buf_ri       = buf_ri;
4874   merge->buf_rj       = buf_rj;
4875   merge->coi          = NULL;
4876   merge->coj          = NULL;
4877   merge->owners_co    = NULL;
4878 
4879   ierr = PetscCommDestroy(&comm);CHKERRQ(ierr);
4880 
4881   /* attach the supporting struct to B_mpi for reuse */
4882   ierr    = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
4883   ierr    = PetscContainerSetPointer(container,merge);CHKERRQ(ierr);
4884   ierr    = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr);
4885   ierr    = PetscContainerDestroy(&container);CHKERRQ(ierr);
4886   *mpimat = B_mpi;
4887 
4888   ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4889   PetscFunctionReturn(0);
4890 }
4891 
4892 #undef __FUNCT__
4893 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJ"
4894 /*@C
4895       MatCreateMPIAIJSumSeqAIJ - Creates a MPIAIJ matrix by adding sequential
4896                  matrices from each processor
4897 
4898     Collective on MPI_Comm
4899 
4900    Input Parameters:
4901 +    comm - the communicator the parallel matrix will live on
4902 .    seqmat - the input sequential matrix (one per process)
4903 .    m - number of local rows (or PETSC_DECIDE)
4904 .    n - number of local columns (or PETSC_DECIDE)
4905 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4906 
4907    Output Parameter:
4908 .    mpimat - the parallel matrix generated
4909 
4910     Level: advanced
4911 
4912    Notes:
4913      The dimensions of the sequential matrix in each processor MUST be the same.
4914      The input seqmat is included in the container "Mat_Merge_SeqsToMPI", and will be
4915      destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat.
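
   A minimal usage sketch (hedged: each process contributes a sequential matrix
   of identical global dimensions):
.vb
   Mat mpimat;
   MatCreateMPIAIJSumSeqAIJ(comm,seqmat,PETSC_DECIDE,PETSC_DECIDE,MAT_INITIAL_MATRIX,&mpimat);
   /* later, with new values but the same nonzero pattern in seqmat: */
   MatCreateMPIAIJSumSeqAIJ(comm,seqmat,PETSC_DECIDE,PETSC_DECIDE,MAT_REUSE_MATRIX,&mpimat);
.ve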
4916 @*/
4917 PetscErrorCode  MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat)
4918 {
4919   PetscErrorCode ierr;
4920   PetscMPIInt    size;
4921 
4922   PetscFunctionBegin;
4923   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4924   if (size == 1) {
4925     ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4926     if (scall == MAT_INITIAL_MATRIX) {
4927       ierr = MatDuplicate(seqmat,MAT_COPY_VALUES,mpimat);CHKERRQ(ierr);
4928     } else {
4929       ierr = MatCopy(seqmat,*mpimat,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
4930     }
4931     ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4932     PetscFunctionReturn(0);
4933   }
4934   ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4935   if (scall == MAT_INITIAL_MATRIX) {
4936     ierr = MatCreateMPIAIJSumSeqAIJSymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr);
4937   }
4938   ierr = MatCreateMPIAIJSumSeqAIJNumeric(seqmat,*mpimat);CHKERRQ(ierr);
4939   ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4940   PetscFunctionReturn(0);
4941 }
4942 
4943 #undef __FUNCT__
4944 #define __FUNCT__ "MatMPIAIJGetLocalMat"
4945 /*@
4946      MatMPIAIJGetLocalMat - Creates a SeqAIJ matrix from an MPIAIJ matrix by taking all its local rows and putting them into a sequential matrix with
4947           mlocal rows and n columns, where mlocal is the row count obtained with MatGetLocalSize() and n is the global column count obtained
4948           with MatGetSize().
4949 
4950     Not Collective
4951 
4952    Input Parameters:
4953 +    A - the matrix
4954 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4955 
4956    Output Parameter:
4957 .    A_loc - the local sequential matrix generated
4958 
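   A typical usage sketch (hedged: the MAT_REUSE_MATRIX call assumes the
   nonzero pattern of A is unchanged):
.vb
   Mat A_loc;
   MatMPIAIJGetLocalMat(A,MAT_INITIAL_MATRIX,&A_loc);
   /* ... use A_loc ... */
   MatMPIAIJGetLocalMat(A,MAT_REUSE_MATRIX,&A_loc);  /* refresh values after A changes */
   MatDestroy(&A_loc);
.ve
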
4959     Level: developer
4960 
4961 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMatCondensed()
4962 
4963 @*/
4964 PetscErrorCode  MatMPIAIJGetLocalMat(Mat A,MatReuse scall,Mat *A_loc)
4965 {
4966   PetscErrorCode ierr;
4967   Mat_MPIAIJ     *mpimat=(Mat_MPIAIJ*)A->data;
4968   Mat_SeqAIJ     *mat,*a,*b;
4969   PetscInt       *ai,*aj,*bi,*bj,*cmap=mpimat->garray;
4970   MatScalar      *aa,*ba,*cam;
4971   PetscScalar    *ca;
4972   PetscInt       am=A->rmap->n,i,j,k,cstart=A->cmap->rstart;
4973   PetscInt       *ci,*cj,col,ncols_d,ncols_o,jo;
4974   PetscBool      match;
4975 
4976   PetscFunctionBegin;
4977   ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr);
4978   if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MPIAIJ matrix as input");
4979   ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
4980   a = (Mat_SeqAIJ*)(mpimat->A)->data;
4981   b = (Mat_SeqAIJ*)(mpimat->B)->data;
4982   ai = a->i; aj = a->j; bi = b->i; bj = b->j;
4983   aa = a->a; ba = b->a;
4984   if (scall == MAT_INITIAL_MATRIX) {
4985     ierr  = PetscMalloc1((1+am),&ci);CHKERRQ(ierr);
4986     ci[0] = 0;
4987     for (i=0; i<am; i++) {
4988       ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]);
4989     }
4990     ierr = PetscMalloc1((1+ci[am]),&cj);CHKERRQ(ierr);
4991     ierr = PetscMalloc1((1+ci[am]),&ca);CHKERRQ(ierr);
4992     k    = 0;
4993     for (i=0; i<am; i++) {
4994       ncols_o = bi[i+1] - bi[i];
4995       ncols_d = ai[i+1] - ai[i];
4996       /* off-diagonal portion of A */
4997       for (jo=0; jo<ncols_o; jo++) {
4998         col = cmap[*bj];
4999         if (col >= cstart) break;
5000         cj[k]   = col; bj++;
5001         ca[k++] = *ba++;
5002       }
5003       /* diagonal portion of A */
5004       for (j=0; j<ncols_d; j++) {
5005         cj[k]   = cstart + *aj++;
5006         ca[k++] = *aa++;
5007       }
5008       /* off-diagonal portion of A */
5009       for (j=jo; j<ncols_o; j++) {
5010         cj[k]   = cmap[*bj++];
5011         ca[k++] = *ba++;
5012       }
5013     }
5014     /* put together the new matrix */
5015     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr);
5016     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
5017     /* Since these are PETSc arrays, change flags to free them as necessary. */
5018     mat          = (Mat_SeqAIJ*)(*A_loc)->data;
5019     mat->free_a  = PETSC_TRUE;
5020     mat->free_ij = PETSC_TRUE;
5021     mat->nonew   = 0;
5022   } else if (scall == MAT_REUSE_MATRIX) {
5023     mat=(Mat_SeqAIJ*)(*A_loc)->data;
5024     ci = mat->i; cj = mat->j; cam = mat->a;
5025     for (i=0; i<am; i++) {
5026       /* off-diagonal portion of A */
5027       ncols_o = bi[i+1] - bi[i];
5028       for (jo=0; jo<ncols_o; jo++) {
5029         col = cmap[*bj];
5030         if (col >= cstart) break;
5031         *cam++ = *ba++; bj++;
5032       }
5033       /* diagonal portion of A */
5034       ncols_d = ai[i+1] - ai[i];
5035       for (j=0; j<ncols_d; j++) *cam++ = *aa++;
5036       /* off-diagonal portion of A */
5037       for (j=jo; j<ncols_o; j++) {
5038         *cam++ = *ba++; bj++;
5039       }
5040     }
5041   } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
5042   ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
5043   PetscFunctionReturn(0);
5044 }
5045 
5046 #undef __FUNCT__
5047 #define __FUNCT__ "MatMPIAIJGetLocalMatCondensed"
5048 /*@C
5049      MatMPIAIJGetLocalMatCondensed - Creates a SeqAIJ matrix from an MPIAIJ matrix by taking all its local rows and NON-ZERO columns
5050 
5051     Not Collective
5052 
5053    Input Parameters:
5054 +    A - the matrix
5055 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5056 -    row, col - index sets of rows and columns to extract (or NULL)
5057 
5058    Output Parameter:
5059 .    A_loc - the local sequential matrix generated
5060 
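   A typical usage sketch (hedged: passing NULL for row and col takes all local
   rows and all nonzero columns):
.vb
   Mat A_loc;
   MatMPIAIJGetLocalMatCondensed(A,MAT_INITIAL_MATRIX,NULL,NULL,&A_loc);
.ve
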
5061     Level: developer
5062 
5063 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMat()
5064 
5065 @*/
5066 PetscErrorCode  MatMPIAIJGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc)
5067 {
5068   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
5069   PetscErrorCode ierr;
5070   PetscInt       i,start,end,ncols,nzA,nzB,*cmap,imark,*idx;
5071   IS             isrowa,iscola;
5072   Mat            *aloc;
5073   PetscBool      match;
5074 
5075   PetscFunctionBegin;
5076   ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr);
5077   if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MPIAIJ matrix as input");
5078   ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
5079   if (!row) {
5080     start = A->rmap->rstart; end = A->rmap->rend;
5081     ierr  = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr);
5082   } else {
5083     isrowa = *row;
5084   }
5085   if (!col) {
5086     start = A->cmap->rstart;
5087     cmap  = a->garray;
5088     nzA   = a->A->cmap->n;
5089     nzB   = a->B->cmap->n;
5090     ierr  = PetscMalloc1((nzA+nzB), &idx);CHKERRQ(ierr);
5091     ncols = 0;
5092     for (i=0; i<nzB; i++) {
5093       if (cmap[i] < start) idx[ncols++] = cmap[i];
5094       else break;
5095     }
5096     imark = i;
5097     for (i=0; i<nzA; i++) idx[ncols++] = start + i;
5098     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i];
5099     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&iscola);CHKERRQ(ierr);
5100   } else {
5101     iscola = *col;
5102   }
5103   if (scall != MAT_INITIAL_MATRIX) {
5104     ierr    = PetscMalloc(sizeof(Mat),&aloc);CHKERRQ(ierr);
5105     aloc[0] = *A_loc;
5106   }
5107   ierr   = MatGetSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr);
5108   *A_loc = aloc[0];
5109   ierr   = PetscFree(aloc);CHKERRQ(ierr);
5110   if (!row) {
5111     ierr = ISDestroy(&isrowa);CHKERRQ(ierr);
5112   }
5113   if (!col) {
5114     ierr = ISDestroy(&iscola);CHKERRQ(ierr);
5115   }
5116   ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
5117   PetscFunctionReturn(0);
5118 }
5119 
5120 #undef __FUNCT__
5121 #define __FUNCT__ "MatGetBrowsOfAcols"
5122 /*@C
5123     MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns of local A
5124 
5125     Collective on Mat
5126 
5127    Input Parameters:
5128 +    A,B - the matrices in mpiaij format
5129 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5130 -    rowb, colb - index sets of rows and columns of B to extract (or NULL)
5131 
5132    Output Parameter:
5133 +    rowb, colb - index sets of rows and columns of B to extract
5134 -    B_seq - the sequential matrix generated
5135 
5136     Level: developer
5137 
5138 @*/
5139 PetscErrorCode  MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,Mat *B_seq)
5140 {
5141   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
5142   PetscErrorCode ierr;
5143   PetscInt       *idx,i,start,ncols,nzA,nzB,*cmap,imark;
5144   IS             isrowb,iscolb;
5145   Mat            *bseq=NULL;
5146 
5147   PetscFunctionBegin;
5148   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
5149     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
5150   }
5151   ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
5152 
5153   if (scall == MAT_INITIAL_MATRIX) {
5154     start = A->cmap->rstart;
5155     cmap  = a->garray;
5156     nzA   = a->A->cmap->n;
5157     nzB   = a->B->cmap->n;
5158     ierr  = PetscMalloc1((nzA+nzB), &idx);CHKERRQ(ierr);
5159     ncols = 0;
5160     for (i=0; i<nzB; i++) {  /* global columns of A (rows of B) preceding the diagonal block */
5161       if (cmap[i] < start) idx[ncols++] = cmap[i];
5162       else break;
5163     }
5164     imark = i;
5165     for (i=0; i<nzA; i++) idx[ncols++] = start + i;  /* columns of the diagonal block (local rows of B) */
5166     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* global columns of A (rows of B) following the diagonal block */
5167     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&isrowb);CHKERRQ(ierr);
5168     ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr);
5169   } else {
5170     if (!rowb || !colb) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX");
5171     isrowb  = *rowb; iscolb = *colb;
5172     ierr    = PetscMalloc(sizeof(Mat),&bseq);CHKERRQ(ierr);
5173     bseq[0] = *B_seq;
5174   }
5175   ierr   = MatGetSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr);
5176   *B_seq = bseq[0];
5177   ierr   = PetscFree(bseq);CHKERRQ(ierr);
5178   if (!rowb) {
5179     ierr = ISDestroy(&isrowb);CHKERRQ(ierr);
5180   } else {
5181     *rowb = isrowb;
5182   }
5183   if (!colb) {
5184     ierr = ISDestroy(&iscolb);CHKERRQ(ierr);
5185   } else {
5186     *colb = iscolb;
5187   }
5188   ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
5189   PetscFunctionReturn(0);
5190 }
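/*
   A usage sketch (hypothetical calling code; A and B are assembled MPIAIJ
   matrices with compatible layouts). The index sets created on the first
   call are returned through rowb/colb and must be passed back unchanged
   for the MAT_REUSE_MATRIX call:

     Mat B_seq;
     IS  rowb,colb;
     ierr = MatGetBrowsOfAcols(A,B,MAT_INITIAL_MATRIX,&rowb,&colb,&B_seq);CHKERRQ(ierr);
     ... update values of B, same nonzero pattern ...
     ierr = MatGetBrowsOfAcols(A,B,MAT_REUSE_MATRIX,&rowb,&colb,&B_seq);CHKERRQ(ierr);
     ierr = ISDestroy(&rowb);CHKERRQ(ierr);
     ierr = ISDestroy(&colb);CHKERRQ(ierr);
     ierr = MatDestroy(&B_seq);CHKERRQ(ierr);
*/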
5191 
5192 #undef __FUNCT__
5193 #define __FUNCT__ "MatGetBrowsOfAoCols_MPIAIJ"
5194 /*
5195     MatGetBrowsOfAoCols_MPIAIJ - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns
5196     of the OFF-DIAGONAL portion of the local part of A
5197 
5198     Collective on Mat
5199 
5200    Input Parameters:
5201 +    A,B - the matrices in mpiaij format
5202 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5203 
5204    Output Parameters:
5205 +    startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or NULL)
5206 .    startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or NULL)
5207 .    bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or NULL)
5208 -    B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N
5209 
5210     Level: developer
5211 
5212 */
5213 PetscErrorCode  MatGetBrowsOfAoCols_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscInt **startsj_s,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth)
5214 {
5215   VecScatter_MPI_General *gen_to,*gen_from;
5216   PetscErrorCode         ierr;
5217   Mat_MPIAIJ             *a=(Mat_MPIAIJ*)A->data;
5218   Mat_SeqAIJ             *b_oth;
5219   VecScatter             ctx =a->Mvctx;
5220   MPI_Comm               comm;
5221   PetscMPIInt            *rprocs,*sprocs,tag=((PetscObject)ctx)->tag,rank;
5222   PetscInt               *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap->n,row,*b_othi,*b_othj;
5223   PetscScalar            *rvalues,*svalues;
5224   MatScalar              *b_otha,*bufa,*bufA;
5225   PetscInt               i,j,k,l,ll,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len;
5226   MPI_Request            *rwaits = NULL,*swaits = NULL;
5227   MPI_Status             *sstatus,rstatus;
5228   PetscMPIInt            jj;
5229   PetscInt               *cols,sbs,rbs;
5230   PetscScalar            *vals;
5231 
5232   PetscFunctionBegin;
5233   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
5234   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
5235     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
5236   }
5237   ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
5238   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
5239 
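  /* The exchange proceeds in three phases, reusing the matrix-vector scatter
     context a->Mvctx to identify the communicating processes:
       1) i-array: exchange the lengths of the needed rows of B so that each
          process can build the row offsets b_othi of B_oth;
       2) j-array: exchange the global column indices into b_othj;
       3) a-array: exchange the numerical values into b_otha; only this last
          phase is executed again for MAT_REUSE_MATRIX. */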
5240   gen_to   = (VecScatter_MPI_General*)ctx->todata;
5241   gen_from = (VecScatter_MPI_General*)ctx->fromdata;
5242   rvalues  = gen_from->values; /* reused to hold the lengths of the incoming rows */
5243   svalues  = gen_to->values;   /* reused to hold the lengths of the outgoing rows */
5244   nrecvs   = gen_from->n;
5245   nsends   = gen_to->n;
5246 
5247   ierr    = PetscMalloc2(nrecvs,&rwaits,nsends,&swaits);CHKERRQ(ierr);
5248   srow    = gen_to->indices;    /* local row index to be sent */
5249   sstarts = gen_to->starts;
5250   sprocs  = gen_to->procs;
5251   sstatus = gen_to->sstatus;
5252   sbs     = gen_to->bs;
5253   rstarts = gen_from->starts;
5254   rprocs  = gen_from->procs;
5255   rbs     = gen_from->bs;
5256 
5257   if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX;
5258   if (scall == MAT_INITIAL_MATRIX) {
5259     /* i-array */
5260     /*---------*/
5261     /*  post receives */
5262     for (i=0; i<nrecvs; i++) {
5263       rowlen = (PetscInt*)rvalues + rstarts[i]*rbs;
5264       nrows  = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */
5265       ierr   = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5266     }
5267 
5268     /* pack the outgoing message */
5269     ierr = PetscMalloc2(nsends+1,&sstartsj,nrecvs+1,&rstartsj);CHKERRQ(ierr);
5270 
5271     sstartsj[0] = 0;
5272     rstartsj[0] = 0;
5273     len         = 0; /* total length of j or a array to be sent */
5274     k           = 0;
5275     for (i=0; i<nsends; i++) {
5276       rowlen = (PetscInt*)svalues + sstarts[i]*sbs;
5277       nrows  = sstarts[i+1]-sstarts[i]; /* num of block rows */
5278       for (j=0; j<nrows; j++) {
5279         row = srow[k] + B->rmap->range[rank]; /* global row idx */
5280         for (l=0; l<sbs; l++) {
5281           ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr); /* rowlength */
5282 
5283           rowlen[j*sbs+l] = ncols;
5284 
5285           len += ncols;
5286           ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr);
5287         }
5288         k++;
5289       }
5290       ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5291 
5292       sstartsj[i+1] = len;  /* starting point of (i+1)-th outgoing msg in bufj and bufa */
5293     }
5294     /* recvs and sends of i-array are completed */
5295     i = nrecvs;
5296     while (i--) {
5297       ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5298     }
5299     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
5300 
5301     /* allocate buffers for sending j and a arrays */
5302     ierr = PetscMalloc1((len+1),&bufj);CHKERRQ(ierr);
5303     ierr = PetscMalloc1((len+1),&bufa);CHKERRQ(ierr);
5304 
5305     /* create i-array of B_oth */
5306     ierr = PetscMalloc1((aBn+2),&b_othi);CHKERRQ(ierr);
5307 
5308     b_othi[0] = 0;
5309     len       = 0; /* total length of j or a array to be received */
5310     k         = 0;
5311     for (i=0; i<nrecvs; i++) {
5312       rowlen = (PetscInt*)rvalues + rstarts[i]*rbs;
5313       nrows  = rbs*(rstarts[i+1]-rstarts[i]); /* num of rows to be received */
5314       for (j=0; j<nrows; j++) {
5315         b_othi[k+1] = b_othi[k] + rowlen[j];
5316         len        += rowlen[j]; k++;
5317       }
5318       rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */
5319     }
5320 
5321     /* allocate space for j and a arrays of B_oth */
5322     ierr = PetscMalloc1((b_othi[aBn]+1),&b_othj);CHKERRQ(ierr);
5323     ierr = PetscMalloc1((b_othi[aBn]+1),&b_otha);CHKERRQ(ierr);
5324 
5325     /* j-array */
5326     /*---------*/
5327     /*  post receives of j-array */
5328     for (i=0; i<nrecvs; i++) {
5329       nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
5330       ierr  = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5331     }
5332 
5333     /* pack the outgoing message j-array */
5334     k = 0;
5335     for (i=0; i<nsends; i++) {
5336       nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5337       bufJ  = bufj+sstartsj[i];
5338       for (j=0; j<nrows; j++) {
5339         row = srow[k++] + B->rmap->range[rank];  /* global row idx */
5340         for (ll=0; ll<sbs; ll++) {
5341           ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr);
5342           for (l=0; l<ncols; l++) {
5343             *bufJ++ = cols[l];
5344           }
5345           ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr);
5346         }
5347       }
5348       ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5349     }
5350 
5351     /* recvs and sends of j-array are completed */
5352     i = nrecvs;
5353     while (i--) {
5354       ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5355     }
5356     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
5357   } else if (scall == MAT_REUSE_MATRIX) {
5358     sstartsj = *startsj_s;
5359     rstartsj = *startsj_r;
5360     bufa     = *bufa_ptr;
5361     b_oth    = (Mat_SeqAIJ*)(*B_oth)->data;
5362     b_otha   = b_oth->a;
5363   } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
5364 
5365   /* a-array */
5366   /*---------*/
5367   /*  post receives of a-array */
5368   for (i=0; i<nrecvs; i++) {
5369     nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
5370     ierr  = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5371   }
5372 
5373   /* pack the outgoing message a-array */
5374   k = 0;
5375   for (i=0; i<nsends; i++) {
5376     nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5377     bufA  = bufa+sstartsj[i];
5378     for (j=0; j<nrows; j++) {
5379       row = srow[k++] + B->rmap->range[rank];  /* global row idx */
5380       for (ll=0; ll<sbs; ll++) {
5381         ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr);
5382         for (l=0; l<ncols; l++) {
5383           *bufA++ = vals[l];
5384         }
5385         ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr);
5386       }
5387     }
5388     ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5389   }
5390   /* recvs and sends of a-array are completed */
5391   i = nrecvs;
5392   while (i--) {
5393     ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5394   }
5395   if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
5396   ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr);
5397 
5398   if (scall == MAT_INITIAL_MATRIX) {
5399     /* put together the new matrix */
5400     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr);
5401 
5402     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
5403     /* Since these are PETSc arrays, change flags to free them as necessary. */
5404     b_oth          = (Mat_SeqAIJ*)(*B_oth)->data;
5405     b_oth->free_a  = PETSC_TRUE;
5406     b_oth->free_ij = PETSC_TRUE;
5407     b_oth->nonew   = 0;
5408 
5409     ierr = PetscFree(bufj);CHKERRQ(ierr);
5410     if (!startsj_s || !bufa_ptr) {
5411       ierr = PetscFree2(sstartsj,rstartsj);CHKERRQ(ierr);
5412       ierr = PetscFree(bufa);CHKERRQ(ierr); /* the send buffer is not returned to the caller, so free it here */
5413     } else {
5414       *startsj_s = sstartsj;
5415       *startsj_r = rstartsj;
5416       *bufa_ptr  = bufa;
5417     }
5418   }
5419   ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
5420   PetscFunctionReturn(0);
5421 }
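/*
   A usage sketch (hypothetical calling code): the startsj_s, startsj_r and
   bufa arrays returned by the first call must be saved and passed back so
   that the MAT_REUSE_MATRIX path can skip the i- and j-array exchanges:

     Mat       B_oth;
     PetscInt  *startsj_s,*startsj_r;
     MatScalar *bufa;
     ierr = MatGetBrowsOfAoCols_MPIAIJ(A,B,MAT_INITIAL_MATRIX,&startsj_s,&startsj_r,&bufa,&B_oth);CHKERRQ(ierr);
     ... update values of B, same nonzero pattern ...
     ierr = MatGetBrowsOfAoCols_MPIAIJ(A,B,MAT_REUSE_MATRIX,&startsj_s,&startsj_r,&bufa,&B_oth);CHKERRQ(ierr);
     ierr = PetscFree2(startsj_s,startsj_r);CHKERRQ(ierr);
     ierr = PetscFree(bufa);CHKERRQ(ierr);
     ierr = MatDestroy(&B_oth);CHKERRQ(ierr);
*/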
5422 
5423 #undef __FUNCT__
5424 #define __FUNCT__ "MatGetCommunicationStructs"
5425 /*@C
5426   MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication.
5427 
5428   Not Collective
5429 
5430   Input Parameter:
5431 . A - The matrix in mpiaij format
5432 
5433   Output Parameters:
5434 + lvec - The local vector holding off-process values from the argument to a matrix-vector product
5435 . colmap - A map from global column index to local index into lvec
5436 - multScatter - A scatter from the argument of a matrix-vector product to lvec
5437 
5438   Level: developer
5439 
5440 @*/
5441 #if defined(PETSC_USE_CTABLE)
5442 PetscErrorCode  MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter)
5443 #else
5444 PetscErrorCode  MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter)
5445 #endif
5446 {
5447   Mat_MPIAIJ *a;
5448 
5449   PetscFunctionBegin;
5450   PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
5451   PetscValidPointer(lvec, 2);
5452   PetscValidPointer(colmap, 3);
5453   PetscValidPointer(multScatter, 4);
5454   a = (Mat_MPIAIJ*) A->data;
5455   if (lvec) *lvec = a->lvec;
5456   if (colmap) *colmap = a->colmap;
5457   if (multScatter) *multScatter = a->Mvctx;
5458   PetscFunctionReturn(0);
5459 }
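/*
   A minimal access sketch (hypothetical calling code; A is a MATMPIAIJ
   matrix). The returned objects are references owned by the matrix and
   must not be destroyed by the caller:

     Vec        lvec;
     VecScatter sct;
   #if defined(PETSC_USE_CTABLE)
     PetscTable cmap;
   #else
     PetscInt   *cmap;
   #endif
     ierr = MatGetCommunicationStructs(A,&lvec,&cmap,&sct);CHKERRQ(ierr);
*/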
5460 
5461 PETSC_EXTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat,MatType,MatReuse,Mat*);
5462 PETSC_EXTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJPERM(Mat,MatType,MatReuse,Mat*);
5463 PETSC_EXTERN PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat,MatType,MatReuse,Mat*);
5464 
5465 #undef __FUNCT__
5466 #define __FUNCT__ "MatMatMultNumeric_MPIDense_MPIAIJ"
5467 /*
5468     Computes C = A*B as (B'*A')', since computing the dense-times-sparse product A*B directly is untenable; the sparse-times-dense product B'*A' is supported
5469 
5470                n                       p                          p
5471         (              )       (              )         (                  )
5472       m (      A       )  *  n (       B      )   =   m (         C        )
5473         (              )       (              )         (                  )
5474 
5475 */
5476 PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C)
5477 {
5478   PetscErrorCode ierr;
5479   Mat            At,Bt,Ct;
5480 
5481   PetscFunctionBegin;
5482   ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr);
5483   ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr);
5484   ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,1.0,&Ct);CHKERRQ(ierr);
5485   ierr = MatDestroy(&At);CHKERRQ(ierr);
5486   ierr = MatDestroy(&Bt);CHKERRQ(ierr);
5487   ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr);
5488   ierr = MatDestroy(&Ct);CHKERRQ(ierr);
5489   PetscFunctionReturn(0);
5490 }
5491 
5492 #undef __FUNCT__
5493 #define __FUNCT__ "MatMatMultSymbolic_MPIDense_MPIAIJ"
5494 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat *C)
5495 {
5496   PetscErrorCode ierr;
5497   PetscInt       m=A->rmap->n,n=B->cmap->n;
5498   Mat            Cmat;
5499 
5500   PetscFunctionBegin;
5501   if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %D != B->rmap->n %D",A->cmap->n,B->rmap->n);
5502   ierr = MatCreate(PetscObjectComm((PetscObject)A),&Cmat);CHKERRQ(ierr);
5503   ierr = MatSetSizes(Cmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
5504   ierr = MatSetBlockSizes(Cmat,A->rmap->bs,B->cmap->bs);CHKERRQ(ierr);
5505   ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr);
5506   ierr = MatMPIDenseSetPreallocation(Cmat,NULL);CHKERRQ(ierr);
5507   ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5508   ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5509 
5510   Cmat->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIAIJ;
5511 
5512   *C = Cmat;
5513   PetscFunctionReturn(0);
5514 }
5515 
5516 /* ----------------------------------------------------------------*/
5517 #undef __FUNCT__
5518 #define __FUNCT__ "MatMatMult_MPIDense_MPIAIJ"
5519 PetscErrorCode MatMatMult_MPIDense_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C)
5520 {
5521   PetscErrorCode ierr;
5522 
5523   PetscFunctionBegin;
5524   if (scall == MAT_INITIAL_MATRIX) {
5525     ierr = PetscLogEventBegin(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr);
5526     ierr = MatMatMultSymbolic_MPIDense_MPIAIJ(A,B,fill,C);CHKERRQ(ierr);
5527     ierr = PetscLogEventEnd(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr);
5528   }
5529   ierr = PetscLogEventBegin(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr);
5530   ierr = MatMatMultNumeric_MPIDense_MPIAIJ(A,B,*C);CHKERRQ(ierr);
5531   ierr = PetscLogEventEnd(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr);
5532   PetscFunctionReturn(0);
5533 }
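/*
   From user code this kernel is reached through the generic interface; a
   sketch (A is MATMPIDENSE, B is MATMPIAIJ):

     Mat C;
     ierr = MatMatMult(A,B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&C);CHKERRQ(ierr);
     ... update values of A and/or B, same nonzero pattern ...
     ierr = MatMatMult(A,B,MAT_REUSE_MATRIX,PETSC_DEFAULT,&C);CHKERRQ(ierr);
     ierr = MatDestroy(&C);CHKERRQ(ierr);
*/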
5534 
5535 #if defined(PETSC_HAVE_MUMPS)
5536 PETSC_EXTERN PetscErrorCode MatGetFactor_aij_mumps(Mat,MatFactorType,Mat*);
5537 #endif
5538 #if defined(PETSC_HAVE_PASTIX)
5539 PETSC_EXTERN PetscErrorCode MatGetFactor_mpiaij_pastix(Mat,MatFactorType,Mat*);
5540 #endif
5541 #if defined(PETSC_HAVE_SUPERLU_DIST)
5542 PETSC_EXTERN PetscErrorCode MatGetFactor_mpiaij_superlu_dist(Mat,MatFactorType,Mat*);
5543 #endif
5544 #if defined(PETSC_HAVE_CLIQUE)
5545 PETSC_EXTERN PetscErrorCode MatGetFactor_aij_clique(Mat,MatFactorType,Mat*);
5546 #endif
5547 
5548 /*MC
5549    MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.
5550 
5551    Options Database Keys:
5552 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions()
5553 
5554   Level: beginner
5555 
5556 .seealso: MatCreateAIJ()
5557 M*/
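/*
   A typical creation sequence for this type (a sketch; m,n,M,N and the
   preallocation counts d_nz,o_nz are placeholders supplied by the caller):

     Mat A;
     ierr = MatCreate(comm,&A);CHKERRQ(ierr);
     ierr = MatSetSizes(A,m,n,M,N);CHKERRQ(ierr);
     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
     ierr = MatMPIAIJSetPreallocation(A,d_nz,NULL,o_nz,NULL);CHKERRQ(ierr);
     ... MatSetValues(), MatAssemblyBegin()/MatAssemblyEnd() ...
*/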
5558 
5559 #undef __FUNCT__
5560 #define __FUNCT__ "MatCreate_MPIAIJ"
5561 PETSC_EXTERN PetscErrorCode MatCreate_MPIAIJ(Mat B)
5562 {
5563   Mat_MPIAIJ     *b;
5564   PetscErrorCode ierr;
5565   PetscMPIInt    size;
5566 
5567   PetscFunctionBegin;
5568   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRQ(ierr);
5569 
5570   ierr          = PetscNewLog(B,&b);CHKERRQ(ierr);
5571   B->data       = (void*)b;
5572   ierr          = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
5573   B->assembled  = PETSC_FALSE;
5574   B->insertmode = NOT_SET_VALUES;
5575   b->size       = size;
5576 
5577   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank);CHKERRQ(ierr);
5578 
5579   /* build cache for off array entries formed */
5580   ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash);CHKERRQ(ierr);
5581 
5582   b->donotstash  = PETSC_FALSE;
5583   b->colmap      = 0;
5584   b->garray      = 0;
5585   b->roworiented = PETSC_TRUE;
5586 
5587   /* stuff used for matrix vector multiply */
5588   b->lvec  = NULL;
5589   b->Mvctx = NULL;
5590 
5591   /* stuff for MatGetRow() */
5592   b->rowindices   = 0;
5593   b->rowvalues    = 0;
5594   b->getrowactive = PETSC_FALSE;
5595 
5596   /* flexible pointer used in CUSP/CUSPARSE classes */
5597   b->spptr = NULL;
5598 
5599 #if defined(PETSC_HAVE_MUMPS)
5600   ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_mumps_C",MatGetFactor_aij_mumps);CHKERRQ(ierr);
5601 #endif
5602 #if defined(PETSC_HAVE_PASTIX)
5603   ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_pastix_C",MatGetFactor_mpiaij_pastix);CHKERRQ(ierr);
5604 #endif
5605 #if defined(PETSC_HAVE_SUPERLU_DIST)
5606   ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_superlu_dist_C",MatGetFactor_mpiaij_superlu_dist);CHKERRQ(ierr);
5607 #endif
5608 #if defined(PETSC_HAVE_CLIQUE)
5609   ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_clique_C",MatGetFactor_aij_clique);CHKERRQ(ierr);
5610 #endif
5611   ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPIAIJ);CHKERRQ(ierr);
5612   ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPIAIJ);CHKERRQ(ierr);
5613   ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetDiagonalBlock_C",MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr);
5614   ierr = PetscObjectComposeFunction((PetscObject)B,"MatIsTranspose_C",MatIsTranspose_MPIAIJ);CHKERRQ(ierr);
5615   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr);
5616   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr);
5617   ierr = PetscObjectComposeFunction((PetscObject)B,"MatDiagonalScaleLocal_C",MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr);
5618   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijperm_C",MatConvert_MPIAIJ_MPIAIJPERM);CHKERRQ(ierr);
5619   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijcrl_C",MatConvert_MPIAIJ_MPIAIJCRL);CHKERRQ(ierr);
5620   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C",MatConvert_MPIAIJ_MPISBAIJ);CHKERRQ(ierr);
5621   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMult_mpidense_mpiaij_C",MatMatMult_MPIDense_MPIAIJ);CHKERRQ(ierr);
5622   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultSymbolic_mpidense_mpiaij_C",MatMatMultSymbolic_MPIDense_MPIAIJ);CHKERRQ(ierr);
5623   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultNumeric_mpidense_mpiaij_C",MatMatMultNumeric_MPIDense_MPIAIJ);CHKERRQ(ierr);
5624   ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr);
5625   PetscFunctionReturn(0);
5626 }
5627 
5628 #undef __FUNCT__
5629 #define __FUNCT__ "MatCreateMPIAIJWithSplitArrays"
5630 /*@
5631      MatCreateMPIAIJWithSplitArrays - creates a MPI AIJ matrix using arrays that contain the "diagonal"
5632          and "off-diagonal" part of the matrix in CSR format.
5633 
5634    Collective on MPI_Comm
5635 
5636    Input Parameters:
5637 +  comm - MPI communicator
5638 .  m - number of local rows (Cannot be PETSC_DECIDE)
5639 .  n - number of local columns; this value should be the same as the local size used in creating the
5640        x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have it
5641        calculated if N is given). For square matrices n is almost always m.
5642 .  M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given)
5643 .  N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given)
5644 .   i - row indices for "diagonal" portion of matrix
5645 .   j - column indices, which must be local, i.e., based off the start column of the diagonal portion
5646 .   a - matrix values
5647 .   oi - row indices for "off-diagonal" portion of matrix
5648 .   oj - column indices, which must be global, representing global columns in the MPIAIJ matrix
5649 -   oa - matrix values
5650 
5651    Output Parameter:
5652 .   mat - the matrix
5653 
5654    Level: advanced
5655 
5656    Notes:
5657        The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc. The user
5658        must free the arrays once the matrix has been destroyed and not before.
5659 
5660        The i and j indices are 0 based
5661 
5662        See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix
5663 
5664        This sets local rows and cannot be used to set off-processor values.
5665 
5666        Use of this routine is discouraged because it is inflexible and cumbersome to use. It is extremely rare that a
5667        legacy application natively assembles into exactly this split format. The code to do so is nontrivial and does
5668        not easily support in-place reassembly. It is recommended to use MatSetValues() (or a variant thereof) because
5669        the resulting assembly is easier to implement, will work with any matrix format, and the user does not have to
5670        keep track of the underlying array. Use MatSetOption(A,MAT_IGNORE_OFF_PROC_ENTRIES,PETSC_TRUE) to disable all
5671        communication if it is known that only local entries will be set.
5672 
5673 .keywords: matrix, aij, compressed row, sparse, parallel
5674 
5675 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
5676           MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithArrays()
5677 @*/
5678 PetscErrorCode  MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat)
5679 {
5680   PetscErrorCode ierr;
5681   Mat_MPIAIJ     *maij;
5682 
5683   PetscFunctionBegin;
5684   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE or negative");
5685   if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
5686   if (oi[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0");
5687   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
5688   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
5689   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
5690   maij = (Mat_MPIAIJ*) (*mat)->data;
5691 
5692   (*mat)->preallocated = PETSC_TRUE;
5693 
5694   ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr);
5695   ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr);
5696 
5697   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr);
5698   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr);
5699 
5700   ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5701   ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5702   ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5703   ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5704 
5705   ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5706   ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5707   ierr = MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
5708   PetscFunctionReturn(0);
5709 }
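/*
   A sketch on rank 0 of a two-process run (each rank supplies its own
   arrays). With m = n = 2 per process the global matrix is 4 x 4; j[] is
   local to the diagonal block while oj[] holds global column indices:

     Mat         A;
     PetscInt    i[]  = {0,2,3};       row offsets for the 2 local rows
     PetscInt    j[]  = {0,1,1};       local columns of the diagonal block
     PetscScalar a[]  = {1.0,2.0,3.0};
     PetscInt    oi[] = {0,1,1};       one off-diagonal entry, in row 0
     PetscInt    oj[] = {2};           global column owned by rank 1
     PetscScalar oa[] = {4.0};
     ierr = MatCreateMPIAIJWithSplitArrays(PETSC_COMM_WORLD,2,2,PETSC_DETERMINE,PETSC_DETERMINE,i,j,a,oi,oj,oa,&A);CHKERRQ(ierr);
     ... use A; call MatDestroy(&A) before the arrays go out of scope ...
*/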
5710 
5711 /*
5712     Special version for direct calls from Fortran
5713 */
5714 #include <petsc-private/fortranimpl.h>
5715 
5716 #if defined(PETSC_HAVE_FORTRAN_CAPS)
5717 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ
5718 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
5719 #define matsetvaluesmpiaij_ matsetvaluesmpiaij
5720 #endif
5721 
5722 /* Redefine these macros so that they can be used in a void function (which cannot return a PetscErrorCode) */
5723 #undef CHKERRQ
5724 #define CHKERRQ(ierr) CHKERRABORT(PETSC_COMM_WORLD,ierr)
5725 #undef SETERRQ2
5726 #define SETERRQ2(comm,ierr,b,c,d) CHKERRABORT(comm,ierr)
5727 #undef SETERRQ3
5728 #define SETERRQ3(comm,ierr,b,c,d,e) CHKERRABORT(comm,ierr)
5729 #undef SETERRQ
5730 #define SETERRQ(c,ierr,b) CHKERRABORT(c,ierr)
5731 
5732 #undef __FUNCT__
5733 #define __FUNCT__ "matsetvaluesmpiaij_"
5734 PETSC_EXTERN void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr)
5735 {
5736   Mat            mat  = *mmat;
5737   PetscInt       m    = *mm, n = *mn;
5738   InsertMode     addv = *maddv;
5739   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
5740   PetscScalar    value;
5741   PetscErrorCode ierr;
5742 
5743   MatCheckPreallocated(mat,1);
5744   if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv;
5745 
5746 #if defined(PETSC_USE_DEBUG)
5747   else if (mat->insertmode != addv) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
5748 #endif
5749   {
5750     PetscInt  i,j,rstart  = mat->rmap->rstart,rend = mat->rmap->rend;
5751     PetscInt  cstart      = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
5752     PetscBool roworiented = aij->roworiented;
5753 
5754     /* Some Variables required in the macro */
5755     Mat        A                 = aij->A;
5756     Mat_SeqAIJ *a                = (Mat_SeqAIJ*)A->data;
5757     PetscInt   *aimax            = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
5758     MatScalar  *aa               = a->a;
5759     PetscBool  ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES)) ? PETSC_TRUE : PETSC_FALSE);
5760     Mat        B                 = aij->B;
5761     Mat_SeqAIJ *b                = (Mat_SeqAIJ*)B->data;
5762     PetscInt   *bimax            = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
5763     MatScalar  *ba               = b->a;
5764 
5765     PetscInt  *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
5766     PetscInt  nonew = a->nonew;
5767     MatScalar *ap1,*ap2;
5768 
5769     PetscFunctionBegin;
5770     for (i=0; i<m; i++) {
5771       if (im[i] < 0) continue;
5772 #if defined(PETSC_USE_DEBUG)
5773       if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
5774 #endif
5775       if (im[i] >= rstart && im[i] < rend) {
5776         row      = im[i] - rstart;
5777         lastcol1 = -1;
5778         rp1      = aj + ai[row];
5779         ap1      = aa + ai[row];
5780         rmax1    = aimax[row];
5781         nrow1    = ailen[row];
5782         low1     = 0;
5783         high1    = nrow1;
5784         lastcol2 = -1;
5785         rp2      = bj + bi[row];
5786         ap2      = ba + bi[row];
5787         rmax2    = bimax[row];
5788         nrow2    = bilen[row];
5789         low2     = 0;
5790         high2    = nrow2;
5791 
5792         for (j=0; j<n; j++) {
5793           if (roworiented) value = v[i*n+j];
5794           else value = v[i+j*m];
5795           if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
5796           if (in[j] >= cstart && in[j] < cend) {
5797             col = in[j] - cstart;
5798             MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
5799           } else if (in[j] < 0) continue;
5800 #if defined(PETSC_USE_DEBUG)
5801           else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
5802 #endif
5803           else {
5804             if (mat->was_assembled) {
5805               if (!aij->colmap) {
5806                 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
5807               }
5808 #if defined(PETSC_USE_CTABLE)
5809               ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
5810               col--;
5811 #else
5812               col = aij->colmap[in[j]] - 1;
5813 #endif
5814               if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
5815                 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
5816                 col  =  in[j];
5817                 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
5818                 B     = aij->B;
5819                 b     = (Mat_SeqAIJ*)B->data;
5820                 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
5821                 rp2   = bj + bi[row];
5822                 ap2   = ba + bi[row];
5823                 rmax2 = bimax[row];
5824                 nrow2 = bilen[row];
5825                 low2  = 0;
5826                 high2 = nrow2;
5827                 bm    = aij->B->rmap->n;
5828                 ba    = b->a;
5829               }
5830             } else col = in[j];
5831             MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
5832           }
5833         }
5834       } else if (!aij->donotstash) {
5835         if (roworiented) {
5836           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
5837         } else {
5838           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
5839         }
5840       }
5841     }
5842   }
5843   PetscFunctionReturnVoid();
5844 }
5845 
5846