xref: /petsc/src/mat/impls/aij/mpi/mpiaij.c (revision 9f4d3c52fa2fe0bb72fec4f4e85d8e495867af35)
1 
2 
3 #include <../src/mat/impls/aij/mpi/mpiaij.h>   /*I "petscmat.h" I*/
4 #include <petsc/private/vecimpl.h>
5 #include <petsc/private/isimpl.h>
6 #include <petscblaslapack.h>
7 #include <petscsf.h>
8 
9 /*MC
10    MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices.
11 
12    This matrix type is identical to MATSEQAIJ when constructed with a single process communicator,
13    and MATMPIAIJ otherwise.  As a result, for single process communicators,
14   MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
15   for communicators controlling multiple processes.  It is recommended that you call both of
16   the above preallocation routines for simplicity.
17 
18    Options Database Keys:
19 . -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions()
20 
21   Developer Notes: Subclasses include MATAIJCUSP, MATAIJCUSPARSE, MATAIJPERM, MATAIJMKL, and MATAIJCRL; the type also automatically switches over to using inodes when
22    enough of them exist.
23 
24   Level: beginner
25 
26 .seealso: MatCreateAIJ(), MatCreateSeqAIJ(), MATSEQAIJ, MATMPIAIJ
27 M*/
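/*
   A minimal usage sketch of the recommendation above (not taken from this file; the
   communicator comm, the local sizes m and n, and the per-row estimates 5 and 2 are
   illustrative assumptions). Calling both preallocation routines lets the same code run
   whether the communicator has one process or many:

     Mat A;
     ierr = MatCreate(comm,&A);CHKERRQ(ierr);
     ierr = MatSetSizes(A,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
     ierr = MatSetType(A,MATAIJ);CHKERRQ(ierr);
     ierr = MatSeqAIJSetPreallocation(A,5,NULL);CHKERRQ(ierr);          takes effect on a single-process communicator
     ierr = MatMPIAIJSetPreallocation(A,5,NULL,2,NULL);CHKERRQ(ierr);   takes effect on a multi-process communicator
     followed by the usual MatSetValues()/MatAssemblyBegin()/MatAssemblyEnd() calls
*/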
28 
29 /*MC
30    MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices.
31 
32    This matrix type is identical to MATSEQAIJCRL when constructed with a single process communicator,
33    and MATMPIAIJCRL otherwise.  As a result, for single process communicators,
34    MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
35   for communicators controlling multiple processes.  It is recommended that you call both of
36   the above preallocation routines for simplicity.
37 
38    Options Database Keys:
39 . -mat_type aijcrl - sets the matrix type to "aijcrl" during a call to MatSetFromOptions()
40 
41   Level: beginner
42 
43 .seealso: MatCreateMPIAIJCRL(), MATSEQAIJCRL, MATMPIAIJCRL
44 M*/
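/*
   A brief sketch of the options-database route mentioned above (comm and the global sizes
   M and N are illustrative assumptions, not taken from this file): create the matrix
   without an explicit type and let -mat_type aijcrl select it at runtime.

     Mat B;
     ierr = MatCreate(comm,&B);CHKERRQ(ierr);
     ierr = MatSetSizes(B,PETSC_DECIDE,PETSC_DECIDE,M,N);CHKERRQ(ierr);
     ierr = MatSetFromOptions(B);CHKERRQ(ierr);    picks up -mat_type aijcrl from the command line
     followed by the usual preallocation and assembly calls
*/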
45 
46 PetscErrorCode MatSetBlockSizes_MPIAIJ(Mat M, PetscInt rbs, PetscInt cbs)
47 {
48   PetscErrorCode ierr;
49   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)M->data;
50 
51   PetscFunctionBegin;
52   if (mat->A) {
53     ierr = MatSetBlockSizes(mat->A,rbs,cbs);CHKERRQ(ierr);
54     ierr = MatSetBlockSizes(mat->B,rbs,1);CHKERRQ(ierr);
55   }
56   PetscFunctionReturn(0);
57 }
58 
59 PetscErrorCode MatFindNonzeroRows_MPIAIJ(Mat M,IS *keptrows)
60 {
61   PetscErrorCode  ierr;
62   Mat_MPIAIJ      *mat = (Mat_MPIAIJ*)M->data;
63   Mat_SeqAIJ      *a   = (Mat_SeqAIJ*)mat->A->data;
64   Mat_SeqAIJ      *b   = (Mat_SeqAIJ*)mat->B->data;
65   const PetscInt  *ia,*ib;
66   const MatScalar *aa,*bb;
67   PetscInt        na,nb,i,j,*rows,cnt=0,n0rows;
68   PetscInt        m = M->rmap->n,rstart = M->rmap->rstart;
69 
70   PetscFunctionBegin;
71   *keptrows = 0;
72   ia        = a->i;
73   ib        = b->i;
74   for (i=0; i<m; i++) {
75     na = ia[i+1] - ia[i];
76     nb = ib[i+1] - ib[i];
77     if (!na && !nb) {
78       cnt++;
79       goto ok1;
80     }
81     aa = a->a + ia[i];
82     for (j=0; j<na; j++) {
83       if (aa[j] != 0.0) goto ok1;
84     }
85     bb = b->a + ib[i];
86     for (j=0; j <nb; j++) {
87       if (bb[j] != 0.0) goto ok1;
88     }
89     cnt++;
90 ok1:;
91   }
92   ierr = MPIU_Allreduce(&cnt,&n0rows,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)M));CHKERRQ(ierr);
93   if (!n0rows) PetscFunctionReturn(0);
94   ierr = PetscMalloc1(M->rmap->n-cnt,&rows);CHKERRQ(ierr);
95   cnt  = 0;
96   for (i=0; i<m; i++) {
97     na = ia[i+1] - ia[i];
98     nb = ib[i+1] - ib[i];
99     if (!na && !nb) continue;
100     aa = a->a + ia[i];
101     for (j=0; j<na;j++) {
102       if (aa[j] != 0.0) {
103         rows[cnt++] = rstart + i;
104         goto ok2;
105       }
106     }
107     bb = b->a + ib[i];
108     for (j=0; j<nb; j++) {
109       if (bb[j] != 0.0) {
110         rows[cnt++] = rstart + i;
111         goto ok2;
112       }
113     }
114 ok2:;
115   }
116   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),cnt,rows,PETSC_OWN_POINTER,keptrows);CHKERRQ(ierr);
117   PetscFunctionReturn(0);
118 }
119 
120 PetscErrorCode  MatDiagonalSet_MPIAIJ(Mat Y,Vec D,InsertMode is)
121 {
122   PetscErrorCode    ierr;
123   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*) Y->data;
124 
125   PetscFunctionBegin;
126   if (Y->assembled && Y->rmap->rstart == Y->cmap->rstart && Y->rmap->rend == Y->cmap->rend) {
127     ierr = MatDiagonalSet(aij->A,D,is);CHKERRQ(ierr);
128   } else {
129     ierr = MatDiagonalSet_Default(Y,D,is);CHKERRQ(ierr);
130   }
131   PetscFunctionReturn(0);
132 }
133 
134 PetscErrorCode MatFindZeroDiagonals_MPIAIJ(Mat M,IS *zrows)
135 {
136   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)M->data;
137   PetscErrorCode ierr;
138   PetscInt       i,rstart,nrows,*rows;
139 
140   PetscFunctionBegin;
141   *zrows = NULL;
142   ierr   = MatFindZeroDiagonals_SeqAIJ_Private(aij->A,&nrows,&rows);CHKERRQ(ierr);
143   ierr   = MatGetOwnershipRange(M,&rstart,NULL);CHKERRQ(ierr);
144   for (i=0; i<nrows; i++) rows[i] += rstart;
145   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),nrows,rows,PETSC_OWN_POINTER,zrows);CHKERRQ(ierr);
146   PetscFunctionReturn(0);
147 }
148 
149 PetscErrorCode MatGetColumnNorms_MPIAIJ(Mat A,NormType type,PetscReal *norms)
150 {
151   PetscErrorCode ierr;
152   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)A->data;
153   PetscInt       i,n,*garray = aij->garray;
154   Mat_SeqAIJ     *a_aij = (Mat_SeqAIJ*) aij->A->data;
155   Mat_SeqAIJ     *b_aij = (Mat_SeqAIJ*) aij->B->data;
156   PetscReal      *work;
157 
158   PetscFunctionBegin;
159   ierr = MatGetSize(A,NULL,&n);CHKERRQ(ierr);
160   ierr = PetscCalloc1(n,&work);CHKERRQ(ierr);
161   if (type == NORM_2) {
162     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
163       work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]*a_aij->a[i]);
164     }
165     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
166       work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]*b_aij->a[i]);
167     }
168   } else if (type == NORM_1) {
169     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
170       work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]);
171     }
172     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
173       work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]);
174     }
175   } else if (type == NORM_INFINITY) {
176     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
177       work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]);
178     }
179     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
180       work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]),work[garray[b_aij->j[i]]]);
181     }
182 
183   } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Unknown NormType");
184   if (type == NORM_INFINITY) {
185     ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
186   } else {
187     ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
188   }
189   ierr = PetscFree(work);CHKERRQ(ierr);
190   if (type == NORM_2) {
191     for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]);
192   }
193   PetscFunctionReturn(0);
194 }
195 
196 PetscErrorCode MatFindOffBlockDiagonalEntries_MPIAIJ(Mat A,IS *is)
197 {
198   Mat_MPIAIJ      *a  = (Mat_MPIAIJ*)A->data;
199   IS              sis,gis;
200   PetscErrorCode  ierr;
201   const PetscInt  *isis,*igis;
202   PetscInt        n,*iis,nsis,ngis,rstart,i;
203 
204   PetscFunctionBegin;
205   ierr = MatFindOffBlockDiagonalEntries(a->A,&sis);CHKERRQ(ierr);
206   ierr = MatFindNonzeroRows(a->B,&gis);CHKERRQ(ierr);
207   ierr = ISGetSize(gis,&ngis);CHKERRQ(ierr);
208   ierr = ISGetSize(sis,&nsis);CHKERRQ(ierr);
209   ierr = ISGetIndices(sis,&isis);CHKERRQ(ierr);
210   ierr = ISGetIndices(gis,&igis);CHKERRQ(ierr);
211 
212   ierr = PetscMalloc1(ngis+nsis,&iis);CHKERRQ(ierr);
213   ierr = PetscMemcpy(iis,igis,ngis*sizeof(PetscInt));CHKERRQ(ierr);
214   ierr = PetscMemcpy(iis+ngis,isis,nsis*sizeof(PetscInt));CHKERRQ(ierr);
215   n    = ngis + nsis;
216   ierr = PetscSortRemoveDupsInt(&n,iis);CHKERRQ(ierr);
217   ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr);
218   for (i=0; i<n; i++) iis[i] += rstart;
219   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)A),n,iis,PETSC_OWN_POINTER,is);CHKERRQ(ierr);
220 
221   ierr = ISRestoreIndices(sis,&isis);CHKERRQ(ierr);
222   ierr = ISRestoreIndices(gis,&igis);CHKERRQ(ierr);
223   ierr = ISDestroy(&sis);CHKERRQ(ierr);
224   ierr = ISDestroy(&gis);CHKERRQ(ierr);
225   PetscFunctionReturn(0);
226 }
227 
228 /*
229     Distributes a SeqAIJ matrix across a set of processes. Code stolen from
230     MatLoad_MPIAIJ(). Horrible lack of reuse. Should be a routine for each matrix type.
231 
232     Only for square matrices
233 
234     Used by a preconditioner, hence PETSC_EXTERN
235 */
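/*
   Usage sketch for the routine below (a hypothetical caller; gseq stands for a sequential
   AIJ matrix held in full on rank 0 and mloc for the number of locally owned rows):

     Mat Adist;
     ierr = MatDistribute_MPIAIJ(comm,gseq,mloc,MAT_INITIAL_MATRIX,&Adist);CHKERRQ(ierr);
     later, after the entries of gseq change but its nonzero pattern does not:
     ierr = MatDistribute_MPIAIJ(comm,gseq,mloc,MAT_REUSE_MATRIX,&Adist);CHKERRQ(ierr);
*/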
236 PETSC_EXTERN PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat)
237 {
238   PetscMPIInt    rank,size;
239   PetscInt       *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz = 0,*gmataj,cnt,row,*ld,bses[2];
240   PetscErrorCode ierr;
241   Mat            mat;
242   Mat_SeqAIJ     *gmata;
243   PetscMPIInt    tag;
244   MPI_Status     status;
245   PetscBool      aij;
246   MatScalar      *gmataa,*ao,*ad,*gmataarestore=0;
247 
248   PetscFunctionBegin;
249   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
250   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
251   if (!rank) {
252     ierr = PetscObjectTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);CHKERRQ(ierr);
253     if (!aij) SETERRQ1(PetscObjectComm((PetscObject)gmat),PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name);
254   }
255   if (reuse == MAT_INITIAL_MATRIX) {
256     ierr = MatCreate(comm,&mat);CHKERRQ(ierr);
257     ierr = MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
258     ierr = MatGetBlockSizes(gmat,&bses[0],&bses[1]);CHKERRQ(ierr);
259     ierr = MPI_Bcast(bses,2,MPIU_INT,0,comm);CHKERRQ(ierr);
260     ierr = MatSetBlockSizes(mat,bses[0],bses[1]);CHKERRQ(ierr);
261     ierr = MatSetType(mat,MATAIJ);CHKERRQ(ierr);
262     ierr = PetscMalloc1(size+1,&rowners);CHKERRQ(ierr);
263     ierr = PetscMalloc2(m,&dlens,m,&olens);CHKERRQ(ierr);
264     ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
265 
266     rowners[0] = 0;
267     for (i=2; i<=size; i++) rowners[i] += rowners[i-1];
268     rstart = rowners[rank];
269     rend   = rowners[rank+1];
270     ierr   = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
271     if (!rank) {
272       gmata = (Mat_SeqAIJ*) gmat->data;
273       /* send row lengths to all processors */
274       for (i=0; i<m; i++) dlens[i] = gmata->ilen[i];
275       for (i=1; i<size; i++) {
276         ierr = MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
277       }
278       /* determine the number of diagonal and off-diagonal nonzeros in each row */
279       ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
280       ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr);
281       jj   = 0;
282       for (i=0; i<m; i++) {
283         for (j=0; j<dlens[i]; j++) {
284           if (gmata->j[jj] < rstart) ld[i]++;
285           if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++;
286           jj++;
287         }
288       }
289       /* send column indices to other processes */
290       for (i=1; i<size; i++) {
291         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
292         ierr = MPI_Send(&nz,1,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
293         ierr = MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
294       }
295 
296       /* send numerical values to other processes */
297       for (i=1; i<size; i++) {
298         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
299         ierr = MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
300       }
301       gmataa = gmata->a;
302       gmataj = gmata->j;
303 
304     } else {
305       /* receive row lengths */
306       ierr = MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
307       /* receive column indices */
308       ierr = MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
309       ierr = PetscMalloc2(nz,&gmataa,nz,&gmataj);CHKERRQ(ierr);
310       ierr = MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
311       /* determine the number of diagonal and off-diagonal nonzeros in each row */
312       ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
313       ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr);
314       jj   = 0;
315       for (i=0; i<m; i++) {
316         for (j=0; j<dlens[i]; j++) {
317           if (gmataj[jj] < rstart) ld[i]++;
318           if (gmataj[jj] < rstart || gmataj[jj] >= rend) olens[i]++;
319           jj++;
320         }
321       }
322       /* receive numerical values */
323       ierr = PetscMemzero(gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr);
324       ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
325     }
326     /* set preallocation */
327     for (i=0; i<m; i++) {
328       dlens[i] -= olens[i];
329     }
330     ierr = MatSeqAIJSetPreallocation(mat,0,dlens);CHKERRQ(ierr);
331     ierr = MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);CHKERRQ(ierr);
332 
333     for (i=0; i<m; i++) {
334       dlens[i] += olens[i];
335     }
336     cnt = 0;
337     for (i=0; i<m; i++) {
338       row  = rstart + i;
339       ierr = MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);CHKERRQ(ierr);
340       cnt += dlens[i];
341     }
342     if (rank) {
343       ierr = PetscFree2(gmataa,gmataj);CHKERRQ(ierr);
344     }
345     ierr = PetscFree2(dlens,olens);CHKERRQ(ierr);
346     ierr = PetscFree(rowners);CHKERRQ(ierr);
347 
348     ((Mat_MPIAIJ*)(mat->data))->ld = ld;
349 
350     *inmat = mat;
351   } else {   /* column indices are already set; only need to move over numerical values from process 0 */
352     Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data;
353     Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data;
354     mat  = *inmat;
355     ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
356     if (!rank) {
357       /* send numerical values to other processes */
358       gmata  = (Mat_SeqAIJ*) gmat->data;
359       ierr   = MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);CHKERRQ(ierr);
360       gmataa = gmata->a;
361       for (i=1; i<size; i++) {
362         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
363         ierr = MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
364       }
365       nz = gmata->i[rowners[1]]-gmata->i[rowners[0]];
366     } else {
367       /* receive numerical values from process 0 */
368       nz   = Ad->nz + Ao->nz;
369       ierr = PetscMalloc1(nz,&gmataa);CHKERRQ(ierr); gmataarestore = gmataa;
370       ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
371     }
372     /* transfer numerical values into the diagonal A and off diagonal B parts of mat */
373     ld = ((Mat_MPIAIJ*)(mat->data))->ld;
374     ad = Ad->a;
375     ao = Ao->a;
376     if (mat->rmap->n) {
377       i  = 0;
378       nz = ld[i];                                   ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
379       nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
380     }
381     for (i=1; i<mat->rmap->n; i++) {
382       nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
383       nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
384     }
385     i--;
386     if (mat->rmap->n) {
387       nz = Ao->i[i+1] - Ao->i[i] - ld[i];           ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr);
388     }
389     if (rank) {
390       ierr = PetscFree(gmataarestore);CHKERRQ(ierr);
391     }
392   }
393   ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
394   ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
395   PetscFunctionReturn(0);
396 }
397 
398 /*
399   Local utility routine that creates a mapping from the global column
400 number to the local number in the off-diagonal part of the local
401 storage of the matrix.  When PETSC_USE_CTABLE is defined this is scalable, at
402 a slightly higher hash-table lookup cost; without it, it is not scalable (each process
403 holds an integer array of order N) but access is fast.
404 */
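/*
   Illustrative note on how the colmap built below is consumed (gcol and lcol are
   hypothetical names; the same pattern appears verbatim in MatSetValues_MPIAIJ() and
   MatGetValues_MPIAIJ()). Entries are stored shifted by +1 so that 0 can mean "column
   not present in the off-diagonal block":

   #if defined(PETSC_USE_CTABLE)
     ierr = PetscTableFind(aij->colmap,gcol+1,&lcol);CHKERRQ(ierr);
     lcol--;
   #else
     lcol = aij->colmap[gcol] - 1;
   #endif
     if (lcol < 0) the global column gcol has no entry in B on this process
*/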
405 PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat)
406 {
407   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
408   PetscErrorCode ierr;
409   PetscInt       n = aij->B->cmap->n,i;
410 
411   PetscFunctionBegin;
412   if (!aij->garray) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"MPIAIJ Matrix was assembled but is missing garray");
413 #if defined(PETSC_USE_CTABLE)
414   ierr = PetscTableCreate(n,mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr);
415   for (i=0; i<n; i++) {
416     ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1,INSERT_VALUES);CHKERRQ(ierr);
417   }
418 #else
419   ierr = PetscCalloc1(mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr);
420   ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N+1)*sizeof(PetscInt));CHKERRQ(ierr);
421   for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
422 #endif
423   PetscFunctionReturn(0);
424 }
425 
426 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv,orow,ocol)     \
427 { \
428     if (col <= lastcol1)  low1 = 0;     \
429     else                 high1 = nrow1; \
430     lastcol1 = col;\
431     while (high1-low1 > 5) { \
432       t = (low1+high1)/2; \
433       if (rp1[t] > col) high1 = t; \
434       else              low1  = t; \
435     } \
436       for (_i=low1; _i<high1; _i++) { \
437         if (rp1[_i] > col) break; \
438         if (rp1[_i] == col) { \
439           if (addv == ADD_VALUES) ap1[_i] += value;   \
440           else                    ap1[_i] = value; \
441           goto a_noinsert; \
442         } \
443       }  \
444       if (value == 0.0 && ignorezeroentries && row != col) {low1 = 0; high1 = nrow1;goto a_noinsert;} \
445       if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;}                \
446       if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \
447       MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \
448       N = nrow1++ - 1; a->nz++; high1++; \
449       /* shift up all the later entries in this row */ \
450       for (ii=N; ii>=_i; ii--) { \
451         rp1[ii+1] = rp1[ii]; \
452         ap1[ii+1] = ap1[ii]; \
453       } \
454       rp1[_i] = col;  \
455       ap1[_i] = value;  \
456       A->nonzerostate++;\
457       a_noinsert: ; \
458       ailen[row] = nrow1; \
459 }
460 
461 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv,orow,ocol) \
462   { \
463     if (col <= lastcol2) low2 = 0;                        \
464     else high2 = nrow2;                                   \
465     lastcol2 = col;                                       \
466     while (high2-low2 > 5) {                              \
467       t = (low2+high2)/2;                                 \
468       if (rp2[t] > col) high2 = t;                        \
469       else             low2  = t;                         \
470     }                                                     \
471     for (_i=low2; _i<high2; _i++) {                       \
472       if (rp2[_i] > col) break;                           \
473       if (rp2[_i] == col) {                               \
474         if (addv == ADD_VALUES) ap2[_i] += value;         \
475         else                    ap2[_i] = value;          \
476         goto b_noinsert;                                  \
477       }                                                   \
478     }                                                     \
479     if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
480     if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;}                        \
481     if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \
482     MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \
483     N = nrow2++ - 1; b->nz++; high2++;                    \
484     /* shift up all the later entries in this row */      \
485     for (ii=N; ii>=_i; ii--) {                            \
486       rp2[ii+1] = rp2[ii];                                \
487       ap2[ii+1] = ap2[ii];                                \
488     }                                                     \
489     rp2[_i] = col;                                        \
490     ap2[_i] = value;                                      \
491     B->nonzerostate++;                                    \
492     b_noinsert: ;                                         \
493     bilen[row] = nrow2;                                   \
494   }
495 
496 PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[])
497 {
498   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)A->data;
499   Mat_SeqAIJ     *a   = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data;
500   PetscErrorCode ierr;
501   PetscInt       l,*garray = mat->garray,diag;
502 
503   PetscFunctionBegin;
504   /* code only works for square matrices A */
505 
506   /* find size of row to the left of the diagonal part */
507   ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr);
508   row  = row - diag;
509   for (l=0; l<b->i[row+1]-b->i[row]; l++) {
510     if (garray[b->j[b->i[row]+l]] > diag) break;
511   }
512   ierr = PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));CHKERRQ(ierr);
513 
514   /* diagonal part */
515   ierr = PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));CHKERRQ(ierr);
516 
517   /* right of diagonal part */
518   ierr = PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));CHKERRQ(ierr);
519   PetscFunctionReturn(0);
520 }
521 
522 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
523 {
524   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
525   PetscScalar    value;
526   PetscErrorCode ierr;
527   PetscInt       i,j,rstart  = mat->rmap->rstart,rend = mat->rmap->rend;
528   PetscInt       cstart      = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
529   PetscBool      roworiented = aij->roworiented;
530 
531   /* Some Variables required in the macro */
532   Mat        A                 = aij->A;
533   Mat_SeqAIJ *a                = (Mat_SeqAIJ*)A->data;
534   PetscInt   *aimax            = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
535   MatScalar  *aa               = a->a;
536   PetscBool  ignorezeroentries = a->ignorezeroentries;
537   Mat        B                 = aij->B;
538   Mat_SeqAIJ *b                = (Mat_SeqAIJ*)B->data;
539   PetscInt   *bimax            = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
540   MatScalar  *ba               = b->a;
541 
542   PetscInt  *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
543   PetscInt  nonew;
544   MatScalar *ap1,*ap2;
545 
546   PetscFunctionBegin;
547   for (i=0; i<m; i++) {
548     if (im[i] < 0) continue;
549 #if defined(PETSC_USE_DEBUG)
550     if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
551 #endif
552     if (im[i] >= rstart && im[i] < rend) {
553       row      = im[i] - rstart;
554       lastcol1 = -1;
555       rp1      = aj + ai[row];
556       ap1      = aa + ai[row];
557       rmax1    = aimax[row];
558       nrow1    = ailen[row];
559       low1     = 0;
560       high1    = nrow1;
561       lastcol2 = -1;
562       rp2      = bj + bi[row];
563       ap2      = ba + bi[row];
564       rmax2    = bimax[row];
565       nrow2    = bilen[row];
566       low2     = 0;
567       high2    = nrow2;
568 
569       for (j=0; j<n; j++) {
570         if (roworiented) value = v[i*n+j];
571         else             value = v[i+j*m];
572         if (in[j] >= cstart && in[j] < cend) {
573           col   = in[j] - cstart;
574           nonew = a->nonew;
575           if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && row != col) continue;
576           MatSetValues_SeqAIJ_A_Private(row,col,value,addv,im[i],in[j]);
577         } else if (in[j] < 0) continue;
578 #if defined(PETSC_USE_DEBUG)
579         else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
580 #endif
581         else {
582           if (mat->was_assembled) {
583             if (!aij->colmap) {
584               ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
585             }
586 #if defined(PETSC_USE_CTABLE)
587             ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
588             col--;
589 #else
590             col = aij->colmap[in[j]] - 1;
591 #endif
592             if (col < 0 && !((Mat_SeqAIJ*)(aij->B->data))->nonew) {
593               ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
594               col  =  in[j];
595               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
596               B     = aij->B;
597               b     = (Mat_SeqAIJ*)B->data;
598               bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a;
599               rp2   = bj + bi[row];
600               ap2   = ba + bi[row];
601               rmax2 = bimax[row];
602               nrow2 = bilen[row];
603               low2  = 0;
604               high2 = nrow2;
605               bm    = aij->B->rmap->n;
606               ba    = b->a;
607             } else if (col < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", im[i], in[j]);
608           } else col = in[j];
609           nonew = b->nonew;
610           MatSetValues_SeqAIJ_B_Private(row,col,value,addv,im[i],in[j]);
611         }
612       }
613     } else {
614       if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
615       if (!aij->donotstash) {
616         mat->assembled = PETSC_FALSE;
617         if (roworiented) {
618           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
619         } else {
620           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
621         }
622       }
623     }
624   }
625   PetscFunctionReturn(0);
626 }
627 
628 PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
629 {
630   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
631   PetscErrorCode ierr;
632   PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
633   PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
634 
635   PetscFunctionBegin;
636   for (i=0; i<m; i++) {
637     if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/
638     if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
639     if (idxm[i] >= rstart && idxm[i] < rend) {
640       row = idxm[i] - rstart;
641       for (j=0; j<n; j++) {
642         if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */
643         if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
644         if (idxn[j] >= cstart && idxn[j] < cend) {
645           col  = idxn[j] - cstart;
646           ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
647         } else {
648           if (!aij->colmap) {
649             ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
650           }
651 #if defined(PETSC_USE_CTABLE)
652           ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
653           col--;
654 #else
655           col = aij->colmap[idxn[j]] - 1;
656 #endif
657           if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
658           else {
659             ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
660           }
661         }
662       }
663     } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
664   }
665   PetscFunctionReturn(0);
666 }
667 
668 extern PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat,Vec,Vec);
669 
670 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
671 {
672   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
673   PetscErrorCode ierr;
674   PetscInt       nstash,reallocs;
675 
676   PetscFunctionBegin;
677   if (aij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0);
678 
679   ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
680   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
681   ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
682   PetscFunctionReturn(0);
683 }
684 
685 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
686 {
687   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
688   Mat_SeqAIJ     *a   = (Mat_SeqAIJ*)aij->A->data;
689   PetscErrorCode ierr;
690   PetscMPIInt    n;
691   PetscInt       i,j,rstart,ncols,flg;
692   PetscInt       *row,*col;
693   PetscBool      other_disassembled;
694   PetscScalar    *val;
695 
696   /* do not use 'b = (Mat_SeqAIJ*)aij->B->data' as B can be reset in disassembly */
697 
698   PetscFunctionBegin;
699   if (!aij->donotstash && !mat->nooffprocentries) {
700     while (1) {
701       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
702       if (!flg) break;
703 
704       for (i=0; i<n; ) {
705         /* Now identify the consecutive vals belonging to the same row */
706         for (j=i,rstart=row[j]; j<n; j++) {
707           if (row[j] != rstart) break;
708         }
709         if (j < n) ncols = j-i;
710         else       ncols = n-i;
711         /* Now assemble all these values with a single function call */
712         ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,mat->insertmode);CHKERRQ(ierr);
713 
714         i = j;
715       }
716     }
717     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
718   }
719   ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
720   ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);
721 
722   /* determine if any processor has disassembled, if so we must
723      also disassemble ourselves, in order that we may reassemble. */
724   /*
725      if nonzero structure of submatrix B cannot change then we know that
726      no processor disassembled thus we can skip this stuff
727   */
728   if (!((Mat_SeqAIJ*)aij->B->data)->nonew) {
729     ierr = MPIU_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
730     if (mat->was_assembled && !other_disassembled) {
731       ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
732     }
733   }
734   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
735     ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
736   }
737   ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr);
738   ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
739   ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);
740 
741   ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
742 
743   aij->rowvalues = 0;
744 
745   ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
746   if (a->inode.size) mat->ops->multdiagonalblock = MatMultDiagonalBlock_MPIAIJ;
747 
748   /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */
749   if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
750     PetscObjectState state = aij->A->nonzerostate + aij->B->nonzerostate;
751     ierr = MPIU_Allreduce(&state,&mat->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
752   }
753   PetscFunctionReturn(0);
754 }
755 
756 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
757 {
758   Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
759   PetscErrorCode ierr;
760 
761   PetscFunctionBegin;
762   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
763   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
764   PetscFunctionReturn(0);
765 }
766 
767 PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
768 {
769   Mat_MPIAIJ    *mat    = (Mat_MPIAIJ *) A->data;
770   PetscInt      *lrows;
771   PetscInt       r, len;
772   PetscErrorCode ierr;
773 
774   PetscFunctionBegin;
775   /* get locally owned rows */
776   ierr = MatZeroRowsMapLocal_Private(A,N,rows,&len,&lrows);CHKERRQ(ierr);
777   /* fix right hand side if needed */
778   if (x && b) {
779     const PetscScalar *xx;
780     PetscScalar       *bb;
781 
782     ierr = VecGetArrayRead(x, &xx);CHKERRQ(ierr);
783     ierr = VecGetArray(b, &bb);CHKERRQ(ierr);
784     for (r = 0; r < len; ++r) bb[lrows[r]] = diag*xx[lrows[r]];
785     ierr = VecRestoreArrayRead(x, &xx);CHKERRQ(ierr);
786     ierr = VecRestoreArray(b, &bb);CHKERRQ(ierr);
787   }
788   /* Must zero mat->B before mat->A because the (diag != 0.0) case below may put values into mat->B */
789   ierr = MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
790   if (A->congruentlayouts == -1) { /* first time we compare rows and cols layouts */
791     PetscBool cong;
792     ierr = PetscLayoutCompare(A->rmap,A->cmap,&cong);CHKERRQ(ierr);
793     if (cong) A->congruentlayouts = 1;
794     else      A->congruentlayouts = 0;
795   }
796   if ((diag != 0.0) && A->congruentlayouts) {
797     ierr = MatZeroRows(mat->A, len, lrows, diag, NULL, NULL);CHKERRQ(ierr);
798   } else if (diag != 0.0) {
799     ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
800     if (((Mat_SeqAIJ *) mat->A->data)->nonew) SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "MatZeroRows() on rectangular matrices cannot be used with the Mat options\nMAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
801     for (r = 0; r < len; ++r) {
802       const PetscInt row = lrows[r] + A->rmap->rstart;
803       ierr = MatSetValues(A, 1, &row, 1, &row, &diag, INSERT_VALUES);CHKERRQ(ierr);
804     }
805     ierr = MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
806     ierr = MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
807   } else {
808     ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
809   }
810   ierr = PetscFree(lrows);CHKERRQ(ierr);
811 
812   /* only change matrix nonzero state if pattern was allowed to be changed */
813   if (!((Mat_SeqAIJ*)(mat->A->data))->keepnonzeropattern) {
814     PetscObjectState state = mat->A->nonzerostate + mat->B->nonzerostate;
815     ierr = MPIU_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
816   }
817   PetscFunctionReturn(0);
818 }
819 
820 PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
821 {
822   Mat_MPIAIJ        *l = (Mat_MPIAIJ*)A->data;
823   PetscErrorCode    ierr;
824   PetscMPIInt       n = A->rmap->n;
825   PetscInt          i,j,r,m,p = 0,len = 0;
826   PetscInt          *lrows,*owners = A->rmap->range;
827   PetscSFNode       *rrows;
828   PetscSF           sf;
829   const PetscScalar *xx;
830   PetscScalar       *bb,*mask;
831   Vec               xmask,lmask;
832   Mat_SeqAIJ        *aij = (Mat_SeqAIJ*)l->B->data;
833   const PetscInt    *aj, *ii,*ridx;
834   PetscScalar       *aa;
835 
836   PetscFunctionBegin;
837   /* Create SF where leaves are input rows and roots are owned rows */
838   ierr = PetscMalloc1(n, &lrows);CHKERRQ(ierr);
839   for (r = 0; r < n; ++r) lrows[r] = -1;
840   ierr = PetscMalloc1(N, &rrows);CHKERRQ(ierr);
841   for (r = 0; r < N; ++r) {
842     const PetscInt idx   = rows[r];
843     if (idx < 0 || A->rmap->N <= idx) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range [0,%D)",idx,A->rmap->N);
844     if (idx < owners[p] || owners[p+1] <= idx) { /* short-circuit the search if the last p owns this row too */
845       ierr = PetscLayoutFindOwner(A->rmap,idx,&p);CHKERRQ(ierr);
846     }
847     rrows[r].rank  = p;
848     rrows[r].index = rows[r] - owners[p];
849   }
850   ierr = PetscSFCreate(PetscObjectComm((PetscObject) A), &sf);CHKERRQ(ierr);
851   ierr = PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER);CHKERRQ(ierr);
852   /* Collect flags for rows to be zeroed */
853   ierr = PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
854   ierr = PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
855   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
856   /* Compress and put in row numbers */
857   for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r;
858   /* zero diagonal part of matrix */
859   ierr = MatZeroRowsColumns(l->A,len,lrows,diag,x,b);CHKERRQ(ierr);
860   /* handle off diagonal part of matrix */
861   ierr = MatCreateVecs(A,&xmask,NULL);CHKERRQ(ierr);
862   ierr = VecDuplicate(l->lvec,&lmask);CHKERRQ(ierr);
863   ierr = VecGetArray(xmask,&bb);CHKERRQ(ierr);
864   for (i=0; i<len; i++) bb[lrows[i]] = 1;
865   ierr = VecRestoreArray(xmask,&bb);CHKERRQ(ierr);
866   ierr = VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
867   ierr = VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
868   ierr = VecDestroy(&xmask);CHKERRQ(ierr);
869   if (x) {
870     ierr = VecScatterBegin(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
871     ierr = VecScatterEnd(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
872     ierr = VecGetArrayRead(l->lvec,&xx);CHKERRQ(ierr);
873     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
874   }
875   ierr = VecGetArray(lmask,&mask);CHKERRQ(ierr);
876   /* remove zeroed rows of off diagonal matrix */
877   ii = aij->i;
878   for (i=0; i<len; i++) {
879     ierr = PetscMemzero(aij->a + ii[lrows[i]],(ii[lrows[i]+1] - ii[lrows[i]])*sizeof(PetscScalar));CHKERRQ(ierr);
880   }
881   /* loop over all entries of the off-process part of the matrix, zeroing the removed columns */
882   if (aij->compressedrow.use) {
883     m    = aij->compressedrow.nrows;
884     ii   = aij->compressedrow.i;
885     ridx = aij->compressedrow.rindex;
886     for (i=0; i<m; i++) {
887       n  = ii[i+1] - ii[i];
888       aj = aij->j + ii[i];
889       aa = aij->a + ii[i];
890 
891       for (j=0; j<n; j++) {
892         if (PetscAbsScalar(mask[*aj])) {
893           if (b) bb[*ridx] -= *aa*xx[*aj];
894           *aa = 0.0;
895         }
896         aa++;
897         aj++;
898       }
899       ridx++;
900     }
901   } else { /* do not use compressed row format */
902     m = l->B->rmap->n;
903     for (i=0; i<m; i++) {
904       n  = ii[i+1] - ii[i];
905       aj = aij->j + ii[i];
906       aa = aij->a + ii[i];
907       for (j=0; j<n; j++) {
908         if (PetscAbsScalar(mask[*aj])) {
909           if (b) bb[i] -= *aa*xx[*aj];
910           *aa = 0.0;
911         }
912         aa++;
913         aj++;
914       }
915     }
916   }
917   if (x) {
918     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
919     ierr = VecRestoreArrayRead(l->lvec,&xx);CHKERRQ(ierr);
920   }
921   ierr = VecRestoreArray(lmask,&mask);CHKERRQ(ierr);
922   ierr = VecDestroy(&lmask);CHKERRQ(ierr);
923   ierr = PetscFree(lrows);CHKERRQ(ierr);
924 
925   /* only change matrix nonzero state if pattern was allowed to be changed */
926   if (!((Mat_SeqAIJ*)(l->A->data))->keepnonzeropattern) {
927     PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate;
928     ierr = MPIU_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
929   }
930   PetscFunctionReturn(0);
931 }
932 
933 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
934 {
935   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
936   PetscErrorCode ierr;
937   PetscInt       nt;
938 
939   PetscFunctionBegin;
940   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
941   if (nt != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt);
942   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
943   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
944   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
945   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
946   PetscFunctionReturn(0);
947 }
948 
949 PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx)
950 {
951   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
952   PetscErrorCode ierr;
953 
954   PetscFunctionBegin;
955   ierr = MatMultDiagonalBlock(a->A,bb,xx);CHKERRQ(ierr);
956   PetscFunctionReturn(0);
957 }
958 
959 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
960 {
961   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
962   PetscErrorCode ierr;
963 
964   PetscFunctionBegin;
965   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
966   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
967   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
968   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
969   PetscFunctionReturn(0);
970 }
971 
972 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
973 {
974   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
975   PetscErrorCode ierr;
976   PetscBool      merged;
977 
978   PetscFunctionBegin;
979   ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr);
980   /* do nondiagonal part */
981   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
982   if (!merged) {
983     /* send it on its way */
984     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
985     /* do local part */
986     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
987     /* receive remote parts: note this assumes the values are not actually */
988     /* added into yy until the next line */
989     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
990   } else {
991     /* do local part */
992     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
993     /* send it on its way */
994     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
995     /* values actually were received in the Begin() but we need to call this nop */
996     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
997   }
998   PetscFunctionReturn(0);
999 }
1000 
1001 PetscErrorCode MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscBool  *f)
1002 {
1003   MPI_Comm       comm;
1004   Mat_MPIAIJ     *Aij = (Mat_MPIAIJ*) Amat->data, *Bij;
1005   Mat            Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs;
1006   IS             Me,Notme;
1007   PetscErrorCode ierr;
1008   PetscInt       M,N,first,last,*notme,i;
1009   PetscMPIInt    size;
1010 
1011   PetscFunctionBegin;
1012   /* Easy test: symmetric diagonal block */
1013   Bij  = (Mat_MPIAIJ*) Bmat->data; Bdia = Bij->A;
1014   ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr);
1015   if (!*f) PetscFunctionReturn(0);
1016   ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr);
1017   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1018   if (size == 1) PetscFunctionReturn(0);
1019 
1020   /* Hard test: off-diagonal block. This takes a MatCreateSubMatrix. */
1021   ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr);
1022   ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr);
1023   ierr = PetscMalloc1(N-last+first,&notme);CHKERRQ(ierr);
1024   for (i=0; i<first; i++) notme[i] = i;
1025   for (i=last; i<M; i++) notme[i-last+first] = i;
1026   ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,PETSC_COPY_VALUES,&Notme);CHKERRQ(ierr);
1027   ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr);
1028   ierr = MatCreateSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr);
1029   Aoff = Aoffs[0];
1030   ierr = MatCreateSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr);
1031   Boff = Boffs[0];
1032   ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr);
1033   ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr);
1034   ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr);
1035   ierr = ISDestroy(&Me);CHKERRQ(ierr);
1036   ierr = ISDestroy(&Notme);CHKERRQ(ierr);
1037   ierr = PetscFree(notme);CHKERRQ(ierr);
1038   PetscFunctionReturn(0);
1039 }
1040 
1041 PetscErrorCode MatIsSymmetric_MPIAIJ(Mat A,PetscReal tol,PetscBool  *f)
1042 {
1043   PetscErrorCode ierr;
1044 
1045   PetscFunctionBegin;
1046   ierr = MatIsTranspose_MPIAIJ(A,A,tol,f);CHKERRQ(ierr);
1047   PetscFunctionReturn(0);
1048 }
1049 
1050 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1051 {
1052   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1053   PetscErrorCode ierr;
1054 
1055   PetscFunctionBegin;
1056   /* do nondiagonal part */
1057   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1058   /* send it on its way */
1059   ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1060   /* do local part */
1061   ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1062   /* receive remote parts */
1063   ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1064   PetscFunctionReturn(0);
1065 }
1066 
1067 /*
1068   This only works correctly for square matrices where the subblock A->A is the
1069    diagonal block
1070 */
1071 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v)
1072 {
1073   PetscErrorCode ierr;
1074   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1075 
1076   PetscFunctionBegin;
1077   if (A->rmap->N != A->cmap->N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
1078   if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
1079   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
1080   PetscFunctionReturn(0);
1081 }
1082 
1083 PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa)
1084 {
1085   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1086   PetscErrorCode ierr;
1087 
1088   PetscFunctionBegin;
1089   ierr = MatScale(a->A,aa);CHKERRQ(ierr);
1090   ierr = MatScale(a->B,aa);CHKERRQ(ierr);
1091   PetscFunctionReturn(0);
1092 }
1093 
1094 PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
1095 {
1096   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1097   PetscErrorCode ierr;
1098 
1099   PetscFunctionBegin;
1100 #if defined(PETSC_USE_LOG)
1101   PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N);
1102 #endif
1103   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
1104   ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
1105   ierr = MatDestroy(&aij->A);CHKERRQ(ierr);
1106   ierr = MatDestroy(&aij->B);CHKERRQ(ierr);
1107 #if defined(PETSC_USE_CTABLE)
1108   ierr = PetscTableDestroy(&aij->colmap);CHKERRQ(ierr);
1109 #else
1110   ierr = PetscFree(aij->colmap);CHKERRQ(ierr);
1111 #endif
1112   ierr = PetscFree(aij->garray);CHKERRQ(ierr);
1113   ierr = VecDestroy(&aij->lvec);CHKERRQ(ierr);
1114   ierr = VecScatterDestroy(&aij->Mvctx);CHKERRQ(ierr);
1115   ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
1116   ierr = PetscFree(aij->ld);CHKERRQ(ierr);
1117   ierr = PetscFree(mat->data);CHKERRQ(ierr);
1118 
1119   ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
1120   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL);CHKERRQ(ierr);
1121   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL);CHKERRQ(ierr);
1122   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C",NULL);CHKERRQ(ierr);
1123   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C",NULL);CHKERRQ(ierr);
1124   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatResetPreallocation_C",NULL);CHKERRQ(ierr);
1125   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C",NULL);CHKERRQ(ierr);
1126   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C",NULL);CHKERRQ(ierr);
1127   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C",NULL);CHKERRQ(ierr);
1128 #if defined(PETSC_HAVE_ELEMENTAL)
1129   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_elemental_C",NULL);CHKERRQ(ierr);
1130 #endif
1131 #if defined(PETSC_HAVE_HYPRE)
1132   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_hypre_C",NULL);CHKERRQ(ierr);
1133   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMatMult_transpose_mpiaij_mpiaij_C",NULL);CHKERRQ(ierr);
1134 #endif
1135   PetscFunctionReturn(0);
1136 }
1137 
1138 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
1139 {
1140   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1141   Mat_SeqAIJ     *A   = (Mat_SeqAIJ*)aij->A->data;
1142   Mat_SeqAIJ     *B   = (Mat_SeqAIJ*)aij->B->data;
1143   PetscErrorCode ierr;
1144   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
1145   int            fd;
1146   PetscInt       nz,header[4],*row_lengths,*range=0,rlen,i;
1147   PetscInt       nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap->rstart,rnz = 0;
1148   PetscScalar    *column_values;
1149   PetscInt       message_count,flowcontrolcount;
1150   FILE           *file;
1151 
1152   PetscFunctionBegin;
1153   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
1154   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);CHKERRQ(ierr);
1155   nz   = A->nz + B->nz;
1156   ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1157   if (!rank) {
1158     header[0] = MAT_FILE_CLASSID;
1159     header[1] = mat->rmap->N;
1160     header[2] = mat->cmap->N;
1161 
1162     ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1163     ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1164     /* get largest number of rows any processor has */
1165     rlen  = mat->rmap->n;
1166     range = mat->rmap->range;
1167     for (i=1; i<size; i++) rlen = PetscMax(rlen,range[i+1] - range[i]);
1168   } else {
1169     ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1170     rlen = mat->rmap->n;
1171   }
1172 
1173   /* load up the local row counts */
1174   ierr = PetscMalloc1(rlen+1,&row_lengths);CHKERRQ(ierr);
1175   for (i=0; i<mat->rmap->n; i++) row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
1176 
1177   /* store the row lengths to the file */
1178   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1179   if (!rank) {
1180     ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap->n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1181     for (i=1; i<size; i++) {
1182       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1183       rlen = range[i+1] - range[i];
1184       ierr = MPIULong_Recv(row_lengths,rlen,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1185       ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1186     }
1187     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1188   } else {
1189     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1190     ierr = MPIULong_Send(row_lengths,mat->rmap->n,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1191     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1192   }
1193   ierr = PetscFree(row_lengths);CHKERRQ(ierr);
1194 
1195   /* load up the local column indices */
1196   nzmax = nz; /* process 0 needs space for the largest number of nonzeros on any process */
1197   ierr  = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1198   ierr  = PetscMalloc1(nzmax+1,&column_indices);CHKERRQ(ierr);
1199   cnt   = 0;
1200   for (i=0; i<mat->rmap->n; i++) {
1201     for (j=B->i[i]; j<B->i[i+1]; j++) {
1202       if ((col = garray[B->j[j]]) > cstart) break;
1203       column_indices[cnt++] = col;
1204     }
1205     for (k=A->i[i]; k<A->i[i+1]; k++) column_indices[cnt++] = A->j[k] + cstart;
1206     for (; j<B->i[i+1]; j++) column_indices[cnt++] = garray[B->j[j]];
1207   }
1208   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
1209 
1210   /* store the column indices to the file */
1211   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1212   if (!rank) {
1213     MPI_Status status;
1214     ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1215     for (i=1; i<size; i++) {
1216       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1217       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
1218       if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: rnz = %D nzmax = %D",rnz,nzmax);
1219       ierr = MPIULong_Recv(column_indices,rnz,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1220       ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1221     }
1222     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1223   } else {
1224     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1225     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1226     ierr = MPIULong_Send(column_indices,nz,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1227     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1228   }
1229   ierr = PetscFree(column_indices);CHKERRQ(ierr);
1230 
1231   /* load up the local column values */
1232   ierr = PetscMalloc1(nzmax+1,&column_values);CHKERRQ(ierr);
1233   cnt  = 0;
1234   for (i=0; i<mat->rmap->n; i++) {
1235     for (j=B->i[i]; j<B->i[i+1]; j++) {
1236       if (garray[B->j[j]] > cstart) break;
1237       column_values[cnt++] = B->a[j];
1238     }
1239     for (k=A->i[i]; k<A->i[i+1]; k++) column_values[cnt++] = A->a[k];
1240     for (; j<B->i[i+1]; j++) column_values[cnt++] = B->a[j];
1241   }
1242   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
1243 
1244   /* store the column values to the file */
1245   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1246   if (!rank) {
1247     MPI_Status status;
1248     ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1249     for (i=1; i<size; i++) {
1250       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1251       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
1252       if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: rnz = %D nzmax = %D",rnz,nzmax);
1253       ierr = MPIULong_Recv(column_values,rnz,MPIU_SCALAR,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1254       ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1255     }
1256     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1257   } else {
1258     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1259     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1260     ierr = MPIULong_Send(column_values,nz,MPIU_SCALAR,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1261     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1262   }
1263   ierr = PetscFree(column_values);CHKERRQ(ierr);
1264 
1265   ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr);
1266   if (file) fprintf(file,"-matload_block_size %d\n",(int)PetscAbs(mat->rmap->bs));
1267   PetscFunctionReturn(0);
1268 }
1269 
1270 #include <petscdraw.h>
1271 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
1272 {
1273   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
1274   PetscErrorCode    ierr;
1275   PetscMPIInt       rank = aij->rank,size = aij->size;
1276   PetscBool         isdraw,iascii,isbinary;
1277   PetscViewer       sviewer;
1278   PetscViewerFormat format;
1279 
1280   PetscFunctionBegin;
1281   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1282   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1283   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1284   if (iascii) {
1285     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1286     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1287       MatInfo   info;
1288       PetscBool inodes;
1289 
1290       ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
1291       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
1292       ierr = MatInodeGetInodeSizes(aij->A,NULL,(PetscInt**)&inodes,NULL);CHKERRQ(ierr);
1293       ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr);
1294       if (!inodes) {
1295         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, not using I-node routines\n",
1296                                                   rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
1297       } else {
1298         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, using I-node routines\n",
1299                                                   rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
1300       }
1301       ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
1302       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1303       ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
1304       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1305       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1306       ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr);
1307       ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
1308       ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr);
1309       PetscFunctionReturn(0);
1310     } else if (format == PETSC_VIEWER_ASCII_INFO) {
1311       PetscInt inodecount,inodelimit,*inodes;
1312       ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr);
1313       if (inodes) {
1314         ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr);
1315       } else {
1316         ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr);
1317       }
1318       PetscFunctionReturn(0);
1319     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1320       PetscFunctionReturn(0);
1321     }
1322   } else if (isbinary) {
1323     if (size == 1) {
1324       ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1325       ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
1326     } else {
1327       ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr);
1328     }
1329     PetscFunctionReturn(0);
1330   } else if (isdraw) {
1331     PetscDraw draw;
1332     PetscBool isnull;
1333     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
1334     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr);
1335     if (isnull) PetscFunctionReturn(0);
1336   }
1337 
1338   {
1339     /* assemble the entire matrix onto first processor. */
1340     Mat        A;
1341     Mat_SeqAIJ *Aloc;
1342     PetscInt   M = mat->rmap->N,N = mat->cmap->N,m,*ai,*aj,row,*cols,i,*ct;
1343     MatScalar  *a;
1344 
1345     ierr = MatCreate(PetscObjectComm((PetscObject)mat),&A);CHKERRQ(ierr);
1346     if (!rank) {
1347       ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
1348     } else {
1349       ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
1350     }
1351     /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */
1352     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
1353     ierr = MatMPIAIJSetPreallocation(A,0,NULL,0,NULL);CHKERRQ(ierr);
1354     ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
1355     ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)A);CHKERRQ(ierr);
1356 
1357     /* copy over the A part */
1358     Aloc = (Mat_SeqAIJ*)aij->A->data;
1359     m    = aij->A->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1360     row  = mat->rmap->rstart;
1361     for (i=0; i<ai[m]; i++) aj[i] += mat->cmap->rstart;
1362     for (i=0; i<m; i++) {
1363       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
1364       row++;
1365       a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
1366     }
1367     aj = Aloc->j;
1368     for (i=0; i<ai[m]; i++) aj[i] -= mat->cmap->rstart;
1369 
1370     /* copy over the B part */
1371     Aloc = (Mat_SeqAIJ*)aij->B->data;
1372     m    = aij->B->rmap->n;  ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1373     row  = mat->rmap->rstart;
1374     ierr = PetscMalloc1(ai[m]+1,&cols);CHKERRQ(ierr);
1375     ct   = cols;
1376     for (i=0; i<ai[m]; i++) cols[i] = aij->garray[aj[i]];
1377     for (i=0; i<m; i++) {
1378       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
1379       row++;
1380       a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
1381     }
1382     ierr = PetscFree(ct);CHKERRQ(ierr);
1383     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1384     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1385     /*
1386        Everyone has to call to draw the matrix since the graphics waits are
1387        synchronized across all processors that share the PetscDraw object
1388     */
1389     ierr = PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
1390     if (!rank) {
1391       ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1392       ierr = MatView_SeqAIJ(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
1393     }
1394     ierr = PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
1395     ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1396     ierr = MatDestroy(&A);CHKERRQ(ierr);
1397   }
1398   PetscFunctionReturn(0);
1399 }
1400 
1401 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer)
1402 {
1403   PetscErrorCode ierr;
1404   PetscBool      iascii,isdraw,issocket,isbinary;
1405 
1406   PetscFunctionBegin;
1407   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1408   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1409   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1410   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
1411   if (iascii || isdraw || isbinary || issocket) {
1412     ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
1413   }
1414   PetscFunctionReturn(0);
1415 }
1416 
1417 PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
1418 {
1419   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1420   PetscErrorCode ierr;
1421   Vec            bb1 = 0;
1422   PetscBool      hasop;
1423 
1424   PetscFunctionBegin;
1425   if (flag == SOR_APPLY_UPPER) {
1426     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1427     PetscFunctionReturn(0);
1428   }
1429 
1430   if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) {
1431     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
1432   }
1433 
1434   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
1435     if (flag & SOR_ZERO_INITIAL_GUESS) {
1436       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1437       its--;
1438     }
1439 
1440     while (its--) {
1441       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1442       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1443 
1444       /* update rhs: bb1 = bb - B*x */
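           /* mat->lvec holds the scattered ghost values of xx; negating it lets the MatMultAdd
              below compute bb1 = bb - B*x in a single pass */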
1445       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1446       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1447 
1448       /* local sweep */
1449       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1450     }
1451   } else if (flag & SOR_LOCAL_FORWARD_SWEEP) {
1452     if (flag & SOR_ZERO_INITIAL_GUESS) {
1453       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1454       its--;
1455     }
1456     while (its--) {
1457       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1458       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1459 
1460       /* update rhs: bb1 = bb - B*x */
1461       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1462       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1463 
1464       /* local sweep */
1465       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1466     }
1467   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) {
1468     if (flag & SOR_ZERO_INITIAL_GUESS) {
1469       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1470       its--;
1471     }
1472     while (its--) {
1473       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1474       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1475 
1476       /* update rhs: bb1 = bb - B*x */
1477       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1478       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1479 
1480       /* local sweep */
1481       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1482     }
1483   } else if (flag & SOR_EISENSTAT) {
1484     Vec xx1;
1485 
1486     ierr = VecDuplicate(bb,&xx1);CHKERRQ(ierr);
1487     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr);
1488 
1489     ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1490     ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1491     if (!mat->diag) {
1492       ierr = MatCreateVecs(matin,&mat->diag,NULL);CHKERRQ(ierr);
1493       ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr);
1494     }
1495     ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr);
1496     if (hasop) {
1497       ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr);
1498     } else {
1499       ierr = VecPointwiseMult(bb1,mat->diag,xx);CHKERRQ(ierr);
1500     }
1501     ierr = VecAYPX(bb1,(omega-2.0)/omega,bb);CHKERRQ(ierr);
1502 
1503     ierr = MatMultAdd(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr);
1504 
1505     /* local sweep */
1506     ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr);
1507     ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr);
1508     ierr = VecDestroy(&xx1);CHKERRQ(ierr);
1509   } else SETERRQ(PetscObjectComm((PetscObject)matin),PETSC_ERR_SUP,"Parallel SOR not supported");
1510 
1511   ierr = VecDestroy(&bb1);CHKERRQ(ierr);
1512 
1513   matin->factorerrortype = mat->A->factorerrortype;
1514   PetscFunctionReturn(0);
1515 }
1516 
1517 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B)
1518 {
1519   Mat            aA,aB,Aperm;
1520   const PetscInt *rwant,*cwant,*gcols,*ai,*bi,*aj,*bj;
1521   PetscScalar    *aa,*ba;
1522   PetscInt       i,j,m,n,ng,anz,bnz,*dnnz,*onnz,*tdnnz,*tonnz,*rdest,*cdest,*work,*gcdest;
1523   PetscSF        rowsf,sf;
1524   IS             parcolp = NULL;
1525   PetscBool      done;
1526   PetscErrorCode ierr;
1527 
1528   PetscFunctionBegin;
1529   ierr = MatGetLocalSize(A,&m,&n);CHKERRQ(ierr);
1530   ierr = ISGetIndices(rowp,&rwant);CHKERRQ(ierr);
1531   ierr = ISGetIndices(colp,&cwant);CHKERRQ(ierr);
1532   ierr = PetscMalloc3(PetscMax(m,n),&work,m,&rdest,n,&cdest);CHKERRQ(ierr);
1533 
1534   /* Invert row permutation to find out where my rows should go */
1535   ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&rowsf);CHKERRQ(ierr);
1536   ierr = PetscSFSetGraphLayout(rowsf,A->rmap,A->rmap->n,NULL,PETSC_OWN_POINTER,rwant);CHKERRQ(ierr);
1537   ierr = PetscSFSetFromOptions(rowsf);CHKERRQ(ierr);
1538   for (i=0; i<m; i++) work[i] = A->rmap->rstart + i;
1539   ierr = PetscSFReduceBegin(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr);
1540   ierr = PetscSFReduceEnd(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr);
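       /* After the reduction, rdest[i] holds the destination (new) global row index of local row i */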
1541 
1542   /* Invert column permutation to find out where my columns should go */
1543   ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
1544   ierr = PetscSFSetGraphLayout(sf,A->cmap,A->cmap->n,NULL,PETSC_OWN_POINTER,cwant);CHKERRQ(ierr);
1545   ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
1546   for (i=0; i<n; i++) work[i] = A->cmap->rstart + i;
1547   ierr = PetscSFReduceBegin(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr);
1548   ierr = PetscSFReduceEnd(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr);
1549   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
1550 
1551   ierr = ISRestoreIndices(rowp,&rwant);CHKERRQ(ierr);
1552   ierr = ISRestoreIndices(colp,&cwant);CHKERRQ(ierr);
1553   ierr = MatMPIAIJGetSeqAIJ(A,&aA,&aB,&gcols);CHKERRQ(ierr);
1554 
1555   /* Find out where my gcols should go */
1556   ierr = MatGetSize(aB,NULL,&ng);CHKERRQ(ierr);
1557   ierr = PetscMalloc1(ng,&gcdest);CHKERRQ(ierr);
1558   ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
1559   ierr = PetscSFSetGraphLayout(sf,A->cmap,ng,NULL,PETSC_OWN_POINTER,gcols);CHKERRQ(ierr);
1560   ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
1561   ierr = PetscSFBcastBegin(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr);
1562   ierr = PetscSFBcastEnd(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr);
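       /* gcdest[k] now holds the destination (new) global column index of ghost column garray[k] */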
1563   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
1564 
1565   ierr = PetscCalloc4(m,&dnnz,m,&onnz,m,&tdnnz,m,&tonnz);CHKERRQ(ierr);
1566   ierr = MatGetRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr);
1567   ierr = MatGetRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr);
1568   for (i=0; i<m; i++) {
1569     PetscInt row = rdest[i],rowner;
1570     ierr = PetscLayoutFindOwner(A->rmap,row,&rowner);CHKERRQ(ierr);
1571     for (j=ai[i]; j<ai[i+1]; j++) {
1572       PetscInt cowner,col = cdest[aj[j]];
1573       ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr); /* Could build an index for the columns to eliminate this search */
1574       if (rowner == cowner) dnnz[i]++;
1575       else onnz[i]++;
1576     }
1577     for (j=bi[i]; j<bi[i+1]; j++) {
1578       PetscInt cowner,col = gcdest[bj[j]];
1579       ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr);
1580       if (rowner == cowner) dnnz[i]++;
1581       else onnz[i]++;
1582     }
1583   }
1584   ierr = PetscSFBcastBegin(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr);
1585   ierr = PetscSFBcastEnd(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr);
1586   ierr = PetscSFBcastBegin(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr);
1587   ierr = PetscSFBcastEnd(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr);
1588   ierr = PetscSFDestroy(&rowsf);CHKERRQ(ierr);
1589 
1590   ierr = MatCreateAIJ(PetscObjectComm((PetscObject)A),A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N,0,tdnnz,0,tonnz,&Aperm);CHKERRQ(ierr);
1591   ierr = MatSeqAIJGetArray(aA,&aa);CHKERRQ(ierr);
1592   ierr = MatSeqAIJGetArray(aB,&ba);CHKERRQ(ierr);
1593   for (i=0; i<m; i++) {
1594     PetscInt *acols = dnnz,*bcols = onnz; /* Repurpose now-unneeded arrays */
1595     PetscInt j0,rowlen;
1596     rowlen = ai[i+1] - ai[i];
1597     for (j0=j=0; j<rowlen; j0=j) { /* rowlen could be larger than the number of rows m, so insert in batches */
1598       for ( ; j<PetscMin(rowlen,j0+m); j++) acols[j-j0] = cdest[aj[ai[i]+j]];
1599       ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,acols,aa+ai[i]+j0,INSERT_VALUES);CHKERRQ(ierr);
1600     }
1601     rowlen = bi[i+1] - bi[i];
1602     for (j0=j=0; j<rowlen; j0=j) {
1603       for ( ; j<PetscMin(rowlen,j0+m); j++) bcols[j-j0] = gcdest[bj[bi[i]+j]];
1604       ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,bcols,ba+bi[i]+j0,INSERT_VALUES);CHKERRQ(ierr);
1605     }
1606   }
1607   ierr = MatAssemblyBegin(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1608   ierr = MatAssemblyEnd(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1609   ierr = MatRestoreRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr);
1610   ierr = MatRestoreRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr);
1611   ierr = MatSeqAIJRestoreArray(aA,&aa);CHKERRQ(ierr);
1612   ierr = MatSeqAIJRestoreArray(aB,&ba);CHKERRQ(ierr);
1613   ierr = PetscFree4(dnnz,onnz,tdnnz,tonnz);CHKERRQ(ierr);
1614   ierr = PetscFree3(work,rdest,cdest);CHKERRQ(ierr);
1615   ierr = PetscFree(gcdest);CHKERRQ(ierr);
1616   if (parcolp) {ierr = ISDestroy(&colp);CHKERRQ(ierr);}
1617   *B = Aperm;
1618   PetscFunctionReturn(0);
1619 }
1620 
1621 PetscErrorCode  MatGetGhosts_MPIAIJ(Mat mat,PetscInt *nghosts,const PetscInt *ghosts[])
1622 {
1623   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1624   PetscErrorCode ierr;
1625 
1626   PetscFunctionBegin;
1627   ierr = MatGetSize(aij->B,NULL,nghosts);CHKERRQ(ierr);
1628   if (ghosts) *ghosts = aij->garray;
1629   PetscFunctionReturn(0);
1630 }
1631 
1632 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1633 {
1634   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1635   Mat            A    = mat->A,B = mat->B;
1636   PetscErrorCode ierr;
1637   PetscReal      isend[5],irecv[5];
1638 
1639   PetscFunctionBegin;
1640   info->block_size = 1.0;
1641   ierr             = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1642 
1643   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1644   isend[3] = info->memory;  isend[4] = info->mallocs;
1645 
1646   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1647 
1648   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1649   isend[3] += info->memory;  isend[4] += info->mallocs;
1650   if (flag == MAT_LOCAL) {
1651     info->nz_used      = isend[0];
1652     info->nz_allocated = isend[1];
1653     info->nz_unneeded  = isend[2];
1654     info->memory       = isend[3];
1655     info->mallocs      = isend[4];
1656   } else if (flag == MAT_GLOBAL_MAX) {
1657     ierr = MPIU_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);
1658 
1659     info->nz_used      = irecv[0];
1660     info->nz_allocated = irecv[1];
1661     info->nz_unneeded  = irecv[2];
1662     info->memory       = irecv[3];
1663     info->mallocs      = irecv[4];
1664   } else if (flag == MAT_GLOBAL_SUM) {
1665     ierr = MPIU_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);
1666 
1667     info->nz_used      = irecv[0];
1668     info->nz_allocated = irecv[1];
1669     info->nz_unneeded  = irecv[2];
1670     info->memory       = irecv[3];
1671     info->mallocs      = irecv[4];
1672   }
1673   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1674   info->fill_ratio_needed = 0;
1675   info->factor_mallocs    = 0;
1676   PetscFunctionReturn(0);
1677 }
1678 
1679 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscBool flg)
1680 {
1681   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1682   PetscErrorCode ierr;
1683 
1684   PetscFunctionBegin;
1685   switch (op) {
1686   case MAT_NEW_NONZERO_LOCATIONS:
1687   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1688   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1689   case MAT_KEEP_NONZERO_PATTERN:
1690   case MAT_NEW_NONZERO_LOCATION_ERR:
1691   case MAT_USE_INODES:
1692   case MAT_IGNORE_ZERO_ENTRIES:
1693     MatCheckPreallocated(A,1);
1694     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1695     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1696     break;
1697   case MAT_ROW_ORIENTED:
1698     MatCheckPreallocated(A,1);
1699     a->roworiented = flg;
1700 
1701     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1702     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1703     break;
1704   case MAT_NEW_DIAGONALS:
1705     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1706     break;
1707   case MAT_IGNORE_OFF_PROC_ENTRIES:
1708     a->donotstash = flg;
1709     break;
1710   case MAT_SPD:
1711     A->spd_set = PETSC_TRUE;
1712     A->spd     = flg;
1713     if (flg) {
1714       A->symmetric                  = PETSC_TRUE;
1715       A->structurally_symmetric     = PETSC_TRUE;
1716       A->symmetric_set              = PETSC_TRUE;
1717       A->structurally_symmetric_set = PETSC_TRUE;
1718     }
1719     break;
1720   case MAT_SYMMETRIC:
1721     MatCheckPreallocated(A,1);
1722     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1723     break;
1724   case MAT_STRUCTURALLY_SYMMETRIC:
1725     MatCheckPreallocated(A,1);
1726     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1727     break;
1728   case MAT_HERMITIAN:
1729     MatCheckPreallocated(A,1);
1730     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1731     break;
1732   case MAT_SYMMETRY_ETERNAL:
1733     MatCheckPreallocated(A,1);
1734     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1735     break;
1736   case MAT_SUBMAT_SINGLEIS:
1737     A->submat_singleis = flg;
1738     break;
1739   case MAT_STRUCTURE_ONLY:
1740     /* The option is handled directly by MatSetOption() */
1741     break;
1742   default:
1743     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op);
1744   }
1745   PetscFunctionReturn(0);
1746 }
1747 
1748 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1749 {
1750   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1751   PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
1752   PetscErrorCode ierr;
1753   PetscInt       i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart;
1754   PetscInt       nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend;
1755   PetscInt       *cmap,*idx_p;
1756 
1757   PetscFunctionBegin;
1758   if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
1759   mat->getrowactive = PETSC_TRUE;
1760 
1761   if (!mat->rowvalues && (idx || v)) {
1762     /*
1763         allocate enough space to hold information from the longest row.
1764     */
1765     Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data;
1766     PetscInt   max = 1,tmp;
1767     for (i=0; i<matin->rmap->n; i++) {
1768       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1769       if (max < tmp) max = tmp;
1770     }
1771     ierr = PetscMalloc2(max,&mat->rowvalues,max,&mat->rowindices);CHKERRQ(ierr);
1772   }
1773 
1774   if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows");
1775   lrow = row - rstart;
1776 
1777   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1778   if (!v)   {pvA = 0; pvB = 0;}
1779   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1780   ierr  = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1781   ierr  = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1782   nztot = nzA + nzB;
1783 
1784   cmap = mat->garray;
1785   if (v  || idx) {
1786     if (nztot) {
1787       /* Sort by increasing column numbers, assuming A and B already sorted */
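           /* imark counts the off-diagonal (B) entries whose global column lies below cstart; they are
              placed first, followed by the diagonal (A) block entries, then the remaining B entries */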
1788       PetscInt imark = -1;
1789       if (v) {
1790         *v = v_p = mat->rowvalues;
1791         for (i=0; i<nzB; i++) {
1792           if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i];
1793           else break;
1794         }
1795         imark = i;
1796         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1797         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1798       }
1799       if (idx) {
1800         *idx = idx_p = mat->rowindices;
1801         if (imark > -1) {
1802           for (i=0; i<imark; i++) {
1803             idx_p[i] = cmap[cworkB[i]];
1804           }
1805         } else {
1806           for (i=0; i<nzB; i++) {
1807             if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]];
1808             else break;
1809           }
1810           imark = i;
1811         }
1812         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart + cworkA[i];
1813         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]];
1814       }
1815     } else {
1816       if (idx) *idx = 0;
1817       if (v)   *v   = 0;
1818     }
1819   }
1820   *nz  = nztot;
1821   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1822   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1823   PetscFunctionReturn(0);
1824 }
1825 
1826 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1827 {
1828   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1829 
1830   PetscFunctionBegin;
1831   if (!aij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
1832   aij->getrowactive = PETSC_FALSE;
1833   PetscFunctionReturn(0);
1834 }
1835 
1836 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm)
1837 {
1838   Mat_MPIAIJ     *aij  = (Mat_MPIAIJ*)mat->data;
1839   Mat_SeqAIJ     *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data;
1840   PetscErrorCode ierr;
1841   PetscInt       i,j,cstart = mat->cmap->rstart;
1842   PetscReal      sum = 0.0;
1843   MatScalar      *v;
1844 
1845   PetscFunctionBegin;
1846   if (aij->size == 1) {
1847     ierr =  MatNorm(aij->A,type,norm);CHKERRQ(ierr);
1848   } else {
1849     if (type == NORM_FROBENIUS) {
1850       v = amat->a;
1851       for (i=0; i<amat->nz; i++) {
1852         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1853       }
1854       v = bmat->a;
1855       for (i=0; i<bmat->nz; i++) {
1856         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1857       }
1858       ierr  = MPIU_Allreduce(&sum,norm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1859       *norm = PetscSqrtReal(*norm);
1860       ierr = PetscLogFlops(2*amat->nz+2*bmat->nz);CHKERRQ(ierr);
1861     } else if (type == NORM_1) { /* max column norm */
1862       PetscReal *tmp,*tmp2;
1863       PetscInt  *jj,*garray = aij->garray;
1864       ierr  = PetscCalloc1(mat->cmap->N+1,&tmp);CHKERRQ(ierr);
1865       ierr  = PetscMalloc1(mat->cmap->N+1,&tmp2);CHKERRQ(ierr);
1866       *norm = 0.0;
1867       v     = amat->a; jj = amat->j;
1868       for (j=0; j<amat->nz; j++) {
1869         tmp[cstart + *jj++] += PetscAbsScalar(*v);  v++;
1870       }
1871       v = bmat->a; jj = bmat->j;
1872       for (j=0; j<bmat->nz; j++) {
1873         tmp[garray[*jj++]] += PetscAbsScalar(*v); v++;
1874       }
1875       ierr = MPIU_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1876       for (j=0; j<mat->cmap->N; j++) {
1877         if (tmp2[j] > *norm) *norm = tmp2[j];
1878       }
1879       ierr = PetscFree(tmp);CHKERRQ(ierr);
1880       ierr = PetscFree(tmp2);CHKERRQ(ierr);
1881       ierr = PetscLogFlops(PetscMax(amat->nz+bmat->nz-1,0));CHKERRQ(ierr);
1882     } else if (type == NORM_INFINITY) { /* max row norm */
1883       PetscReal ntemp = 0.0;
1884       for (j=0; j<aij->A->rmap->n; j++) {
1885         v   = amat->a + amat->i[j];
1886         sum = 0.0;
1887         for (i=0; i<amat->i[j+1]-amat->i[j]; i++) {
1888           sum += PetscAbsScalar(*v); v++;
1889         }
1890         v = bmat->a + bmat->i[j];
1891         for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) {
1892           sum += PetscAbsScalar(*v); v++;
1893         }
1894         if (sum > ntemp) ntemp = sum;
1895       }
1896       ierr = MPIU_Allreduce(&ntemp,norm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1897       ierr = PetscLogFlops(PetscMax(amat->nz+bmat->nz-1,0));CHKERRQ(ierr);
1898     } else SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"No support for two norm");
1899   }
1900   PetscFunctionReturn(0);
1901 }
1902 
1903 PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout)
1904 {
1905   Mat_MPIAIJ     *a   = (Mat_MPIAIJ*)A->data;
1906   Mat_SeqAIJ     *Aloc=(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data;
1907   PetscErrorCode ierr;
1908   PetscInt       M      = A->rmap->N,N = A->cmap->N,ma,na,mb,nb,*ai,*aj,*bi,*bj,row,*cols,*cols_tmp,i;
1909   PetscInt       cstart = A->cmap->rstart,ncol;
1910   Mat            B;
1911   MatScalar      *array;
1912 
1913   PetscFunctionBegin;
1914   if (reuse == MAT_INPLACE_MATRIX && M != N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
1915 
1916   ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n; nb = a->B->cmap->n;
1917   ai = Aloc->i; aj = Aloc->j;
1918   bi = Bloc->i; bj = Bloc->j;
1919   if (reuse == MAT_INITIAL_MATRIX || *matout == A) {
1920     PetscInt             *d_nnz,*g_nnz,*o_nnz;
1921     PetscSFNode          *oloc;
1922     PETSC_UNUSED PetscSF sf;
1923 
1924     ierr = PetscMalloc4(na,&d_nnz,na,&o_nnz,nb,&g_nnz,nb,&oloc);CHKERRQ(ierr);
1925     /* compute d_nnz for preallocation */
1926     ierr = PetscMemzero(d_nnz,na*sizeof(PetscInt));CHKERRQ(ierr);
1927     for (i=0; i<ai[ma]; i++) {
1928       d_nnz[aj[i]]++;
1929       aj[i] += cstart; /* global col index to be used by MatSetValues() */
1930     }
1931     /* compute local off-diagonal contributions */
1932     ierr = PetscMemzero(g_nnz,nb*sizeof(PetscInt));CHKERRQ(ierr);
1933     for (i=0; i<bi[ma]; i++) g_nnz[bj[i]]++;
1934     /* map those to global */
1935     ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
1936     ierr = PetscSFSetGraphLayout(sf,A->cmap,nb,NULL,PETSC_USE_POINTER,a->garray);CHKERRQ(ierr);
1937     ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
1938     ierr = PetscMemzero(o_nnz,na*sizeof(PetscInt));CHKERRQ(ierr);
1939     ierr = PetscSFReduceBegin(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr);
1940     ierr = PetscSFReduceEnd(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr);
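         /* After the reduction, o_nnz[c] is the total (over all processes) number of off-diagonal
            entries in global column cstart+c of A, i.e. the off-diagonal preallocation for the
            corresponding local row of the transpose */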
1941     ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
1942 
1943     ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
1944     ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
1945     ierr = MatSetBlockSizes(B,PetscAbs(A->cmap->bs),PetscAbs(A->rmap->bs));CHKERRQ(ierr);
1946     ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
1947     ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
1948     ierr = PetscFree4(d_nnz,o_nnz,g_nnz,oloc);CHKERRQ(ierr);
1949   } else {
1950     B    = *matout;
1951     ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
1952     for (i=0; i<ai[ma]; i++) aj[i] += cstart; /* global col index to be used by MatSetValues() */
1953   }
1954 
1955   /* copy over the A part */
1956   array = Aloc->a;
1957   row   = A->rmap->rstart;
1958   for (i=0; i<ma; i++) {
1959     ncol = ai[i+1]-ai[i];
1960     ierr = MatSetValues(B,ncol,aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1961     row++;
1962     array += ncol; aj += ncol;
1963   }
1964   aj = Aloc->j;
1965   for (i=0; i<ai[ma]; i++) aj[i] -= cstart; /* restore local col index */
1966 
1967   /* copy over the B part */
1968   ierr  = PetscCalloc1(bi[mb],&cols);CHKERRQ(ierr);
1969   array = Bloc->a;
1970   row   = A->rmap->rstart;
1971   for (i=0; i<bi[mb]; i++) cols[i] = a->garray[bj[i]];
1972   cols_tmp = cols;
1973   for (i=0; i<mb; i++) {
1974     ncol = bi[i+1]-bi[i];
1975     ierr = MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1976     row++;
1977     array += ncol; cols_tmp += ncol;
1978   }
1979   ierr = PetscFree(cols);CHKERRQ(ierr);
1980 
1981   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1982   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1983   if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) {
1984     *matout = B;
1985   } else {
1986     ierr = MatHeaderMerge(A,&B);CHKERRQ(ierr);
1987   }
1988   PetscFunctionReturn(0);
1989 }
1990 
1991 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
1992 {
1993   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1994   Mat            a    = aij->A,b = aij->B;
1995   PetscErrorCode ierr;
1996   PetscInt       s1,s2,s3;
1997 
1998   PetscFunctionBegin;
1999   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
2000   if (rr) {
2001     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
2002     if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
2003     /* Overlap communication with computation. */
2004     ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2005   }
2006   if (ll) {
2007     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
2008     if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
2009     ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr);
2010   }
2011   /* scale the diagonal block */
2012   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
2013 
2014   if (rr) {
2015     /* Do a scatter end and then right scale the off-diagonal block */
2016     ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2017     ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr);
2018   }
2019   PetscFunctionReturn(0);
2020 }
2021 
2022 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A)
2023 {
2024   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2025   PetscErrorCode ierr;
2026 
2027   PetscFunctionBegin;
2028   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
2029   PetscFunctionReturn(0);
2030 }
2031 
2032 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscBool  *flag)
2033 {
2034   Mat_MPIAIJ     *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data;
2035   Mat            a,b,c,d;
2036   PetscBool      flg;
2037   PetscErrorCode ierr;
2038 
2039   PetscFunctionBegin;
2040   a = matA->A; b = matA->B;
2041   c = matB->A; d = matB->B;
2042 
2043   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
2044   if (flg) {
2045     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
2046   }
2047   ierr = MPIU_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
2048   PetscFunctionReturn(0);
2049 }
2050 
2051 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
2052 {
2053   PetscErrorCode ierr;
2054   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2055   Mat_MPIAIJ     *b = (Mat_MPIAIJ*)B->data;
2056 
2057   PetscFunctionBegin;
2058   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
2059   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
2060     /* Because of the column compression in the off-process part of the matrix a->B,
2061        the number of columns in a->B and b->B may differ, so we cannot call MatCopy()
2062        directly on the two parts. If need be, a copy more efficient than MatCopy_Basic()
2063        could be provided by first uncompressing the a->B matrices and then copying the
2064        submatrices. */
2065     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
2066   } else {
2067     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
2068     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
2069   }
2070   ierr = PetscObjectStateIncrease((PetscObject)B);CHKERRQ(ierr);
2071   PetscFunctionReturn(0);
2072 }
2073 
2074 PetscErrorCode MatSetUp_MPIAIJ(Mat A)
2075 {
2076   PetscErrorCode ierr;
2077 
2078   PetscFunctionBegin;
2079   ierr =  MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
2080   PetscFunctionReturn(0);
2081 }
2082 
2083 /*
2084    Computes the number of nonzeros per row needed for preallocation when X and Y
2085    have different nonzero structure.
2086 */
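/*
   Illustrative example: if row i of X has global columns {1,4,7} (via xltog) and row i of Y has
   global columns {1,3,7,9} (via yltog), the merged union {1,3,4,7,9} gives nnz[i] = 5.
*/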
2087 PetscErrorCode MatAXPYGetPreallocation_MPIX_private(PetscInt m,const PetscInt *xi,const PetscInt *xj,const PetscInt *xltog,const PetscInt *yi,const PetscInt *yj,const PetscInt *yltog,PetscInt *nnz)
2088 {
2089   PetscInt       i,j,k,nzx,nzy;
2090 
2091   PetscFunctionBegin;
2092   /* Set the number of nonzeros in the new matrix */
2093   for (i=0; i<m; i++) {
2094     const PetscInt *xjj = xj+xi[i],*yjj = yj+yi[i];
2095     nzx = xi[i+1] - xi[i];
2096     nzy = yi[i+1] - yi[i];
2097     nnz[i] = 0;
2098     for (j=0,k=0; j<nzx; j++) {                   /* Point in X */
2099       for (; k<nzy && yltog[yjj[k]]<xltog[xjj[j]]; k++) nnz[i]++; /* Catch up to X */
2100       if (k<nzy && yltog[yjj[k]]==xltog[xjj[j]]) k++;             /* Skip duplicate */
2101       nnz[i]++;
2102     }
2103     for (; k<nzy; k++) nnz[i]++;
2104   }
2105   PetscFunctionReturn(0);
2106 }
2107 
2108 /* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */
2109 static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y,const PetscInt *yltog,Mat X,const PetscInt *xltog,PetscInt *nnz)
2110 {
2111   PetscErrorCode ierr;
2112   PetscInt       m = Y->rmap->N;
2113   Mat_SeqAIJ     *x = (Mat_SeqAIJ*)X->data;
2114   Mat_SeqAIJ     *y = (Mat_SeqAIJ*)Y->data;
2115 
2116   PetscFunctionBegin;
2117   ierr = MatAXPYGetPreallocation_MPIX_private(m,x->i,x->j,xltog,y->i,y->j,yltog,nnz);CHKERRQ(ierr);
2118   PetscFunctionReturn(0);
2119 }
2120 
2121 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
2122 {
2123   PetscErrorCode ierr;
2124   Mat_MPIAIJ     *xx = (Mat_MPIAIJ*)X->data,*yy = (Mat_MPIAIJ*)Y->data;
2125   PetscBLASInt   bnz,one=1;
2126   Mat_SeqAIJ     *x,*y;
2127 
2128   PetscFunctionBegin;
2129   if (str == SAME_NONZERO_PATTERN) {
2130     PetscScalar alpha = a;
2131     x    = (Mat_SeqAIJ*)xx->A->data;
2132     ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr);
2133     y    = (Mat_SeqAIJ*)yy->A->data;
2134     PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
2135     x    = (Mat_SeqAIJ*)xx->B->data;
2136     y    = (Mat_SeqAIJ*)yy->B->data;
2137     ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr);
2138     PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
2139     ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr);
2140   } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
2141     ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
2142   } else {
2143     Mat      B;
2144     PetscInt *nnz_d,*nnz_o;
2145     ierr = PetscMalloc1(yy->A->rmap->N,&nnz_d);CHKERRQ(ierr);
2146     ierr = PetscMalloc1(yy->B->rmap->N,&nnz_o);CHKERRQ(ierr);
2147     ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr);
2148     ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr);
2149     ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr);
2150     ierr = MatSetBlockSizesFromMats(B,Y,Y);CHKERRQ(ierr);
2151     ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr);
2152     ierr = MatAXPYGetPreallocation_SeqAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr);
2153     ierr = MatAXPYGetPreallocation_MPIAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr);
2154     ierr = MatMPIAIJSetPreallocation(B,0,nnz_d,0,nnz_o);CHKERRQ(ierr);
2155     ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr);
2156     ierr = MatHeaderReplace(Y,&B);CHKERRQ(ierr);
2157     ierr = PetscFree(nnz_d);CHKERRQ(ierr);
2158     ierr = PetscFree(nnz_o);CHKERRQ(ierr);
2159   }
2160   PetscFunctionReturn(0);
2161 }
2162 
2163 extern PetscErrorCode  MatConjugate_SeqAIJ(Mat);
2164 
2165 PetscErrorCode  MatConjugate_MPIAIJ(Mat mat)
2166 {
2167 #if defined(PETSC_USE_COMPLEX)
2168   PetscErrorCode ierr;
2169   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2170 
2171   PetscFunctionBegin;
2172   ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr);
2173   ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr);
2174 #else
2175   PetscFunctionBegin;
2176 #endif
2177   PetscFunctionReturn(0);
2178 }
2179 
2180 PetscErrorCode MatRealPart_MPIAIJ(Mat A)
2181 {
2182   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2183   PetscErrorCode ierr;
2184 
2185   PetscFunctionBegin;
2186   ierr = MatRealPart(a->A);CHKERRQ(ierr);
2187   ierr = MatRealPart(a->B);CHKERRQ(ierr);
2188   PetscFunctionReturn(0);
2189 }
2190 
2191 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A)
2192 {
2193   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2194   PetscErrorCode ierr;
2195 
2196   PetscFunctionBegin;
2197   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
2198   ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
2199   PetscFunctionReturn(0);
2200 }
2201 
2202 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2203 {
2204   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2205   PetscErrorCode ierr;
2206   PetscInt       i,*idxb = 0;
2207   PetscScalar    *va,*vb;
2208   Vec            vtmp;
2209 
2210   PetscFunctionBegin;
2211   ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr);
2212   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2213   if (idx) {
2214     for (i=0; i<A->rmap->n; i++) {
2215       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2216     }
2217   }
2218 
2219   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
2220   if (idx) {
2221     ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr);
2222   }
2223   ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
2224   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
2225 
2226   for (i=0; i<A->rmap->n; i++) {
2227     if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
2228       va[i] = vb[i];
2229       if (idx) idx[i] = a->garray[idxb[i]];
2230     }
2231   }
2232 
2233   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2234   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
2235   ierr = PetscFree(idxb);CHKERRQ(ierr);
2236   ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
2237   PetscFunctionReturn(0);
2238 }
2239 
2240 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2241 {
2242   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2243   PetscErrorCode ierr;
2244   PetscInt       i,*idxb = 0;
2245   PetscScalar    *va,*vb;
2246   Vec            vtmp;
2247 
2248   PetscFunctionBegin;
2249   ierr = MatGetRowMinAbs(a->A,v,idx);CHKERRQ(ierr);
2250   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2251   if (idx) {
2252     for (i=0; i<A->rmap->n; i++) {
2253       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2254     }
2255   }
2256 
2257   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
2258   if (idx) {
2259     ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr);
2260   }
2261   ierr = MatGetRowMinAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
2262   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
2263 
2264   for (i=0; i<A->rmap->n; i++) {
2265     if (PetscAbsScalar(va[i]) > PetscAbsScalar(vb[i])) {
2266       va[i] = vb[i];
2267       if (idx) idx[i] = a->garray[idxb[i]];
2268     }
2269   }
2270 
2271   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2272   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
2273   ierr = PetscFree(idxb);CHKERRQ(ierr);
2274   ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
2275   PetscFunctionReturn(0);
2276 }
2277 
2278 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2279 {
2280   Mat_MPIAIJ     *mat   = (Mat_MPIAIJ*) A->data;
2281   PetscInt       n      = A->rmap->n;
2282   PetscInt       cstart = A->cmap->rstart;
2283   PetscInt       *cmap  = mat->garray;
2284   PetscInt       *diagIdx, *offdiagIdx;
2285   Vec            diagV, offdiagV;
2286   PetscScalar    *a, *diagA, *offdiagA;
2287   PetscInt       r;
2288   PetscErrorCode ierr;
2289 
2290   PetscFunctionBegin;
2291   ierr = PetscMalloc2(n,&diagIdx,n,&offdiagIdx);CHKERRQ(ierr);
2292   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr);
2293   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr);
2294   ierr = MatGetRowMin(mat->A, diagV,    diagIdx);CHKERRQ(ierr);
2295   ierr = MatGetRowMin(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
2296   ierr = VecGetArray(v,        &a);CHKERRQ(ierr);
2297   ierr = VecGetArray(diagV,    &diagA);CHKERRQ(ierr);
2298   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2299   for (r = 0; r < n; ++r) {
2300     if (PetscAbsScalar(diagA[r]) <= PetscAbsScalar(offdiagA[r])) {
2301       a[r]   = diagA[r];
2302       idx[r] = cstart + diagIdx[r];
2303     } else {
2304       a[r]   = offdiagA[r];
2305       idx[r] = cmap[offdiagIdx[r]];
2306     }
2307   }
2308   ierr = VecRestoreArray(v,        &a);CHKERRQ(ierr);
2309   ierr = VecRestoreArray(diagV,    &diagA);CHKERRQ(ierr);
2310   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2311   ierr = VecDestroy(&diagV);CHKERRQ(ierr);
2312   ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
2313   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
2314   PetscFunctionReturn(0);
2315 }
2316 
2317 PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2318 {
2319   Mat_MPIAIJ     *mat   = (Mat_MPIAIJ*) A->data;
2320   PetscInt       n      = A->rmap->n;
2321   PetscInt       cstart = A->cmap->rstart;
2322   PetscInt       *cmap  = mat->garray;
2323   PetscInt       *diagIdx, *offdiagIdx;
2324   Vec            diagV, offdiagV;
2325   PetscScalar    *a, *diagA, *offdiagA;
2326   PetscInt       r;
2327   PetscErrorCode ierr;
2328 
2329   PetscFunctionBegin;
2330   ierr = PetscMalloc2(n,&diagIdx,n,&offdiagIdx);CHKERRQ(ierr);
2331   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr);
2332   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr);
2333   ierr = MatGetRowMax(mat->A, diagV,    diagIdx);CHKERRQ(ierr);
2334   ierr = MatGetRowMax(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
2335   ierr = VecGetArray(v,        &a);CHKERRQ(ierr);
2336   ierr = VecGetArray(diagV,    &diagA);CHKERRQ(ierr);
2337   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2338   for (r = 0; r < n; ++r) {
2339     if (PetscAbsScalar(diagA[r]) >= PetscAbsScalar(offdiagA[r])) {
2340       a[r]   = diagA[r];
2341       idx[r] = cstart + diagIdx[r];
2342     } else {
2343       a[r]   = offdiagA[r];
2344       idx[r] = cmap[offdiagIdx[r]];
2345     }
2346   }
2347   ierr = VecRestoreArray(v,        &a);CHKERRQ(ierr);
2348   ierr = VecRestoreArray(diagV,    &diagA);CHKERRQ(ierr);
2349   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2350   ierr = VecDestroy(&diagV);CHKERRQ(ierr);
2351   ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
2352   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
2353   PetscFunctionReturn(0);
2354 }
2355 
2356 PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat,Mat *newmat)
2357 {
2358   PetscErrorCode ierr;
2359   Mat            *dummy;
2360 
2361   PetscFunctionBegin;
2362   ierr    = MatCreateSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy);CHKERRQ(ierr);
2363   *newmat = *dummy;
2364   ierr    = PetscFree(dummy);CHKERRQ(ierr);
2365   PetscFunctionReturn(0);
2366 }
2367 
2368 PetscErrorCode  MatInvertBlockDiagonal_MPIAIJ(Mat A,const PetscScalar **values)
2369 {
2370   Mat_MPIAIJ     *a = (Mat_MPIAIJ*) A->data;
2371   PetscErrorCode ierr;
2372 
2373   PetscFunctionBegin;
2374   ierr = MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr);
2375   A->factorerrortype = a->A->factorerrortype;
2376   PetscFunctionReturn(0);
2377 }
2378 
2379 static PetscErrorCode  MatSetRandom_MPIAIJ(Mat x,PetscRandom rctx)
2380 {
2381   PetscErrorCode ierr;
2382   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)x->data;
2383 
2384   PetscFunctionBegin;
2385   ierr = MatSetRandom(aij->A,rctx);CHKERRQ(ierr);
2386   ierr = MatSetRandom(aij->B,rctx);CHKERRQ(ierr);
2387   ierr = MatAssemblyBegin(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2388   ierr = MatAssemblyEnd(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2389   PetscFunctionReturn(0);
2390 }
2391 
2392 PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ(Mat A,PetscBool sc)
2393 {
2394   PetscFunctionBegin;
2395   if (sc) A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ_Scalable;
2396   else A->ops->increaseoverlap    = MatIncreaseOverlap_MPIAIJ;
2397   PetscFunctionReturn(0);
2398 }
2399 
2400 /*@
2401    MatMPIAIJSetUseScalableIncreaseOverlap - Set whether the matrix uses a scalable algorithm to compute the overlap
2402 
2403    Collective on Mat
2404 
2405    Input Parameters:
2406 +    A - the matrix
2407 -    sc - PETSC_TRUE indicates that the scalable algorithm should be used (the default is the non-scalable algorithm)
2408 
2409  Level: advanced
2410 
2411 @*/
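/*
   Minimal usage sketch (illustrative only; m,n,M,N,d_nnz,o_nnz are placeholders). The same
   behavior can also be selected from the options database with -mat_increase_overlap_scalable,
   which is handled in MatSetFromOptions_MPIAIJ() below.

     ierr = MatCreateAIJ(PETSC_COMM_WORLD,m,n,M,N,0,d_nnz,0,o_nnz,&A);CHKERRQ(ierr);
     ierr = MatMPIAIJSetUseScalableIncreaseOverlap(A,PETSC_TRUE);CHKERRQ(ierr);
*/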
2412 PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap(Mat A,PetscBool sc)
2413 {
2414   PetscErrorCode       ierr;
2415 
2416   PetscFunctionBegin;
2417   ierr = PetscTryMethod(A,"MatMPIAIJSetUseScalableIncreaseOverlap_C",(Mat,PetscBool),(A,sc));CHKERRQ(ierr);
2418   PetscFunctionReturn(0);
2419 }
2420 
2421 PetscErrorCode MatSetFromOptions_MPIAIJ(PetscOptionItems *PetscOptionsObject,Mat A)
2422 {
2423   PetscErrorCode       ierr;
2424   PetscBool            sc = PETSC_FALSE,flg;
2425 
2426   PetscFunctionBegin;
2427   ierr = PetscOptionsHead(PetscOptionsObject,"MPIAIJ options");CHKERRQ(ierr);
2428   ierr = PetscObjectOptionsBegin((PetscObject)A);
2429     if (A->ops->increaseoverlap == MatIncreaseOverlap_MPIAIJ_Scalable) sc = PETSC_TRUE;
2430     ierr = PetscOptionsBool("-mat_increase_overlap_scalable","Use a scalable algorithm to compute the overlap","MatIncreaseOverlap",sc,&sc,&flg);CHKERRQ(ierr);
2431     if (flg) {
2432       ierr = MatMPIAIJSetUseScalableIncreaseOverlap(A,sc);CHKERRQ(ierr);
2433     }
2434   ierr = PetscOptionsEnd();CHKERRQ(ierr);
2435   PetscFunctionReturn(0);
2436 }
2437 
2438 PetscErrorCode MatShift_MPIAIJ(Mat Y,PetscScalar a)
2439 {
2440   PetscErrorCode ierr;
2441   Mat_MPIAIJ     *maij = (Mat_MPIAIJ*)Y->data;
2442   Mat_SeqAIJ     *aij = (Mat_SeqAIJ*)maij->A->data;
2443 
2444   PetscFunctionBegin;
2445   if (!Y->preallocated) {
2446     ierr = MatMPIAIJSetPreallocation(Y,1,NULL,0,NULL);CHKERRQ(ierr);
2447   } else if (!aij->nz) {
2448     PetscInt nonew = aij->nonew;
2449     ierr = MatSeqAIJSetPreallocation(maij->A,1,NULL);CHKERRQ(ierr);
2450     aij->nonew = nonew;
2451   }
2452   ierr = MatShift_Basic(Y,a);CHKERRQ(ierr);
2453   PetscFunctionReturn(0);
2454 }
2455 
2456 PetscErrorCode MatMissingDiagonal_MPIAIJ(Mat A,PetscBool  *missing,PetscInt *d)
2457 {
2458   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2459   PetscErrorCode ierr;
2460 
2461   PetscFunctionBegin;
2462   if (A->rmap->n != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only works for square matrices");
2463   ierr = MatMissingDiagonal(a->A,missing,d);CHKERRQ(ierr);
2464   if (d) {
2465     PetscInt rstart;
2466     ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr);
2467     *d += rstart;
2468 
2469   }
2470   PetscFunctionReturn(0);
2471 }
2472 
2473 
2474 /* -------------------------------------------------------------------*/
2475 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
2476                                        MatGetRow_MPIAIJ,
2477                                        MatRestoreRow_MPIAIJ,
2478                                        MatMult_MPIAIJ,
2479                                 /* 4*/ MatMultAdd_MPIAIJ,
2480                                        MatMultTranspose_MPIAIJ,
2481                                        MatMultTransposeAdd_MPIAIJ,
2482                                        0,
2483                                        0,
2484                                        0,
2485                                 /*10*/ 0,
2486                                        0,
2487                                        0,
2488                                        MatSOR_MPIAIJ,
2489                                        MatTranspose_MPIAIJ,
2490                                 /*15*/ MatGetInfo_MPIAIJ,
2491                                        MatEqual_MPIAIJ,
2492                                        MatGetDiagonal_MPIAIJ,
2493                                        MatDiagonalScale_MPIAIJ,
2494                                        MatNorm_MPIAIJ,
2495                                 /*20*/ MatAssemblyBegin_MPIAIJ,
2496                                        MatAssemblyEnd_MPIAIJ,
2497                                        MatSetOption_MPIAIJ,
2498                                        MatZeroEntries_MPIAIJ,
2499                                 /*24*/ MatZeroRows_MPIAIJ,
2500                                        0,
2501                                        0,
2502                                        0,
2503                                        0,
2504                                 /*29*/ MatSetUp_MPIAIJ,
2505                                        0,
2506                                        0,
2507                                        MatGetDiagonalBlock_MPIAIJ,
2508                                        0,
2509                                 /*34*/ MatDuplicate_MPIAIJ,
2510                                        0,
2511                                        0,
2512                                        0,
2513                                        0,
2514                                 /*39*/ MatAXPY_MPIAIJ,
2515                                        MatCreateSubMatrices_MPIAIJ,
2516                                        MatIncreaseOverlap_MPIAIJ,
2517                                        MatGetValues_MPIAIJ,
2518                                        MatCopy_MPIAIJ,
2519                                 /*44*/ MatGetRowMax_MPIAIJ,
2520                                        MatScale_MPIAIJ,
2521                                        MatShift_MPIAIJ,
2522                                        MatDiagonalSet_MPIAIJ,
2523                                        MatZeroRowsColumns_MPIAIJ,
2524                                 /*49*/ MatSetRandom_MPIAIJ,
2525                                        0,
2526                                        0,
2527                                        0,
2528                                        0,
2529                                 /*54*/ MatFDColoringCreate_MPIXAIJ,
2530                                        0,
2531                                        MatSetUnfactored_MPIAIJ,
2532                                        MatPermute_MPIAIJ,
2533                                        0,
2534                                 /*59*/ MatCreateSubMatrix_MPIAIJ,
2535                                        MatDestroy_MPIAIJ,
2536                                        MatView_MPIAIJ,
2537                                        0,
2538                                        MatMatMatMult_MPIAIJ_MPIAIJ_MPIAIJ,
2539                                 /*64*/ MatMatMatMultSymbolic_MPIAIJ_MPIAIJ_MPIAIJ,
2540                                        MatMatMatMultNumeric_MPIAIJ_MPIAIJ_MPIAIJ,
2541                                        0,
2542                                        0,
2543                                        0,
2544                                 /*69*/ MatGetRowMaxAbs_MPIAIJ,
2545                                        MatGetRowMinAbs_MPIAIJ,
2546                                        0,
2547                                        0,
2548                                        0,
2549                                        0,
2550                                 /*75*/ MatFDColoringApply_AIJ,
2551                                        MatSetFromOptions_MPIAIJ,
2552                                        0,
2553                                        0,
2554                                        MatFindZeroDiagonals_MPIAIJ,
2555                                 /*80*/ 0,
2556                                        0,
2557                                        0,
2558                                 /*83*/ MatLoad_MPIAIJ,
2559                                        MatIsSymmetric_MPIAIJ,
2560                                        0,
2561                                        0,
2562                                        0,
2563                                        0,
2564                                 /*89*/ MatMatMult_MPIAIJ_MPIAIJ,
2565                                        MatMatMultSymbolic_MPIAIJ_MPIAIJ,
2566                                        MatMatMultNumeric_MPIAIJ_MPIAIJ,
2567                                        MatPtAP_MPIAIJ_MPIAIJ,
2568                                        MatPtAPSymbolic_MPIAIJ_MPIAIJ,
2569                                 /*94*/ MatPtAPNumeric_MPIAIJ_MPIAIJ,
2570                                        0,
2571                                        0,
2572                                        0,
2573                                        0,
2574                                 /*99*/ 0,
2575                                        0,
2576                                        0,
2577                                        MatConjugate_MPIAIJ,
2578                                        0,
2579                                 /*104*/MatSetValuesRow_MPIAIJ,
2580                                        MatRealPart_MPIAIJ,
2581                                        MatImaginaryPart_MPIAIJ,
2582                                        0,
2583                                        0,
2584                                 /*109*/0,
2585                                        0,
2586                                        MatGetRowMin_MPIAIJ,
2587                                        0,
2588                                        MatMissingDiagonal_MPIAIJ,
2589                                 /*114*/MatGetSeqNonzeroStructure_MPIAIJ,
2590                                        0,
2591                                        MatGetGhosts_MPIAIJ,
2592                                        0,
2593                                        0,
2594                                 /*119*/0,
2595                                        0,
2596                                        0,
2597                                        0,
2598                                        MatGetMultiProcBlock_MPIAIJ,
2599                                 /*124*/MatFindNonzeroRows_MPIAIJ,
2600                                        MatGetColumnNorms_MPIAIJ,
2601                                        MatInvertBlockDiagonal_MPIAIJ,
2602                                        0,
2603                                        MatCreateSubMatricesMPI_MPIAIJ,
2604                                 /*129*/0,
2605                                        MatTransposeMatMult_MPIAIJ_MPIAIJ,
2606                                        MatTransposeMatMultSymbolic_MPIAIJ_MPIAIJ,
2607                                        MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ,
2608                                        0,
2609                                 /*134*/0,
2610                                        0,
2611                                        MatRARt_MPIAIJ_MPIAIJ,
2612                                        0,
2613                                        0,
2614                                 /*139*/MatSetBlockSizes_MPIAIJ,
2615                                        0,
2616                                        0,
2617                                        MatFDColoringSetUp_MPIXAIJ,
2618                                        MatFindOffBlockDiagonalEntries_MPIAIJ,
2619                                 /*144*/MatCreateMPIMatConcatenateSeqMat_MPIAIJ
2620 };
2621 
2622 /* ----------------------------------------------------------------------------------------*/
2623 
2624 PetscErrorCode  MatStoreValues_MPIAIJ(Mat mat)
2625 {
2626   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2627   PetscErrorCode ierr;
2628 
2629   PetscFunctionBegin;
2630   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
2631   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
2632   PetscFunctionReturn(0);
2633 }
2634 
2635 PetscErrorCode  MatRetrieveValues_MPIAIJ(Mat mat)
2636 {
2637   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2638   PetscErrorCode ierr;
2639 
2640   PetscFunctionBegin;
2641   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
2642   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
2643   PetscFunctionReturn(0);
2644 }
2645 
2646 PetscErrorCode  MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
2647 {
2648   Mat_MPIAIJ     *b;
2649   PetscErrorCode ierr;
2650 
2651   PetscFunctionBegin;
2652   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
2653   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
2654   b = (Mat_MPIAIJ*)B->data;
2655 
2656 #if defined(PETSC_USE_CTABLE)
2657   ierr = PetscTableDestroy(&b->colmap);CHKERRQ(ierr);
2658 #else
2659   ierr = PetscFree(b->colmap);CHKERRQ(ierr);
2660 #endif
2661   ierr = PetscFree(b->garray);CHKERRQ(ierr);
2662   ierr = VecDestroy(&b->lvec);CHKERRQ(ierr);
2663   ierr = VecScatterDestroy(&b->Mvctx);CHKERRQ(ierr);
2664 
2665   /* Because the B will have been resized we simply destroy it and create a new one each time */
2666   ierr = MatDestroy(&b->B);CHKERRQ(ierr);
2667   ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
2668   ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr);
2669   ierr = MatSetBlockSizesFromMats(b->B,B,B);CHKERRQ(ierr);
2670   ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr);
2671   ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->B);CHKERRQ(ierr);
2672 
2673   if (!B->preallocated) {
2674     ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
2675     ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr);
2676     ierr = MatSetBlockSizesFromMats(b->A,B,B);CHKERRQ(ierr);
2677     ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr);
2678     ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->A);CHKERRQ(ierr);
2679   }
2680 
2681   ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr);
2682   ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr);
2683   B->preallocated  = PETSC_TRUE;
2684   B->was_assembled = PETSC_FALSE;
2685   B->assembled     = PETSC_FALSE;
2686   PetscFunctionReturn(0);
2687 }
2688 
2689 PetscErrorCode MatResetPreallocation_MPIAIJ(Mat B)
2690 {
2691   Mat_MPIAIJ     *b;
2692   PetscErrorCode ierr;
2693 
2694   PetscFunctionBegin;
2695   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
2696   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
2697   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
2698   b = (Mat_MPIAIJ*)B->data;
2699 
2700 #if defined(PETSC_USE_CTABLE)
2701   ierr = PetscTableDestroy(&b->colmap);CHKERRQ(ierr);
2702 #else
2703   ierr = PetscFree(b->colmap);CHKERRQ(ierr);
2704 #endif
2705   ierr = PetscFree(b->garray);CHKERRQ(ierr);
2706   ierr = VecDestroy(&b->lvec);CHKERRQ(ierr);
2707   ierr = VecScatterDestroy(&b->Mvctx);CHKERRQ(ierr);
2708 
2709   ierr = MatResetPreallocation(b->A);CHKERRQ(ierr);
2710   ierr = MatResetPreallocation(b->B);CHKERRQ(ierr);
2711   B->preallocated  = PETSC_TRUE;
2712   B->was_assembled = PETSC_FALSE;
2713   B->assembled     = PETSC_FALSE;
2714   PetscFunctionReturn(0);
2715 }
2716 
2717 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
2718 {
2719   Mat            mat;
2720   Mat_MPIAIJ     *a,*oldmat = (Mat_MPIAIJ*)matin->data;
2721   PetscErrorCode ierr;
2722 
2723   PetscFunctionBegin;
2724   *newmat = 0;
2725   ierr    = MatCreate(PetscObjectComm((PetscObject)matin),&mat);CHKERRQ(ierr);
2726   ierr    = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr);
2727   ierr    = MatSetBlockSizesFromMats(mat,matin,matin);CHKERRQ(ierr);
2728   ierr    = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr);
2729   ierr    = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
2730   a       = (Mat_MPIAIJ*)mat->data;
2731 
2732   mat->factortype   = matin->factortype;
2733   mat->assembled    = PETSC_TRUE;
2734   mat->insertmode   = NOT_SET_VALUES;
2735   mat->preallocated = PETSC_TRUE;
2736 
2737   a->size         = oldmat->size;
2738   a->rank         = oldmat->rank;
2739   a->donotstash   = oldmat->donotstash;
2740   a->roworiented  = oldmat->roworiented;
2741   a->rowindices   = 0;
2742   a->rowvalues    = 0;
2743   a->getrowactive = PETSC_FALSE;
2744 
2745   ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr);
2746   ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr);
2747 
2748   if (oldmat->colmap) {
2749 #if defined(PETSC_USE_CTABLE)
2750     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
2751 #else
2752     ierr = PetscMalloc1(mat->cmap->N,&a->colmap);CHKERRQ(ierr);
2753     ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
2754     ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
2755 #endif
2756   } else a->colmap = 0;
2757   if (oldmat->garray) {
2758     PetscInt len;
2759     len  = oldmat->B->cmap->n;
2760     ierr = PetscMalloc1(len+1,&a->garray);CHKERRQ(ierr);
2761     ierr = PetscLogObjectMemory((PetscObject)mat,len*sizeof(PetscInt));CHKERRQ(ierr);
2762     if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); }
2763   } else a->garray = 0;
2764 
2765   ierr    = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
2766   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->lvec);CHKERRQ(ierr);
2767   ierr    = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
2768   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx);CHKERRQ(ierr);
2769   ierr    = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
2770   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr);
2771   ierr    = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
2772   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->B);CHKERRQ(ierr);
2773   ierr    = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr);
2774   *newmat = mat;
2775   PetscFunctionReturn(0);
2776 }
2777 
2778 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer)
2779 {
2780   PetscScalar    *vals,*svals;
2781   MPI_Comm       comm;
2782   PetscErrorCode ierr;
2783   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
2784   PetscInt       i,nz,j,rstart,rend,mmax,maxnz = 0;
2785   PetscInt       header[4],*rowlengths = 0,M,N,m,*cols;
2786   PetscInt       *ourlens = NULL,*procsnz = NULL,*offlens = NULL,jj,*mycols,*smycols;
2787   PetscInt       cend,cstart,n,*rowners;
2788   int            fd;
2789   PetscInt       bs = newMat->rmap->bs;
2790 
2791   PetscFunctionBegin;
2792   /* force binary viewer to load .info file if it has not yet done so */
2793   ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr);
2794   ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr);
2795   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2796   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2797   ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
2798   if (!rank) {
2799     ierr = PetscBinaryRead(fd,(char*)header,4,PETSC_INT);CHKERRQ(ierr);
2800     ierr = PetscBinaryRead(fd,(char*)header,4,PETSC_INT);CHKERRQ(ierr);
2801     if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Not a matrix object");
2802     if (header[3] < 0) SETERRQ(PetscObjectComm((PetscObject)newMat),PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format on disk, cannot load as MATMPIAIJ");
2802   }
2803 
2804   ierr = PetscOptionsBegin(comm,NULL,"Options for loading MATMPIAIJ matrix","Mat");CHKERRQ(ierr);
2805   ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,NULL);CHKERRQ(ierr);
2806   ierr = PetscOptionsEnd();CHKERRQ(ierr);
2807   if (bs < 0) bs = 1;
2808 
2809   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
2810   M    = header[1]; N = header[2];
2811 
2812   /* If global sizes are set, check if they are consistent with that given in the file */
2813   if (newMat->rmap->N >= 0 && newMat->rmap->N != M) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Inconsistent # of rows: Matrix in file has (%D) and input matrix has (%D)",newMat->rmap->N,M);
2814   if (newMat->cmap->N >= 0 && newMat->cmap->N != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Inconsistent # of cols: Matrix in file has (%D) and input matrix has (%D)",newMat->cmap->N,N);
2815 
2816   /* determine ownership of all (block) rows */
2817   if (M%bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows (%D) and block size (%D)",M,bs);
2818   if (newMat->rmap->n < 0) m = bs*((M/bs)/size + (((M/bs) % size) > rank));    /* PETSC_DECIDE */
2819   else m = newMat->rmap->n; /* Set by user */
2820 
2821   ierr = PetscMalloc1(size+1,&rowners);CHKERRQ(ierr);
2822   ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
2823 
2824   /* First process needs enough room for process with most rows */
2825   if (!rank) {
2826     mmax = rowners[1];
2827     for (i=2; i<=size; i++) {
2828       mmax = PetscMax(mmax, rowners[i]);
2829     }
2830   } else mmax = -1;             /* unused, but compilers complain */
2831 
2832   rowners[0] = 0;
2833   for (i=2; i<=size; i++) {
2834     rowners[i] += rowners[i-1];
2835   }
2836   rstart = rowners[rank];
2837   rend   = rowners[rank+1];
2838 
2839   /* distribute row lengths to all processors */
2840   ierr = PetscMalloc2(m,&ourlens,m,&offlens);CHKERRQ(ierr);
2841   if (!rank) {
2842     ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr);
2843     ierr = PetscMalloc1(mmax,&rowlengths);CHKERRQ(ierr);
2844     ierr = PetscCalloc1(size,&procsnz);CHKERRQ(ierr);
2845     for (j=0; j<m; j++) {
2846       procsnz[0] += ourlens[j];
2847     }
2848     for (i=1; i<size; i++) {
2849       ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr);
2850       /* calculate the number of nonzeros on each processor */
2851       for (j=0; j<rowners[i+1]-rowners[i]; j++) {
2852         procsnz[i] += rowlengths[j];
2853       }
2854       ierr = MPIULong_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
2855     }
2856     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
2857   } else {
2858     ierr = MPIULong_Recv(ourlens,m,MPIU_INT,0,tag,comm);CHKERRQ(ierr);
2859   }
2860 
2861   if (!rank) {
2862     /* determine max buffer needed and allocate it */
2863     maxnz = 0;
2864     for (i=0; i<size; i++) {
2865       maxnz = PetscMax(maxnz,procsnz[i]);
2866     }
2867     ierr = PetscMalloc1(maxnz,&cols);CHKERRQ(ierr);
2868 
2869     /* read in my part of the matrix column indices  */
2870     nz   = procsnz[0];
2871     ierr = PetscMalloc1(nz,&mycols);CHKERRQ(ierr);
2872     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
2873 
2874     /* read in everyone else's parts and ship them off */
2875     for (i=1; i<size; i++) {
2876       nz   = procsnz[i];
2877       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
2878       ierr = MPIULong_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
2879     }
2880     ierr = PetscFree(cols);CHKERRQ(ierr);
2881   } else {
2882     /* determine buffer space needed for message */
2883     nz = 0;
2884     for (i=0; i<m; i++) {
2885       nz += ourlens[i];
2886     }
2887     ierr = PetscMalloc1(nz,&mycols);CHKERRQ(ierr);
2888 
2889     /* receive message of column indices*/
2890     ierr = MPIULong_Recv(mycols,nz,MPIU_INT,0,tag,comm);CHKERRQ(ierr);
2891   }
2892 
2893   /* determine column ownership if matrix is not square */
2894   if (N != M) {
2895     if (newMat->cmap->n < 0) n = N/size + ((N % size) > rank);
2896     else n = newMat->cmap->n;
2897     ierr   = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
2898     cstart = cend - n;
2899   } else {
2900     cstart = rstart;
2901     cend   = rend;
2902     n      = cend - cstart;
2903   }
2904 
2905   /* loop over local rows, determining number of off diagonal entries */
2906   ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr);
2907   jj   = 0;
2908   for (i=0; i<m; i++) {
2909     for (j=0; j<ourlens[i]; j++) {
2910       if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++;
2911       jj++;
2912     }
2913   }
2914 
2915   for (i=0; i<m; i++) {
2916     ourlens[i] -= offlens[i];
2917   }
2918   ierr = MatSetSizes(newMat,m,n,M,N);CHKERRQ(ierr);
2919 
2920   if (bs > 1) {ierr = MatSetBlockSize(newMat,bs);CHKERRQ(ierr);}
2921 
2922   ierr = MatMPIAIJSetPreallocation(newMat,0,ourlens,0,offlens);CHKERRQ(ierr);
2923 
2924   for (i=0; i<m; i++) {
2925     ourlens[i] += offlens[i];
2926   }
2927 
2928   if (!rank) {
2929     ierr = PetscMalloc1(maxnz+1,&vals);CHKERRQ(ierr);
2930 
2931     /* read in my part of the matrix numerical values  */
2932     nz   = procsnz[0];
2933     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2934 
2935     /* insert into matrix */
2936     jj      = rstart;
2937     smycols = mycols;
2938     svals   = vals;
2939     for (i=0; i<m; i++) {
2940       ierr     = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
2941       smycols += ourlens[i];
2942       svals   += ourlens[i];
2943       jj++;
2944     }
2945 
2946     /* read in other processors and ship out */
2947     for (i=1; i<size; i++) {
2948       nz   = procsnz[i];
2949       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2950       ierr = MPIULong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr);
2951     }
2952     ierr = PetscFree(procsnz);CHKERRQ(ierr);
2953   } else {
2954     /* receive numeric values */
2955     ierr = PetscMalloc1(nz+1,&vals);CHKERRQ(ierr);
2956 
2957     /* receive message of values*/
2958     ierr = MPIULong_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr);
2959 
2960     /* insert into matrix */
2961     jj      = rstart;
2962     smycols = mycols;
2963     svals   = vals;
2964     for (i=0; i<m; i++) {
2965       ierr     = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
2966       smycols += ourlens[i];
2967       svals   += ourlens[i];
2968       jj++;
2969     }
2970   }
2971   ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr);
2972   ierr = PetscFree(vals);CHKERRQ(ierr);
2973   ierr = PetscFree(mycols);CHKERRQ(ierr);
2974   ierr = PetscFree(rowners);CHKERRQ(ierr);
2975   ierr = MatAssemblyBegin(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2976   ierr = MatAssemblyEnd(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2977   PetscFunctionReturn(0);
2978 }
2979 
2980 /* Not scalable because of ISAllGather() unless getting all columns. */
2981 PetscErrorCode ISGetSeqIS_Private(Mat mat,IS iscol,IS *isseq)
2982 {
2983   PetscErrorCode ierr;
2984   IS             iscol_local;
2985   PetscBool      isstride;
2986   PetscMPIInt    lisstride=0,gisstride;
2987 
2988   PetscFunctionBegin;
2989   /* check if we are grabbing all columns*/
2990   ierr = PetscObjectTypeCompare((PetscObject)iscol,ISSTRIDE,&isstride);CHKERRQ(ierr);
2991 
2992   if (isstride) {
2993     PetscInt  start,len,mstart,mlen;
2994     ierr = ISStrideGetInfo(iscol,&start,NULL);CHKERRQ(ierr);
2995     ierr = ISGetLocalSize(iscol,&len);CHKERRQ(ierr);
2996     ierr = MatGetOwnershipRangeColumn(mat,&mstart,&mlen);CHKERRQ(ierr);
2997     if (mstart == start && mlen-mstart == len) lisstride = 1;
2998   }
2999 
3000   ierr = MPIU_Allreduce(&lisstride,&gisstride,1,MPI_INT,MPI_MIN,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
3001   if (gisstride) {
3002     PetscInt N;
3003     ierr = MatGetSize(mat,NULL,&N);CHKERRQ(ierr);
3004     ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),N,0,1,&iscol_local);CHKERRQ(ierr);
3005     ierr = ISSetIdentity(iscol_local);CHKERRQ(ierr);
3006     ierr = PetscInfo(mat,"Optimizing for obtaining all columns of the matrix; skipping ISAllGather()\n");CHKERRQ(ierr);
3007   } else {
3008     PetscInt cbs;
3009     ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr);
3010     ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
3011     ierr = ISSetBlockSize(iscol_local,cbs);CHKERRQ(ierr);
3012   }
3013 
3014   *isseq = iscol_local;
3015   PetscFunctionReturn(0);
3016 }
3017 
3018 /*
3019  Used by MatCreateSubMatrix_MPIAIJ_SameRowColDist() to avoid calling ISAllGather() and creating a global-length iscol_local
3020  (see MatCreateSubMatrix_MPIAIJ_nonscalable)
3021 
3022  Input Parameters:
3023    mat - matrix
3024    isrow - parallel row index set; its local indices are a subset of local rows of mat,
3025            i.e., mat->rstart <= isrow[i] < mat->rend
3026    iscol - parallel column index set; its local indices are a subset of local columns of mat,
3027            i.e., mat->cstart <= iscol[i] < mat->cend
3028  Output Parameter:
3029    isrow_d,iscol_d - sequential row and column index sets for retrieving mat->A
3030    iscol_o - sequential column index set for retrieving mat->B
3031    garray - column map; garray[i] indicates global location of iscol_o[i] in iscol
3032  */
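/*
 Worked example (illustrative only, not taken from the original source): suppose the
 distributed iscol selects global columns {2,5,11}, where global column 11 is owned by
 (and listed in the iscol portion of) a different process.  If this process's
 off-diagonal block mat->B has a local column whose global id is 11, then iscol_o
 contains that local column index of B and the matching garray entry is 2, the position
 of column 11 within iscol, i.e. its column index in the resulting submatrix.
*/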
3033 PetscErrorCode ISGetSeqIS_SameColDist_Private(Mat mat,IS isrow,IS iscol,IS *isrow_d,IS *iscol_d,IS *iscol_o,const PetscInt *garray[])
3034 {
3035   PetscErrorCode ierr;
3036   Vec            x,cmap;
3037   const PetscInt *is_idx;
3038   PetscScalar    *xarray,*cmaparray;
3039   PetscInt       ncols,isstart,*idx,m,rstart,*cmap1,count;
3040   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)mat->data;
3041   Mat            B=a->B;
3042   Vec            lvec=a->lvec,lcmap;
3043   PetscInt       i,cstart,cend,Bn=B->cmap->N;
3044   MPI_Comm       comm;
3045 
3046   PetscFunctionBegin;
3047   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3048   ierr = ISGetLocalSize(iscol,&ncols);CHKERRQ(ierr);
3049 
3050   /* (1) iscol is a sub-column vector of mat, pad it with '-1.' to form a full vector x */
3051   ierr = MatCreateVecs(mat,&x,NULL);CHKERRQ(ierr);
3052   ierr = VecDuplicate(x,&cmap);CHKERRQ(ierr);
3053   ierr = VecSet(x,-1.0);CHKERRQ(ierr);
3054 
3055   /* Get start indices */
3056   ierr = MPI_Scan(&ncols,&isstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3057   isstart -= ncols;
3058   ierr = MatGetOwnershipRangeColumn(mat,&cstart,&cend);CHKERRQ(ierr);
3059 
3060   ierr = ISGetIndices(iscol,&is_idx);CHKERRQ(ierr);
3061   ierr = VecGetArray(x,&xarray);CHKERRQ(ierr);
3062   ierr = VecGetArray(cmap,&cmaparray);CHKERRQ(ierr);
3063   ierr = PetscMalloc1(ncols,&idx);CHKERRQ(ierr);
3064   for (i=0; i<ncols; i++) {
3065     xarray[is_idx[i]-cstart]    = (PetscScalar)is_idx[i];
3066     cmaparray[is_idx[i]-cstart] = i + isstart;      /* global index of iscol[i] */
3067     idx[i]                      = is_idx[i]-cstart; /* local index of iscol[i]  */
3068   }
3069   ierr = VecRestoreArray(x,&xarray);CHKERRQ(ierr);
3070   ierr = VecRestoreArray(cmap,&cmaparray);CHKERRQ(ierr);
3071   ierr = ISRestoreIndices(iscol,&is_idx);CHKERRQ(ierr);
3072 
3073   /* Get iscol_d */
3074   ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,iscol_d);CHKERRQ(ierr);
3075   ierr = ISGetBlockSize(iscol,&i);CHKERRQ(ierr);
3076   ierr = ISSetBlockSize(*iscol_d,i);CHKERRQ(ierr);
3077 
3078   /* Get isrow_d */
3079   ierr = ISGetLocalSize(isrow,&m);CHKERRQ(ierr);
3080   rstart = mat->rmap->rstart;
3081   ierr = PetscMalloc1(m,&idx);CHKERRQ(ierr);
3082   ierr = ISGetIndices(isrow,&is_idx);CHKERRQ(ierr);
3083   for (i=0; i<m; i++) idx[i] = is_idx[i]-rstart;
3084   ierr = ISRestoreIndices(isrow,&is_idx);CHKERRQ(ierr);
3085 
3086   ierr = ISCreateGeneral(PETSC_COMM_SELF,m,idx,PETSC_OWN_POINTER,isrow_d);CHKERRQ(ierr);
3087   ierr = ISGetBlockSize(isrow,&i);CHKERRQ(ierr);
3088   ierr = ISSetBlockSize(*isrow_d,i);CHKERRQ(ierr);
3089 
3090   /* (2) Scatter x and cmap using aij->Mvctx to get their off-process portions (see MatMult_MPIAIJ) */
3091   ierr = VecScatterBegin(a->Mvctx,x,lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3092 
3093   ierr = VecDuplicate(lvec,&lcmap);CHKERRQ(ierr);
3094 
3095   ierr = VecScatterEnd(a->Mvctx,x,lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3096   ierr = VecScatterBegin(a->Mvctx,cmap,lcmap,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3097   ierr = VecScatterEnd(a->Mvctx,cmap,lcmap,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3098 
3099   /* (3) create sequential iscol_o (a subset of iscol) and isgarray */
3100   /* off-process column indices */
3101   count = 0;
3102   ierr = PetscMalloc1(Bn,&idx);CHKERRQ(ierr);
3103   ierr = PetscMalloc1(Bn,&cmap1);CHKERRQ(ierr);
3104 
3105   ierr = VecGetArray(lvec,&xarray);CHKERRQ(ierr);
3106   ierr = VecGetArray(lcmap,&cmaparray);CHKERRQ(ierr);
3107   for (i=0; i<Bn; i++) {
3108     if (PetscRealPart(xarray[i]) > -1.0) {
3109       idx[count]     = i;                   /* local column index in off-diagonal part B */
3110       cmap1[count++] = (PetscInt)PetscRealPart(cmaparray[i]);  /* column index in submat */
3111     }
3112   }
3113   ierr = VecRestoreArray(lvec,&xarray);CHKERRQ(ierr);
3114   ierr = VecRestoreArray(lcmap,&cmaparray);CHKERRQ(ierr);
3115 
3116   ierr = ISCreateGeneral(PETSC_COMM_SELF,count,idx,PETSC_COPY_VALUES,iscol_o);CHKERRQ(ierr);
3117   /* cannot ensure iscol_o has same blocksize as iscol! */
3118 
3119   ierr = PetscFree(idx);CHKERRQ(ierr);
3120 
3121   *garray = cmap1;
3122 
3123   ierr = VecDestroy(&x);CHKERRQ(ierr);
3124   ierr = VecDestroy(&cmap);CHKERRQ(ierr);
3125   ierr = VecDestroy(&lcmap);CHKERRQ(ierr);
3126   PetscFunctionReturn(0);
3127 }
3128 
3129 /* isrow and iscol have same processor distribution as mat, output *submat is a submatrix of local mat */
3130 PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowColDist(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *submat)
3131 {
3132   PetscErrorCode ierr;
3133   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)mat->data,*asub;
3134   Mat            M = NULL;
3135   MPI_Comm       comm;
3136   IS             iscol_d,isrow_d,iscol_o;
3137   Mat            Asub = NULL,Bsub = NULL;
3138   PetscInt       n;
3139 
3140   PetscFunctionBegin;
3141   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3142 
3143   if (call == MAT_REUSE_MATRIX) {
3144     /* Retrieve isrow_d, iscol_d and iscol_o from submat */
3145     ierr = PetscObjectQuery((PetscObject)*submat,"isrow_d",(PetscObject*)&isrow_d);CHKERRQ(ierr);
3146     if (!isrow_d) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"isrow_d passed in was not used before, cannot reuse");
3147 
3148     ierr = PetscObjectQuery((PetscObject)*submat,"iscol_d",(PetscObject*)&iscol_d);CHKERRQ(ierr);
3149     if (!iscol_d) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"iscol_d passed in was not used before, cannot reuse");
3150 
3151     ierr = PetscObjectQuery((PetscObject)*submat,"iscol_o",(PetscObject*)&iscol_o);CHKERRQ(ierr);
3152     if (!iscol_o) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"iscol_o passed in was not used before, cannot reuse");
3153 
3154     /* Update diagonal and off-diagonal portions of submat */
3155     asub = (Mat_MPIAIJ*)(*submat)->data;
3156     ierr = MatCreateSubMatrix_SeqAIJ(a->A,isrow_d,iscol_d,PETSC_DECIDE,MAT_REUSE_MATRIX,&asub->A);CHKERRQ(ierr);
3157     ierr = ISGetLocalSize(iscol_o,&n);CHKERRQ(ierr);
3158     if (n) {
3159       ierr = MatCreateSubMatrix_SeqAIJ(a->B,isrow_d,iscol_o,PETSC_DECIDE,MAT_REUSE_MATRIX,&asub->B);CHKERRQ(ierr);
3160     }
3161     ierr = MatAssemblyBegin(*submat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3162     ierr = MatAssemblyEnd(*submat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3163 
3164   } else { /* call == MAT_INITIAL_MATRIX */
3165     const PetscInt *garray;
3166     PetscInt        BsubN;
3167 
3168     /* Create isrow_d, iscol_d, iscol_o and isgarray (replace isgarray with array?) */
3169     ierr = ISGetSeqIS_SameColDist_Private(mat,isrow,iscol,&isrow_d,&iscol_d,&iscol_o,&garray);CHKERRQ(ierr);
3170 
3171     /* Create local submatrices Asub and Bsub */
3172     ierr = MatCreateSubMatrix_SeqAIJ(a->A,isrow_d,iscol_d,PETSC_DECIDE,MAT_INITIAL_MATRIX,&Asub);CHKERRQ(ierr);
3173     ierr = MatCreateSubMatrix_SeqAIJ(a->B,isrow_d,iscol_o,PETSC_DECIDE,MAT_INITIAL_MATRIX,&Bsub);CHKERRQ(ierr);
3174 
3175     /* Create submatrix M */
3176     ierr = MatCreateMPIAIJWithSeqAIJ(comm,Asub,Bsub,garray,&M);CHKERRQ(ierr);
3177 
3178     /* If Bsub has empty columns, compress iscol_o such that it will retrieve condensed Bsub from a->B during reuse */
3179     asub = (Mat_MPIAIJ*)M->data;
3180 
3181     ierr = ISGetLocalSize(iscol_o,&BsubN);CHKERRQ(ierr);
3182     n = asub->B->cmap->N;
3183     if (BsubN > n) {
3184       /* This case can be tested using ~petsc/src/tao/bound/examples/tutorials/runplate2_3 */
3185       const PetscInt *idx;
3186       PetscInt       i,j,*idx_new,*subgarray = asub->garray;
3187       ierr = PetscInfo2(M,"submatrix Bn %D != BsubN %D, update iscol_o\n",n,BsubN);CHKERRQ(ierr);
3188 
3189       ierr = PetscMalloc1(n,&idx_new);CHKERRQ(ierr);
3190       j = 0;
3191       ierr = ISGetIndices(iscol_o,&idx);CHKERRQ(ierr);
3192       for (i=0; i<n; i++) {
3193         if (j >= BsubN) break;
3194         while (subgarray[i] > garray[j]) j++;
3195 
3196         if (subgarray[i] == garray[j]) {
3197           idx_new[i] = idx[j++];
3198         } else SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"subgarray[%D]=%D cannot be less than garray[%D]=%D",i,subgarray[i],j,garray[j]);
3199       }
3200       ierr = ISRestoreIndices(iscol_o,&idx);CHKERRQ(ierr);
3201 
3202       ierr = ISDestroy(&iscol_o);CHKERRQ(ierr);
3203       ierr = ISCreateGeneral(PETSC_COMM_SELF,n,idx_new,PETSC_OWN_POINTER,&iscol_o);CHKERRQ(ierr);
3204 
3205     } else if (BsubN < n) {
3206       SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Columns of Bsub (%D) cannot be fewer than columns of B (%D)",BsubN,asub->B->cmap->N);
3207     }
3208 
3209     ierr = PetscFree(garray);CHKERRQ(ierr);
3210     *submat = M;
3211 
3212     /* Save isrow_d, iscol_d and iscol_o used on this process for the next request */
3213     ierr = PetscObjectCompose((PetscObject)M,"isrow_d",(PetscObject)isrow_d);CHKERRQ(ierr);
3214     ierr = ISDestroy(&isrow_d);CHKERRQ(ierr);
3215 
3216     ierr = PetscObjectCompose((PetscObject)M,"iscol_d",(PetscObject)iscol_d);CHKERRQ(ierr);
3217     ierr = ISDestroy(&iscol_d);CHKERRQ(ierr);
3218 
3219     ierr = PetscObjectCompose((PetscObject)M,"iscol_o",(PetscObject)iscol_o);CHKERRQ(ierr);
3220     ierr = ISDestroy(&iscol_o);CHKERRQ(ierr);
3221   }
3222   PetscFunctionReturn(0);
3223 }
3224 
3225 PetscErrorCode MatCreateSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat)
3226 {
3227   PetscErrorCode ierr;
3228   IS             iscol_local=NULL,isrow_d;
3229   PetscInt       csize;
3230   PetscInt       n,i,j,start,end;
3231   PetscBool      sameRowDist=PETSC_FALSE,sameDist[2],tsameDist[2];
3232   MPI_Comm       comm;
3233 
3234   PetscFunctionBegin;
3235   /* If isrow has same processor distribution as mat,
3236      call MatCreateSubMatrix_MPIAIJ_SameRowDist() to avoid using a hash table with global size of iscol */
3237   if (call == MAT_REUSE_MATRIX) {
3238     ierr = PetscObjectQuery((PetscObject)*newmat,"isrow_d",(PetscObject*)&isrow_d);CHKERRQ(ierr);
3239     if (isrow_d) {
3240       sameRowDist  = PETSC_TRUE;
3241       tsameDist[1] = PETSC_TRUE; /* sameColDist */
3242     } else {
3243       ierr = PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_local);CHKERRQ(ierr);
3244       if (iscol_local) {
3245         sameRowDist  = PETSC_TRUE;
3246         tsameDist[1] = PETSC_FALSE; /* !sameColDist */
3247       }
3248     }
3249   } else {
3250     /* Check if isrow has same processor distribution as mat */
3251     sameDist[0] = PETSC_FALSE;
3252     ierr = ISGetLocalSize(isrow,&n);CHKERRQ(ierr);
3253     if (!n) {
3254       sameDist[0] = PETSC_TRUE;
3255     } else {
3256       ierr = ISGetMinMax(isrow,&i,&j);CHKERRQ(ierr);
3257       ierr = MatGetOwnershipRange(mat,&start,&end);CHKERRQ(ierr);
3258       if (i >= start && j < end) {
3259         sameDist[0] = PETSC_TRUE;
3260       }
3261     }
3262 
3263     /* Check if iscol has same processor distribution as mat */
3264     sameDist[1] = PETSC_FALSE;
3265     ierr = ISGetLocalSize(iscol,&n);CHKERRQ(ierr);
3266     if (!n) {
3267       sameDist[1] = PETSC_TRUE;
3268     } else {
3269       ierr = ISGetMinMax(iscol,&i,&j);CHKERRQ(ierr);
3270       ierr = MatGetOwnershipRangeColumn(mat,&start,&end);CHKERRQ(ierr);
3271       if (i >= start && j < end) sameDist[1] = PETSC_TRUE;
3272     }
3273 
3274     ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3275     ierr = MPIU_Allreduce(&sameDist,&tsameDist,2,MPIU_BOOL,MPI_LAND,comm);CHKERRQ(ierr);
3276     sameRowDist = tsameDist[0];
3277   }
3278 
3279   if (sameRowDist) {
3280     if (tsameDist[1]) { /* sameRowDist & sameColDist */
3281       /* isrow and iscol have same processor distribution as mat */
3282       ierr = MatCreateSubMatrix_MPIAIJ_SameRowColDist(mat,isrow,iscol,call,newmat);CHKERRQ(ierr);
3283       PetscFunctionReturn(0);
3284     } else { /* sameRowDist */
3285       /* isrow has same processor distribution as mat */
3286       if (call == MAT_INITIAL_MATRIX) {
3287         PetscBool sorted;
3288         ierr = ISGetSeqIS_Private(mat,iscol,&iscol_local);CHKERRQ(ierr);
3289         ierr = ISGetLocalSize(iscol_local,&n);CHKERRQ(ierr); /* local size of iscol_local = global columns of newmat */
3290         ierr = ISGetSize(iscol,&i);CHKERRQ(ierr);
3291         if (n != i) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"n %D != size of iscol %D",n,i);
3292 
3293         ierr = ISSorted(iscol_local,&sorted);CHKERRQ(ierr);
3294         if (sorted) {
3295           /* MatCreateSubMatrix_MPIAIJ_SameRowDist() requires iscol_local to be sorted; it may contain duplicate indices */
3296           ierr = MatCreateSubMatrix_MPIAIJ_SameRowDist(mat,isrow,iscol,iscol_local,MAT_INITIAL_MATRIX,newmat);CHKERRQ(ierr);
3297           PetscFunctionReturn(0);
3298         }
3299       } else { /* call == MAT_REUSE_MATRIX */
3300         IS    iscol_sub;
3301         ierr = PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_sub);CHKERRQ(ierr);
3302         if (iscol_sub) {
3303           ierr = MatCreateSubMatrix_MPIAIJ_SameRowDist(mat,isrow,iscol,NULL,call,newmat);CHKERRQ(ierr);
3304           PetscFunctionReturn(0);
3305         }
3306       }
3307     }
3308   }
3309 
3310   /* General case: iscol -> iscol_local which has global size of iscol */
3311   if (call == MAT_REUSE_MATRIX) {
3312     ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr);
3313     if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3314   } else {
3315     if (!iscol_local) {
3316       ierr = ISGetSeqIS_Private(mat,iscol,&iscol_local);CHKERRQ(ierr);
3317     }
3318   }
3319 
3320   ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
3321   ierr = MatCreateSubMatrix_MPIAIJ_nonscalable(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr);
3322 
3323   if (call == MAT_INITIAL_MATRIX) {
3324     ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
3325     ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
3326   }
3327   PetscFunctionReturn(0);
3328 }
3329 
3330 /*@C
3331      MatCreateMPIAIJWithSeqAIJ - creates an MPIAIJ matrix using SeqAIJ matrices that contain the "diagonal"
3332          and "off-diagonal" part of the matrix in CSR format.
3333 
3334    Collective on MPI_Comm
3335 
3336    Input Parameters:
3337 +  comm - MPI communicator
3338 .  A - "diagonal" portion of matrix
3339 .  B - "off-diagonal" portion of matrix, may have empty columns, will be destroyed by this routine
3340 -  garray - global index of B columns
3341 
3342    Output Parameter:
3343 .   mat - the matrix, with input A as its local diagonal matrix
3344    Level: advanced
3345 
3346    Notes:
3347        See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix.
3348        A becomes part of the output mat and B is destroyed by this routine; the user may not use A or B afterwards.
3349 
3350 .seealso: MatCreateMPIAIJWithSplitArrays()
3351 @*/
3352 PetscErrorCode MatCreateMPIAIJWithSeqAIJ(MPI_Comm comm,Mat A,Mat B,const PetscInt garray[],Mat *mat)
3353 {
3354   PetscErrorCode ierr;
3355   Mat_MPIAIJ     *maij;
3356   Mat_SeqAIJ     *b=(Mat_SeqAIJ*)B->data,*bnew;
3357   PetscInt       *oi=b->i,*oj=b->j,i,nz,col;
3358   PetscScalar    *oa=b->a;
3359   Mat            Bnew;
3360   PetscInt       m,n,N;
3361 
3362   PetscFunctionBegin;
3363   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
3364   ierr = MatGetSize(A,&m,&n);CHKERRQ(ierr);
3365   if (m != B->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Am %D != Bm %D",m,B->rmap->N);
3366   if (A->rmap->bs != B->rmap->bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"A row bs %D != B row bs %D",A->rmap->bs,B->rmap->bs);
3367   /* remove check below; When B is created using iscol_o from ISGetSeqIS_SameColDist_Private(), its bs may not be same as A */
3368   /* if (A->cmap->bs != B->cmap->bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"A column bs %D != B column bs %D",A->cmap->bs,B->cmap->bs); */
3369 
3370   /* Get global columns of mat */
3371   ierr = MPIU_Allreduce(&n,&N,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3372 
3373   ierr = MatSetSizes(*mat,m,n,PETSC_DECIDE,N);CHKERRQ(ierr);
3374   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
3375   ierr = MatSetBlockSizes(*mat,A->rmap->bs,A->cmap->bs);CHKERRQ(ierr);
3376   maij = (Mat_MPIAIJ*)(*mat)->data;
3377 
3378   (*mat)->preallocated = PETSC_TRUE;
3379 
3380   ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr);
3381   ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr);
3382 
3383   /* Set A as diagonal portion of *mat */
3384   maij->A = A;
3385 
3386   nz = oi[m];
3387   for (i=0; i<nz; i++) {
3388     col   = oj[i];
3389     oj[i] = garray[col];
3390   }
3391 
3392   /* Set Bnew as off-diagonal portion of *mat */
3393   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,N,oi,oj,oa,&Bnew);CHKERRQ(ierr);
3394   bnew        = (Mat_SeqAIJ*)Bnew->data;
3395   bnew->maxnz = b->maxnz; /* allocated nonzeros of B */
3396   maij->B     = Bnew;
3397 
3398   if (B->rmap->N != Bnew->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"BN %D != BnewN %D",B->rmap->N,Bnew->rmap->N);
3399 
3400   b->singlemalloc = PETSC_FALSE; /* B arrays are shared by Bnew */
3401   b->free_a       = PETSC_FALSE;
3402   b->free_ij      = PETSC_FALSE;
3403   ierr = MatDestroy(&B);CHKERRQ(ierr);
3404 
3405   bnew->singlemalloc = PETSC_TRUE; /* arrays will be freed by MatDestroy(&Bnew) */
3406   bnew->free_a       = PETSC_TRUE;
3407   bnew->free_ij      = PETSC_TRUE;
3408 
3409   /* condense columns of maij->B */
3410   ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr);
3411   ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3412   ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3413   ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_FALSE);CHKERRQ(ierr);
3414   ierr = MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3415   PetscFunctionReturn(0);
3416 }
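
/*
   Hedged usage sketch (illustrative only; kept inside #if 0 so it is never compiled):
   building an MPIAIJ matrix from already assembled sequential pieces with
   MatCreateMPIAIJWithSeqAIJ().  The names Adloc, Boff, C and the two-rank 4x4 layout
   below are assumptions made for the example, not part of this file.
*/
#if 0
  Mat            Adloc,Boff,C;
  PetscInt       garray[1] = {3};  /* global column id of the single column of Boff */
  PetscErrorCode ierr;

  /* each rank owns two rows/columns of a 4x4 matrix; on this rank the off-process
     entries touch only global column 3 */
  ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,2,2,2,NULL,&Adloc);CHKERRQ(ierr); /* 2x2 "diagonal" block */
  ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,2,1,1,NULL,&Boff);CHKERRQ(ierr);  /* 2x1 "off-diagonal" block */
  /* ... MatSetValues() and MatAssemblyBegin/End() on Adloc and Boff ... */
  ierr = MatCreateMPIAIJWithSeqAIJ(PETSC_COMM_WORLD,Adloc,Boff,garray,&C);CHKERRQ(ierr);
  /* Adloc is now the diagonal block of C and Boff has been destroyed by the call;
     neither may be used on its own afterwards */
#endif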
3417 
3418 extern PetscErrorCode MatCreateSubMatrices_MPIAIJ_SingleIS_Local(Mat,PetscInt,const IS[],const IS[],MatReuse,PetscBool,Mat*);
3419 
3420 PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowDist(Mat mat,IS isrow,IS iscol,IS iscol_local,MatReuse call,Mat *newmat)
3421 {
3422   PetscErrorCode ierr;
3423   PetscInt       i,m,n,rstart,row,rend,nz,j,bs,cbs;
3424   PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal;
3425   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)mat->data;
3426   Mat            M,Msub,B=a->B;
3427   MatScalar      *aa;
3428   Mat_SeqAIJ     *aij;
3429   PetscInt       *garray = a->garray,*colsub,Ncols;
3430   PetscInt       count,Bn=B->cmap->N,cstart=mat->cmap->rstart,cend=mat->cmap->rend;
3431   IS             iscol_sub,iscmap;
3432   const PetscInt *is_idx,*cmap;
3433   PetscBool      allcolumns=PETSC_FALSE;
3434   MPI_Comm       comm;
3435 
3436   PetscFunctionBegin;
3437   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3438 
3439   if (call == MAT_REUSE_MATRIX) {
3440     ierr = PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_sub);CHKERRQ(ierr);
3441     if (!iscol_sub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"SubIScol passed in was not used before, cannot reuse");
3442     ierr = ISGetLocalSize(iscol_sub,&count);CHKERRQ(ierr);
3443 
3444     ierr = PetscObjectQuery((PetscObject)*newmat,"Subcmap",(PetscObject*)&iscmap);CHKERRQ(ierr);
3445     if (!iscmap) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Subcmap passed in was not used before, cannot reuse");
3446 
3447     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Msub);CHKERRQ(ierr);
3448     if (!Msub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3449 
3450     ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol_sub,MAT_REUSE_MATRIX,PETSC_FALSE,&Msub);CHKERRQ(ierr);
3451 
3452   } else { /* call == MAT_INITIAL_MATRIX */
3453     PetscBool flg;
3454 
3455     ierr = ISGetLocalSize(iscol,&n);CHKERRQ(ierr);
3456     ierr = ISGetSize(iscol,&Ncols);CHKERRQ(ierr);
3457 
3458     /* (1) iscol -> nonscalable iscol_local */
3459     /* Check for special case: each processor gets entire matrix columns */
3460     ierr = ISIdentity(iscol_local,&flg);CHKERRQ(ierr);
3461     if (flg && n == mat->cmap->N) allcolumns = PETSC_TRUE;
3462     if (allcolumns) {
3463       iscol_sub = iscol_local;
3464       ierr = PetscObjectReference((PetscObject)iscol_local);CHKERRQ(ierr);
3465       ierr = ISCreateStride(PETSC_COMM_SELF,n,0,1,&iscmap);CHKERRQ(ierr);
3466 
3467     } else {
3468       /* (2) iscol_local -> iscol_sub and iscmap. The implementation below requires iscol_local to be sorted; it may contain duplicate indices */
3469       PetscInt *idx,*cmap1,k;
3470       ierr = PetscMalloc1(Ncols,&idx);CHKERRQ(ierr);
3471       ierr = PetscMalloc1(Ncols,&cmap1);CHKERRQ(ierr);
3472       ierr = ISGetIndices(iscol_local,&is_idx);CHKERRQ(ierr);
3473       count = 0;
3474       k     = 0;
3475       for (i=0; i<Ncols; i++) {
3476         j = is_idx[i];
3477         if (j >= cstart && j < cend) {
3478           /* diagonal part of mat */
3479           idx[count]     = j;
3480           cmap1[count++] = i; /* column index in submat */
3481         } else if (Bn) {
3482           /* off-diagonal part of mat */
3483           if (j == garray[k]) {
3484             idx[count]     = j;
3485             cmap1[count++] = i;  /* column index in submat */
3486           } else if (j > garray[k]) {
3487             while (j > garray[k] && k < Bn-1) k++;
3488             if (j == garray[k]) {
3489               idx[count]     = j;
3490               cmap1[count++] = i; /* column index in submat */
3491             }
3492           }
3493         }
3494       }
3495       ierr = ISRestoreIndices(iscol_local,&is_idx);CHKERRQ(ierr);
3496 
3497       ierr = ISCreateGeneral(PETSC_COMM_SELF,count,idx,PETSC_OWN_POINTER,&iscol_sub);CHKERRQ(ierr);
3498       ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr);
3499       ierr = ISSetBlockSize(iscol_sub,cbs);CHKERRQ(ierr);
3500 
3501       ierr = ISCreateGeneral(PetscObjectComm((PetscObject)iscol_local),count,cmap1,PETSC_OWN_POINTER,&iscmap);CHKERRQ(ierr);
3502     }
3503 
3504     /* (3) Create sequential Msub */
3505     ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol_sub,MAT_INITIAL_MATRIX,allcolumns,&Msub);CHKERRQ(ierr);
3506   }
3507 
3508   ierr = ISGetLocalSize(iscol_sub,&count);CHKERRQ(ierr);
3509   aij  = (Mat_SeqAIJ*)(Msub)->data;
3510   ii   = aij->i;
3511   ierr = ISGetIndices(iscmap,&cmap);CHKERRQ(ierr);
3512 
3513   /*
3514       m - number of local rows
3515       Ncols - number of columns (same on all processors)
3516       rstart - first row in new global matrix generated
3517   */
3518   ierr = MatGetSize(Msub,&m,NULL);CHKERRQ(ierr);
3519 
3520   if (call == MAT_INITIAL_MATRIX) {
3521     /* (4) Create parallel newmat */
3522     PetscMPIInt    rank,size;
3523     PetscInt       csize;
3524 
3525     ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3526     ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3527 
3528     /*
3529         Determine the number of non-zeros in the diagonal and off-diagonal
3530         portions of the matrix in order to do correct preallocation
3531     */
3532 
3533     /* first get start and end of "diagonal" columns */
3534     ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
3535     if (csize == PETSC_DECIDE) {
3536       ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
3537       if (mglobal == Ncols) { /* square matrix */
3538         nlocal = m;
3539       } else {
3540         nlocal = Ncols/size + ((Ncols % size) > rank);
3541       }
3542     } else {
3543       nlocal = csize;
3544     }
3545     ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3546     rstart = rend - nlocal;
3547     if (rank == size - 1 && rend != Ncols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,Ncols);
3548 
3549     /* next, compute all the lengths */
3550     jj    = aij->j;
3551     ierr  = PetscMalloc1(2*m+1,&dlens);CHKERRQ(ierr);
3552     olens = dlens + m;
3553     for (i=0; i<m; i++) {
3554       jend = ii[i+1] - ii[i];
3555       olen = 0;
3556       dlen = 0;
3557       for (j=0; j<jend; j++) {
3558         if (cmap[*jj] < rstart || cmap[*jj] >= rend) olen++;
3559         else dlen++;
3560         jj++;
3561       }
3562       olens[i] = olen;
3563       dlens[i] = dlen;
3564     }
3565 
3566     ierr = ISGetBlockSize(isrow,&bs);CHKERRQ(ierr);
3567     ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr);
3568 
3569     ierr = MatCreate(comm,&M);CHKERRQ(ierr);
3570     ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,Ncols);CHKERRQ(ierr);
3571     ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr);
3572     ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr);
3573     ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr);
3574     ierr = PetscFree(dlens);CHKERRQ(ierr);
3575 
3576   } else { /* call == MAT_REUSE_MATRIX */
3577     M    = *newmat;
3578     ierr = MatGetLocalSize(M,&i,NULL);CHKERRQ(ierr);
3579     if (i != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
3580     ierr = MatZeroEntries(M);CHKERRQ(ierr);
3581     /*
3582          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
3583        rather than the slower MatSetValues().
3584     */
3585     M->was_assembled = PETSC_TRUE;
3586     M->assembled     = PETSC_FALSE;
3587   }
3588 
3589   /* (5) Set values of Msub to *newmat */
3590   ierr = PetscMalloc1(count,&colsub);CHKERRQ(ierr);
3591   ierr = MatGetOwnershipRange(M,&rstart,NULL);CHKERRQ(ierr);
3592 
3593   jj   = aij->j;
3594   aa   = aij->a;
3595   for (i=0; i<m; i++) {
3596     row = rstart + i;
3597     nz  = ii[i+1] - ii[i];
3598     for (j=0; j<nz; j++) colsub[j] = cmap[jj[j]];
3599     ierr  = MatSetValues_MPIAIJ(M,1,&row,nz,colsub,aa,INSERT_VALUES);CHKERRQ(ierr);
3600     jj += nz; aa += nz;
3601   }
3602   ierr = ISRestoreIndices(iscmap,&cmap);CHKERRQ(ierr);
3603 
3604   ierr    = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3605   ierr    = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3606 
3607   ierr = PetscFree(colsub);CHKERRQ(ierr);
3608 
3609   /* save Msub, iscol_sub and iscmap used on this process for the next request */
3610   if (call ==  MAT_INITIAL_MATRIX) {
3611     *newmat = M;
3612     ierr = PetscObjectCompose((PetscObject)(*newmat),"SubMatrix",(PetscObject)Msub);CHKERRQ(ierr);
3613     ierr = MatDestroy(&Msub);CHKERRQ(ierr);
3614 
3615     ierr = PetscObjectCompose((PetscObject)(*newmat),"SubIScol",(PetscObject)iscol_sub);CHKERRQ(ierr);
3616     ierr = ISDestroy(&iscol_sub);CHKERRQ(ierr);
3617 
3618     ierr = PetscObjectCompose((PetscObject)(*newmat),"Subcmap",(PetscObject)iscmap);CHKERRQ(ierr);
3619     ierr = ISDestroy(&iscmap);CHKERRQ(ierr);
3620 
3621     if (iscol_local) {
3622       ierr = PetscObjectCompose((PetscObject)(*newmat),"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
3623       ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
3624     }
3625   }
3626   PetscFunctionReturn(0);
3627 }
3628 
3629 /*
3630     Not great since it makes two copies of the submatrix: first a SeqAIJ copy of
3631   the local rows, then the final result built by concatenating the local matrices.
3632   Writing it directly would be much like MatCreateSubMatrices_MPIAIJ().
3633 
3634   Note: This requires a sequential iscol containing all of the requested indices.
3635 */
3636 PetscErrorCode MatCreateSubMatrix_MPIAIJ_nonscalable(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat)
3637 {
3638   PetscErrorCode ierr;
3639   PetscMPIInt    rank,size;
3640   PetscInt       i,m,n,rstart,row,rend,nz,*cwork,j,bs,cbs;
3641   PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal;
3642   Mat            M,Mreuse;
3643   MatScalar      *aa,*vwork;
3644   MPI_Comm       comm;
3645   Mat_SeqAIJ     *aij;
3646   PetscBool      colflag,allcolumns=PETSC_FALSE;
3647 
3648   PetscFunctionBegin;
3649   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3650   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3651   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3652 
3653   /* Check for special case: each processor gets entire matrix columns */
3654   ierr = ISIdentity(iscol,&colflag);CHKERRQ(ierr);
3655   ierr = ISGetLocalSize(iscol,&n);CHKERRQ(ierr);
3656   if (colflag && n == mat->cmap->N) allcolumns = PETSC_TRUE;
3657 
3658   if (call ==  MAT_REUSE_MATRIX) {
3659     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Mreuse);CHKERRQ(ierr);
3660     if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3661     ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,allcolumns,&Mreuse);CHKERRQ(ierr);
3662   } else {
3663     ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,allcolumns,&Mreuse);CHKERRQ(ierr);
3664   }
3665 
3666   /*
3667       m - number of local rows
3668       n - number of columns (same on all processors)
3669       rstart - first row in new global matrix generated
3670   */
3671   ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
3672   ierr = MatGetBlockSizes(Mreuse,&bs,&cbs);CHKERRQ(ierr);
3673   if (call == MAT_INITIAL_MATRIX) {
3674     aij = (Mat_SeqAIJ*)(Mreuse)->data;
3675     ii  = aij->i;
3676     jj  = aij->j;
3677 
3678     /*
3679         Determine the number of non-zeros in the diagonal and off-diagonal
3680         portions of the matrix in order to do correct preallocation
3681     */
3682 
3683     /* first get start and end of "diagonal" columns */
3684     if (csize == PETSC_DECIDE) {
3685       ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
3686       if (mglobal == n) { /* square matrix */
3687         nlocal = m;
3688       } else {
3689         nlocal = n/size + ((n % size) > rank);
3690       }
3691     } else {
3692       nlocal = csize;
3693     }
3694     ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3695     rstart = rend - nlocal;
3696     if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n);
3697 
3698     /* next, compute all the lengths */
3699     ierr  = PetscMalloc1(2*m+1,&dlens);CHKERRQ(ierr);
3700     olens = dlens + m;
3701     for (i=0; i<m; i++) {
3702       jend = ii[i+1] - ii[i];
3703       olen = 0;
3704       dlen = 0;
3705       for (j=0; j<jend; j++) {
3706         if (*jj < rstart || *jj >= rend) olen++;
3707         else dlen++;
3708         jj++;
3709       }
3710       olens[i] = olen;
3711       dlens[i] = dlen;
3712     }
3713     ierr = MatCreate(comm,&M);CHKERRQ(ierr);
3714     ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr);
3715     ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr);
3716     ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr);
3717     ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr);
3718     ierr = PetscFree(dlens);CHKERRQ(ierr);
3719   } else {
3720     PetscInt ml,nl;
3721 
3722     M    = *newmat;
3723     ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
3724     if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
3725     ierr = MatZeroEntries(M);CHKERRQ(ierr);
3726     /*
3727          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
3728        rather than the slower MatSetValues().
3729     */
3730     M->was_assembled = PETSC_TRUE;
3731     M->assembled     = PETSC_FALSE;
3732   }
3733   ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
3734   aij  = (Mat_SeqAIJ*)(Mreuse)->data;
3735   ii   = aij->i;
3736   jj   = aij->j;
3737   aa   = aij->a;
3738   for (i=0; i<m; i++) {
3739     row   = rstart + i;
3740     nz    = ii[i+1] - ii[i];
3741     cwork = jj;     jj += nz;
3742     vwork = aa;     aa += nz;
3743     ierr  = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
3744   }
3745 
3746   ierr    = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3747   ierr    = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3748   *newmat = M;
3749 
3750   /* save the submatrix used on this process for the next request */
3751   if (call ==  MAT_INITIAL_MATRIX) {
3752     ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
3753     ierr = MatDestroy(&Mreuse);CHKERRQ(ierr);
3754   }
3755   PetscFunctionReturn(0);
3756 }
3757 
3758 PetscErrorCode MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[])
3759 {
3760   PetscInt       m,cstart, cend,j,nnz,i,d;
3761   PetscInt       *d_nnz,*o_nnz,nnz_max = 0,rstart,ii;
3762   const PetscInt *JJ;
3763   PetscScalar    *values;
3764   PetscErrorCode ierr;
3765   PetscBool      nooffprocentries;
3766 
3767   PetscFunctionBegin;
3768   if (Ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0, it is %D",Ii[0]);
3769 
3770   ierr   = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3771   ierr   = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3772   m      = B->rmap->n;
3773   cstart = B->cmap->rstart;
3774   cend   = B->cmap->rend;
3775   rstart = B->rmap->rstart;
3776 
3777   ierr = PetscMalloc2(m,&d_nnz,m,&o_nnz);CHKERRQ(ierr);
3778 
3779 #if defined(PETSC_USE_DEBUG)
3780   for (i=0; i<m; i++) {
3781     nnz = Ii[i+1]- Ii[i];
3782     JJ  = J + Ii[i];
3783     if (nnz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative %D number of columns",i,nnz);
3784     if (nnz && (JJ[0] < 0)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index %D",i,JJ[0]);
3785     if (nnz && (JJ[nnz-1] >= B->cmap->N)) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N);
3786   }
3787 #endif
3788 
3789   for (i=0; i<m; i++) {
3790     nnz     = Ii[i+1]- Ii[i];
3791     JJ      = J + Ii[i];
3792     nnz_max = PetscMax(nnz_max,nnz);
3793     d       = 0;
3794     for (j=0; j<nnz; j++) {
3795       if (cstart <= JJ[j] && JJ[j] < cend) d++;
3796     }
3797     d_nnz[i] = d;
3798     o_nnz[i] = nnz - d;
3799   }
3800   ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
3801   ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);
3802 
3803   if (v) values = (PetscScalar*)v;
3804   else {
3805     ierr = PetscCalloc1(nnz_max+1,&values);CHKERRQ(ierr);
3806   }
3807 
3808   for (i=0; i<m; i++) {
3809     ii   = i + rstart;
3810     nnz  = Ii[i+1]- Ii[i];
3811     ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ? Ii[i] : 0),INSERT_VALUES);CHKERRQ(ierr);
3812   }
3813   nooffprocentries    = B->nooffprocentries;
3814   B->nooffprocentries = PETSC_TRUE;
3815   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3816   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3817   B->nooffprocentries = nooffprocentries;
3818 
3819   if (!v) {
3820     ierr = PetscFree(values);CHKERRQ(ierr);
3821   }
3822   ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3823   PetscFunctionReturn(0);
3824 }
3825 
3826 /*@
3827    MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format
3828    (the default parallel PETSc format).
3829 
3830    Collective on MPI_Comm
3831 
3832    Input Parameters:
3833 +  B - the matrix
3834 .  i - the indices into j for the start of each local row (starts with zero)
3835 .  j - the column indices for each local row (starts with zero)
3836 -  v - optional values in the matrix
3837 
3838    Level: developer
3839 
3840    Notes:
3841        The i, j, and v arrays ARE copied by this routine into the internal format used by PETSc;
3842      thus you CANNOT change the matrix entries by changing the values of v[] after you have
3843      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3844 
3845        The i and j indices are 0 based, and the i indices are offsets into the local j array.
3846 
3847        The format used for the sparse matrix input is equivalent to a
3848     row-major ordering, i.e. for the following matrix, the input data expected is
3849     as shown:
3850 
3851 $        1 0 0
3852 $        2 0 3     P0
3853 $       -------
3854 $        4 5 6     P1
3855 $
3856 $     Process0 [P0]: rows_owned=[0,1]
3857 $        i =  {0,1,3}  [size = nrow+1  = 2+1]
3858 $        j =  {0,0,2}  [size = 3]
3859 $        v =  {1,2,3}  [size = 3]
3860 $
3861 $     Process1 [P1]: rows_owned=[2]
3862 $        i =  {0,3}    [size = nrow+1  = 1+1]
3863 $        j =  {0,1,2}  [size = 3]
3864 $        v =  {4,5,6}  [size = 3]
3865 
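      For illustration only, a minimal sketch of how process 0 in the example above
    might supply these arrays (B, ierr, and the preceding MatCreate()/MatSetSizes()/MatSetType()
    calls are assumed from context):

$        PetscInt    i[] = {0,1,3};        /* offsets into j[] and v[] for the 2 local rows */
$        PetscInt    j[] = {0,0,2};        /* global column indices */
$        PetscScalar v[] = {1.0,2.0,3.0};
$
$        ierr = MatMPIAIJSetPreallocationCSR(B,i,j,v);CHKERRQ(ierr);
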
3866 .keywords: matrix, aij, compressed row, sparse, parallel
3867 
3868 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateAIJ(), MATMPIAIJ,
3869           MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays()
3870 @*/
3871 PetscErrorCode  MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
3872 {
3873   PetscErrorCode ierr;
3874 
3875   PetscFunctionBegin;
3876   ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocationCSR_C",(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,i,j,v));CHKERRQ(ierr);
3877   PetscFunctionReturn(0);
3878 }
3879 
3880 /*@C
3881    MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format
3882    (the default parallel PETSc format).  For good matrix assembly performance
3883    the user should preallocate the matrix storage by setting the parameters
3884    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3885    performance can be increased by more than a factor of 50.
3886 
3887    Collective on MPI_Comm
3888 
3889    Input Parameters:
3890 +  B - the matrix
3891 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
3892            (same value is used for all local rows)
3893 .  d_nnz - array containing the number of nonzeros in the various rows of the
3894            DIAGONAL portion of the local submatrix (possibly different for each row)
3895            or NULL (PETSC_NULL_INTEGER in Fortran), if d_nz is used to specify the nonzero structure.
3896            The size of this array is equal to the number of local rows, i.e 'm'.
3897            For matrices that will be factored, you must leave room for (and set)
3898            the diagonal entry even if it is zero.
3899 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
3900            submatrix (same value is used for all local rows).
3901 -  o_nnz - array containing the number of nonzeros in the various rows of the
3902            OFF-DIAGONAL portion of the local submatrix (possibly different for
3903            each row) or NULL (PETSC_NULL_INTEGER in Fortran), if o_nz is used to specify the nonzero
3904            structure. The size of this array is equal to the number
3905            of local rows, i.e 'm'.
3906 
3907    If the *_nnz parameter is given then the *_nz parameter is ignored
3908 
3909    The AIJ format (also called the Yale sparse matrix format or
3910    compressed row storage (CSR)), is fully compatible with standard Fortran 77
3911    storage.  The stored row and column indices begin with zero.
3912    See Users-Manual: ch_mat for details.
3913 
3914    The parallel matrix is partitioned such that the first m0 rows belong to
3915    process 0, the next m1 rows belong to process 1, the next m2 rows belong
3916    to process 2, etc., where m0,m1,m2,... are the input parameter 'm'.
3917 
3918    The DIAGONAL portion of the local submatrix of a processor can be defined
3919    as the submatrix which is obtained by extracting the part corresponding to
3920    the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the
3921    first row that belongs to the processor, r2 is the last row belonging to
3922    this processor, and c1-c2 is the range of indices of the local part of a
3923    vector suitable for applying the matrix to.  This is an mxn matrix.  In the
3924    common case of a square matrix, the row and column ranges are the same and
3925    the DIAGONAL part is also square. The remaining portion of the local
3926    submatrix (mxN) constitutes the OFF-DIAGONAL portion.
3927 
3928    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
3929 
3930    You can call MatGetInfo() to get information on how effective the preallocation was;
3931    for example the fields mallocs, nz_allocated, nz_used, and nz_unneeded.
3932    You can also run with the option -info and look for messages with the string
3933    malloc in them to see if additional memory allocation was needed.
3934 
3935    Example usage:
3936 
3937    Consider the following 8x8 matrix with 34 non-zero values, that is
3938    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
3939    proc1 owns 3 rows, and proc2 owns 2 rows. This division can be shown
3940    as follows:
3941 
3942 .vb
3943             1  2  0  |  0  3  0  |  0  4
3944     Proc0   0  5  6  |  7  0  0  |  8  0
3945             9  0 10  | 11  0  0  | 12  0
3946     -------------------------------------
3947            13  0 14  | 15 16 17  |  0  0
3948     Proc1   0 18  0  | 19 20 21  |  0  0
3949             0  0  0  | 22 23  0  | 24  0
3950     -------------------------------------
3951     Proc2  25 26 27  |  0  0 28  | 29  0
3952            30  0  0  | 31 32 33  |  0 34
3953 .ve
3954 
3955    This can be represented as a collection of submatrices as:
3956 
3957 .vb
3958       A B C
3959       D E F
3960       G H I
3961 .ve
3962 
3963    Where the submatrices A,B,C are owned by proc0, D,E,F are
3964    owned by proc1, G,H,I are owned by proc2.
3965 
3966    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3967    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3968    The 'M','N' parameters are 8,8, and have the same values on all procs.
3969 
3970    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
3971    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
3972    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
3973    Internally, each processor stores the DIAGONAL part and the OFF-DIAGONAL
3974    part as SeqAIJ matrices. For example, proc1 will store [E] as one SeqAIJ
3975    matrix, and [DF] as another SeqAIJ matrix.
3976 
3977    When d_nz, o_nz parameters are specified, d_nz storage elements are
3978    allocated for every row of the local diagonal submatrix, and o_nz
3979    storage locations are allocated for every row of the OFF-DIAGONAL submatrix.
3980    One way to choose d_nz and o_nz is to use the max nonzeros per local
3981    row for each of the local DIAGONAL and OFF-DIAGONAL submatrices.
3982    In this case, the values of d_nz,o_nz are:
3983 .vb
3984      proc0 : dnz = 2, o_nz = 2
3985      proc1 : dnz = 3, o_nz = 2
3986      proc2 : dnz = 1, o_nz = 4
3987 .ve
3988    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
3989    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
3990    for proc2, i.e. we are using 12+15+10=37 storage locations to store
3991    34 values.
3992 
3993    When d_nnz, o_nnz parameters are specified, the storage is specified
3994    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
3995    In the above case the values for d_nnz,o_nnz are:
3996 .vb
3997      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
3998      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
3999      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
4000 .ve
4001    Here the space allocated is the sum of all the above values, i.e. 34, and
4002    hence the preallocation is perfect.
4003 
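   As an illustration only, the preallocation call that process 1 in the example above
   might make (B and ierr are assumed declared, and the matrix creation, size setting,
   and type setting are assumed to have been done already):

.vb
     PetscInt d_nnz[3] = {3,3,2};   /* nonzeros per local row in the DIAGONAL block */
     PetscInt o_nnz[3] = {2,1,1};   /* nonzeros per local row in the OFF-DIAGONAL block */

     ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
.ve
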
4004    Level: intermediate
4005 
4006 .keywords: matrix, aij, compressed row, sparse, parallel
4007 
4008 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateAIJ(), MatMPIAIJSetPreallocationCSR(),
4009           MATMPIAIJ, MatGetInfo(), PetscSplitOwnership()
4010 @*/
4011 PetscErrorCode MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
4012 {
4013   PetscErrorCode ierr;
4014 
4015   PetscFunctionBegin;
4016   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
4017   PetscValidType(B,1);
4018   ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocation_C",(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr);
4019   PetscFunctionReturn(0);
4020 }
4021 
4022 /*@
4023      MatCreateMPIAIJWithArrays - creates a MPI AIJ matrix using arrays that contain in standard
4024          CSR format the local rows.
4025 
4026    Collective on MPI_Comm
4027 
4028    Input Parameters:
4029 +  comm - MPI communicator
4030 .  m - number of local rows (Cannot be PETSC_DECIDE)
4031 .  n - This value should be the same as the local size used in creating the
4032        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
4033        calculated if N is given) For square matrices n is almost always m.
4034 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
4035 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
4036 .   i - row indices
4037 .   j - column indices
4038 -   a - matrix values
4039 
4040    Output Parameter:
4041 .   mat - the matrix
4042 
4043    Level: intermediate
4044 
4045    Notes:
4046        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
4047      thus you CANNOT change the matrix entries by changing the values of a[] after you have
4048      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
4049 
4050        The i and j indices are 0 based, and the i indices are offsets into the local j array.
4051 
4052        The format used for the sparse matrix input is equivalent to a
4053     row-major ordering, i.e. for the following matrix, the input data expected is
4054     as shown:
4055 
4056 $        1 0 0
4057 $        2 0 3     P0
4058 $       -------
4059 $        4 5 6     P1
4060 $
4061 $     Process0 [P0]: rows_owned=[0,1]
4062 $        i =  {0,1,3}  [size = nrow+1  = 2+1]
4063 $        j =  {0,0,2}  [size = 3]
4064 $        v =  {1,2,3}  [size = 3]
4065 $
4066 $     Process1 [P1]: rows_owned=[2]
4067 $        i =  {0,3}    [size = nrow+1  = 1+1]
4068 $        j =  {0,1,2}  [size = 3]
4069 $        v =  {4,5,6}  [size = 3]
4070 
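      For illustration only, a minimal sketch for process 1 in the example above
    (the arrays are copied, so they may be freed or reused once the call returns;
    comm and ierr are assumed to be declared elsewhere):

$        PetscInt    i[] = {0,3};
$        PetscInt    j[] = {0,1,2};
$        PetscScalar v[] = {4.0,5.0,6.0};
$        Mat         A;
$
$        ierr = MatCreateMPIAIJWithArrays(comm,1,PETSC_DECIDE,3,3,i,j,v,&A);CHKERRQ(ierr);
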
4071 .keywords: matrix, aij, compressed row, sparse, parallel
4072 
4073 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
4074           MATMPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays()
4075 @*/
4076 PetscErrorCode MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
4077 {
4078   PetscErrorCode ierr;
4079 
4080   PetscFunctionBegin;
4081   if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
4082   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
4083   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
4084   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
4085   /* ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); */
4086   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
4087   ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr);
4088   PetscFunctionReturn(0);
4089 }
4090 
4091 /*@C
4092    MatCreateAIJ - Creates a sparse parallel matrix in AIJ format
4093    (the default parallel PETSc format).  For good matrix assembly performance
4094    the user should preallocate the matrix storage by setting the parameters
4095    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
4096    performance can be increased by more than a factor of 50.
4097 
4098    Collective on MPI_Comm
4099 
4100    Input Parameters:
4101 +  comm - MPI communicator
4102 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
4103            This value should be the same as the local size used in creating the
4104            y vector for the matrix-vector product y = Ax.
4105 .  n - This value should be the same as the local size used in creating the
4106        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
4107        calculated if N is given) For square matrices n is almost always m.
4108 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
4109 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
4110 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
4111            (same value is used for all local rows)
4112 .  d_nnz - array containing the number of nonzeros in the various rows of the
4113            DIAGONAL portion of the local submatrix (possibly different for each row)
4114            or NULL, if d_nz is used to specify the nonzero structure.
4115            The size of this array is equal to the number of local rows, i.e 'm'.
4116 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
4117            submatrix (same value is used for all local rows).
4118 -  o_nnz - array containing the number of nonzeros in the various rows of the
4119            OFF-DIAGONAL portion of the local submatrix (possibly different for
4120            each row) or NULL, if o_nz is used to specify the nonzero
4121            structure. The size of this array is equal to the number
4122            of local rows, i.e 'm'.
4123 
4124    Output Parameter:
4125 .  A - the matrix
4126 
4127    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
4128    MatXXXXSetPreallocation() paradigm instead of this routine directly.
4129    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation()]
4130 
4131    Notes:
4132    If the *_nnz parameter is given then the *_nz parameter is ignored
4133 
4134    m,n,M,N parameters specify the size of the matrix, and its partitioning across
4135    processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
4136    storage requirements for this matrix.
4137 
4138    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one
4139    processor then it must be used on all processors that share the object for
4140    that argument.
4141 
4142    The user MUST specify either the local or global matrix dimensions
4143    (possibly both).
4144 
4145    The parallel matrix is partitioned across processors such that the
4146    first m0 rows belong to process 0, the next m1 rows belong to
4147    process 1, the next m2 rows belong to process 2, etc., where
4148    m0,m1,m2,... are the input parameter 'm', i.e. each processor stores
4149    values corresponding to an [m x N] submatrix.
4150 
4151    The columns are logically partitioned with the n0 columns belonging
4152    to the 0th partition, the next n1 columns belonging to the next
4153    partition, etc., where n0,n1,n2,... are the input parameter 'n'.
4154 
4155    The DIAGONAL portion of the local submatrix on any given processor
4156    is the submatrix corresponding to the rows and columns m,n
4157    associated with that processor, i.e. the diagonal submatrix on
4158    process 0 is [m0 x n0], the diagonal submatrix on process 1 is [m1 x n1],
4159    etc. The remaining portion of the local submatrix [m x (N-n)]
4160    constitutes the OFF-DIAGONAL portion. The example below better
4161    illustrates this concept.
4162 
4163    For a square global matrix we define each processor's diagonal portion
4164    to be its local rows and the corresponding columns (a square submatrix);
4165    each processor's off-diagonal portion encompasses the remainder of the
4166    local matrix (a rectangular submatrix).
4167 
4168    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
4169 
4170    When calling this routine with a single process communicator, a matrix of
4171    type SEQAIJ is returned.  If a matrix of type MPIAIJ is desired for this
4172    type of communicator, use the construction mechanism
4173 .vb
4174      MatCreate(...,&A);
4175      MatSetType(A,MATMPIAIJ);
4176      MatSetSizes(A, m,n,M,N);
4177      MatMPIAIJSetPreallocation(A,...);
4178 .ve
4179 
4182    By default, this format uses inodes (identical nodes) when possible.
4183    We search for consecutive rows with the same nonzero structure, thereby
4184    reusing matrix information to achieve increased efficiency.
4185 
4186    Options Database Keys:
4187 +  -mat_no_inode  - Do not use inodes
4188 .  -mat_inode_limit <limit> - Sets inode limit (max limit=5)
4189 -  -mat_aij_oneindex - Internally use indexing starting at 1
4190         rather than 0.  Note that when calling MatSetValues(),
4191         the user still MUST index entries starting at 0!
4192 
4193 
4194    Example usage:
4195 
4196    Consider the following 8x8 matrix with 34 non-zero values, that is
4197    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
4198    proc1 owns 3 rows, and proc2 owns 2 rows. This division can be shown
4199    as follows:
4200 
4201 .vb
4202             1  2  0  |  0  3  0  |  0  4
4203     Proc0   0  5  6  |  7  0  0  |  8  0
4204             9  0 10  | 11  0  0  | 12  0
4205     -------------------------------------
4206            13  0 14  | 15 16 17  |  0  0
4207     Proc1   0 18  0  | 19 20 21  |  0  0
4208             0  0  0  | 22 23  0  | 24  0
4209     -------------------------------------
4210     Proc2  25 26 27  |  0  0 28  | 29  0
4211            30  0  0  | 31 32 33  |  0 34
4212 .ve
4213 
4214    This can be represented as a collection of submatrices as
4215 
4216 .vb
4217       A B C
4218       D E F
4219       G H I
4220 .ve
4221 
4222    Where the submatrices A,B,C are owned by proc0, D,E,F are
4223    owned by proc1, G,H,I are owned by proc2.
4224 
4225    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4226    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4227    The 'M','N' parameters are 8,8, and have the same values on all procs.
4228 
4229    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
4230    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
4231    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
4232    Internally, each processor stores the DIAGONAL part and the OFF-DIAGONAL
4233    part as SeqAIJ matrices. For example, proc1 will store [E] as one SeqAIJ
4234    matrix, and [DF] as another SeqAIJ matrix.
4235 
4236    When d_nz, o_nz parameters are specified, d_nz storage elements are
4237    allocated for every row of the local diagonal submatrix, and o_nz
4238    storage locations are allocated for every row of the OFF-DIAGONAL submatrix.
4239    One way to choose d_nz and o_nz is to use the max nonzeros per local
4240    row for each of the local DIAGONAL and OFF-DIAGONAL submatrices.
4241    In this case, the values of d_nz,o_nz are
4242 .vb
4243      proc0 : dnz = 2, o_nz = 2
4244      proc1 : dnz = 3, o_nz = 2
4245      proc2 : dnz = 1, o_nz = 4
4246 .ve
4247    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
4248    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
4249    for proc2, i.e. we are using 12+15+10=37 storage locations to store
4250    34 values.
4251 
4252    When d_nnz, o_nnz parameters are specified, the storage is specified
4253    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
4254    In the above case the values for d_nnz,o_nnz are
4255 .vb
4256      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
4257      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
4258      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
4259 .ve
4260    Here the space allocated is the sum of all the above values, i.e. 34, and
4261    hence the preallocation is perfect.
4262 
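   As an illustration only, the single call that proc2 in the example above might make
   to create its share of the matrix (comm and ierr are assumed to be declared elsewhere):

.vb
     PetscInt d_nnz[2] = {1,1};
     PetscInt o_nnz[2] = {4,4};
     Mat      A;

     ierr = MatCreateAIJ(comm,2,2,8,8,0,d_nnz,0,o_nnz,&A);CHKERRQ(ierr);
.ve
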
4263    Level: intermediate
4264 
4265 .keywords: matrix, aij, compressed row, sparse, parallel
4266 
4267 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
4268           MATMPIAIJ, MatCreateMPIAIJWithArrays()
4269 @*/
4270 PetscErrorCode  MatCreateAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
4271 {
4272   PetscErrorCode ierr;
4273   PetscMPIInt    size;
4274 
4275   PetscFunctionBegin;
4276   ierr = MatCreate(comm,A);CHKERRQ(ierr);
4277   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
4278   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4279   if (size > 1) {
4280     ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr);
4281     ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
4282   } else {
4283     ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr);
4284     ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr);
4285   }
4286   PetscFunctionReturn(0);
4287 }
4288 
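/*
   MatMPIAIJGetSeqAIJ - returns the diagonal block, the off-diagonal block, and the
   local-to-global column map (garray) of a MATMPIAIJ matrix; any of the output
   arguments may be NULL if not needed.
*/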
4289 PetscErrorCode MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,const PetscInt *colmap[])
4290 {
4291   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
4292   PetscBool      flg;
4293   PetscErrorCode ierr;
4294 
4295   PetscFunctionBegin;
4296   ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&flg);CHKERRQ(ierr);
4297   if (!flg) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"This function requires a MATMPIAIJ matrix as input");
4298   if (Ad)     *Ad     = a->A;
4299   if (Ao)     *Ao     = a->B;
4300   if (colmap) *colmap = a->garray;
4301   PetscFunctionReturn(0);
4302 }
4303 
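/*
   MatCreateMPIMatConcatenateSeqMat_MPIAIJ - stacks the rows of the sequential matrices
   supplied by each process on top of each other to form one parallel AIJ matrix; with
   MAT_INITIAL_MATRIX a symbolic (preallocation) phase is performed first, with
   MAT_REUSE_MATRIX only the numerical values are reinserted.
*/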
4304 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
4305 {
4306   PetscErrorCode ierr;
4307   PetscInt       m,N,i,rstart,nnz,Ii;
4308   PetscInt       *indx;
4309   PetscScalar    *values;
4310 
4311   PetscFunctionBegin;
4312   ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
4313   if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
4314     PetscInt       *dnz,*onz,sum,bs,cbs;
4315 
4316     if (n == PETSC_DECIDE) {
4317       ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr);
4318     }
4319     /* Check sum(n) = N */
4320     ierr = MPIU_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
4321     if (sum != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns %D != global columns %D",sum,N);
4322 
4323     ierr    = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
4324     rstart -= m;
4325 
4326     ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
4327     for (i=0; i<m; i++) {
4328       ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr);
4329       ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr);
4330       ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr);
4331     }
4332 
4333     ierr = MatCreate(comm,outmat);CHKERRQ(ierr);
4334     ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
4335     ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr);
4336     ierr = MatSetBlockSizes(*outmat,bs,cbs);CHKERRQ(ierr);
4337     ierr = MatSetType(*outmat,MATAIJ);CHKERRQ(ierr);
4338     ierr = MatSeqAIJSetPreallocation(*outmat,0,dnz);CHKERRQ(ierr);
4339     ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr);
4340     ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
4341   }
4342 
4343   /* numeric phase */
4344   ierr = MatGetOwnershipRange(*outmat,&rstart,NULL);CHKERRQ(ierr);
4345   for (i=0; i<m; i++) {
4346     ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
4347     Ii   = i + rstart;
4348     ierr = MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
4349     ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
4350   }
4351   ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4352   ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4353   PetscFunctionReturn(0);
4354 }
4355 
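/*
   MatFileSplit - for each process, appends its local rows of A (stored as a sequential
   matrix with the full global number of columns) to the binary file <outfile>.<rank>.
*/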
4356 PetscErrorCode MatFileSplit(Mat A,char *outfile)
4357 {
4358   PetscErrorCode    ierr;
4359   PetscMPIInt       rank;
4360   PetscInt          m,N,i,rstart,nnz;
4361   size_t            len;
4362   const PetscInt    *indx;
4363   PetscViewer       out;
4364   char              *name;
4365   Mat               B;
4366   const PetscScalar *values;
4367 
4368   PetscFunctionBegin;
4369   ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr);
4370   ierr = MatGetSize(A,0,&N);CHKERRQ(ierr);
4371   /* Should this be the type of the diagonal block of A? */
4372   ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr);
4373   ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr);
4374   ierr = MatSetBlockSizesFromMats(B,A,A);CHKERRQ(ierr);
4375   ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr);
4376   ierr = MatSeqAIJSetPreallocation(B,0,NULL);CHKERRQ(ierr);
4377   ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr);
4378   for (i=0; i<m; i++) {
4379     ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
4380     ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
4381     ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
4382   }
4383   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4384   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4385 
4386   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRQ(ierr);
4387   ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr);
4388   ierr = PetscMalloc1(len+16,&name);CHKERRQ(ierr);  /* room for ".", the rank digits, and the null terminator */
4389   sprintf(name,"%s.%d",outfile,rank);
4390   ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr);
4391   ierr = PetscFree(name);CHKERRQ(ierr);
4392   ierr = MatView(B,out);CHKERRQ(ierr);
4393   ierr = PetscViewerDestroy(&out);CHKERRQ(ierr);
4394   ierr = MatDestroy(&B);CHKERRQ(ierr);
4395   PetscFunctionReturn(0);
4396 }
4397 
4398 PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(Mat A)
4399 {
4400   PetscErrorCode      ierr;
4401   Mat_Merge_SeqsToMPI *merge;
4402   PetscContainer      container;
4403 
4404   PetscFunctionBegin;
4405   ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr);
4406   if (container) {
4407     ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr);
4408     ierr = PetscFree(merge->id_r);CHKERRQ(ierr);
4409     ierr = PetscFree(merge->len_s);CHKERRQ(ierr);
4410     ierr = PetscFree(merge->len_r);CHKERRQ(ierr);
4411     ierr = PetscFree(merge->bi);CHKERRQ(ierr);
4412     ierr = PetscFree(merge->bj);CHKERRQ(ierr);
4413     ierr = PetscFree(merge->buf_ri[0]);CHKERRQ(ierr);
4414     ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr);
4415     ierr = PetscFree(merge->buf_rj[0]);CHKERRQ(ierr);
4416     ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr);
4417     ierr = PetscFree(merge->coi);CHKERRQ(ierr);
4418     ierr = PetscFree(merge->coj);CHKERRQ(ierr);
4419     ierr = PetscFree(merge->owners_co);CHKERRQ(ierr);
4420     ierr = PetscLayoutDestroy(&merge->rowmap);CHKERRQ(ierr);
4421     ierr = PetscFree(merge);CHKERRQ(ierr);
4422     ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr);
4423   }
4424   ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr);
4425   PetscFunctionReturn(0);
4426 }
4427 
4428 #include <../src/mat/utils/freespace.h>
4429 #include <petscbt.h>
4430 
4431 PetscErrorCode MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat,Mat mpimat)
4432 {
4433   PetscErrorCode      ierr;
4434   MPI_Comm            comm;
4435   Mat_SeqAIJ          *a  =(Mat_SeqAIJ*)seqmat->data;
4436   PetscMPIInt         size,rank,taga,*len_s;
4437   PetscInt            N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj;
4438   PetscInt            proc,m;
4439   PetscInt            **buf_ri,**buf_rj;
4440   PetscInt            k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj;
4441   PetscInt            nrows,**buf_ri_k,**nextrow,**nextai;
4442   MPI_Request         *s_waits,*r_waits;
4443   MPI_Status          *status;
4444   MatScalar           *aa=a->a;
4445   MatScalar           **abuf_r,*ba_i;
4446   Mat_Merge_SeqsToMPI *merge;
4447   PetscContainer      container;
4448 
4449   PetscFunctionBegin;
4450   ierr = PetscObjectGetComm((PetscObject)mpimat,&comm);CHKERRQ(ierr);
4451   ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
4452 
4453   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4454   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4455 
4456   ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr);
4457   ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr);
4458 
4459   bi     = merge->bi;
4460   bj     = merge->bj;
4461   buf_ri = merge->buf_ri;
4462   buf_rj = merge->buf_rj;
4463 
4464   ierr   = PetscMalloc1(size,&status);CHKERRQ(ierr);
4465   owners = merge->rowmap->range;
4466   len_s  = merge->len_s;
4467 
4468   /* send and recv matrix values */
4469   /*-----------------------------*/
4470   ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr);
4471   ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr);
4472 
4473   ierr = PetscMalloc1(merge->nsend+1,&s_waits);CHKERRQ(ierr);
4474   for (proc=0,k=0; proc<size; proc++) {
4475     if (!len_s[proc]) continue;
4476     i    = owners[proc];
4477     ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr);
4478     k++;
4479   }
4480 
4481   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);}
4482   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);}
4483   ierr = PetscFree(status);CHKERRQ(ierr);
4484 
4485   ierr = PetscFree(s_waits);CHKERRQ(ierr);
4486   ierr = PetscFree(r_waits);CHKERRQ(ierr);
4487 
4488   /* insert mat values of mpimat */
4489   /*----------------------------*/
4490   ierr = PetscMalloc1(N,&ba_i);CHKERRQ(ierr);
4491   ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr);
4492 
4493   for (k=0; k<merge->nrecv; k++) {
4494     buf_ri_k[k] = buf_ri[k]; /* beginning of the k-th received i-structure */
4495     nrows       = *(buf_ri_k[k]);
4496     nextrow[k]  = buf_ri_k[k]+1;  /* next row number of the k-th received i-structure */
4497     nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of the k-th received i-structure */
4498   }
4499 
4500   /* set values of ba */
4501   m = merge->rowmap->n;
4502   for (i=0; i<m; i++) {
4503     arow = owners[rank] + i;
4504     bj_i = bj+bi[i];  /* col indices of the i-th row of mpimat */
4505     bnzi = bi[i+1] - bi[i];
4506     ierr = PetscMemzero(ba_i,bnzi*sizeof(PetscScalar));CHKERRQ(ierr);
4507 
4508     /* add local non-zero vals of this proc's seqmat into ba */
4509     anzi   = ai[arow+1] - ai[arow];
4510     aj     = a->j + ai[arow];
4511     aa     = a->a + ai[arow];
4512     nextaj = 0;
4513     for (j=0; nextaj<anzi; j++) {
4514       if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
4515         ba_i[j] += aa[nextaj++];
4516       }
4517     }
4518 
4519     /* add received vals into ba */
4520     for (k=0; k<merge->nrecv; k++) { /* k-th received message */
4521       /* i-th row */
4522       if (i == *nextrow[k]) {
4523         anzi   = *(nextai[k]+1) - *nextai[k];
4524         aj     = buf_rj[k] + *(nextai[k]);
4525         aa     = abuf_r[k] + *(nextai[k]);
4526         nextaj = 0;
4527         for (j=0; nextaj<anzi; j++) {
4528           if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
4529             ba_i[j] += aa[nextaj++];
4530           }
4531         }
4532         nextrow[k]++; nextai[k]++;
4533       }
4534     }
4535     ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr);
4536   }
4537   ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4538   ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4539 
4540   ierr = PetscFree(abuf_r[0]);CHKERRQ(ierr);
4541   ierr = PetscFree(abuf_r);CHKERRQ(ierr);
4542   ierr = PetscFree(ba_i);CHKERRQ(ierr);
4543   ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
4544   ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
4545   PetscFunctionReturn(0);
4546 }
4547 
4548 PetscErrorCode  MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat)
4549 {
4550   PetscErrorCode      ierr;
4551   Mat                 B_mpi;
4552   Mat_SeqAIJ          *a=(Mat_SeqAIJ*)seqmat->data;
4553   PetscMPIInt         size,rank,tagi,tagj,*len_s,*len_si,*len_ri;
4554   PetscInt            **buf_rj,**buf_ri,**buf_ri_k;
4555   PetscInt            M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j;
4556   PetscInt            len,proc,*dnz,*onz,bs,cbs;
4557   PetscInt            k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0;
4558   PetscInt            nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai;
4559   MPI_Request         *si_waits,*sj_waits,*ri_waits,*rj_waits;
4560   MPI_Status          *status;
4561   PetscFreeSpaceList  free_space=NULL,current_space=NULL;
4562   PetscBT             lnkbt;
4563   Mat_Merge_SeqsToMPI *merge;
4564   PetscContainer      container;
4565 
4566   PetscFunctionBegin;
4567   ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4568 
4569   /* make sure it is a PETSc comm */
4570   ierr = PetscCommDuplicate(comm,&comm,NULL);CHKERRQ(ierr);
4571   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4572   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4573 
4574   ierr = PetscNew(&merge);CHKERRQ(ierr);
4575   ierr = PetscMalloc1(size,&status);CHKERRQ(ierr);
4576 
4577   /* determine row ownership */
4578   /*---------------------------------------------------------*/
4579   ierr = PetscLayoutCreate(comm,&merge->rowmap);CHKERRQ(ierr);
4580   ierr = PetscLayoutSetLocalSize(merge->rowmap,m);CHKERRQ(ierr);
4581   ierr = PetscLayoutSetSize(merge->rowmap,M);CHKERRQ(ierr);
4582   ierr = PetscLayoutSetBlockSize(merge->rowmap,1);CHKERRQ(ierr);
4583   ierr = PetscLayoutSetUp(merge->rowmap);CHKERRQ(ierr);
4584   ierr = PetscMalloc1(size,&len_si);CHKERRQ(ierr);
4585   ierr = PetscMalloc1(size,&merge->len_s);CHKERRQ(ierr);
4586 
4587   m      = merge->rowmap->n;
4588   owners = merge->rowmap->range;
4589 
4590   /* determine the number of messages to send, their lengths */
4591   /*---------------------------------------------------------*/
4592   len_s = merge->len_s;
4593 
4594   len          = 0; /* length of buf_si[] */
4595   merge->nsend = 0;
4596   for (proc=0; proc<size; proc++) {
4597     len_si[proc] = 0;
4598     if (proc == rank) {
4599       len_s[proc] = 0;
4600     } else {
4601       len_si[proc] = owners[proc+1] - owners[proc] + 1;
4602       len_s[proc]  = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */
4603     }
4604     if (len_s[proc]) {
4605       merge->nsend++;
4606       nrows = 0;
4607       for (i=owners[proc]; i<owners[proc+1]; i++) {
4608         if (ai[i+1] > ai[i]) nrows++;
4609       }
4610       len_si[proc] = 2*(nrows+1);
4611       len         += len_si[proc];
4612     }
4613   }
4614 
4615   /* determine the number and length of messages to receive for ij-structure */
4616   /*-------------------------------------------------------------------------*/
4617   ierr = PetscGatherNumberOfMessages(comm,NULL,len_s,&merge->nrecv);CHKERRQ(ierr);
4618   ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr);
4619 
4620   /* post the Irecv of j-structure */
4621   /*-------------------------------*/
4622   ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr);
4623   ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr);
4624 
4625   /* post the Isend of j-structure */
4626   /*--------------------------------*/
4627   ierr = PetscMalloc2(merge->nsend,&si_waits,merge->nsend,&sj_waits);CHKERRQ(ierr);
4628 
4629   for (proc=0, k=0; proc<size; proc++) {
4630     if (!len_s[proc]) continue;
4631     i    = owners[proc];
4632     ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr);
4633     k++;
4634   }
4635 
4636   /* receives and sends of j-structure are complete */
4637   /*------------------------------------------------*/
4638   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);}
4639   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);}
4640 
4641   /* send and recv i-structure */
4642   /*---------------------------*/
4643   ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr);
4644   ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr);
4645 
4646   ierr   = PetscMalloc1(len+1,&buf_s);CHKERRQ(ierr);
4647   buf_si = buf_s;  /* points to the beginning of k-th msg to be sent */
4648   for (proc=0,k=0; proc<size; proc++) {
4649     if (!len_s[proc]) continue;
4650     /* form outgoing message for i-structure:
4651          buf_si[0]:                 nrows to be sent
4652                [1:nrows]:           row index (global)
4653                [nrows+1:2*nrows+1]: i-structure index
4654     */
4655     /*-------------------------------------------*/
4656     nrows       = len_si[proc]/2 - 1;
4657     buf_si_i    = buf_si + nrows+1;
4658     buf_si[0]   = nrows;
4659     buf_si_i[0] = 0;
4660     nrows       = 0;
4661     for (i=owners[proc]; i<owners[proc+1]; i++) {
4662       anzi = ai[i+1] - ai[i];
4663       if (anzi) {
4664         buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */
4665         buf_si[nrows+1]   = i-owners[proc]; /* local row index */
4666         nrows++;
4667       }
4668     }
4669     ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr);
4670     k++;
4671     buf_si += len_si[proc];
4672   }
4673 
4674   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);}
4675   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);}
4676 
4677   ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr);
4678   for (i=0; i<merge->nrecv; i++) {
4679     ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr);
4680   }
4681 
4682   ierr = PetscFree(len_si);CHKERRQ(ierr);
4683   ierr = PetscFree(len_ri);CHKERRQ(ierr);
4684   ierr = PetscFree(rj_waits);CHKERRQ(ierr);
4685   ierr = PetscFree2(si_waits,sj_waits);CHKERRQ(ierr);
4686   ierr = PetscFree(ri_waits);CHKERRQ(ierr);
4687   ierr = PetscFree(buf_s);CHKERRQ(ierr);
4688   ierr = PetscFree(status);CHKERRQ(ierr);
4689 
4690   /* compute a local seq matrix in each processor */
4691   /*----------------------------------------------*/
4692   /* allocate bi array and free space for accumulating nonzero column info */
4693   ierr  = PetscMalloc1(m+1,&bi);CHKERRQ(ierr);
4694   bi[0] = 0;
4695 
4696   /* create and initialize a linked list */
4697   nlnk = N+1;
4698   ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4699 
4700   /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */
4701   len  = ai[owners[rank+1]] - ai[owners[rank]];
4702   ierr = PetscFreeSpaceGet(PetscIntMultTruncate(2,len)+1,&free_space);CHKERRQ(ierr);
4703 
4704   current_space = free_space;
4705 
4706   /* determine symbolic info for each local row */
4707   ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr);
4708 
4709   for (k=0; k<merge->nrecv; k++) {
4710     buf_ri_k[k] = buf_ri[k]; /* beginning of the k-th received i-structure */
4711     nrows       = *buf_ri_k[k];
4712     nextrow[k]  = buf_ri_k[k] + 1;  /* next row number of the k-th received i-structure */
4713     nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of the k-th received i-structure */
4714   }
4715 
4716   ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
4717   len  = 0;
4718   for (i=0; i<m; i++) {
4719     bnzi = 0;
4720     /* add local non-zero cols of this proc's seqmat into lnk */
4721     arow  = owners[rank] + i;
4722     anzi  = ai[arow+1] - ai[arow];
4723     aj    = a->j + ai[arow];
4724     ierr  = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4725     bnzi += nlnk;
4726     /* add received col data into lnk */
4727     for (k=0; k<merge->nrecv; k++) { /* k-th received message */
4728       if (i == *nextrow[k]) { /* i-th row */
4729         anzi  = *(nextai[k]+1) - *nextai[k];
4730         aj    = buf_rj[k] + *nextai[k];
4731         ierr  = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4732         bnzi += nlnk;
4733         nextrow[k]++; nextai[k]++;
4734       }
4735     }
4736     if (len < bnzi) len = bnzi;  /* =max(bnzi) */
4737 
4738     /* if free space is not available, make more free space */
4739     if (current_space->local_remaining<bnzi) {
4740       ierr = PetscFreeSpaceGet(PetscIntSumTruncate(bnzi,current_space->total_array_size),&current_space);CHKERRQ(ierr);
4741       nspacedouble++;
4742     }
4743     /* copy data into free space, then initialize lnk */
4744     ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
4745     ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr);
4746 
4747     current_space->array           += bnzi;
4748     current_space->local_used      += bnzi;
4749     current_space->local_remaining -= bnzi;
4750 
4751     bi[i+1] = bi[i] + bnzi;
4752   }
4753 
4754   ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
4755 
4756   ierr = PetscMalloc1(bi[m]+1,&bj);CHKERRQ(ierr);
4757   ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr);
4758   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
4759 
4760   /* create symbolic parallel matrix B_mpi */
4761   /*---------------------------------------*/
4762   ierr = MatGetBlockSizes(seqmat,&bs,&cbs);CHKERRQ(ierr);
4763   ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr);
4764   if (n==PETSC_DECIDE) {
4765     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr);
4766   } else {
4767     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
4768   }
4769   ierr = MatSetBlockSizes(B_mpi,bs,cbs);CHKERRQ(ierr);
4770   ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr);
4771   ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr);
4772   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
4773   ierr = MatSetOption(B_mpi,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
4774 
4775   /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */
4776   B_mpi->assembled    = PETSC_FALSE;
4777   B_mpi->ops->destroy = MatDestroy_MPIAIJ_SeqsToMPI;
4778   merge->bi           = bi;
4779   merge->bj           = bj;
4780   merge->buf_ri       = buf_ri;
4781   merge->buf_rj       = buf_rj;
4782   merge->coi          = NULL;
4783   merge->coj          = NULL;
4784   merge->owners_co    = NULL;
4785 
4786   ierr = PetscCommDestroy(&comm);CHKERRQ(ierr);
4787 
4788   /* attach the supporting struct to B_mpi for reuse */
4789   ierr    = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
4790   ierr    = PetscContainerSetPointer(container,merge);CHKERRQ(ierr);
4791   ierr    = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr);
4792   ierr    = PetscContainerDestroy(&container);CHKERRQ(ierr);
4793   *mpimat = B_mpi;
4794 
4795   ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4796   PetscFunctionReturn(0);
4797 }
4798 
4799 /*@C
4800       MatCreateMPIAIJSumSeqAIJ - Creates a MATMPIAIJ matrix by adding sequential
4801                  matrices from each processor
4802 
4803     Collective on MPI_Comm
4804 
4805    Input Parameters:
4806 +    comm - the communicators the parallel matrix will live on
4807 .    seqmat - the input sequential matrices
4808 .    m - number of local rows (or PETSC_DECIDE)
4809 .    n - number of local columns (or PETSC_DECIDE)
4810 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4811 
4812    Output Parameter:
4813 .    mpimat - the parallel matrix generated
4814 
4815     Level: advanced
4816 
4817    Notes:
4818      The dimensions of the sequential matrix in each processor MUST be the same.
4819      The input seqmat is included into the container "Mat_Merge_SeqsToMPI", and will be
4820      destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat.
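
     As an illustration only, a typical call sequence (seqmat is an assembled sequential
   matrix whose dimensions, the same on every process, give the global size of the result;
   comm and ierr are assumed to be declared elsewhere):

.vb
     Mat mpimat;

     ierr = MatCreateMPIAIJSumSeqAIJ(comm,seqmat,PETSC_DECIDE,PETSC_DECIDE,MAT_INITIAL_MATRIX,&mpimat);CHKERRQ(ierr);
     /* ... update the numerical values of seqmat, keeping its nonzero pattern ... */
     ierr = MatCreateMPIAIJSumSeqAIJ(comm,seqmat,PETSC_DECIDE,PETSC_DECIDE,MAT_REUSE_MATRIX,&mpimat);CHKERRQ(ierr);
.ve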
4821 @*/
4822 PetscErrorCode MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat)
4823 {
4824   PetscErrorCode ierr;
4825   PetscMPIInt    size;
4826 
4827   PetscFunctionBegin;
4828   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4829   if (size == 1) {
4830     ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4831     if (scall == MAT_INITIAL_MATRIX) {
4832       ierr = MatDuplicate(seqmat,MAT_COPY_VALUES,mpimat);CHKERRQ(ierr);
4833     } else {
4834       ierr = MatCopy(seqmat,*mpimat,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
4835     }
4836     ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4837     PetscFunctionReturn(0);
4838   }
4839   ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4840   if (scall == MAT_INITIAL_MATRIX) {
4841     ierr = MatCreateMPIAIJSumSeqAIJSymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr);
4842   }
4843   ierr = MatCreateMPIAIJSumSeqAIJNumeric(seqmat,*mpimat);CHKERRQ(ierr);
4844   ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4845   PetscFunctionReturn(0);
4846 }
4847 
4848 /*@
4849      MatMPIAIJGetLocalMat - Creates a SeqAIJ matrix from a MATMPIAIJ matrix by taking all its local rows and putting them into a sequential matrix with
4850           mlocal rows and n columns, where mlocal is the row count obtained with MatGetLocalSize() and n is the global column count obtained
4851           with MatGetSize().
4852 
4853     Not Collective
4854 
4855    Input Parameters:
4856 +    A - the matrix
4857 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4858 
4859    Output Parameter:
4860 .    A_loc - the local sequential matrix generated
4861 
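     As an illustration only, a typical usage sketch (ierr is assumed declared):

.vb
     Mat Aloc;

     ierr = MatMPIAIJGetLocalMat(A,MAT_INITIAL_MATRIX,&Aloc);CHKERRQ(ierr);
     /* ... use Aloc as an ordinary sequential matrix ... */
     ierr = MatDestroy(&Aloc);CHKERRQ(ierr);
.ve
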
4862     Level: developer
4863 
4864 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMatCondensed()
4865 
4866 @*/
4867 PetscErrorCode MatMPIAIJGetLocalMat(Mat A,MatReuse scall,Mat *A_loc)
4868 {
4869   PetscErrorCode ierr;
4870   Mat_MPIAIJ     *mpimat=(Mat_MPIAIJ*)A->data;
4871   Mat_SeqAIJ     *mat,*a,*b;
4872   PetscInt       *ai,*aj,*bi,*bj,*cmap=mpimat->garray;
4873   MatScalar      *aa,*ba,*cam;
4874   PetscScalar    *ca;
4875   PetscInt       am=A->rmap->n,i,j,k,cstart=A->cmap->rstart;
4876   PetscInt       *ci,*cj,col,ncols_d,ncols_o,jo;
4877   PetscBool      match;
4878   MPI_Comm       comm;
4879   PetscMPIInt    size;
4880 
4881   PetscFunctionBegin;
4882   ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr);
4883   if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MATMPIAIJ matrix as input");
4884   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
4885   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4886   if (size == 1 && scall == MAT_REUSE_MATRIX) PetscFunctionReturn(0);
4887 
4888   ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
4889   a = (Mat_SeqAIJ*)(mpimat->A)->data;
4890   b = (Mat_SeqAIJ*)(mpimat->B)->data;
4891   ai = a->i; aj = a->j; bi = b->i; bj = b->j;
4892   aa = a->a; ba = b->a;
4893   if (scall == MAT_INITIAL_MATRIX) {
4894     if (size == 1) {
4895       ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ai,aj,aa,A_loc);CHKERRQ(ierr);
4896       PetscFunctionReturn(0);
4897     }
4898 
4899     ierr  = PetscMalloc1(1+am,&ci);CHKERRQ(ierr);
4900     ci[0] = 0;
4901     for (i=0; i<am; i++) {
4902       ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]);
4903     }
4904     ierr = PetscMalloc1(1+ci[am],&cj);CHKERRQ(ierr);
4905     ierr = PetscMalloc1(1+ci[am],&ca);CHKERRQ(ierr);
4906     k    = 0;
4907     for (i=0; i<am; i++) {
4908       ncols_o = bi[i+1] - bi[i];
4909       ncols_d = ai[i+1] - ai[i];
4910       /* off-diagonal portion of A */
4911       for (jo=0; jo<ncols_o; jo++) {
4912         col = cmap[*bj];
4913         if (col >= cstart) break;
4914         cj[k]   = col; bj++;
4915         ca[k++] = *ba++;
4916       }
4917       /* diagonal portion of A */
4918       for (j=0; j<ncols_d; j++) {
4919         cj[k]   = cstart + *aj++;
4920         ca[k++] = *aa++;
4921       }
4922       /* off-diagonal portion of A */
4923       for (j=jo; j<ncols_o; j++) {
4924         cj[k]   = cmap[*bj++];
4925         ca[k++] = *ba++;
4926       }
4927     }
4928     /* put together the new matrix */
4929     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr);
4930     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
4931     /* Since these are PETSc arrays, change flags to free them as necessary. */
4932     mat          = (Mat_SeqAIJ*)(*A_loc)->data;
4933     mat->free_a  = PETSC_TRUE;
4934     mat->free_ij = PETSC_TRUE;
4935     mat->nonew   = 0;
4936   } else if (scall == MAT_REUSE_MATRIX) {
4937     mat=(Mat_SeqAIJ*)(*A_loc)->data;
4938     ci = mat->i; cj = mat->j; cam = mat->a;
4939     for (i=0; i<am; i++) {
4940       /* off-diagonal portion of A */
4941       ncols_o = bi[i+1] - bi[i];
4942       for (jo=0; jo<ncols_o; jo++) {
4943         col = cmap[*bj];
4944         if (col >= cstart) break;
4945         *cam++ = *ba++; bj++;
4946       }
4947       /* diagonal portion of A */
4948       ncols_d = ai[i+1] - ai[i];
4949       for (j=0; j<ncols_d; j++) *cam++ = *aa++;
4950       /* off-diagonal portion of A */
4951       for (j=jo; j<ncols_o; j++) {
4952         *cam++ = *ba++; bj++;
4953       }
4954     }
4955   } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
4956   ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
4957   PetscFunctionReturn(0);
4958 }
4959 
4960 /*@C
4961      MatMPIAIJGetLocalMatCondensed - Creates a SeqAIJ matrix from a MATMPIAIJ matrix by taking all its local rows and NON-ZERO columns
4962 
4963     Not Collective
4964 
4965    Input Parameters:
4966 +    A - the matrix
4967 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4968 -    row, col - index sets of rows and columns to extract (or NULL)
4969 
4970    Output Parameter:
4971 .    A_loc - the local sequential matrix generated
4972 
4973     Level: developer
4974 
4975 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMat()
4976 
4977 @*/
4978 PetscErrorCode MatMPIAIJGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc)
4979 {
4980   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
4981   PetscErrorCode ierr;
4982   PetscInt       i,start,end,ncols,nzA,nzB,*cmap,imark,*idx;
4983   IS             isrowa,iscola;
4984   Mat            *aloc;
4985   PetscBool      match;
4986 
4987   PetscFunctionBegin;
4988   ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr);
4989   if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MATMPIAIJ matrix as input");
4990   ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
4991   if (!row) {
4992     start = A->rmap->rstart; end = A->rmap->rend;
4993     ierr  = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr);
4994   } else {
4995     isrowa = *row;
4996   }
4997   if (!col) {
4998     start = A->cmap->rstart;
4999     cmap  = a->garray;
5000     nzA   = a->A->cmap->n;
5001     nzB   = a->B->cmap->n;
5002     ierr  = PetscMalloc1(nzA+nzB, &idx);CHKERRQ(ierr);
5003     ncols = 0;
5004     for (i=0; i<nzB; i++) {
5005       if (cmap[i] < start) idx[ncols++] = cmap[i];
5006       else break;
5007     }
5008     imark = i;
5009     for (i=0; i<nzA; i++) idx[ncols++] = start + i;
5010     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i];
5011     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&iscola);CHKERRQ(ierr);
5012   } else {
5013     iscola = *col;
5014   }
5015   if (scall != MAT_INITIAL_MATRIX) {
5016     ierr    = PetscMalloc1(1,&aloc);CHKERRQ(ierr);
5017     aloc[0] = *A_loc;
5018   }
5019   ierr   = MatCreateSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr);
5020   *A_loc = aloc[0];
5021   ierr   = PetscFree(aloc);CHKERRQ(ierr);
5022   if (!row) {
5023     ierr = ISDestroy(&isrowa);CHKERRQ(ierr);
5024   }
5025   if (!col) {
5026     ierr = ISDestroy(&iscola);CHKERRQ(ierr);
5027   }
5028   ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
5029   PetscFunctionReturn(0);
5030 }
5031 
5032 /*@C
5033     MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns of the local A
5034 
5035     Collective on Mat
5036 
5037    Input Parameters:
5038 +    A,B - the matrices in mpiaij format
5039 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5040 -    rowb, colb - index sets of rows and columns of B to extract (or NULL)
5041 
5042    Output Parameter:
5043 +    rowb, colb - index sets of rows and columns of B to extract
5044 -    B_seq - the sequential matrix generated
5045 
5046     Level: developer
5047 
5048 @*/
5049 PetscErrorCode MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,Mat *B_seq)
5050 {
5051   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
5052   PetscErrorCode ierr;
5053   PetscInt       *idx,i,start,ncols,nzA,nzB,*cmap,imark;
5054   IS             isrowb,iscolb;
5055   Mat            *bseq=NULL;
5056 
5057   PetscFunctionBegin;
5058   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
5059     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
5060   }
5061   ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
5062 
5063   if (scall == MAT_INITIAL_MATRIX) {
5064     start = A->cmap->rstart;
5065     cmap  = a->garray;
5066     nzA   = a->A->cmap->n;
5067     nzB   = a->B->cmap->n;
5068     ierr  = PetscMalloc1(nzA+nzB, &idx);CHKERRQ(ierr);
5069     ncols = 0;
5070     for (i=0; i<nzB; i++) {  /* row < local row index */
5071       if (cmap[i] < start) idx[ncols++] = cmap[i];
5072       else break;
5073     }
5074     imark = i;
5075     for (i=0; i<nzA; i++) idx[ncols++] = start + i;  /* local rows */
5076     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */
5077     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&isrowb);CHKERRQ(ierr);
5078     ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr);
5079   } else {
5080     if (!rowb || !colb) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX");
5081     isrowb  = *rowb; iscolb = *colb;
5082     ierr    = PetscMalloc1(1,&bseq);CHKERRQ(ierr);
5083     bseq[0] = *B_seq;
5084   }
5085   ierr   = MatCreateSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr);
5086   *B_seq = bseq[0];
5087   ierr   = PetscFree(bseq);CHKERRQ(ierr);
5088   if (!rowb) {
5089     ierr = ISDestroy(&isrowb);CHKERRQ(ierr);
5090   } else {
5091     *rowb = isrowb;
5092   }
5093   if (!colb) {
5094     ierr = ISDestroy(&iscolb);CHKERRQ(ierr);
5095   } else {
5096     *colb = iscolb;
5097   }
5098   ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
5099   PetscFunctionReturn(0);
5100 }
5101 
5102 /*
5103     MatGetBrowsOfAoCols_MPIAIJ - Creates a SeqAIJ matrix by taking the rows of B whose global indices equal the
5104     nonzero column indices of the OFF-DIAGONAL portion of the local part of A
5105 
5106     Collective on Mat
5107 
5108    Input Parameters:
5109 +    A,B - the matrices in mpiaij format
5110 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5111 
5112    Output Parameters:
5113 +    startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or NULL)
5114 .    startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or NULL)
5115 .    bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or NULL)
5116 -    B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N
5117 
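   A typical calling sequence, shown only as a hedged sketch (this is an internal routine used by the
   parallel matrix-matrix product kernels; the variable names below are illustrative):

     PetscInt  *startsj_s = NULL,*startsj_r = NULL;
     MatScalar *bufa      = NULL;
     Mat       B_oth;
     ierr = MatGetBrowsOfAoCols_MPIAIJ(A,B,MAT_INITIAL_MATRIX,&startsj_s,&startsj_r,&bufa,&B_oth);CHKERRQ(ierr);
     /* ... B's numerical values change, its nonzero structure does not ... */
     ierr = MatGetBrowsOfAoCols_MPIAIJ(A,B,MAT_REUSE_MATRIX,&startsj_s,&startsj_r,&bufa,&B_oth);CHKERRQ(ierr);
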
5118     Level: developer
5119 
5120 */
5121 PetscErrorCode MatGetBrowsOfAoCols_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscInt **startsj_s,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth)
5122 {
5123   VecScatter_MPI_General *gen_to,*gen_from;
5124   PetscErrorCode         ierr;
5125   Mat_MPIAIJ             *a=(Mat_MPIAIJ*)A->data;
5126   Mat_SeqAIJ             *b_oth;
5127   VecScatter             ctx =a->Mvctx;
5128   MPI_Comm               comm;
5129   PetscMPIInt            *rprocs,*sprocs,tag=((PetscObject)ctx)->tag,rank;
5130   PetscInt               *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap->n,row,*b_othi,*b_othj;
5131   PetscInt               *rvalues,*svalues;
5132   MatScalar              *b_otha,*bufa,*bufA;
5133   PetscInt               i,j,k,l,ll,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len;
5134   MPI_Request            *rwaits = NULL,*swaits = NULL;
5135   MPI_Status             *sstatus,rstatus;
5136   PetscMPIInt            jj,size;
5137   PetscInt               *cols,sbs,rbs;
5138   PetscScalar            *vals;
5139 
5140   PetscFunctionBegin;
5141   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
5142   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
5143 
5144   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
5145     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
5146   }
5147   ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
5148   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
5149 
5150   if (size == 1) {
5151     startsj_s = NULL;
5152     bufa_ptr  = NULL;
5153     *B_oth    = NULL;
5154     PetscFunctionReturn(0);
5155   }
5156 
5157   gen_to   = (VecScatter_MPI_General*)ctx->todata;
5158   gen_from = (VecScatter_MPI_General*)ctx->fromdata;
5159   nrecvs   = gen_from->n;
5160   nsends   = gen_to->n;
5161 
5162   ierr    = PetscMalloc2(nrecvs,&rwaits,nsends,&swaits);CHKERRQ(ierr);
5163   srow    = gen_to->indices;    /* local row index to be sent */
5164   sstarts = gen_to->starts;
5165   sprocs  = gen_to->procs;
5166   sstatus = gen_to->sstatus;
5167   sbs     = gen_to->bs;
5168   rstarts = gen_from->starts;
5169   rprocs  = gen_from->procs;
5170   rbs     = gen_from->bs;
5171 
5172   if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX;
5173   if (scall == MAT_INITIAL_MATRIX) {
5174     /* i-array */
5175     /*---------*/
5176     /*  post receives */
5177     ierr = PetscMalloc1(rbs*(rstarts[nrecvs] - rstarts[0]),&rvalues);CHKERRQ(ierr);
5178     for (i=0; i<nrecvs; i++) {
5179       rowlen = rvalues + rstarts[i]*rbs;
5180       nrows  = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */
5181       ierr   = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5182     }
5183 
5184     /* pack the outgoing message */
5185     ierr = PetscMalloc2(nsends+1,&sstartsj,nrecvs+1,&rstartsj);CHKERRQ(ierr);
5186 
5187     sstartsj[0] = 0;
5188     rstartsj[0] = 0;
5189     len         = 0; /* total length of j or a array to be sent */
5190     k           = 0;
5191     ierr = PetscMalloc1(sbs*(sstarts[nsends] - sstarts[0]),&svalues);CHKERRQ(ierr);
5192     for (i=0; i<nsends; i++) {
5193       rowlen = svalues + sstarts[i]*sbs;
5194       nrows  = sstarts[i+1]-sstarts[i]; /* num of block rows */
5195       for (j=0; j<nrows; j++) {
5196         row = srow[k] + B->rmap->range[rank]; /* global row idx */
5197         for (l=0; l<sbs; l++) {
5198           ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr); /* rowlength */
5199 
5200           rowlen[j*sbs+l] = ncols;
5201 
5202           len += ncols;
5203           ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr);
5204         }
5205         k++;
5206       }
5207       ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5208 
5209       sstartsj[i+1] = len;  /* starting point of (i+1)-th outgoing msg in bufj and bufa */
5210     }
5211     /* recvs and sends of i-array are completed */
5212     i = nrecvs;
5213     while (i--) {
5214       ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5215     }
5216     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
5217     ierr = PetscFree(svalues);CHKERRQ(ierr);
5218 
5219     /* allocate buffers for sending j and a arrays */
5220     ierr = PetscMalloc1(len+1,&bufj);CHKERRQ(ierr);
5221     ierr = PetscMalloc1(len+1,&bufa);CHKERRQ(ierr);
5222 
5223     /* create i-array of B_oth */
5224     ierr = PetscMalloc1(aBn+2,&b_othi);CHKERRQ(ierr);
5225 
5226     b_othi[0] = 0;
5227     len       = 0; /* total length of j or a array to be received */
5228     k         = 0;
5229     for (i=0; i<nrecvs; i++) {
5230       rowlen = rvalues + rstarts[i]*rbs;
5231       nrows  = rbs*(rstarts[i+1]-rstarts[i]); /* num of rows to be received */
5232       for (j=0; j<nrows; j++) {
5233         b_othi[k+1] = b_othi[k] + rowlen[j];
5234         ierr = PetscIntSumError(rowlen[j],len,&len);CHKERRQ(ierr);
5235         k++;
5236       }
5237       rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */
5238     }
5239     ierr = PetscFree(rvalues);CHKERRQ(ierr);
5240 
5241     /* allocate space for the j and a arrays of B_oth */
5242     ierr = PetscMalloc1(b_othi[aBn]+1,&b_othj);CHKERRQ(ierr);
5243     ierr = PetscMalloc1(b_othi[aBn]+1,&b_otha);CHKERRQ(ierr);
5244 
5245     /* j-array */
5246     /*---------*/
5247     /*  post receives of j-array */
5248     for (i=0; i<nrecvs; i++) {
5249       nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
5250       ierr  = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5251     }
5252 
5253     /* pack the outgoing message j-array */
5254     k = 0;
5255     for (i=0; i<nsends; i++) {
5256       nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5257       bufJ  = bufj+sstartsj[i];
5258       for (j=0; j<nrows; j++) {
5259         row = srow[k++] + B->rmap->range[rank];  /* global row idx */
5260         for (ll=0; ll<sbs; ll++) {
5261           ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr);
5262           for (l=0; l<ncols; l++) {
5263             *bufJ++ = cols[l];
5264           }
5265           ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr);
5266         }
5267       }
5268       ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5269     }
5270 
5271     /* recvs and sends of j-array are completed */
5272     i = nrecvs;
5273     while (i--) {
5274       ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5275     }
5276     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
5277   } else if (scall == MAT_REUSE_MATRIX) {
5278     sstartsj = *startsj_s;
5279     rstartsj = *startsj_r;
5280     bufa     = *bufa_ptr;
5281     b_oth    = (Mat_SeqAIJ*)(*B_oth)->data;
5282     b_otha   = b_oth->a;
5283   } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Invalid MatReuse; must be MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX");
5284 
5285   /* a-array */
5286   /*---------*/
5287   /*  post receives of a-array */
5288   for (i=0; i<nrecvs; i++) {
5289     nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
5290     ierr  = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5291   }
5292 
5293   /* pack the outgoing message a-array */
5294   k = 0;
5295   for (i=0; i<nsends; i++) {
5296     nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5297     bufA  = bufa+sstartsj[i];
5298     for (j=0; j<nrows; j++) {
5299       row = srow[k++] + B->rmap->range[rank];  /* global row idx */
5300       for (ll=0; ll<sbs; ll++) {
5301         ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr);
5302         for (l=0; l<ncols; l++) {
5303           *bufA++ = vals[l];
5304         }
5305         ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr);
5306       }
5307     }
5308     ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5309   }
5310   /* recvs and sends of a-array are completed */
5311   i = nrecvs;
5312   while (i--) {
5313     ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5314   }
5315   if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
5316   ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr);
5317 
5318   if (scall == MAT_INITIAL_MATRIX) {
5319     /* put together the new matrix */
5320     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr);
5321 
5322     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
5323     /* Since these are PETSc arrays, change flags to free them as necessary. */
5324     b_oth          = (Mat_SeqAIJ*)(*B_oth)->data;
5325     b_oth->free_a  = PETSC_TRUE;
5326     b_oth->free_ij = PETSC_TRUE;
5327     b_oth->nonew   = 0;
5328 
5329     ierr = PetscFree(bufj);CHKERRQ(ierr);
5330     if (!startsj_s || !bufa_ptr) {
5331       ierr = PetscFree2(sstartsj,rstartsj);CHKERRQ(ierr);
5332       ierr = PetscFree(bufa);CHKERRQ(ierr); /* bufa was not requested by the caller, so free it here */
5333     } else {
5334       *startsj_s = sstartsj;
5335       *startsj_r = rstartsj;
5336       *bufa_ptr  = bufa;
5337     }
5338   }
5339   ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
5340   PetscFunctionReturn(0);
5341 }
5342 
5343 /*@C
5344   MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication.
5345 
5346   Not Collective
5347 
5348   Input Parameter:
5349 . A - The matrix in mpiaij format
5350 
5351   Output Parameters:
5352 + lvec - The local vector holding off-process values from the argument to a matrix-vector product
5353 . colmap - A map from global column index to local index into lvec
5354 - multScatter - A scatter from the argument of a matrix-vector product to lvec
5355 
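  Example usage, a minimal sketch (A is assumed to be an assembled MATMPIAIJ matrix; the local
  variable names are illustrative):
.vb
    Vec        lvec;
    VecScatter Mvctx;
  #if defined(PETSC_USE_CTABLE)
    PetscTable colmap;
  #else
    PetscInt   *colmap;
  #endif
    ierr = MatGetCommunicationStructs(A,&lvec,&colmap,&Mvctx);CHKERRQ(ierr);
.ve
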
5356   Level: developer
5357 
5358 @*/
5359 #if defined(PETSC_USE_CTABLE)
5360 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter)
5361 #else
5362 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter)
5363 #endif
5364 {
5365   Mat_MPIAIJ *a;
5366 
5367   PetscFunctionBegin;
5368   PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
5369   PetscValidPointer(lvec, 2);
5370   PetscValidPointer(colmap, 3);
5371   PetscValidPointer(multScatter, 4);
5372   a = (Mat_MPIAIJ*) A->data;
5373   if (lvec) *lvec = a->lvec;
5374   if (colmap) *colmap = a->colmap;
5375   if (multScatter) *multScatter = a->Mvctx;
5376   PetscFunctionReturn(0);
5377 }
5378 
5379 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat,MatType,MatReuse,Mat*);
5380 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJPERM(Mat,MatType,MatReuse,Mat*);
5381 #if defined(PETSC_HAVE_MKL_SPARSE)
5382 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJMKL(Mat,MatType,MatReuse,Mat*);
5383 #endif
5384 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat,MatType,MatReuse,Mat*);
5385 #if defined(PETSC_HAVE_ELEMENTAL)
5386 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_Elemental(Mat,MatType,MatReuse,Mat*);
5387 #endif
5388 #if defined(PETSC_HAVE_HYPRE)
5389 PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat,MatType,MatReuse,Mat*);
5390 PETSC_INTERN PetscErrorCode MatMatMatMult_Transpose_AIJ_AIJ(Mat,Mat,Mat,MatReuse,PetscReal,Mat*);
5391 #endif
5392 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_IS(Mat,MatType,MatReuse,Mat*);
5393 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISELL(Mat,MatType,MatReuse,Mat*);
5394 
5395 /*
5396     Computes C = A*B as (B'*A')' since computing A*B directly (parallel dense times AIJ) is untenable
5397 
5398                n                       p                          p
5399         (              )       (              )         (                  )
5400       m (      A       )  *  n (       B      )   =   m (         C        )
5401         (              )       (              )         (                  )
5402 
5403 */
5404 PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C)
5405 {
5406   PetscErrorCode ierr;
5407   Mat            At,Bt,Ct;
5408 
5409   PetscFunctionBegin;
5410   ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr);
5411   ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr);
5412   ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,1.0,&Ct);CHKERRQ(ierr);
5413   ierr = MatDestroy(&At);CHKERRQ(ierr);
5414   ierr = MatDestroy(&Bt);CHKERRQ(ierr);
5415   ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr);
5416   ierr = MatDestroy(&Ct);CHKERRQ(ierr);
5417   PetscFunctionReturn(0);
5418 }
5419 
5420 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat *C)
5421 {
5422   PetscErrorCode ierr;
5423   PetscInt       m=A->rmap->n,n=B->cmap->n;
5424   Mat            Cmat;
5425 
5426   PetscFunctionBegin;
5427   if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %D != B->rmap->n %D",A->cmap->n,B->rmap->n);
5428   ierr = MatCreate(PetscObjectComm((PetscObject)A),&Cmat);CHKERRQ(ierr);
5429   ierr = MatSetSizes(Cmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
5430   ierr = MatSetBlockSizesFromMats(Cmat,A,B);CHKERRQ(ierr);
5431   ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr);
5432   ierr = MatMPIDenseSetPreallocation(Cmat,NULL);CHKERRQ(ierr);
5433   ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5434   ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5435 
5436   Cmat->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIAIJ;
5437 
5438   *C = Cmat;
5439   PetscFunctionReturn(0);
5440 }
5441 
5442 /* ----------------------------------------------------------------*/
5443 PETSC_INTERN PetscErrorCode MatMatMult_MPIDense_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C)
5444 {
5445   PetscErrorCode ierr;
5446 
5447   PetscFunctionBegin;
5448   if (scall == MAT_INITIAL_MATRIX) {
5449     ierr = PetscLogEventBegin(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr);
5450     ierr = MatMatMultSymbolic_MPIDense_MPIAIJ(A,B,fill,C);CHKERRQ(ierr);
5451     ierr = PetscLogEventEnd(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr);
5452   }
5453   ierr = PetscLogEventBegin(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr);
5454   ierr = MatMatMultNumeric_MPIDense_MPIAIJ(A,B,*C);CHKERRQ(ierr);
5455   ierr = PetscLogEventEnd(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr);
5456   PetscFunctionReturn(0);
5457 }
5458 
5459 /*MC
5460    MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.
5461 
5462    Options Database Keys:
5463 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions()
5464 
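  Example usage, a minimal sketch of selecting this type at runtime (m, n, d_nz, and o_nz are assumed
  to be provided by the caller):
.vb
    ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
    ierr = MatSetSizes(A,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
    ierr = MatSetFromOptions(A);CHKERRQ(ierr);                        /* -mat_type mpiaij selects this type */
    ierr = MatMPIAIJSetPreallocation(A,d_nz,NULL,o_nz,NULL);CHKERRQ(ierr);
.ve
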
5465   Level: beginner
5466 
5467 .seealso: MatCreateAIJ()
5468 M*/
5469 
5470 PETSC_EXTERN PetscErrorCode MatCreate_MPIAIJ(Mat B)
5471 {
5472   Mat_MPIAIJ     *b;
5473   PetscErrorCode ierr;
5474   PetscMPIInt    size;
5475 
5476   PetscFunctionBegin;
5477   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRQ(ierr);
5478 
5479   ierr          = PetscNewLog(B,&b);CHKERRQ(ierr);
5480   B->data       = (void*)b;
5481   ierr          = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
5482   B->assembled  = PETSC_FALSE;
5483   B->insertmode = NOT_SET_VALUES;
5484   b->size       = size;
5485 
5486   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank);CHKERRQ(ierr);
5487 
5488   /* build cache for off array entries formed */
5489   ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash);CHKERRQ(ierr);
5490 
5491   b->donotstash  = PETSC_FALSE;
5492   b->colmap      = 0;
5493   b->garray      = 0;
5494   b->roworiented = PETSC_TRUE;
5495 
5496   /* stuff used for matrix vector multiply */
5497   b->lvec  = NULL;
5498   b->Mvctx = NULL;
5499 
5500   /* stuff for MatGetRow() */
5501   b->rowindices   = 0;
5502   b->rowvalues    = 0;
5503   b->getrowactive = PETSC_FALSE;
5504 
5505   /* flexible pointer used in CUSP/CUSPARSE classes */
5506   b->spptr = NULL;
5507 
5508   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetUseScalableIncreaseOverlap_C",MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ);CHKERRQ(ierr);
5509   ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPIAIJ);CHKERRQ(ierr);
5510   ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPIAIJ);CHKERRQ(ierr);
5511   ierr = PetscObjectComposeFunction((PetscObject)B,"MatIsTranspose_C",MatIsTranspose_MPIAIJ);CHKERRQ(ierr);
5512   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr);
5513   ierr = PetscObjectComposeFunction((PetscObject)B,"MatResetPreallocation_C",MatResetPreallocation_MPIAIJ);CHKERRQ(ierr);
5514   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr);
5515   ierr = PetscObjectComposeFunction((PetscObject)B,"MatDiagonalScaleLocal_C",MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr);
5516   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijperm_C",MatConvert_MPIAIJ_MPIAIJPERM);CHKERRQ(ierr);
5517 #if defined(PETSC_HAVE_MKL_SPARSE)
5518   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijmkl_C",MatConvert_MPIAIJ_MPIAIJMKL);CHKERRQ(ierr);
5519 #endif
5520   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijcrl_C",MatConvert_MPIAIJ_MPIAIJCRL);CHKERRQ(ierr);
5521   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C",MatConvert_MPIAIJ_MPISBAIJ);CHKERRQ(ierr);
5522 #if defined(PETSC_HAVE_ELEMENTAL)
5523   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_elemental_C",MatConvert_MPIAIJ_Elemental);CHKERRQ(ierr);
5524 #endif
5525 #if defined(PETSC_HAVE_HYPRE)
5526   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_hypre_C",MatConvert_AIJ_HYPRE);CHKERRQ(ierr);
5527 #endif
5528   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_is_C",MatConvert_MPIAIJ_IS);CHKERRQ(ierr);
5529   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisell_C",MatConvert_MPIAIJ_MPISELL);CHKERRQ(ierr);
5530   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMult_mpidense_mpiaij_C",MatMatMult_MPIDense_MPIAIJ);CHKERRQ(ierr);
5531   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultSymbolic_mpidense_mpiaij_C",MatMatMultSymbolic_MPIDense_MPIAIJ);CHKERRQ(ierr);
5532   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultNumeric_mpidense_mpiaij_C",MatMatMultNumeric_MPIDense_MPIAIJ);CHKERRQ(ierr);
5533 #if defined(PETSC_HAVE_HYPRE)
5534   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMatMult_transpose_mpiaij_mpiaij_C",MatMatMatMult_Transpose_AIJ_AIJ);CHKERRQ(ierr);
5535 #endif
5536   ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr);
5537   PetscFunctionReturn(0);
5538 }
5539 
5540 /*@C
5541      MatCreateMPIAIJWithSplitArrays - creates an MPI AIJ matrix using arrays that contain the "diagonal"
5542          and "off-diagonal" part of the matrix in CSR format.
5543 
5544    Collective on MPI_Comm
5545 
5546    Input Parameters:
5547 +  comm - MPI communicator
5548 .  m - number of local rows (Cannot be PETSC_DECIDE)
5549 .  n - This value should be the same as the local size used in creating the
5550        x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have it
5551        calculated if N is given). For square matrices n is almost always m.
5552 .  M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given)
5553 .  N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given)
5554 .   i - row indices for "diagonal" portion of matrix
5555 .   j - column indices
5556 .   a - matrix values
5557 .   oi - row indices for "off-diagonal" portion of matrix
5558 .   oj - column indices
5559 -   oa - matrix values
5560 
5561    Output Parameter:
5562 .   mat - the matrix
5563 
5564    Level: advanced
5565 
5566    Notes:
5567        The i, j, a, oi, oj, and oa arrays ARE NOT copied by this routine into the internal format used by PETSc. The user
5568        retains ownership of these arrays and must not free them until after the matrix has been destroyed.
5569 
5570        The i, j, oi, and oj indices are zero-based
5571 
5572        See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix
5573 
5574        This sets local rows and cannot be used to set off-processor values.
5575 
5576        Use of this routine is discouraged because it is inflexible and cumbersome to use. It is extremely rare that a
5577        legacy application natively assembles into exactly this split format. The code to do so is nontrivial and does
5578        not easily support in-place reassembly. It is recommended to use MatSetValues() (or a variant thereof) because
5579        the resulting assembly is easier to implement, will work with any matrix format, and the user does not have to
5580        keep track of the underlying array. Use MatSetOption(A,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) to disable all
5581        communication if it is known that only local entries will be set.
5582 
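   Example usage, a minimal sketch (the arrays i, j, a, oi, oj, and oa are assumed to already hold the
   caller's split CSR data as described above):
.vb
     Mat A;
     ierr = MatCreateMPIAIJWithSplitArrays(comm,m,n,PETSC_DETERMINE,PETSC_DETERMINE,i,j,a,oi,oj,oa,&A);CHKERRQ(ierr);
     /* ... use A ... */
     ierr = MatDestroy(&A);CHKERRQ(ierr);
     /* the arrays are still owned by the caller and may be freed only now, after MatDestroy() */
.ve
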
5583 .keywords: matrix, aij, compressed row, sparse, parallel
5584 
5585 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
5586           MATMPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithArrays()
5587 @*/
5588 PetscErrorCode MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat)
5589 {
5590   PetscErrorCode ierr;
5591   Mat_MPIAIJ     *maij;
5592 
5593   PetscFunctionBegin;
5594   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE or negative");
5595   if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
5596   if (oi[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0");
5597   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
5598   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
5599   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
5600   maij = (Mat_MPIAIJ*) (*mat)->data;
5601 
5602   (*mat)->preallocated = PETSC_TRUE;
5603 
5604   ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr);
5605   ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr);
5606 
5607   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr);
5608   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr);
5609 
5610   ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5611   ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5612   ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5613   ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5614 
5615   ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr);
5616   ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5617   ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5618   ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_FALSE);CHKERRQ(ierr);
5619   ierr = MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
5620   PetscFunctionReturn(0);
5621 }
5622 
5623 /*
5624     Special version for direct calls from Fortran
5625 */
5626 #include <petsc/private/fortranimpl.h>
5627 
5628 /* Change these macros so they can be used in a void function */
5629 #undef CHKERRQ
5630 #define CHKERRQ(ierr) CHKERRABORT(PETSC_COMM_WORLD,ierr)
5631 #undef SETERRQ2
5632 #define SETERRQ2(comm,ierr,b,c,d) CHKERRABORT(comm,ierr)
5633 #undef SETERRQ3
5634 #define SETERRQ3(comm,ierr,b,c,d,e) CHKERRABORT(comm,ierr)
5635 #undef SETERRQ
5636 #define SETERRQ(c,ierr,b) CHKERRABORT(c,ierr)
5637 
5638 #if defined(PETSC_HAVE_FORTRAN_CAPS)
5639 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ
5640 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
5641 #define matsetvaluesmpiaij_ matsetvaluesmpiaij
5642 #else
5643 #endif
5644 PETSC_EXTERN void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr)
5645 {
5646   Mat            mat  = *mmat;
5647   PetscInt       m    = *mm, n = *mn;
5648   InsertMode     addv = *maddv;
5649   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
5650   PetscScalar    value;
5651   PetscErrorCode ierr;
5652 
5653   MatCheckPreallocated(mat,1);
5654   if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv;
5655 
5656 #if defined(PETSC_USE_DEBUG)
5657   else if (mat->insertmode != addv) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
5658 #endif
5659   {
5660     PetscInt  i,j,rstart  = mat->rmap->rstart,rend = mat->rmap->rend;
5661     PetscInt  cstart      = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
5662     PetscBool roworiented = aij->roworiented;
5663 
5664     /* Some variables required by the MatSetValues_SeqAIJ_A_Private() and MatSetValues_SeqAIJ_B_Private() macros */
5665     Mat        A                 = aij->A;
5666     Mat_SeqAIJ *a                = (Mat_SeqAIJ*)A->data;
5667     PetscInt   *aimax            = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
5668     MatScalar  *aa               = a->a;
5669     PetscBool  ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES)) ? PETSC_TRUE : PETSC_FALSE);
5670     Mat        B                 = aij->B;
5671     Mat_SeqAIJ *b                = (Mat_SeqAIJ*)B->data;
5672     PetscInt   *bimax            = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
5673     MatScalar  *ba               = b->a;
5674 
5675     PetscInt  *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
5676     PetscInt  nonew = a->nonew;
5677     MatScalar *ap1,*ap2;
5678 
5679     PetscFunctionBegin;
5680     for (i=0; i<m; i++) {
5681       if (im[i] < 0) continue;
5682 #if defined(PETSC_USE_DEBUG)
5683       if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
5684 #endif
5685       if (im[i] >= rstart && im[i] < rend) {
5686         row      = im[i] - rstart;
5687         lastcol1 = -1;
5688         rp1      = aj + ai[row];
5689         ap1      = aa + ai[row];
5690         rmax1    = aimax[row];
5691         nrow1    = ailen[row];
5692         low1     = 0;
5693         high1    = nrow1;
5694         lastcol2 = -1;
5695         rp2      = bj + bi[row];
5696         ap2      = ba + bi[row];
5697         rmax2    = bimax[row];
5698         nrow2    = bilen[row];
5699         low2     = 0;
5700         high2    = nrow2;
5701 
5702         for (j=0; j<n; j++) {
5703           if (roworiented) value = v[i*n+j];
5704           else value = v[i+j*m];
5705           if (in[j] >= cstart && in[j] < cend) {
5706             col = in[j] - cstart;
5707             if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && row != col) continue;
5708             MatSetValues_SeqAIJ_A_Private(row,col,value,addv,im[i],in[j]);
5709           } else if (in[j] < 0) continue;
5710 #if defined(PETSC_USE_DEBUG)
5711           else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
5712 #endif
5713           else {
5714             if (mat->was_assembled) {
5715               if (!aij->colmap) {
5716                 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
5717               }
5718 #if defined(PETSC_USE_CTABLE)
5719               ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
5720               col--;
5721 #else
5722               col = aij->colmap[in[j]] - 1;
5723 #endif
5724               if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && row != col) continue;
5725               if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
5726                 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
5727                 col  =  in[j];
5728                 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
5729                 B     = aij->B;
5730                 b     = (Mat_SeqAIJ*)B->data;
5731                 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
5732                 rp2   = bj + bi[row];
5733                 ap2   = ba + bi[row];
5734                 rmax2 = bimax[row];
5735                 nrow2 = bilen[row];
5736                 low2  = 0;
5737                 high2 = nrow2;
5738                 bm    = aij->B->rmap->n;
5739                 ba    = b->a;
5740               }
5741             } else col = in[j];
5742             MatSetValues_SeqAIJ_B_Private(row,col,value,addv,im[i],in[j]);
5743           }
5744         }
5745       } else if (!aij->donotstash) {
5746         if (roworiented) {
5747           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
5748         } else {
5749           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
5750         }
5751       }
5752     }
5753   }
5754   PetscFunctionReturnVoid();
5755 }
5756 
5757