xref: /petsc/src/mat/impls/aij/mpi/mpiaij.c (revision 19e8fbdabb07dcd9104f2d10b4dec626a2e1a063)
1 
2 
3 #include <../src/mat/impls/aij/mpi/mpiaij.h>   /*I "petscmat.h" I*/
4 #include <petsc/private/vecimpl.h>
5 #include <petsc/private/isimpl.h>
6 #include <petscblaslapack.h>
7 #include <petscsf.h>
8 
9 /*MC
10    MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices.
11 
12    This matrix type is identical to MATSEQAIJ when constructed with a single process communicator,
13    and MATMPIAIJ otherwise.  As a result, for single process communicators,
14  MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
15   for communicators controlling multiple processes.  It is recommended that you call both of
16   the above preallocation routines for simplicity.
17 
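   Example (a minimal sketch of the recommended preallocation calls; the sizes and per-row
   nonzero counts are placeholders and error checking is omitted):
.vb
      Mat      A;
      PetscInt m = 100, n = 100;                   /* local sizes, illustrative only */
      MatCreate(PETSC_COMM_WORLD,&A);
      MatSetSizes(A,m,n,PETSC_DETERMINE,PETSC_DETERMINE);
      MatSetType(A,MATAIJ);
      MatSeqAIJSetPreallocation(A,5,NULL);         /* used when the communicator has one process */
      MatMPIAIJSetPreallocation(A,5,NULL,2,NULL);  /* used when the communicator has several processes */
.ve
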
18    Options Database Keys:
19 . -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions()
20 
21   Developer Notes: Subclasses include MATAIJCUSP, MATAIJCUSPARSE, MATAIJPERM, MATAIJMKL, and MATAIJCRL; the type also automatically switches over to using inodes when
22    enough of them exist.
23 
24   Level: beginner
25 
26 .seealso: MatCreateAIJ(), MatCreateSeqAIJ(), MATSEQAIJ, MATMPIAIJ
27 M*/
28 
29 /*MC
30    MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices.
31 
32    This matrix type is identical to MATSEQAIJCRL when constructed with a single process communicator,
33    and MATMPIAIJCRL otherwise.  As a result, for single process communicators,
34    MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
35   for communicators controlling multiple processes.  It is recommended that you call both of
36   the above preallocation routines for simplicity.
37 
38    Options Database Keys:
39 . -mat_type aijcrl - sets the matrix type to "aijcrl" during a call to MatSetFromOptions()
40 
41   Level: beginner
42 
43 .seealso: MatCreateMPIAIJCRL(), MATSEQAIJCRL, MATMPIAIJCRL
44 M*/
45 
46 PetscErrorCode MatSetBlockSizes_MPIAIJ(Mat M, PetscInt rbs, PetscInt cbs)
47 {
48   PetscErrorCode ierr;
49   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)M->data;
50 
51   PetscFunctionBegin;
52   if (mat->A) {
53     ierr = MatSetBlockSizes(mat->A,rbs,cbs);CHKERRQ(ierr);
54     ierr = MatSetBlockSizes(mat->B,rbs,1);CHKERRQ(ierr);
55   }
56   PetscFunctionReturn(0);
57 }
58 
59 PetscErrorCode MatFindNonzeroRows_MPIAIJ(Mat M,IS *keptrows)
60 {
61   PetscErrorCode  ierr;
62   Mat_MPIAIJ      *mat = (Mat_MPIAIJ*)M->data;
63   Mat_SeqAIJ      *a   = (Mat_SeqAIJ*)mat->A->data;
64   Mat_SeqAIJ      *b   = (Mat_SeqAIJ*)mat->B->data;
65   const PetscInt  *ia,*ib;
66   const MatScalar *aa,*bb;
67   PetscInt        na,nb,i,j,*rows,cnt=0,n0rows;
68   PetscInt        m = M->rmap->n,rstart = M->rmap->rstart;
69 
70   PetscFunctionBegin;
71   *keptrows = 0;
72   ia        = a->i;
73   ib        = b->i;
74   for (i=0; i<m; i++) {
75     na = ia[i+1] - ia[i];
76     nb = ib[i+1] - ib[i];
77     if (!na && !nb) {
78       cnt++;
79       goto ok1;
80     }
81     aa = a->a + ia[i];
82     for (j=0; j<na; j++) {
83       if (aa[j] != 0.0) goto ok1;
84     }
85     bb = b->a + ib[i];
86     for (j=0; j <nb; j++) {
87       if (bb[j] != 0.0) goto ok1;
88     }
89     cnt++;
90 ok1:;
91   }
92   ierr = MPIU_Allreduce(&cnt,&n0rows,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)M));CHKERRQ(ierr);
93   if (!n0rows) PetscFunctionReturn(0);
94   ierr = PetscMalloc1(M->rmap->n-cnt,&rows);CHKERRQ(ierr);
95   cnt  = 0;
96   for (i=0; i<m; i++) {
97     na = ia[i+1] - ia[i];
98     nb = ib[i+1] - ib[i];
99     if (!na && !nb) continue;
100     aa = a->a + ia[i];
101     for (j=0; j<na;j++) {
102       if (aa[j] != 0.0) {
103         rows[cnt++] = rstart + i;
104         goto ok2;
105       }
106     }
107     bb = b->a + ib[i];
108     for (j=0; j<nb; j++) {
109       if (bb[j] != 0.0) {
110         rows[cnt++] = rstart + i;
111         goto ok2;
112       }
113     }
114 ok2:;
115   }
116   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),cnt,rows,PETSC_OWN_POINTER,keptrows);CHKERRQ(ierr);
117   PetscFunctionReturn(0);
118 }
119 
120 PetscErrorCode  MatDiagonalSet_MPIAIJ(Mat Y,Vec D,InsertMode is)
121 {
122   PetscErrorCode    ierr;
123   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*) Y->data;
124 
125   PetscFunctionBegin;
126   if (Y->assembled && Y->rmap->rstart == Y->cmap->rstart && Y->rmap->rend == Y->cmap->rend) {
127     ierr = MatDiagonalSet(aij->A,D,is);CHKERRQ(ierr);
128   } else {
129     ierr = MatDiagonalSet_Default(Y,D,is);CHKERRQ(ierr);
130   }
131   PetscFunctionReturn(0);
132 }
133 
134 PetscErrorCode MatFindZeroDiagonals_MPIAIJ(Mat M,IS *zrows)
135 {
136   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)M->data;
137   PetscErrorCode ierr;
138   PetscInt       i,rstart,nrows,*rows;
139 
140   PetscFunctionBegin;
141   *zrows = NULL;
142   ierr   = MatFindZeroDiagonals_SeqAIJ_Private(aij->A,&nrows,&rows);CHKERRQ(ierr);
143   ierr   = MatGetOwnershipRange(M,&rstart,NULL);CHKERRQ(ierr);
144   for (i=0; i<nrows; i++) rows[i] += rstart;
145   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),nrows,rows,PETSC_OWN_POINTER,zrows);CHKERRQ(ierr);
146   PetscFunctionReturn(0);
147 }
148 
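/*
   The column norms are accumulated into a length-N work array indexed by global column: entries of
   the diagonal block are shifted by cmap->rstart, entries of the off-diagonal block are mapped
   through garray, and the per-process arrays are then combined with a max (NORM_INFINITY) or sum
   (NORM_1, NORM_2) reduction, followed by a square root for NORM_2.
*/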
149 PetscErrorCode MatGetColumnNorms_MPIAIJ(Mat A,NormType type,PetscReal *norms)
150 {
151   PetscErrorCode ierr;
152   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)A->data;
153   PetscInt       i,n,*garray = aij->garray;
154   Mat_SeqAIJ     *a_aij = (Mat_SeqAIJ*) aij->A->data;
155   Mat_SeqAIJ     *b_aij = (Mat_SeqAIJ*) aij->B->data;
156   PetscReal      *work;
157 
158   PetscFunctionBegin;
159   ierr = MatGetSize(A,NULL,&n);CHKERRQ(ierr);
160   ierr = PetscCalloc1(n,&work);CHKERRQ(ierr);
161   if (type == NORM_2) {
162     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
163       work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]*a_aij->a[i]);
164     }
165     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
166       work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]*b_aij->a[i]);
167     }
168   } else if (type == NORM_1) {
169     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
170       work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]);
171     }
172     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
173       work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]);
174     }
175   } else if (type == NORM_INFINITY) {
176     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
177       work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]);
178     }
179     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
180       work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]),work[garray[b_aij->j[i]]]);
181     }
182 
183   } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Unknown NormType");
184   if (type == NORM_INFINITY) {
185     ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
186   } else {
187     ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
188   }
189   ierr = PetscFree(work);CHKERRQ(ierr);
190   if (type == NORM_2) {
191     for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]);
192   }
193   PetscFunctionReturn(0);
194 }
195 
196 PetscErrorCode MatFindOffBlockDiagonalEntries_MPIAIJ(Mat A,IS *is)
197 {
198   Mat_MPIAIJ      *a  = (Mat_MPIAIJ*)A->data;
199   IS              sis,gis;
200   PetscErrorCode  ierr;
201   const PetscInt  *isis,*igis;
202   PetscInt        n,*iis,nsis,ngis,rstart,i;
203 
204   PetscFunctionBegin;
205   ierr = MatFindOffBlockDiagonalEntries(a->A,&sis);CHKERRQ(ierr);
206   ierr = MatFindNonzeroRows(a->B,&gis);CHKERRQ(ierr);
207   ierr = ISGetSize(gis,&ngis);CHKERRQ(ierr);
208   ierr = ISGetSize(sis,&nsis);CHKERRQ(ierr);
209   ierr = ISGetIndices(sis,&isis);CHKERRQ(ierr);
210   ierr = ISGetIndices(gis,&igis);CHKERRQ(ierr);
211 
212   ierr = PetscMalloc1(ngis+nsis,&iis);CHKERRQ(ierr);
213   ierr = PetscMemcpy(iis,igis,ngis*sizeof(PetscInt));CHKERRQ(ierr);
214   ierr = PetscMemcpy(iis+ngis,isis,nsis*sizeof(PetscInt));CHKERRQ(ierr);
215   n    = ngis + nsis;
216   ierr = PetscSortRemoveDupsInt(&n,iis);CHKERRQ(ierr);
217   ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr);
218   for (i=0; i<n; i++) iis[i] += rstart;
219   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)A),n,iis,PETSC_OWN_POINTER,is);CHKERRQ(ierr);
220 
221   ierr = ISRestoreIndices(sis,&isis);CHKERRQ(ierr);
222   ierr = ISRestoreIndices(gis,&igis);CHKERRQ(ierr);
223   ierr = ISDestroy(&sis);CHKERRQ(ierr);
224   ierr = ISDestroy(&gis);CHKERRQ(ierr);
225   PetscFunctionReturn(0);
226 }
227 
228 /*
229     Distributes a SeqAIJ matrix across a set of processes. Code stolen from
230     MatLoad_MPIAIJ(). Horrible lack of reuse. Should be a routine for each matrix type.
231 
232     Only for square matrices
233 
234     Used by a preconditioner, hence PETSC_EXTERN
235 */
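/*
   A minimal usage sketch (gseq is a MATSEQAIJ matrix available on every rank, although only the
   values on rank 0 are sent out; mlocal is the number of rows this rank is to own; both names are
   placeholders):

     Mat mdist;
     MatDistribute_MPIAIJ(PETSC_COMM_WORLD,gseq,mlocal,MAT_INITIAL_MATRIX,&mdist);
     ... change the numerical values of gseq, keeping its nonzero pattern ...
     MatDistribute_MPIAIJ(PETSC_COMM_WORLD,gseq,mlocal,MAT_REUSE_MATRIX,&mdist);
*/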
236 PETSC_EXTERN PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat)
237 {
238   PetscMPIInt    rank,size;
239   PetscInt       *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz = 0,*gmataj,cnt,row,*ld,bses[2];
240   PetscErrorCode ierr;
241   Mat            mat;
242   Mat_SeqAIJ     *gmata;
243   PetscMPIInt    tag;
244   MPI_Status     status;
245   PetscBool      aij;
246   MatScalar      *gmataa,*ao,*ad,*gmataarestore=0;
247 
248   PetscFunctionBegin;
249   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
250   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
251   if (!rank) {
252     ierr = PetscObjectTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);CHKERRQ(ierr);
253     if (!aij) SETERRQ1(PetscObjectComm((PetscObject)gmat),PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name);
254   }
255   if (reuse == MAT_INITIAL_MATRIX) {
256     ierr = MatCreate(comm,&mat);CHKERRQ(ierr);
257     ierr = MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
258     ierr = MatGetBlockSizes(gmat,&bses[0],&bses[1]);CHKERRQ(ierr);
259     ierr = MPI_Bcast(bses,2,MPIU_INT,0,comm);CHKERRQ(ierr);
260     ierr = MatSetBlockSizes(mat,bses[0],bses[1]);CHKERRQ(ierr);
261     ierr = MatSetType(mat,MATAIJ);CHKERRQ(ierr);
262     ierr = PetscMalloc1(size+1,&rowners);CHKERRQ(ierr);
263     ierr = PetscMalloc2(m,&dlens,m,&olens);CHKERRQ(ierr);
264     ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
265 
266     rowners[0] = 0;
267     for (i=2; i<=size; i++) rowners[i] += rowners[i-1];
268     rstart = rowners[rank];
269     rend   = rowners[rank+1];
270     ierr   = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
271     if (!rank) {
272       gmata = (Mat_SeqAIJ*) gmat->data;
273       /* send row lengths to all processors */
274       for (i=0; i<m; i++) dlens[i] = gmata->ilen[i];
275       for (i=1; i<size; i++) {
276         ierr = MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
277       }
278       /* count the off-diagonal entries (olens) and the entries left of the diagonal block (ld) in each row */
279       ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
280       ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr);
281       jj   = 0;
282       for (i=0; i<m; i++) {
283         for (j=0; j<dlens[i]; j++) {
284           if (gmata->j[jj] < rstart) ld[i]++;
285           if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++;
286           jj++;
287         }
288       }
289       /* send column indices to other processes */
290       for (i=1; i<size; i++) {
291         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
292         ierr = MPI_Send(&nz,1,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
293         ierr = MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
294       }
295 
296       /* send numerical values to other processes */
297       for (i=1; i<size; i++) {
298         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
299         ierr = MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
300       }
301       gmataa = gmata->a;
302       gmataj = gmata->j;
303 
304     } else {
305       /* receive row lengths */
306       ierr = MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
307       /* receive column indices */
308       ierr = MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
309       ierr = PetscMalloc2(nz,&gmataa,nz,&gmataj);CHKERRQ(ierr);
310       ierr = MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
311       /* count the off-diagonal entries (olens) and the entries left of the diagonal block (ld) in each row */
312       ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
313       ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr);
314       jj   = 0;
315       for (i=0; i<m; i++) {
316         for (j=0; j<dlens[i]; j++) {
317           if (gmataj[jj] < rstart) ld[i]++;
318           if (gmataj[jj] < rstart || gmataj[jj] >= rend) olens[i]++;
319           jj++;
320         }
321       }
322       /* receive numerical values */
323       ierr = PetscMemzero(gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr);
324       ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
325     }
326     /* set preallocation */
327     for (i=0; i<m; i++) {
328       dlens[i] -= olens[i];
329     }
330     ierr = MatSeqAIJSetPreallocation(mat,0,dlens);CHKERRQ(ierr);
331     ierr = MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);CHKERRQ(ierr);
332 
333     for (i=0; i<m; i++) {
334       dlens[i] += olens[i];
335     }
336     cnt = 0;
337     for (i=0; i<m; i++) {
338       row  = rstart + i;
339       ierr = MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);CHKERRQ(ierr);
340       cnt += dlens[i];
341     }
342     if (rank) {
343       ierr = PetscFree2(gmataa,gmataj);CHKERRQ(ierr);
344     }
345     ierr = PetscFree2(dlens,olens);CHKERRQ(ierr);
346     ierr = PetscFree(rowners);CHKERRQ(ierr);
347 
348     ((Mat_MPIAIJ*)(mat->data))->ld = ld;
349 
350     *inmat = mat;
351   } else {   /* column indices are already set; only need to move over numerical values from process 0 */
352     Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data;
353     Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data;
354     mat  = *inmat;
355     ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
356     if (!rank) {
357       /* send numerical values to other processes */
358       gmata  = (Mat_SeqAIJ*) gmat->data;
359       ierr   = MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);CHKERRQ(ierr);
360       gmataa = gmata->a;
361       for (i=1; i<size; i++) {
362         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
363         ierr = MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
364       }
365       nz = gmata->i[rowners[1]]-gmata->i[rowners[0]];
366     } else {
367       /* receive numerical values from process 0 */
368       nz   = Ad->nz + Ao->nz;
369       ierr = PetscMalloc1(nz,&gmataa);CHKERRQ(ierr); gmataarestore = gmataa;
370       ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
371     }
372     /* transfer numerical values into the diagonal A and off diagonal B parts of mat */
373     ld = ((Mat_MPIAIJ*)(mat->data))->ld;
374     ad = Ad->a;
375     ao = Ao->a;
376     if (mat->rmap->n) {
377       i  = 0;
378       nz = ld[i];                                   ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
379       nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
380     }
381     for (i=1; i<mat->rmap->n; i++) {
382       nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
383       nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
384     }
385     i--;
386     if (mat->rmap->n) {
387       nz = Ao->i[i+1] - Ao->i[i] - ld[i];           ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr);
388     }
389     if (rank) {
390       ierr = PetscFree(gmataarestore);CHKERRQ(ierr);
391     }
392   }
393   ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
394   ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
395   PetscFunctionReturn(0);
396 }
397 
398 /*
399   Local utility routine that creates a mapping from the global column
400 number to the local number in the off-diagonal part of the local
401 storage of the matrix.  When PETSC_USE_CTABLE is used this is scalable at
402 a slightly higher hash table cost; without it, it is not scalable (each processor
403 has an order-N integer array) but is fast to access.
404 */
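/*
   Lookup sketch mirroring how the colmap is consulted in MatSetValues_MPIAIJ() and
   MatGetValues_MPIAIJ() below (gcol is a global column index; the +1/-1 shift is used because a
   result of 0, and hence -1 after the shift, means the column is not present in the off-diagonal block):

     PetscInt lcol;
   #if defined(PETSC_USE_CTABLE)
     PetscTableFind(aij->colmap,gcol+1,&lcol); lcol--;
   #else
     lcol = aij->colmap[gcol] - 1;
   #endif
     if (lcol < 0) { ... gcol has no entry in the off-diagonal block ... }
*/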
405 PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat)
406 {
407   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
408   PetscErrorCode ierr;
409   PetscInt       n = aij->B->cmap->n,i;
410 
411   PetscFunctionBegin;
412   if (!aij->garray) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"MPIAIJ Matrix was assembled but is missing garray");
413 #if defined(PETSC_USE_CTABLE)
414   ierr = PetscTableCreate(n,mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr);
415   for (i=0; i<n; i++) {
416     ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1,INSERT_VALUES);CHKERRQ(ierr);
417   }
418 #else
419   ierr = PetscCalloc1(mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr);
420   ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N+1)*sizeof(PetscInt));CHKERRQ(ierr);
421   for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
422 #endif
423   PetscFunctionReturn(0);
424 }
425 
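/*
  The two macros below insert (or add to) a single value in one row of the local diagonal (A) or
  off-diagonal (B) SeqAIJ block: a binary search narrows the sorted column range, a linear scan finds
  the insertion point, and a genuinely new entry shifts all later entries of the row up by one
  (reallocating the row with MatSeqXAIJReallocateAIJ() if it is full).  They rely on the local
  variables set up in MatSetValues_MPIAIJ() below.
*/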
426 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv,orow,ocol)     \
427 { \
428     if (col <= lastcol1)  low1 = 0;     \
429     else                 high1 = nrow1; \
430     lastcol1 = col;\
431     while (high1-low1 > 5) { \
432       t = (low1+high1)/2; \
433       if (rp1[t] > col) high1 = t; \
434       else              low1  = t; \
435     } \
436       for (_i=low1; _i<high1; _i++) { \
437         if (rp1[_i] > col) break; \
438         if (rp1[_i] == col) { \
439           if (addv == ADD_VALUES) ap1[_i] += value;   \
440           else                    ap1[_i] = value; \
441           goto a_noinsert; \
442         } \
443       }  \
444       if (value == 0.0 && ignorezeroentries && row != col) {low1 = 0; high1 = nrow1;goto a_noinsert;} \
445       if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;}                \
446       if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \
447       MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \
448       N = nrow1++ - 1; a->nz++; high1++; \
449       /* shift up all the later entries in this row */ \
450       for (ii=N; ii>=_i; ii--) { \
451         rp1[ii+1] = rp1[ii]; \
452         ap1[ii+1] = ap1[ii]; \
453       } \
454       rp1[_i] = col;  \
455       ap1[_i] = value;  \
456       A->nonzerostate++;\
457       a_noinsert: ; \
458       ailen[row] = nrow1; \
459 }
460 
461 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv,orow,ocol) \
462   { \
463     if (col <= lastcol2) low2 = 0;                        \
464     else high2 = nrow2;                                   \
465     lastcol2 = col;                                       \
466     while (high2-low2 > 5) {                              \
467       t = (low2+high2)/2;                                 \
468       if (rp2[t] > col) high2 = t;                        \
469       else             low2  = t;                         \
470     }                                                     \
471     for (_i=low2; _i<high2; _i++) {                       \
472       if (rp2[_i] > col) break;                           \
473       if (rp2[_i] == col) {                               \
474         if (addv == ADD_VALUES) ap2[_i] += value;         \
475         else                    ap2[_i] = value;          \
476         goto b_noinsert;                                  \
477       }                                                   \
478     }                                                     \
479     if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
480     if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;}                        \
481     if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \
482     MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \
483     N = nrow2++ - 1; b->nz++; high2++;                    \
484     /* shift up all the later entries in this row */      \
485     for (ii=N; ii>=_i; ii--) {                            \
486       rp2[ii+1] = rp2[ii];                                \
487       ap2[ii+1] = ap2[ii];                                \
488     }                                                     \
489     rp2[_i] = col;                                        \
490     ap2[_i] = value;                                      \
491     B->nonzerostate++;                                    \
492     b_noinsert: ;                                         \
493     bilen[row] = nrow2;                                   \
494   }
495 
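/*
   Replaces an entire locally owned row, given its values in global column order: the first l values
   go into the off-diagonal block B (columns to the left of the diagonal block), the next
   a->i[row+1]-a->i[row] values go into the diagonal block A, and the remaining values go back into
   B (columns to the right of the diagonal block).
*/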
496 PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[])
497 {
498   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)A->data;
499   Mat_SeqAIJ     *a   = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data;
500   PetscErrorCode ierr;
501   PetscInt       l,*garray = mat->garray,diag;
502 
503   PetscFunctionBegin;
504   /* code only works for square matrices A */
505 
506   /* find size of row to the left of the diagonal part */
507   ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr);
508   row  = row - diag;
509   for (l=0; l<b->i[row+1]-b->i[row]; l++) {
510     if (garray[b->j[b->i[row]+l]] > diag) break;
511   }
512   ierr = PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));CHKERRQ(ierr);
513 
514   /* diagonal part */
515   ierr = PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));CHKERRQ(ierr);
516 
517   /* right of diagonal part */
518   ierr = PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));CHKERRQ(ierr);
519   PetscFunctionReturn(0);
520 }
521 
522 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
523 {
524   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
525   PetscScalar    value;
526   PetscErrorCode ierr;
527   PetscInt       i,j,rstart  = mat->rmap->rstart,rend = mat->rmap->rend;
528   PetscInt       cstart      = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
529   PetscBool      roworiented = aij->roworiented;
530 
531   /* Some Variables required in the macro */
532   Mat        A                 = aij->A;
533   Mat_SeqAIJ *a                = (Mat_SeqAIJ*)A->data;
534   PetscInt   *aimax            = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
535   MatScalar  *aa               = a->a;
536   PetscBool  ignorezeroentries = a->ignorezeroentries;
537   Mat        B                 = aij->B;
538   Mat_SeqAIJ *b                = (Mat_SeqAIJ*)B->data;
539   PetscInt   *bimax            = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
540   MatScalar  *ba               = b->a;
541 
542   PetscInt  *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
543   PetscInt  nonew;
544   MatScalar *ap1,*ap2;
545 
546   PetscFunctionBegin;
547   for (i=0; i<m; i++) {
548     if (im[i] < 0) continue;
549 #if defined(PETSC_USE_DEBUG)
550     if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
551 #endif
552     if (im[i] >= rstart && im[i] < rend) {
553       row      = im[i] - rstart;
554       lastcol1 = -1;
555       rp1      = aj + ai[row];
556       ap1      = aa + ai[row];
557       rmax1    = aimax[row];
558       nrow1    = ailen[row];
559       low1     = 0;
560       high1    = nrow1;
561       lastcol2 = -1;
562       rp2      = bj + bi[row];
563       ap2      = ba + bi[row];
564       rmax2    = bimax[row];
565       nrow2    = bilen[row];
566       low2     = 0;
567       high2    = nrow2;
568 
569       for (j=0; j<n; j++) {
570         if (roworiented) value = v[i*n+j];
571         else             value = v[i+j*m];
572         if (in[j] >= cstart && in[j] < cend) {
573           col   = in[j] - cstart;
574           nonew = a->nonew;
575           if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && row != col) continue;
576           MatSetValues_SeqAIJ_A_Private(row,col,value,addv,im[i],in[j]);
577         } else if (in[j] < 0) continue;
578 #if defined(PETSC_USE_DEBUG)
579         else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
580 #endif
581         else {
582           if (mat->was_assembled) {
583             if (!aij->colmap) {
584               ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
585             }
586 #if defined(PETSC_USE_CTABLE)
587             ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
588             col--;
589 #else
590             col = aij->colmap[in[j]] - 1;
591 #endif
592             if (col < 0 && !((Mat_SeqAIJ*)(aij->B->data))->nonew) {
593               ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
594               col  =  in[j];
595               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
596               B     = aij->B;
597               b     = (Mat_SeqAIJ*)B->data;
598               bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a;
599               rp2   = bj + bi[row];
600               ap2   = ba + bi[row];
601               rmax2 = bimax[row];
602               nrow2 = bilen[row];
603               low2  = 0;
604               high2 = nrow2;
605               bm    = aij->B->rmap->n;
606               ba    = b->a;
607             } else if (col < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", im[i], in[j]);
608           } else col = in[j];
609           nonew = b->nonew;
610           MatSetValues_SeqAIJ_B_Private(row,col,value,addv,im[i],in[j]);
611         }
612       }
613     } else {
614       if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
615       if (!aij->donotstash) {
616         mat->assembled = PETSC_FALSE;
617         if (roworiented) {
618           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
619         } else {
620           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
621         }
622       }
623     }
624   }
625   PetscFunctionReturn(0);
626 }
627 
628 PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
629 {
630   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
631   PetscErrorCode ierr;
632   PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
633   PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
634 
635   PetscFunctionBegin;
636   for (i=0; i<m; i++) {
637     if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/
638     if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
639     if (idxm[i] >= rstart && idxm[i] < rend) {
640       row = idxm[i] - rstart;
641       for (j=0; j<n; j++) {
642         if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */
643         if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
644         if (idxn[j] >= cstart && idxn[j] < cend) {
645           col  = idxn[j] - cstart;
646           ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
647         } else {
648           if (!aij->colmap) {
649             ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
650           }
651 #if defined(PETSC_USE_CTABLE)
652           ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
653           col--;
654 #else
655           col = aij->colmap[idxn[j]] - 1;
656 #endif
657           if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
658           else {
659             ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
660           }
661         }
662       }
663     } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
664   }
665   PetscFunctionReturn(0);
666 }
667 
668 extern PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat,Vec,Vec);
669 
670 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
671 {
672   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
673   PetscErrorCode ierr;
674   PetscInt       nstash,reallocs;
675 
676   PetscFunctionBegin;
677   if (aij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0);
678 
679   ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
680   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
681   ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
682   PetscFunctionReturn(0);
683 }
684 
685 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
686 {
687   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
688   Mat_SeqAIJ     *a   = (Mat_SeqAIJ*)aij->A->data;
689   PetscErrorCode ierr;
690   PetscMPIInt    n;
691   PetscInt       i,j,rstart,ncols,flg;
692   PetscInt       *row,*col;
693   PetscBool      other_disassembled;
694   PetscScalar    *val;
695 
696   /* do not use 'b = (Mat_SeqAIJ*)aij->B->data' as B can be reset in disassembly */
697 
698   PetscFunctionBegin;
699   if (!aij->donotstash && !mat->nooffprocentries) {
700     while (1) {
701       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
702       if (!flg) break;
703 
704       for (i=0; i<n; ) {
705         /* Now identify the consecutive vals belonging to the same row */
706         for (j=i,rstart=row[j]; j<n; j++) {
707           if (row[j] != rstart) break;
708         }
709         if (j < n) ncols = j-i;
710         else       ncols = n-i;
711         /* Now assemble all these values with a single function call */
712         ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,mat->insertmode);CHKERRQ(ierr);
713 
714         i = j;
715       }
716     }
717     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
718   }
719   ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
720   ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);
721 
722   /* determine if any processor has disassembled, if so we must
723      also disassemble ourselves, in order that we may reassemble. */
724   /*
725      if nonzero structure of submatrix B cannot change then we know that
726      no processor disassembled thus we can skip this stuff
727   */
728   if (!((Mat_SeqAIJ*)aij->B->data)->nonew) {
729     ierr = MPIU_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
730     if (mat->was_assembled && !other_disassembled) {
731       ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
732     }
733   }
734   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
735     ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
736   }
737   ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr);
738   ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
739   ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);
740 
741   ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
742 
743   aij->rowvalues = 0;
744 
745   ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
746   if (a->inode.size) mat->ops->multdiagonalblock = MatMultDiagonalBlock_MPIAIJ;
747 
748   /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */
749   if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
750     PetscObjectState state = aij->A->nonzerostate + aij->B->nonzerostate;
751     ierr = MPIU_Allreduce(&state,&mat->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
752   }
753   PetscFunctionReturn(0);
754 }
755 
756 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
757 {
758   Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
759   PetscErrorCode ierr;
760 
761   PetscFunctionBegin;
762   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
763   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
764   PetscFunctionReturn(0);
765 }
766 
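/*
   Zeroes the given global rows: they are first mapped to locally owned row numbers, the right-hand
   side is optionally fixed up as b = diag*x on those rows, the rows of the off-diagonal block B are
   zeroed, and finally the rows of the diagonal block A are zeroed; a nonzero diag is placed on the
   diagonal either directly (congruent row/column layouts) or via MatSetValues() followed by reassembly.
*/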
767 PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
768 {
769   Mat_MPIAIJ    *mat    = (Mat_MPIAIJ *) A->data;
770   PetscInt      *lrows;
771   PetscInt       r, len;
772   PetscErrorCode ierr;
773 
774   PetscFunctionBegin;
775   /* get locally owned rows */
776   ierr = MatZeroRowsMapLocal_Private(A,N,rows,&len,&lrows);CHKERRQ(ierr);
777   /* fix right hand side if needed */
778   if (x && b) {
779     const PetscScalar *xx;
780     PetscScalar       *bb;
781 
782     ierr = VecGetArrayRead(x, &xx);CHKERRQ(ierr);
783     ierr = VecGetArray(b, &bb);CHKERRQ(ierr);
784     for (r = 0; r < len; ++r) bb[lrows[r]] = diag*xx[lrows[r]];
785     ierr = VecRestoreArrayRead(x, &xx);CHKERRQ(ierr);
786     ierr = VecRestoreArray(b, &bb);CHKERRQ(ierr);
787   }
788   /* Must zero mat->B before mat->A because the (diag != 0.0) case below may put values into mat->B */
789   ierr = MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
790   if (A->congruentlayouts == -1) { /* first time we compare rows and cols layouts */
791     PetscBool cong;
792     ierr = PetscLayoutCompare(A->rmap,A->cmap,&cong);CHKERRQ(ierr);
793     if (cong) A->congruentlayouts = 1;
794     else      A->congruentlayouts = 0;
795   }
796   if ((diag != 0.0) && A->congruentlayouts) {
797     ierr = MatZeroRows(mat->A, len, lrows, diag, NULL, NULL);CHKERRQ(ierr);
798   } else if (diag != 0.0) {
799     ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
800     if (((Mat_SeqAIJ *) mat->A->data)->nonew) SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "MatZeroRows() on rectangular matrices cannot be used with the Mat options\nMAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
801     for (r = 0; r < len; ++r) {
802       const PetscInt row = lrows[r] + A->rmap->rstart;
803       ierr = MatSetValues(A, 1, &row, 1, &row, &diag, INSERT_VALUES);CHKERRQ(ierr);
804     }
805     ierr = MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
806     ierr = MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
807   } else {
808     ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
809   }
810   ierr = PetscFree(lrows);CHKERRQ(ierr);
811 
812   /* only change matrix nonzero state if pattern was allowed to be changed */
813   if (!((Mat_SeqAIJ*)(mat->A->data))->keepnonzeropattern) {
814     PetscObjectState state = mat->A->nonzerostate + mat->B->nonzerostate;
815     ierr = MPIU_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
816   }
817   PetscFunctionReturn(0);
818 }
819 
820 PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
821 {
822   Mat_MPIAIJ        *l = (Mat_MPIAIJ*)A->data;
823   PetscErrorCode    ierr;
824   PetscMPIInt       n = A->rmap->n;
825   PetscInt          i,j,r,m,p = 0,len = 0;
826   PetscInt          *lrows,*owners = A->rmap->range;
827   PetscSFNode       *rrows;
828   PetscSF           sf;
829   const PetscScalar *xx;
830   PetscScalar       *bb,*mask;
831   Vec               xmask,lmask;
832   Mat_SeqAIJ        *aij = (Mat_SeqAIJ*)l->B->data;
833   const PetscInt    *aj, *ii,*ridx;
834   PetscScalar       *aa;
835 
836   PetscFunctionBegin;
837   /* Create SF where leaves are input rows and roots are owned rows */
838   ierr = PetscMalloc1(n, &lrows);CHKERRQ(ierr);
839   for (r = 0; r < n; ++r) lrows[r] = -1;
840   ierr = PetscMalloc1(N, &rrows);CHKERRQ(ierr);
841   for (r = 0; r < N; ++r) {
842     const PetscInt idx   = rows[r];
843     if (idx < 0 || A->rmap->N <= idx) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range [0,%D)",idx,A->rmap->N);
844     if (idx < owners[p] || owners[p+1] <= idx) { /* short-circuit the search if the last p owns this row too */
845       ierr = PetscLayoutFindOwner(A->rmap,idx,&p);CHKERRQ(ierr);
846     }
847     rrows[r].rank  = p;
848     rrows[r].index = rows[r] - owners[p];
849   }
850   ierr = PetscSFCreate(PetscObjectComm((PetscObject) A), &sf);CHKERRQ(ierr);
851   ierr = PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER);CHKERRQ(ierr);
852   /* Collect flags for rows to be zeroed */
853   ierr = PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
854   ierr = PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
855   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
856   /* Compress and put in row numbers */
857   for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r;
858   /* zero diagonal part of matrix */
859   ierr = MatZeroRowsColumns(l->A,len,lrows,diag,x,b);CHKERRQ(ierr);
860   /* handle off diagonal part of matrix */
861   ierr = MatCreateVecs(A,&xmask,NULL);CHKERRQ(ierr);
862   ierr = VecDuplicate(l->lvec,&lmask);CHKERRQ(ierr);
863   ierr = VecGetArray(xmask,&bb);CHKERRQ(ierr);
864   for (i=0; i<len; i++) bb[lrows[i]] = 1;
865   ierr = VecRestoreArray(xmask,&bb);CHKERRQ(ierr);
866   ierr = VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
867   ierr = VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
868   ierr = VecDestroy(&xmask);CHKERRQ(ierr);
869   if (x) {
870     ierr = VecScatterBegin(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
871     ierr = VecScatterEnd(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
872     ierr = VecGetArrayRead(l->lvec,&xx);CHKERRQ(ierr);
873     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
874   }
875   ierr = VecGetArray(lmask,&mask);CHKERRQ(ierr);
876   /* remove zeroed rows of off diagonal matrix */
877   ii = aij->i;
878   for (i=0; i<len; i++) {
879     ierr = PetscMemzero(aij->a + ii[lrows[i]],(ii[lrows[i]+1] - ii[lrows[i]])*sizeof(PetscScalar));CHKERRQ(ierr);
880   }
881   /* loop over all elements of the off-process part of the matrix, zeroing removed columns */
882   if (aij->compressedrow.use) {
883     m    = aij->compressedrow.nrows;
884     ii   = aij->compressedrow.i;
885     ridx = aij->compressedrow.rindex;
886     for (i=0; i<m; i++) {
887       n  = ii[i+1] - ii[i];
888       aj = aij->j + ii[i];
889       aa = aij->a + ii[i];
890 
891       for (j=0; j<n; j++) {
892         if (PetscAbsScalar(mask[*aj])) {
893           if (b) bb[*ridx] -= *aa*xx[*aj];
894           *aa = 0.0;
895         }
896         aa++;
897         aj++;
898       }
899       ridx++;
900     }
901   } else { /* do not use compressed row format */
902     m = l->B->rmap->n;
903     for (i=0; i<m; i++) {
904       n  = ii[i+1] - ii[i];
905       aj = aij->j + ii[i];
906       aa = aij->a + ii[i];
907       for (j=0; j<n; j++) {
908         if (PetscAbsScalar(mask[*aj])) {
909           if (b) bb[i] -= *aa*xx[*aj];
910           *aa = 0.0;
911         }
912         aa++;
913         aj++;
914       }
915     }
916   }
917   if (x) {
918     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
919     ierr = VecRestoreArrayRead(l->lvec,&xx);CHKERRQ(ierr);
920   }
921   ierr = VecRestoreArray(lmask,&mask);CHKERRQ(ierr);
922   ierr = VecDestroy(&lmask);CHKERRQ(ierr);
923   ierr = PetscFree(lrows);CHKERRQ(ierr);
924 
925   /* only change matrix nonzero state if pattern was allowed to be changed */
926   if (!((Mat_SeqAIJ*)(l->A->data))->keepnonzeropattern) {
927     PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate;
928     ierr = MPIU_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
929   }
930   PetscFunctionReturn(0);
931 }
932 
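/*
   y = A x for the MPIAIJ layout: start the scatter that gathers the needed off-process entries of x
   into a->lvec, multiply by the local diagonal block A while those messages are in flight, complete
   the scatter, then add the contribution of the off-diagonal block B applied to the ghosted vector.
*/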
933 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
934 {
935   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
936   PetscErrorCode ierr;
937   PetscInt       nt;
938 
939   PetscFunctionBegin;
940   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
941   if (nt != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt);
942   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
943   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
944   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
945   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
946   PetscFunctionReturn(0);
947 }
948 
949 PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx)
950 {
951   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
952   PetscErrorCode ierr;
953 
954   PetscFunctionBegin;
955   ierr = MatMultDiagonalBlock(a->A,bb,xx);CHKERRQ(ierr);
956   PetscFunctionReturn(0);
957 }
958 
959 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
960 {
961   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
962   PetscErrorCode ierr;
963 
964   PetscFunctionBegin;
965   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
966   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
967   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
968   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
969   PetscFunctionReturn(0);
970 }
971 
972 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
973 {
974   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
975   PetscErrorCode ierr;
976   PetscBool      merged;
977 
978   PetscFunctionBegin;
979   ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr);
980   /* do nondiagonal part */
981   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
982   if (!merged) {
983     /* send it on its way */
984     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
985     /* do local part */
986     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
987     /* receive remote parts: note this assumes the values are not actually */
988     /* added into yy until the next line */
989     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
990   } else {
991     /* do local part */
992     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
993     /* send it on its way */
994     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
995     /* values actually were received in the Begin() but we need to call this nop */
996     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
997   }
998   PetscFunctionReturn(0);
999 }
1000 
1001 PetscErrorCode MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscBool  *f)
1002 {
1003   MPI_Comm       comm;
1004   Mat_MPIAIJ     *Aij = (Mat_MPIAIJ*) Amat->data, *Bij;
1005   Mat            Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs;
1006   IS             Me,Notme;
1007   PetscErrorCode ierr;
1008   PetscInt       M,N,first,last,*notme,i;
1009   PetscMPIInt    size;
1010 
1011   PetscFunctionBegin;
1012   /* Easy test: the diagonal blocks must be transposes of each other */
1013   Bij  = (Mat_MPIAIJ*) Bmat->data; Bdia = Bij->A;
1014   ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr);
1015   if (!*f) PetscFunctionReturn(0);
1016   ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr);
1017   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1018   if (size == 1) PetscFunctionReturn(0);
1019 
1020   /* Hard test: off-diagonal block. This takes a MatCreateSubMatrices() call. */
1021   ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr);
1022   ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr);
1023   ierr = PetscMalloc1(N-last+first,&notme);CHKERRQ(ierr);
1024   for (i=0; i<first; i++) notme[i] = i;
1025   for (i=last; i<M; i++) notme[i-last+first] = i;
1026   ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,PETSC_COPY_VALUES,&Notme);CHKERRQ(ierr);
1027   ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr);
1028   ierr = MatCreateSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr);
1029   Aoff = Aoffs[0];
1030   ierr = MatCreateSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr);
1031   Boff = Boffs[0];
1032   ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr);
1033   ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr);
1034   ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr);
1035   ierr = ISDestroy(&Me);CHKERRQ(ierr);
1036   ierr = ISDestroy(&Notme);CHKERRQ(ierr);
1037   ierr = PetscFree(notme);CHKERRQ(ierr);
1038   PetscFunctionReturn(0);
1039 }
1040 
1041 PetscErrorCode MatIsSymmetric_MPIAIJ(Mat A,PetscReal tol,PetscBool  *f)
1042 {
1043   PetscErrorCode ierr;
1044 
1045   PetscFunctionBegin;
1046   ierr = MatIsTranspose_MPIAIJ(A,A,tol,f);CHKERRQ(ierr);
1047   PetscFunctionReturn(0);
1048 }
1049 
1050 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1051 {
1052   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1053   PetscErrorCode ierr;
1054 
1055   PetscFunctionBegin;
1056   /* do nondiagonal part */
1057   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1058   /* send it on its way */
1059   ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1060   /* do local part */
1061   ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1062   /* receive remote parts */
1063   ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1064   PetscFunctionReturn(0);
1065 }
1066 
1067 /*
1068   This only works correctly for square matrices where the subblock A->A is the
1069    diagonal block
1070 */
1071 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v)
1072 {
1073   PetscErrorCode ierr;
1074   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1075 
1076   PetscFunctionBegin;
1077   if (A->rmap->N != A->cmap->N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
1078   if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
1079   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
1080   PetscFunctionReturn(0);
1081 }
1082 
1083 PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa)
1084 {
1085   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1086   PetscErrorCode ierr;
1087 
1088   PetscFunctionBegin;
1089   ierr = MatScale(a->A,aa);CHKERRQ(ierr);
1090   ierr = MatScale(a->B,aa);CHKERRQ(ierr);
1091   PetscFunctionReturn(0);
1092 }
1093 
1094 PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
1095 {
1096   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1097   PetscErrorCode ierr;
1098 
1099   PetscFunctionBegin;
1100 #if defined(PETSC_USE_LOG)
1101   PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N);
1102 #endif
1103   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
1104   ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
1105   ierr = MatDestroy(&aij->A);CHKERRQ(ierr);
1106   ierr = MatDestroy(&aij->B);CHKERRQ(ierr);
1107 #if defined(PETSC_USE_CTABLE)
1108   ierr = PetscTableDestroy(&aij->colmap);CHKERRQ(ierr);
1109 #else
1110   ierr = PetscFree(aij->colmap);CHKERRQ(ierr);
1111 #endif
1112   ierr = PetscFree(aij->garray);CHKERRQ(ierr);
1113   ierr = VecDestroy(&aij->lvec);CHKERRQ(ierr);
1114   ierr = VecScatterDestroy(&aij->Mvctx);CHKERRQ(ierr);
1115   ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
1116   ierr = PetscFree(aij->ld);CHKERRQ(ierr);
1117   ierr = PetscFree(mat->data);CHKERRQ(ierr);
1118 
1119   ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
1120   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL);CHKERRQ(ierr);
1121   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL);CHKERRQ(ierr);
1122   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C",NULL);CHKERRQ(ierr);
1123   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C",NULL);CHKERRQ(ierr);
1124   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C",NULL);CHKERRQ(ierr);
1125   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C",NULL);CHKERRQ(ierr);
1126   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C",NULL);CHKERRQ(ierr);
1127 #if defined(PETSC_HAVE_ELEMENTAL)
1128   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_elemental_C",NULL);CHKERRQ(ierr);
1129 #endif
1130 #if defined(PETSC_HAVE_HYPRE)
1131   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_hypre_C",NULL);CHKERRQ(ierr);
1132   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMatMult_transpose_mpiaij_mpiaij_C",NULL);CHKERRQ(ierr);
1133 #endif
1134   PetscFunctionReturn(0);
1135 }
1136 
1137 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
1138 {
1139   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1140   Mat_SeqAIJ     *A   = (Mat_SeqAIJ*)aij->A->data;
1141   Mat_SeqAIJ     *B   = (Mat_SeqAIJ*)aij->B->data;
1142   PetscErrorCode ierr;
1143   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
1144   int            fd;
1145   PetscInt       nz,header[4],*row_lengths,*range=0,rlen,i;
1146   PetscInt       nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap->rstart,rnz = 0;
1147   PetscScalar    *column_values;
1148   PetscInt       message_count,flowcontrolcount;
1149   FILE           *file;
1150 
1151   PetscFunctionBegin;
1152   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
1153   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);CHKERRQ(ierr);
1154   nz   = A->nz + B->nz;
1155   ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1156   if (!rank) {
1157     header[0] = MAT_FILE_CLASSID;
1158     header[1] = mat->rmap->N;
1159     header[2] = mat->cmap->N;
1160 
1161     ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1162     ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1163     /* get largest number of rows any processor has */
1164     rlen  = mat->rmap->n;
1165     range = mat->rmap->range;
1166     for (i=1; i<size; i++) rlen = PetscMax(rlen,range[i+1] - range[i]);
1167   } else {
1168     ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1169     rlen = mat->rmap->n;
1170   }
1171 
1172   /* load up the local row counts */
1173   ierr = PetscMalloc1(rlen+1,&row_lengths);CHKERRQ(ierr);
1174   for (i=0; i<mat->rmap->n; i++) row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
1175 
1176   /* store the row lengths to the file */
1177   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1178   if (!rank) {
1179     ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap->n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1180     for (i=1; i<size; i++) {
1181       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1182       rlen = range[i+1] - range[i];
1183       ierr = MPIULong_Recv(row_lengths,rlen,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1184       ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1185     }
1186     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1187   } else {
1188     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1189     ierr = MPIULong_Send(row_lengths,mat->rmap->n,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1190     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1191   }
1192   ierr = PetscFree(row_lengths);CHKERRQ(ierr);
1193 
1194   /* load up the local column indices */
1195   nzmax = nz; /* this processor needs as much space as the largest processor needs */
1196   ierr  = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1197   ierr  = PetscMalloc1(nzmax+1,&column_indices);CHKERRQ(ierr);
1198   cnt   = 0;
1199   for (i=0; i<mat->rmap->n; i++) {
1200     for (j=B->i[i]; j<B->i[i+1]; j++) {
1201       if ((col = garray[B->j[j]]) > cstart) break;
1202       column_indices[cnt++] = col;
1203     }
1204     for (k=A->i[i]; k<A->i[i+1]; k++) column_indices[cnt++] = A->j[k] + cstart;
1205     for (; j<B->i[i+1]; j++) column_indices[cnt++] = garray[B->j[j]];
1206   }
1207   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
1208 
1209   /* store the column indices to the file */
1210   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1211   if (!rank) {
1212     MPI_Status status;
1213     ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1214     for (i=1; i<size; i++) {
1215       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1216       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
1217       if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: rnz = %D nzmax = %D",rnz,nzmax);
1218       ierr = MPIULong_Recv(column_indices,rnz,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1219       ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1220     }
1221     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1222   } else {
1223     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1224     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1225     ierr = MPIULong_Send(column_indices,nz,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1226     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1227   }
1228   ierr = PetscFree(column_indices);CHKERRQ(ierr);
1229 
1230   /* load up the local column values */
1231   ierr = PetscMalloc1(nzmax+1,&column_values);CHKERRQ(ierr);
1232   cnt  = 0;
1233   for (i=0; i<mat->rmap->n; i++) {
1234     for (j=B->i[i]; j<B->i[i+1]; j++) {
1235       if (garray[B->j[j]] > cstart) break;
1236       column_values[cnt++] = B->a[j];
1237     }
1238     for (k=A->i[i]; k<A->i[i+1]; k++) column_values[cnt++] = A->a[k];
1239     for (; j<B->i[i+1]; j++) column_values[cnt++] = B->a[j];
1240   }
1241   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
1242 
1243   /* store the column values to the file */
1244   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1245   if (!rank) {
1246     MPI_Status status;
1247     ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1248     for (i=1; i<size; i++) {
1249       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1250       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
1251       if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: rnz = %D nzmax = %D",rnz,nzmax);
1252       ierr = MPIULong_Recv(column_values,rnz,MPIU_SCALAR,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1253       ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1254     }
1255     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1256   } else {
1257     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1258     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1259     ierr = MPIULong_Send(column_values,nz,MPIU_SCALAR,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1260     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1261   }
1262   ierr = PetscFree(column_values);CHKERRQ(ierr);
1263 
1264   ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr);
1265   if (file) fprintf(file,"-matload_block_size %d\n",(int)PetscAbs(mat->rmap->bs));
1266   PetscFunctionReturn(0);
1267 }
1268 
1269 #include <petscdraw.h>
1270 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
1271 {
1272   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
1273   PetscErrorCode    ierr;
1274   PetscMPIInt       rank = aij->rank,size = aij->size;
1275   PetscBool         isdraw,iascii,isbinary;
1276   PetscViewer       sviewer;
1277   PetscViewerFormat format;
1278 
1279   PetscFunctionBegin;
1280   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1281   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1282   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1283   if (iascii) {
1284     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1285     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1286       MatInfo   info;
1287       PetscInt  *inodes = NULL;
1288 
1289       ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
1290       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
1291       ierr = MatInodeGetInodeSizes(aij->A,NULL,&inodes,NULL);CHKERRQ(ierr);
1292       ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr);
1293       if (!inodes) {
1294         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, not using I-node routines\n",
1295                                                   rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
1296       } else {
1297         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, using I-node routines\n",
1298                                                   rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
1299       }
1300       ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
1301       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1302       ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
1303       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1304       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1305       ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr);
1306       ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
1307       ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr);
1308       PetscFunctionReturn(0);
1309     } else if (format == PETSC_VIEWER_ASCII_INFO) {
1310       PetscInt inodecount,inodelimit,*inodes;
1311       ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr);
1312       if (inodes) {
1313         ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr);
1314       } else {
1315         ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr);
1316       }
1317       PetscFunctionReturn(0);
1318     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1319       PetscFunctionReturn(0);
1320     }
1321   } else if (isbinary) {
1322     if (size == 1) {
1323       ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1324       ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
1325     } else {
1326       ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr);
1327     }
1328     PetscFunctionReturn(0);
1329   } else if (isdraw) {
1330     PetscDraw draw;
1331     PetscBool isnull;
1332     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
1333     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr);
1334     if (isnull) PetscFunctionReturn(0);
1335   }
1336 
1337   {
1338     /* assemble the entire matrix onto first processor. */
1339     Mat        A;
1340     Mat_SeqAIJ *Aloc;
1341     PetscInt   M = mat->rmap->N,N = mat->cmap->N,m,*ai,*aj,row,*cols,i,*ct;
1342     MatScalar  *a;
1343 
1344     ierr = MatCreate(PetscObjectComm((PetscObject)mat),&A);CHKERRQ(ierr);
1345     if (!rank) {
1346       ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
1347     } else {
1348       ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
1349     }
1350     /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */
1351     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
1352     ierr = MatMPIAIJSetPreallocation(A,0,NULL,0,NULL);CHKERRQ(ierr);
1353     ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
1354     ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)A);CHKERRQ(ierr);
1355 
1356     /* copy over the A part */
1357     Aloc = (Mat_SeqAIJ*)aij->A->data;
1358     m    = aij->A->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1359     row  = mat->rmap->rstart;
1360     for (i=0; i<ai[m]; i++) aj[i] += mat->cmap->rstart;
1361     for (i=0; i<m; i++) {
1362       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
1363       row++;
1364       a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
1365     }
1366     aj = Aloc->j;
1367     for (i=0; i<ai[m]; i++) aj[i] -= mat->cmap->rstart;
1368 
1369     /* copy over the B part */
1370     Aloc = (Mat_SeqAIJ*)aij->B->data;
1371     m    = aij->B->rmap->n;  ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1372     row  = mat->rmap->rstart;
1373     ierr = PetscMalloc1(ai[m]+1,&cols);CHKERRQ(ierr);
1374     ct   = cols;
1375     for (i=0; i<ai[m]; i++) cols[i] = aij->garray[aj[i]];
1376     for (i=0; i<m; i++) {
1377       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
1378       row++;
1379       a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
1380     }
1381     ierr = PetscFree(ct);CHKERRQ(ierr);
1382     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1383     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1384     /*
1385        Everyone has to call to draw the matrix since the graphics waits are
1386        synchronized across all processors that share the PetscDraw object
1387     */
1388     ierr = PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
1389     if (!rank) {
1390       ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1391       ierr = MatView_SeqAIJ(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
1392     }
1393     ierr = PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
1394     ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1395     ierr = MatDestroy(&A);CHKERRQ(ierr);
1396   }
1397   PetscFunctionReturn(0);
1398 }
1399 
1400 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer)
1401 {
1402   PetscErrorCode ierr;
1403   PetscBool      iascii,isdraw,issocket,isbinary;
1404 
1405   PetscFunctionBegin;
1406   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1407   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1408   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1409   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
1410   if (iascii || isdraw || isbinary || issocket) {
1411     ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
1412   }
1413   PetscFunctionReturn(0);
1414 }
1415 
1416 PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
1417 {
1418   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1419   PetscErrorCode ierr;
1420   Vec            bb1 = 0;
1421   PetscBool      hasop;
1422 
1423   PetscFunctionBegin;
1424   if (flag == SOR_APPLY_UPPER) {
1425     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1426     PetscFunctionReturn(0);
1427   }
1428 
1429   if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) {
1430     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
1431   }
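  /* The local sweeps below iterate as follows: scatter the current solution xx into
     mat->lvec (the needed off-process values), form bb1 = bb - B*lvec, and apply the
     requested SOR sweep to the diagonal block A with bb1 as the right-hand side. */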
1432 
1433   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
1434     if (flag & SOR_ZERO_INITIAL_GUESS) {
1435       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1436       its--;
1437     }
1438 
1439     while (its--) {
1440       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1441       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1442 
1443       /* update rhs: bb1 = bb - B*x */
1444       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1445       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1446 
1447       /* local sweep */
1448       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1449     }
1450   } else if (flag & SOR_LOCAL_FORWARD_SWEEP) {
1451     if (flag & SOR_ZERO_INITIAL_GUESS) {
1452       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1453       its--;
1454     }
1455     while (its--) {
1456       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1457       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1458 
1459       /* update rhs: bb1 = bb - B*x */
1460       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1461       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1462 
1463       /* local sweep */
1464       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1465     }
1466   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) {
1467     if (flag & SOR_ZERO_INITIAL_GUESS) {
1468       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1469       its--;
1470     }
1471     while (its--) {
1472       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1473       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1474 
1475       /* update rhs: bb1 = bb - B*x */
1476       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1477       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1478 
1479       /* local sweep */
1480       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1481     }
1482   } else if (flag & SOR_EISENSTAT) {
1483     Vec xx1;
1484 
1485     ierr = VecDuplicate(bb,&xx1);CHKERRQ(ierr);
1486     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr);
1487 
1488     ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1489     ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1490     if (!mat->diag) {
1491       ierr = MatCreateVecs(matin,&mat->diag,NULL);CHKERRQ(ierr);
1492       ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr);
1493     }
1494     ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr);
1495     if (hasop) {
1496       ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr);
1497     } else {
1498       ierr = VecPointwiseMult(bb1,mat->diag,xx);CHKERRQ(ierr);
1499     }
1500     ierr = VecAYPX(bb1,(omega-2.0)/omega,bb);CHKERRQ(ierr);
1501 
1502     ierr = MatMultAdd(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr);
1503 
1504     /* local sweep */
1505     ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr);
1506     ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr);
1507     ierr = VecDestroy(&xx1);CHKERRQ(ierr);
1508   } else SETERRQ(PetscObjectComm((PetscObject)matin),PETSC_ERR_SUP,"Parallel SOR not supported");
1509 
1510   ierr = VecDestroy(&bb1);CHKERRQ(ierr);
1511 
1512   matin->factorerrortype = mat->A->factorerrortype;
1513   PetscFunctionReturn(0);
1514 }
1515 
1516 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B)
1517 {
1518   Mat            aA,aB,Aperm;
1519   const PetscInt *rwant,*cwant,*gcols,*ai,*bi,*aj,*bj;
1520   PetscScalar    *aa,*ba;
1521   PetscInt       i,j,m,n,ng,anz,bnz,*dnnz,*onnz,*tdnnz,*tonnz,*rdest,*cdest,*work,*gcdest;
1522   PetscSF        rowsf,sf;
1523   IS             parcolp = NULL;
1524   PetscBool      done;
1525   PetscErrorCode ierr;
1526 
1527   PetscFunctionBegin;
1528   ierr = MatGetLocalSize(A,&m,&n);CHKERRQ(ierr);
1529   ierr = ISGetIndices(rowp,&rwant);CHKERRQ(ierr);
1530   ierr = ISGetIndices(colp,&cwant);CHKERRQ(ierr);
1531   ierr = PetscMalloc3(PetscMax(m,n),&work,m,&rdest,n,&cdest);CHKERRQ(ierr);
1532 
1533   /* Invert row permutation to find out where my rows should go */
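  /* The SF below has one leaf per local row, with leaf i pointing at root rwant[i]; reducing
     work[i] = rstart+i over it deposits the new row number rstart+i at the original row
     rwant[i], so after the reduction rdest[k] holds the destination row of original row
     rstart+k. */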
1534   ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&rowsf);CHKERRQ(ierr);
1535   ierr = PetscSFSetGraphLayout(rowsf,A->rmap,A->rmap->n,NULL,PETSC_OWN_POINTER,rwant);CHKERRQ(ierr);
1536   ierr = PetscSFSetFromOptions(rowsf);CHKERRQ(ierr);
1537   for (i=0; i<m; i++) work[i] = A->rmap->rstart + i;
1538   ierr = PetscSFReduceBegin(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr);
1539   ierr = PetscSFReduceEnd(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr);
1540 
1541   /* Invert column permutation to find out where my columns should go */
1542   ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
1543   ierr = PetscSFSetGraphLayout(sf,A->cmap,A->cmap->n,NULL,PETSC_OWN_POINTER,cwant);CHKERRQ(ierr);
1544   ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
1545   for (i=0; i<n; i++) work[i] = A->cmap->rstart + i;
1546   ierr = PetscSFReduceBegin(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr);
1547   ierr = PetscSFReduceEnd(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr);
1548   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
1549 
1550   ierr = ISRestoreIndices(rowp,&rwant);CHKERRQ(ierr);
1551   ierr = ISRestoreIndices(colp,&cwant);CHKERRQ(ierr);
1552   ierr = MatMPIAIJGetSeqAIJ(A,&aA,&aB,&gcols);CHKERRQ(ierr);
1553 
1554   /* Find out where my gcols should go */
1555   ierr = MatGetSize(aB,NULL,&ng);CHKERRQ(ierr);
1556   ierr = PetscMalloc1(ng,&gcdest);CHKERRQ(ierr);
1557   ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
1558   ierr = PetscSFSetGraphLayout(sf,A->cmap,ng,NULL,PETSC_OWN_POINTER,gcols);CHKERRQ(ierr);
1559   ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
1560   ierr = PetscSFBcastBegin(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr);
1561   ierr = PetscSFBcastEnd(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr);
1562   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
1563 
1564   ierr = PetscCalloc4(m,&dnnz,m,&onnz,m,&tdnnz,m,&tonnz);CHKERRQ(ierr);
1565   ierr = MatGetRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr);
1566   ierr = MatGetRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr);
1567   for (i=0; i<m; i++) {
1568     PetscInt row = rdest[i],rowner;
1569     ierr = PetscLayoutFindOwner(A->rmap,row,&rowner);CHKERRQ(ierr);
1570     for (j=ai[i]; j<ai[i+1]; j++) {
1571       PetscInt cowner,col = cdest[aj[j]];
1572       ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr); /* Could build an index for the columns to eliminate this search */
1573       if (rowner == cowner) dnnz[i]++;
1574       else onnz[i]++;
1575     }
1576     for (j=bi[i]; j<bi[i+1]; j++) {
1577       PetscInt cowner,col = gcdest[bj[j]];
1578       ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr);
1579       if (rowner == cowner) dnnz[i]++;
1580       else onnz[i]++;
1581     }
1582   }
1583   ierr = PetscSFBcastBegin(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr);
1584   ierr = PetscSFBcastEnd(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr);
1585   ierr = PetscSFBcastBegin(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr);
1586   ierr = PetscSFBcastEnd(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr);
1587   ierr = PetscSFDestroy(&rowsf);CHKERRQ(ierr);
1588 
1589   ierr = MatCreateAIJ(PetscObjectComm((PetscObject)A),A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N,0,tdnnz,0,tonnz,&Aperm);CHKERRQ(ierr);
1590   ierr = MatSeqAIJGetArray(aA,&aa);CHKERRQ(ierr);
1591   ierr = MatSeqAIJGetArray(aB,&ba);CHKERRQ(ierr);
1592   for (i=0; i<m; i++) {
1593     PetscInt *acols = dnnz,*bcols = onnz; /* Repurpose now-unneeded arrays */
1594     PetscInt j0,rowlen;
1595     rowlen = ai[i+1] - ai[i];
1596     for (j0=j=0; j<rowlen; j0=j) { /* rowlen can exceed m, the length of the scratch arrays, so insert the row in batches of at most m values */
1597       for ( ; j<PetscMin(rowlen,j0+m); j++) acols[j-j0] = cdest[aj[ai[i]+j]];
1598       ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,acols,aa+ai[i]+j0,INSERT_VALUES);CHKERRQ(ierr);
1599     }
1600     rowlen = bi[i+1] - bi[i];
1601     for (j0=j=0; j<rowlen; j0=j) {
1602       for ( ; j<PetscMin(rowlen,j0+m); j++) bcols[j-j0] = gcdest[bj[bi[i]+j]];
1603       ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,bcols,ba+bi[i]+j0,INSERT_VALUES);CHKERRQ(ierr);
1604     }
1605   }
1606   ierr = MatAssemblyBegin(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1607   ierr = MatAssemblyEnd(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1608   ierr = MatRestoreRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr);
1609   ierr = MatRestoreRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr);
1610   ierr = MatSeqAIJRestoreArray(aA,&aa);CHKERRQ(ierr);
1611   ierr = MatSeqAIJRestoreArray(aB,&ba);CHKERRQ(ierr);
1612   ierr = PetscFree4(dnnz,onnz,tdnnz,tonnz);CHKERRQ(ierr);
1613   ierr = PetscFree3(work,rdest,cdest);CHKERRQ(ierr);
1614   ierr = PetscFree(gcdest);CHKERRQ(ierr);
1615   if (parcolp) {ierr = ISDestroy(&colp);CHKERRQ(ierr);}
1616   *B = Aperm;
1617   PetscFunctionReturn(0);
1618 }
1619 
1620 PetscErrorCode  MatGetGhosts_MPIAIJ(Mat mat,PetscInt *nghosts,const PetscInt *ghosts[])
1621 {
1622   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1623   PetscErrorCode ierr;
1624 
1625   PetscFunctionBegin;
1626   ierr = MatGetSize(aij->B,NULL,nghosts);CHKERRQ(ierr);
1627   if (ghosts) *ghosts = aij->garray;
1628   PetscFunctionReturn(0);
1629 }
1630 
1631 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1632 {
1633   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1634   Mat            A    = mat->A,B = mat->B;
1635   PetscErrorCode ierr;
1636   PetscReal      isend[5],irecv[5];
1637 
1638   PetscFunctionBegin;
1639   info->block_size = 1.0;
1640   ierr             = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1641 
1642   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1643   isend[3] = info->memory;  isend[4] = info->mallocs;
1644 
1645   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1646 
1647   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1648   isend[3] += info->memory;  isend[4] += info->mallocs;
1649   if (flag == MAT_LOCAL) {
1650     info->nz_used      = isend[0];
1651     info->nz_allocated = isend[1];
1652     info->nz_unneeded  = isend[2];
1653     info->memory       = isend[3];
1654     info->mallocs      = isend[4];
1655   } else if (flag == MAT_GLOBAL_MAX) {
1656     ierr = MPIU_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);
1657 
1658     info->nz_used      = irecv[0];
1659     info->nz_allocated = irecv[1];
1660     info->nz_unneeded  = irecv[2];
1661     info->memory       = irecv[3];
1662     info->mallocs      = irecv[4];
1663   } else if (flag == MAT_GLOBAL_SUM) {
1664     ierr = MPIU_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);
1665 
1666     info->nz_used      = irecv[0];
1667     info->nz_allocated = irecv[1];
1668     info->nz_unneeded  = irecv[2];
1669     info->memory       = irecv[3];
1670     info->mallocs      = irecv[4];
1671   }
1672   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1673   info->fill_ratio_needed = 0;
1674   info->factor_mallocs    = 0;
1675   PetscFunctionReturn(0);
1676 }
1677 
1678 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscBool flg)
1679 {
1680   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1681   PetscErrorCode ierr;
1682 
1683   PetscFunctionBegin;
1684   switch (op) {
1685   case MAT_NEW_NONZERO_LOCATIONS:
1686   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1687   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1688   case MAT_KEEP_NONZERO_PATTERN:
1689   case MAT_NEW_NONZERO_LOCATION_ERR:
1690   case MAT_USE_INODES:
1691   case MAT_IGNORE_ZERO_ENTRIES:
1692     MatCheckPreallocated(A,1);
1693     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1694     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1695     break;
1696   case MAT_ROW_ORIENTED:
1697     MatCheckPreallocated(A,1);
1698     a->roworiented = flg;
1699 
1700     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1701     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1702     break;
1703   case MAT_NEW_DIAGONALS:
1704     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1705     break;
1706   case MAT_IGNORE_OFF_PROC_ENTRIES:
1707     a->donotstash = flg;
1708     break;
1709   case MAT_SPD:
1710     A->spd_set = PETSC_TRUE;
1711     A->spd     = flg;
1712     if (flg) {
1713       A->symmetric                  = PETSC_TRUE;
1714       A->structurally_symmetric     = PETSC_TRUE;
1715       A->symmetric_set              = PETSC_TRUE;
1716       A->structurally_symmetric_set = PETSC_TRUE;
1717     }
1718     break;
1719   case MAT_SYMMETRIC:
1720     MatCheckPreallocated(A,1);
1721     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1722     break;
1723   case MAT_STRUCTURALLY_SYMMETRIC:
1724     MatCheckPreallocated(A,1);
1725     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1726     break;
1727   case MAT_HERMITIAN:
1728     MatCheckPreallocated(A,1);
1729     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1730     break;
1731   case MAT_SYMMETRY_ETERNAL:
1732     MatCheckPreallocated(A,1);
1733     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1734     break;
1735   case MAT_SUBMAT_SINGLEIS:
1736     A->submat_singleis = flg;
1737     break;
1738   case MAT_STRUCTURE_ONLY:
1739     /* The option is handled directly by MatSetOption() */
1740     break;
1741   default:
1742     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op);
1743   }
1744   PetscFunctionReturn(0);
1745 }
1746 
1747 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1748 {
1749   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1750   PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
1751   PetscErrorCode ierr;
1752   PetscInt       i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart;
1753   PetscInt       nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend;
1754   PetscInt       *cmap,*idx_p;
1755 
1756   PetscFunctionBegin;
1757   if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
1758   mat->getrowactive = PETSC_TRUE;
1759 
1760   if (!mat->rowvalues && (idx || v)) {
1761     /*
1762         allocate enough space to hold information from the longest row.
1763     */
1764     Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data;
1765     PetscInt   max = 1,tmp;
1766     for (i=0; i<matin->rmap->n; i++) {
1767       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1768       if (max < tmp) max = tmp;
1769     }
1770     ierr = PetscMalloc2(max,&mat->rowvalues,max,&mat->rowindices);CHKERRQ(ierr);
1771   }
1772 
1773   if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows");
1774   lrow = row - rstart;
1775 
1776   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1777   if (!v)   {pvA = 0; pvB = 0;}
1778   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1779   ierr  = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1780   ierr  = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1781   nztot = nzA + nzB;
1782 
1783   cmap = mat->garray;
1784   if (v  || idx) {
1785     if (nztot) {
1786       /* Sort by increasing column numbers, assuming A and B already sorted */
1787       PetscInt imark = -1;
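      /* imark counts how many off-diagonal (B) entries have global column below cstart; the
         merged row is then: those B entries, all A entries shifted by cstart, and finally the
         remaining B entries with global column at or beyond the diagonal block */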
1788       if (v) {
1789         *v = v_p = mat->rowvalues;
1790         for (i=0; i<nzB; i++) {
1791           if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i];
1792           else break;
1793         }
1794         imark = i;
1795         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1796         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1797       }
1798       if (idx) {
1799         *idx = idx_p = mat->rowindices;
1800         if (imark > -1) {
1801           for (i=0; i<imark; i++) {
1802             idx_p[i] = cmap[cworkB[i]];
1803           }
1804         } else {
1805           for (i=0; i<nzB; i++) {
1806             if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]];
1807             else break;
1808           }
1809           imark = i;
1810         }
1811         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart + cworkA[i];
1812         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]];
1813       }
1814     } else {
1815       if (idx) *idx = 0;
1816       if (v)   *v   = 0;
1817     }
1818   }
1819   *nz  = nztot;
1820   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1821   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1822   PetscFunctionReturn(0);
1823 }
1824 
1825 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1826 {
1827   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1828 
1829   PetscFunctionBegin;
1830   if (!aij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
1831   aij->getrowactive = PETSC_FALSE;
1832   PetscFunctionReturn(0);
1833 }
1834 
1835 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm)
1836 {
1837   Mat_MPIAIJ     *aij  = (Mat_MPIAIJ*)mat->data;
1838   Mat_SeqAIJ     *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data;
1839   PetscErrorCode ierr;
1840   PetscInt       i,j,cstart = mat->cmap->rstart;
1841   PetscReal      sum = 0.0;
1842   MatScalar      *v;
1843 
1844   PetscFunctionBegin;
1845   if (aij->size == 1) {
1846     ierr =  MatNorm(aij->A,type,norm);CHKERRQ(ierr);
1847   } else {
1848     if (type == NORM_FROBENIUS) {
1849       v = amat->a;
1850       for (i=0; i<amat->nz; i++) {
1851         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1852       }
1853       v = bmat->a;
1854       for (i=0; i<bmat->nz; i++) {
1855         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1856       }
1857       ierr  = MPIU_Allreduce(&sum,norm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1858       *norm = PetscSqrtReal(*norm);
1859       ierr = PetscLogFlops(2*amat->nz+2*bmat->nz);CHKERRQ(ierr);
1860     } else if (type == NORM_1) { /* max column norm */
1861       PetscReal *tmp,*tmp2;
1862       PetscInt  *jj,*garray = aij->garray;
1863       ierr  = PetscCalloc1(mat->cmap->N+1,&tmp);CHKERRQ(ierr);
1864       ierr  = PetscMalloc1(mat->cmap->N+1,&tmp2);CHKERRQ(ierr);
1865       *norm = 0.0;
1866       v     = amat->a; jj = amat->j;
1867       for (j=0; j<amat->nz; j++) {
1868         tmp[cstart + *jj++] += PetscAbsScalar(*v);  v++;
1869       }
1870       v = bmat->a; jj = bmat->j;
1871       for (j=0; j<bmat->nz; j++) {
1872         tmp[garray[*jj++]] += PetscAbsScalar(*v); v++;
1873       }
1874       ierr = MPIU_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1875       for (j=0; j<mat->cmap->N; j++) {
1876         if (tmp2[j] > *norm) *norm = tmp2[j];
1877       }
1878       ierr = PetscFree(tmp);CHKERRQ(ierr);
1879       ierr = PetscFree(tmp2);CHKERRQ(ierr);
1880       ierr = PetscLogFlops(PetscMax(amat->nz+bmat->nz-1,0));CHKERRQ(ierr);
1881     } else if (type == NORM_INFINITY) { /* max row norm */
1882       PetscReal ntemp = 0.0;
1883       for (j=0; j<aij->A->rmap->n; j++) {
1884         v   = amat->a + amat->i[j];
1885         sum = 0.0;
1886         for (i=0; i<amat->i[j+1]-amat->i[j]; i++) {
1887           sum += PetscAbsScalar(*v); v++;
1888         }
1889         v = bmat->a + bmat->i[j];
1890         for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) {
1891           sum += PetscAbsScalar(*v); v++;
1892         }
1893         if (sum > ntemp) ntemp = sum;
1894       }
1895       ierr = MPIU_Allreduce(&ntemp,norm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1896       ierr = PetscLogFlops(PetscMax(amat->nz+bmat->nz-1,0));CHKERRQ(ierr);
1897     } else SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"No support for two norm");
1898   }
1899   PetscFunctionReturn(0);
1900 }
1901 
1902 PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout)
1903 {
1904   Mat_MPIAIJ     *a   = (Mat_MPIAIJ*)A->data;
1905   Mat_SeqAIJ     *Aloc=(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data;
1906   PetscErrorCode ierr;
1907   PetscInt       M      = A->rmap->N,N = A->cmap->N,ma,na,mb,nb,*ai,*aj,*bi,*bj,row,*cols,*cols_tmp,i;
1908   PetscInt       cstart = A->cmap->rstart,ncol;
1909   Mat            B;
1910   MatScalar      *array;
1911 
1912   PetscFunctionBegin;
1913   if (reuse == MAT_INPLACE_MATRIX && M != N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
1914 
1915   ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n; nb = a->B->cmap->n;
1916   ai = Aloc->i; aj = Aloc->j;
1917   bi = Bloc->i; bj = Bloc->j;
1918   if (reuse == MAT_INITIAL_MATRIX || *matout == A) {
1919     PetscInt             *d_nnz,*g_nnz,*o_nnz;
1920     PetscSFNode          *oloc;
1921     PETSC_UNUSED PetscSF sf;
1922 
1923     ierr = PetscMalloc4(na,&d_nnz,na,&o_nnz,nb,&g_nnz,nb,&oloc);CHKERRQ(ierr);
1924     /* compute d_nnz for preallocation */
1925     ierr = PetscMemzero(d_nnz,na*sizeof(PetscInt));CHKERRQ(ierr);
1926     for (i=0; i<ai[ma]; i++) {
1927       d_nnz[aj[i]]++;
1928       aj[i] += cstart; /* global col index to be used by MatSetValues() */
1929     }
1930     /* compute local off-diagonal contributions */
1931     ierr = PetscMemzero(g_nnz,nb*sizeof(PetscInt));CHKERRQ(ierr);
1932     for (i=0; i<bi[ma]; i++) g_nnz[bj[i]]++;
1933     /* map those to global */
1934     ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
1935     ierr = PetscSFSetGraphLayout(sf,A->cmap,nb,NULL,PETSC_USE_POINTER,a->garray);CHKERRQ(ierr);
1936     ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
1937     ierr = PetscMemzero(o_nnz,na*sizeof(PetscInt));CHKERRQ(ierr);
1938     ierr = PetscSFReduceBegin(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr);
1939     ierr = PetscSFReduceEnd(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr);
1940     ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
1941 
1942     ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
1943     ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
1944     ierr = MatSetBlockSizes(B,PetscAbs(A->cmap->bs),PetscAbs(A->rmap->bs));CHKERRQ(ierr);
1945     ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
1946     ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
1947     ierr = PetscFree4(d_nnz,o_nnz,g_nnz,oloc);CHKERRQ(ierr);
1948   } else {
1949     B    = *matout;
1950     ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
1951     for (i=0; i<ai[ma]; i++) aj[i] += cstart; /* global col index to be used by MatSetValues() */
1952   }
1953 
1954   /* copy over the A part */
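  /* each local row of A is inserted as a column of B: the ncol global column indices of the
     row are passed as the row indices of B and the single global row number as the column
     index, so A(row,aj[k]) lands at B(aj[k],row) */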
1955   array = Aloc->a;
1956   row   = A->rmap->rstart;
1957   for (i=0; i<ma; i++) {
1958     ncol = ai[i+1]-ai[i];
1959     ierr = MatSetValues(B,ncol,aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1960     row++;
1961     array += ncol; aj += ncol;
1962   }
1963   aj = Aloc->j;
1964   for (i=0; i<ai[ma]; i++) aj[i] -= cstart; /* resume local col index */
1965 
1966   /* copy over the B part */
1967   ierr  = PetscCalloc1(bi[mb],&cols);CHKERRQ(ierr);
1968   array = Bloc->a;
1969   row   = A->rmap->rstart;
1970   for (i=0; i<bi[mb]; i++) cols[i] = a->garray[bj[i]];
1971   cols_tmp = cols;
1972   for (i=0; i<mb; i++) {
1973     ncol = bi[i+1]-bi[i];
1974     ierr = MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1975     row++;
1976     array += ncol; cols_tmp += ncol;
1977   }
1978   ierr = PetscFree(cols);CHKERRQ(ierr);
1979 
1980   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1981   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1982   if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) {
1983     *matout = B;
1984   } else {
1985     ierr = MatHeaderMerge(A,&B);CHKERRQ(ierr);
1986   }
1987   PetscFunctionReturn(0);
1988 }
1989 
1990 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
1991 {
1992   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1993   Mat            a    = aij->A,b = aij->B;
1994   PetscErrorCode ierr;
1995   PetscInt       s1,s2,s3;
1996 
1997   PetscFunctionBegin;
1998   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
1999   if (rr) {
2000     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
2001     if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
2002     /* Overlap communication with computation. */
2003     ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2004   }
2005   if (ll) {
2006     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
2007     if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
2008     ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr);
2009   }
2010   /* scale the diagonal block */
2011   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
2012 
2013   if (rr) {
2014     /* Do a scatter end and then right scale the off-diagonal block */
2015     ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2016     ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr);
2017   }
2018   PetscFunctionReturn(0);
2019 }
2020 
2021 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A)
2022 {
2023   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2024   PetscErrorCode ierr;
2025 
2026   PetscFunctionBegin;
2027   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
2028   PetscFunctionReturn(0);
2029 }
2030 
2031 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscBool  *flag)
2032 {
2033   Mat_MPIAIJ     *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data;
2034   Mat            a,b,c,d;
2035   PetscBool      flg;
2036   PetscErrorCode ierr;
2037 
2038   PetscFunctionBegin;
2039   a = matA->A; b = matA->B;
2040   c = matB->A; d = matB->B;
2041 
2042   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
2043   if (flg) {
2044     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
2045   }
2046   ierr = MPIU_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
2047   PetscFunctionReturn(0);
2048 }
2049 
2050 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
2051 {
2052   PetscErrorCode ierr;
2053   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2054   Mat_MPIAIJ     *b = (Mat_MPIAIJ*)B->data;
2055 
2056   PetscFunctionBegin;
2057   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
2058   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
2059     /* because of the column compression in the off-processor part of the matrix a->B,
2060        the number of columns in a->B and b->B may be different, hence we cannot call
2061        the MatCopy() directly on the two parts. If need be, we can provide a more
2062        efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices
2063        then copying the submatrices */
2064     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
2065   } else {
2066     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
2067     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
2068   }
2069   ierr = PetscObjectStateIncrease((PetscObject)B);CHKERRQ(ierr);
2070   PetscFunctionReturn(0);
2071 }
2072 
2073 PetscErrorCode MatSetUp_MPIAIJ(Mat A)
2074 {
2075   PetscErrorCode ierr;
2076 
2077   PetscFunctionBegin;
2078   ierr =  MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
2079   PetscFunctionReturn(0);
2080 }
2081 
2082 /*
2083    Computes the number of nonzeros per row needed for preallocation when X and Y
2084    have different nonzero structure.
2085 */
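/*
   Worked example (illustrative): if, for some row, X has global columns {1,4,7} and Y has
   global columns {2,4,9}, the merge below counts the union {1,2,4,7,9}, so nnz for that
   row is 5.
*/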
2086 PetscErrorCode MatAXPYGetPreallocation_MPIX_private(PetscInt m,const PetscInt *xi,const PetscInt *xj,const PetscInt *xltog,const PetscInt *yi,const PetscInt *yj,const PetscInt *yltog,PetscInt *nnz)
2087 {
2088   PetscInt       i,j,k,nzx,nzy;
2089 
2090   PetscFunctionBegin;
2091   /* Set the number of nonzeros in the new matrix */
2092   for (i=0; i<m; i++) {
2093     const PetscInt *xjj = xj+xi[i],*yjj = yj+yi[i];
2094     nzx = xi[i+1] - xi[i];
2095     nzy = yi[i+1] - yi[i];
2096     nnz[i] = 0;
2097     for (j=0,k=0; j<nzx; j++) {                   /* Point in X */
2098       for (; k<nzy && yltog[yjj[k]]<xltog[xjj[j]]; k++) nnz[i]++; /* Catch up to X */
2099       if (k<nzy && yltog[yjj[k]]==xltog[xjj[j]]) k++;             /* Skip duplicate */
2100       nnz[i]++;
2101     }
2102     for (; k<nzy; k++) nnz[i]++;
2103   }
2104   PetscFunctionReturn(0);
2105 }
2106 
2107 /* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */
2108 static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y,const PetscInt *yltog,Mat X,const PetscInt *xltog,PetscInt *nnz)
2109 {
2110   PetscErrorCode ierr;
2111   PetscInt       m = Y->rmap->N;
2112   Mat_SeqAIJ     *x = (Mat_SeqAIJ*)X->data;
2113   Mat_SeqAIJ     *y = (Mat_SeqAIJ*)Y->data;
2114 
2115   PetscFunctionBegin;
2116   ierr = MatAXPYGetPreallocation_MPIX_private(m,x->i,x->j,xltog,y->i,y->j,yltog,nnz);CHKERRQ(ierr);
2117   PetscFunctionReturn(0);
2118 }
2119 
2120 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
2121 {
2122   PetscErrorCode ierr;
2123   Mat_MPIAIJ     *xx = (Mat_MPIAIJ*)X->data,*yy = (Mat_MPIAIJ*)Y->data;
2124   PetscBLASInt   bnz,one=1;
2125   Mat_SeqAIJ     *x,*y;
2126 
2127   PetscFunctionBegin;
2128   if (str == SAME_NONZERO_PATTERN) {
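    /* identical nonzero patterns: both blocks of X and Y store their entries in the same
       order, so Y's value arrays can be updated in place, y->a += alpha*x->a, with one
       BLAS axpy for the diagonal block and one for the off-diagonal block */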
2129     PetscScalar alpha = a;
2130     x    = (Mat_SeqAIJ*)xx->A->data;
2131     ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr);
2132     y    = (Mat_SeqAIJ*)yy->A->data;
2133     PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
2134     x    = (Mat_SeqAIJ*)xx->B->data;
2135     y    = (Mat_SeqAIJ*)yy->B->data;
2136     ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr);
2137     PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
2138     ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr);
2139   } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
2140     ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
2141   } else {
2142     Mat      B;
2143     PetscInt *nnz_d,*nnz_o;
2144     ierr = PetscMalloc1(yy->A->rmap->N,&nnz_d);CHKERRQ(ierr);
2145     ierr = PetscMalloc1(yy->B->rmap->N,&nnz_o);CHKERRQ(ierr);
2146     ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr);
2147     ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr);
2148     ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr);
2149     ierr = MatSetBlockSizesFromMats(B,Y,Y);CHKERRQ(ierr);
2150     ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr);
2151     ierr = MatAXPYGetPreallocation_SeqAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr);
2152     ierr = MatAXPYGetPreallocation_MPIAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr);
2153     ierr = MatMPIAIJSetPreallocation(B,0,nnz_d,0,nnz_o);CHKERRQ(ierr);
2154     ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr);
2155     ierr = MatHeaderReplace(Y,&B);CHKERRQ(ierr);
2156     ierr = PetscFree(nnz_d);CHKERRQ(ierr);
2157     ierr = PetscFree(nnz_o);CHKERRQ(ierr);
2158   }
2159   PetscFunctionReturn(0);
2160 }
2161 
2162 extern PetscErrorCode  MatConjugate_SeqAIJ(Mat);
2163 
2164 PetscErrorCode  MatConjugate_MPIAIJ(Mat mat)
2165 {
2166 #if defined(PETSC_USE_COMPLEX)
2167   PetscErrorCode ierr;
2168   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2169 
2170   PetscFunctionBegin;
2171   ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr);
2172   ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr);
2173 #else
2174   PetscFunctionBegin;
2175 #endif
2176   PetscFunctionReturn(0);
2177 }
2178 
2179 PetscErrorCode MatRealPart_MPIAIJ(Mat A)
2180 {
2181   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2182   PetscErrorCode ierr;
2183 
2184   PetscFunctionBegin;
2185   ierr = MatRealPart(a->A);CHKERRQ(ierr);
2186   ierr = MatRealPart(a->B);CHKERRQ(ierr);
2187   PetscFunctionReturn(0);
2188 }
2189 
2190 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A)
2191 {
2192   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2193   PetscErrorCode ierr;
2194 
2195   PetscFunctionBegin;
2196   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
2197   ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
2198   PetscFunctionReturn(0);
2199 }
2200 
2201 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2202 {
2203   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2204   PetscErrorCode ierr;
2205   PetscInt       i,*idxb = 0;
2206   PetscScalar    *va,*vb;
2207   Vec            vtmp;
2208 
2209   PetscFunctionBegin;
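  /* take the row-wise maximum of |a_ij| over the diagonal block first, then compare with the
     off-diagonal block computed into a scratch vector; when the off-diagonal entry wins, its
     local column index is mapped to a global index through garray */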
2210   ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr);
2211   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2212   if (idx) {
2213     for (i=0; i<A->rmap->n; i++) {
2214       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2215     }
2216   }
2217 
2218   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
2219   if (idx) {
2220     ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr);
2221   }
2222   ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
2223   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
2224 
2225   for (i=0; i<A->rmap->n; i++) {
2226     if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
2227       va[i] = vb[i];
2228       if (idx) idx[i] = a->garray[idxb[i]];
2229     }
2230   }
2231 
2232   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2233   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
2234   ierr = PetscFree(idxb);CHKERRQ(ierr);
2235   ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
2236   PetscFunctionReturn(0);
2237 }
2238 
2239 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2240 {
2241   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2242   PetscErrorCode ierr;
2243   PetscInt       i,*idxb = 0;
2244   PetscScalar    *va,*vb;
2245   Vec            vtmp;
2246 
2247   PetscFunctionBegin;
2248   ierr = MatGetRowMinAbs(a->A,v,idx);CHKERRQ(ierr);
2249   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2250   if (idx) {
2251     for (i=0; i<A->rmap->n; i++) {
2252       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2253     }
2254   }
2255 
2256   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
2257   if (idx) {
2258     ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr);
2259   }
2260   ierr = MatGetRowMinAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
2261   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
2262 
2263   for (i=0; i<A->rmap->n; i++) {
2264     if (PetscAbsScalar(va[i]) > PetscAbsScalar(vb[i])) {
2265       va[i] = vb[i];
2266       if (idx) idx[i] = a->garray[idxb[i]];
2267     }
2268   }
2269 
2270   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2271   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
2272   ierr = PetscFree(idxb);CHKERRQ(ierr);
2273   ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
2274   PetscFunctionReturn(0);
2275 }
2276 
2277 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2278 {
2279   Mat_MPIAIJ     *mat   = (Mat_MPIAIJ*) A->data;
2280   PetscInt       n      = A->rmap->n;
2281   PetscInt       cstart = A->cmap->rstart;
2282   PetscInt       *cmap  = mat->garray;
2283   PetscInt       *diagIdx, *offdiagIdx;
2284   Vec            diagV, offdiagV;
2285   PetscScalar    *a, *diagA, *offdiagA;
2286   PetscInt       r;
2287   PetscErrorCode ierr;
2288 
2289   PetscFunctionBegin;
2290   ierr = PetscMalloc2(n,&diagIdx,n,&offdiagIdx);CHKERRQ(ierr);
2291   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr);
2292   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr);
2293   ierr = MatGetRowMin(mat->A, diagV,    diagIdx);CHKERRQ(ierr);
2294   ierr = MatGetRowMin(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
2295   ierr = VecGetArray(v,        &a);CHKERRQ(ierr);
2296   ierr = VecGetArray(diagV,    &diagA);CHKERRQ(ierr);
2297   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2298   for (r = 0; r < n; ++r) {
2299     if (PetscAbsScalar(diagA[r]) <= PetscAbsScalar(offdiagA[r])) {
2300       a[r]   = diagA[r];
2301       idx[r] = cstart + diagIdx[r];
2302     } else {
2303       a[r]   = offdiagA[r];
2304       idx[r] = cmap[offdiagIdx[r]];
2305     }
2306   }
2307   ierr = VecRestoreArray(v,        &a);CHKERRQ(ierr);
2308   ierr = VecRestoreArray(diagV,    &diagA);CHKERRQ(ierr);
2309   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2310   ierr = VecDestroy(&diagV);CHKERRQ(ierr);
2311   ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
2312   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
2313   PetscFunctionReturn(0);
2314 }
2315 
2316 PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2317 {
2318   Mat_MPIAIJ     *mat   = (Mat_MPIAIJ*) A->data;
2319   PetscInt       n      = A->rmap->n;
2320   PetscInt       cstart = A->cmap->rstart;
2321   PetscInt       *cmap  = mat->garray;
2322   PetscInt       *diagIdx, *offdiagIdx;
2323   Vec            diagV, offdiagV;
2324   PetscScalar    *a, *diagA, *offdiagA;
2325   PetscInt       r;
2326   PetscErrorCode ierr;
2327 
2328   PetscFunctionBegin;
2329   ierr = PetscMalloc2(n,&diagIdx,n,&offdiagIdx);CHKERRQ(ierr);
2330   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr);
2331   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr);
2332   ierr = MatGetRowMax(mat->A, diagV,    diagIdx);CHKERRQ(ierr);
2333   ierr = MatGetRowMax(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
2334   ierr = VecGetArray(v,        &a);CHKERRQ(ierr);
2335   ierr = VecGetArray(diagV,    &diagA);CHKERRQ(ierr);
2336   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2337   for (r = 0; r < n; ++r) {
2338     if (PetscAbsScalar(diagA[r]) >= PetscAbsScalar(offdiagA[r])) {
2339       a[r]   = diagA[r];
2340       idx[r] = cstart + diagIdx[r];
2341     } else {
2342       a[r]   = offdiagA[r];
2343       idx[r] = cmap[offdiagIdx[r]];
2344     }
2345   }
2346   ierr = VecRestoreArray(v,        &a);CHKERRQ(ierr);
2347   ierr = VecRestoreArray(diagV,    &diagA);CHKERRQ(ierr);
2348   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2349   ierr = VecDestroy(&diagV);CHKERRQ(ierr);
2350   ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
2351   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
2352   PetscFunctionReturn(0);
2353 }
2354 
2355 PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat,Mat *newmat)
2356 {
2357   PetscErrorCode ierr;
2358   Mat            *dummy;
2359 
2360   PetscFunctionBegin;
2361   ierr    = MatCreateSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy);CHKERRQ(ierr);
2362   *newmat = *dummy;
2363   ierr    = PetscFree(dummy);CHKERRQ(ierr);
2364   PetscFunctionReturn(0);
2365 }
2366 
2367 PetscErrorCode  MatInvertBlockDiagonal_MPIAIJ(Mat A,const PetscScalar **values)
2368 {
2369   Mat_MPIAIJ     *a = (Mat_MPIAIJ*) A->data;
2370   PetscErrorCode ierr;
2371 
2372   PetscFunctionBegin;
2373   ierr = MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr);
2374   A->factorerrortype = a->A->factorerrortype;
2375   PetscFunctionReturn(0);
2376 }
2377 
2378 static PetscErrorCode  MatSetRandom_MPIAIJ(Mat x,PetscRandom rctx)
2379 {
2380   PetscErrorCode ierr;
2381   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)x->data;
2382 
2383   PetscFunctionBegin;
2384   ierr = MatSetRandom(aij->A,rctx);CHKERRQ(ierr);
2385   ierr = MatSetRandom(aij->B,rctx);CHKERRQ(ierr);
2386   ierr = MatAssemblyBegin(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2387   ierr = MatAssemblyEnd(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2388   PetscFunctionReturn(0);
2389 }
2390 
2391 PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ(Mat A,PetscBool sc)
2392 {
2393   PetscFunctionBegin;
2394   if (sc) A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ_Scalable;
2395   else A->ops->increaseoverlap    = MatIncreaseOverlap_MPIAIJ;
2396   PetscFunctionReturn(0);
2397 }
2398 
2399 /*@
2400    MatMPIAIJSetUseScalableIncreaseOverlap - Set whether the matrix uses a scalable algorithm to compute the overlap
2401 
2402    Collective on Mat
2403 
2404    Input Parameters:
2405 +    A - the matrix
2406 -    sc - PETSC_TRUE indicates use the scalable algorithm (default is not to use the scalable algorithm)
2407 
2408  Level: advanced
2409 
2410 @*/
2411 PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap(Mat A,PetscBool sc)
2412 {
2413   PetscErrorCode       ierr;
2414 
2415   PetscFunctionBegin;
2416   ierr = PetscTryMethod(A,"MatMPIAIJSetUseScalableIncreaseOverlap_C",(Mat,PetscBool),(A,sc));CHKERRQ(ierr);
2417   PetscFunctionReturn(0);
2418 }
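/*
   A minimal usage sketch (illustrative only, not part of the PETSc sources): enable the
   scalable overlap algorithm on an assembled MATMPIAIJ matrix A before increasing the
   overlap of an existing index set is by two levels.

     MatMPIAIJSetUseScalableIncreaseOverlap(A,PETSC_TRUE);
     MatIncreaseOverlap(A,1,&is,2);

   The same switch is available from the options database via -mat_increase_overlap_scalable,
   handled by MatSetFromOptions_MPIAIJ() below.
*/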
2419 
2420 PetscErrorCode MatSetFromOptions_MPIAIJ(PetscOptionItems *PetscOptionsObject,Mat A)
2421 {
2422   PetscErrorCode       ierr;
2423   PetscBool            sc = PETSC_FALSE,flg;
2424 
2425   PetscFunctionBegin;
2426   ierr = PetscOptionsHead(PetscOptionsObject,"MPIAIJ options");CHKERRQ(ierr);
2427   ierr = PetscObjectOptionsBegin((PetscObject)A);
2428     if (A->ops->increaseoverlap == MatIncreaseOverlap_MPIAIJ_Scalable) sc = PETSC_TRUE;
2429     ierr = PetscOptionsBool("-mat_increase_overlap_scalable","Use a scalable algorithm to compute the overlap","MatIncreaseOverlap",sc,&sc,&flg);CHKERRQ(ierr);
2430     if (flg) {
2431       ierr = MatMPIAIJSetUseScalableIncreaseOverlap(A,sc);CHKERRQ(ierr);
2432     }
2433   ierr = PetscOptionsEnd();CHKERRQ(ierr);
2434   PetscFunctionReturn(0);
2435 }
2436 
2437 PetscErrorCode MatShift_MPIAIJ(Mat Y,PetscScalar a)
2438 {
2439   PetscErrorCode ierr;
2440   Mat_MPIAIJ     *maij = (Mat_MPIAIJ*)Y->data;
2441   Mat_SeqAIJ     *aij = (Mat_SeqAIJ*)maij->A->data;
2442 
2443   PetscFunctionBegin;
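  /* make sure the diagonal block can accept the shifted diagonal entries: preallocate one
     entry per row if the matrix has no preallocation at all, or re-preallocate an empty
     diagonal block (preserving its nonew flag) before delegating to MatShift_Basic() */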
2444   if (!Y->preallocated) {
2445     ierr = MatMPIAIJSetPreallocation(Y,1,NULL,0,NULL);CHKERRQ(ierr);
2446   } else if (!aij->nz) {
2447     PetscInt nonew = aij->nonew;
2448     ierr = MatSeqAIJSetPreallocation(maij->A,1,NULL);CHKERRQ(ierr);
2449     aij->nonew = nonew;
2450   }
2451   ierr = MatShift_Basic(Y,a);CHKERRQ(ierr);
2452   PetscFunctionReturn(0);
2453 }
2454 
2455 PetscErrorCode MatMissingDiagonal_MPIAIJ(Mat A,PetscBool  *missing,PetscInt *d)
2456 {
2457   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2458   PetscErrorCode ierr;
2459 
2460   PetscFunctionBegin;
2461   if (A->rmap->n != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only works for square matrices");
2462   ierr = MatMissingDiagonal(a->A,missing,d);CHKERRQ(ierr);
2463   if (d) {
2464     PetscInt rstart;
2465     ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr);
2466     *d += rstart;
2467 
2468   }
2469   PetscFunctionReturn(0);
2470 }
2471 
2472 
2473 /* -------------------------------------------------------------------*/
2474 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
2475                                        MatGetRow_MPIAIJ,
2476                                        MatRestoreRow_MPIAIJ,
2477                                        MatMult_MPIAIJ,
2478                                 /* 4*/ MatMultAdd_MPIAIJ,
2479                                        MatMultTranspose_MPIAIJ,
2480                                        MatMultTransposeAdd_MPIAIJ,
2481                                        0,
2482                                        0,
2483                                        0,
2484                                 /*10*/ 0,
2485                                        0,
2486                                        0,
2487                                        MatSOR_MPIAIJ,
2488                                        MatTranspose_MPIAIJ,
2489                                 /*15*/ MatGetInfo_MPIAIJ,
2490                                        MatEqual_MPIAIJ,
2491                                        MatGetDiagonal_MPIAIJ,
2492                                        MatDiagonalScale_MPIAIJ,
2493                                        MatNorm_MPIAIJ,
2494                                 /*20*/ MatAssemblyBegin_MPIAIJ,
2495                                        MatAssemblyEnd_MPIAIJ,
2496                                        MatSetOption_MPIAIJ,
2497                                        MatZeroEntries_MPIAIJ,
2498                                 /*24*/ MatZeroRows_MPIAIJ,
2499                                        0,
2500                                        0,
2501                                        0,
2502                                        0,
2503                                 /*29*/ MatSetUp_MPIAIJ,
2504                                        0,
2505                                        0,
2506                                        MatGetDiagonalBlock_MPIAIJ,
2507                                        0,
2508                                 /*34*/ MatDuplicate_MPIAIJ,
2509                                        0,
2510                                        0,
2511                                        0,
2512                                        0,
2513                                 /*39*/ MatAXPY_MPIAIJ,
2514                                        MatCreateSubMatrices_MPIAIJ,
2515                                        MatIncreaseOverlap_MPIAIJ,
2516                                        MatGetValues_MPIAIJ,
2517                                        MatCopy_MPIAIJ,
2518                                 /*44*/ MatGetRowMax_MPIAIJ,
2519                                        MatScale_MPIAIJ,
2520                                        MatShift_MPIAIJ,
2521                                        MatDiagonalSet_MPIAIJ,
2522                                        MatZeroRowsColumns_MPIAIJ,
2523                                 /*49*/ MatSetRandom_MPIAIJ,
2524                                        0,
2525                                        0,
2526                                        0,
2527                                        0,
2528                                 /*54*/ MatFDColoringCreate_MPIXAIJ,
2529                                        0,
2530                                        MatSetUnfactored_MPIAIJ,
2531                                        MatPermute_MPIAIJ,
2532                                        0,
2533                                 /*59*/ MatCreateSubMatrix_MPIAIJ,
2534                                        MatDestroy_MPIAIJ,
2535                                        MatView_MPIAIJ,
2536                                        0,
2537                                        MatMatMatMult_MPIAIJ_MPIAIJ_MPIAIJ,
2538                                 /*64*/ MatMatMatMultSymbolic_MPIAIJ_MPIAIJ_MPIAIJ,
2539                                        MatMatMatMultNumeric_MPIAIJ_MPIAIJ_MPIAIJ,
2540                                        0,
2541                                        0,
2542                                        0,
2543                                 /*69*/ MatGetRowMaxAbs_MPIAIJ,
2544                                        MatGetRowMinAbs_MPIAIJ,
2545                                        0,
2546                                        0,
2547                                        0,
2548                                        0,
2549                                 /*75*/ MatFDColoringApply_AIJ,
2550                                        MatSetFromOptions_MPIAIJ,
2551                                        0,
2552                                        0,
2553                                        MatFindZeroDiagonals_MPIAIJ,
2554                                 /*80*/ 0,
2555                                        0,
2556                                        0,
2557                                 /*83*/ MatLoad_MPIAIJ,
2558                                        MatIsSymmetric_MPIAIJ,
2559                                        0,
2560                                        0,
2561                                        0,
2562                                        0,
2563                                 /*89*/ MatMatMult_MPIAIJ_MPIAIJ,
2564                                        MatMatMultSymbolic_MPIAIJ_MPIAIJ,
2565                                        MatMatMultNumeric_MPIAIJ_MPIAIJ,
2566                                        MatPtAP_MPIAIJ_MPIAIJ,
2567                                        MatPtAPSymbolic_MPIAIJ_MPIAIJ,
2568                                 /*94*/ MatPtAPNumeric_MPIAIJ_MPIAIJ,
2569                                        0,
2570                                        0,
2571                                        0,
2572                                        0,
2573                                 /*99*/ 0,
2574                                        0,
2575                                        0,
2576                                        MatConjugate_MPIAIJ,
2577                                        0,
2578                                 /*104*/MatSetValuesRow_MPIAIJ,
2579                                        MatRealPart_MPIAIJ,
2580                                        MatImaginaryPart_MPIAIJ,
2581                                        0,
2582                                        0,
2583                                 /*109*/0,
2584                                        0,
2585                                        MatGetRowMin_MPIAIJ,
2586                                        0,
2587                                        MatMissingDiagonal_MPIAIJ,
2588                                 /*114*/MatGetSeqNonzeroStructure_MPIAIJ,
2589                                        0,
2590                                        MatGetGhosts_MPIAIJ,
2591                                        0,
2592                                        0,
2593                                 /*119*/0,
2594                                        0,
2595                                        0,
2596                                        0,
2597                                        MatGetMultiProcBlock_MPIAIJ,
2598                                 /*124*/MatFindNonzeroRows_MPIAIJ,
2599                                        MatGetColumnNorms_MPIAIJ,
2600                                        MatInvertBlockDiagonal_MPIAIJ,
2601                                        0,
2602                                        MatCreateSubMatricesMPI_MPIAIJ,
2603                                 /*129*/0,
2604                                        MatTransposeMatMult_MPIAIJ_MPIAIJ,
2605                                        MatTransposeMatMultSymbolic_MPIAIJ_MPIAIJ,
2606                                        MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ,
2607                                        0,
2608                                 /*134*/0,
2609                                        0,
2610                                        MatRARt_MPIAIJ_MPIAIJ,
2611                                        0,
2612                                        0,
2613                                 /*139*/MatSetBlockSizes_MPIAIJ,
2614                                        0,
2615                                        0,
2616                                        MatFDColoringSetUp_MPIXAIJ,
2617                                        MatFindOffBlockDiagonalEntries_MPIAIJ,
2618                                 /*144*/MatCreateMPIMatConcatenateSeqMat_MPIAIJ
2619 };
2620 
2621 /* ----------------------------------------------------------------------------------------*/
2622 
2623 PetscErrorCode  MatStoreValues_MPIAIJ(Mat mat)
2624 {
2625   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2626   PetscErrorCode ierr;
2627 
2628   PetscFunctionBegin;
2629   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
2630   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
2631   PetscFunctionReturn(0);
2632 }
2633 
2634 PetscErrorCode  MatRetrieveValues_MPIAIJ(Mat mat)
2635 {
2636   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2637   PetscErrorCode ierr;
2638 
2639   PetscFunctionBegin;
2640   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
2641   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
2642   PetscFunctionReturn(0);
2643 }
2644 
2645 PetscErrorCode  MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
2646 {
2647   Mat_MPIAIJ     *b;
2648   PetscErrorCode ierr;
2649 
2650   PetscFunctionBegin;
2651   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
2652   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
2653   b = (Mat_MPIAIJ*)B->data;
2654 
2655 #if defined(PETSC_USE_CTABLE)
2656   ierr = PetscTableDestroy(&b->colmap);CHKERRQ(ierr);
2657 #else
2658   ierr = PetscFree(b->colmap);CHKERRQ(ierr);
2659 #endif
2660   ierr = PetscFree(b->garray);CHKERRQ(ierr);
2661   ierr = VecDestroy(&b->lvec);CHKERRQ(ierr);
2662   ierr = VecScatterDestroy(&b->Mvctx);CHKERRQ(ierr);
2663 
2664   /* Because B may have been resized, we simply destroy it and create a new one each time */
2665   ierr = MatDestroy(&b->B);CHKERRQ(ierr);
2666   ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
2667   ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr);
2668   ierr = MatSetBlockSizesFromMats(b->B,B,B);CHKERRQ(ierr);
2669   ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr);
2670   ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->B);CHKERRQ(ierr);
2671 
2672   if (!B->preallocated) {
2673     ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
2674     ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr);
2675     ierr = MatSetBlockSizesFromMats(b->A,B,B);CHKERRQ(ierr);
2676     ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr);
2677     ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->A);CHKERRQ(ierr);
2678   }
2679 
2680   ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr);
2681   ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr);
2682   B->preallocated  = PETSC_TRUE;
2683   B->was_assembled = PETSC_FALSE;
2684   B->assembled     = PETSC_FALSE;
2685   PetscFunctionReturn(0);
2686 }
2687 
2688 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
2689 {
2690   Mat            mat;
2691   Mat_MPIAIJ     *a,*oldmat = (Mat_MPIAIJ*)matin->data;
2692   PetscErrorCode ierr;
2693 
2694   PetscFunctionBegin;
2695   *newmat = 0;
2696   ierr    = MatCreate(PetscObjectComm((PetscObject)matin),&mat);CHKERRQ(ierr);
2697   ierr    = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr);
2698   ierr    = MatSetBlockSizesFromMats(mat,matin,matin);CHKERRQ(ierr);
2699   ierr    = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr);
2700   ierr    = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
2701   a       = (Mat_MPIAIJ*)mat->data;
2702 
2703   mat->factortype   = matin->factortype;
2704   mat->assembled    = PETSC_TRUE;
2705   mat->insertmode   = NOT_SET_VALUES;
2706   mat->preallocated = PETSC_TRUE;
2707 
2708   a->size         = oldmat->size;
2709   a->rank         = oldmat->rank;
2710   a->donotstash   = oldmat->donotstash;
2711   a->roworiented  = oldmat->roworiented;
2712   a->rowindices   = 0;
2713   a->rowvalues    = 0;
2714   a->getrowactive = PETSC_FALSE;
2715 
2716   ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr);
2717   ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr);
2718 
2719   if (oldmat->colmap) {
2720 #if defined(PETSC_USE_CTABLE)
2721     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
2722 #else
2723     ierr = PetscMalloc1(mat->cmap->N,&a->colmap);CHKERRQ(ierr);
2724     ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
2725     ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
2726 #endif
2727   } else a->colmap = 0;
2728   if (oldmat->garray) {
2729     PetscInt len;
2730     len  = oldmat->B->cmap->n;
2731     ierr = PetscMalloc1(len+1,&a->garray);CHKERRQ(ierr);
2732     ierr = PetscLogObjectMemory((PetscObject)mat,len*sizeof(PetscInt));CHKERRQ(ierr);
2733     if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); }
2734   } else a->garray = 0;
2735 
2736   ierr    = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
2737   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->lvec);CHKERRQ(ierr);
2738   ierr    = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
2739   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx);CHKERRQ(ierr);
2740   ierr    = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
2741   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr);
2742   ierr    = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
2743   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->B);CHKERRQ(ierr);
2744   ierr    = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr);
2745   *newmat = mat;
2746   PetscFunctionReturn(0);
2747 }
2748 
2749 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer)
2750 {
2751   PetscScalar    *vals,*svals;
2752   MPI_Comm       comm;
2753   PetscErrorCode ierr;
2754   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
2755   PetscInt       i,nz,j,rstart,rend,mmax,maxnz = 0;
2756   PetscInt       header[4],*rowlengths = 0,M,N,m,*cols;
2757   PetscInt       *ourlens = NULL,*procsnz = NULL,*offlens = NULL,jj,*mycols,*smycols;
2758   PetscInt       cend,cstart,n,*rowners;
2759   int            fd;
2760   PetscInt       bs = newMat->rmap->bs;
2761 
2762   PetscFunctionBegin;
2763   /* force binary viewer to load .info file if it has not yet done so */
2764   ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr);
2765   ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr);
2766   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2767   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2768   ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
2769   if (!rank) {
2770     ierr = PetscBinaryRead(fd,(char*)header,4,PETSC_INT);CHKERRQ(ierr);
2771     if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
2772     if (header[3] < 0) SETERRQ(PetscObjectComm((PetscObject)newMat),PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format on disk, cannot load as MATMPIAIJ");
2773   }
2774 
2775   ierr = PetscOptionsBegin(comm,NULL,"Options for loading MATMPIAIJ matrix","Mat");CHKERRQ(ierr);
2776   ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,NULL);CHKERRQ(ierr);
2777   ierr = PetscOptionsEnd();CHKERRQ(ierr);
2778   if (bs < 0) bs = 1;
2779 
2780   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
2781   M    = header[1]; N = header[2];
2782 
2783   /* If global sizes are set, check if they are consistent with those given in the file */
2784   if (newMat->rmap->N >= 0 && newMat->rmap->N != M) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Inconsistent # of rows:Matrix in file has (%D) and input matrix has (%D)",newMat->rmap->N,M);
2785   if (newMat->cmap->N >=0 && newMat->cmap->N != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Inconsistent # of cols:Matrix in file has (%D) and input matrix has (%D)",newMat->cmap->N,N);
2786 
2787   /* determine ownership of all (block) rows */
2788   if (M%bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows (%d) and block size (%d)",M,bs);
2789   if (newMat->rmap->n < 0) m = bs*((M/bs)/size + (((M/bs) % size) > rank));    /* PETSC_DECIDE */
2790   else m = newMat->rmap->n; /* Set by user */
2791 
2792   ierr = PetscMalloc1(size+1,&rowners);CHKERRQ(ierr);
2793   ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
2794 
2795   /* First process needs enough room for process with most rows */
2796   if (!rank) {
2797     mmax = rowners[1];
2798     for (i=2; i<=size; i++) {
2799       mmax = PetscMax(mmax, rowners[i]);
2800     }
2801   } else mmax = -1;             /* unused, but compilers complain */
2802 
2803   rowners[0] = 0;
2804   for (i=2; i<=size; i++) {
2805     rowners[i] += rowners[i-1];
2806   }
2807   rstart = rowners[rank];
2808   rend   = rowners[rank+1];
2809 
2810   /* distribute row lengths to all processors */
2811   ierr = PetscMalloc2(m,&ourlens,m,&offlens);CHKERRQ(ierr);
2812   if (!rank) {
2813     ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr);
2814     ierr = PetscMalloc1(mmax,&rowlengths);CHKERRQ(ierr);
2815     ierr = PetscCalloc1(size,&procsnz);CHKERRQ(ierr);
2816     for (j=0; j<m; j++) {
2817       procsnz[0] += ourlens[j];
2818     }
2819     for (i=1; i<size; i++) {
2820       ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr);
2821       /* calculate the number of nonzeros on each processor */
2822       for (j=0; j<rowners[i+1]-rowners[i]; j++) {
2823         procsnz[i] += rowlengths[j];
2824       }
2825       ierr = MPIULong_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
2826     }
2827     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
2828   } else {
2829     ierr = MPIULong_Recv(ourlens,m,MPIU_INT,0,tag,comm);CHKERRQ(ierr);
2830   }
2831 
2832   if (!rank) {
2833     /* determine max buffer needed and allocate it */
2834     maxnz = 0;
2835     for (i=0; i<size; i++) {
2836       maxnz = PetscMax(maxnz,procsnz[i]);
2837     }
2838     ierr = PetscMalloc1(maxnz,&cols);CHKERRQ(ierr);
2839 
2840     /* read in my part of the matrix column indices  */
2841     nz   = procsnz[0];
2842     ierr = PetscMalloc1(nz,&mycols);CHKERRQ(ierr);
2843     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
2844 
2845     /* read in everyone else's and ship them off */
2846     for (i=1; i<size; i++) {
2847       nz   = procsnz[i];
2848       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
2849       ierr = MPIULong_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
2850     }
2851     ierr = PetscFree(cols);CHKERRQ(ierr);
2852   } else {
2853     /* determine buffer space needed for message */
2854     nz = 0;
2855     for (i=0; i<m; i++) {
2856       nz += ourlens[i];
2857     }
2858     ierr = PetscMalloc1(nz,&mycols);CHKERRQ(ierr);
2859 
2860     /* receive message of column indices */
2861     ierr = MPIULong_Recv(mycols,nz,MPIU_INT,0,tag,comm);CHKERRQ(ierr);
2862   }
2863 
2864   /* determine column ownership if matrix is not square */
2865   if (N != M) {
2866     if (newMat->cmap->n < 0) n = N/size + ((N % size) > rank);
2867     else n = newMat->cmap->n;
2868     ierr   = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
2869     cstart = cend - n;
2870   } else {
2871     cstart = rstart;
2872     cend   = rend;
2873     n      = cend - cstart;
2874   }
2875 
2876   /* loop over local rows, determining number of off diagonal entries */
2877   ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr);
2878   jj   = 0;
2879   for (i=0; i<m; i++) {
2880     for (j=0; j<ourlens[i]; j++) {
2881       if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++;
2882       jj++;
2883     }
2884   }
2885 
2886   for (i=0; i<m; i++) {
2887     ourlens[i] -= offlens[i];
2888   }
2889   ierr = MatSetSizes(newMat,m,n,M,N);CHKERRQ(ierr);
2890 
2891   if (bs > 1) {ierr = MatSetBlockSize(newMat,bs);CHKERRQ(ierr);}
2892 
2893   ierr = MatMPIAIJSetPreallocation(newMat,0,ourlens,0,offlens);CHKERRQ(ierr);
2894 
2895   for (i=0; i<m; i++) {
2896     ourlens[i] += offlens[i];
2897   }
2898 
2899   if (!rank) {
2900     ierr = PetscMalloc1(maxnz+1,&vals);CHKERRQ(ierr);
2901 
2902     /* read in my part of the matrix numerical values  */
2903     nz   = procsnz[0];
2904     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2905 
2906     /* insert into matrix */
2907     jj      = rstart;
2908     smycols = mycols;
2909     svals   = vals;
2910     for (i=0; i<m; i++) {
2911       ierr     = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
2912       smycols += ourlens[i];
2913       svals   += ourlens[i];
2914       jj++;
2915     }
2916 
2917     /* read in other processors and ship out */
2918     for (i=1; i<size; i++) {
2919       nz   = procsnz[i];
2920       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2921       ierr = MPIULong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr);
2922     }
2923     ierr = PetscFree(procsnz);CHKERRQ(ierr);
2924   } else {
2925     /* receive numeric values */
2926     ierr = PetscMalloc1(nz+1,&vals);CHKERRQ(ierr);
2927 
2928     /* receive message of values */
2929     ierr = MPIULong_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr);
2930 
2931     /* insert into matrix */
2932     jj      = rstart;
2933     smycols = mycols;
2934     svals   = vals;
2935     for (i=0; i<m; i++) {
2936       ierr     = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
2937       smycols += ourlens[i];
2938       svals   += ourlens[i];
2939       jj++;
2940     }
2941   }
2942   ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr);
2943   ierr = PetscFree(vals);CHKERRQ(ierr);
2944   ierr = PetscFree(mycols);CHKERRQ(ierr);
2945   ierr = PetscFree(rowners);CHKERRQ(ierr);
2946   ierr = MatAssemblyBegin(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2947   ierr = MatAssemblyEnd(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2948   PetscFunctionReturn(0);
2949 }
2950 
2951 /* Not scalable because of ISAllGather() unless getting all columns. */
2952 PetscErrorCode ISGetSeqIS_Private(Mat mat,IS iscol,IS *isseq)
2953 {
2954   PetscErrorCode ierr;
2955   IS             iscol_local;
2956   PetscBool      isstride;
2957   PetscMPIInt    lisstride=0,gisstride;
2958 
2959   PetscFunctionBegin;
2960   /* check if we are grabbing all columns*/
2961   ierr = PetscObjectTypeCompare((PetscObject)iscol,ISSTRIDE,&isstride);CHKERRQ(ierr);
2962 
2963   if (isstride) {
2964     PetscInt  start,len,mstart,mlen;
2965     ierr = ISStrideGetInfo(iscol,&start,NULL);CHKERRQ(ierr);
2966     ierr = ISGetLocalSize(iscol,&len);CHKERRQ(ierr);
2967     ierr = MatGetOwnershipRangeColumn(mat,&mstart,&mlen);CHKERRQ(ierr);
2968     if (mstart == start && mlen-mstart == len) lisstride = 1;
2969   }
2970 
2971   ierr = MPIU_Allreduce(&lisstride,&gisstride,1,MPI_INT,MPI_MIN,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
2972   if (gisstride) {
2973     PetscInt N;
2974     ierr = MatGetSize(mat,NULL,&N);CHKERRQ(ierr);
2975     ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),N,0,1,&iscol_local);CHKERRQ(ierr);
2976     ierr = ISSetIdentity(iscol_local);CHKERRQ(ierr);
2977     ierr = PetscInfo(mat,"Optimizing for obtaining all columns of the matrix; skipping ISAllGather()\n");CHKERRQ(ierr);
2978   } else {
2979     PetscInt cbs;
2980     ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr);
2981     ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
2982     ierr = ISSetBlockSize(iscol_local,cbs);CHKERRQ(ierr);
2983   }
2984 
2985   *isseq = iscol_local;
2986   PetscFunctionReturn(0);
2987 }
2988 
2989 /*
2990  Used by MatCreateSubMatrix_MPIAIJ_SameRowColDist() to avoid ISAllGather() and global size of iscol_local
2991  (see MatCreateSubMatrix_MPIAIJ_nonscalable)
2992 
2993  Input Parameters:
2994    mat - matrix
2995    isrow - parallel row index set; its local indices are a subset of local rows of mat,
2996            i.e., mat->rstart <= isrow[i] < mat->rend
2997    iscol - parallel column index set; its local indices are a subset of local columns of mat,
2998            i.e., mat->cstart <= iscol[i] < mat->cend
2999  Output Parameter:
3000    isrow_d,iscol_d - sequential row and column index sets for retrieving mat->A
3001    iscol_o - sequential column index set for retrieving mat->B
3002    garray - column map; garray[i] indicates global location of iscol_o[i] in iscol
3003  */
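/*
  A hypothetical illustration of the output column map (not taken from the source):
  if iscol_o ends up selecting local columns {2,5} of mat->B, and those two columns
  sit at positions 7 and 12 within the parallel iscol, then garray = {7,12}.
*/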
3004 PetscErrorCode ISGetSeqIS_SameColDist_Private(Mat mat,IS isrow,IS iscol,IS *isrow_d,IS *iscol_d,IS *iscol_o,const PetscInt *garray[])
3005 {
3006   PetscErrorCode ierr;
3007   Vec            x,cmap;
3008   const PetscInt *is_idx;
3009   PetscScalar    *xarray,*cmaparray;
3010   PetscInt       ncols,isstart,*idx,m,rstart,*cmap1,count;
3011   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)mat->data;
3012   Mat            B=a->B;
3013   Vec            lvec=a->lvec,lcmap;
3014   PetscInt       i,cstart,cend,Bn=B->cmap->N;
3015   MPI_Comm       comm;
3016 
3017   PetscFunctionBegin;
3018   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3019   ierr = ISGetLocalSize(iscol,&ncols);CHKERRQ(ierr);
3020 
3021   /* (1) iscol is a sub-column vector of mat, pad it with '-1.' to form a full vector x */
3022   ierr = MatCreateVecs(mat,&x,NULL);CHKERRQ(ierr);
3023   ierr = VecDuplicate(x,&cmap);CHKERRQ(ierr);
3024   ierr = VecSet(x,-1.0);CHKERRQ(ierr);
3025 
3026   /* Get start indices */
3027   ierr = MPI_Scan(&ncols,&isstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3028   isstart -= ncols;
3029   ierr = MatGetOwnershipRangeColumn(mat,&cstart,&cend);CHKERRQ(ierr);
3030 
3031   ierr = ISGetIndices(iscol,&is_idx);CHKERRQ(ierr);
3032   ierr = VecGetArray(x,&xarray);CHKERRQ(ierr);
3033   ierr = VecGetArray(cmap,&cmaparray);CHKERRQ(ierr);
3034   ierr = PetscMalloc1(ncols,&idx);CHKERRQ(ierr);
3035   for (i=0; i<ncols; i++) {
3036     xarray[is_idx[i]-cstart]    = (PetscScalar)is_idx[i];
3037     cmaparray[is_idx[i]-cstart] = i + isstart;      /* global index of iscol[i] */
3038     idx[i]                      = is_idx[i]-cstart; /* local index of iscol[i]  */
3039   }
3040   ierr = VecRestoreArray(x,&xarray);CHKERRQ(ierr);
3041   ierr = VecRestoreArray(cmap,&cmaparray);CHKERRQ(ierr);
3042   ierr = ISRestoreIndices(iscol,&is_idx);CHKERRQ(ierr);
3043 
3044   /* Get iscol_d */
3045   ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,iscol_d);CHKERRQ(ierr);
3046   ierr = ISGetBlockSize(iscol,&i);CHKERRQ(ierr);
3047   ierr = ISSetBlockSize(*iscol_d,i);CHKERRQ(ierr);
3048 
3049   /* Get isrow_d */
3050   ierr = ISGetLocalSize(isrow,&m);CHKERRQ(ierr);
3051   rstart = mat->rmap->rstart;
3052   ierr = PetscMalloc1(m,&idx);CHKERRQ(ierr);
3053   ierr = ISGetIndices(isrow,&is_idx);CHKERRQ(ierr);
3054   for (i=0; i<m; i++) idx[i] = is_idx[i]-rstart;
3055   ierr = ISRestoreIndices(isrow,&is_idx);CHKERRQ(ierr);
3056 
3057   ierr = ISCreateGeneral(PETSC_COMM_SELF,m,idx,PETSC_OWN_POINTER,isrow_d);CHKERRQ(ierr);
3058   ierr = ISGetBlockSize(isrow,&i);CHKERRQ(ierr);
3059   ierr = ISSetBlockSize(*isrow_d,i);CHKERRQ(ierr);
3060 
3061   /* (2) Scatter x and cmap using aij->Mvctx to get their off-process portions (see MatMult_MPIAIJ) */
3062   ierr = VecScatterBegin(a->Mvctx,x,lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3063 
3064   ierr = VecDuplicate(lvec,&lcmap);CHKERRQ(ierr);
3065 
3066   ierr = VecScatterEnd(a->Mvctx,x,lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3067   ierr = VecScatterBegin(a->Mvctx,cmap,lcmap,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3068   ierr = VecScatterEnd(a->Mvctx,cmap,lcmap,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3069 
3070   /* (3) create sequential iscol_o (a subset of iscol) and isgarray */
3071   /* off-process column indices */
3072   count = 0;
3073   ierr = PetscMalloc1(Bn,&idx);CHKERRQ(ierr);
3074   ierr = PetscMalloc1(Bn,&cmap1);CHKERRQ(ierr);
3075 
3076   ierr = VecGetArray(lvec,&xarray);CHKERRQ(ierr);
3077   ierr = VecGetArray(lcmap,&cmaparray);CHKERRQ(ierr);
3078   for (i=0; i<Bn; i++) {
3079     if (PetscRealPart(xarray[i]) > -1.0) {
3080       idx[count]     = i;                   /* local column index in off-diagonal part B */
3081       cmap1[count++] = (PetscInt)PetscRealPart(cmaparray[i]);  /* column index in submat */
3082     }
3083   }
3084   ierr = VecRestoreArray(lvec,&xarray);CHKERRQ(ierr);
3085   ierr = VecRestoreArray(lcmap,&cmaparray);CHKERRQ(ierr);
3086 
3087   ierr = ISCreateGeneral(PETSC_COMM_SELF,count,idx,PETSC_COPY_VALUES,iscol_o);CHKERRQ(ierr);
3088   /* cannot ensure iscol_o has same blocksize as iscol! */
3089 
3090   ierr = PetscFree(idx);CHKERRQ(ierr);
3091 
3092   *garray = cmap1;
3093 
3094   ierr = VecDestroy(&x);CHKERRQ(ierr);
3095   ierr = VecDestroy(&cmap);CHKERRQ(ierr);
3096   ierr = VecDestroy(&lcmap);CHKERRQ(ierr);
3097   PetscFunctionReturn(0);
3098 }
3099 
3100 /* isrow and iscol have same processor distribution as mat, output *submat is a submatrix of local mat */
3101 PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowColDist(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *submat)
3102 {
3103   PetscErrorCode ierr;
3104   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)mat->data,*asub;
3105   Mat            M = NULL;
3106   MPI_Comm       comm;
3107   IS             iscol_d,isrow_d,iscol_o;
3108   Mat            Asub = NULL,Bsub = NULL;
3109   PetscInt       n;
3110 
3111   PetscFunctionBegin;
3112   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3113 
3114   if (call == MAT_REUSE_MATRIX) {
3115     /* Retrieve isrow_d, iscol_d and iscol_o from submat */
3116     ierr = PetscObjectQuery((PetscObject)*submat,"isrow_d",(PetscObject*)&isrow_d);CHKERRQ(ierr);
3117     if (!isrow_d) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"isrow_d passed in was not used before, cannot reuse");
3118 
3119     ierr = PetscObjectQuery((PetscObject)*submat,"iscol_d",(PetscObject*)&iscol_d);CHKERRQ(ierr);
3120     if (!iscol_d) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"iscol_d passed in was not used before, cannot reuse");
3121 
3122     ierr = PetscObjectQuery((PetscObject)*submat,"iscol_o",(PetscObject*)&iscol_o);CHKERRQ(ierr);
3123     if (!iscol_o) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"iscol_o passed in was not used before, cannot reuse");
3124 
3125     /* Update diagonal and off-diagonal portions of submat */
3126     asub = (Mat_MPIAIJ*)(*submat)->data;
3127     ierr = MatCreateSubMatrix_SeqAIJ(a->A,isrow_d,iscol_d,PETSC_DECIDE,MAT_REUSE_MATRIX,&asub->A);CHKERRQ(ierr);
3128     ierr = ISGetLocalSize(iscol_o,&n);CHKERRQ(ierr);
3129     if (n) {
3130       ierr = MatCreateSubMatrix_SeqAIJ(a->B,isrow_d,iscol_o,PETSC_DECIDE,MAT_REUSE_MATRIX,&asub->B);CHKERRQ(ierr);
3131     }
3132     ierr = MatAssemblyBegin(*submat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3133     ierr = MatAssemblyEnd(*submat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3134 
3135   } else { /* call == MAT_INITIAL_MATRIX */
3136     const PetscInt *garray;
3137     PetscInt        BsubN;
3138 
3139     /* Create isrow_d, iscol_d, iscol_o and isgarray (replace isgarray with array?) */
3140     ierr = ISGetSeqIS_SameColDist_Private(mat,isrow,iscol,&isrow_d,&iscol_d,&iscol_o,&garray);CHKERRQ(ierr);
3141 
3142     /* Create local submatrices Asub and Bsub */
3143     ierr = MatCreateSubMatrix_SeqAIJ(a->A,isrow_d,iscol_d,PETSC_DECIDE,MAT_INITIAL_MATRIX,&Asub);CHKERRQ(ierr);
3144     ierr = MatCreateSubMatrix_SeqAIJ(a->B,isrow_d,iscol_o,PETSC_DECIDE,MAT_INITIAL_MATRIX,&Bsub);CHKERRQ(ierr);
3145 
3146     /* Create submatrix M */
3147     ierr = MatCreateMPIAIJWithSeqAIJ(comm,Asub,Bsub,garray,&M);CHKERRQ(ierr);
3148 
3149     /* If Bsub has empty columns, compress iscol_o such that it will retrieve condensed Bsub from a->B during reuse */
3150     asub = (Mat_MPIAIJ*)M->data;
3151 
3152     ierr = ISGetLocalSize(iscol_o,&BsubN);CHKERRQ(ierr);
3153     n = asub->B->cmap->N;
3154     if (BsubN > n) {
3155       /* This case can be tested using ~petsc/src/tao/bound/examples/tutorials/runplate2_3 */
3156       const PetscInt *idx;
3157       PetscInt       i,j,*idx_new,*subgarray = asub->garray;
3158       ierr = PetscInfo2(M,"submatrix Bn %D != BsubN %D, update iscol_o\n",n,BsubN);CHKERRQ(ierr);
3159 
3160       ierr = PetscMalloc1(n,&idx_new);CHKERRQ(ierr);
3161       j = 0;
3162       ierr = ISGetIndices(iscol_o,&idx);CHKERRQ(ierr);
3163       for (i=0; i<n; i++) {
3164         if (j >= BsubN) break;
3165         while (subgarray[i] > garray[j]) j++;
3166 
3167         if (subgarray[i] == garray[j]) {
3168           idx_new[i] = idx[j++];
3169         } else SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"subgarray[%D]=%D cannot be less than garray[%D]=%D",i,subgarray[i],j,garray[j]);
3170       }
3171       ierr = ISRestoreIndices(iscol_o,&idx);CHKERRQ(ierr);
3172 
3173       ierr = ISDestroy(&iscol_o);CHKERRQ(ierr);
3174       ierr = ISCreateGeneral(PETSC_COMM_SELF,n,idx_new,PETSC_OWN_POINTER,&iscol_o);CHKERRQ(ierr);
3175 
3176     } else if (BsubN < n) {
3177       SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Columns of Bsub (%D) cannot be smaller than columns of B (%D)",BsubN,asub->B->cmap->N);
3178     }
3179 
3180     ierr = PetscFree(garray);CHKERRQ(ierr);
3181     *submat = M;
3182 
3183     /* Save isrow_d, iscol_d and iscol_o used in processor for next request */
3184     ierr = PetscObjectCompose((PetscObject)M,"isrow_d",(PetscObject)isrow_d);CHKERRQ(ierr);
3185     ierr = ISDestroy(&isrow_d);CHKERRQ(ierr);
3186 
3187     ierr = PetscObjectCompose((PetscObject)M,"iscol_d",(PetscObject)iscol_d);CHKERRQ(ierr);
3188     ierr = ISDestroy(&iscol_d);CHKERRQ(ierr);
3189 
3190     ierr = PetscObjectCompose((PetscObject)M,"iscol_o",(PetscObject)iscol_o);CHKERRQ(ierr);
3191     ierr = ISDestroy(&iscol_o);CHKERRQ(ierr);
3192   }
3193   PetscFunctionReturn(0);
3194 }
3195 
3196 PetscErrorCode MatCreateSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat)
3197 {
3198   PetscErrorCode ierr;
3199   IS             iscol_local=NULL,isrow_d;
3200   PetscInt       csize;
3201   PetscInt       n,i,j,start,end;
3202   PetscBool      sameRowDist=PETSC_FALSE,sameDist[2],tsameDist[2];
3203   MPI_Comm       comm;
3204 
3205   PetscFunctionBegin;
3206   /* If isrow has same processor distribution as mat,
3207      call MatCreateSubMatrix_MPIAIJ_SameRowDist() to avoid using a hash table with global size of iscol */
3208   if (call == MAT_REUSE_MATRIX) {
3209     ierr = PetscObjectQuery((PetscObject)*newmat,"isrow_d",(PetscObject*)&isrow_d);CHKERRQ(ierr);
3210     if (isrow_d) {
3211       sameRowDist  = PETSC_TRUE;
3212       tsameDist[1] = PETSC_TRUE; /* sameColDist */
3213     } else {
3214       ierr = PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_local);CHKERRQ(ierr);
3215       if (iscol_local) {
3216         sameRowDist  = PETSC_TRUE;
3217         tsameDist[1] = PETSC_FALSE; /* !sameColDist */
3218       }
3219     }
3220   } else {
3221     /* Check if isrow has same processor distribution as mat */
3222     sameDist[0] = PETSC_FALSE;
3223     ierr = ISGetLocalSize(isrow,&n);CHKERRQ(ierr);
3224     if (!n) {
3225       sameDist[0] = PETSC_TRUE;
3226     } else {
3227       ierr = ISGetMinMax(isrow,&i,&j);CHKERRQ(ierr);
3228       ierr = MatGetOwnershipRange(mat,&start,&end);CHKERRQ(ierr);
3229       if (i >= start && j < end) {
3230         sameDist[0] = PETSC_TRUE;
3231       }
3232     }
3233 
3234     /* Check if iscol has same processor distribution as mat */
3235     sameDist[1] = PETSC_FALSE;
3236     ierr = ISGetLocalSize(iscol,&n);CHKERRQ(ierr);
3237     if (!n) {
3238       sameDist[1] = PETSC_TRUE;
3239     } else {
3240       ierr = ISGetMinMax(iscol,&i,&j);CHKERRQ(ierr);
3241       ierr = MatGetOwnershipRangeColumn(mat,&start,&end);CHKERRQ(ierr);
3242       if (i >= start && j < end) sameDist[1] = PETSC_TRUE;
3243     }
3244 
3245     ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3246     ierr = MPIU_Allreduce(&sameDist,&tsameDist,2,MPIU_BOOL,MPI_LAND,comm);CHKERRQ(ierr);
3247     sameRowDist = tsameDist[0];
3248   }
3249 
3250   if (sameRowDist) {
3251     if (tsameDist[1]) { /* sameRowDist & sameColDist */
3252       /* isrow and iscol have same processor distribution as mat */
3253       ierr = MatCreateSubMatrix_MPIAIJ_SameRowColDist(mat,isrow,iscol,call,newmat);CHKERRQ(ierr);
3254       PetscFunctionReturn(0);
3255     } else { /* sameRowDist */
3256       /* isrow has same processor distribution as mat */
3257       if (call == MAT_INITIAL_MATRIX) {
3258         PetscBool sorted;
3259         ierr = ISGetSeqIS_Private(mat,iscol,&iscol_local);CHKERRQ(ierr);
3260         ierr = ISGetLocalSize(iscol_local,&n);CHKERRQ(ierr); /* local size of iscol_local = global columns of newmat */
3261         ierr = ISGetSize(iscol,&i);CHKERRQ(ierr);
3262         if (n != i) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"n %d != size of iscol %d",n,i);
3263 
3264         ierr = ISSorted(iscol_local,&sorted);CHKERRQ(ierr);
3265         if (sorted) {
3266           /* MatCreateSubMatrix_MPIAIJ_SameRowDist() requires iscol_local be sorted; it can have duplicate indices */
3267           ierr = MatCreateSubMatrix_MPIAIJ_SameRowDist(mat,isrow,iscol,iscol_local,MAT_INITIAL_MATRIX,newmat);CHKERRQ(ierr);
3268           PetscFunctionReturn(0);
3269         }
3270       } else { /* call == MAT_REUSE_MATRIX */
3271         IS    iscol_sub;
3272         ierr = PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_sub);CHKERRQ(ierr);
3273         if (iscol_sub) {
3274           ierr = MatCreateSubMatrix_MPIAIJ_SameRowDist(mat,isrow,iscol,NULL,call,newmat);CHKERRQ(ierr);
3275           PetscFunctionReturn(0);
3276         }
3277       }
3278     }
3279   }
3280 
3281   /* General case: iscol -> iscol_local which has global size of iscol */
3282   if (call == MAT_REUSE_MATRIX) {
3283     ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr);
3284     if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3285   } else {
3286     if (!iscol_local) {
3287       ierr = ISGetSeqIS_Private(mat,iscol,&iscol_local);CHKERRQ(ierr);
3288     }
3289   }
3290 
3291   ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
3292   ierr = MatCreateSubMatrix_MPIAIJ_nonscalable(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr);
3293 
3294   if (call == MAT_INITIAL_MATRIX) {
3295     ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
3296     ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
3297   }
3298   PetscFunctionReturn(0);
3299 }
3300 
3301 /*@C
3302      MatCreateMPIAIJWithSeqAIJ - creates an MPIAIJ matrix using SeqAIJ matrices that contain the "diagonal"
3303          and "off-diagonal" parts of the matrix in CSR format.
3304 
3305    Collective on MPI_Comm
3306 
3307    Input Parameters:
3308 +  comm - MPI communicator
3309 .  A - "diagonal" portion of matrix
3310 .  B - "off-diagonal" portion of matrix, may have empty columns, will be destroyed by this routine
3311 -  garray - global index of B columns
3312 
3313    Output Parameter:
3314 .   mat - the matrix, with input A as its local diagonal matrix
3315    Level: advanced
3316 
3317    Notes:
3318        See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix.
3319        A becomes part of output mat, B is destroyed by this routine. The user cannot use A and B anymore.
3320 
3321 .seealso: MatCreateMPIAIJWithSplitArrays()
3322 @*/
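/*
  A minimal usage sketch (illustration only; Asub, Bsub and bgarray are assumed to come
  from earlier steps, e.g. sequential submatrices of the diagonal and off-diagonal blocks,
  with bgarray mapping Bsub's columns to global column indices):

    Mat M;
    ierr = MatCreateMPIAIJWithSeqAIJ(PETSC_COMM_WORLD,Asub,Bsub,bgarray,&M);CHKERRQ(ierr);
    ...  use M; Asub now belongs to M and Bsub has been destroyed by the call
    ierr = MatDestroy(&M);CHKERRQ(ierr);
*/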
3323 PetscErrorCode MatCreateMPIAIJWithSeqAIJ(MPI_Comm comm,Mat A,Mat B,const PetscInt garray[],Mat *mat)
3324 {
3325   PetscErrorCode ierr;
3326   Mat_MPIAIJ     *maij;
3327   Mat_SeqAIJ     *b=(Mat_SeqAIJ*)B->data,*bnew;
3328   PetscInt       *oi=b->i,*oj=b->j,i,nz,col;
3329   PetscScalar    *oa=b->a;
3330   Mat            Bnew;
3331   PetscInt       m,n,N;
3332 
3333   PetscFunctionBegin;
3334   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
3335   ierr = MatGetSize(A,&m,&n);CHKERRQ(ierr);
3336   if (m != B->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Am %D != Bm %D",m,B->rmap->N);
3337   if (A->rmap->bs != B->rmap->bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"A row bs %D != B row bs %D",A->rmap->bs,B->rmap->bs);
3338   /* remove check below; When B is created using iscol_o from ISGetSeqIS_SameColDist_Private(), its bs may not be same as A */
3339   /* if (A->cmap->bs != B->cmap->bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"A column bs %D != B column bs %D",A->cmap->bs,B->cmap->bs); */
3340 
3341   /* Get global columns of mat */
3342   ierr = MPIU_Allreduce(&n,&N,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3343 
3344   ierr = MatSetSizes(*mat,m,n,PETSC_DECIDE,N);CHKERRQ(ierr);
3345   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
3346   ierr = MatSetBlockSizes(*mat,A->rmap->bs,A->cmap->bs);CHKERRQ(ierr);
3347   maij = (Mat_MPIAIJ*)(*mat)->data;
3348 
3349   (*mat)->preallocated = PETSC_TRUE;
3350 
3351   ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr);
3352   ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr);
3353 
3354   /* Set A as diagonal portion of *mat */
3355   maij->A = A;
3356 
3357   nz = oi[m];
3358   for (i=0; i<nz; i++) {
3359     col   = oj[i];
3360     oj[i] = garray[col];
3361   }
3362 
3363    /* Set Bnew as off-diagonal portion of *mat */
3364   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,N,oi,oj,oa,&Bnew);CHKERRQ(ierr);
3365   bnew        = (Mat_SeqAIJ*)Bnew->data;
3366   bnew->maxnz = b->maxnz; /* allocated nonzeros of B */
3367   maij->B     = Bnew;
3368 
3369   if (B->rmap->N != Bnew->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"BN %D != BnewN %D",B->rmap->N,Bnew->rmap->N);
3370 
3371   b->singlemalloc = PETSC_FALSE; /* B arrays are shared by Bnew */
3372   b->free_a       = PETSC_FALSE;
3373   b->free_ij      = PETSC_FALSE;
3374   ierr = MatDestroy(&B);CHKERRQ(ierr);
3375 
3376   bnew->singlemalloc = PETSC_TRUE; /* arrays will be freed by MatDestroy(&Bnew) */
3377   bnew->free_a       = PETSC_TRUE;
3378   bnew->free_ij      = PETSC_TRUE;
3379 
3380   /* condense columns of maij->B */
3381   ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr);
3382   ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3383   ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3384   ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_FALSE);CHKERRQ(ierr);
3385   ierr = MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3386   PetscFunctionReturn(0);
3387 }
3388 
3389 extern PetscErrorCode MatCreateSubMatrices_MPIAIJ_SingleIS_Local(Mat,PetscInt,const IS[],const IS[],MatReuse,PetscBool,Mat*);
3390 
3391 PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowDist(Mat mat,IS isrow,IS iscol,IS iscol_local,MatReuse call,Mat *newmat)
3392 {
3393   PetscErrorCode ierr;
3394   PetscInt       i,m,n,rstart,row,rend,nz,j,bs,cbs;
3395   PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal;
3396   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)mat->data;
3397   Mat            M,Msub,B=a->B;
3398   MatScalar      *aa;
3399   Mat_SeqAIJ     *aij;
3400   PetscInt       *garray = a->garray,*colsub,Ncols;
3401   PetscInt       count,Bn=B->cmap->N,cstart=mat->cmap->rstart,cend=mat->cmap->rend;
3402   IS             iscol_sub,iscmap;
3403   const PetscInt *is_idx,*cmap;
3404   PetscBool      allcolumns=PETSC_FALSE;
3405   MPI_Comm       comm;
3406 
3407   PetscFunctionBegin;
3408   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3409 
3410   if (call == MAT_REUSE_MATRIX) {
3411     ierr = PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_sub);CHKERRQ(ierr);
3412     if (!iscol_sub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"SubIScol passed in was not used before, cannot reuse");
3413     ierr = ISGetLocalSize(iscol_sub,&count);CHKERRQ(ierr);
3414 
3415     ierr = PetscObjectQuery((PetscObject)*newmat,"Subcmap",(PetscObject*)&iscmap);CHKERRQ(ierr);
3416     if (!iscmap) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Subcmap passed in was not used before, cannot reuse");
3417 
3418     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Msub);CHKERRQ(ierr);
3419     if (!Msub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3420 
3421     ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol_sub,MAT_REUSE_MATRIX,PETSC_FALSE,&Msub);CHKERRQ(ierr);
3422 
3423   } else { /* call == MAT_INITIAL_MATRIX */
3424     PetscBool flg;
3425 
3426     ierr = ISGetLocalSize(iscol,&n);CHKERRQ(ierr);
3427     ierr = ISGetSize(iscol,&Ncols);CHKERRQ(ierr);
3428 
3429     /* (1) iscol -> nonscalable iscol_local */
3430     /* Check for special case: each processor gets entire matrix columns */
3431     ierr = ISIdentity(iscol_local,&flg);CHKERRQ(ierr);
3432     if (flg && n == mat->cmap->N) allcolumns = PETSC_TRUE;
3433     if (allcolumns) {
3434       iscol_sub = iscol_local;
3435       ierr = PetscObjectReference((PetscObject)iscol_local);CHKERRQ(ierr);
3436       ierr = ISCreateStride(PETSC_COMM_SELF,n,0,1,&iscmap);CHKERRQ(ierr);
3437 
3438     } else {
3439       /* (2) iscol_local -> iscol_sub and iscmap. Implementation below requires iscol_local be sorted, it can have duplicate indices */
3440       PetscInt *idx,*cmap1,k;
3441       ierr = PetscMalloc1(Ncols,&idx);CHKERRQ(ierr);
3442       ierr = PetscMalloc1(Ncols,&cmap1);CHKERRQ(ierr);
3443       ierr = ISGetIndices(iscol_local,&is_idx);CHKERRQ(ierr);
3444       count = 0;
3445       k     = 0;
3446       for (i=0; i<Ncols; i++) {
3447         j = is_idx[i];
3448         if (j >= cstart && j < cend) {
3449           /* diagonal part of mat */
3450           idx[count]     = j;
3451           cmap1[count++] = i; /* column index in submat */
3452         } else if (Bn) {
3453           /* off-diagonal part of mat */
3454           if (j == garray[k]) {
3455             idx[count]     = j;
3456             cmap1[count++] = i;  /* column index in submat */
3457           } else if (j > garray[k]) {
3458             while (j > garray[k] && k < Bn-1) k++;
3459             if (j == garray[k]) {
3460               idx[count]     = j;
3461               cmap1[count++] = i; /* column index in submat */
3462             }
3463           }
3464         }
3465       }
3466       ierr = ISRestoreIndices(iscol_local,&is_idx);CHKERRQ(ierr);
3467 
3468       ierr = ISCreateGeneral(PETSC_COMM_SELF,count,idx,PETSC_OWN_POINTER,&iscol_sub);CHKERRQ(ierr);
3469       ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr);
3470       ierr = ISSetBlockSize(iscol_sub,cbs);CHKERRQ(ierr);
3471 
3472       ierr = ISCreateGeneral(PetscObjectComm((PetscObject)iscol_local),count,cmap1,PETSC_OWN_POINTER,&iscmap);CHKERRQ(ierr);
3473     }
3474 
3475     /* (3) Create sequential Msub */
3476     ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol_sub,MAT_INITIAL_MATRIX,allcolumns,&Msub);CHKERRQ(ierr);
3477   }
3478 
3479   ierr = ISGetLocalSize(iscol_sub,&count);CHKERRQ(ierr);
3480   aij  = (Mat_SeqAIJ*)(Msub)->data;
3481   ii   = aij->i;
3482   ierr = ISGetIndices(iscmap,&cmap);CHKERRQ(ierr);
3483 
3484   /*
3485       m - number of local rows
3486       Ncols - number of columns (same on all processors)
3487       rstart - first row in new global matrix generated
3488   */
3489   ierr = MatGetSize(Msub,&m,NULL);CHKERRQ(ierr);
3490 
3491   if (call == MAT_INITIAL_MATRIX) {
3492     /* (4) Create parallel newmat */
3493     PetscMPIInt    rank,size;
3494     PetscInt       csize;
3495 
3496     ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3497     ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3498 
3499     /*
3500         Determine the number of non-zeros in the diagonal and off-diagonal
3501         portions of the matrix in order to do correct preallocation
3502     */
3503 
3504     /* first get start and end of "diagonal" columns */
3505     ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
3506     if (csize == PETSC_DECIDE) {
3507       ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
3508       if (mglobal == Ncols) { /* square matrix */
3509         nlocal = m;
3510       } else {
3511         nlocal = Ncols/size + ((Ncols % size) > rank);
3512       }
3513     } else {
3514       nlocal = csize;
3515     }
3516     ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3517     rstart = rend - nlocal;
3518     if (rank == size - 1 && rend != Ncols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,Ncols);
3519 
3520     /* next, compute all the lengths */
3521     jj    = aij->j;
3522     ierr  = PetscMalloc1(2*m+1,&dlens);CHKERRQ(ierr);
3523     olens = dlens + m;
3524     for (i=0; i<m; i++) {
3525       jend = ii[i+1] - ii[i];
3526       olen = 0;
3527       dlen = 0;
3528       for (j=0; j<jend; j++) {
3529         if (cmap[*jj] < rstart || cmap[*jj] >= rend) olen++;
3530         else dlen++;
3531         jj++;
3532       }
3533       olens[i] = olen;
3534       dlens[i] = dlen;
3535     }
3536 
3537     ierr = ISGetBlockSize(isrow,&bs);CHKERRQ(ierr);
3538     ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr);
3539 
3540     ierr = MatCreate(comm,&M);CHKERRQ(ierr);
3541     ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,Ncols);CHKERRQ(ierr);
3542     ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr);
3543     ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr);
3544     ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr);
3545     ierr = PetscFree(dlens);CHKERRQ(ierr);
3546 
3547   } else { /* call == MAT_REUSE_MATRIX */
3548     M    = *newmat;
3549     ierr = MatGetLocalSize(M,&i,NULL);CHKERRQ(ierr);
3550     if (i != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
3551     ierr = MatZeroEntries(M);CHKERRQ(ierr);
3552     /*
3553          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
3554        rather than the slower MatSetValues().
3555     */
3556     M->was_assembled = PETSC_TRUE;
3557     M->assembled     = PETSC_FALSE;
3558   }
3559 
3560   /* (5) Set values of Msub to *newmat */
3561   ierr = PetscMalloc1(count,&colsub);CHKERRQ(ierr);
3562   ierr = MatGetOwnershipRange(M,&rstart,NULL);CHKERRQ(ierr);
3563 
3564   jj   = aij->j;
3565   aa   = aij->a;
3566   for (i=0; i<m; i++) {
3567     row = rstart + i;
3568     nz  = ii[i+1] - ii[i];
3569     for (j=0; j<nz; j++) colsub[j] = cmap[jj[j]];
3570     ierr  = MatSetValues_MPIAIJ(M,1,&row,nz,colsub,aa,INSERT_VALUES);CHKERRQ(ierr);
3571     jj += nz; aa += nz;
3572   }
3573   ierr = ISRestoreIndices(iscmap,&cmap);CHKERRQ(ierr);
3574 
3575   ierr    = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3576   ierr    = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3577 
3578   ierr = PetscFree(colsub);CHKERRQ(ierr);
3579 
3580   /* save Msub, iscol_sub and iscmap used in processor for next request */
3581   if (call ==  MAT_INITIAL_MATRIX) {
3582     *newmat = M;
3583     ierr = PetscObjectCompose((PetscObject)(*newmat),"SubMatrix",(PetscObject)Msub);CHKERRQ(ierr);
3584     ierr = MatDestroy(&Msub);CHKERRQ(ierr);
3585 
3586     ierr = PetscObjectCompose((PetscObject)(*newmat),"SubIScol",(PetscObject)iscol_sub);CHKERRQ(ierr);
3587     ierr = ISDestroy(&iscol_sub);CHKERRQ(ierr);
3588 
3589     ierr = PetscObjectCompose((PetscObject)(*newmat),"Subcmap",(PetscObject)iscmap);CHKERRQ(ierr);
3590     ierr = ISDestroy(&iscmap);CHKERRQ(ierr);
3591 
3592     if (iscol_local) {
3593       ierr = PetscObjectCompose((PetscObject)(*newmat),"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
3594       ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
3595     }
3596   }
3597   PetscFunctionReturn(0);
3598 }
3599 
3600 /*
3601     Not great since it makes two copies of the submatrix: first a local SeqAIJ,
3602   and then the end result obtained by concatenating the local matrices.
3603   Writing it directly would be much like MatCreateSubMatrices_MPIAIJ().
3604 
3605   Note: This requires a sequential iscol with all indices.
3606 */
3607 PetscErrorCode MatCreateSubMatrix_MPIAIJ_nonscalable(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat)
3608 {
3609   PetscErrorCode ierr;
3610   PetscMPIInt    rank,size;
3611   PetscInt       i,m,n,rstart,row,rend,nz,*cwork,j,bs,cbs;
3612   PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal;
3613   Mat            M,Mreuse;
3614   MatScalar      *aa,*vwork;
3615   MPI_Comm       comm;
3616   Mat_SeqAIJ     *aij;
3617   PetscBool      colflag,allcolumns=PETSC_FALSE;
3618 
3619   PetscFunctionBegin;
3620   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3621   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3622   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3623 
3624   /* Check for special case: each processor gets entire matrix columns */
3625   ierr = ISIdentity(iscol,&colflag);CHKERRQ(ierr);
3626   ierr = ISGetLocalSize(iscol,&n);CHKERRQ(ierr);
3627   if (colflag && n == mat->cmap->N) allcolumns = PETSC_TRUE;
3628 
3629   if (call ==  MAT_REUSE_MATRIX) {
3630     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Mreuse);CHKERRQ(ierr);
3631     if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3632     ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,allcolumns,&Mreuse);CHKERRQ(ierr);
3633   } else {
3634     ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,allcolumns,&Mreuse);CHKERRQ(ierr);
3635   }
3636 
3637   /*
3638       m - number of local rows
3639       n - number of columns (same on all processors)
3640       rstart - first row in new global matrix generated
3641   */
3642   ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
3643   ierr = MatGetBlockSizes(Mreuse,&bs,&cbs);CHKERRQ(ierr);
3644   if (call == MAT_INITIAL_MATRIX) {
3645     aij = (Mat_SeqAIJ*)(Mreuse)->data;
3646     ii  = aij->i;
3647     jj  = aij->j;
3648 
3649     /*
3650         Determine the number of non-zeros in the diagonal and off-diagonal
3651         portions of the matrix in order to do correct preallocation
3652     */
3653 
3654     /* first get start and end of "diagonal" columns */
3655     if (csize == PETSC_DECIDE) {
3656       ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
3657       if (mglobal == n) { /* square matrix */
3658         nlocal = m;
3659       } else {
3660         nlocal = n/size + ((n % size) > rank);
3661       }
3662     } else {
3663       nlocal = csize;
3664     }
3665     ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3666     rstart = rend - nlocal;
3667     if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n);
3668 
3669     /* next, compute all the lengths */
3670     ierr  = PetscMalloc1(2*m+1,&dlens);CHKERRQ(ierr);
3671     olens = dlens + m;
3672     for (i=0; i<m; i++) {
3673       jend = ii[i+1] - ii[i];
3674       olen = 0;
3675       dlen = 0;
3676       for (j=0; j<jend; j++) {
3677         if (*jj < rstart || *jj >= rend) olen++;
3678         else dlen++;
3679         jj++;
3680       }
3681       olens[i] = olen;
3682       dlens[i] = dlen;
3683     }
3684     ierr = MatCreate(comm,&M);CHKERRQ(ierr);
3685     ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr);
3686     ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr);
3687     ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr);
3688     ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr);
3689     ierr = PetscFree(dlens);CHKERRQ(ierr);
3690   } else {
3691     PetscInt ml,nl;
3692 
3693     M    = *newmat;
3694     ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
3695     if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
3696     ierr = MatZeroEntries(M);CHKERRQ(ierr);
3697     /*
3698          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
3699        rather than the slower MatSetValues().
3700     */
3701     M->was_assembled = PETSC_TRUE;
3702     M->assembled     = PETSC_FALSE;
3703   }
3704   ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
3705   aij  = (Mat_SeqAIJ*)(Mreuse)->data;
3706   ii   = aij->i;
3707   jj   = aij->j;
3708   aa   = aij->a;
3709   for (i=0; i<m; i++) {
3710     row   = rstart + i;
3711     nz    = ii[i+1] - ii[i];
3712     cwork = jj;     jj += nz;
3713     vwork = aa;     aa += nz;
3714     ierr  = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
3715   }
3716 
3717   ierr    = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3718   ierr    = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3719   *newmat = M;
3720 
3721   /* save submatrix used in processor for next request */
3722   if (call ==  MAT_INITIAL_MATRIX) {
3723     ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
3724     ierr = MatDestroy(&Mreuse);CHKERRQ(ierr);
3725   }
3726   PetscFunctionReturn(0);
3727 }
3728 
3729 PetscErrorCode MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[])
3730 {
3731   PetscInt       m,cstart, cend,j,nnz,i,d;
3732   PetscInt       *d_nnz,*o_nnz,nnz_max = 0,rstart,ii;
3733   const PetscInt *JJ;
3734   PetscScalar    *values;
3735   PetscErrorCode ierr;
3736   PetscBool      nooffprocentries;
3737 
3738   PetscFunctionBegin;
3739   if (Ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0 it is %D",Ii[0]);
3740 
3741   ierr   = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3742   ierr   = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3743   m      = B->rmap->n;
3744   cstart = B->cmap->rstart;
3745   cend   = B->cmap->rend;
3746   rstart = B->rmap->rstart;
3747 
3748   ierr = PetscMalloc2(m,&d_nnz,m,&o_nnz);CHKERRQ(ierr);
3749 
3750 #if defined(PETSC_USE_DEBUG)
3751   for (i=0; i<m; i++) {
3752     nnz = Ii[i+1]- Ii[i];
3753     JJ  = J + Ii[i];
3754     if (nnz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative %D number of columns",i,nnz);
3755     if (nnz && (JJ[0] < 0)) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index",i);
3756     if (nnz && (JJ[nnz-1] >= B->cmap->N)) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N);
3757   }
3758 #endif
3759 
3760   for (i=0; i<m; i++) {
3761     nnz     = Ii[i+1]- Ii[i];
3762     JJ      = J + Ii[i];
3763     nnz_max = PetscMax(nnz_max,nnz);
3764     d       = 0;
3765     for (j=0; j<nnz; j++) {
3766       if (cstart <= JJ[j] && JJ[j] < cend) d++;
3767     }
3768     d_nnz[i] = d;
3769     o_nnz[i] = nnz - d;
3770   }
3771   ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
3772   ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);
3773 
3774   if (v) values = (PetscScalar*)v;
3775   else {
3776     ierr = PetscCalloc1(nnz_max+1,&values);CHKERRQ(ierr);
3777   }
3778 
3779   for (i=0; i<m; i++) {
3780     ii   = i + rstart;
3781     nnz  = Ii[i+1]- Ii[i];
3782     ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ? Ii[i] : 0),INSERT_VALUES);CHKERRQ(ierr);
3783   }
3784   nooffprocentries    = B->nooffprocentries;
3785   B->nooffprocentries = PETSC_TRUE;
3786   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3787   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3788   B->nooffprocentries = nooffprocentries;
3789 
3790   if (!v) {
3791     ierr = PetscFree(values);CHKERRQ(ierr);
3792   }
3793   ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3794   PetscFunctionReturn(0);
3795 }
3796 
3797 /*@
3798    MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format
3799    (the default parallel PETSc format).
3800 
3801    Collective on MPI_Comm
3802 
3803    Input Parameters:
3804 +  B - the matrix
3805 .  i - the indices into j for the start of each local row (starts with zero)
3806 .  j - the column indices for each local row (starts with zero)
3807 -  v - optional values in the matrix
3808 
3809    Level: developer
3810 
3811    Notes:
3812       The i, j, and v arrays ARE copied by this routine into the internal format used by PETSc;
3813     thus you CANNOT change the matrix entries by changing the values of v[] after you have
3814     called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3815 
3816       The i and j indices are 0 based, and the i indices are offsets into the local j array.
3817 
3818       The format used for the sparse matrix input is equivalent to a
3819     row-major ordering, i.e. for the following matrix, the input data expected is
3820     as shown:
3821 
3822 $        1 0 0
3823 $        2 0 3     P0
3824 $       -------
3825 $        4 5 6     P1
3826 $
3827 $     Process0 [P0]: rows_owned=[0,1]
3828 $        i =  {0,1,3}  [size = nrow+1  = 2+1]
3829 $        j =  {0,0,2}  [size = 3]
3830 $        v =  {1,2,3}  [size = 3]
3831 $
3832 $     Process1 [P1]: rows_owned=[2]
3833 $        i =  {0,3}    [size = nrow+1  = 1+1]
3834 $        j =  {0,1,2}  [size = 3]
3835 $        v =  {4,5,6}  [size = 3]
3836 
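   A minimal calling sketch for process P0 of the example above (error checking omitted;
   comm is an assumed two-process communicator and the sizes and array contents are just
   those of the example):

.vb
     Mat         A;
     PetscInt    i[] = {0,1,3}, j[] = {0,0,2};
     PetscScalar v[] = {1.0,2.0,3.0};

     MatCreate(comm,&A);
     MatSetSizes(A,2,PETSC_DECIDE,3,3);
     MatSetType(A,MATMPIAIJ);
     MatMPIAIJSetPreallocationCSR(A,i,j,v);
.ve

   After this call the matrix is preallocated, filled with the given values, and assembled.
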
3837 .keywords: matrix, aij, compressed row, sparse, parallel
3838 
3839 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateAIJ(), MATMPIAIJ,
3840           MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays()
3841 @*/
3842 PetscErrorCode  MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
3843 {
3844   PetscErrorCode ierr;
3845 
3846   PetscFunctionBegin;
3847   ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocationCSR_C",(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,i,j,v));CHKERRQ(ierr);
3848   PetscFunctionReturn(0);
3849 }
3850 
3851 /*@C
3852    MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format
3853    (the default parallel PETSc format).  For good matrix assembly performance
3854    the user should preallocate the matrix storage by setting the parameters
3855    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3856    performance can be increased by more than a factor of 50.
3857 
3858    Collective on MPI_Comm
3859 
3860    Input Parameters:
3861 +  B - the matrix
3862 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
3863            (same value is used for all local rows)
3864 .  d_nnz - array containing the number of nonzeros in the various rows of the
3865            DIAGONAL portion of the local submatrix (possibly different for each row)
3866            or NULL (PETSC_NULL_INTEGER in Fortran), if d_nz is used to specify the nonzero structure.
3867            The size of this array is equal to the number of local rows, i.e 'm'.
3868            For matrices that will be factored, you must leave room for (and set)
3869            the diagonal entry even if it is zero.
3870 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
3871            submatrix (same value is used for all local rows).
3872 -  o_nnz - array containing the number of nonzeros in the various rows of the
3873            OFF-DIAGONAL portion of the local submatrix (possibly different for
3874            each row) or NULL (PETSC_NULL_INTEGER in Fortran), if o_nz is used to specify the nonzero
3875            structure. The size of this array is equal to the number
3876            of local rows, i.e 'm'.
3877 
3878    If the *_nnz parameter is given then the *_nz parameter is ignored
3879 
3880    The AIJ format (also called the Yale sparse matrix format or
3881    compressed row storage (CSR)), is fully compatible with standard Fortran 77
3882    storage.  The stored row and column indices begin with zero.
3883    See Users-Manual: ch_mat for details.
3884 
3885    The parallel matrix is partitioned such that the first m0 rows belong to
3886    process 0, the next m1 rows belong to process 1, the next m2 rows belong
3887    to process 2 etc., where m0,m1,m2,... are the input parameter 'm'.
3888 
3889    The DIAGONAL portion of the local submatrix of a processor can be defined
3890    as the submatrix which is obtained by extracting the part corresponding to
3891    the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the
3892    first row that belongs to the processor, r2 is the last row belonging to
3893    this processor, and c1-c2 is the range of indices of the local part of a
3894    vector suitable for applying the matrix to.  This is an mxn matrix.  In the
3895    common case of a square matrix, the row and column ranges are the same and
3896    the DIAGONAL part is also square. The remaining portion of the local
3897    submatrix (mxN) constitutes the OFF-DIAGONAL portion.
3898 
3899    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
3900 
3901    You can call MatGetInfo() to get information on how effective the preallocation was;
3902    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
3903    You can also run with the option -info and look for messages with the string
3904    malloc in them to see if additional memory allocation was needed.
3905 
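   For example, a minimal sketch of querying the preallocation quality after assembly
   (the matrix name B is just illustrative):

.vb
     MatInfo info;

     MatGetInfo(B,MAT_LOCAL,&info);
     PetscPrintf(PETSC_COMM_SELF,"mallocs %g nz_allocated %g nz_used %g nz_unneeded %g\n",
                 info.mallocs,info.nz_allocated,info.nz_used,info.nz_unneeded);
.ve
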
3906    Example usage:
3907 
3908    Consider the following 8x8 matrix with 34 non-zero values, that is
3909    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
3910    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
3911    as follows:
3912 
3913 .vb
3914             1  2  0  |  0  3  0  |  0  4
3915     Proc0   0  5  6  |  7  0  0  |  8  0
3916             9  0 10  | 11  0  0  | 12  0
3917     -------------------------------------
3918            13  0 14  | 15 16 17  |  0  0
3919     Proc1   0 18  0  | 19 20 21  |  0  0
3920             0  0  0  | 22 23  0  | 24  0
3921     -------------------------------------
3922     Proc2  25 26 27  |  0  0 28  | 29  0
3923            30  0  0  | 31 32 33  |  0 34
3924 .ve
3925 
3926    This can be represented as a collection of submatrices as:
3927 
3928 .vb
3929       A B C
3930       D E F
3931       G H I
3932 .ve
3933 
3934    Where the submatrices A,B,C are owned by proc0, D,E,F are
3935    owned by proc1, G,H,I are owned by proc2.
3936 
3937    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3938    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3939    The 'M','N' parameters are 8,8, and have the same values on all procs.
3940 
3941    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
3942    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
3943    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
3944    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
3945    part as SeqAIJ matrices, e.g., proc1 will store [E] as a SeqAIJ
3946    matrix, and [DF] as another SeqAIJ matrix.
3947 
3948    When d_nz, o_nz parameters are specified, d_nz storage elements are
3949    allocated for every row of the local diagonal submatrix, and o_nz
3950    storage locations are allocated for every row of the OFF-DIAGONAL submat.
3951    One way to choose d_nz and o_nz is to use the max nonzeros per local
3952    row for each of the local DIAGONAL and OFF-DIAGONAL submatrices.
3953    In this case, the values of d_nz,o_nz are:
3954 .vb
3955      proc0 : dnz = 2, o_nz = 2
3956      proc1 : dnz = 3, o_nz = 2
3957      proc2 : dnz = 1, o_nz = 4
3958 .ve
3959    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
3960    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
3961    for proc2, i.e. we are using 12+15+10=37 storage locations to store
3962    34 values.
3963 
3964    When d_nnz, o_nnz parameters are specified, the storage is specified
3965    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
3966    In the above case the values for d_nnz,o_nnz are:
3967 .vb
3968      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
3969      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
3970      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
3971 .ve
3972    Here the space allocated is the sum of all the above values, i.e. 34, and
3973    hence pre-allocation is perfect.
3974 
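   A minimal calling sketch for proc1 of the example above (error checking omitted; comm
   is an assumed three-process communicator and the nnz arrays are just those listed above):

.vb
     Mat      B;
     PetscInt d_nnz[] = {3,3,2}, o_nnz[] = {2,1,1};

     MatCreate(comm,&B);
     MatSetSizes(B,3,3,8,8);
     MatSetType(B,MATMPIAIJ);
     MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);
     ... MatSetValues(), MatAssemblyBegin(), MatAssemblyEnd() ...
.ve
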
3975    Level: intermediate
3976 
3977 .keywords: matrix, aij, compressed row, sparse, parallel
3978 
3979 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateAIJ(), MatMPIAIJSetPreallocationCSR(),
3980           MATMPIAIJ, MatGetInfo(), PetscSplitOwnership()
3981 @*/
3982 PetscErrorCode MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
3983 {
3984   PetscErrorCode ierr;
3985 
3986   PetscFunctionBegin;
3987   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
3988   PetscValidType(B,1);
3989   ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocation_C",(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr);
3990   PetscFunctionReturn(0);
3991 }
3992 
3993 /*@
3994      MatCreateMPIAIJWithArrays - creates a MPI AIJ matrix using arrays that contain in standard
3995          CSR format the local rows.
3996 
3997    Collective on MPI_Comm
3998 
3999    Input Parameters:
4000 +  comm - MPI communicator
4001 .  m - number of local rows (Cannot be PETSC_DECIDE)
4002 .  n - This value should be the same as the local size used in creating the
4003        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
4004        calculated if N is given) For square matrices n is almost always m.
4005 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
4006 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
4007 .   i - row indices
4008 .   j - column indices
4009 -   a - matrix values
4010 
4011    Output Parameter:
4012 .   mat - the matrix
4013 
4014    Level: intermediate
4015 
4016    Notes:
4017        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
4018      thus you CANNOT change the matrix entries by changing the values of a[] after you have
4019      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
4020 
4021       The i and j indices are 0 based, and the i indices are offsets into the local j array.
4022 
4023       The format used for the sparse matrix input is equivalent to a
4024     row-major ordering, i.e. for the following matrix, the input data expected is
4025     as shown:
4026 
4027 $        1 0 0
4028 $        2 0 3     P0
4029 $       -------
4030 $        4 5 6     P1
4031 $
4032 $     Process0 [P0]: rows_owned=[0,1]
4033 $        i =  {0,1,3}  [size = nrow+1  = 2+1]
4034 $        j =  {0,0,2}  [size = 3]
4035 $        v =  {1,2,3}  [size = 3]
4036 $
4037 $     Process1 [P1]: rows_owned=[2]
4038 $        i =  {0,3}    [size = nrow+1  = 1+1]
4039 $        j =  {0,1,2}  [size = 3]
4040 $        v =  {4,5,6}  [size = 3]
4041 
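   A minimal calling sketch for process P0 of the example above (error checking omitted;
   comm is an assumed two-process communicator; process P1 makes the analogous call with
   its own arrays):

.vb
     Mat         A;
     PetscInt    i[] = {0,1,3}, j[] = {0,0,2};
     PetscScalar a[] = {1.0,2.0,3.0};

     MatCreateMPIAIJWithArrays(comm,2,PETSC_DECIDE,3,3,i,j,a,&A);
.ve
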
4042 .keywords: matrix, aij, compressed row, sparse, parallel
4043 
4044 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
4045           MATMPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays()
4046 @*/
4047 PetscErrorCode MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
4048 {
4049   PetscErrorCode ierr;
4050 
4051   PetscFunctionBegin;
4052   if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
4053   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
4054   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
4055   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
4056   /* ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); */
4057   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
4058   ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr);
4059   PetscFunctionReturn(0);
4060 }
4061 
4062 /*@C
4063    MatCreateAIJ - Creates a sparse parallel matrix in AIJ format
4064    (the default parallel PETSc format).  For good matrix assembly performance
4065    the user should preallocate the matrix storage by setting the parameters
4066    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
4067    performance can be increased by more than a factor of 50.
4068 
4069    Collective on MPI_Comm
4070 
4071    Input Parameters:
4072 +  comm - MPI communicator
4073 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
4074            This value should be the same as the local size used in creating the
4075            y vector for the matrix-vector product y = Ax.
4076 .  n - This value should be the same as the local size used in creating the
4077        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
4078        calculated if N is given) For square matrices n is almost always m.
4079 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
4080 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
4081 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
4082            (same value is used for all local rows)
4083 .  d_nnz - array containing the number of nonzeros in the various rows of the
4084            DIAGONAL portion of the local submatrix (possibly different for each row)
4085            or NULL, if d_nz is used to specify the nonzero structure.
4086            The size of this array is equal to the number of local rows, i.e 'm'.
4087 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
4088            submatrix (same value is used for all local rows).
4089 -  o_nnz - array containing the number of nonzeros in the various rows of the
4090            OFF-DIAGONAL portion of the local submatrix (possibly different for
4091            each row) or NULL, if o_nz is used to specify the nonzero
4092            structure. The size of this array is equal to the number
4093            of local rows, i.e 'm'.
4094 
4095    Output Parameter:
4096 .  A - the matrix
4097 
4098    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
4099    MatXXXXSetPreallocation() paradigm instead of this routine directly.
4100    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
4101 
4102    Notes:
4103    If the *_nnz parameter is given then the *_nz parameter is ignored
4104 
4105    m,n,M,N parameters specify the size of the matrix, and its partitioning across
4106    processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
4107    storage requirements for this matrix.
4108 
4109    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one
4110    processor then it must be used on all processors that share the object for
4111    that argument.
4112 
4113    The user MUST specify either the local or global matrix dimensions
4114    (possibly both).
4115 
4116    The parallel matrix is partitioned across processors such that the
4117    first m0 rows belong to process 0, the next m1 rows belong to
4118    process 1, the next m2 rows belong to process 2 etc., where
4119    m0,m1,m2,... are the input parameter 'm', i.e. each processor stores
4120    values corresponding to an [m x N] submatrix.
4121 
4122    The columns are logically partitioned with the n0 columns belonging
4123    to 0th partition, the next n1 columns belonging to the next
4124    partition etc.. where n0,n1,n2... are the input parameter 'n'.
4125 
4126    The DIAGONAL portion of the local submatrix on any given processor
4127    is the submatrix corresponding to the rows and columns m,n owned by
4128    the given processor, i.e. the diagonal matrix on
4129    process 0 is [m0 x n0], the diagonal matrix on process 1 is [m1 x n1],
4130    etc. The remaining portion of the local submatrix [m x (N-n)]
4131    constitutes the OFF-DIAGONAL portion. The example below better
4132    illustrates this concept.
4133 
4134    For a square global matrix we define each processor's diagonal portion
4135    to be its local rows and the corresponding columns (a square submatrix);
4136    each processor's off-diagonal portion encompasses the remainder of the
4137    local matrix (a rectangular submatrix).
4138 
4139    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
4140 
4141    When calling this routine with a single process communicator, a matrix of
4142    type SEQAIJ is returned.  If a matrix of type MPIAIJ is desired for this
4143    type of communicator, use the construction mechanism
4147 
4148 $     MatCreate(...,&A);
4149 $     MatSetType(A,MATMPIAIJ);
4150 $     MatSetSizes(A, m,n,M,N);
4151 $     MatMPIAIJSetPreallocation(A,...);
4152 
4153    By default, this format uses inodes (identical nodes) when possible.
4154    We search for consecutive rows with the same nonzero structure, thereby
4155    reusing matrix information to achieve increased efficiency.
4156 
4157    Options Database Keys:
4158 +  -mat_no_inode  - Do not use inodes
4159 .  -mat_inode_limit <limit> - Sets inode limit (max limit=5)
4160 -  -mat_aij_oneindex - Internally use indexing starting at 1
4161         rather than 0.  Note that when calling MatSetValues(),
4162         the user still MUST index entries starting at 0!
4163 
4164 
4165    Example usage:
4166 
4167    Consider the following 8x8 matrix with 34 non-zero values, that is
4168    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
4169    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
4170    as follows
4171 
4172 .vb
4173             1  2  0  |  0  3  0  |  0  4
4174     Proc0   0  5  6  |  7  0  0  |  8  0
4175             9  0 10  | 11  0  0  | 12  0
4176     -------------------------------------
4177            13  0 14  | 15 16 17  |  0  0
4178     Proc1   0 18  0  | 19 20 21  |  0  0
4179             0  0  0  | 22 23  0  | 24  0
4180     -------------------------------------
4181     Proc2  25 26 27  |  0  0 28  | 29  0
4182            30  0  0  | 31 32 33  |  0 34
4183 .ve
4184 
4185    This can be represented as a collection of submatrices as
4186 
4187 .vb
4188       A B C
4189       D E F
4190       G H I
4191 .ve
4192 
4193    Where the submatrices A,B,C are owned by proc0, D,E,F are
4194    owned by proc1, G,H,I are owned by proc2.
4195 
4196    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4197    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4198    The 'M','N' parameters are 8,8, and have the same values on all procs.
4199 
4200    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
4201    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
4202    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
4203    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
4204    part as SeqAIJ matrices, e.g., proc1 will store [E] as a SeqAIJ
4205    matrix, and [DF] as another SeqAIJ matrix.
4206 
4207    When d_nz, o_nz parameters are specified, d_nz storage elements are
4208    allocated for every row of the local diagonal submatrix, and o_nz
4209    storage locations are allocated for every row of the OFF-DIAGONAL submat.
4210    One way to choose d_nz and o_nz is to use the max nonzeros per local
4211    row for each of the local DIAGONAL and OFF-DIAGONAL submatrices.
4212    In this case, the values of d_nz,o_nz are
4213 .vb
4214      proc0 : dnz = 2, o_nz = 2
4215      proc1 : dnz = 3, o_nz = 2
4216      proc2 : dnz = 1, o_nz = 4
4217 .ve
4218    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
4219    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
4220    for proc2, i.e. we are using 12+15+10=37 storage locations to store
4221    34 values.
4222 
4223    When d_nnz, o_nnz parameters are specified, the storage is specified
4224    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
4225    In the above case the values for d_nnz,o_nnz are
4226 .vb
4227      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
4228      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
4229      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
4230 .ve
4231    Here the space allocated is the sum of all the above values, i.e. 34, and
4232    hence pre-allocation is perfect.
4233 
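   A minimal calling sketch for proc0 of the example above (error checking omitted; comm
   is an assumed three-process communicator and the nnz arrays are just those listed above):

.vb
     Mat      A;
     PetscInt d_nnz[] = {2,2,2}, o_nnz[] = {2,2,2};

     MatCreateAIJ(comm,3,3,8,8,0,d_nnz,0,o_nnz,&A);
     ... MatSetValues(), MatAssemblyBegin(), MatAssemblyEnd() ...
.ve
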
4234    Level: intermediate
4235 
4236 .keywords: matrix, aij, compressed row, sparse, parallel
4237 
4238 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
4239           MATMPIAIJ, MatCreateMPIAIJWithArrays()
4240 @*/
4241 PetscErrorCode  MatCreateAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
4242 {
4243   PetscErrorCode ierr;
4244   PetscMPIInt    size;
4245 
4246   PetscFunctionBegin;
4247   ierr = MatCreate(comm,A);CHKERRQ(ierr);
4248   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
4249   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4250   if (size > 1) {
4251     ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr);
4252     ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
4253   } else {
4254     ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr);
4255     ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr);
4256   }
4257   PetscFunctionReturn(0);
4258 }
4259 
4260 PetscErrorCode MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,const PetscInt *colmap[])
4261 {
4262   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
4263   PetscBool      flg;
4264   PetscErrorCode ierr;
4265 
4266   PetscFunctionBegin;
4267   ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&flg);CHKERRQ(ierr);
4268   if (!flg) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"This function requires a MATMPIAIJ matrix as input");
4269   if (Ad)     *Ad     = a->A;
4270   if (Ao)     *Ao     = a->B;
4271   if (colmap) *colmap = a->garray;
4272   PetscFunctionReturn(0);
4273 }
4274 
4275 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
4276 {
4277   PetscErrorCode ierr;
4278   PetscInt       m,N,i,rstart,nnz,Ii;
4279   PetscInt       *indx;
4280   PetscScalar    *values;
4281 
4282   PetscFunctionBegin;
4283   ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
4284   if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
4285     PetscInt       *dnz,*onz,sum,bs,cbs;
4286 
4287     if (n == PETSC_DECIDE) {
4288       ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr);
4289     }
4290     /* Check sum(n) = N */
4291     ierr = MPIU_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
4292     if (sum != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns %D != global columns %D",sum,N);
4293 
4294     ierr    = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
4295     rstart -= m;
4296 
4297     ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
4298     for (i=0; i<m; i++) {
4299       ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr);
4300       ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr);
4301       ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr);
4302     }
4303 
4304     ierr = MatCreate(comm,outmat);CHKERRQ(ierr);
4305     ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
4306     ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr);
4307     ierr = MatSetBlockSizes(*outmat,bs,cbs);CHKERRQ(ierr);
4308     ierr = MatSetType(*outmat,MATAIJ);CHKERRQ(ierr);
4309     ierr = MatSeqAIJSetPreallocation(*outmat,0,dnz);CHKERRQ(ierr);
4310     ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr);
4311     ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
4312   }
4313 
4314   /* numeric phase */
4315   ierr = MatGetOwnershipRange(*outmat,&rstart,NULL);CHKERRQ(ierr);
4316   for (i=0; i<m; i++) {
4317     ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
4318     Ii   = i + rstart;
4319     ierr = MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
4320     ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
4321   }
4322   ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4323   ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4324   PetscFunctionReturn(0);
4325 }
4326 
4327 PetscErrorCode MatFileSplit(Mat A,char *outfile)
4328 {
4329   PetscErrorCode    ierr;
4330   PetscMPIInt       rank;
4331   PetscInt          m,N,i,rstart,nnz;
4332   size_t            len;
4333   const PetscInt    *indx;
4334   PetscViewer       out;
4335   char              *name;
4336   Mat               B;
4337   const PetscScalar *values;
4338 
4339   PetscFunctionBegin;
4340   ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr);
4341   ierr = MatGetSize(A,0,&N);CHKERRQ(ierr);
4342   /* Should this be the type of the diagonal block of A? */
4343   ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr);
4344   ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr);
4345   ierr = MatSetBlockSizesFromMats(B,A,A);CHKERRQ(ierr);
4346   ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr);
4347   ierr = MatSeqAIJSetPreallocation(B,0,NULL);CHKERRQ(ierr);
4348   ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr);
4349   for (i=0; i<m; i++) {
4350     ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
4351     ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
4352     ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
4353   }
4354   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4355   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4356 
4357   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRQ(ierr);
4358   ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr);
4359   ierr = PetscMalloc1(len+5,&name);CHKERRQ(ierr);
4360   sprintf(name,"%s.%d",outfile,rank);
4361   ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr);
4362   ierr = PetscFree(name);CHKERRQ(ierr);
4363   ierr = MatView(B,out);CHKERRQ(ierr);
4364   ierr = PetscViewerDestroy(&out);CHKERRQ(ierr);
4365   ierr = MatDestroy(&B);CHKERRQ(ierr);
4366   PetscFunctionReturn(0);
4367 }
4368 
4369 PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(Mat A)
4370 {
4371   PetscErrorCode      ierr;
4372   Mat_Merge_SeqsToMPI *merge;
4373   PetscContainer      container;
4374 
4375   PetscFunctionBegin;
4376   ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr);
4377   if (container) {
4378     ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr);
4379     ierr = PetscFree(merge->id_r);CHKERRQ(ierr);
4380     ierr = PetscFree(merge->len_s);CHKERRQ(ierr);
4381     ierr = PetscFree(merge->len_r);CHKERRQ(ierr);
4382     ierr = PetscFree(merge->bi);CHKERRQ(ierr);
4383     ierr = PetscFree(merge->bj);CHKERRQ(ierr);
4384     ierr = PetscFree(merge->buf_ri[0]);CHKERRQ(ierr);
4385     ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr);
4386     ierr = PetscFree(merge->buf_rj[0]);CHKERRQ(ierr);
4387     ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr);
4388     ierr = PetscFree(merge->coi);CHKERRQ(ierr);
4389     ierr = PetscFree(merge->coj);CHKERRQ(ierr);
4390     ierr = PetscFree(merge->owners_co);CHKERRQ(ierr);
4391     ierr = PetscLayoutDestroy(&merge->rowmap);CHKERRQ(ierr);
4392     ierr = PetscFree(merge);CHKERRQ(ierr);
4393     ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr);
4394   }
4395   ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr);
4396   PetscFunctionReturn(0);
4397 }
4398 
4399 #include <../src/mat/utils/freespace.h>
4400 #include <petscbt.h>
4401 
4402 PetscErrorCode MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat,Mat mpimat)
4403 {
4404   PetscErrorCode      ierr;
4405   MPI_Comm            comm;
4406   Mat_SeqAIJ          *a  =(Mat_SeqAIJ*)seqmat->data;
4407   PetscMPIInt         size,rank,taga,*len_s;
4408   PetscInt            N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj;
4409   PetscInt            proc,m;
4410   PetscInt            **buf_ri,**buf_rj;
4411   PetscInt            k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj;
4412   PetscInt            nrows,**buf_ri_k,**nextrow,**nextai;
4413   MPI_Request         *s_waits,*r_waits;
4414   MPI_Status          *status;
4415   MatScalar           *aa=a->a;
4416   MatScalar           **abuf_r,*ba_i;
4417   Mat_Merge_SeqsToMPI *merge;
4418   PetscContainer      container;
4419 
4420   PetscFunctionBegin;
4421   ierr = PetscObjectGetComm((PetscObject)mpimat,&comm);CHKERRQ(ierr);
4422   ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
4423 
4424   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4425   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4426 
4427   ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr);
4428   ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr);
4429 
4430   bi     = merge->bi;
4431   bj     = merge->bj;
4432   buf_ri = merge->buf_ri;
4433   buf_rj = merge->buf_rj;
4434 
4435   ierr   = PetscMalloc1(size,&status);CHKERRQ(ierr);
4436   owners = merge->rowmap->range;
4437   len_s  = merge->len_s;
4438 
4439   /* send and recv matrix values */
4440   /*-----------------------------*/
4441   ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr);
4442   ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr);
4443 
4444   ierr = PetscMalloc1(merge->nsend+1,&s_waits);CHKERRQ(ierr);
4445   for (proc=0,k=0; proc<size; proc++) {
4446     if (!len_s[proc]) continue;
4447     i    = owners[proc];
4448     ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr);
4449     k++;
4450   }
4451 
4452   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);}
4453   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);}
4454   ierr = PetscFree(status);CHKERRQ(ierr);
4455 
4456   ierr = PetscFree(s_waits);CHKERRQ(ierr);
4457   ierr = PetscFree(r_waits);CHKERRQ(ierr);
4458 
4459   /* insert mat values of mpimat */
4460   /*----------------------------*/
4461   ierr = PetscMalloc1(N,&ba_i);CHKERRQ(ierr);
4462   ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr);
4463 
4464   for (k=0; k<merge->nrecv; k++) {
4465     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
4466     nrows       = *(buf_ri_k[k]);
4467     nextrow[k]  = buf_ri_k[k]+1;  /* next row number of k-th recved i-structure */
4468     nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th recved i-structure  */
4469   }
4470 
4471   /* set values of ba */
4472   m = merge->rowmap->n;
4473   for (i=0; i<m; i++) {
4474     arow = owners[rank] + i;
4475     bj_i = bj+bi[i];  /* col indices of the i-th row of mpimat */
4476     bnzi = bi[i+1] - bi[i];
4477     ierr = PetscMemzero(ba_i,bnzi*sizeof(PetscScalar));CHKERRQ(ierr);
4478 
4479     /* add local non-zero vals of this proc's seqmat into ba */
4480     anzi   = ai[arow+1] - ai[arow];
4481     aj     = a->j + ai[arow];
4482     aa     = a->a + ai[arow];
4483     nextaj = 0;
4484     for (j=0; nextaj<anzi; j++) {
4485       if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
4486         ba_i[j] += aa[nextaj++];
4487       }
4488     }
4489 
4490     /* add received vals into ba */
4491     for (k=0; k<merge->nrecv; k++) { /* k-th received message */
4492       /* i-th row */
4493       if (i == *nextrow[k]) {
4494         anzi   = *(nextai[k]+1) - *nextai[k];
4495         aj     = buf_rj[k] + *(nextai[k]);
4496         aa     = abuf_r[k] + *(nextai[k]);
4497         nextaj = 0;
4498         for (j=0; nextaj<anzi; j++) {
4499           if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
4500             ba_i[j] += aa[nextaj++];
4501           }
4502         }
4503         nextrow[k]++; nextai[k]++;
4504       }
4505     }
4506     ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr);
4507   }
4508   ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4509   ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4510 
4511   ierr = PetscFree(abuf_r[0]);CHKERRQ(ierr);
4512   ierr = PetscFree(abuf_r);CHKERRQ(ierr);
4513   ierr = PetscFree(ba_i);CHKERRQ(ierr);
4514   ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
4515   ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
4516   PetscFunctionReturn(0);
4517 }
4518 
4519 PetscErrorCode  MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat)
4520 {
4521   PetscErrorCode      ierr;
4522   Mat                 B_mpi;
4523   Mat_SeqAIJ          *a=(Mat_SeqAIJ*)seqmat->data;
4524   PetscMPIInt         size,rank,tagi,tagj,*len_s,*len_si,*len_ri;
4525   PetscInt            **buf_rj,**buf_ri,**buf_ri_k;
4526   PetscInt            M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j;
4527   PetscInt            len,proc,*dnz,*onz,bs,cbs;
4528   PetscInt            k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0;
4529   PetscInt            nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai;
4530   MPI_Request         *si_waits,*sj_waits,*ri_waits,*rj_waits;
4531   MPI_Status          *status;
4532   PetscFreeSpaceList  free_space=NULL,current_space=NULL;
4533   PetscBT             lnkbt;
4534   Mat_Merge_SeqsToMPI *merge;
4535   PetscContainer      container;
4536 
4537   PetscFunctionBegin;
4538   ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4539 
4540   /* make sure it is a PETSc comm */
4541   ierr = PetscCommDuplicate(comm,&comm,NULL);CHKERRQ(ierr);
4542   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4543   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4544 
4545   ierr = PetscNew(&merge);CHKERRQ(ierr);
4546   ierr = PetscMalloc1(size,&status);CHKERRQ(ierr);
4547 
4548   /* determine row ownership */
4549   /*---------------------------------------------------------*/
4550   ierr = PetscLayoutCreate(comm,&merge->rowmap);CHKERRQ(ierr);
4551   ierr = PetscLayoutSetLocalSize(merge->rowmap,m);CHKERRQ(ierr);
4552   ierr = PetscLayoutSetSize(merge->rowmap,M);CHKERRQ(ierr);
4553   ierr = PetscLayoutSetBlockSize(merge->rowmap,1);CHKERRQ(ierr);
4554   ierr = PetscLayoutSetUp(merge->rowmap);CHKERRQ(ierr);
4555   ierr = PetscMalloc1(size,&len_si);CHKERRQ(ierr);
4556   ierr = PetscMalloc1(size,&merge->len_s);CHKERRQ(ierr);
4557 
4558   m      = merge->rowmap->n;
4559   owners = merge->rowmap->range;
4560 
4561   /* determine the number of messages to send, their lengths */
4562   /*---------------------------------------------------------*/
4563   len_s = merge->len_s;
4564 
4565   len          = 0; /* length of buf_si[] */
4566   merge->nsend = 0;
4567   for (proc=0; proc<size; proc++) {
4568     len_si[proc] = 0;
4569     if (proc == rank) {
4570       len_s[proc] = 0;
4571     } else {
4572       len_si[proc] = owners[proc+1] - owners[proc] + 1;
4573       len_s[proc]  = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */
4574     }
4575     if (len_s[proc]) {
4576       merge->nsend++;
4577       nrows = 0;
4578       for (i=owners[proc]; i<owners[proc+1]; i++) {
4579         if (ai[i+1] > ai[i]) nrows++;
4580       }
4581       len_si[proc] = 2*(nrows+1);
4582       len         += len_si[proc];
4583     }
4584   }
4585 
4586   /* determine the number and length of messages to receive for ij-structure */
4587   /*-------------------------------------------------------------------------*/
4588   ierr = PetscGatherNumberOfMessages(comm,NULL,len_s,&merge->nrecv);CHKERRQ(ierr);
4589   ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr);
4590 
4591   /* post the Irecv of j-structure */
4592   /*-------------------------------*/
4593   ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr);
4594   ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr);
4595 
4596   /* post the Isend of j-structure */
4597   /*--------------------------------*/
4598   ierr = PetscMalloc2(merge->nsend,&si_waits,merge->nsend,&sj_waits);CHKERRQ(ierr);
4599 
4600   for (proc=0, k=0; proc<size; proc++) {
4601     if (!len_s[proc]) continue;
4602     i    = owners[proc];
4603     ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr);
4604     k++;
4605   }
4606 
4607   /* receives and sends of j-structure are complete */
4608   /*------------------------------------------------*/
4609   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);}
4610   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);}
4611 
4612   /* send and recv i-structure */
4613   /*---------------------------*/
4614   ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr);
4615   ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr);
4616 
4617   ierr   = PetscMalloc1(len+1,&buf_s);CHKERRQ(ierr);
4618   buf_si = buf_s;  /* points to the beginning of k-th msg to be sent */
4619   for (proc=0,k=0; proc<size; proc++) {
4620     if (!len_s[proc]) continue;
4621     /* form outgoing message for i-structure:
4622          buf_si[0]:                 nrows to be sent
4623                [1:nrows]:           row index (global)
4624                [nrows+1:2*nrows+1]: i-structure index
4625     */
4626     /*-------------------------------------------*/
4627     nrows       = len_si[proc]/2 - 1;
4628     buf_si_i    = buf_si + nrows+1;
4629     buf_si[0]   = nrows;
4630     buf_si_i[0] = 0;
4631     nrows       = 0;
4632     for (i=owners[proc]; i<owners[proc+1]; i++) {
4633       anzi = ai[i+1] - ai[i];
4634       if (anzi) {
4635         buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */
4636         buf_si[nrows+1]   = i-owners[proc]; /* local row index */
4637         nrows++;
4638       }
4639     }
4640     ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr);
4641     k++;
4642     buf_si += len_si[proc];
4643   }
4644 
4645   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);}
4646   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);}
4647 
4648   ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr);
4649   for (i=0; i<merge->nrecv; i++) {
4650     ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr);
4651   }
4652 
4653   ierr = PetscFree(len_si);CHKERRQ(ierr);
4654   ierr = PetscFree(len_ri);CHKERRQ(ierr);
4655   ierr = PetscFree(rj_waits);CHKERRQ(ierr);
4656   ierr = PetscFree2(si_waits,sj_waits);CHKERRQ(ierr);
4657   ierr = PetscFree(ri_waits);CHKERRQ(ierr);
4658   ierr = PetscFree(buf_s);CHKERRQ(ierr);
4659   ierr = PetscFree(status);CHKERRQ(ierr);
4660 
4661   /* compute a local seq matrix in each processor */
4662   /*----------------------------------------------*/
4663   /* allocate bi array and free space for accumulating nonzero column info */
4664   ierr  = PetscMalloc1(m+1,&bi);CHKERRQ(ierr);
4665   bi[0] = 0;
4666 
4667   /* create and initialize a linked list */
4668   nlnk = N+1;
4669   ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4670 
4671   /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */
4672   len  = ai[owners[rank+1]] - ai[owners[rank]];
4673   ierr = PetscFreeSpaceGet(PetscIntMultTruncate(2,len)+1,&free_space);CHKERRQ(ierr);
4674 
4675   current_space = free_space;
4676 
4677   /* determine symbolic info for each local row */
4678   ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr);
4679 
4680   for (k=0; k<merge->nrecv; k++) {
4681     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
4682     nrows       = *buf_ri_k[k];
4683     nextrow[k]  = buf_ri_k[k] + 1;  /* next row number of k-th recved i-structure */
4684     nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th recved i-structure  */
4685   }
4686 
4687   ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
4688   len  = 0;
4689   for (i=0; i<m; i++) {
4690     bnzi = 0;
4691     /* add local non-zero cols of this proc's seqmat into lnk */
4692     arow  = owners[rank] + i;
4693     anzi  = ai[arow+1] - ai[arow];
4694     aj    = a->j + ai[arow];
4695     ierr  = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4696     bnzi += nlnk;
4697     /* add received col data into lnk */
4698     for (k=0; k<merge->nrecv; k++) { /* k-th received message */
4699       if (i == *nextrow[k]) { /* i-th row */
4700         anzi  = *(nextai[k]+1) - *nextai[k];
4701         aj    = buf_rj[k] + *nextai[k];
4702         ierr  = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4703         bnzi += nlnk;
4704         nextrow[k]++; nextai[k]++;
4705       }
4706     }
4707     if (len < bnzi) len = bnzi;  /* =max(bnzi) */
4708 
4709     /* if free space is not available, make more free space */
4710     if (current_space->local_remaining<bnzi) {
4711       ierr = PetscFreeSpaceGet(PetscIntSumTruncate(bnzi,current_space->total_array_size),&current_space);CHKERRQ(ierr);
4712       nspacedouble++;
4713     }
4714     /* copy data into free space, then initialize lnk */
4715     ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
4716     ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr);
4717 
4718     current_space->array           += bnzi;
4719     current_space->local_used      += bnzi;
4720     current_space->local_remaining -= bnzi;
4721 
4722     bi[i+1] = bi[i] + bnzi;
4723   }
4724 
4725   ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
4726 
4727   ierr = PetscMalloc1(bi[m]+1,&bj);CHKERRQ(ierr);
4728   ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr);
4729   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
4730 
4731   /* create symbolic parallel matrix B_mpi */
4732   /*---------------------------------------*/
4733   ierr = MatGetBlockSizes(seqmat,&bs,&cbs);CHKERRQ(ierr);
4734   ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr);
4735   if (n==PETSC_DECIDE) {
4736     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr);
4737   } else {
4738     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
4739   }
4740   ierr = MatSetBlockSizes(B_mpi,bs,cbs);CHKERRQ(ierr);
4741   ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr);
4742   ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr);
4743   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
4744   ierr = MatSetOption(B_mpi,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
4745 
4746   /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */
4747   B_mpi->assembled    = PETSC_FALSE;
4748   B_mpi->ops->destroy = MatDestroy_MPIAIJ_SeqsToMPI;
4749   merge->bi           = bi;
4750   merge->bj           = bj;
4751   merge->buf_ri       = buf_ri;
4752   merge->buf_rj       = buf_rj;
4753   merge->coi          = NULL;
4754   merge->coj          = NULL;
4755   merge->owners_co    = NULL;
4756 
4757   ierr = PetscCommDestroy(&comm);CHKERRQ(ierr);
4758 
4759   /* attach the supporting struct to B_mpi for reuse */
4760   ierr    = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
4761   ierr    = PetscContainerSetPointer(container,merge);CHKERRQ(ierr);
4762   ierr    = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr);
4763   ierr    = PetscContainerDestroy(&container);CHKERRQ(ierr);
4764   *mpimat = B_mpi;
4765 
4766   ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4767   PetscFunctionReturn(0);
4768 }
4769 
4770 /*@C
4771       MatCreateMPIAIJSumSeqAIJ - Creates a MATMPIAIJ matrix by adding sequential
4772                  matrices from each processor
4773 
4774     Collective on MPI_Comm
4775 
4776    Input Parameters:
4777 +    comm - the communicator the parallel matrix will live on
4778 .    seqmat - the input sequential matrices
4779 .    m - number of local rows (or PETSC_DECIDE)
4780 .    n - number of local columns (or PETSC_DECIDE)
4781 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4782 
4783    Output Parameter:
4784 .    mpimat - the parallel matrix generated
4785 
4786     Level: advanced
4787 
4788    Notes:
4789      The dimensions of the sequential matrix in each processor MUST be the same.
4790      The input seqmat is included in the container "Mat_Merge_SeqsToMPI", and will be
4791      destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat.
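
   A minimal calling sketch (error checking omitted; Aloc is an assumed SeqAIJ matrix of
   the same dimensions on every process):

.vb
     Mat C;

     MatCreateMPIAIJSumSeqAIJ(comm,Aloc,PETSC_DECIDE,PETSC_DECIDE,MAT_INITIAL_MATRIX,&C);
     ... later, after changing the values (but not the pattern) of Aloc ...
     MatCreateMPIAIJSumSeqAIJ(comm,Aloc,PETSC_DECIDE,PETSC_DECIDE,MAT_REUSE_MATRIX,&C);
.ve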
4792 @*/
4793 PetscErrorCode MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat)
4794 {
4795   PetscErrorCode ierr;
4796   PetscMPIInt    size;
4797 
4798   PetscFunctionBegin;
4799   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4800   if (size == 1) {
4801     ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4802     if (scall == MAT_INITIAL_MATRIX) {
4803       ierr = MatDuplicate(seqmat,MAT_COPY_VALUES,mpimat);CHKERRQ(ierr);
4804     } else {
4805       ierr = MatCopy(seqmat,*mpimat,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
4806     }
4807     ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4808     PetscFunctionReturn(0);
4809   }
4810   ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4811   if (scall == MAT_INITIAL_MATRIX) {
4812     ierr = MatCreateMPIAIJSumSeqAIJSymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr);
4813   }
4814   ierr = MatCreateMPIAIJSumSeqAIJNumeric(seqmat,*mpimat);CHKERRQ(ierr);
4815   ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4816   PetscFunctionReturn(0);
4817 }
4818 
4819 /*@
4820      MatMPIAIJGetLocalMat - Creates a SeqAIJ matrix from a MATMPIAIJ matrix by taking all its local rows and putting them into a sequential matrix with
4821           mlocal rows and n columns, where mlocal is the row count obtained with MatGetLocalSize() and n is the global column count obtained
4822           with MatGetSize().
4823 
4824     Not Collective
4825 
4826    Input Parameters:
4827 +    A - the matrix
4828 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4829 
4830    Output Parameter:
4831 .    A_loc - the local sequential matrix generated
4832 
4833     Level: developer
4834 
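   A minimal calling sketch (error checking omitted; A is an assumed MATMPIAIJ matrix):

.vb
     Mat Aloc;

     MatMPIAIJGetLocalMat(A,MAT_INITIAL_MATRIX,&Aloc);
     ... use Aloc; after the values (but not the pattern) of A change ...
     MatMPIAIJGetLocalMat(A,MAT_REUSE_MATRIX,&Aloc);
     MatDestroy(&Aloc);
.ve
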
4835 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMatCondensed()
4836 
4837 @*/
4838 PetscErrorCode MatMPIAIJGetLocalMat(Mat A,MatReuse scall,Mat *A_loc)
4839 {
4840   PetscErrorCode ierr;
4841   Mat_MPIAIJ     *mpimat=(Mat_MPIAIJ*)A->data;
4842   Mat_SeqAIJ     *mat,*a,*b;
4843   PetscInt       *ai,*aj,*bi,*bj,*cmap=mpimat->garray;
4844   MatScalar      *aa,*ba,*cam;
4845   PetscScalar    *ca;
4846   PetscInt       am=A->rmap->n,i,j,k,cstart=A->cmap->rstart;
4847   PetscInt       *ci,*cj,col,ncols_d,ncols_o,jo;
4848   PetscBool      match;
4849   MPI_Comm       comm;
4850   PetscMPIInt    size;
4851 
4852   PetscFunctionBegin;
4853   ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr);
4854   if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MATMPIAIJ matrix as input");
4855   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
4856   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4857   if (size == 1 && scall == MAT_REUSE_MATRIX) PetscFunctionReturn(0);
4858 
4859   ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
4860   a = (Mat_SeqAIJ*)(mpimat->A)->data;
4861   b = (Mat_SeqAIJ*)(mpimat->B)->data;
4862   ai = a->i; aj = a->j; bi = b->i; bj = b->j;
4863   aa = a->a; ba = b->a;
4864   if (scall == MAT_INITIAL_MATRIX) {
4865     if (size == 1) {
4866       ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ai,aj,aa,A_loc);CHKERRQ(ierr);
4867       PetscFunctionReturn(0);
4868     }
4869 
4870     ierr  = PetscMalloc1(1+am,&ci);CHKERRQ(ierr);
4871     ci[0] = 0;
4872     for (i=0; i<am; i++) {
4873       ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]);
4874     }
4875     ierr = PetscMalloc1(1+ci[am],&cj);CHKERRQ(ierr);
4876     ierr = PetscMalloc1(1+ci[am],&ca);CHKERRQ(ierr);
4877     k    = 0;
4878     for (i=0; i<am; i++) {
4879       ncols_o = bi[i+1] - bi[i];
4880       ncols_d = ai[i+1] - ai[i];
4881       /* off-diagonal portion of A */
4882       for (jo=0; jo<ncols_o; jo++) {
4883         col = cmap[*bj];
4884         if (col >= cstart) break;
4885         cj[k]   = col; bj++;
4886         ca[k++] = *ba++;
4887       }
4888       /* diagonal portion of A */
4889       for (j=0; j<ncols_d; j++) {
4890         cj[k]   = cstart + *aj++;
4891         ca[k++] = *aa++;
4892       }
4893       /* off-diagonal portion of A */
4894       for (j=jo; j<ncols_o; j++) {
4895         cj[k]   = cmap[*bj++];
4896         ca[k++] = *ba++;
4897       }
4898     }
4899     /* put together the new matrix */
4900     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr);
4901     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
4902     /* Since these are PETSc arrays, change flags to free them as necessary. */
4903     mat          = (Mat_SeqAIJ*)(*A_loc)->data;
4904     mat->free_a  = PETSC_TRUE;
4905     mat->free_ij = PETSC_TRUE;
4906     mat->nonew   = 0;
4907   } else if (scall == MAT_REUSE_MATRIX) {
4908     mat=(Mat_SeqAIJ*)(*A_loc)->data;
4909     ci = mat->i; cj = mat->j; cam = mat->a;
4910     for (i=0; i<am; i++) {
4911       /* off-diagonal portion of A */
4912       ncols_o = bi[i+1] - bi[i];
4913       for (jo=0; jo<ncols_o; jo++) {
4914         col = cmap[*bj];
4915         if (col >= cstart) break;
4916         *cam++ = *ba++; bj++;
4917       }
4918       /* diagonal portion of A */
4919       ncols_d = ai[i+1] - ai[i];
4920       for (j=0; j<ncols_d; j++) *cam++ = *aa++;
4921       /* off-diagonal portion of A */
4922       for (j=jo; j<ncols_o; j++) {
4923         *cam++ = *ba++; bj++;
4924       }
4925     }
4926   } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
4927   ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
4928   PetscFunctionReturn(0);
4929 }
4930 
4931 /*@C
4932      MatMPIAIJGetLocalMatCondensed - Creates a SeqAIJ matrix from a MATMPIAIJ matrix by taking all its local rows and NON-ZERO columns
4933 
4934     Not Collective
4935 
4936    Input Parameters:
4937 +    A - the matrix
4938 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4939 -    row, col - index sets of rows and columns to extract (or NULL)
4940 
4941    Output Parameter:
4942 .    A_loc - the local sequential matrix generated
4943 
4944     Level: developer
4945 
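   A minimal calling sketch with the default row/column selection (error checking omitted;
   A is an assumed MATMPIAIJ matrix):

.vb
     Mat Aloc;

     MatMPIAIJGetLocalMatCondensed(A,MAT_INITIAL_MATRIX,NULL,NULL,&Aloc);
     ... use Aloc ...
     MatDestroy(&Aloc);
.ve
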
4946 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMat()
4947 
4948 @*/
4949 PetscErrorCode MatMPIAIJGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc)
4950 {
4951   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
4952   PetscErrorCode ierr;
4953   PetscInt       i,start,end,ncols,nzA,nzB,*cmap,imark,*idx;
4954   IS             isrowa,iscola;
4955   Mat            *aloc;
4956   PetscBool      match;
4957 
4958   PetscFunctionBegin;
4959   ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr);
4960   if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MATMPIAIJ matrix as input");
4961   ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
4962   if (!row) {
4963     start = A->rmap->rstart; end = A->rmap->rend;
4964     ierr  = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr);
4965   } else {
4966     isrowa = *row;
4967   }
4968   if (!col) {
4969     start = A->cmap->rstart;
4970     cmap  = a->garray;
4971     nzA   = a->A->cmap->n;
4972     nzB   = a->B->cmap->n;
4973     ierr  = PetscMalloc1(nzA+nzB, &idx);CHKERRQ(ierr);
4974     ncols = 0;
4975     for (i=0; i<nzB; i++) {
4976       if (cmap[i] < start) idx[ncols++] = cmap[i];
4977       else break;
4978     }
4979     imark = i;
4980     for (i=0; i<nzA; i++) idx[ncols++] = start + i;
4981     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i];
4982     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&iscola);CHKERRQ(ierr);
4983   } else {
4984     iscola = *col;
4985   }
4986   if (scall != MAT_INITIAL_MATRIX) {
4987     ierr    = PetscMalloc1(1,&aloc);CHKERRQ(ierr);
4988     aloc[0] = *A_loc;
4989   }
4990   ierr   = MatCreateSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr);
4991   *A_loc = aloc[0];
4992   ierr   = PetscFree(aloc);CHKERRQ(ierr);
4993   if (!row) {
4994     ierr = ISDestroy(&isrowa);CHKERRQ(ierr);
4995   }
4996   if (!col) {
4997     ierr = ISDestroy(&iscola);CHKERRQ(ierr);
4998   }
4999   ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
5000   PetscFunctionReturn(0);
5001 }
5002 
5003 /*@C
5004     MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns of local A
5005 
5006     Collective on Mat
5007 
5008    Input Parameters:
5009 +    A,B - the matrices in mpiaij format
5010 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5011 -    rowb, colb - index sets of rows and columns of B to extract (or NULL)
5012 
5013    Output Parameter:
5014 +    rowb, colb - index sets of rows and columns of B to extract
5015 -    B_seq - the sequential matrix generated
5016 
5017     Level: developer
5018 
5019 @*/
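   A minimal calling sketch (error checking omitted; A and B are assumed MATMPIAIJ matrices
   with compatible layouts); the index sets and B_seq created with MAT_INITIAL_MATRIX are
   owned by the caller:

.vb
     IS  rowb = NULL,colb = NULL;
     Mat Bseq = NULL;

     MatGetBrowsOfAcols(A,B,MAT_INITIAL_MATRIX,&rowb,&colb,&Bseq);
     ... reuse after the values (but not the pattern) of B change ...
     MatGetBrowsOfAcols(A,B,MAT_REUSE_MATRIX,&rowb,&colb,&Bseq);
     ISDestroy(&rowb); ISDestroy(&colb); MatDestroy(&Bseq);
.ve
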
5020 PetscErrorCode MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,Mat *B_seq)
5021 {
5022   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
5023   PetscErrorCode ierr;
5024   PetscInt       *idx,i,start,ncols,nzA,nzB,*cmap,imark;
5025   IS             isrowb,iscolb;
5026   Mat            *bseq=NULL;
5027 
5028   PetscFunctionBegin;
5029   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
5030     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
5031   }
5032   ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
5033 
5034   if (scall == MAT_INITIAL_MATRIX) {
5035     start = A->cmap->rstart;
5036     cmap  = a->garray;
5037     nzA   = a->A->cmap->n;
5038     nzB   = a->B->cmap->n;
5039     ierr  = PetscMalloc1(nzA+nzB, &idx);CHKERRQ(ierr);
5040     ncols = 0;
5041     for (i=0; i<nzB; i++) {  /* row < local row index */
5042       if (cmap[i] < start) idx[ncols++] = cmap[i];
5043       else break;
5044     }
5045     imark = i;
5046     for (i=0; i<nzA; i++) idx[ncols++] = start + i;  /* local rows */
5047     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */
5048     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&isrowb);CHKERRQ(ierr);
5049     ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr);
5050   } else {
5051     if (!rowb || !colb) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX");
5052     isrowb  = *rowb; iscolb = *colb;
5053     ierr    = PetscMalloc1(1,&bseq);CHKERRQ(ierr);
5054     bseq[0] = *B_seq;
5055   }
5056   ierr   = MatCreateSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr);
5057   *B_seq = bseq[0];
5058   ierr   = PetscFree(bseq);CHKERRQ(ierr);
5059   if (!rowb) {
5060     ierr = ISDestroy(&isrowb);CHKERRQ(ierr);
5061   } else {
5062     *rowb = isrowb;
5063   }
5064   if (!colb) {
5065     ierr = ISDestroy(&iscolb);CHKERRQ(ierr);
5066   } else {
5067     *colb = iscolb;
5068   }
5069   ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
5070   PetscFunctionReturn(0);
5071 }
5072 
5073 /*
5074     MatGetBrowsOfAoCols_MPIAIJ - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns
5075     of the OFF-DIAGONAL portion of local A
5076 
5077     Collective on Mat
5078 
5079    Input Parameters:
5080 +    A,B - the matrices in mpiaij format
5081 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5082 
5083    Output Parameters:
5084 +    startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or NULL)
5085 .    startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or NULL)
5086 .    bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or NULL)
5087 -    B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N
5088 
5089     Level: developer
5090 
5091 */
5092 PetscErrorCode MatGetBrowsOfAoCols_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscInt **startsj_s,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth)
5093 {
5094   VecScatter_MPI_General *gen_to,*gen_from;
5095   PetscErrorCode         ierr;
5096   Mat_MPIAIJ             *a=(Mat_MPIAIJ*)A->data;
5097   Mat_SeqAIJ             *b_oth;
5098   VecScatter             ctx =a->Mvctx;
5099   MPI_Comm               comm;
5100   PetscMPIInt            *rprocs,*sprocs,tag=((PetscObject)ctx)->tag,rank;
5101   PetscInt               *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap->n,row,*b_othi,*b_othj;
5102   PetscInt               *rvalues,*svalues;
5103   MatScalar              *b_otha,*bufa,*bufA;
5104   PetscInt               i,j,k,l,ll,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len;
5105   MPI_Request            *rwaits = NULL,*swaits = NULL;
5106   MPI_Status             *sstatus,rstatus;
5107   PetscMPIInt            jj,size;
5108   PetscInt               *cols,sbs,rbs;
5109   PetscScalar            *vals;
5110 
5111   PetscFunctionBegin;
5112   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
5113   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
5114 
5115   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
5116     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
5117   }
5118   ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
5119   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
5120 
5121   if (size == 1) {
5122     if (startsj_s) *startsj_s = NULL;
5123     if (startsj_r) *startsj_r = NULL;
5124     if (bufa_ptr) *bufa_ptr = NULL; *B_oth = NULL;
5125     PetscFunctionReturn(0);
5126   }
5127 
5128   gen_to   = (VecScatter_MPI_General*)ctx->todata;
5129   gen_from = (VecScatter_MPI_General*)ctx->fromdata;
5130   nrecvs   = gen_from->n;
5131   nsends   = gen_to->n;
5132 
5133   ierr    = PetscMalloc2(nrecvs,&rwaits,nsends,&swaits);CHKERRQ(ierr);
5134   srow    = gen_to->indices;    /* local row index to be sent */
5135   sstarts = gen_to->starts;
5136   sprocs  = gen_to->procs;
5137   sstatus = gen_to->sstatus;
5138   sbs     = gen_to->bs;
5139   rstarts = gen_from->starts;
5140   rprocs  = gen_from->procs;
5141   rbs     = gen_from->bs;
5142 
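  /*
     Overview of the MAT_INITIAL_MATRIX path below: the rows of B required from other processes are exactly the
     ones identified by the VecScatter that MatMult() uses for the off-diagonal part of A, so its send and
     receive lists (gen_to/gen_from) drive the communication.  The exchange is done in three passes that mirror
     the CSR layout of the result: row lengths first (i-array), then column indices (j-array), then numerical
     values (a-array).  The offset arrays sstartsj/rstartsj and the send buffer bufa computed here can be handed
     back to the caller so that MAT_REUSE_MATRIX calls only redo the a-array pass.
  */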
5143   if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX;
5144   if (scall == MAT_INITIAL_MATRIX) {
5145     /* i-array */
5146     /*---------*/
5147     /*  post receives */
5148     ierr = PetscMalloc1(rbs*(rstarts[nrecvs] - rstarts[0]),&rvalues);CHKERRQ(ierr);
5149     for (i=0; i<nrecvs; i++) {
5150       rowlen = rvalues + rstarts[i]*rbs;
5151       nrows  = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */
5152       ierr   = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5153     }
5154 
5155     /* pack the outgoing message */
5156     ierr = PetscMalloc2(nsends+1,&sstartsj,nrecvs+1,&rstartsj);CHKERRQ(ierr);
5157 
5158     sstartsj[0] = 0;
5159     rstartsj[0] = 0;
5160     len         = 0; /* total length of j or a array to be sent */
5161     k           = 0;
5162     ierr = PetscMalloc1(sbs*(sstarts[nsends] - sstarts[0]),&svalues);CHKERRQ(ierr);
5163     for (i=0; i<nsends; i++) {
5164       rowlen = svalues + sstarts[i]*sbs;
5165       nrows  = sstarts[i+1]-sstarts[i]; /* num of block rows */
5166       for (j=0; j<nrows; j++) {
5167         row = srow[k] + B->rmap->range[rank]; /* global row idx */
5168         for (l=0; l<sbs; l++) {
5169           ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr); /* rowlength */
5170 
5171           rowlen[j*sbs+l] = ncols;
5172 
5173           len += ncols;
5174           ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr);
5175         }
5176         k++;
5177       }
5178       ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5179 
5180       sstartsj[i+1] = len;  /* starting point of (i+1)-th outgoing msg in bufj and bufa */
5181     }
5182     /* recvs and sends of i-array are completed */
5183     i = nrecvs;
5184     while (i--) {
5185       ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5186     }
5187     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
5188     ierr = PetscFree(svalues);CHKERRQ(ierr);
5189 
5190     /* allocate buffers for sending j and a arrays */
5191     ierr = PetscMalloc1(len+1,&bufj);CHKERRQ(ierr);
5192     ierr = PetscMalloc1(len+1,&bufa);CHKERRQ(ierr);
5193 
5194     /* create i-array of B_oth */
5195     ierr = PetscMalloc1(aBn+2,&b_othi);CHKERRQ(ierr);
5196 
5197     b_othi[0] = 0;
5198     len       = 0; /* total length of j or a array to be received */
5199     k         = 0;
5200     for (i=0; i<nrecvs; i++) {
5201       rowlen = rvalues + rstarts[i]*rbs;
5202       nrows  = rbs*(rstarts[i+1]-rstarts[i]); /* num of rows to be received */
5203       for (j=0; j<nrows; j++) {
5204         b_othi[k+1] = b_othi[k] + rowlen[j];
5205         ierr = PetscIntSumError(rowlen[j],len,&len);CHKERRQ(ierr);
5206         k++;
5207       }
5208       rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */
5209     }
5210     ierr = PetscFree(rvalues);CHKERRQ(ierr);
5211 
5212     /* allocate space for the j and a arrays of B_oth */
5213     ierr = PetscMalloc1(b_othi[aBn]+1,&b_othj);CHKERRQ(ierr);
5214     ierr = PetscMalloc1(b_othi[aBn]+1,&b_otha);CHKERRQ(ierr);
5215 
5216     /* j-array */
5217     /*---------*/
5218     /*  post receives of j-array */
5219     for (i=0; i<nrecvs; i++) {
5220       nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
5221       ierr  = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5222     }
5223 
5224     /* pack the outgoing message j-array */
5225     k = 0;
5226     for (i=0; i<nsends; i++) {
5227       nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5228       bufJ  = bufj+sstartsj[i];
5229       for (j=0; j<nrows; j++) {
5230         row = srow[k++] + B->rmap->range[rank];  /* global row idx */
5231         for (ll=0; ll<sbs; ll++) {
5232           ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr);
5233           for (l=0; l<ncols; l++) {
5234             *bufJ++ = cols[l];
5235           }
5236           ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr);
5237         }
5238       }
5239       ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5240     }
5241 
5242     /* recvs and sends of j-array are completed */
5243     i = nrecvs;
5244     while (i--) {
5245       ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5246     }
5247     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
5248   } else if (scall == MAT_REUSE_MATRIX) {
5249     sstartsj = *startsj_s;
5250     rstartsj = *startsj_r;
5251     bufa     = *bufa_ptr;
5252     b_oth    = (Mat_SeqAIJ*)(*B_oth)->data;
5253     b_otha   = b_oth->a;
5254   } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"MatReuse argument must be MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX");
5255 
5256   /* a-array */
5257   /*---------*/
5258   /*  post receives of a-array */
5259   for (i=0; i<nrecvs; i++) {
5260     nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
5261     ierr  = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5262   }
5263 
5264   /* pack the outgoing message a-array */
5265   k = 0;
5266   for (i=0; i<nsends; i++) {
5267     nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5268     bufA  = bufa+sstartsj[i];
5269     for (j=0; j<nrows; j++) {
5270       row = srow[k++] + B->rmap->range[rank];  /* global row idx */
5271       for (ll=0; ll<sbs; ll++) {
5272         ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr);
5273         for (l=0; l<ncols; l++) {
5274           *bufA++ = vals[l];
5275         }
5276         ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr);
5277       }
5278     }
5279     ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5280   }
5281   /* recvs and sends of a-array are completed */
5282   i = nrecvs;
5283   while (i--) {
5284     ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5285   }
5286   if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
5287   ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr);
5288 
5289   if (scall == MAT_INITIAL_MATRIX) {
5290     /* put together the new matrix */
5291     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr);
5292 
5293     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
5294     /* Since these are PETSc arrays, change flags to free them as necessary. */
5295     b_oth          = (Mat_SeqAIJ*)(*B_oth)->data;
5296     b_oth->free_a  = PETSC_TRUE;
5297     b_oth->free_ij = PETSC_TRUE;
5298     b_oth->nonew   = 0;
5299 
5300     ierr = PetscFree(bufj);CHKERRQ(ierr);
5301     if (!startsj_s || !bufa_ptr) {
5302       ierr = PetscFree2(sstartsj,rstartsj);CHKERRQ(ierr);
5303       ierr = PetscFree(bufa);CHKERRQ(ierr);
5304     } else {
5305       *startsj_s = sstartsj;
5306       *startsj_r = rstartsj;
5307       *bufa_ptr  = bufa;
5308     }
5309   }
5310   ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
5311   PetscFunctionReturn(0);
5312 }
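/*
   A usage sketch (illustrative only): callers that need B_oth repeatedly keep the startsj_s/startsj_r offset
   arrays and the bufa send buffer from the first call and pass them back, so that only the numerical values
   are re-communicated.

     PetscInt  *startsj_s = NULL,*startsj_r = NULL;
     MatScalar *bufa = NULL;
     Mat       B_oth;

     ierr = MatGetBrowsOfAoCols_MPIAIJ(A,B,MAT_INITIAL_MATRIX,&startsj_s,&startsj_r,&bufa,&B_oth);CHKERRQ(ierr);
     ... the numerical values of B change, same nonzero pattern ...
     ierr = MatGetBrowsOfAoCols_MPIAIJ(A,B,MAT_REUSE_MATRIX,&startsj_s,&startsj_r,&bufa,&B_oth);CHKERRQ(ierr);

     ierr = MatDestroy(&B_oth);CHKERRQ(ierr);
     ierr = PetscFree2(startsj_s,startsj_r);CHKERRQ(ierr);
     ierr = PetscFree(bufa);CHKERRQ(ierr);
*/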
5313 
5314 /*@C
5315   MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication.
5316 
5317   Not Collective
5318 
5319   Input Parameter:
5320 . A - The matrix in mpiaij format
5321 
5322   Output Parameters:
5323 + lvec - The local vector holding off-process values from the argument to a matrix-vector product
5324 . colmap - A map from global column index to local index into lvec
5325 - multScatter - A scatter from the argument of a matrix-vector product to lvec
5326 
5327   Level: developer
5328 
5329 @*/
5330 #if defined(PETSC_USE_CTABLE)
5331 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter)
5332 #else
5333 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter)
5334 #endif
5335 {
5336   Mat_MPIAIJ *a;
5337 
5338   PetscFunctionBegin;
5339   PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
5340   PetscValidPointer(lvec, 2);
5341   PetscValidPointer(colmap, 3);
5342   PetscValidPointer(multScatter, 4);
5343   a = (Mat_MPIAIJ*) A->data;
5344   if (lvec) *lvec = a->lvec;
5345   if (colmap) *colmap = a->colmap;
5346   if (multScatter) *multScatter = a->Mvctx;
5347   PetscFunctionReturn(0);
5348 }
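/*
   A small illustrative sketch (not taken from the PETSc tests): the returned objects are borrowed references
   owned by the matrix and must not be destroyed by the caller; the scatter can be used to reproduce the
   gather that MatMult() performs, filling lvec with the off-process entries of a vector x.

     Vec        lvec;
     VecScatter Mvctx;
   #if defined(PETSC_USE_CTABLE)
     PetscTable colmap;
   #else
     PetscInt   *colmap;
   #endif

     ierr = MatGetCommunicationStructs(A,&lvec,&colmap,&Mvctx);CHKERRQ(ierr);
     ierr = VecScatterBegin(Mvctx,x,lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
     ierr = VecScatterEnd(Mvctx,x,lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
     ... lvec now holds the off-process values of x needed by this process ...
*/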
5349 
5350 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat,MatType,MatReuse,Mat*);
5351 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJPERM(Mat,MatType,MatReuse,Mat*);
5352 #if defined(PETSC_HAVE_MKL_SPARSE)
5353 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJMKL(Mat,MatType,MatReuse,Mat*);
5354 #endif
5355 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat,MatType,MatReuse,Mat*);
5356 #if defined(PETSC_HAVE_ELEMENTAL)
5357 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_Elemental(Mat,MatType,MatReuse,Mat*);
5358 #endif
5359 #if defined(PETSC_HAVE_HYPRE)
5360 PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat,MatType,MatReuse,Mat*);
5361 PETSC_INTERN PetscErrorCode MatMatMatMult_Transpose_AIJ_AIJ(Mat,Mat,Mat,MatReuse,PetscReal,Mat*);
5362 #endif
5363 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_IS(Mat,MatType,MatReuse,Mat*);
5364 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIELL(Mat,MatType,MatReuse,Mat*);
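/*
   Illustrative sketch: the conversion routines declared above are reached through MatConvert(), which looks up
   the MatConvert_mpiaij_..._C functions composed on the matrix in MatCreate_MPIAIJ() below.  For example
   (assuming the required external packages are configured, and a symmetric nonzero structure for the SBAIJ
   conversion):

     Mat Asbaij,Ais;

     ierr = MatConvert(A,MATMPISBAIJ,MAT_INITIAL_MATRIX,&Asbaij);CHKERRQ(ierr);
     ierr = MatConvert(A,MATIS,MAT_INITIAL_MATRIX,&Ais);CHKERRQ(ierr);
     ...
     ierr = MatDestroy(&Asbaij);CHKERRQ(ierr);
     ierr = MatDestroy(&Ais);CHKERRQ(ierr);
*/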
5365 
5366 /*
5367     Computes (B'*A')' since computing A*B directly (dense A times sparse B) is untenable
5368 
5369                n                       p                          p
5370         (              )       (              )         (                  )
5371       m (      A       )  *  n (       B      )   =   m (         C        )
5372         (              )       (              )         (                  )
5373 
5374 */
5375 PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C)
5376 {
5377   PetscErrorCode ierr;
5378   Mat            At,Bt,Ct;
5379 
5380   PetscFunctionBegin;
5381   ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr);
5382   ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr);
5383   ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,1.0,&Ct);CHKERRQ(ierr);
5384   ierr = MatDestroy(&At);CHKERRQ(ierr);
5385   ierr = MatDestroy(&Bt);CHKERRQ(ierr);
5386   ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr);
5387   ierr = MatDestroy(&Ct);CHKERRQ(ierr);
5388   PetscFunctionReturn(0);
5389 }
5390 
5391 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat *C)
5392 {
5393   PetscErrorCode ierr;
5394   PetscInt       m=A->rmap->n,n=B->cmap->n;
5395   Mat            Cmat;
5396 
5397   PetscFunctionBegin;
5398   if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %D != B->rmap->n %D",A->cmap->n,B->rmap->n);
5399   ierr = MatCreate(PetscObjectComm((PetscObject)A),&Cmat);CHKERRQ(ierr);
5400   ierr = MatSetSizes(Cmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
5401   ierr = MatSetBlockSizesFromMats(Cmat,A,B);CHKERRQ(ierr);
5402   ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr);
5403   ierr = MatMPIDenseSetPreallocation(Cmat,NULL);CHKERRQ(ierr);
5404   ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5405   ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5406 
5407   Cmat->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIAIJ;
5408 
5409   *C = Cmat;
5410   PetscFunctionReturn(0);
5411 }
5412 
5413 /* ----------------------------------------------------------------*/
5414 PETSC_INTERN PetscErrorCode MatMatMult_MPIDense_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C)
5415 {
5416   PetscErrorCode ierr;
5417 
5418   PetscFunctionBegin;
5419   if (scall == MAT_INITIAL_MATRIX) {
5420     ierr = PetscLogEventBegin(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr);
5421     ierr = MatMatMultSymbolic_MPIDense_MPIAIJ(A,B,fill,C);CHKERRQ(ierr);
5422     ierr = PetscLogEventEnd(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr);
5423   }
5424   ierr = PetscLogEventBegin(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr);
5425   ierr = MatMatMultNumeric_MPIDense_MPIAIJ(A,B,*C);CHKERRQ(ierr);
5426   ierr = PetscLogEventEnd(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr);
5427   PetscFunctionReturn(0);
5428 }
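/*
   A usage sketch (illustrative only): this kernel is reached through the generic interface when A is MATMPIDENSE
   and B is MATMPIAIJ.  A first call with MAT_INITIAL_MATRIX creates C; the product can then be recomputed in
   place with MAT_REUSE_MATRIX after the values of A or B change.

     Mat C;

     ierr = MatMatMult(A,B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&C);CHKERRQ(ierr);
     ... the values of A or B change ...
     ierr = MatMatMult(A,B,MAT_REUSE_MATRIX,PETSC_DEFAULT,&C);CHKERRQ(ierr);
     ierr = MatDestroy(&C);CHKERRQ(ierr);
*/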
5429 
5430 /*MC
5431    MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.
5432 
5433    Options Database Keys:
5434 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions()
5435 
5436   Level: beginner
5437 
5438 .seealso: MatCreateAIJ()
5439 M*/
5440 
5441 PETSC_EXTERN PetscErrorCode MatCreate_MPIAIJ(Mat B)
5442 {
5443   Mat_MPIAIJ     *b;
5444   PetscErrorCode ierr;
5445   PetscMPIInt    size;
5446 
5447   PetscFunctionBegin;
5448   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRQ(ierr);
5449 
5450   ierr          = PetscNewLog(B,&b);CHKERRQ(ierr);
5451   B->data       = (void*)b;
5452   ierr          = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
5453   B->assembled  = PETSC_FALSE;
5454   B->insertmode = NOT_SET_VALUES;
5455   b->size       = size;
5456 
5457   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank);CHKERRQ(ierr);
5458 
5459   /* build cache for off array entries formed */
5460   ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash);CHKERRQ(ierr);
5461 
5462   b->donotstash  = PETSC_FALSE;
5463   b->colmap      = 0;
5464   b->garray      = 0;
5465   b->roworiented = PETSC_TRUE;
5466 
5467   /* stuff used for matrix vector multiply */
5468   b->lvec  = NULL;
5469   b->Mvctx = NULL;
5470 
5471   /* stuff for MatGetRow() */
5472   b->rowindices   = 0;
5473   b->rowvalues    = 0;
5474   b->getrowactive = PETSC_FALSE;
5475 
5476   /* flexible pointer used in CUSP/CUSPARSE classes */
5477   b->spptr = NULL;
5478 
5479   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetUseScalableIncreaseOverlap_C",MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ);CHKERRQ(ierr);
5480   ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPIAIJ);CHKERRQ(ierr);
5481   ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPIAIJ);CHKERRQ(ierr);
5482   ierr = PetscObjectComposeFunction((PetscObject)B,"MatIsTranspose_C",MatIsTranspose_MPIAIJ);CHKERRQ(ierr);
5483   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr);
5484   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr);
5485   ierr = PetscObjectComposeFunction((PetscObject)B,"MatDiagonalScaleLocal_C",MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr);
5486   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijperm_C",MatConvert_MPIAIJ_MPIAIJPERM);CHKERRQ(ierr);
5487 #if defined(PETSC_HAVE_MKL_SPARSE)
5488   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijmkl_C",MatConvert_MPIAIJ_MPIAIJMKL);CHKERRQ(ierr);
5489 #endif
5490   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijcrl_C",MatConvert_MPIAIJ_MPIAIJCRL);CHKERRQ(ierr);
5491   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C",MatConvert_MPIAIJ_MPISBAIJ);CHKERRQ(ierr);
5492 #if defined(PETSC_HAVE_ELEMENTAL)
5493   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_elemental_C",MatConvert_MPIAIJ_Elemental);CHKERRQ(ierr);
5494 #endif
5495 #if defined(PETSC_HAVE_HYPRE)
5496   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_hypre_C",MatConvert_AIJ_HYPRE);CHKERRQ(ierr);
5497 #endif
5498   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_is_C",MatConvert_MPIAIJ_IS);CHKERRQ(ierr);
5499   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiell_C",MatConvert_MPIAIJ_MPIELL);CHKERRQ(ierr);
5500   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMult_mpidense_mpiaij_C",MatMatMult_MPIDense_MPIAIJ);CHKERRQ(ierr);
5501   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultSymbolic_mpidense_mpiaij_C",MatMatMultSymbolic_MPIDense_MPIAIJ);CHKERRQ(ierr);
5502   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultNumeric_mpidense_mpiaij_C",MatMatMultNumeric_MPIDense_MPIAIJ);CHKERRQ(ierr);
5503 #if defined(PETSC_HAVE_HYPRE)
5504   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMatMult_transpose_mpiaij_mpiaij_C",MatMatMatMult_Transpose_AIJ_AIJ);CHKERRQ(ierr);
5505 #endif
5506   ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr);
5507   PetscFunctionReturn(0);
5508 }
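/*
   A minimal sketch of how this constructor is reached from user code (M and N stand for the desired global
   sizes): the type is selected with MatSetType(), or with -mat_type mpiaij via MatSetFromOptions(), and the
   matrix is then preallocated and filled in the usual way.

     Mat A;

     ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
     ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,M,N);CHKERRQ(ierr);
     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
     ierr = MatMPIAIJSetPreallocation(A,5,NULL,2,NULL);CHKERRQ(ierr);
     ... MatSetValues() loop ...
     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
*/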
5509 
5510 /*@C
5511      MatCreateMPIAIJWithSplitArrays - creates an MPIAIJ matrix using arrays that contain the "diagonal"
5512          and "off-diagonal" parts of the matrix in CSR format.
5513 
5514    Collective on MPI_Comm
5515 
5516    Input Parameters:
5517 +  comm - MPI communicator
5518 .  m - number of local rows (Cannot be PETSC_DECIDE)
5519 .  n - This value should be the same as the local size used in creating the
5520        x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have
5521        it calculated if N is given). For square matrices n is almost always m.
5522 .  M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given)
5523 .  N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given)
5524 .   i - row indices for "diagonal" portion of matrix
5525 .   j - column indices
5526 .   a - matrix values
5527 .   oi - row indices for "off-diagonal" portion of matrix
5528 .   oj - column indices
5529 -   oa - matrix values
5530 
5531    Output Parameter:
5532 .   mat - the matrix
5533 
5534    Level: advanced
5535 
5536    Notes:
5537        The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc. The user
5538        must free the arrays once the matrix has been destroyed and not before.
5539 
5540        The i and j indices are 0 based
5541 
5542        See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix
5543 
5544        This sets local rows and cannot be used to set off-processor values.
5545 
5546        Use of this routine is discouraged because it is inflexible and cumbersome to use. It is extremely rare that a
5547        legacy application natively assembles into exactly this split format. The code to do so is nontrivial and does
5548        not easily support in-place reassembly. It is recommended to use MatSetValues() (or a variant thereof) because
5549        the resulting assembly is easier to implement, will work with any matrix format, and the user does not have to
5550        keep track of the underlying arrays. Use MatSetOption(A,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) to disable all
5551        communication if it is known that only local entries will be set.
5552 
5553 .keywords: matrix, aij, compressed row, sparse, parallel
5554 
5555 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
5556           MATMPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithArrays()
5557 @*/
5558 PetscErrorCode MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat)
5559 {
5560   PetscErrorCode ierr;
5561   Mat_MPIAIJ     *maij;
5562 
5563   PetscFunctionBegin;
5564   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
5565   if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
5566   if (oi[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0");
5567   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
5568   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
5569   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
5570   maij = (Mat_MPIAIJ*) (*mat)->data;
5571 
5572   (*mat)->preallocated = PETSC_TRUE;
5573 
5574   ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr);
5575   ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr);
5576 
5577   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr);
5578   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr);
5579 
5580   ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5581   ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5582   ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5583   ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5584 
5585   ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr);
5586   ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5587   ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5588   ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_FALSE);CHKERRQ(ierr);
5589   ierr = MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
5590   PetscFunctionReturn(0);
5591 }
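/*
   A hedged sketch of the call on rank 0 of a two-process run with two local rows and columns per process
   (rank 1 would use its own arrays, with off-diagonal column indices 0 and 1): i/j/a describe the "diagonal"
   block in CSR form with local column indices, oi/oj/oa describe the "off-diagonal" block with global column
   indices, and all six arrays must remain valid until the matrix is destroyed because they are not copied.

     Mat         A;
     PetscInt    i[]  = {0,1,2}, j[]  = {0,1}, oi[] = {0,1,2}, oj[] = {2,3};
     PetscScalar a[]  = {1.0,3.0}, oa[] = {2.0,4.0};

     ierr = MatCreateMPIAIJWithSplitArrays(PETSC_COMM_WORLD,2,2,PETSC_DETERMINE,PETSC_DETERMINE,i,j,a,oi,oj,oa,&A);CHKERRQ(ierr);
     ierr = MatDestroy(&A);CHKERRQ(ierr);
*/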
5592 
5593 /*
5594     Special version for direct calls from Fortran
5595 */
5596 #include <petsc/private/fortranimpl.h>
5597 
5598 /* Change these macros so they can be used in a void function */
5599 #undef CHKERRQ
5600 #define CHKERRQ(ierr) CHKERRABORT(PETSC_COMM_WORLD,ierr)
5601 #undef SETERRQ2
5602 #define SETERRQ2(comm,ierr,b,c,d) CHKERRABORT(comm,ierr)
5603 #undef SETERRQ3
5604 #define SETERRQ3(comm,ierr,b,c,d,e) CHKERRABORT(comm,ierr)
5605 #undef SETERRQ
5606 #define SETERRQ(c,ierr,b) CHKERRABORT(c,ierr)
5607 
5608 #if defined(PETSC_HAVE_FORTRAN_CAPS)
5609 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ
5610 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
5611 #define matsetvaluesmpiaij_ matsetvaluesmpiaij
5612 #else
5613 #endif
5614 PETSC_EXTERN void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr)
5615 {
5616   Mat            mat  = *mmat;
5617   PetscInt       m    = *mm, n = *mn;
5618   InsertMode     addv = *maddv;
5619   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
5620   PetscScalar    value;
5621   PetscErrorCode ierr;
5622 
5623   MatCheckPreallocated(mat,1);
5624   if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv;
5625 
5626 #if defined(PETSC_USE_DEBUG)
5627   else if (mat->insertmode != addv) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
5628 #endif
5629   {
5630     PetscInt  i,j,rstart  = mat->rmap->rstart,rend = mat->rmap->rend;
5631     PetscInt  cstart      = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
5632     PetscBool roworiented = aij->roworiented;
5633 
5634     /* Some Variables required in the macro */
5635     Mat        A                 = aij->A;
5636     Mat_SeqAIJ *a                = (Mat_SeqAIJ*)A->data;
5637     PetscInt   *aimax            = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
5638     MatScalar  *aa               = a->a;
5639     PetscBool  ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES)) ? PETSC_TRUE : PETSC_FALSE);
5640     Mat        B                 = aij->B;
5641     Mat_SeqAIJ *b                = (Mat_SeqAIJ*)B->data;
5642     PetscInt   *bimax            = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
5643     MatScalar  *ba               = b->a;
5644 
5645     PetscInt  *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
5646     PetscInt  nonew = a->nonew;
5647     MatScalar *ap1,*ap2;
5648 
5649     PetscFunctionBegin;
5650     for (i=0; i<m; i++) {
5651       if (im[i] < 0) continue;
5652 #if defined(PETSC_USE_DEBUG)
5653       if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
5654 #endif
5655       if (im[i] >= rstart && im[i] < rend) {
5656         row      = im[i] - rstart;
5657         lastcol1 = -1;
5658         rp1      = aj + ai[row];
5659         ap1      = aa + ai[row];
5660         rmax1    = aimax[row];
5661         nrow1    = ailen[row];
5662         low1     = 0;
5663         high1    = nrow1;
5664         lastcol2 = -1;
5665         rp2      = bj + bi[row];
5666         ap2      = ba + bi[row];
5667         rmax2    = bimax[row];
5668         nrow2    = bilen[row];
5669         low2     = 0;
5670         high2    = nrow2;
5671 
5672         for (j=0; j<n; j++) {
5673           if (roworiented) value = v[i*n+j];
5674           else value = v[i+j*m];
5675           if (in[j] >= cstart && in[j] < cend) {
5676             col = in[j] - cstart;
5677             if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && row != col) continue;
5678             MatSetValues_SeqAIJ_A_Private(row,col,value,addv,im[i],in[j]);
5679           } else if (in[j] < 0) continue;
5680 #if defined(PETSC_USE_DEBUG)
5681           else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
5682 #endif
5683           else {
5684             if (mat->was_assembled) {
5685               if (!aij->colmap) {
5686                 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
5687               }
5688 #if defined(PETSC_USE_CTABLE)
5689               ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
5690               col--;
5691 #else
5692               col = aij->colmap[in[j]] - 1;
5693 #endif
5694               if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && row != col) continue;
5695               if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
5696                 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
5697                 col  =  in[j];
5698                 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
5699                 B     = aij->B;
5700                 b     = (Mat_SeqAIJ*)B->data;
5701                 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
5702                 rp2   = bj + bi[row];
5703                 ap2   = ba + bi[row];
5704                 rmax2 = bimax[row];
5705                 nrow2 = bilen[row];
5706                 low2  = 0;
5707                 high2 = nrow2;
5708                 bm    = aij->B->rmap->n;
5709                 ba    = b->a;
5710               }
5711             } else col = in[j];
5712             MatSetValues_SeqAIJ_B_Private(row,col,value,addv,im[i],in[j]);
5713           }
5714         }
5715       } else if (!aij->donotstash) {
5716         if (roworiented) {
5717           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
5718         } else {
5719           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
5720         }
5721       }
5722     }
5723   }
5724   PetscFunctionReturnVoid();
5725 }
5726 
5727