1 
2 
3 #include <../src/mat/impls/aij/mpi/mpiaij.h>   /*I "petscmat.h" I*/
4 #include <petsc/private/vecimpl.h>
5 #include <petsc/private/isimpl.h>
6 #include <petscblaslapack.h>
7 #include <petscsf.h>
8 
9 /*MC
10    MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices.
11 
12    This matrix type is identical to MATSEQAIJ when constructed with a single process communicator,
13    and MATMPIAIJ otherwise.  As a result, for single process communicators,
14   MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
15   for communicators controlling multiple processes.  It is recommended that you call both of
16   the above preallocation routines for simplicity.
17 
18    Options Database Keys:
19 . -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions()
20 
21   Developer Notes:
22     Subclasses include MATAIJCUSP, MATAIJCUSPARSE, MATAIJPERM, MATAIJMKL, and MATAIJCRL.  The matrix also automatically
23    switches over to use inodes when enough of them exist.
24 
25   Level: beginner
26 
27 .seealso: MatCreateAIJ(), MatCreateSeqAIJ(), MATSEQAIJ, MATMPIAIJ
28 M*/
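
/*
   Example usage (a minimal sketch, not taken from the PETSc manual; the sizes and per-row
   nonzero estimates are illustrative only): create an AIJ matrix and call both preallocation
   routines so the same code runs unchanged on one process or on many.

     Mat A;
     ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
     ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,100,100);CHKERRQ(ierr);
     ierr = MatSetType(A,MATAIJ);CHKERRQ(ierr);
     ierr = MatSeqAIJSetPreallocation(A,5,NULL);CHKERRQ(ierr);         used when the communicator has a single process
     ierr = MatMPIAIJSetPreallocation(A,5,NULL,2,NULL);CHKERRQ(ierr);  used when the communicator has multiple processes
*/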
29 
30 /*MC
31    MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices.
32 
33    This matrix type is identical to MATSEQAIJCRL when constructed with a single process communicator,
34    and MATMPIAIJCRL otherwise.  As a result, for single process communicators,
35    MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
36   for communicators controlling multiple processes.  It is recommended that you call both of
37   the above preallocation routines for simplicity.
38 
39    Options Database Keys:
40 . -mat_type aijcrl - sets the matrix type to "aijcrl" during a call to MatSetFromOptions()
41 
42   Level: beginner
43 
44 .seealso: MatCreateMPIAIJCRL(), MATSEQAIJCRL, MATMPIAIJCRL
45 M*/
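
/*
   Example (a sketch): to let the -mat_type aijcrl option take effect, create the matrix and
   call MatSetFromOptions() before setting values.

     ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
     ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,m,n);CHKERRQ(ierr);
     ierr = MatSetFromOptions(A);CHKERRQ(ierr);
*/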
46 
47 PetscErrorCode MatSetBlockSizes_MPIAIJ(Mat M, PetscInt rbs, PetscInt cbs)
48 {
49   PetscErrorCode ierr;
50   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)M->data;
51 
52   PetscFunctionBegin;
53   if (mat->A) {
54     ierr = MatSetBlockSizes(mat->A,rbs,cbs);CHKERRQ(ierr);
55     ierr = MatSetBlockSizes(mat->B,rbs,1);CHKERRQ(ierr);
56   }
57   PetscFunctionReturn(0);
58 }
59 
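/*
   Builds an index set (in global row numbering) of the locally owned rows that contain at
   least one stored nonzero value in either the diagonal (A) or the off-diagonal (B) block.
   If no process has an all-zero row, *keptrows is left NULL, meaning every row is kept.
*/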
60 PetscErrorCode MatFindNonzeroRows_MPIAIJ(Mat M,IS *keptrows)
61 {
62   PetscErrorCode  ierr;
63   Mat_MPIAIJ      *mat = (Mat_MPIAIJ*)M->data;
64   Mat_SeqAIJ      *a   = (Mat_SeqAIJ*)mat->A->data;
65   Mat_SeqAIJ      *b   = (Mat_SeqAIJ*)mat->B->data;
66   const PetscInt  *ia,*ib;
67   const MatScalar *aa,*bb;
68   PetscInt        na,nb,i,j,*rows,cnt=0,n0rows;
69   PetscInt        m = M->rmap->n,rstart = M->rmap->rstart;
70 
71   PetscFunctionBegin;
72   *keptrows = 0;
73   ia        = a->i;
74   ib        = b->i;
75   for (i=0; i<m; i++) {
76     na = ia[i+1] - ia[i];
77     nb = ib[i+1] - ib[i];
78     if (!na && !nb) {
79       cnt++;
80       goto ok1;
81     }
82     aa = a->a + ia[i];
83     for (j=0; j<na; j++) {
84       if (aa[j] != 0.0) goto ok1;
85     }
86     bb = b->a + ib[i];
87     for (j=0; j <nb; j++) {
88       if (bb[j] != 0.0) goto ok1;
89     }
90     cnt++;
91 ok1:;
92   }
93   ierr = MPIU_Allreduce(&cnt,&n0rows,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)M));CHKERRQ(ierr);
94   if (!n0rows) PetscFunctionReturn(0);
95   ierr = PetscMalloc1(M->rmap->n-cnt,&rows);CHKERRQ(ierr);
96   cnt  = 0;
97   for (i=0; i<m; i++) {
98     na = ia[i+1] - ia[i];
99     nb = ib[i+1] - ib[i];
100     if (!na && !nb) continue;
101     aa = a->a + ia[i];
102     for (j=0; j<na;j++) {
103       if (aa[j] != 0.0) {
104         rows[cnt++] = rstart + i;
105         goto ok2;
106       }
107     }
108     bb = b->a + ib[i];
109     for (j=0; j<nb; j++) {
110       if (bb[j] != 0.0) {
111         rows[cnt++] = rstart + i;
112         goto ok2;
113       }
114     }
115 ok2:;
116   }
117   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),cnt,rows,PETSC_OWN_POINTER,keptrows);CHKERRQ(ierr);
118   PetscFunctionReturn(0);
119 }
120 
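/*
   If the matrix is assembled and the row and column ownership ranges coincide, the entire
   diagonal lies in the local diagonal block, so D can be applied to aij->A directly;
   otherwise fall back to MatDiagonalSet_Default().
*/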
121 PetscErrorCode  MatDiagonalSet_MPIAIJ(Mat Y,Vec D,InsertMode is)
122 {
123   PetscErrorCode    ierr;
124   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*) Y->data;
125 
126   PetscFunctionBegin;
127   if (Y->assembled && Y->rmap->rstart == Y->cmap->rstart && Y->rmap->rend == Y->cmap->rend) {
128     ierr = MatDiagonalSet(aij->A,D,is);CHKERRQ(ierr);
129   } else {
130     ierr = MatDiagonalSet_Default(Y,D,is);CHKERRQ(ierr);
131   }
132   PetscFunctionReturn(0);
133 }
134 
135 PetscErrorCode MatFindZeroDiagonals_MPIAIJ(Mat M,IS *zrows)
136 {
137   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)M->data;
138   PetscErrorCode ierr;
139   PetscInt       i,rstart,nrows,*rows;
140 
141   PetscFunctionBegin;
142   *zrows = NULL;
143   ierr   = MatFindZeroDiagonals_SeqAIJ_Private(aij->A,&nrows,&rows);CHKERRQ(ierr);
144   ierr   = MatGetOwnershipRange(M,&rstart,NULL);CHKERRQ(ierr);
145   for (i=0; i<nrows; i++) rows[i] += rstart;
146   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),nrows,rows,PETSC_OWN_POINTER,zrows);CHKERRQ(ierr);
147   PetscFunctionReturn(0);
148 }
149 
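/*
   Computes the requested norm of every global column: each process accumulates contributions
   from its diagonal (A) and off-diagonal (B) blocks into a work array with one entry per
   global column, the arrays are then combined with a sum (NORM_1, NORM_2) or max
   (NORM_INFINITY) reduction, and for NORM_2 the square root is taken after the reduction.
   Because the work array has global length, this routine is not memory scalable in the
   number of columns.
*/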
150 PetscErrorCode MatGetColumnNorms_MPIAIJ(Mat A,NormType type,PetscReal *norms)
151 {
152   PetscErrorCode ierr;
153   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)A->data;
154   PetscInt       i,n,*garray = aij->garray;
155   Mat_SeqAIJ     *a_aij = (Mat_SeqAIJ*) aij->A->data;
156   Mat_SeqAIJ     *b_aij = (Mat_SeqAIJ*) aij->B->data;
157   PetscReal      *work;
158 
159   PetscFunctionBegin;
160   ierr = MatGetSize(A,NULL,&n);CHKERRQ(ierr);
161   ierr = PetscCalloc1(n,&work);CHKERRQ(ierr);
162   if (type == NORM_2) {
163     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
164       work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]*a_aij->a[i]);
165     }
166     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
167       work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]*b_aij->a[i]);
168     }
169   } else if (type == NORM_1) {
170     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
171       work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]);
172     }
173     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
174       work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]);
175     }
176   } else if (type == NORM_INFINITY) {
177     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
178       work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]);
179     }
180     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
181       work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]),work[garray[b_aij->j[i]]]);
182     }
183 
184   } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Unknown NormType");
185   if (type == NORM_INFINITY) {
186     ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
187   } else {
188     ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
189   }
190   ierr = PetscFree(work);CHKERRQ(ierr);
191   if (type == NORM_2) {
192     for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]);
193   }
194   PetscFunctionReturn(0);
195 }
196 
197 PetscErrorCode MatFindOffBlockDiagonalEntries_MPIAIJ(Mat A,IS *is)
198 {
199   Mat_MPIAIJ      *a  = (Mat_MPIAIJ*)A->data;
200   IS              sis,gis;
201   PetscErrorCode  ierr;
202   const PetscInt  *isis,*igis;
203   PetscInt        n,*iis,nsis,ngis,rstart,i;
204 
205   PetscFunctionBegin;
206   ierr = MatFindOffBlockDiagonalEntries(a->A,&sis);CHKERRQ(ierr);
207   ierr = MatFindNonzeroRows(a->B,&gis);CHKERRQ(ierr);
208   ierr = ISGetSize(gis,&ngis);CHKERRQ(ierr);
209   ierr = ISGetSize(sis,&nsis);CHKERRQ(ierr);
210   ierr = ISGetIndices(sis,&isis);CHKERRQ(ierr);
211   ierr = ISGetIndices(gis,&igis);CHKERRQ(ierr);
212 
213   ierr = PetscMalloc1(ngis+nsis,&iis);CHKERRQ(ierr);
214   ierr = PetscMemcpy(iis,igis,ngis*sizeof(PetscInt));CHKERRQ(ierr);
215   ierr = PetscMemcpy(iis+ngis,isis,nsis*sizeof(PetscInt));CHKERRQ(ierr);
216   n    = ngis + nsis;
217   ierr = PetscSortRemoveDupsInt(&n,iis);CHKERRQ(ierr);
218   ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr);
219   for (i=0; i<n; i++) iis[i] += rstart;
220   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)A),n,iis,PETSC_OWN_POINTER,is);CHKERRQ(ierr);
221 
222   ierr = ISRestoreIndices(sis,&isis);CHKERRQ(ierr);
223   ierr = ISRestoreIndices(gis,&igis);CHKERRQ(ierr);
224   ierr = ISDestroy(&sis);CHKERRQ(ierr);
225   ierr = ISDestroy(&gis);CHKERRQ(ierr);
226   PetscFunctionReturn(0);
227 }
228 
229 /*
230     Distributes a SeqAIJ matrix across a set of processes. Code stolen from
231     MatLoad_MPIAIJ(). Horrible lack of reuse. Should be a routine for each matrix type.
232 
233     Only for square matrices
234 
235     Used by a preconditioner, hence PETSC_EXTERN
236 */
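
/*
   Example (a sketch; gmat is a square MATSEQAIJ held on rank 0 of comm and m is the number
   of rows this process is to own):

     Mat dmat;
     ierr = MatDistribute_MPIAIJ(comm,gmat,m,MAT_INITIAL_MATRIX,&dmat);CHKERRQ(ierr);
     ierr = MatDistribute_MPIAIJ(comm,gmat,m,MAT_REUSE_MATRIX,&dmat);CHKERRQ(ierr);   later, to refresh only the numerical values
*/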
237 PETSC_EXTERN PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat)
238 {
239   PetscMPIInt    rank,size;
240   PetscInt       *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz = 0,*gmataj,cnt,row,*ld,bses[2];
241   PetscErrorCode ierr;
242   Mat            mat;
243   Mat_SeqAIJ     *gmata;
244   PetscMPIInt    tag;
245   MPI_Status     status;
246   PetscBool      aij;
247   MatScalar      *gmataa,*ao,*ad,*gmataarestore=0;
248 
249   PetscFunctionBegin;
250   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
251   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
252   if (!rank) {
253     ierr = PetscObjectTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);CHKERRQ(ierr);
254     if (!aij) SETERRQ1(PetscObjectComm((PetscObject)gmat),PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name);
255   }
256   if (reuse == MAT_INITIAL_MATRIX) {
257     ierr = MatCreate(comm,&mat);CHKERRQ(ierr);
258     ierr = MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
259     ierr = MatGetBlockSizes(gmat,&bses[0],&bses[1]);CHKERRQ(ierr);
260     ierr = MPI_Bcast(bses,2,MPIU_INT,0,comm);CHKERRQ(ierr);
261     ierr = MatSetBlockSizes(mat,bses[0],bses[1]);CHKERRQ(ierr);
262     ierr = MatSetType(mat,MATAIJ);CHKERRQ(ierr);
263     ierr = PetscMalloc1(size+1,&rowners);CHKERRQ(ierr);
264     ierr = PetscMalloc2(m,&dlens,m,&olens);CHKERRQ(ierr);
265     ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
266 
267     rowners[0] = 0;
268     for (i=2; i<=size; i++) rowners[i] += rowners[i-1];
269     rstart = rowners[rank];
270     rend   = rowners[rank+1];
271     ierr   = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
272     if (!rank) {
273       gmata = (Mat_SeqAIJ*) gmat->data;
274       /* send row lengths to all processors */
275       for (i=0; i<m; i++) dlens[i] = gmata->ilen[i];
276       for (i=1; i<size; i++) {
277         ierr = MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
278       }
279       /* determine the number of diagonal and off-diagonal entries in each row */
280       ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
281       ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr);
282       jj   = 0;
283       for (i=0; i<m; i++) {
284         for (j=0; j<dlens[i]; j++) {
285           if (gmata->j[jj] < rstart) ld[i]++;
286           if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++;
287           jj++;
288         }
289       }
290       /* send column indices to other processes */
291       for (i=1; i<size; i++) {
292         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
293         ierr = MPI_Send(&nz,1,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
294         ierr = MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
295       }
296 
297       /* send numerical values to other processes */
298       for (i=1; i<size; i++) {
299         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
300         ierr = MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
301       }
302       gmataa = gmata->a;
303       gmataj = gmata->j;
304 
305     } else {
306       /* receive row lengths */
307       ierr = MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
308       /* receive column indices */
309       ierr = MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
310       ierr = PetscMalloc2(nz,&gmataa,nz,&gmataj);CHKERRQ(ierr);
311       ierr = MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
312       /* determine the number of diagonal and off-diagonal entries in each row */
313       ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
314       ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr);
315       jj   = 0;
316       for (i=0; i<m; i++) {
317         for (j=0; j<dlens[i]; j++) {
318           if (gmataj[jj] < rstart) ld[i]++;
319           if (gmataj[jj] < rstart || gmataj[jj] >= rend) olens[i]++;
320           jj++;
321         }
322       }
323       /* receive numerical values */
324       ierr = PetscMemzero(gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr);
325       ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
326     }
327     /* set preallocation */
328     for (i=0; i<m; i++) {
329       dlens[i] -= olens[i];
330     }
331     ierr = MatSeqAIJSetPreallocation(mat,0,dlens);CHKERRQ(ierr);
332     ierr = MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);CHKERRQ(ierr);
333 
334     for (i=0; i<m; i++) {
335       dlens[i] += olens[i];
336     }
337     cnt = 0;
338     for (i=0; i<m; i++) {
339       row  = rstart + i;
340       ierr = MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);CHKERRQ(ierr);
341       cnt += dlens[i];
342     }
343     if (rank) {
344       ierr = PetscFree2(gmataa,gmataj);CHKERRQ(ierr);
345     }
346     ierr = PetscFree2(dlens,olens);CHKERRQ(ierr);
347     ierr = PetscFree(rowners);CHKERRQ(ierr);
348 
349     ((Mat_MPIAIJ*)(mat->data))->ld = ld;
350 
351     *inmat = mat;
352   } else {   /* column indices are already set; only need to move over numerical values from process 0 */
353     Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data;
354     Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data;
355     mat  = *inmat;
356     ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
357     if (!rank) {
358       /* send numerical values to other processes */
359       gmata  = (Mat_SeqAIJ*) gmat->data;
360       ierr   = MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);CHKERRQ(ierr);
361       gmataa = gmata->a;
362       for (i=1; i<size; i++) {
363         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
364         ierr = MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
365       }
366       nz = gmata->i[rowners[1]]-gmata->i[rowners[0]];
367     } else {
368       /* receive numerical values from process 0 */
369       nz   = Ad->nz + Ao->nz;
370       ierr = PetscMalloc1(nz,&gmataa);CHKERRQ(ierr); gmataarestore = gmataa;
371       ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
372     }
373     /* transfer numerical values into the diagonal A and off diagonal B parts of mat */
374     ld = ((Mat_MPIAIJ*)(mat->data))->ld;
375     ad = Ad->a;
376     ao = Ao->a;
377     if (mat->rmap->n) {
378       i  = 0;
379       nz = ld[i];                                   ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
380       nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
381     }
382     for (i=1; i<mat->rmap->n; i++) {
383       nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
384       nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
385     }
386     i--;
387     if (mat->rmap->n) {
388       nz = Ao->i[i+1] - Ao->i[i] - ld[i];           ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr);
389     }
390     if (rank) {
391       ierr = PetscFree(gmataarestore);CHKERRQ(ierr);
392     }
393   }
394   ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
395   ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
396   PetscFunctionReturn(0);
397 }
398 
399 /*
400   Local utility routine that creates a mapping from the global column
401 number to the local number in the off-diagonal part of the local
402 storage of the matrix.  When PETSC_USE_CTABLE is used this is scalable at
403 a slightly higher hash table cost; without it, it is not scalable (each process
404 has an order N integer array, but access is fast).
405 */
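
/*
   For example (illustrative values only): if garray = {3,17,42} then the colmap built below
   gives colmap[3] = 1, colmap[17] = 2, colmap[42] = 3, i.e. the local column index plus one,
   with 0 meaning "this global column does not appear in the off-diagonal block on this
   process".  The PetscTable variant stores the same information, with the key shifted by one
   so that zero keys are avoided.
*/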
406 PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat)
407 {
408   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
409   PetscErrorCode ierr;
410   PetscInt       n = aij->B->cmap->n,i;
411 
412   PetscFunctionBegin;
413   if (!aij->garray) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"MPIAIJ Matrix was assembled but is missing garray");
414 #if defined(PETSC_USE_CTABLE)
415   ierr = PetscTableCreate(n,mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr);
416   for (i=0; i<n; i++) {
417     ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1,INSERT_VALUES);CHKERRQ(ierr);
418   }
419 #else
420   ierr = PetscCalloc1(mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr);
421   ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N+1)*sizeof(PetscInt));CHKERRQ(ierr);
422   for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
423 #endif
424   PetscFunctionReturn(0);
425 }
426 
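/*
   The two macros below insert (or add to) a single value in the diagonal block A
   (MatSetValues_SeqAIJ_A_Private) or the off-diagonal block B (MatSetValues_SeqAIJ_B_Private)
   of a row whose pointers and counters (rp1/ap1/nrow1, rp2/ap2/nrow2, ...) have been set up
   by MatSetValues_MPIAIJ(): a short bisection narrows the search window, a linear scan
   locates the column, and if the column is not present a new entry is created (reallocating
   the row if necessary) and all later entries of the row are shifted up one position.
*/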
427 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv,orow,ocol)     \
428 { \
429     if (col <= lastcol1)  low1 = 0;     \
430     else                 high1 = nrow1; \
431     lastcol1 = col;\
432     while (high1-low1 > 5) { \
433       t = (low1+high1)/2; \
434       if (rp1[t] > col) high1 = t; \
435       else              low1  = t; \
436     } \
437       for (_i=low1; _i<high1; _i++) { \
438         if (rp1[_i] > col) break; \
439         if (rp1[_i] == col) { \
440           if (addv == ADD_VALUES) ap1[_i] += value;   \
441           else                    ap1[_i] = value; \
442           goto a_noinsert; \
443         } \
444       }  \
445       if (value == 0.0 && ignorezeroentries && row != col) {low1 = 0; high1 = nrow1;goto a_noinsert;} \
446       if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;}                \
447       if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \
448       MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \
449       N = nrow1++ - 1; a->nz++; high1++; \
450       /* shift up all the later entries in this row */ \
451       for (ii=N; ii>=_i; ii--) { \
452         rp1[ii+1] = rp1[ii]; \
453         ap1[ii+1] = ap1[ii]; \
454       } \
455       rp1[_i] = col;  \
456       ap1[_i] = value;  \
457       A->nonzerostate++;\
458       a_noinsert: ; \
459       ailen[row] = nrow1; \
460 }
461 
462 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv,orow,ocol) \
463   { \
464     if (col <= lastcol2) low2 = 0;                        \
465     else high2 = nrow2;                                   \
466     lastcol2 = col;                                       \
467     while (high2-low2 > 5) {                              \
468       t = (low2+high2)/2;                                 \
469       if (rp2[t] > col) high2 = t;                        \
470       else             low2  = t;                         \
471     }                                                     \
472     for (_i=low2; _i<high2; _i++) {                       \
473       if (rp2[_i] > col) break;                           \
474       if (rp2[_i] == col) {                               \
475         if (addv == ADD_VALUES) ap2[_i] += value;         \
476         else                    ap2[_i] = value;          \
477         goto b_noinsert;                                  \
478       }                                                   \
479     }                                                     \
480     if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
481     if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;}                        \
482     if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \
483     MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \
484     N = nrow2++ - 1; b->nz++; high2++;                    \
485     /* shift up all the later entries in this row */      \
486     for (ii=N; ii>=_i; ii--) {                            \
487       rp2[ii+1] = rp2[ii];                                \
488       ap2[ii+1] = ap2[ii];                                \
489     }                                                     \
490     rp2[_i] = col;                                        \
491     ap2[_i] = value;                                      \
492     B->nonzerostate++;                                    \
493     b_noinsert: ;                                         \
494     bilen[row] = nrow2;                                   \
495   }
496 
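/*
   Replaces the numerical values of one locally owned row; v must contain all stored values of
   the row ordered by global column: the off-diagonal entries left of the diagonal block, then
   the diagonal-block entries, then the off-diagonal entries to the right.  The nonzero
   pattern of the row is not changed and must already be in place.
*/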
497 PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[])
498 {
499   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)A->data;
500   Mat_SeqAIJ     *a   = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data;
501   PetscErrorCode ierr;
502   PetscInt       l,*garray = mat->garray,diag;
503 
504   PetscFunctionBegin;
505   /* code only works for square matrices A */
506 
507   /* find size of row to the left of the diagonal part */
508   ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr);
509   row  = row - diag;
510   for (l=0; l<b->i[row+1]-b->i[row]; l++) {
511     if (garray[b->j[b->i[row]+l]] > diag) break;
512   }
513   ierr = PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));CHKERRQ(ierr);
514 
515   /* diagonal part */
516   ierr = PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));CHKERRQ(ierr);
517 
518   /* right of diagonal part */
519   ierr = PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));CHKERRQ(ierr);
520   PetscFunctionReturn(0);
521 }
522 
523 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
524 {
525   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
526   PetscScalar    value;
527   PetscErrorCode ierr;
528   PetscInt       i,j,rstart  = mat->rmap->rstart,rend = mat->rmap->rend;
529   PetscInt       cstart      = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
530   PetscBool      roworiented = aij->roworiented;
531 
532   /* Some Variables required in the macro */
533   Mat        A                 = aij->A;
534   Mat_SeqAIJ *a                = (Mat_SeqAIJ*)A->data;
535   PetscInt   *aimax            = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
536   MatScalar  *aa               = a->a;
537   PetscBool  ignorezeroentries = a->ignorezeroentries;
538   Mat        B                 = aij->B;
539   Mat_SeqAIJ *b                = (Mat_SeqAIJ*)B->data;
540   PetscInt   *bimax            = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
541   MatScalar  *ba               = b->a;
542 
543   PetscInt  *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
544   PetscInt  nonew;
545   MatScalar *ap1,*ap2;
546 
547   PetscFunctionBegin;
548   for (i=0; i<m; i++) {
549     if (im[i] < 0) continue;
550 #if defined(PETSC_USE_DEBUG)
551     if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
552 #endif
553     if (im[i] >= rstart && im[i] < rend) {
554       row      = im[i] - rstart;
555       lastcol1 = -1;
556       rp1      = aj + ai[row];
557       ap1      = aa + ai[row];
558       rmax1    = aimax[row];
559       nrow1    = ailen[row];
560       low1     = 0;
561       high1    = nrow1;
562       lastcol2 = -1;
563       rp2      = bj + bi[row];
564       ap2      = ba + bi[row];
565       rmax2    = bimax[row];
566       nrow2    = bilen[row];
567       low2     = 0;
568       high2    = nrow2;
569 
570       for (j=0; j<n; j++) {
571         if (roworiented) value = v[i*n+j];
572         else             value = v[i+j*m];
573         if (in[j] >= cstart && in[j] < cend) {
574           col   = in[j] - cstart;
575           nonew = a->nonew;
576           if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && row != col) continue;
577           MatSetValues_SeqAIJ_A_Private(row,col,value,addv,im[i],in[j]);
578         } else if (in[j] < 0) continue;
579 #if defined(PETSC_USE_DEBUG)
580         else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
581 #endif
582         else {
583           if (mat->was_assembled) {
584             if (!aij->colmap) {
585               ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
586             }
587 #if defined(PETSC_USE_CTABLE)
588             ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
589             col--;
590 #else
591             col = aij->colmap[in[j]] - 1;
592 #endif
593             if (col < 0 && !((Mat_SeqAIJ*)(aij->B->data))->nonew) {
594               ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
595               col  =  in[j];
596               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
597               B     = aij->B;
598               b     = (Mat_SeqAIJ*)B->data;
599               bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a;
600               rp2   = bj + bi[row];
601               ap2   = ba + bi[row];
602               rmax2 = bimax[row];
603               nrow2 = bilen[row];
604               low2  = 0;
605               high2 = nrow2;
606               bm    = aij->B->rmap->n;
607               ba    = b->a;
608             } else if (col < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", im[i], in[j]);
609           } else col = in[j];
610           nonew = b->nonew;
611           MatSetValues_SeqAIJ_B_Private(row,col,value,addv,im[i],in[j]);
612         }
613       }
614     } else {
615       if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
616       if (!aij->donotstash) {
617         mat->assembled = PETSC_FALSE;
618         if (roworiented) {
619           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
620         } else {
621           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
622         }
623       }
624     }
625   }
626   PetscFunctionReturn(0);
627 }
628 
629 PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
630 {
631   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
632   PetscErrorCode ierr;
633   PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
634   PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
635 
636   PetscFunctionBegin;
637   for (i=0; i<m; i++) {
638     if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/
639     if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
640     if (idxm[i] >= rstart && idxm[i] < rend) {
641       row = idxm[i] - rstart;
642       for (j=0; j<n; j++) {
643         if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */
644         if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
645         if (idxn[j] >= cstart && idxn[j] < cend) {
646           col  = idxn[j] - cstart;
647           ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
648         } else {
649           if (!aij->colmap) {
650             ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
651           }
652 #if defined(PETSC_USE_CTABLE)
653           ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
654           col--;
655 #else
656           col = aij->colmap[idxn[j]] - 1;
657 #endif
658           if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
659           else {
660             ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
661           }
662         }
663       }
664     } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
665   }
666   PetscFunctionReturn(0);
667 }
668 
669 extern PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat,Vec,Vec);
670 
671 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
672 {
673   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
674   PetscErrorCode ierr;
675   PetscInt       nstash,reallocs;
676 
677   PetscFunctionBegin;
678   if (aij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0);
679 
680   ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
681   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
682   ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
683   PetscFunctionReturn(0);
684 }
685 
686 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
687 {
688   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
689   Mat_SeqAIJ     *a   = (Mat_SeqAIJ*)aij->A->data;
690   PetscErrorCode ierr;
691   PetscMPIInt    n;
692   PetscInt       i,j,rstart,ncols,flg;
693   PetscInt       *row,*col;
694   PetscBool      other_disassembled;
695   PetscScalar    *val;
696 
697   /* do not use 'b = (Mat_SeqAIJ*)aij->B->data' as B can be reset in disassembly */
698 
699   PetscFunctionBegin;
700   if (!aij->donotstash && !mat->nooffprocentries) {
701     while (1) {
702       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
703       if (!flg) break;
704 
705       for (i=0; i<n; ) {
706         /* Now identify the consecutive vals belonging to the same row */
707         for (j=i,rstart=row[j]; j<n; j++) {
708           if (row[j] != rstart) break;
709         }
710         if (j < n) ncols = j-i;
711         else       ncols = n-i;
712         /* Now assemble all these values with a single function call */
713         ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,mat->insertmode);CHKERRQ(ierr);
714 
715         i = j;
716       }
717     }
718     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
719   }
720   ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
721   ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);
722 
723   /* determine if any processor has disassembled; if so, we must
724      also disassemble ourselves, in order that we may reassemble. */
725   /*
726      if nonzero structure of submatrix B cannot change then we know that
727      no processor disassembled thus we can skip this stuff
728   */
729   if (!((Mat_SeqAIJ*)aij->B->data)->nonew) {
730     ierr = MPIU_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
731     if (mat->was_assembled && !other_disassembled) {
732       ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
733     }
734   }
735   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
736     ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
737   }
738   ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr);
739   ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
740   ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);
741 
742   ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
743 
744   aij->rowvalues = 0;
745 
746   ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
747   if (a->inode.size) mat->ops->multdiagonalblock = MatMultDiagonalBlock_MPIAIJ;
748 
749   /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */
750   if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
751     PetscObjectState state = aij->A->nonzerostate + aij->B->nonzerostate;
752     ierr = MPIU_Allreduce(&state,&mat->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
753   }
754   PetscFunctionReturn(0);
755 }
756 
757 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
758 {
759   Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
760   PetscErrorCode ierr;
761 
762   PetscFunctionBegin;
763   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
764   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
765   PetscFunctionReturn(0);
766 }
767 
768 PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
769 {
770   Mat_MPIAIJ    *mat    = (Mat_MPIAIJ *) A->data;
771   PetscInt      *lrows;
772   PetscInt       r, len;
773   PetscErrorCode ierr;
774 
775   PetscFunctionBegin;
776   /* get locally owned rows */
777   ierr = MatZeroRowsMapLocal_Private(A,N,rows,&len,&lrows);CHKERRQ(ierr);
778   /* fix right hand side if needed */
779   if (x && b) {
780     const PetscScalar *xx;
781     PetscScalar       *bb;
782 
783     ierr = VecGetArrayRead(x, &xx);CHKERRQ(ierr);
784     ierr = VecGetArray(b, &bb);CHKERRQ(ierr);
785     for (r = 0; r < len; ++r) bb[lrows[r]] = diag*xx[lrows[r]];
786     ierr = VecRestoreArrayRead(x, &xx);CHKERRQ(ierr);
787     ierr = VecRestoreArray(b, &bb);CHKERRQ(ierr);
788   }
789   /* Must zero mat->B before mat->A because the (diag != 0.0) case below may put values into mat->B */
790   ierr = MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
791   if (A->congruentlayouts == -1) { /* first time we compare rows and cols layouts */
792     PetscBool cong;
793     ierr = PetscLayoutCompare(A->rmap,A->cmap,&cong);CHKERRQ(ierr);
794     if (cong) A->congruentlayouts = 1;
795     else      A->congruentlayouts = 0;
796   }
797   if ((diag != 0.0) && A->congruentlayouts) {
798     ierr = MatZeroRows(mat->A, len, lrows, diag, NULL, NULL);CHKERRQ(ierr);
799   } else if (diag != 0.0) {
800     ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
801     if (((Mat_SeqAIJ *) mat->A->data)->nonew) SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "MatZeroRows() on rectangular matrices cannot be used with the Mat options\nMAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
802     for (r = 0; r < len; ++r) {
803       const PetscInt row = lrows[r] + A->rmap->rstart;
804       ierr = MatSetValues(A, 1, &row, 1, &row, &diag, INSERT_VALUES);CHKERRQ(ierr);
805     }
806     ierr = MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
807     ierr = MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
808   } else {
809     ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
810   }
811   ierr = PetscFree(lrows);CHKERRQ(ierr);
812 
813   /* only change matrix nonzero state if pattern was allowed to be changed */
814   if (!((Mat_SeqAIJ*)(mat->A->data))->keepnonzeropattern) {
815     PetscObjectState state = mat->A->nonzerostate + mat->B->nonzerostate;
816     ierr = MPIU_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
817   }
818   PetscFunctionReturn(0);
819 }
820 
821 PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
822 {
823   Mat_MPIAIJ        *l = (Mat_MPIAIJ*)A->data;
824   PetscErrorCode    ierr;
825   PetscMPIInt       n = A->rmap->n;
826   PetscInt          i,j,r,m,p = 0,len = 0;
827   PetscInt          *lrows,*owners = A->rmap->range;
828   PetscSFNode       *rrows;
829   PetscSF           sf;
830   const PetscScalar *xx;
831   PetscScalar       *bb,*mask;
832   Vec               xmask,lmask;
833   Mat_SeqAIJ        *aij = (Mat_SeqAIJ*)l->B->data;
834   const PetscInt    *aj, *ii,*ridx;
835   PetscScalar       *aa;
836 
837   PetscFunctionBegin;
838   /* Create SF where leaves are input rows and roots are owned rows */
839   ierr = PetscMalloc1(n, &lrows);CHKERRQ(ierr);
840   for (r = 0; r < n; ++r) lrows[r] = -1;
841   ierr = PetscMalloc1(N, &rrows);CHKERRQ(ierr);
842   for (r = 0; r < N; ++r) {
843     const PetscInt idx   = rows[r];
844     if (idx < 0 || A->rmap->N <= idx) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range [0,%D)",idx,A->rmap->N);
845     if (idx < owners[p] || owners[p+1] <= idx) { /* short-circuit the search if the last p owns this row too */
846       ierr = PetscLayoutFindOwner(A->rmap,idx,&p);CHKERRQ(ierr);
847     }
848     rrows[r].rank  = p;
849     rrows[r].index = rows[r] - owners[p];
850   }
851   ierr = PetscSFCreate(PetscObjectComm((PetscObject) A), &sf);CHKERRQ(ierr);
852   ierr = PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER);CHKERRQ(ierr);
853   /* Collect flags for rows to be zeroed */
854   ierr = PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
855   ierr = PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
856   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
857   /* Compress and put in row numbers */
858   for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r;
859   /* zero diagonal part of matrix */
860   ierr = MatZeroRowsColumns(l->A,len,lrows,diag,x,b);CHKERRQ(ierr);
861   /* handle off diagonal part of matrix */
862   ierr = MatCreateVecs(A,&xmask,NULL);CHKERRQ(ierr);
863   ierr = VecDuplicate(l->lvec,&lmask);CHKERRQ(ierr);
864   ierr = VecGetArray(xmask,&bb);CHKERRQ(ierr);
865   for (i=0; i<len; i++) bb[lrows[i]] = 1;
866   ierr = VecRestoreArray(xmask,&bb);CHKERRQ(ierr);
867   ierr = VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
868   ierr = VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
869   ierr = VecDestroy(&xmask);CHKERRQ(ierr);
870   if (x) {
871     ierr = VecScatterBegin(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
872     ierr = VecScatterEnd(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
873     ierr = VecGetArrayRead(l->lvec,&xx);CHKERRQ(ierr);
874     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
875   }
876   ierr = VecGetArray(lmask,&mask);CHKERRQ(ierr);
877   /* remove zeroed rows of off diagonal matrix */
878   ii = aij->i;
879   for (i=0; i<len; i++) {
880     ierr = PetscMemzero(aij->a + ii[lrows[i]],(ii[lrows[i]+1] - ii[lrows[i]])*sizeof(PetscScalar));CHKERRQ(ierr);
881   }
882   /* loop over all elements of the off-process part of the matrix, zeroing removed columns */
883   if (aij->compressedrow.use) {
884     m    = aij->compressedrow.nrows;
885     ii   = aij->compressedrow.i;
886     ridx = aij->compressedrow.rindex;
887     for (i=0; i<m; i++) {
888       n  = ii[i+1] - ii[i];
889       aj = aij->j + ii[i];
890       aa = aij->a + ii[i];
891 
892       for (j=0; j<n; j++) {
893         if (PetscAbsScalar(mask[*aj])) {
894           if (b) bb[*ridx] -= *aa*xx[*aj];
895           *aa = 0.0;
896         }
897         aa++;
898         aj++;
899       }
900       ridx++;
901     }
902   } else { /* do not use compressed row format */
903     m = l->B->rmap->n;
904     for (i=0; i<m; i++) {
905       n  = ii[i+1] - ii[i];
906       aj = aij->j + ii[i];
907       aa = aij->a + ii[i];
908       for (j=0; j<n; j++) {
909         if (PetscAbsScalar(mask[*aj])) {
910           if (b) bb[i] -= *aa*xx[*aj];
911           *aa = 0.0;
912         }
913         aa++;
914         aj++;
915       }
916     }
917   }
918   if (x) {
919     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
920     ierr = VecRestoreArrayRead(l->lvec,&xx);CHKERRQ(ierr);
921   }
922   ierr = VecRestoreArray(lmask,&mask);CHKERRQ(ierr);
923   ierr = VecDestroy(&lmask);CHKERRQ(ierr);
924   ierr = PetscFree(lrows);CHKERRQ(ierr);
925 
926   /* only change matrix nonzero state if pattern was allowed to be changed */
927   if (!((Mat_SeqAIJ*)(l->A->data))->keepnonzeropattern) {
928     PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate;
929     ierr = MPIU_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
930   }
931   PetscFunctionReturn(0);
932 }
933 
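/*
   y = A x for the parallel matrix: the scatter of the needed off-process entries of x into
   a->lvec is started, the diagonal-block product is computed while that communication is in
   flight, and once the scatter completes the off-diagonal block contribution is added in
   with a multiply-add on a->B.
*/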
934 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
935 {
936   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
937   PetscErrorCode ierr;
938   PetscInt       nt;
939   VecScatter     Mvctx = a->Mvctx;
940 
941   PetscFunctionBegin;
942   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
943   if (nt != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt);
944 
945   ierr = VecScatterBegin(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
946   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
947   ierr = VecScatterEnd(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
948   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
949   PetscFunctionReturn(0);
950 }
951 
952 PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx)
953 {
954   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
955   PetscErrorCode ierr;
956 
957   PetscFunctionBegin;
958   ierr = MatMultDiagonalBlock(a->A,bb,xx);CHKERRQ(ierr);
959   PetscFunctionReturn(0);
960 }
961 
962 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
963 {
964   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
965   PetscErrorCode ierr;
966   VecScatter     Mvctx = a->Mvctx;
967 
968   PetscFunctionBegin;
969   if (a->Mvctx_mpi1_flg) Mvctx = a->Mvctx_mpi1;
970   ierr = VecScatterBegin(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
971   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
972   ierr = VecScatterEnd(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
973   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
974   PetscFunctionReturn(0);
975 }
976 
977 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
978 {
979   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
980   PetscErrorCode ierr;
981   PetscBool      merged;
982 
983   PetscFunctionBegin;
984   ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr);
985   /* do nondiagonal part */
986   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
987   if (!merged) {
988     /* send it on its way */
989     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
990     /* do local part */
991     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
992     /* receive remote parts: note this assumes the values are not actually */
993     /* added in yy until the next line, */
994     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
995   } else {
996     /* do local part */
997     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
998     /* send it on its way */
999     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1000     /* values actually were received in the Begin() but we need to call this nop */
1001     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1002   }
1003   PetscFunctionReturn(0);
1004 }
1005 
1006 PetscErrorCode MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscBool  *f)
1007 {
1008   MPI_Comm       comm;
1009   Mat_MPIAIJ     *Aij = (Mat_MPIAIJ*) Amat->data, *Bij;
1010   Mat            Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs;
1011   IS             Me,Notme;
1012   PetscErrorCode ierr;
1013   PetscInt       M,N,first,last,*notme,i;
1014   PetscMPIInt    size;
1015 
1016   PetscFunctionBegin;
1017   /* Easy test: symmetric diagonal block */
1018   Bij  = (Mat_MPIAIJ*) Bmat->data; Bdia = Bij->A;
1019   ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr);
1020   if (!*f) PetscFunctionReturn(0);
1021   ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr);
1022   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1023   if (size == 1) PetscFunctionReturn(0);
1024 
1025   /* Hard test: off-diagonal block. This takes a MatCreateSubMatrix. */
1026   ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr);
1027   ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr);
1028   ierr = PetscMalloc1(N-last+first,&notme);CHKERRQ(ierr);
1029   for (i=0; i<first; i++) notme[i] = i;
1030   for (i=last; i<M; i++) notme[i-last+first] = i;
1031   ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,PETSC_COPY_VALUES,&Notme);CHKERRQ(ierr);
1032   ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr);
1033   ierr = MatCreateSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr);
1034   Aoff = Aoffs[0];
1035   ierr = MatCreateSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr);
1036   Boff = Boffs[0];
1037   ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr);
1038   ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr);
1039   ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr);
1040   ierr = ISDestroy(&Me);CHKERRQ(ierr);
1041   ierr = ISDestroy(&Notme);CHKERRQ(ierr);
1042   ierr = PetscFree(notme);CHKERRQ(ierr);
1043   PetscFunctionReturn(0);
1044 }
1045 
1046 PetscErrorCode MatIsSymmetric_MPIAIJ(Mat A,PetscReal tol,PetscBool  *f)
1047 {
1048   PetscErrorCode ierr;
1049 
1050   PetscFunctionBegin;
1051   ierr = MatIsTranspose_MPIAIJ(A,A,tol,f);CHKERRQ(ierr);
1052   PetscFunctionReturn(0);
1053 }
1054 
1055 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1056 {
1057   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1058   PetscErrorCode ierr;
1059 
1060   PetscFunctionBegin;
1061   /* do nondiagonal part */
1062   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1063   /* send it on its way */
1064   ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1065   /* do local part */
1066   ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1067   /* receive remote parts */
1068   ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1069   PetscFunctionReturn(0);
1070 }
1071 
1072 /*
1073   This only works correctly for square matrices where the subblock A->A is the
1074    diagonal block
1075 */
1076 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v)
1077 {
1078   PetscErrorCode ierr;
1079   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1080 
1081   PetscFunctionBegin;
1082   if (A->rmap->N != A->cmap->N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
1083   if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
1084   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
1085   PetscFunctionReturn(0);
1086 }
1087 
1088 PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa)
1089 {
1090   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1091   PetscErrorCode ierr;
1092 
1093   PetscFunctionBegin;
1094   ierr = MatScale(a->A,aa);CHKERRQ(ierr);
1095   ierr = MatScale(a->B,aa);CHKERRQ(ierr);
1096   PetscFunctionReturn(0);
1097 }
1098 
1099 PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
1100 {
1101   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1102   PetscErrorCode ierr;
1103 
1104   PetscFunctionBegin;
1105 #if defined(PETSC_USE_LOG)
1106   PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N);
1107 #endif
1108   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
1109   ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
1110   ierr = MatDestroy(&aij->A);CHKERRQ(ierr);
1111   ierr = MatDestroy(&aij->B);CHKERRQ(ierr);
1112 #if defined(PETSC_USE_CTABLE)
1113   ierr = PetscTableDestroy(&aij->colmap);CHKERRQ(ierr);
1114 #else
1115   ierr = PetscFree(aij->colmap);CHKERRQ(ierr);
1116 #endif
1117   ierr = PetscFree(aij->garray);CHKERRQ(ierr);
1118   ierr = VecDestroy(&aij->lvec);CHKERRQ(ierr);
1119   ierr = VecScatterDestroy(&aij->Mvctx);CHKERRQ(ierr);
1120   if (aij->Mvctx_mpi1) {ierr = VecScatterDestroy(&aij->Mvctx_mpi1);CHKERRQ(ierr);}
1121   ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
1122   ierr = PetscFree(aij->ld);CHKERRQ(ierr);
1123   ierr = PetscFree(mat->data);CHKERRQ(ierr);
1124 
1125   ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
1126   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL);CHKERRQ(ierr);
1127   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL);CHKERRQ(ierr);
1128   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C",NULL);CHKERRQ(ierr);
1129   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C",NULL);CHKERRQ(ierr);
1130   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatResetPreallocation_C",NULL);CHKERRQ(ierr);
1131   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C",NULL);CHKERRQ(ierr);
1132   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C",NULL);CHKERRQ(ierr);
1133   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C",NULL);CHKERRQ(ierr);
1134 #if defined(PETSC_HAVE_ELEMENTAL)
1135   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_elemental_C",NULL);CHKERRQ(ierr);
1136 #endif
1137 #if defined(PETSC_HAVE_HYPRE)
1138   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_hypre_C",NULL);CHKERRQ(ierr);
1139   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMatMult_transpose_mpiaij_mpiaij_C",NULL);CHKERRQ(ierr);
1140 #endif
1141   PetscFunctionReturn(0);
1142 }
1143 
1144 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
1145 {
1146   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1147   Mat_SeqAIJ     *A   = (Mat_SeqAIJ*)aij->A->data;
1148   Mat_SeqAIJ     *B   = (Mat_SeqAIJ*)aij->B->data;
1149   PetscErrorCode ierr;
1150   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
1151   int            fd;
1152   PetscInt       nz,header[4],*row_lengths,*range=0,rlen,i;
1153   PetscInt       nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap->rstart,rnz = 0;
1154   PetscScalar    *column_values;
1155   PetscInt       message_count,flowcontrolcount;
1156   FILE           *file;
1157 
1158   PetscFunctionBegin;
1159   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
1160   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);CHKERRQ(ierr);
1161   nz   = A->nz + B->nz;
1162   ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1163   if (!rank) {
1164     header[0] = MAT_FILE_CLASSID;
1165     header[1] = mat->rmap->N;
1166     header[2] = mat->cmap->N;
1167 
1168     ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1169     ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1170     /* get largest number of rows any processor has */
1171     rlen  = mat->rmap->n;
1172     range = mat->rmap->range;
1173     for (i=1; i<size; i++) rlen = PetscMax(rlen,range[i+1] - range[i]);
1174   } else {
1175     ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1176     rlen = mat->rmap->n;
1177   }
1178 
1179   /* load up the local row counts */
1180   ierr = PetscMalloc1(rlen+1,&row_lengths);CHKERRQ(ierr);
1181   for (i=0; i<mat->rmap->n; i++) row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
1182 
1183   /* store the row lengths to the file */
1184   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1185   if (!rank) {
1186     ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap->n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1187     for (i=1; i<size; i++) {
1188       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1189       rlen = range[i+1] - range[i];
1190       ierr = MPIULong_Recv(row_lengths,rlen,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1191       ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1192     }
1193     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1194   } else {
1195     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1196     ierr = MPIULong_Send(row_lengths,mat->rmap->n,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1197     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1198   }
1199   ierr = PetscFree(row_lengths);CHKERRQ(ierr);
1200 
1201   /* load up the local column indices */
1202   nzmax = nz; /* this processor needs as much space as the largest processor needs */
1203   ierr  = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1204   ierr  = PetscMalloc1(nzmax+1,&column_indices);CHKERRQ(ierr);
1205   cnt   = 0;
1206   for (i=0; i<mat->rmap->n; i++) {
1207     for (j=B->i[i]; j<B->i[i+1]; j++) {
1208       if ((col = garray[B->j[j]]) > cstart) break;
1209       column_indices[cnt++] = col;
1210     }
1211     for (k=A->i[i]; k<A->i[i+1]; k++) column_indices[cnt++] = A->j[k] + cstart;
1212     for (; j<B->i[i+1]; j++) column_indices[cnt++] = garray[B->j[j]];
1213   }
1214   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
1215 
1216   /* store the column indices to the file */
1217   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1218   if (!rank) {
1219     MPI_Status status;
1220     ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1221     for (i=1; i<size; i++) {
1222       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1223       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
1224       if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: rnz = %D nzmax = %D",rnz,nzmax);
1225       ierr = MPIULong_Recv(column_indices,rnz,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1226       ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1227     }
1228     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1229   } else {
1230     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1231     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1232     ierr = MPIULong_Send(column_indices,nz,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1233     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1234   }
1235   ierr = PetscFree(column_indices);CHKERRQ(ierr);
1236 
1237   /* load up the local column values */
1238   ierr = PetscMalloc1(nzmax+1,&column_values);CHKERRQ(ierr);
1239   cnt  = 0;
1240   for (i=0; i<mat->rmap->n; i++) {
1241     for (j=B->i[i]; j<B->i[i+1]; j++) {
1242       if (garray[B->j[j]] > cstart) break;
1243       column_values[cnt++] = B->a[j];
1244     }
1245     for (k=A->i[i]; k<A->i[i+1]; k++) column_values[cnt++] = A->a[k];
1246     for (; j<B->i[i+1]; j++) column_values[cnt++] = B->a[j];
1247   }
1248   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
1249 
1250   /* store the column values to the file */
1251   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1252   if (!rank) {
1253     MPI_Status status;
1254     ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1255     for (i=1; i<size; i++) {
1256       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1257       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
1258       if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: rnz = %D nzmax = %D",rnz,nzmax);
1259       ierr = MPIULong_Recv(column_values,rnz,MPIU_SCALAR,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1260       ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1261     }
1262     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1263   } else {
1264     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1265     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1266     ierr = MPIULong_Send(column_values,nz,MPIU_SCALAR,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1267     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1268   }
1269   ierr = PetscFree(column_values);CHKERRQ(ierr);
1270 
1271   ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr);
1272   if (file) fprintf(file,"-matload_block_size %d\n",(int)PetscAbs(mat->rmap->bs));
1273   PetscFunctionReturn(0);
1274 }
1275 
1276 #include <petscdraw.h>
1277 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
1278 {
1279   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
1280   PetscErrorCode    ierr;
1281   PetscMPIInt       rank = aij->rank,size = aij->size;
1282   PetscBool         isdraw,iascii,isbinary;
1283   PetscViewer       sviewer;
1284   PetscViewerFormat format;
1285 
1286   PetscFunctionBegin;
1287   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1288   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1289   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1290   if (iascii) {
1291     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1292     if (format == PETSC_VIEWER_LOAD_BALANCE) {
1293       PetscInt i,nmax = 0,nmin = PETSC_MAX_INT,navg = 0,*nz,nzlocal = ((Mat_SeqAIJ*) (aij->A->data))->nz + ((Mat_SeqAIJ*) (aij->B->data))->nz;
1294       ierr = PetscMalloc1(size,&nz);CHKERRQ(ierr);
1295       ierr = MPI_Allgather(&nzlocal,1,MPIU_INT,nz,1,MPIU_INT,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1296       for (i=0; i<(PetscInt)size; i++) {
1297         nmax = PetscMax(nmax,nz[i]);
1298         nmin = PetscMin(nmin,nz[i]);
1299         navg += nz[i];
1300       }
1301       ierr = PetscFree(nz);CHKERRQ(ierr);
1302       navg = navg/size;
1303       ierr = PetscViewerASCIIPrintf(viewer,"Load Balance - Nonzeros: Min %D  avg %D  max %D\n",nmin,navg,nmax);CHKERRQ(ierr);
1304       PetscFunctionReturn(0);
1305     }
1306     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1307     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1308       MatInfo   info;
1309       PetscBool inodes;
1310 
1311       ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
1312       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
1313       ierr = MatInodeGetInodeSizes(aij->A,NULL,(PetscInt**)&inodes,NULL);CHKERRQ(ierr);
1314       ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr);
1315       if (!inodes) {
1316         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %g, not using I-node routines\n",
1317                                                   rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(double)info.memory);CHKERRQ(ierr);
1318       } else {
1319         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %g, using I-node routines\n",
1320                                                   rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(double)info.memory);CHKERRQ(ierr);
1321       }
1322       ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
1323       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1324       ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
1325       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1326       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1327       ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr);
1328       ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
1329       ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr);
1330       PetscFunctionReturn(0);
1331     } else if (format == PETSC_VIEWER_ASCII_INFO) {
1332       PetscInt inodecount,inodelimit,*inodes;
1333       ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr);
1334       if (inodes) {
1335         ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr);
1336       } else {
1337         ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr);
1338       }
1339       PetscFunctionReturn(0);
1340     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1341       PetscFunctionReturn(0);
1342     }
1343   } else if (isbinary) {
1344     if (size == 1) {
1345       ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1346       ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
1347     } else {
1348       ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr);
1349     }
1350     PetscFunctionReturn(0);
1351   } else if (isdraw) {
1352     PetscDraw draw;
1353     PetscBool isnull;
1354     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
1355     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr);
1356     if (isnull) PetscFunctionReturn(0);
1357   }
1358 
1359   {
1360     /* assemble the entire matrix onto first processor. */
1361     Mat        A;
1362     Mat_SeqAIJ *Aloc;
1363     PetscInt   M = mat->rmap->N,N = mat->cmap->N,m,*ai,*aj,row,*cols,i,*ct;
1364     MatScalar  *a;
1365 
1366     ierr = MatCreate(PetscObjectComm((PetscObject)mat),&A);CHKERRQ(ierr);
1367     if (!rank) {
1368       ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
1369     } else {
1370       ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
1371     }
1372     /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */
1373     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
1374     ierr = MatMPIAIJSetPreallocation(A,0,NULL,0,NULL);CHKERRQ(ierr);
1375     ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
1376     ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)A);CHKERRQ(ierr);
1377 
1378     /* copy over the A part */
1379     Aloc = (Mat_SeqAIJ*)aij->A->data;
1380     m    = aij->A->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1381     row  = mat->rmap->rstart;
1382     for (i=0; i<ai[m]; i++) aj[i] += mat->cmap->rstart;
1383     for (i=0; i<m; i++) {
1384       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
1385       row++;
1386       a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
1387     }
1388     aj = Aloc->j;
1389     for (i=0; i<ai[m]; i++) aj[i] -= mat->cmap->rstart;
1390 
1391     /* copy over the B part */
1392     Aloc = (Mat_SeqAIJ*)aij->B->data;
1393     m    = aij->B->rmap->n;  ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1394     row  = mat->rmap->rstart;
1395     ierr = PetscMalloc1(ai[m]+1,&cols);CHKERRQ(ierr);
1396     ct   = cols;
1397     for (i=0; i<ai[m]; i++) cols[i] = aij->garray[aj[i]];
1398     for (i=0; i<m; i++) {
1399       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
1400       row++;
1401       a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
1402     }
1403     ierr = PetscFree(ct);CHKERRQ(ierr);
1404     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1405     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1406     /*
1407        Everyone has to call to draw the matrix since the graphics waits are
1408        synchronized across all processors that share the PetscDraw object
1409     */
1410     ierr = PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
1411     if (!rank) {
1412       ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1413       ierr = MatView_SeqAIJ(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
1414     }
1415     ierr = PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
1416     ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1417     ierr = MatDestroy(&A);CHKERRQ(ierr);
1418   }
1419   PetscFunctionReturn(0);
1420 }
1421 
1422 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer)
1423 {
1424   PetscErrorCode ierr;
1425   PetscBool      iascii,isdraw,issocket,isbinary;
1426 
1427   PetscFunctionBegin;
1428   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1429   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1430   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1431   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
1432   if (iascii || isdraw || isbinary || issocket) {
1433     ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
1434   }
1435   PetscFunctionReturn(0);
1436 }
1437 
1438 PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
1439 {
1440   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1441   PetscErrorCode ierr;
1442   Vec            bb1 = 0;
1443   PetscBool      hasop;
1444 
1445   PetscFunctionBegin;
1446   if (flag == SOR_APPLY_UPPER) {
1447     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1448     PetscFunctionReturn(0);
1449   }
1450 
1451   if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) {
1452     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
1453   }
1454 
1455   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
1456     if (flag & SOR_ZERO_INITIAL_GUESS) {
1457       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1458       its--;
1459     }
1460 
1461     while (its--) {
1462       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1463       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1464 
1465       /* update rhs: bb1 = bb - B*x */
1466       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1467       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1468 
1469       /* local sweep */
1470       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1471     }
1472   } else if (flag & SOR_LOCAL_FORWARD_SWEEP) {
1473     if (flag & SOR_ZERO_INITIAL_GUESS) {
1474       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1475       its--;
1476     }
1477     while (its--) {
1478       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1479       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1480 
1481       /* update rhs: bb1 = bb - B*x */
1482       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1483       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1484 
1485       /* local sweep */
1486       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1487     }
1488   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) {
1489     if (flag & SOR_ZERO_INITIAL_GUESS) {
1490       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1491       its--;
1492     }
1493     while (its--) {
1494       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1495       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1496 
1497       /* update rhs: bb1 = bb - B*x */
1498       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1499       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1500 
1501       /* local sweep */
1502       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1503     }
1504   } else if (flag & SOR_EISENSTAT) {
1505     Vec xx1;
1506 
1507     ierr = VecDuplicate(bb,&xx1);CHKERRQ(ierr);
1508     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr);
1509 
1510     ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1511     ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1512     if (!mat->diag) {
1513       ierr = MatCreateVecs(matin,&mat->diag,NULL);CHKERRQ(ierr);
1514       ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr);
1515     }
1516     ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr);
1517     if (hasop) {
1518       ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr);
1519     } else {
1520       ierr = VecPointwiseMult(bb1,mat->diag,xx);CHKERRQ(ierr);
1521     }
1522     ierr = VecAYPX(bb1,(omega-2.0)/omega,bb);CHKERRQ(ierr);
1523 
1524     ierr = MatMultAdd(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr);
1525 
1526     /* local sweep */
1527     ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr);
1528     ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr);
1529     ierr = VecDestroy(&xx1);CHKERRQ(ierr);
1530   } else SETERRQ(PetscObjectComm((PetscObject)matin),PETSC_ERR_SUP,"Parallel SOR not supported");
1531 
1532   ierr = VecDestroy(&bb1);CHKERRQ(ierr);
1533 
1534   matin->factorerrortype = mat->A->factorerrortype;
1535   PetscFunctionReturn(0);
1536 }
1537 
1538 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B)
1539 {
1540   Mat            aA,aB,Aperm;
1541   const PetscInt *rwant,*cwant,*gcols,*ai,*bi,*aj,*bj;
1542   PetscScalar    *aa,*ba;
1543   PetscInt       i,j,m,n,ng,anz,bnz,*dnnz,*onnz,*tdnnz,*tonnz,*rdest,*cdest,*work,*gcdest;
1544   PetscSF        rowsf,sf;
1545   IS             parcolp = NULL;
1546   PetscBool      done;
1547   PetscErrorCode ierr;
1548 
1549   PetscFunctionBegin;
1550   ierr = MatGetLocalSize(A,&m,&n);CHKERRQ(ierr);
1551   ierr = ISGetIndices(rowp,&rwant);CHKERRQ(ierr);
1552   ierr = ISGetIndices(colp,&cwant);CHKERRQ(ierr);
1553   ierr = PetscMalloc3(PetscMax(m,n),&work,m,&rdest,n,&cdest);CHKERRQ(ierr);
1554 
1555   /* Invert row permutation to find out where my rows should go */
1556   ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&rowsf);CHKERRQ(ierr);
1557   ierr = PetscSFSetGraphLayout(rowsf,A->rmap,A->rmap->n,NULL,PETSC_OWN_POINTER,rwant);CHKERRQ(ierr);
1558   ierr = PetscSFSetFromOptions(rowsf);CHKERRQ(ierr);
1559   for (i=0; i<m; i++) work[i] = A->rmap->rstart + i;
1560   ierr = PetscSFReduceBegin(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr);
1561   ierr = PetscSFReduceEnd(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr);
1562 
1563   /* Invert column permutation to find out where my columns should go */
1564   ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
1565   ierr = PetscSFSetGraphLayout(sf,A->cmap,A->cmap->n,NULL,PETSC_OWN_POINTER,cwant);CHKERRQ(ierr);
1566   ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
1567   for (i=0; i<n; i++) work[i] = A->cmap->rstart + i;
1568   ierr = PetscSFReduceBegin(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr);
1569   ierr = PetscSFReduceEnd(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr);
1570   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
1571 
1572   ierr = ISRestoreIndices(rowp,&rwant);CHKERRQ(ierr);
1573   ierr = ISRestoreIndices(colp,&cwant);CHKERRQ(ierr);
1574   ierr = MatMPIAIJGetSeqAIJ(A,&aA,&aB,&gcols);CHKERRQ(ierr);
1575 
1576   /* Find out where my gcols should go */
1577   ierr = MatGetSize(aB,NULL,&ng);CHKERRQ(ierr);
1578   ierr = PetscMalloc1(ng,&gcdest);CHKERRQ(ierr);
1579   ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
1580   ierr = PetscSFSetGraphLayout(sf,A->cmap,ng,NULL,PETSC_OWN_POINTER,gcols);CHKERRQ(ierr);
1581   ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
1582   ierr = PetscSFBcastBegin(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr);
1583   ierr = PetscSFBcastEnd(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr);
1584   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
1585 
1586   ierr = PetscCalloc4(m,&dnnz,m,&onnz,m,&tdnnz,m,&tonnz);CHKERRQ(ierr);
1587   ierr = MatGetRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr);
1588   ierr = MatGetRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr);
1589   for (i=0; i<m; i++) {
1590     PetscInt row = rdest[i],rowner;
1591     ierr = PetscLayoutFindOwner(A->rmap,row,&rowner);CHKERRQ(ierr);
1592     for (j=ai[i]; j<ai[i+1]; j++) {
1593       PetscInt cowner,col = cdest[aj[j]];
1594       ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr); /* Could build an index for the columns to eliminate this search */
1595       if (rowner == cowner) dnnz[i]++;
1596       else onnz[i]++;
1597     }
1598     for (j=bi[i]; j<bi[i+1]; j++) {
1599       PetscInt cowner,col = gcdest[bj[j]];
1600       ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr);
1601       if (rowner == cowner) dnnz[i]++;
1602       else onnz[i]++;
1603     }
1604   }
1605   ierr = PetscSFBcastBegin(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr);
1606   ierr = PetscSFBcastEnd(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr);
1607   ierr = PetscSFBcastBegin(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr);
1608   ierr = PetscSFBcastEnd(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr);
1609   ierr = PetscSFDestroy(&rowsf);CHKERRQ(ierr);
1610 
1611   ierr = MatCreateAIJ(PetscObjectComm((PetscObject)A),A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N,0,tdnnz,0,tonnz,&Aperm);CHKERRQ(ierr);
1612   ierr = MatSeqAIJGetArray(aA,&aa);CHKERRQ(ierr);
1613   ierr = MatSeqAIJGetArray(aB,&ba);CHKERRQ(ierr);
1614   for (i=0; i<m; i++) {
1615     PetscInt *acols = dnnz,*bcols = onnz; /* Repurpose now-unneeded arrays */
1616     PetscInt j0,rowlen;
1617     rowlen = ai[i+1] - ai[i];
1618     for (j0=j=0; j<rowlen; j0=j) { /* rowlen can exceed m, the size of the scratch arrays, so insert in batches of at most m */
1619       for ( ; j<PetscMin(rowlen,j0+m); j++) acols[j-j0] = cdest[aj[ai[i]+j]];
1620       ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,acols,aa+ai[i]+j0,INSERT_VALUES);CHKERRQ(ierr);
1621     }
1622     rowlen = bi[i+1] - bi[i];
1623     for (j0=j=0; j<rowlen; j0=j) {
1624       for ( ; j<PetscMin(rowlen,j0+m); j++) bcols[j-j0] = gcdest[bj[bi[i]+j]];
1625       ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,bcols,ba+bi[i]+j0,INSERT_VALUES);CHKERRQ(ierr);
1626     }
1627   }
1628   ierr = MatAssemblyBegin(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1629   ierr = MatAssemblyEnd(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1630   ierr = MatRestoreRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr);
1631   ierr = MatRestoreRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr);
1632   ierr = MatSeqAIJRestoreArray(aA,&aa);CHKERRQ(ierr);
1633   ierr = MatSeqAIJRestoreArray(aB,&ba);CHKERRQ(ierr);
1634   ierr = PetscFree4(dnnz,onnz,tdnnz,tonnz);CHKERRQ(ierr);
1635   ierr = PetscFree3(work,rdest,cdest);CHKERRQ(ierr);
1636   ierr = PetscFree(gcdest);CHKERRQ(ierr);
1637   if (parcolp) {ierr = ISDestroy(&colp);CHKERRQ(ierr);}
1638   *B = Aperm;
1639   PetscFunctionReturn(0);
1640 }
1641 
1642 PetscErrorCode  MatGetGhosts_MPIAIJ(Mat mat,PetscInt *nghosts,const PetscInt *ghosts[])
1643 {
1644   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1645   PetscErrorCode ierr;
1646 
1647   PetscFunctionBegin;
1648   ierr = MatGetSize(aij->B,NULL,nghosts);CHKERRQ(ierr);
1649   if (ghosts) *ghosts = aij->garray;
1650   PetscFunctionReturn(0);
1651 }
1652 
1653 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1654 {
1655   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1656   Mat            A    = mat->A,B = mat->B;
1657   PetscErrorCode ierr;
1658   PetscReal      isend[5],irecv[5];
1659 
1660   PetscFunctionBegin;
1661   info->block_size = 1.0;
1662   ierr             = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1663 
1664   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1665   isend[3] = info->memory;  isend[4] = info->mallocs;
1666 
1667   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1668 
1669   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1670   isend[3] += info->memory;  isend[4] += info->mallocs;
1671   if (flag == MAT_LOCAL) {
1672     info->nz_used      = isend[0];
1673     info->nz_allocated = isend[1];
1674     info->nz_unneeded  = isend[2];
1675     info->memory       = isend[3];
1676     info->mallocs      = isend[4];
1677   } else if (flag == MAT_GLOBAL_MAX) {
1678     ierr = MPIU_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);
1679 
1680     info->nz_used      = irecv[0];
1681     info->nz_allocated = irecv[1];
1682     info->nz_unneeded  = irecv[2];
1683     info->memory       = irecv[3];
1684     info->mallocs      = irecv[4];
1685   } else if (flag == MAT_GLOBAL_SUM) {
1686     ierr = MPIU_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);
1687 
1688     info->nz_used      = irecv[0];
1689     info->nz_allocated = irecv[1];
1690     info->nz_unneeded  = irecv[2];
1691     info->memory       = irecv[3];
1692     info->mallocs      = irecv[4];
1693   }
1694   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1695   info->fill_ratio_needed = 0;
1696   info->factor_mallocs    = 0;
1697   PetscFunctionReturn(0);
1698 }
1699 
1700 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscBool flg)
1701 {
1702   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1703   PetscErrorCode ierr;
1704 
1705   PetscFunctionBegin;
1706   switch (op) {
1707   case MAT_NEW_NONZERO_LOCATIONS:
1708   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1709   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1710   case MAT_KEEP_NONZERO_PATTERN:
1711   case MAT_NEW_NONZERO_LOCATION_ERR:
1712   case MAT_USE_INODES:
1713   case MAT_IGNORE_ZERO_ENTRIES:
1714     MatCheckPreallocated(A,1);
1715     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1716     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1717     break;
1718   case MAT_ROW_ORIENTED:
1719     MatCheckPreallocated(A,1);
1720     a->roworiented = flg;
1721 
1722     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1723     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1724     break;
1725   case MAT_NEW_DIAGONALS:
1726     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1727     break;
1728   case MAT_IGNORE_OFF_PROC_ENTRIES:
1729     a->donotstash = flg;
1730     break;
1731   case MAT_SPD:
1732     A->spd_set = PETSC_TRUE;
1733     A->spd     = flg;
1734     if (flg) {
1735       A->symmetric                  = PETSC_TRUE;
1736       A->structurally_symmetric     = PETSC_TRUE;
1737       A->symmetric_set              = PETSC_TRUE;
1738       A->structurally_symmetric_set = PETSC_TRUE;
1739     }
1740     break;
1741   case MAT_SYMMETRIC:
1742     MatCheckPreallocated(A,1);
1743     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1744     break;
1745   case MAT_STRUCTURALLY_SYMMETRIC:
1746     MatCheckPreallocated(A,1);
1747     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1748     break;
1749   case MAT_HERMITIAN:
1750     MatCheckPreallocated(A,1);
1751     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1752     break;
1753   case MAT_SYMMETRY_ETERNAL:
1754     MatCheckPreallocated(A,1);
1755     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1756     break;
1757   case MAT_SUBMAT_SINGLEIS:
1758     A->submat_singleis = flg;
1759     break;
1760   case MAT_STRUCTURE_ONLY:
1761     /* The option is handled directly by MatSetOption() */
1762     break;
1763   default:
1764     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op);
1765   }
1766   PetscFunctionReturn(0);
1767 }
1768 
1769 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1770 {
1771   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1772   PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
1773   PetscErrorCode ierr;
1774   PetscInt       i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart;
1775   PetscInt       nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend;
1776   PetscInt       *cmap,*idx_p;
1777 
1778   PetscFunctionBegin;
1779   if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
1780   mat->getrowactive = PETSC_TRUE;
1781 
1782   if (!mat->rowvalues && (idx || v)) {
1783     /*
1784         allocate enough space to hold information from the longest row.
1785     */
1786     Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data;
1787     PetscInt   max = 1,tmp;
1788     for (i=0; i<matin->rmap->n; i++) {
1789       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1790       if (max < tmp) max = tmp;
1791     }
1792     ierr = PetscMalloc2(max,&mat->rowvalues,max,&mat->rowindices);CHKERRQ(ierr);
1793   }
1794 
1795   if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows");
1796   lrow = row - rstart;
1797 
1798   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1799   if (!v)   {pvA = 0; pvB = 0;}
1800   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1801   ierr  = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1802   ierr  = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1803   nztot = nzA + nzB;
1804 
1805   cmap = mat->garray;
1806   if (v  || idx) {
1807     if (nztot) {
1808       /* Sort by increasing column numbers, assuming A and B already sorted */
1809       PetscInt imark = -1;
1810       if (v) {
1811         *v = v_p = mat->rowvalues;
1812         for (i=0; i<nzB; i++) {
1813           if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i];
1814           else break;
1815         }
1816         imark = i;
1817         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1818         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1819       }
1820       if (idx) {
1821         *idx = idx_p = mat->rowindices;
1822         if (imark > -1) {
1823           for (i=0; i<imark; i++) {
1824             idx_p[i] = cmap[cworkB[i]];
1825           }
1826         } else {
1827           for (i=0; i<nzB; i++) {
1828             if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]];
1829             else break;
1830           }
1831           imark = i;
1832         }
1833         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart + cworkA[i];
1834         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]];
1835       }
1836     } else {
1837       if (idx) *idx = 0;
1838       if (v)   *v   = 0;
1839     }
1840   }
1841   *nz  = nztot;
1842   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1843   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1844   PetscFunctionReturn(0);
1845 }
1846 
1847 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1848 {
1849   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1850 
1851   PetscFunctionBegin;
1852   if (!aij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
1853   aij->getrowactive = PETSC_FALSE;
1854   PetscFunctionReturn(0);
1855 }
1856 
1857 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm)
1858 {
1859   Mat_MPIAIJ     *aij  = (Mat_MPIAIJ*)mat->data;
1860   Mat_SeqAIJ     *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data;
1861   PetscErrorCode ierr;
1862   PetscInt       i,j,cstart = mat->cmap->rstart;
1863   PetscReal      sum = 0.0;
1864   MatScalar      *v;
1865 
1866   PetscFunctionBegin;
1867   if (aij->size == 1) {
1868     ierr =  MatNorm(aij->A,type,norm);CHKERRQ(ierr);
1869   } else {
1870     if (type == NORM_FROBENIUS) {
1871       v = amat->a;
1872       for (i=0; i<amat->nz; i++) {
1873         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1874       }
1875       v = bmat->a;
1876       for (i=0; i<bmat->nz; i++) {
1877         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1878       }
1879       ierr  = MPIU_Allreduce(&sum,norm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1880       *norm = PetscSqrtReal(*norm);
1881       ierr = PetscLogFlops(2*amat->nz+2*bmat->nz);CHKERRQ(ierr);
1882     } else if (type == NORM_1) { /* max column norm */
1883       PetscReal *tmp,*tmp2;
1884       PetscInt  *jj,*garray = aij->garray;
1885       ierr  = PetscCalloc1(mat->cmap->N+1,&tmp);CHKERRQ(ierr);
1886       ierr  = PetscMalloc1(mat->cmap->N+1,&tmp2);CHKERRQ(ierr);
1887       *norm = 0.0;
1888       v     = amat->a; jj = amat->j;
1889       for (j=0; j<amat->nz; j++) {
1890         tmp[cstart + *jj++] += PetscAbsScalar(*v);  v++;
1891       }
1892       v = bmat->a; jj = bmat->j;
1893       for (j=0; j<bmat->nz; j++) {
1894         tmp[garray[*jj++]] += PetscAbsScalar(*v); v++;
1895       }
1896       ierr = MPIU_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1897       for (j=0; j<mat->cmap->N; j++) {
1898         if (tmp2[j] > *norm) *norm = tmp2[j];
1899       }
1900       ierr = PetscFree(tmp);CHKERRQ(ierr);
1901       ierr = PetscFree(tmp2);CHKERRQ(ierr);
1902       ierr = PetscLogFlops(PetscMax(amat->nz+bmat->nz-1,0));CHKERRQ(ierr);
1903     } else if (type == NORM_INFINITY) { /* max row norm */
1904       PetscReal ntemp = 0.0;
1905       for (j=0; j<aij->A->rmap->n; j++) {
1906         v   = amat->a + amat->i[j];
1907         sum = 0.0;
1908         for (i=0; i<amat->i[j+1]-amat->i[j]; i++) {
1909           sum += PetscAbsScalar(*v); v++;
1910         }
1911         v = bmat->a + bmat->i[j];
1912         for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) {
1913           sum += PetscAbsScalar(*v); v++;
1914         }
1915         if (sum > ntemp) ntemp = sum;
1916       }
1917       ierr = MPIU_Allreduce(&ntemp,norm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1918       ierr = PetscLogFlops(PetscMax(amat->nz+bmat->nz-1,0));CHKERRQ(ierr);
1919     } else SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"No support for two norm");
1920   }
1921   PetscFunctionReturn(0);
1922 }
1923 
1924 PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout)
1925 {
1926   Mat_MPIAIJ     *a   = (Mat_MPIAIJ*)A->data;
1927   Mat_SeqAIJ     *Aloc=(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data;
1928   PetscErrorCode ierr;
1929   PetscInt       M      = A->rmap->N,N = A->cmap->N,ma,na,mb,nb,*ai,*aj,*bi,*bj,row,*cols,*cols_tmp,i;
1930   PetscInt       cstart = A->cmap->rstart,ncol;
1931   Mat            B;
1932   MatScalar      *array;
1933 
1934   PetscFunctionBegin;
1935   if (reuse == MAT_INPLACE_MATRIX && M != N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
1936 
1937   ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n; nb = a->B->cmap->n;
1938   ai = Aloc->i; aj = Aloc->j;
1939   bi = Bloc->i; bj = Bloc->j;
1940   if (reuse == MAT_INITIAL_MATRIX || *matout == A) {
1941     PetscInt             *d_nnz,*g_nnz,*o_nnz;
1942     PetscSFNode          *oloc;
1943     PETSC_UNUSED PetscSF sf;
1944 
1945     ierr = PetscMalloc4(na,&d_nnz,na,&o_nnz,nb,&g_nnz,nb,&oloc);CHKERRQ(ierr);
1946     /* compute d_nnz for preallocation */
1947     ierr = PetscMemzero(d_nnz,na*sizeof(PetscInt));CHKERRQ(ierr);
1948     for (i=0; i<ai[ma]; i++) {
1949       d_nnz[aj[i]]++;
1950       aj[i] += cstart; /* global col index to be used by MatSetValues() */
1951     }
1952     /* compute local off-diagonal contributions */
1953     ierr = PetscMemzero(g_nnz,nb*sizeof(PetscInt));CHKERRQ(ierr);
1954     for (i=0; i<bi[ma]; i++) g_nnz[bj[i]]++;
1955     /* map those to global */
1956     ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
1957     ierr = PetscSFSetGraphLayout(sf,A->cmap,nb,NULL,PETSC_USE_POINTER,a->garray);CHKERRQ(ierr);
1958     ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
1959     ierr = PetscMemzero(o_nnz,na*sizeof(PetscInt));CHKERRQ(ierr);
1960     ierr = PetscSFReduceBegin(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr);
1961     ierr = PetscSFReduceEnd(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr);
1962     ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
1963 
1964     ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
1965     ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
1966     ierr = MatSetBlockSizes(B,PetscAbs(A->cmap->bs),PetscAbs(A->rmap->bs));CHKERRQ(ierr);
1967     ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
1968     ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
1969     ierr = PetscFree4(d_nnz,o_nnz,g_nnz,oloc);CHKERRQ(ierr);
1970   } else {
1971     B    = *matout;
1972     ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
1973     for (i=0; i<ai[ma]; i++) aj[i] += cstart; /* global col index to be used by MatSetValues() */
1974   }
1975 
1976   /* copy over the A part */
1977   array = Aloc->a;
1978   row   = A->rmap->rstart;
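  /* each local row of A becomes a column of B: the row's global column indices serve as B's row
     indices and the single global row number serves as B's column index */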
1979   for (i=0; i<ma; i++) {
1980     ncol = ai[i+1]-ai[i];
1981     ierr = MatSetValues(B,ncol,aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1982     row++;
1983     array += ncol; aj += ncol;
1984   }
1985   aj = Aloc->j;
1986   for (i=0; i<ai[ma]; i++) aj[i] -= cstart; /* restore the local column indices */
1987 
1988   /* copy over the B part */
1989   ierr  = PetscCalloc1(bi[mb],&cols);CHKERRQ(ierr);
1990   array = Bloc->a;
1991   row   = A->rmap->rstart;
1992   for (i=0; i<bi[mb]; i++) cols[i] = a->garray[bj[i]];
1993   cols_tmp = cols;
1994   for (i=0; i<mb; i++) {
1995     ncol = bi[i+1]-bi[i];
1996     ierr = MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1997     row++;
1998     array += ncol; cols_tmp += ncol;
1999   }
2000   ierr = PetscFree(cols);CHKERRQ(ierr);
2001 
2002   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2003   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2004   if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) {
2005     *matout = B;
2006   } else {
2007     ierr = MatHeaderMerge(A,&B);CHKERRQ(ierr);
2008   }
2009   PetscFunctionReturn(0);
2010 }
2011 
2012 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
2013 {
2014   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2015   Mat            a    = aij->A,b = aij->B;
2016   PetscErrorCode ierr;
2017   PetscInt       s1,s2,s3;
2018 
2019   PetscFunctionBegin;
2020   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
2021   if (rr) {
2022     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
2023     if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
2024     /* Overlap communication with computation. */
2025     ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2026   }
2027   if (ll) {
2028     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
2029     if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
2030     ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr);
2031   }
2032   /* scale the diagonal block */
2033   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
2034 
2035   if (rr) {
2036     /* Do a scatter end and then right scale the off-diagonal block */
2037     ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2038     ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr);
2039   }
2040   PetscFunctionReturn(0);
2041 }
2042 
2043 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A)
2044 {
2045   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2046   PetscErrorCode ierr;
2047 
2048   PetscFunctionBegin;
2049   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
2050   PetscFunctionReturn(0);
2051 }
2052 
2053 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscBool  *flag)
2054 {
2055   Mat_MPIAIJ     *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data;
2056   Mat            a,b,c,d;
2057   PetscBool      flg;
2058   PetscErrorCode ierr;
2059 
2060   PetscFunctionBegin;
2061   a = matA->A; b = matA->B;
2062   c = matB->A; d = matB->B;
2063 
2064   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
2065   if (flg) {
2066     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
2067   }
2068   ierr = MPIU_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
2069   PetscFunctionReturn(0);
2070 }
2071 
2072 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
2073 {
2074   PetscErrorCode ierr;
2075   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2076   Mat_MPIAIJ     *b = (Mat_MPIAIJ*)B->data;
2077 
2078   PetscFunctionBegin;
2079   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
2080   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
2081     /* because of the column compression in the off-processor part of the matrix a->B,
2082        the number of columns in a->B and b->B may be different, hence we cannot call
2083        the MatCopy() directly on the two parts. If need be, we can provide a more
2084        efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices
2085        then copying the submatrices */
2086     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
2087   } else {
2088     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
2089     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
2090   }
2091   ierr = PetscObjectStateIncrease((PetscObject)B);CHKERRQ(ierr);
2092   PetscFunctionReturn(0);
2093 }
2094 
2095 PetscErrorCode MatSetUp_MPIAIJ(Mat A)
2096 {
2097   PetscErrorCode ierr;
2098 
2099   PetscFunctionBegin;
2100   ierr =  MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
2101   PetscFunctionReturn(0);
2102 }
2103 
2104 /*
2105    Computes the number of nonzeros per row needed for preallocation when X and Y
2106    have different nonzero structure.
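   For example (illustration only): if row i of X has global columns {1,3,7} and row i of Y has
   {3,5,7}, the merged pattern is {1,3,5,7}, so nnz[i] = 4.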
2107 */
2108 PetscErrorCode MatAXPYGetPreallocation_MPIX_private(PetscInt m,const PetscInt *xi,const PetscInt *xj,const PetscInt *xltog,const PetscInt *yi,const PetscInt *yj,const PetscInt *yltog,PetscInt *nnz)
2109 {
2110   PetscInt       i,j,k,nzx,nzy;
2111 
2112   PetscFunctionBegin;
2113   /* Set the number of nonzeros in the new matrix */
2114   for (i=0; i<m; i++) {
2115     const PetscInt *xjj = xj+xi[i],*yjj = yj+yi[i];
2116     nzx = xi[i+1] - xi[i];
2117     nzy = yi[i+1] - yi[i];
2118     nnz[i] = 0;
2119     for (j=0,k=0; j<nzx; j++) {                   /* Point in X */
2120       for (; k<nzy && yltog[yjj[k]]<xltog[xjj[j]]; k++) nnz[i]++; /* Catch up to X */
2121       if (k<nzy && yltog[yjj[k]]==xltog[xjj[j]]) k++;             /* Skip duplicate */
2122       nnz[i]++;
2123     }
2124     for (; k<nzy; k++) nnz[i]++;
2125   }
2126   PetscFunctionReturn(0);
2127 }
2128 
2129 /* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */
2130 static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y,const PetscInt *yltog,Mat X,const PetscInt *xltog,PetscInt *nnz)
2131 {
2132   PetscErrorCode ierr;
2133   PetscInt       m = Y->rmap->N;
2134   Mat_SeqAIJ     *x = (Mat_SeqAIJ*)X->data;
2135   Mat_SeqAIJ     *y = (Mat_SeqAIJ*)Y->data;
2136 
2137   PetscFunctionBegin;
2138   ierr = MatAXPYGetPreallocation_MPIX_private(m,x->i,x->j,xltog,y->i,y->j,yltog,nnz);CHKERRQ(ierr);
2139   PetscFunctionReturn(0);
2140 }
2141 
2142 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
2143 {
2144   PetscErrorCode ierr;
2145   Mat_MPIAIJ     *xx = (Mat_MPIAIJ*)X->data,*yy = (Mat_MPIAIJ*)Y->data;
2146   PetscBLASInt   bnz,one=1;
2147   Mat_SeqAIJ     *x,*y;
2148 
2149   PetscFunctionBegin;
2150   if (str == SAME_NONZERO_PATTERN) {
2151     PetscScalar alpha = a;
2152     x    = (Mat_SeqAIJ*)xx->A->data;
2153     ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr);
2154     y    = (Mat_SeqAIJ*)yy->A->data;
2155     PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
2156     x    = (Mat_SeqAIJ*)xx->B->data;
2157     y    = (Mat_SeqAIJ*)yy->B->data;
2158     ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr);
2159     PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
2160     ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr);
2161   } else if (str == SUBSET_NONZERO_PATTERN) { /* the nonzeros of X are a subset of Y's */
2162     ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
2163   } else {
2164     Mat      B;
2165     PetscInt *nnz_d,*nnz_o;
2166     ierr = PetscMalloc1(yy->A->rmap->N,&nnz_d);CHKERRQ(ierr);
2167     ierr = PetscMalloc1(yy->B->rmap->N,&nnz_o);CHKERRQ(ierr);
2168     ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr);
2169     ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr);
2170     ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr);
2171     ierr = MatSetBlockSizesFromMats(B,Y,Y);CHKERRQ(ierr);
2172     ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr);
2173     ierr = MatAXPYGetPreallocation_SeqAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr);
2174     ierr = MatAXPYGetPreallocation_MPIAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr);
2175     ierr = MatMPIAIJSetPreallocation(B,0,nnz_d,0,nnz_o);CHKERRQ(ierr);
2176     ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr);
2177     ierr = MatHeaderReplace(Y,&B);CHKERRQ(ierr);
2178     ierr = PetscFree(nnz_d);CHKERRQ(ierr);
2179     ierr = PetscFree(nnz_o);CHKERRQ(ierr);
2180   }
2181   PetscFunctionReturn(0);
2182 }
2183 
2184 extern PetscErrorCode  MatConjugate_SeqAIJ(Mat);
2185 
2186 PetscErrorCode  MatConjugate_MPIAIJ(Mat mat)
2187 {
2188 #if defined(PETSC_USE_COMPLEX)
2189   PetscErrorCode ierr;
2190   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2191 
2192   PetscFunctionBegin;
2193   ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr);
2194   ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr);
2195 #else
2196   PetscFunctionBegin;
2197 #endif
2198   PetscFunctionReturn(0);
2199 }
2200 
2201 PetscErrorCode MatRealPart_MPIAIJ(Mat A)
2202 {
2203   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2204   PetscErrorCode ierr;
2205 
2206   PetscFunctionBegin;
2207   ierr = MatRealPart(a->A);CHKERRQ(ierr);
2208   ierr = MatRealPart(a->B);CHKERRQ(ierr);
2209   PetscFunctionReturn(0);
2210 }
2211 
2212 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A)
2213 {
2214   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2215   PetscErrorCode ierr;
2216 
2217   PetscFunctionBegin;
2218   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
2219   ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
2220   PetscFunctionReturn(0);
2221 }
2222 
2223 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2224 {
2225   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2226   PetscErrorCode ierr;
2227   PetscInt       i,*idxb = 0;
2228   PetscScalar    *va,*vb;
2229   Vec            vtmp;
2230 
2231   PetscFunctionBegin;
2232   ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr);
2233   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2234   if (idx) {
2235     for (i=0; i<A->rmap->n; i++) {
2236       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2237     }
2238   }
2239 
2240   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
2241   if (idx) {
2242     ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr);
2243   }
2244   ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
2245   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
2246 
2247   for (i=0; i<A->rmap->n; i++) {
2248     if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
2249       va[i] = vb[i];
2250       if (idx) idx[i] = a->garray[idxb[i]];
2251     }
2252   }
2253 
2254   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2255   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
2256   ierr = PetscFree(idxb);CHKERRQ(ierr);
2257   ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
2258   PetscFunctionReturn(0);
2259 }
2260 
2261 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2262 {
2263   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2264   PetscErrorCode ierr;
2265   PetscInt       i,*idxb = 0;
2266   PetscScalar    *va,*vb;
2267   Vec            vtmp;
2268 
2269   PetscFunctionBegin;
2270   ierr = MatGetRowMinAbs(a->A,v,idx);CHKERRQ(ierr);
2271   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2272   if (idx) {
2273     for (i=0; i<A->rmap->n; i++) {
2274       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2275     }
2276   }
2277 
2278   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
2279   if (idx) {
2280     ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr);
2281   }
2282   ierr = MatGetRowMinAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
2283   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
2284 
2285   for (i=0; i<A->rmap->n; i++) {
2286     if (PetscAbsScalar(va[i]) > PetscAbsScalar(vb[i])) {
2287       va[i] = vb[i];
2288       if (idx) idx[i] = a->garray[idxb[i]];
2289     }
2290   }
2291 
2292   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2293   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
2294   ierr = PetscFree(idxb);CHKERRQ(ierr);
2295   ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
2296   PetscFunctionReturn(0);
2297 }
2298 
2299 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2300 {
2301   Mat_MPIAIJ     *mat   = (Mat_MPIAIJ*) A->data;
2302   PetscInt       n      = A->rmap->n;
2303   PetscInt       cstart = A->cmap->rstart;
2304   PetscInt       *cmap  = mat->garray;
2305   PetscInt       *diagIdx, *offdiagIdx;
2306   Vec            diagV, offdiagV;
2307   PetscScalar    *a, *diagA, *offdiagA;
2308   PetscInt       r;
2309   PetscErrorCode ierr;
2310 
2311   PetscFunctionBegin;
2312   ierr = PetscMalloc2(n,&diagIdx,n,&offdiagIdx);CHKERRQ(ierr);
2313   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr);
2314   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr);
2315   ierr = MatGetRowMin(mat->A, diagV,    diagIdx);CHKERRQ(ierr);
2316   ierr = MatGetRowMin(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
2317   ierr = VecGetArray(v,        &a);CHKERRQ(ierr);
2318   ierr = VecGetArray(diagV,    &diagA);CHKERRQ(ierr);
2319   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2320   for (r = 0; r < n; ++r) {
2321     if (PetscAbsScalar(diagA[r]) <= PetscAbsScalar(offdiagA[r])) {
2322       a[r]   = diagA[r];
2323       idx[r] = cstart + diagIdx[r];
2324     } else {
2325       a[r]   = offdiagA[r];
2326       idx[r] = cmap[offdiagIdx[r]];
2327     }
2328   }
2329   ierr = VecRestoreArray(v,        &a);CHKERRQ(ierr);
2330   ierr = VecRestoreArray(diagV,    &diagA);CHKERRQ(ierr);
2331   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2332   ierr = VecDestroy(&diagV);CHKERRQ(ierr);
2333   ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
2334   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
2335   PetscFunctionReturn(0);
2336 }
2337 
2338 PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2339 {
2340   Mat_MPIAIJ     *mat   = (Mat_MPIAIJ*) A->data;
2341   PetscInt       n      = A->rmap->n;
2342   PetscInt       cstart = A->cmap->rstart;
2343   PetscInt       *cmap  = mat->garray;
2344   PetscInt       *diagIdx, *offdiagIdx;
2345   Vec            diagV, offdiagV;
2346   PetscScalar    *a, *diagA, *offdiagA;
2347   PetscInt       r;
2348   PetscErrorCode ierr;
2349 
2350   PetscFunctionBegin;
2351   ierr = PetscMalloc2(n,&diagIdx,n,&offdiagIdx);CHKERRQ(ierr);
2352   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr);
2353   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr);
2354   ierr = MatGetRowMax(mat->A, diagV,    diagIdx);CHKERRQ(ierr);
2355   ierr = MatGetRowMax(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
2356   ierr = VecGetArray(v,        &a);CHKERRQ(ierr);
2357   ierr = VecGetArray(diagV,    &diagA);CHKERRQ(ierr);
2358   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2359   for (r = 0; r < n; ++r) {
2360     if (PetscAbsScalar(diagA[r]) >= PetscAbsScalar(offdiagA[r])) {
2361       a[r]   = diagA[r];
2362       idx[r] = cstart + diagIdx[r];
2363     } else {
2364       a[r]   = offdiagA[r];
2365       idx[r] = cmap[offdiagIdx[r]];
2366     }
2367   }
2368   ierr = VecRestoreArray(v,        &a);CHKERRQ(ierr);
2369   ierr = VecRestoreArray(diagV,    &diagA);CHKERRQ(ierr);
2370   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2371   ierr = VecDestroy(&diagV);CHKERRQ(ierr);
2372   ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
2373   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
2374   PetscFunctionReturn(0);
2375 }
2376 
2377 PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat,Mat *newmat)
2378 {
2379   PetscErrorCode ierr;
2380   Mat            *dummy;
2381 
2382   PetscFunctionBegin;
2383   ierr    = MatCreateSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy);CHKERRQ(ierr);
2384   *newmat = *dummy;
2385   ierr    = PetscFree(dummy);CHKERRQ(ierr);
2386   PetscFunctionReturn(0);
2387 }
2388 
2389 PetscErrorCode  MatInvertBlockDiagonal_MPIAIJ(Mat A,const PetscScalar **values)
2390 {
2391   Mat_MPIAIJ     *a = (Mat_MPIAIJ*) A->data;
2392   PetscErrorCode ierr;
2393 
2394   PetscFunctionBegin;
2395   ierr = MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr);
2396   A->factorerrortype = a->A->factorerrortype;
2397   PetscFunctionReturn(0);
2398 }
2399 
2400 static PetscErrorCode  MatSetRandom_MPIAIJ(Mat x,PetscRandom rctx)
2401 {
2402   PetscErrorCode ierr;
2403   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)x->data;
2404 
2405   PetscFunctionBegin;
2406   ierr = MatSetRandom(aij->A,rctx);CHKERRQ(ierr);
2407   ierr = MatSetRandom(aij->B,rctx);CHKERRQ(ierr);
2408   ierr = MatAssemblyBegin(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2409   ierr = MatAssemblyEnd(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2410   PetscFunctionReturn(0);
2411 }
2412 
2413 PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ(Mat A,PetscBool sc)
2414 {
2415   PetscFunctionBegin;
2416   if (sc) A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ_Scalable;
2417   else A->ops->increaseoverlap    = MatIncreaseOverlap_MPIAIJ;
2418   PetscFunctionReturn(0);
2419 }
2420 
2421 /*@
2422    MatMPIAIJSetUseScalableIncreaseOverlap - Set whether the matrix uses a scalable algorithm to compute the overlap in MatIncreaseOverlap()
2423 
2424    Collective on Mat
2425 
2426    Input Parameters:
2427 +    A - the matrix
2428 -    sc - PETSC_TRUE indicates use the scalable algorithm (default is not to use the scalable algorithm)
2429 
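   Sample usage (a minimal sketch, not part of the original manual page; assumes A is an assembled MATMPIAIJ matrix):
.vb
     ierr = MatMPIAIJSetUseScalableIncreaseOverlap(A,PETSC_TRUE);CHKERRQ(ierr);
.ve
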
2430  Level: advanced
2431 
2432 @*/
2433 PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap(Mat A,PetscBool sc)
2434 {
2435   PetscErrorCode       ierr;
2436 
2437   PetscFunctionBegin;
2438   ierr = PetscTryMethod(A,"MatMPIAIJSetUseScalableIncreaseOverlap_C",(Mat,PetscBool),(A,sc));CHKERRQ(ierr);
2439   PetscFunctionReturn(0);
2440 }
2441 
2442 PetscErrorCode MatSetFromOptions_MPIAIJ(PetscOptionItems *PetscOptionsObject,Mat A)
2443 {
2444   PetscErrorCode       ierr;
2445   PetscBool            sc = PETSC_FALSE,flg;
2446 
2447   PetscFunctionBegin;
2448   ierr = PetscOptionsHead(PetscOptionsObject,"MPIAIJ options");CHKERRQ(ierr);
2450   if (A->ops->increaseoverlap == MatIncreaseOverlap_MPIAIJ_Scalable) sc = PETSC_TRUE;
2451   ierr = PetscOptionsBool("-mat_increase_overlap_scalable","Use a scalable algorithm to compute the overlap","MatIncreaseOverlap",sc,&sc,&flg);CHKERRQ(ierr);
2452   if (flg) {
2453     ierr = MatMPIAIJSetUseScalableIncreaseOverlap(A,sc);CHKERRQ(ierr);
2454   }
2455   ierr = PetscOptionsTail();CHKERRQ(ierr);
2456   PetscFunctionReturn(0);
2457 }
2458 
2459 PetscErrorCode MatShift_MPIAIJ(Mat Y,PetscScalar a)
2460 {
2461   PetscErrorCode ierr;
2462   Mat_MPIAIJ     *maij = (Mat_MPIAIJ*)Y->data;
2463   Mat_SeqAIJ     *aij = (Mat_SeqAIJ*)maij->A->data;
2464 
2465   PetscFunctionBegin;
2466   if (!Y->preallocated) {
2467     ierr = MatMPIAIJSetPreallocation(Y,1,NULL,0,NULL);CHKERRQ(ierr);
2468   } else if (!aij->nz) {
2469     PetscInt nonew = aij->nonew;
2470     ierr = MatSeqAIJSetPreallocation(maij->A,1,NULL);CHKERRQ(ierr);
2471     aij->nonew = nonew;
2472   }
2473   ierr = MatShift_Basic(Y,a);CHKERRQ(ierr);
2474   PetscFunctionReturn(0);
2475 }
2476 
2477 PetscErrorCode MatMissingDiagonal_MPIAIJ(Mat A,PetscBool  *missing,PetscInt *d)
2478 {
2479   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2480   PetscErrorCode ierr;
2481 
2482   PetscFunctionBegin;
2483   if (A->rmap->n != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only works for square matrices");
2484   ierr = MatMissingDiagonal(a->A,missing,d);CHKERRQ(ierr);
2485   if (d) {
2486     PetscInt rstart;
2487     ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr);
2488     *d += rstart;
2489 
2490   }
2491   PetscFunctionReturn(0);
2492 }
2493 
2494 
2495 /* -------------------------------------------------------------------*/
2496 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
2497                                        MatGetRow_MPIAIJ,
2498                                        MatRestoreRow_MPIAIJ,
2499                                        MatMult_MPIAIJ,
2500                                 /* 4*/ MatMultAdd_MPIAIJ,
2501                                        MatMultTranspose_MPIAIJ,
2502                                        MatMultTransposeAdd_MPIAIJ,
2503                                        0,
2504                                        0,
2505                                        0,
2506                                 /*10*/ 0,
2507                                        0,
2508                                        0,
2509                                        MatSOR_MPIAIJ,
2510                                        MatTranspose_MPIAIJ,
2511                                 /*15*/ MatGetInfo_MPIAIJ,
2512                                        MatEqual_MPIAIJ,
2513                                        MatGetDiagonal_MPIAIJ,
2514                                        MatDiagonalScale_MPIAIJ,
2515                                        MatNorm_MPIAIJ,
2516                                 /*20*/ MatAssemblyBegin_MPIAIJ,
2517                                        MatAssemblyEnd_MPIAIJ,
2518                                        MatSetOption_MPIAIJ,
2519                                        MatZeroEntries_MPIAIJ,
2520                                 /*24*/ MatZeroRows_MPIAIJ,
2521                                        0,
2522                                        0,
2523                                        0,
2524                                        0,
2525                                 /*29*/ MatSetUp_MPIAIJ,
2526                                        0,
2527                                        0,
2528                                        MatGetDiagonalBlock_MPIAIJ,
2529                                        0,
2530                                 /*34*/ MatDuplicate_MPIAIJ,
2531                                        0,
2532                                        0,
2533                                        0,
2534                                        0,
2535                                 /*39*/ MatAXPY_MPIAIJ,
2536                                        MatCreateSubMatrices_MPIAIJ,
2537                                        MatIncreaseOverlap_MPIAIJ,
2538                                        MatGetValues_MPIAIJ,
2539                                        MatCopy_MPIAIJ,
2540                                 /*44*/ MatGetRowMax_MPIAIJ,
2541                                        MatScale_MPIAIJ,
2542                                        MatShift_MPIAIJ,
2543                                        MatDiagonalSet_MPIAIJ,
2544                                        MatZeroRowsColumns_MPIAIJ,
2545                                 /*49*/ MatSetRandom_MPIAIJ,
2546                                        0,
2547                                        0,
2548                                        0,
2549                                        0,
2550                                 /*54*/ MatFDColoringCreate_MPIXAIJ,
2551                                        0,
2552                                        MatSetUnfactored_MPIAIJ,
2553                                        MatPermute_MPIAIJ,
2554                                        0,
2555                                 /*59*/ MatCreateSubMatrix_MPIAIJ,
2556                                        MatDestroy_MPIAIJ,
2557                                        MatView_MPIAIJ,
2558                                        0,
2559                                        MatMatMatMult_MPIAIJ_MPIAIJ_MPIAIJ,
2560                                 /*64*/ MatMatMatMultSymbolic_MPIAIJ_MPIAIJ_MPIAIJ,
2561                                        MatMatMatMultNumeric_MPIAIJ_MPIAIJ_MPIAIJ,
2562                                        0,
2563                                        0,
2564                                        0,
2565                                 /*69*/ MatGetRowMaxAbs_MPIAIJ,
2566                                        MatGetRowMinAbs_MPIAIJ,
2567                                        0,
2568                                        0,
2569                                        0,
2570                                        0,
2571                                 /*75*/ MatFDColoringApply_AIJ,
2572                                        MatSetFromOptions_MPIAIJ,
2573                                        0,
2574                                        0,
2575                                        MatFindZeroDiagonals_MPIAIJ,
2576                                 /*80*/ 0,
2577                                        0,
2578                                        0,
2579                                 /*83*/ MatLoad_MPIAIJ,
2580                                        MatIsSymmetric_MPIAIJ,
2581                                        0,
2582                                        0,
2583                                        0,
2584                                        0,
2585                                 /*89*/ MatMatMult_MPIAIJ_MPIAIJ,
2586                                        MatMatMultSymbolic_MPIAIJ_MPIAIJ,
2587                                        MatMatMultNumeric_MPIAIJ_MPIAIJ,
2588                                        MatPtAP_MPIAIJ_MPIAIJ,
2589                                        MatPtAPSymbolic_MPIAIJ_MPIAIJ,
2590                                 /*94*/ MatPtAPNumeric_MPIAIJ_MPIAIJ,
2591                                        0,
2592                                        0,
2593                                        0,
2594                                        0,
2595                                 /*99*/ 0,
2596                                        0,
2597                                        0,
2598                                        MatConjugate_MPIAIJ,
2599                                        0,
2600                                 /*104*/MatSetValuesRow_MPIAIJ,
2601                                        MatRealPart_MPIAIJ,
2602                                        MatImaginaryPart_MPIAIJ,
2603                                        0,
2604                                        0,
2605                                 /*109*/0,
2606                                        0,
2607                                        MatGetRowMin_MPIAIJ,
2608                                        0,
2609                                        MatMissingDiagonal_MPIAIJ,
2610                                 /*114*/MatGetSeqNonzeroStructure_MPIAIJ,
2611                                        0,
2612                                        MatGetGhosts_MPIAIJ,
2613                                        0,
2614                                        0,
2615                                 /*119*/0,
2616                                        0,
2617                                        0,
2618                                        0,
2619                                        MatGetMultiProcBlock_MPIAIJ,
2620                                 /*124*/MatFindNonzeroRows_MPIAIJ,
2621                                        MatGetColumnNorms_MPIAIJ,
2622                                        MatInvertBlockDiagonal_MPIAIJ,
2623                                        0,
2624                                        MatCreateSubMatricesMPI_MPIAIJ,
2625                                 /*129*/0,
2626                                        MatTransposeMatMult_MPIAIJ_MPIAIJ,
2627                                        MatTransposeMatMultSymbolic_MPIAIJ_MPIAIJ,
2628                                        MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ,
2629                                        0,
2630                                 /*134*/0,
2631                                        0,
2632                                        MatRARt_MPIAIJ_MPIAIJ,
2633                                        0,
2634                                        0,
2635                                 /*139*/MatSetBlockSizes_MPIAIJ,
2636                                        0,
2637                                        0,
2638                                        MatFDColoringSetUp_MPIXAIJ,
2639                                        MatFindOffBlockDiagonalEntries_MPIAIJ,
2640                                 /*144*/MatCreateMPIMatConcatenateSeqMat_MPIAIJ
2641 };
2642 
2643 /* ----------------------------------------------------------------------------------------*/
2644 
2645 PetscErrorCode  MatStoreValues_MPIAIJ(Mat mat)
2646 {
2647   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2648   PetscErrorCode ierr;
2649 
2650   PetscFunctionBegin;
2651   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
2652   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
2653   PetscFunctionReturn(0);
2654 }
2655 
2656 PetscErrorCode  MatRetrieveValues_MPIAIJ(Mat mat)
2657 {
2658   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2659   PetscErrorCode ierr;
2660 
2661   PetscFunctionBegin;
2662   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
2663   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
2664   PetscFunctionReturn(0);
2665 }
2666 
2667 PetscErrorCode  MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
2668 {
2669   Mat_MPIAIJ     *b;
2670   PetscErrorCode ierr;
2671 
2672   PetscFunctionBegin;
2673   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
2674   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
2675   b = (Mat_MPIAIJ*)B->data;
2676 
2677 #if defined(PETSC_USE_CTABLE)
2678   ierr = PetscTableDestroy(&b->colmap);CHKERRQ(ierr);
2679 #else
2680   ierr = PetscFree(b->colmap);CHKERRQ(ierr);
2681 #endif
2682   ierr = PetscFree(b->garray);CHKERRQ(ierr);
2683   ierr = VecDestroy(&b->lvec);CHKERRQ(ierr);
2684   ierr = VecScatterDestroy(&b->Mvctx);CHKERRQ(ierr);
2685 
2686   /* Because B will have been resized, we simply destroy it and create a new one each time */
2687   ierr = MatDestroy(&b->B);CHKERRQ(ierr);
2688   ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
2689   ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr);
2690   ierr = MatSetBlockSizesFromMats(b->B,B,B);CHKERRQ(ierr);
2691   ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr);
2692   ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->B);CHKERRQ(ierr);
2693 
2694   if (!B->preallocated) {
2695     ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
2696     ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr);
2697     ierr = MatSetBlockSizesFromMats(b->A,B,B);CHKERRQ(ierr);
2698     ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr);
2699     ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->A);CHKERRQ(ierr);
2700   }
2701 
2702   ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr);
2703   ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr);
2704   B->preallocated  = PETSC_TRUE;
2705   B->was_assembled = PETSC_FALSE;
2706   B->assembled     = PETSC_FALSE;
2707   PetscFunctionReturn(0);
2708 }
2709 
2710 PetscErrorCode MatResetPreallocation_MPIAIJ(Mat B)
2711 {
2712   Mat_MPIAIJ     *b;
2713   PetscErrorCode ierr;
2714 
2715   PetscFunctionBegin;
2716   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
2717   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
2718   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
2719   b = (Mat_MPIAIJ*)B->data;
2720 
2721 #if defined(PETSC_USE_CTABLE)
2722   ierr = PetscTableDestroy(&b->colmap);CHKERRQ(ierr);
2723 #else
2724   ierr = PetscFree(b->colmap);CHKERRQ(ierr);
2725 #endif
2726   ierr = PetscFree(b->garray);CHKERRQ(ierr);
2727   ierr = VecDestroy(&b->lvec);CHKERRQ(ierr);
2728   ierr = VecScatterDestroy(&b->Mvctx);CHKERRQ(ierr);
2729 
2730   ierr = MatResetPreallocation(b->A);CHKERRQ(ierr);
2731   ierr = MatResetPreallocation(b->B);CHKERRQ(ierr);
2732   B->preallocated  = PETSC_TRUE;
2733   B->was_assembled = PETSC_FALSE;
2734   B->assembled = PETSC_FALSE;
2735   PetscFunctionReturn(0);
2736 }
2737 
2738 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
2739 {
2740   Mat            mat;
2741   Mat_MPIAIJ     *a,*oldmat = (Mat_MPIAIJ*)matin->data;
2742   PetscErrorCode ierr;
2743 
2744   PetscFunctionBegin;
2745   *newmat = 0;
2746   ierr    = MatCreate(PetscObjectComm((PetscObject)matin),&mat);CHKERRQ(ierr);
2747   ierr    = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr);
2748   ierr    = MatSetBlockSizesFromMats(mat,matin,matin);CHKERRQ(ierr);
2749   ierr    = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr);
2750   ierr    = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
2751   a       = (Mat_MPIAIJ*)mat->data;
2752 
2753   mat->factortype   = matin->factortype;
2754   mat->assembled    = PETSC_TRUE;
2755   mat->insertmode   = NOT_SET_VALUES;
2756   mat->preallocated = PETSC_TRUE;
2757 
2758   a->size         = oldmat->size;
2759   a->rank         = oldmat->rank;
2760   a->donotstash   = oldmat->donotstash;
2761   a->roworiented  = oldmat->roworiented;
2762   a->rowindices   = 0;
2763   a->rowvalues    = 0;
2764   a->getrowactive = PETSC_FALSE;
2765 
2766   ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr);
2767   ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr);
2768 
2769   if (oldmat->colmap) {
2770 #if defined(PETSC_USE_CTABLE)
2771     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
2772 #else
2773     ierr = PetscMalloc1(mat->cmap->N,&a->colmap);CHKERRQ(ierr);
2774     ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
2775     ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
2776 #endif
2777   } else a->colmap = 0;
2778   if (oldmat->garray) {
2779     PetscInt len;
2780     len  = oldmat->B->cmap->n;
2781     ierr = PetscMalloc1(len+1,&a->garray);CHKERRQ(ierr);
2782     ierr = PetscLogObjectMemory((PetscObject)mat,len*sizeof(PetscInt));CHKERRQ(ierr);
2783     if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); }
2784   } else a->garray = 0;
2785 
2786   ierr    = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
2787   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->lvec);CHKERRQ(ierr);
2788   ierr    = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
2789   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx);CHKERRQ(ierr);
2790 
2791   if (oldmat->Mvctx_mpi1) {
2792     ierr    = VecScatterCopy(oldmat->Mvctx_mpi1,&a->Mvctx_mpi1);CHKERRQ(ierr);
2793     ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx_mpi1);CHKERRQ(ierr);
2794   }
2795 
2796   ierr    = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
2797   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr);
2798   ierr    = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
2799   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->B);CHKERRQ(ierr);
2800   ierr    = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr);
2801   *newmat = mat;
2802   PetscFunctionReturn(0);
2803 }
2804 
2805 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer)
2806 {
2807   PetscScalar    *vals,*svals;
2808   MPI_Comm       comm;
2809   PetscErrorCode ierr;
2810   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
2811   PetscInt       i,nz,j,rstart,rend,mmax,maxnz = 0;
2812   PetscInt       header[4],*rowlengths = 0,M,N,m,*cols;
2813   PetscInt       *ourlens = NULL,*procsnz = NULL,*offlens = NULL,jj,*mycols,*smycols;
2814   PetscInt       cend,cstart,n,*rowners;
2815   int            fd;
2816   PetscInt       bs = newMat->rmap->bs;
2817 
2818   PetscFunctionBegin;
2819   /* force binary viewer to load .info file if it has not yet done so */
2820   ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr);
2821   ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr);
2822   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2823   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2824   ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
2825   if (!rank) {
2826     ierr = PetscBinaryRead(fd,(char*)header,4,PETSC_INT);CHKERRQ(ierr);
2827     if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
2828     if (header[3] < 0) SETERRQ(PetscObjectComm((PetscObject)newMat),PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format on disk, cannot load as MATMPIAIJ");
2829   }
2830 
2831   ierr = PetscOptionsBegin(comm,NULL,"Options for loading MATMPIAIJ matrix","Mat");CHKERRQ(ierr);
2832   ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,NULL);CHKERRQ(ierr);
2833   ierr = PetscOptionsEnd();CHKERRQ(ierr);
2834   if (bs < 0) bs = 1;
2835 
2836   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
2837   M    = header[1]; N = header[2];
2838 
2839   /* If global sizes are set, check if they are consistent with that given in the file */
2840   if (newMat->rmap->N >= 0 && newMat->rmap->N != M) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Inconsistent # of rows:Matrix in file has (%D) and input matrix has (%D)",newMat->rmap->N,M);
2841   if (newMat->cmap->N >=0 && newMat->cmap->N != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Inconsistent # of cols:Matrix in file has (%D) and input matrix has (%D)",newMat->cmap->N,N);
2842 
2843   /* determine ownership of all (block) rows */
2844   if (M%bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows (%d) and block size (%d)",M,bs);
2845   if (newMat->rmap->n < 0) m = bs*((M/bs)/size + (((M/bs) % size) > rank));    /* PETSC_DECIDE */
2846   else m = newMat->rmap->n; /* Set by user */
2847 
2848   ierr = PetscMalloc1(size+1,&rowners);CHKERRQ(ierr);
2849   ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
2850 
2851   /* First process needs enough room for process with most rows */
2852   if (!rank) {
2853     mmax = rowners[1];
2854     for (i=2; i<=size; i++) {
2855       mmax = PetscMax(mmax, rowners[i]);
2856     }
2857   } else mmax = -1;             /* unused, but compilers complain */
2858 
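       /* convert the gathered per-process row counts into global row offsets; rowners[rank] becomes the first global row owned by process rank */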
2859   rowners[0] = 0;
2860   for (i=2; i<=size; i++) {
2861     rowners[i] += rowners[i-1];
2862   }
2863   rstart = rowners[rank];
2864   rend   = rowners[rank+1];
2865 
2866   /* distribute row lengths to all processors */
2867   ierr = PetscMalloc2(m,&ourlens,m,&offlens);CHKERRQ(ierr);
2868   if (!rank) {
2869     ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr);
2870     ierr = PetscMalloc1(mmax,&rowlengths);CHKERRQ(ierr);
2871     ierr = PetscCalloc1(size,&procsnz);CHKERRQ(ierr);
2872     for (j=0; j<m; j++) {
2873       procsnz[0] += ourlens[j];
2874     }
2875     for (i=1; i<size; i++) {
2876       ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr);
2877       /* calculate the number of nonzeros on each processor */
2878       for (j=0; j<rowners[i+1]-rowners[i]; j++) {
2879         procsnz[i] += rowlengths[j];
2880       }
2881       ierr = MPIULong_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
2882     }
2883     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
2884   } else {
2885     ierr = MPIULong_Recv(ourlens,m,MPIU_INT,0,tag,comm);CHKERRQ(ierr);
2886   }
2887 
2888   if (!rank) {
2889     /* determine max buffer needed and allocate it */
2890     maxnz = 0;
2891     for (i=0; i<size; i++) {
2892       maxnz = PetscMax(maxnz,procsnz[i]);
2893     }
2894     ierr = PetscMalloc1(maxnz,&cols);CHKERRQ(ierr);
2895 
2896     /* read in my part of the matrix column indices  */
2897     nz   = procsnz[0];
2898     ierr = PetscMalloc1(nz,&mycols);CHKERRQ(ierr);
2899     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
2900 
2901     /* read in everyone else's part and ship it off */
2902     for (i=1; i<size; i++) {
2903       nz   = procsnz[i];
2904       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
2905       ierr = MPIULong_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
2906     }
2907     ierr = PetscFree(cols);CHKERRQ(ierr);
2908   } else {
2909     /* determine buffer space needed for message */
2910     nz = 0;
2911     for (i=0; i<m; i++) {
2912       nz += ourlens[i];
2913     }
2914     ierr = PetscMalloc1(nz,&mycols);CHKERRQ(ierr);
2915 
2916     /* receive message of column indices */
2917     ierr = MPIULong_Recv(mycols,nz,MPIU_INT,0,tag,comm);CHKERRQ(ierr);
2918   }
2919 
2920   /* determine column ownership if matrix is not square */
2921   if (N != M) {
2922     if (newMat->cmap->n < 0) n = N/size + ((N % size) > rank);
2923     else n = newMat->cmap->n;
2924     ierr   = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
2925     cstart = cend - n;
2926   } else {
2927     cstart = rstart;
2928     cend   = rend;
2929     n      = cend - cstart;
2930   }
2931 
2932   /* loop over local rows, determining number of off diagonal entries */
2933   ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr);
2934   jj   = 0;
2935   for (i=0; i<m; i++) {
2936     for (j=0; j<ourlens[i]; j++) {
2937       if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++;
2938       jj++;
2939     }
2940   }
2941 
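       /* temporarily reduce ourlens[] to the diagonal-block row lengths for preallocation; the full lengths are restored below */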
2942   for (i=0; i<m; i++) {
2943     ourlens[i] -= offlens[i];
2944   }
2945   ierr = MatSetSizes(newMat,m,n,M,N);CHKERRQ(ierr);
2946 
2947   if (bs > 1) {ierr = MatSetBlockSize(newMat,bs);CHKERRQ(ierr);}
2948 
2949   ierr = MatMPIAIJSetPreallocation(newMat,0,ourlens,0,offlens);CHKERRQ(ierr);
2950 
2951   for (i=0; i<m; i++) {
2952     ourlens[i] += offlens[i];
2953   }
2954 
2955   if (!rank) {
2956     ierr = PetscMalloc1(maxnz+1,&vals);CHKERRQ(ierr);
2957 
2958     /* read in my part of the matrix numerical values  */
2959     nz   = procsnz[0];
2960     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2961 
2962     /* insert into matrix */
2963     jj      = rstart;
2964     smycols = mycols;
2965     svals   = vals;
2966     for (i=0; i<m; i++) {
2967       ierr     = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
2968       smycols += ourlens[i];
2969       svals   += ourlens[i];
2970       jj++;
2971     }
2972 
2973     /* read in other processors and ship out */
2974     for (i=1; i<size; i++) {
2975       nz   = procsnz[i];
2976       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2977       ierr = MPIULong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr);
2978     }
2979     ierr = PetscFree(procsnz);CHKERRQ(ierr);
2980   } else {
2981     /* receive numeric values */
2982     ierr = PetscMalloc1(nz+1,&vals);CHKERRQ(ierr);
2983 
2984     /* receive message of values */
2985     ierr = MPIULong_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr);
2986 
2987     /* insert into matrix */
2988     jj      = rstart;
2989     smycols = mycols;
2990     svals   = vals;
2991     for (i=0; i<m; i++) {
2992       ierr     = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
2993       smycols += ourlens[i];
2994       svals   += ourlens[i];
2995       jj++;
2996     }
2997   }
2998   ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr);
2999   ierr = PetscFree(vals);CHKERRQ(ierr);
3000   ierr = PetscFree(mycols);CHKERRQ(ierr);
3001   ierr = PetscFree(rowners);CHKERRQ(ierr);
3002   ierr = MatAssemblyBegin(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3003   ierr = MatAssemblyEnd(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3004   PetscFunctionReturn(0);
3005 }
3006 
3007 /* Not scalable because of ISAllGather() unless getting all columns. */
3008 PetscErrorCode ISGetSeqIS_Private(Mat mat,IS iscol,IS *isseq)
3009 {
3010   PetscErrorCode ierr;
3011   IS             iscol_local;
3012   PetscBool      isstride;
3013   PetscMPIInt    lisstride=0,gisstride;
3014 
3015   PetscFunctionBegin;
3016   /* check if we are grabbing all columns */
3017   ierr = PetscObjectTypeCompare((PetscObject)iscol,ISSTRIDE,&isstride);CHKERRQ(ierr);
3018 
3019   if (isstride) {
3020     PetscInt  start,len,mstart,mlen;
3021     ierr = ISStrideGetInfo(iscol,&start,NULL);CHKERRQ(ierr);
3022     ierr = ISGetLocalSize(iscol,&len);CHKERRQ(ierr);
3023     ierr = MatGetOwnershipRangeColumn(mat,&mstart,&mlen);CHKERRQ(ierr);
3024     if (mstart == start && mlen-mstart == len) lisstride = 1;
3025   }
3026 
3027   ierr = MPIU_Allreduce(&lisstride,&gisstride,1,MPI_INT,MPI_MIN,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
3028   if (gisstride) {
3029     PetscInt N;
3030     ierr = MatGetSize(mat,NULL,&N);CHKERRQ(ierr);
3031     ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),N,0,1,&iscol_local);CHKERRQ(ierr);
3032     ierr = ISSetIdentity(iscol_local);CHKERRQ(ierr);
3033     ierr = PetscInfo(mat,"Optimizing for obtaining all columns of the matrix; skipping ISAllGather()\n");CHKERRQ(ierr);
3034   } else {
3035     PetscInt cbs;
3036     ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr);
3037     ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
3038     ierr = ISSetBlockSize(iscol_local,cbs);CHKERRQ(ierr);
3039   }
3040 
3041   *isseq = iscol_local;
3042   PetscFunctionReturn(0);
3043 }
3044 
3045 /*
3046  Used by MatCreateSubMatrix_MPIAIJ_SameRowColDist() to avoid ISAllGather() and an iscol_local of global size
3047  (see MatCreateSubMatrix_MPIAIJ_nonscalable)
3048 
3049  Input Parameters:
3050    mat - matrix
3051    isrow - parallel row index set; its local indices are a subset of local rows of mat,
3052            i.e., mat->rstart <= isrow[i] < mat->rend
3053    iscol - parallel column index set; its local indices are a subset of local columns of mat,
3054            i.e., mat->cstart <= iscol[i] < mat->cend
3055  Output Parameters:
3056    isrow_d,iscol_d - sequential row and column index sets for retrieving mat->A
3057    iscol_o - sequential column index set for retrieving mat->B
3058    garray - column map; garray[i] indicates global location of iscol_o[i] in iscol
3059  */
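     /*
      A small worked sketch of the mapping above (hypothetical data; 2 processes, 8 global columns):
        P0 owns columns 0-3 and its part of iscol selects {1,3}; P1 owns columns 4-7 and selects {5,6},
        so the columns of the submatrix (the concatenated iscol) are {1,3,5,6}.
        On P0, iscol_d = {1,3}, the local column indices into mat->A.
        If P0's off-diagonal block mat->B happens to hold global columns {4,5,7}, only column 5 is selected
        by its owner, so iscol_o = {1} (the local index of column 5 in mat->B) and garray = {2}, since
        column 5 sits at position 2 of the concatenated iscol and thus becomes column 2 of the submatrix.
      */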
3060 PetscErrorCode ISGetSeqIS_SameColDist_Private(Mat mat,IS isrow,IS iscol,IS *isrow_d,IS *iscol_d,IS *iscol_o,const PetscInt *garray[])
3061 {
3062   PetscErrorCode ierr;
3063   Vec            x,cmap;
3064   const PetscInt *is_idx;
3065   PetscScalar    *xarray,*cmaparray;
3066   PetscInt       ncols,isstart,*idx,m,rstart,*cmap1,count;
3067   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)mat->data;
3068   Mat            B=a->B;
3069   Vec            lvec=a->lvec,lcmap;
3070   PetscInt       i,cstart,cend,Bn=B->cmap->N;
3071   MPI_Comm       comm;
3072   VecScatter     Mvctx=a->Mvctx;
3073 
3074   PetscFunctionBegin;
3075   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3076   ierr = ISGetLocalSize(iscol,&ncols);CHKERRQ(ierr);
3077 
3078   /* (1) iscol is a sub-column vector of mat, pad it with '-1.' to form a full vector x */
3079   ierr = MatCreateVecs(mat,&x,NULL);CHKERRQ(ierr);
3080   ierr = VecSet(x,-1.0);CHKERRQ(ierr);
3081   ierr = VecDuplicate(x,&cmap);CHKERRQ(ierr);
3082   ierr = VecSet(cmap,-1.0);CHKERRQ(ierr);
3083 
3084   /* Get start indices */
3085   ierr = MPI_Scan(&ncols,&isstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3086   isstart -= ncols;
3087   ierr = MatGetOwnershipRangeColumn(mat,&cstart,&cend);CHKERRQ(ierr);
3088 
3089   ierr = ISGetIndices(iscol,&is_idx);CHKERRQ(ierr);
3090   ierr = VecGetArray(x,&xarray);CHKERRQ(ierr);
3091   ierr = VecGetArray(cmap,&cmaparray);CHKERRQ(ierr);
3092   ierr = PetscMalloc1(ncols,&idx);CHKERRQ(ierr);
3093   for (i=0; i<ncols; i++) {
3094     xarray[is_idx[i]-cstart]    = (PetscScalar)is_idx[i];
3095     cmaparray[is_idx[i]-cstart] = i + isstart;      /* global index of iscol[i] */
3096     idx[i]                      = is_idx[i]-cstart; /* local index of iscol[i]  */
3097   }
3098   ierr = VecRestoreArray(x,&xarray);CHKERRQ(ierr);
3099   ierr = VecRestoreArray(cmap,&cmaparray);CHKERRQ(ierr);
3100   ierr = ISRestoreIndices(iscol,&is_idx);CHKERRQ(ierr);
3101 
3102   /* Get iscol_d */
3103   ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,iscol_d);CHKERRQ(ierr);
3104   ierr = ISGetBlockSize(iscol,&i);CHKERRQ(ierr);
3105   ierr = ISSetBlockSize(*iscol_d,i);CHKERRQ(ierr);
3106 
3107   /* Get isrow_d */
3108   ierr = ISGetLocalSize(isrow,&m);CHKERRQ(ierr);
3109   rstart = mat->rmap->rstart;
3110   ierr = PetscMalloc1(m,&idx);CHKERRQ(ierr);
3111   ierr = ISGetIndices(isrow,&is_idx);CHKERRQ(ierr);
3112   for (i=0; i<m; i++) idx[i] = is_idx[i]-rstart;
3113   ierr = ISRestoreIndices(isrow,&is_idx);CHKERRQ(ierr);
3114 
3115   ierr = ISCreateGeneral(PETSC_COMM_SELF,m,idx,PETSC_OWN_POINTER,isrow_d);CHKERRQ(ierr);
3116   ierr = ISGetBlockSize(isrow,&i);CHKERRQ(ierr);
3117   ierr = ISSetBlockSize(*isrow_d,i);CHKERRQ(ierr);
3118 
3119   /* (2) Scatter x and cmap using aij->Mvctx to get their off-process portions (see MatMult_MPIAIJ) */
3120   ierr = VecScatterBegin(Mvctx,x,lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3121   ierr = VecScatterEnd(Mvctx,x,lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3122 
3123   ierr = VecDuplicate(lvec,&lcmap);CHKERRQ(ierr);
3124 
3125   ierr = VecScatterBegin(Mvctx,cmap,lcmap,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3126   ierr = VecScatterEnd(Mvctx,cmap,lcmap,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3127 
3128   /* (3) create sequential iscol_o (a subset of iscol) and isgarray */
3129   /* off-process column indices */
3130   count = 0;
3131   ierr = PetscMalloc1(Bn,&idx);CHKERRQ(ierr);
3132   ierr = PetscMalloc1(Bn,&cmap1);CHKERRQ(ierr);
3133 
3134   ierr = VecGetArray(lvec,&xarray);CHKERRQ(ierr);
3135   ierr = VecGetArray(lcmap,&cmaparray);CHKERRQ(ierr);
3136   for (i=0; i<Bn; i++) {
3137     if (PetscRealPart(xarray[i]) > -1.0) {
3138       idx[count]     = i;                   /* local column index in off-diagonal part B */
3139       cmap1[count] = (PetscInt)PetscRealPart(cmaparray[i]);  /* column index in submat */
3140       count++;
3141     }
3142   }
3143   ierr = VecRestoreArray(lvec,&xarray);CHKERRQ(ierr);
3144   ierr = VecRestoreArray(lcmap,&cmaparray);CHKERRQ(ierr);
3145 
3146   ierr = ISCreateGeneral(PETSC_COMM_SELF,count,idx,PETSC_COPY_VALUES,iscol_o);CHKERRQ(ierr);
3147   /* cannot ensure iscol_o has same blocksize as iscol! */
3148 
3149   ierr = PetscFree(idx);CHKERRQ(ierr);
3150   *garray = cmap1;
3151 
3152   ierr = VecDestroy(&x);CHKERRQ(ierr);
3153   ierr = VecDestroy(&cmap);CHKERRQ(ierr);
3154   ierr = VecDestroy(&lcmap);CHKERRQ(ierr);
3155   PetscFunctionReturn(0);
3156 }
3157 
3158 /* isrow and iscol have same processor distribution as mat, output *submat is a submatrix of local mat */
3159 PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowColDist(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *submat)
3160 {
3161   PetscErrorCode ierr;
3162   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)mat->data,*asub;
3163   Mat            M = NULL;
3164   MPI_Comm       comm;
3165   IS             iscol_d,isrow_d,iscol_o;
3166   Mat            Asub = NULL,Bsub = NULL;
3167   PetscInt       n;
3168 
3169   PetscFunctionBegin;
3170   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3171 
3172   if (call == MAT_REUSE_MATRIX) {
3173     /* Retrieve isrow_d, iscol_d and iscol_o from submat */
3174     ierr = PetscObjectQuery((PetscObject)*submat,"isrow_d",(PetscObject*)&isrow_d);CHKERRQ(ierr);
3175     if (!isrow_d) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"isrow_d passed in was not used before, cannot reuse");
3176 
3177     ierr = PetscObjectQuery((PetscObject)*submat,"iscol_d",(PetscObject*)&iscol_d);CHKERRQ(ierr);
3178     if (!iscol_d) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"iscol_d passed in was not used before, cannot reuse");
3179 
3180     ierr = PetscObjectQuery((PetscObject)*submat,"iscol_o",(PetscObject*)&iscol_o);CHKERRQ(ierr);
3181     if (!iscol_o) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"iscol_o passed in was not used before, cannot reuse");
3182 
3183     /* Update diagonal and off-diagonal portions of submat */
3184     asub = (Mat_MPIAIJ*)(*submat)->data;
3185     ierr = MatCreateSubMatrix_SeqAIJ(a->A,isrow_d,iscol_d,PETSC_DECIDE,MAT_REUSE_MATRIX,&asub->A);CHKERRQ(ierr);
3186     ierr = ISGetLocalSize(iscol_o,&n);CHKERRQ(ierr);
3187     if (n) {
3188       ierr = MatCreateSubMatrix_SeqAIJ(a->B,isrow_d,iscol_o,PETSC_DECIDE,MAT_REUSE_MATRIX,&asub->B);CHKERRQ(ierr);
3189     }
3190     ierr = MatAssemblyBegin(*submat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3191     ierr = MatAssemblyEnd(*submat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3192 
3193   } else { /* call == MAT_INITIAL_MATRIX */
3194     const PetscInt *garray;
3195     PetscInt        BsubN;
3196 
3197     /* Create isrow_d, iscol_d, iscol_o and isgarray (replace isgarray with array?) */
3198     ierr = ISGetSeqIS_SameColDist_Private(mat,isrow,iscol,&isrow_d,&iscol_d,&iscol_o,&garray);CHKERRQ(ierr);
3199 
3200     /* Create local submatrices Asub and Bsub */
3201     ierr = MatCreateSubMatrix_SeqAIJ(a->A,isrow_d,iscol_d,PETSC_DECIDE,MAT_INITIAL_MATRIX,&Asub);CHKERRQ(ierr);
3202     ierr = MatCreateSubMatrix_SeqAIJ(a->B,isrow_d,iscol_o,PETSC_DECIDE,MAT_INITIAL_MATRIX,&Bsub);CHKERRQ(ierr);
3203 
3204     /* Create submatrix M */
3205     ierr = MatCreateMPIAIJWithSeqAIJ(comm,Asub,Bsub,garray,&M);CHKERRQ(ierr);
3206 
3207     /* If Bsub has empty columns, compress iscol_o such that it will retrieve condensed Bsub from a->B during reuse */
3208     asub = (Mat_MPIAIJ*)M->data;
3209 
3210     ierr = ISGetLocalSize(iscol_o,&BsubN);CHKERRQ(ierr);
3211     n = asub->B->cmap->N;
3212     if (BsubN > n) {
3213       /* This case can be tested using ~petsc/src/tao/bound/examples/tutorials/runplate2_3 */
3214       const PetscInt *idx;
3215       PetscInt       i,j,*idx_new,*subgarray = asub->garray;
3216       ierr = PetscInfo2(M,"submatrix Bn %D != BsubN %D, update iscol_o\n",n,BsubN);CHKERRQ(ierr);
3217 
3218       ierr = PetscMalloc1(n,&idx_new);CHKERRQ(ierr);
3219       j = 0;
3220       ierr = ISGetIndices(iscol_o,&idx);CHKERRQ(ierr);
3221       for (i=0; i<n; i++) {
3222         if (j >= BsubN) break;
3223         while (subgarray[i] > garray[j]) j++;
3224 
3225         if (subgarray[i] == garray[j]) {
3226           idx_new[i] = idx[j++];
3227         } else SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"subgarray[%D]=%D cannot be smaller than garray[%D]=%D",i,subgarray[i],j,garray[j]);
3228       }
3229       ierr = ISRestoreIndices(iscol_o,&idx);CHKERRQ(ierr);
3230 
3231       ierr = ISDestroy(&iscol_o);CHKERRQ(ierr);
3232       ierr = ISCreateGeneral(PETSC_COMM_SELF,n,idx_new,PETSC_OWN_POINTER,&iscol_o);CHKERRQ(ierr);
3233 
3234     } else if (BsubN < n) {
3235       SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Columns of Bsub (%D) cannot be smaller than B's (%D)",BsubN,asub->B->cmap->N);
3236     }
3237 
3238     ierr = PetscFree(garray);CHKERRQ(ierr);
3239     *submat = M;
3240 
3241     /* Save isrow_d, iscol_d and iscol_o used in processor for next request */
3242     ierr = PetscObjectCompose((PetscObject)M,"isrow_d",(PetscObject)isrow_d);CHKERRQ(ierr);
3243     ierr = ISDestroy(&isrow_d);CHKERRQ(ierr);
3244 
3245     ierr = PetscObjectCompose((PetscObject)M,"iscol_d",(PetscObject)iscol_d);CHKERRQ(ierr);
3246     ierr = ISDestroy(&iscol_d);CHKERRQ(ierr);
3247 
3248     ierr = PetscObjectCompose((PetscObject)M,"iscol_o",(PetscObject)iscol_o);CHKERRQ(ierr);
3249     ierr = ISDestroy(&iscol_o);CHKERRQ(ierr);
3250   }
3251   PetscFunctionReturn(0);
3252 }
3253 
3254 PetscErrorCode MatCreateSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat)
3255 {
3256   PetscErrorCode ierr;
3257   IS             iscol_local=NULL,isrow_d;
3258   PetscInt       csize;
3259   PetscInt       n,i,j,start,end;
3260   PetscBool      sameRowDist=PETSC_FALSE,sameDist[2],tsameDist[2];
3261   MPI_Comm       comm;
3262 
3263   PetscFunctionBegin;
3264   /* If isrow has same processor distribution as mat,
3265      call MatCreateSubMatrix_MPIAIJ_SameRowDist() to avoid using a hash table with global size of iscol */
3266   if (call == MAT_REUSE_MATRIX) {
3267     ierr = PetscObjectQuery((PetscObject)*newmat,"isrow_d",(PetscObject*)&isrow_d);CHKERRQ(ierr);
3268     if (isrow_d) {
3269       sameRowDist  = PETSC_TRUE;
3270       tsameDist[1] = PETSC_TRUE; /* sameColDist */
3271     } else {
3272       ierr = PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_local);CHKERRQ(ierr);
3273       if (iscol_local) {
3274         sameRowDist  = PETSC_TRUE;
3275         tsameDist[1] = PETSC_FALSE; /* !sameColDist */
3276       }
3277     }
3278   } else {
3279     /* Check if isrow has same processor distribution as mat */
3280     sameDist[0] = PETSC_FALSE;
3281     ierr = ISGetLocalSize(isrow,&n);CHKERRQ(ierr);
3282     if (!n) {
3283       sameDist[0] = PETSC_TRUE;
3284     } else {
3285       ierr = ISGetMinMax(isrow,&i,&j);CHKERRQ(ierr);
3286       ierr = MatGetOwnershipRange(mat,&start,&end);CHKERRQ(ierr);
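           /* isrow matches mat's row distribution if all of its local indices lie in this process's ownership range [start,end) */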
3287       if (i >= start && j < end) {
3288         sameDist[0] = PETSC_TRUE;
3289       }
3290     }
3291 
3292     /* Check if iscol has same processor distribution as mat */
3293     sameDist[1] = PETSC_FALSE;
3294     ierr = ISGetLocalSize(iscol,&n);CHKERRQ(ierr);
3295     if (!n) {
3296       sameDist[1] = PETSC_TRUE;
3297     } else {
3298       ierr = ISGetMinMax(iscol,&i,&j);CHKERRQ(ierr);
3299       ierr = MatGetOwnershipRangeColumn(mat,&start,&end);CHKERRQ(ierr);
3300       if (i >= start && j < end) sameDist[1] = PETSC_TRUE;
3301     }
3302 
3303     ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3304     ierr = MPIU_Allreduce(&sameDist,&tsameDist,2,MPIU_BOOL,MPI_LAND,comm);CHKERRQ(ierr);
3305     sameRowDist = tsameDist[0];
3306   }
3307 
3308   if (sameRowDist) {
3309     if (tsameDist[1]) { /* sameRowDist & sameColDist */
3310       /* isrow and iscol have same processor distribution as mat */
3311       ierr = MatCreateSubMatrix_MPIAIJ_SameRowColDist(mat,isrow,iscol,call,newmat);CHKERRQ(ierr);
3312       PetscFunctionReturn(0);
3313     } else { /* sameRowDist */
3314       /* isrow has same processor distribution as mat */
3315       if (call == MAT_INITIAL_MATRIX) {
3316         PetscBool sorted;
3317         ierr = ISGetSeqIS_Private(mat,iscol,&iscol_local);CHKERRQ(ierr);
3318         ierr = ISGetLocalSize(iscol_local,&n);CHKERRQ(ierr); /* local size of iscol_local = global columns of newmat */
3319         ierr = ISGetSize(iscol,&i);CHKERRQ(ierr);
3320         if (n != i) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"n %D != size of iscol %D",n,i);
3321 
3322         ierr = ISSorted(iscol_local,&sorted);CHKERRQ(ierr);
3323         if (sorted) {
3324           /* MatCreateSubMatrix_MPIAIJ_SameRowDist() requires iscol_local be sorted; it can have duplicate indices */
3325           ierr = MatCreateSubMatrix_MPIAIJ_SameRowDist(mat,isrow,iscol,iscol_local,MAT_INITIAL_MATRIX,newmat);CHKERRQ(ierr);
3326           PetscFunctionReturn(0);
3327         }
3328       } else { /* call == MAT_REUSE_MATRIX */
3329         IS    iscol_sub;
3330         ierr = PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_sub);CHKERRQ(ierr);
3331         if (iscol_sub) {
3332           ierr = MatCreateSubMatrix_MPIAIJ_SameRowDist(mat,isrow,iscol,NULL,call,newmat);CHKERRQ(ierr);
3333           PetscFunctionReturn(0);
3334         }
3335       }
3336     }
3337   }
3338 
3339   /* General case: iscol -> iscol_local which has global size of iscol */
3340   if (call == MAT_REUSE_MATRIX) {
3341     ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr);
3342     if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3343   } else {
3344     if (!iscol_local) {
3345       ierr = ISGetSeqIS_Private(mat,iscol,&iscol_local);CHKERRQ(ierr);
3346     }
3347   }
3348 
3349   ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
3350   ierr = MatCreateSubMatrix_MPIAIJ_nonscalable(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr);
3351 
3352   if (call == MAT_INITIAL_MATRIX) {
3353     ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
3354     ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
3355   }
3356   PetscFunctionReturn(0);
3357 }
3358 
3359 /*@C
3360      MatCreateMPIAIJWithSeqAIJ - creates an MPIAIJ matrix using SeqAIJ matrices that contain the "diagonal"
3361          and "off-diagonal" parts of the matrix in CSR format.
3362 
3363    Collective on MPI_Comm
3364 
3365    Input Parameters:
3366 +  comm - MPI communicator
3367 .  A - "diagonal" portion of matrix
3368 .  B - "off-diagonal" portion of matrix, may have empty columns, will be destroyed by this routine
3369 -  garray - global index of B columns
3370 
3371    Output Parameter:
3372 .   mat - the matrix, with input A as its local diagonal matrix
3373    Level: advanced
3374 
3375    Notes:
3376        See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix.
3377        A becomes part of output mat, B is destroyed by this routine. The user cannot use A and B anymore.
3378 
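            A minimal usage sketch (the creation of the two SeqAIJ blocks and the garray contents shown here are hypothetical):
     $       Mat      A,B,C;
     $       PetscInt garray[2] = {3,7};   [hypothetical global column indices of the two columns of B]
     $       ... assemble SeqAIJ A (the m x n "diagonal" block) and SeqAIJ B (the m x 2 "off-diagonal" block) ...
     $       ierr = MatCreateMPIAIJWithSeqAIJ(PETSC_COMM_WORLD,A,B,garray,&C);CHKERRQ(ierr);
     $       ... A and B now belong to C and must not be used or destroyed by the caller ...
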
3379 .seealso: MatCreateMPIAIJWithSplitArrays()
3380 @*/
3381 PetscErrorCode MatCreateMPIAIJWithSeqAIJ(MPI_Comm comm,Mat A,Mat B,const PetscInt garray[],Mat *mat)
3382 {
3383   PetscErrorCode ierr;
3384   Mat_MPIAIJ     *maij;
3385   Mat_SeqAIJ     *b=(Mat_SeqAIJ*)B->data,*bnew;
3386   PetscInt       *oi=b->i,*oj=b->j,i,nz,col;
3387   PetscScalar    *oa=b->a;
3388   Mat            Bnew;
3389   PetscInt       m,n,N;
3390 
3391   PetscFunctionBegin;
3392   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
3393   ierr = MatGetSize(A,&m,&n);CHKERRQ(ierr);
3394   if (m != B->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Am %D != Bm %D",m,B->rmap->N);
3395   if (A->rmap->bs != B->rmap->bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"A row bs %D != B row bs %D",A->rmap->bs,B->rmap->bs);
3396   /* remove check below; When B is created using iscol_o from ISGetSeqIS_SameColDist_Private(), its bs may not be same as A */
3397   /* if (A->cmap->bs != B->cmap->bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"A column bs %D != B column bs %D",A->cmap->bs,B->cmap->bs); */
3398 
3399   /* Get global columns of mat */
3400   ierr = MPIU_Allreduce(&n,&N,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3401 
3402   ierr = MatSetSizes(*mat,m,n,PETSC_DECIDE,N);CHKERRQ(ierr);
3403   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
3404   ierr = MatSetBlockSizes(*mat,A->rmap->bs,A->cmap->bs);CHKERRQ(ierr);
3405   maij = (Mat_MPIAIJ*)(*mat)->data;
3406 
3407   (*mat)->preallocated = PETSC_TRUE;
3408 
3409   ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr);
3410   ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr);
3411 
3412   /* Set A as diagonal portion of *mat */
3413   maij->A = A;
3414 
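       /* convert B's compacted local column indices into global column indices via garray so that Bnew can be created with N global columns */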
3415   nz = oi[m];
3416   for (i=0; i<nz; i++) {
3417     col   = oj[i];
3418     oj[i] = garray[col];
3419   }
3420 
3421    /* Set Bnew as off-diagonal portion of *mat */
3422   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,N,oi,oj,oa,&Bnew);CHKERRQ(ierr);
3423   bnew        = (Mat_SeqAIJ*)Bnew->data;
3424   bnew->maxnz = b->maxnz; /* allocated nonzeros of B */
3425   maij->B     = Bnew;
3426 
3427   if (B->rmap->N != Bnew->rmap->N) SETERRQ2(PETSC_COMM_SELF,0,"BN %D != BnewN %D",B->rmap->N,Bnew->rmap->N);
3428 
3429   b->singlemalloc = PETSC_FALSE; /* B arrays are shared by Bnew */
3430   b->free_a       = PETSC_FALSE;
3431   b->free_ij      = PETSC_FALSE;
3432   ierr = MatDestroy(&B);CHKERRQ(ierr);
3433 
3434   bnew->singlemalloc = PETSC_TRUE; /* arrays will be freed by MatDestroy(&Bnew) */
3435   bnew->free_a       = PETSC_TRUE;
3436   bnew->free_ij      = PETSC_TRUE;
3437 
3438   /* condense columns of maij->B */
3439   ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr);
3440   ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3441   ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3442   ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_FALSE);CHKERRQ(ierr);
3443   ierr = MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3444   PetscFunctionReturn(0);
3445 }
3446 
3447 extern PetscErrorCode MatCreateSubMatrices_MPIAIJ_SingleIS_Local(Mat,PetscInt,const IS[],const IS[],MatReuse,PetscBool,Mat*);
3448 
3449 PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowDist(Mat mat,IS isrow,IS iscol,IS iscol_local,MatReuse call,Mat *newmat)
3450 {
3451   PetscErrorCode ierr;
3452   PetscInt       i,m,n,rstart,row,rend,nz,j,bs,cbs;
3453   PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal;
3454   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)mat->data;
3455   Mat            M,Msub,B=a->B;
3456   MatScalar      *aa;
3457   Mat_SeqAIJ     *aij;
3458   PetscInt       *garray = a->garray,*colsub,Ncols;
3459   PetscInt       count,Bn=B->cmap->N,cstart=mat->cmap->rstart,cend=mat->cmap->rend;
3460   IS             iscol_sub,iscmap;
3461   const PetscInt *is_idx,*cmap;
3462   PetscBool      allcolumns=PETSC_FALSE;
3463   MPI_Comm       comm;
3464 
3465   PetscFunctionBegin;
3466   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3467 
3468   if (call == MAT_REUSE_MATRIX) {
3469     ierr = PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_sub);CHKERRQ(ierr);
3470     if (!iscol_sub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"SubIScol passed in was not used before, cannot reuse");
3471     ierr = ISGetLocalSize(iscol_sub,&count);CHKERRQ(ierr);
3472 
3473     ierr = PetscObjectQuery((PetscObject)*newmat,"Subcmap",(PetscObject*)&iscmap);CHKERRQ(ierr);
3474     if (!iscmap) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Subcmap passed in was not used before, cannot reuse");
3475 
3476     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Msub);CHKERRQ(ierr);
3477     if (!Msub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3478 
3479     ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol_sub,MAT_REUSE_MATRIX,PETSC_FALSE,&Msub);CHKERRQ(ierr);
3480 
3481   } else { /* call == MAT_INITIAL_MATRIX */
3482     PetscBool flg;
3483 
3484     ierr = ISGetLocalSize(iscol,&n);CHKERRQ(ierr);
3485     ierr = ISGetSize(iscol,&Ncols);CHKERRQ(ierr);
3486 
3487     /* (1) iscol -> nonscalable iscol_local */
3488     /* Check for special case: each processor gets entire matrix columns */
3489     ierr = ISIdentity(iscol_local,&flg);CHKERRQ(ierr);
3490     if (flg && n == mat->cmap->N) allcolumns = PETSC_TRUE;
3491     if (allcolumns) {
3492       iscol_sub = iscol_local;
3493       ierr = PetscObjectReference((PetscObject)iscol_local);CHKERRQ(ierr);
3494       ierr = ISCreateStride(PETSC_COMM_SELF,n,0,1,&iscmap);CHKERRQ(ierr);
3495 
3496     } else {
3497       /* (2) iscol_local -> iscol_sub and iscmap. Implementation below requires iscol_local be sorted, it can have duplicate indices */
3498       PetscInt *idx,*cmap1,k;
3499       ierr = PetscMalloc1(Ncols,&idx);CHKERRQ(ierr);
3500       ierr = PetscMalloc1(Ncols,&cmap1);CHKERRQ(ierr);
3501       ierr = ISGetIndices(iscol_local,&is_idx);CHKERRQ(ierr);
3502       count = 0;
3503       k     = 0;
3504       for (i=0; i<Ncols; i++) {
3505         j = is_idx[i];
3506         if (j >= cstart && j < cend) {
3507           /* diagonal part of mat */
3508           idx[count]     = j;
3509           cmap1[count++] = i; /* column index in submat */
3510         } else if (Bn) {
3511           /* off-diagonal part of mat */
3512           if (j == garray[k]) {
3513             idx[count]     = j;
3514             cmap1[count++] = i;  /* column index in submat */
3515           } else if (j > garray[k]) {
3516             while (j > garray[k] && k < Bn-1) k++;
3517             if (j == garray[k]) {
3518               idx[count]     = j;
3519               cmap1[count++] = i; /* column index in submat */
3520             }
3521           }
3522         }
3523       }
3524       ierr = ISRestoreIndices(iscol_local,&is_idx);CHKERRQ(ierr);
3525 
3526       ierr = ISCreateGeneral(PETSC_COMM_SELF,count,idx,PETSC_OWN_POINTER,&iscol_sub);CHKERRQ(ierr);
3527       ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr);
3528       ierr = ISSetBlockSize(iscol_sub,cbs);CHKERRQ(ierr);
3529 
3530       ierr = ISCreateGeneral(PetscObjectComm((PetscObject)iscol_local),count,cmap1,PETSC_OWN_POINTER,&iscmap);CHKERRQ(ierr);
3531     }
3532 
3533     /* (3) Create sequential Msub */
3534     ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol_sub,MAT_INITIAL_MATRIX,allcolumns,&Msub);CHKERRQ(ierr);
3535   }
3536 
3537   ierr = ISGetLocalSize(iscol_sub,&count);CHKERRQ(ierr);
3538   aij  = (Mat_SeqAIJ*)(Msub)->data;
3539   ii   = aij->i;
3540   ierr = ISGetIndices(iscmap,&cmap);CHKERRQ(ierr);
3541 
3542   /*
3543       m - number of local rows
3544       Ncols - number of columns (same on all processors)
3545       rstart - first row in new global matrix generated
3546   */
3547   ierr = MatGetSize(Msub,&m,NULL);CHKERRQ(ierr);
3548 
3549   if (call == MAT_INITIAL_MATRIX) {
3550     /* (4) Create parallel newmat */
3551     PetscMPIInt    rank,size;
3552     PetscInt       csize;
3553 
3554     ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3555     ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3556 
3557     /*
3558         Determine the number of non-zeros in the diagonal and off-diagonal
3559         portions of the matrix in order to do correct preallocation
3560     */
3561 
3562     /* first get start and end of "diagonal" columns */
3563     ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
3564     if (csize == PETSC_DECIDE) {
3565       ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
3566       if (mglobal == Ncols) { /* square matrix */
3567         nlocal = m;
3568       } else {
3569         nlocal = Ncols/size + ((Ncols % size) > rank);
3570       }
3571     } else {
3572       nlocal = csize;
3573     }
3574     ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3575     rstart = rend - nlocal;
3576     if (rank == size - 1 && rend != Ncols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,Ncols);
3577 
3578     /* next, compute all the lengths */
3579     jj    = aij->j;
3580     ierr  = PetscMalloc1(2*m+1,&dlens);CHKERRQ(ierr);
3581     olens = dlens + m;
3582     for (i=0; i<m; i++) {
3583       jend = ii[i+1] - ii[i];
3584       olen = 0;
3585       dlen = 0;
3586       for (j=0; j<jend; j++) {
3587         if (cmap[*jj] < rstart || cmap[*jj] >= rend) olen++;
3588         else dlen++;
3589         jj++;
3590       }
3591       olens[i] = olen;
3592       dlens[i] = dlen;
3593     }
3594 
3595     ierr = ISGetBlockSize(isrow,&bs);CHKERRQ(ierr);
3596     ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr);
3597 
3598     ierr = MatCreate(comm,&M);CHKERRQ(ierr);
3599     ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,Ncols);CHKERRQ(ierr);
3600     ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr);
3601     ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr);
3602     ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr);
3603     ierr = PetscFree(dlens);CHKERRQ(ierr);
3604 
3605   } else { /* call == MAT_REUSE_MATRIX */
3606     M    = *newmat;
3607     ierr = MatGetLocalSize(M,&i,NULL);CHKERRQ(ierr);
3608     if (i != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
3609     ierr = MatZeroEntries(M);CHKERRQ(ierr);
3610     /*
3611          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
3612        rather than the slower MatSetValues().
3613     */
3614     M->was_assembled = PETSC_TRUE;
3615     M->assembled     = PETSC_FALSE;
3616   }
3617 
3618   /* (5) Set values of Msub to *newmat */
3619   ierr = PetscMalloc1(count,&colsub);CHKERRQ(ierr);
3620   ierr = MatGetOwnershipRange(M,&rstart,NULL);CHKERRQ(ierr);
3621 
3622   jj   = aij->j;
3623   aa   = aij->a;
3624   for (i=0; i<m; i++) {
3625     row = rstart + i;
3626     nz  = ii[i+1] - ii[i];
3627     for (j=0; j<nz; j++) colsub[j] = cmap[jj[j]];
3628     ierr  = MatSetValues_MPIAIJ(M,1,&row,nz,colsub,aa,INSERT_VALUES);CHKERRQ(ierr);
3629     jj += nz; aa += nz;
3630   }
3631   ierr = ISRestoreIndices(iscmap,&cmap);CHKERRQ(ierr);
3632 
3633   ierr    = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3634   ierr    = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3635 
3636   ierr = PetscFree(colsub);CHKERRQ(ierr);
3637 
3638   /* save Msub, iscol_sub and iscmap used in processor for next request */
3639   if (call ==  MAT_INITIAL_MATRIX) {
3640     *newmat = M;
3641     ierr = PetscObjectCompose((PetscObject)(*newmat),"SubMatrix",(PetscObject)Msub);CHKERRQ(ierr);
3642     ierr = MatDestroy(&Msub);CHKERRQ(ierr);
3643 
3644     ierr = PetscObjectCompose((PetscObject)(*newmat),"SubIScol",(PetscObject)iscol_sub);CHKERRQ(ierr);
3645     ierr = ISDestroy(&iscol_sub);CHKERRQ(ierr);
3646 
3647     ierr = PetscObjectCompose((PetscObject)(*newmat),"Subcmap",(PetscObject)iscmap);CHKERRQ(ierr);
3648     ierr = ISDestroy(&iscmap);CHKERRQ(ierr);
3649 
3650     if (iscol_local) {
3651       ierr = PetscObjectCompose((PetscObject)(*newmat),"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
3652       ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
3653     }
3654   }
3655   PetscFunctionReturn(0);
3656 }
3657 
3658 /*
3659     Not great since it makes two copies of the submatrix: first a SeqAIJ
3660   on each process, and then the end result by concatenating the local matrices.
3661   Writing it directly would be much like MatCreateSubMatrices_MPIAIJ()
3662 
3663   Note: This requires a sequential iscol with all indices.
3664 */
3665 PetscErrorCode MatCreateSubMatrix_MPIAIJ_nonscalable(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat)
3666 {
3667   PetscErrorCode ierr;
3668   PetscMPIInt    rank,size;
3669   PetscInt       i,m,n,rstart,row,rend,nz,*cwork,j,bs,cbs;
3670   PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal;
3671   Mat            M,Mreuse;
3672   MatScalar      *aa,*vwork;
3673   MPI_Comm       comm;
3674   Mat_SeqAIJ     *aij;
3675   PetscBool      colflag,allcolumns=PETSC_FALSE;
3676 
3677   PetscFunctionBegin;
3678   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3679   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3680   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3681 
3682   /* Check for special case: each processor gets entire matrix columns */
3683   ierr = ISIdentity(iscol,&colflag);CHKERRQ(ierr);
3684   ierr = ISGetLocalSize(iscol,&n);CHKERRQ(ierr);
3685   if (colflag && n == mat->cmap->N) allcolumns = PETSC_TRUE;
3686 
3687   if (call ==  MAT_REUSE_MATRIX) {
3688     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Mreuse);CHKERRQ(ierr);
3689     if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3690     ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,allcolumns,&Mreuse);CHKERRQ(ierr);
3691   } else {
3692     ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,allcolumns,&Mreuse);CHKERRQ(ierr);
3693   }
3694 
3695   /*
3696       m - number of local rows
3697       n - number of columns (same on all processors)
3698       rstart - first row in new global matrix generated
3699   */
3700   ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
3701   ierr = MatGetBlockSizes(Mreuse,&bs,&cbs);CHKERRQ(ierr);
3702   if (call == MAT_INITIAL_MATRIX) {
3703     aij = (Mat_SeqAIJ*)(Mreuse)->data;
3704     ii  = aij->i;
3705     jj  = aij->j;
3706 
3707     /*
3708         Determine the number of non-zeros in the diagonal and off-diagonal
3709         portions of the matrix in order to do correct preallocation
3710     */
3711 
3712     /* first get start and end of "diagonal" columns */
3713     if (csize == PETSC_DECIDE) {
3714       ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
3715       if (mglobal == n) { /* square matrix */
3716         nlocal = m;
3717       } else {
3718         nlocal = n/size + ((n % size) > rank);
3719       }
3720     } else {
3721       nlocal = csize;
3722     }
3723     ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3724     rstart = rend - nlocal;
3725     if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n);
3726 
3727     /* next, compute all the lengths */
3728     ierr  = PetscMalloc1(2*m+1,&dlens);CHKERRQ(ierr);
3729     olens = dlens + m;
3730     for (i=0; i<m; i++) {
3731       jend = ii[i+1] - ii[i];
3732       olen = 0;
3733       dlen = 0;
3734       for (j=0; j<jend; j++) {
3735         if (*jj < rstart || *jj >= rend) olen++;
3736         else dlen++;
3737         jj++;
3738       }
3739       olens[i] = olen;
3740       dlens[i] = dlen;
3741     }
3742     ierr = MatCreate(comm,&M);CHKERRQ(ierr);
3743     ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr);
3744     ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr);
3745     ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr);
3746     ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr);
3747     ierr = PetscFree(dlens);CHKERRQ(ierr);
3748   } else {
3749     PetscInt ml,nl;
3750 
3751     M    = *newmat;
3752     ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
3753     if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
3754     ierr = MatZeroEntries(M);CHKERRQ(ierr);
3755     /*
3756          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
3757        rather than the slower MatSetValues().
3758     */
3759     M->was_assembled = PETSC_TRUE;
3760     M->assembled     = PETSC_FALSE;
3761   }
3762   ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
3763   aij  = (Mat_SeqAIJ*)(Mreuse)->data;
3764   ii   = aij->i;
3765   jj   = aij->j;
3766   aa   = aij->a;
3767   for (i=0; i<m; i++) {
3768     row   = rstart + i;
3769     nz    = ii[i+1] - ii[i];
3770     cwork = jj;     jj += nz;
3771     vwork = aa;     aa += nz;
3772     ierr  = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
3773   }
3774 
3775   ierr    = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3776   ierr    = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3777   *newmat = M;
3778 
3779   /* save submatrix used in processor for next request */
3780   if (call ==  MAT_INITIAL_MATRIX) {
3781     ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
3782     ierr = MatDestroy(&Mreuse);CHKERRQ(ierr);
3783   }
3784   PetscFunctionReturn(0);
3785 }
3786 
3787 PetscErrorCode MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[])
3788 {
3789   PetscInt       m,cstart, cend,j,nnz,i,d;
3790   PetscInt       *d_nnz,*o_nnz,nnz_max = 0,rstart,ii;
3791   const PetscInt *JJ;
3792   PetscScalar    *values;
3793   PetscErrorCode ierr;
3794   PetscBool      nooffprocentries;
3795 
3796   PetscFunctionBegin;
3797   if (Ii && Ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0, it is %D",Ii[0]);
3798 
3799   ierr   = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3800   ierr   = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3801   m      = B->rmap->n;
3802   cstart = B->cmap->rstart;
3803   cend   = B->cmap->rend;
3804   rstart = B->rmap->rstart;
3805 
3806   ierr = PetscMalloc2(m,&d_nnz,m,&o_nnz);CHKERRQ(ierr);
3807 
3808 #if defined(PETSC_USE_DEBUG)
3809   for (i=0; i<m; i++) {
3810     nnz = Ii[i+1]- Ii[i];
3811     JJ  = J + Ii[i];
3812     if (nnz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative %D number of columns",i,nnz);
3813     if (nnz && (JJ[0] < 0)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index %D",i,JJ[0]);
3814     if (nnz && (JJ[nnz-1] >= B->cmap->N)) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N);
3815   }
3816 #endif
3817 
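  /* Count, for each local row, how many column indices fall inside the diagonal block [cstart,cend)
     (d_nnz) and how many fall outside it (o_nnz); these counts drive the preallocation below */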
3818   for (i=0; i<m; i++) {
3819     nnz     = Ii[i+1]- Ii[i];
3820     JJ      = J + Ii[i];
3821     nnz_max = PetscMax(nnz_max,nnz);
3822     d       = 0;
3823     for (j=0; j<nnz; j++) {
3824       if (cstart <= JJ[j] && JJ[j] < cend) d++;
3825     }
3826     d_nnz[i] = d;
3827     o_nnz[i] = nnz - d;
3828   }
3829   ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
3830   ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);
3831 
3832   if (v) values = (PetscScalar*)v;
3833   else {
3834     ierr = PetscCalloc1(nnz_max+1,&values);CHKERRQ(ierr);
3835   }
3836 
3837   for (i=0; i<m; i++) {
3838     ii   = i + rstart;
3839     nnz  = Ii[i+1]- Ii[i];
3840     ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ? Ii[i] : 0),INSERT_VALUES);CHKERRQ(ierr);
3841   }
3842   nooffprocentries    = B->nooffprocentries;
3843   B->nooffprocentries = PETSC_TRUE;
3844   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3845   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3846   B->nooffprocentries = nooffprocentries;
3847 
3848   if (!v) {
3849     ierr = PetscFree(values);CHKERRQ(ierr);
3850   }
3851   ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3852   PetscFunctionReturn(0);
3853 }
3854 
3855 /*@
3856    MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format
3857    (the default parallel PETSc format).
3858 
3859    Collective on MPI_Comm
3860 
3861    Input Parameters:
3862 +  B - the matrix
3863 .  i - the indices into j for the start of each local row (starts with zero)
3864 .  j - the column indices for each local row (starts with zero)
3865 -  v - optional values in the matrix
3866 
3867    Level: developer
3868 
3869    Notes:
3870        The i, j, and v arrays ARE copied by this routine into the internal format used by PETSc;
3871      thus you CANNOT change the matrix entries by changing the values of v[] after you have
3872      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3873 
3874        The i and j indices are 0 based, and the i indices are offsets into the local j (and v) arrays.
3875 
3876        The format used for the sparse matrix input is equivalent to a
3877     row-major ordering, i.e., for the following matrix the expected input data is
3878     as shown:
3879 
3880 $        1 0 0
3881 $        2 0 3     P0
3882 $       -------
3883 $        4 5 6     P1
3884 $
3885 $     Process0 [P0]: rows_owned=[0,1]
3886 $        i =  {0,1,3}  [size = nrow+1  = 2+1]
3887 $        j =  {0,0,2}  [size = 3]
3888 $        v =  {1,2,3}  [size = 3]
3889 $
3890 $     Process1 [P1]: rows_owned=[2]
3891 $        i =  {0,3}    [size = nrow+1  = 1+1]
3892 $        j =  {0,1,2}  [size = 3]
3893 $        v =  {4,5,6}  [size = 3]
3894 
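   For instance, on rank 0 of a two-process run the P0 piece of the matrix above could be
   supplied with the sketch below (comm is assumed to be the communicator the matrix lives on;
   the other rank makes the same calls with its own local arrays); after this call the matrix
   is assembled and ready to use:

.vb
   Mat         A;
   PetscInt    i[] = {0,1,3},j[] = {0,0,2};
   PetscScalar v[] = {1.0,2.0,3.0};

   MatCreate(comm,&A);
   MatSetSizes(A,2,PETSC_DECIDE,3,3);
   MatSetType(A,MATMPIAIJ);
   MatMPIAIJSetPreallocationCSR(A,i,j,v);
.ve
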
3895 .keywords: matrix, aij, compressed row, sparse, parallel
3896 
3897 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateAIJ(), MATMPIAIJ,
3898           MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays()
3899 @*/
3900 PetscErrorCode  MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
3901 {
3902   PetscErrorCode ierr;
3903 
3904   PetscFunctionBegin;
3905   ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocationCSR_C",(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,i,j,v));CHKERRQ(ierr);
3906   PetscFunctionReturn(0);
3907 }
3908 
3909 /*@C
3910    MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format
3911    (the default parallel PETSc format).  For good matrix assembly performance
3912    the user should preallocate the matrix storage by setting the parameters
3913    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3914    performance can be increased by more than a factor of 50.
3915 
3916    Collective on MPI_Comm
3917 
3918    Input Parameters:
3919 +  B - the matrix
3920 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
3921            (same value is used for all local rows)
3922 .  d_nnz - array containing the number of nonzeros in the various rows of the
3923            DIAGONAL portion of the local submatrix (possibly different for each row)
3924            or NULL (PETSC_NULL_INTEGER in Fortran), if d_nz is used to specify the nonzero structure.
3925            The size of this array is equal to the number of local rows, i.e 'm'.
3926            For matrices that will be factored, you must leave room for (and set)
3927            the diagonal entry even if it is zero.
3928 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
3929            submatrix (same value is used for all local rows).
3930 -  o_nnz - array containing the number of nonzeros in the various rows of the
3931            OFF-DIAGONAL portion of the local submatrix (possibly different for
3932            each row) or NULL (PETSC_NULL_INTEGER in Fortran), if o_nz is used to specify the nonzero
3933            structure. The size of this array is equal to the number
3934            of local rows, i.e 'm'.
3935 
3936    If the *_nnz parameter is given then the *_nz parameter is ignored
3937 
3938    The AIJ format (also called the Yale sparse matrix format or
3939    compressed row storage (CSR)), is fully compatible with standard Fortran 77
3940    storage.  The stored row and column indices begin with zero.
3941    See Users-Manual: ch_mat for details.
3942 
3943    The parallel matrix is partitioned such that the first m0 rows belong to
3944    process 0, the next m1 rows belong to process 1, the next m2 rows belong
3945    to process 2, etc., where m0,m1,m2,... are the input parameter 'm'.
3946 
3947    The DIAGONAL portion of the local submatrix of a processor can be defined
3948    as the submatrix which is obtained by extracting the part corresponding to
3949    the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the
3950    first row that belongs to the processor, r2 is the last row belonging to
3951    this processor, and c1-c2 is the range of indices of the local part of a
3952    vector suitable for applying the matrix to.  This is an m x n matrix.  In the
3953    common case of a square matrix, the row and column ranges are the same and
3954    the DIAGONAL part is also square. The remaining portion of the local
3955    submatrix, of size m x (N-n), constitutes the OFF-DIAGONAL portion.
3956 
3957    If o_nnz and d_nnz are specified, then o_nz and d_nz are ignored.
3958 
3959    You can call MatGetInfo() to get information on how effective the preallocation was;
3960    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
3961    You can also run with the option -info and look for messages with the string
3962    malloc in them to see if additional memory allocation was needed.
3963 
3964    Example usage:
3965 
3966    Consider the following 8x8 matrix with 34 non-zero values, that is
3967    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
3968    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
3969    as follows:
3970 
3971 .vb
3972             1  2  0  |  0  3  0  |  0  4
3973     Proc0   0  5  6  |  7  0  0  |  8  0
3974             9  0 10  | 11  0  0  | 12  0
3975     -------------------------------------
3976            13  0 14  | 15 16 17  |  0  0
3977     Proc1   0 18  0  | 19 20 21  |  0  0
3978             0  0  0  | 22 23  0  | 24  0
3979     -------------------------------------
3980     Proc2  25 26 27  |  0  0 28  | 29  0
3981            30  0  0  | 31 32 33  |  0 34
3982 .ve
3983 
3984    This can be represented as a collection of submatrices as:
3985 
3986 .vb
3987       A B C
3988       D E F
3989       G H I
3990 .ve
3991 
3992    Where the submatrices A,B,C are owned by proc0, D,E,F are
3993    owned by proc1, G,H,I are owned by proc2.
3994 
3995    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3996    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3997    The 'M','N' parameters are 8,8, and have the same values on all procs.
3998 
3999    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
4000    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
4001    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
4002    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
4003    part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
4004    matrix, and [DF] as another SeqAIJ matrix.
4005 
4006    When d_nz, o_nz parameters are specified, d_nz storage elements are
4007    allocated for every row of the local diagonal submatrix, and o_nz
4008    storage locations are allocated for every row of the OFF-DIAGONAL submat.
4009    One way to choose d_nz and o_nz is to use the maximum number of nonzeros over the local
4010    rows for each of the local DIAGONAL and OFF-DIAGONAL submatrices.
4011    In this case, the values of d_nz,o_nz are:
4012 .vb
4013      proc0 : dnz = 2, o_nz = 2
4014      proc1 : dnz = 3, o_nz = 2
4015      proc2 : dnz = 1, o_nz = 4
4016 .ve
4017    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
4018    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
4019    for proc2, i.e., we are using 12+15+10=37 storage locations to store
4020    34 values.
4021 
4022    When d_nnz, o_nnz parameters are specified, the storage is specified
4023    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
4024    In the above case the values for d_nnz,o_nnz are:
4025 .vb
4026      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
4027      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
4028      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
4029 .ve
4030    Here the space allocated is the sum of all the above values, i.e., 34, and
4031    hence pre-allocation is perfect.
4032 
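   As a sketch, rank 0 of the example above could preallocate its part as follows (comm is the
   communicator the matrix lives on; the other ranks make the same calls with their own local
   sizes and nnz arrays):

.vb
   Mat      A;
   PetscInt d_nnz[] = {2,2,2},o_nnz[] = {2,2,2};

   MatCreate(comm,&A);
   MatSetSizes(A,3,3,8,8);
   MatSetType(A,MATMPIAIJ);
   MatMPIAIJSetPreallocation(A,0,d_nnz,0,o_nnz);
.ve
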
4033    Level: intermediate
4034 
4035 .keywords: matrix, aij, compressed row, sparse, parallel
4036 
4037 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateAIJ(), MatMPIAIJSetPreallocationCSR(),
4038           MATMPIAIJ, MatGetInfo(), PetscSplitOwnership()
4039 @*/
4040 PetscErrorCode MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
4041 {
4042   PetscErrorCode ierr;
4043 
4044   PetscFunctionBegin;
4045   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
4046   PetscValidType(B,1);
4047   ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocation_C",(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr);
4048   PetscFunctionReturn(0);
4049 }
4050 
4051 /*@
4052      MatCreateMPIAIJWithArrays - creates a MPI AIJ matrix using arrays that contain in standard
4053          CSR format the local rows.
4054 
4055    Collective on MPI_Comm
4056 
4057    Input Parameters:
4058 +  comm - MPI communicator
4059 .  m - number of local rows (Cannot be PETSC_DECIDE)
4060 .  n - This value should be the same as the local size used in creating the
4061        x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have it
4062        calculated if N is given). For square matrices n is almost always m.
4063 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
4064 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
4065 .   i - row indices
4066 .   j - column indices
4067 -   a - matrix values
4068 
4069    Output Parameter:
4070 .   mat - the matrix
4071 
4072    Level: intermediate
4073 
4074    Notes:
4075        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
4076      thus you CANNOT change the matrix entries by changing the values of a[] after you have
4077      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
4078 
4079        The i and j indices are 0 based, and the i indices are offsets into the local j (and a) arrays.
4080 
4081        The format used for the sparse matrix input is equivalent to a
4082     row-major ordering, i.e., for the following matrix the expected input data is
4083     as shown:
4084 
4085 $        1 0 0
4086 $        2 0 3     P0
4087 $       -------
4088 $        4 5 6     P1
4089 $
4090 $     Process0 [P0]: rows_owned=[0,1]
4091 $        i =  {0,1,3}  [size = nrow+1  = 2+1]
4092 $        j =  {0,0,2}  [size = 3]
4093 $        v =  {1,2,3}  [size = 3]
4094 $
4095 $     Process1 [P1]: rows_owned=[2]
4096 $        i =  {0,3}    [size = nrow+1  = 1+1]
4097 $        j =  {0,1,2}  [size = 3]
4098 $        v =  {4,5,6}  [size = 3]
4099 
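   For instance, on rank 0 of a two-process run the P0 piece of the matrix above could be
   supplied with the sketch below (comm is assumed to be the target communicator; the other
   rank passes its own local arrays):

.vb
   Mat         A;
   PetscInt    i[] = {0,1,3},j[] = {0,0,2};
   PetscScalar v[] = {1.0,2.0,3.0};

   MatCreateMPIAIJWithArrays(comm,2,PETSC_DECIDE,3,3,i,j,v,&A);
.ve
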
4100 .keywords: matrix, aij, compressed row, sparse, parallel
4101 
4102 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
4103           MATMPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays()
4104 @*/
4105 PetscErrorCode MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
4106 {
4107   PetscErrorCode ierr;
4108 
4109   PetscFunctionBegin;
4110   if (i && i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
4111   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
4112   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
4113   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
4114   /* ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); */
4115   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
4116   ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr);
4117   PetscFunctionReturn(0);
4118 }
4119 
4120 /*@C
4121    MatCreateAIJ - Creates a sparse parallel matrix in AIJ format
4122    (the default parallel PETSc format).  For good matrix assembly performance
4123    the user should preallocate the matrix storage by setting the parameters
4124    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
4125    performance can be increased by more than a factor of 50.
4126 
4127    Collective on MPI_Comm
4128 
4129    Input Parameters:
4130 +  comm - MPI communicator
4131 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
4132            This value should be the same as the local size used in creating the
4133            y vector for the matrix-vector product y = Ax.
4134 .  n - This value should be the same as the local size used in creating the
4135        x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have it
4136        calculated if N is given). For square matrices n is almost always m.
4137 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
4138 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
4139 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
4140            (same value is used for all local rows)
4141 .  d_nnz - array containing the number of nonzeros in the various rows of the
4142            DIAGONAL portion of the local submatrix (possibly different for each row)
4143            or NULL, if d_nz is used to specify the nonzero structure.
4144            The size of this array is equal to the number of local rows, i.e 'm'.
4145 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
4146            submatrix (same value is used for all local rows).
4147 -  o_nnz - array containing the number of nonzeros in the various rows of the
4148            OFF-DIAGONAL portion of the local submatrix (possibly different for
4149            each row) or NULL, if o_nz is used to specify the nonzero
4150            structure. The size of this array is equal to the number
4151            of local rows, i.e 'm'.
4152 
4153    Output Parameter:
4154 .  A - the matrix
4155 
4156    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
4157    MatXXXXSetPreallocation() paradigm instead of this routine directly.
4158    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
4159 
4160    Notes:
4161    If the *_nnz parameter is given then the *_nz parameter is ignored
4162 
4163    m,n,M,N parameters specify the size of the matrix, and its partitioning across
4164    processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
4165    storage requirements for this matrix.
4166 
4167    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one
4168    processor then it must be used on all processors that share the object for
4169    that argument.
4170 
4171    The user MUST specify either the local or global matrix dimensions
4172    (possibly both).
4173 
4174    The parallel matrix is partitioned across processors such that the
4175    first m0 rows belong to process 0, the next m1 rows belong to
4176    process 1, the next m2 rows belong to process 2, etc., where
4177    m0,m1,m2,... are the input parameter 'm', i.e., each processor stores
4178    values corresponding to an [m x N] submatrix.
4179 
4180    The columns are logically partitioned with the n0 columns belonging
4181    to the 0th partition, the next n1 columns belonging to the next
4182    partition, etc., where n0,n1,n2,... are the input parameter 'n'.
4183 
4184    The DIAGONAL portion of the local submatrix on any given processor
4185    is the submatrix corresponding to the rows and columns m,n
4186    owned by the given processor, i.e., the diagonal submatrix on
4187    process 0 is [m0 x n0], the diagonal submatrix on process 1 is [m1 x n1],
4188    etc. The remaining portion of the local submatrix [m x (N-n)]
4189    constitutes the OFF-DIAGONAL portion. The example below better
4190    illustrates this concept.
4191 
4192    For a square global matrix we define each processor's diagonal portion
4193    to be its local rows and the corresponding columns (a square submatrix);
4194    each processor's off-diagonal portion encompasses the remainder of the
4195    local matrix (a rectangular submatrix).
4196 
4197    If o_nnz and d_nnz are specified, then o_nz and d_nz are ignored.
4198 
4199    When calling this routine with a single process communicator, a matrix of
4200    type SEQAIJ is returned.  If a matrix of type MPIAIJ is desired for this
4201    type of communicator, use the construction mechanism
4202 .vb
4203      MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatSetSizes(A, m,n,M,N); MatMPIAIJSetPreallocation(A,...);
4204 .ve
4205 
4206 $     MatCreate(...,&A);
4207 $     MatSetType(A,MATMPIAIJ);
4208 $     MatSetSizes(A, m,n,M,N);
4209 $     MatMPIAIJSetPreallocation(A,...);
4210 
4211    By default, this format uses inodes (identical nodes) when possible.
4212    We search for consecutive rows with the same nonzero structure, thereby
4213    reusing matrix information to achieve increased efficiency.
4214 
4215    Options Database Keys:
4216 +  -mat_no_inode  - Do not use inodes
4217 -  -mat_inode_limit <limit> - Sets inode limit (max limit=5)
4218 
4219 
4220 
4221    Example usage:
4222 
4223    Consider the following 8x8 matrix with 34 non-zero values, that is
4224    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
4225    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
4226    as follows
4227 
4228 .vb
4229             1  2  0  |  0  3  0  |  0  4
4230     Proc0   0  5  6  |  7  0  0  |  8  0
4231             9  0 10  | 11  0  0  | 12  0
4232     -------------------------------------
4233            13  0 14  | 15 16 17  |  0  0
4234     Proc1   0 18  0  | 19 20 21  |  0  0
4235             0  0  0  | 22 23  0  | 24  0
4236     -------------------------------------
4237     Proc2  25 26 27  |  0  0 28  | 29  0
4238            30  0  0  | 31 32 33  |  0 34
4239 .ve
4240 
4241    This can be represented as a collection of submatrices as
4242 
4243 .vb
4244       A B C
4245       D E F
4246       G H I
4247 .ve
4248 
4249    Where the submatrices A,B,C are owned by proc0, D,E,F are
4250    owned by proc1, G,H,I are owned by proc2.
4251 
4252    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4253    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4254    The 'M','N' parameters are 8,8, and have the same values on all procs.
4255 
4256    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
4257    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
4258    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
4259    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
4260    part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
4261    matrix, and [DF] as another SeqAIJ matrix.
4262 
4263    When d_nz, o_nz parameters are specified, d_nz storage elements are
4264    allocated for every row of the local diagonal submatrix, and o_nz
4265    storage locations are allocated for every row of the OFF-DIAGONAL submat.
4266    One way to choose d_nz and o_nz is to use the maximum number of nonzeros over the local
4267    rows for each of the local DIAGONAL and OFF-DIAGONAL submatrices.
4268    In this case, the values of d_nz,o_nz are
4269 .vb
4270      proc0 : dnz = 2, o_nz = 2
4271      proc1 : dnz = 3, o_nz = 2
4272      proc2 : dnz = 1, o_nz = 4
4273 .ve
4274    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
4275    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
4276    for proc2, i.e., we are using 12+15+10=37 storage locations to store
4277    34 values.
4278 
4279    When d_nnz, o_nnz parameters are specified, the storage is specified
4280    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
4281    In the above case the values for d_nnz,o_nnz are
4282 .vb
4283      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
4284      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
4285      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
4286 .ve
4287    Here the space allocated is the sum of all the above values, i.e., 34, and
4288    hence pre-allocation is perfect.
4289 
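   As a sketch, rank 0 of the three-process example above could create its share of the matrix
   with (comm is the communicator the matrix lives on; the other ranks pass their own local
   sizes and nnz arrays):

.vb
   Mat      A;
   PetscInt d_nnz[] = {2,2,2},o_nnz[] = {2,2,2};

   MatCreateAIJ(comm,3,3,8,8,0,d_nnz,0,o_nnz,&A);
.ve
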
4290    Level: intermediate
4291 
4292 .keywords: matrix, aij, compressed row, sparse, parallel
4293 
4294 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
4295           MATMPIAIJ, MatCreateMPIAIJWithArrays()
4296 @*/
4297 PetscErrorCode  MatCreateAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
4298 {
4299   PetscErrorCode ierr;
4300   PetscMPIInt    size;
4301 
4302   PetscFunctionBegin;
4303   ierr = MatCreate(comm,A);CHKERRQ(ierr);
4304   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
4305   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4306   if (size > 1) {
4307     ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr);
4308     ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
4309   } else {
4310     ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr);
4311     ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr);
4312   }
4313   PetscFunctionReturn(0);
4314 }
4315 
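/* Return the diagonal block (Ad), the off-diagonal block (Ao), and the mapping (colmap) from
   Ao's local column numbering to global column numbers of a MATMPIAIJ matrix */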
4316 PetscErrorCode MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,const PetscInt *colmap[])
4317 {
4318   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
4319   PetscBool      flg;
4320   PetscErrorCode ierr;
4321 
4322   PetscFunctionBegin;
4323   ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&flg);CHKERRQ(ierr);
4324   if (!flg) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"This function requires a MATMPIAIJ matrix as input");
4325   if (Ad)     *Ad     = a->A;
4326   if (Ao)     *Ao     = a->B;
4327   if (colmap) *colmap = a->garray;
4328   PetscFunctionReturn(0);
4329 }
4330 
4331 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
4332 {
4333   PetscErrorCode ierr;
4334   PetscInt       m,N,i,rstart,nnz,Ii;
4335   PetscInt       *indx;
4336   PetscScalar    *values;
4337 
4338   PetscFunctionBegin;
4339   ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
4340   if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
4341     PetscInt       *dnz,*onz,sum,bs,cbs;
4342 
4343     if (n == PETSC_DECIDE) {
4344       ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr);
4345     }
4346     /* Check sum(n) = N */
4347     ierr = MPIU_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
4348     if (sum != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns %D != global columns %D",sum,N);
4349 
4350     ierr    = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
4351     rstart -= m;
4352 
4353     ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
4354     for (i=0; i<m; i++) {
4355       ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr);
4356       ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr);
4357       ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr);
4358     }
4359 
4360     ierr = MatCreate(comm,outmat);CHKERRQ(ierr);
4361     ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
4362     ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr);
4363     ierr = MatSetBlockSizes(*outmat,bs,cbs);CHKERRQ(ierr);
4364     ierr = MatSetType(*outmat,MATAIJ);CHKERRQ(ierr);
4365     ierr = MatSeqAIJSetPreallocation(*outmat,0,dnz);CHKERRQ(ierr);
4366     ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr);
4367     ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
4368   }
4369 
4370   /* numeric phase */
4371   ierr = MatGetOwnershipRange(*outmat,&rstart,NULL);CHKERRQ(ierr);
4372   for (i=0; i<m; i++) {
4373     ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
4374     Ii   = i + rstart;
4375     ierr = MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
4376     ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
4377   }
4378   ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4379   ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4380   PetscFunctionReturn(0);
4381 }
4382 
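/* Appends each process's local rows of A, stored as an m x N SeqAIJ matrix, to the binary file "<outfile>.<rank>" */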
4383 PetscErrorCode MatFileSplit(Mat A,char *outfile)
4384 {
4385   PetscErrorCode    ierr;
4386   PetscMPIInt       rank;
4387   PetscInt          m,N,i,rstart,nnz;
4388   size_t            len;
4389   const PetscInt    *indx;
4390   PetscViewer       out;
4391   char              *name;
4392   Mat               B;
4393   const PetscScalar *values;
4394 
4395   PetscFunctionBegin;
4396   ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr);
4397   ierr = MatGetSize(A,0,&N);CHKERRQ(ierr);
4398   /* Should this be the type of the diagonal block of A? */
4399   ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr);
4400   ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr);
4401   ierr = MatSetBlockSizesFromMats(B,A,A);CHKERRQ(ierr);
4402   ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr);
4403   ierr = MatSeqAIJSetPreallocation(B,0,NULL);CHKERRQ(ierr);
4404   ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr);
4405   for (i=0; i<m; i++) {
4406     ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
4407     ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
4408     ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
4409   }
4410   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4411   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4412 
4413   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRQ(ierr);
4414   ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr);
4415   ierr = PetscMalloc1(len+5,&name);CHKERRQ(ierr);
4416   sprintf(name,"%s.%d",outfile,rank);
4417   ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr);
4418   ierr = PetscFree(name);CHKERRQ(ierr);
4419   ierr = MatView(B,out);CHKERRQ(ierr);
4420   ierr = PetscViewerDestroy(&out);CHKERRQ(ierr);
4421   ierr = MatDestroy(&B);CHKERRQ(ierr);
4422   PetscFunctionReturn(0);
4423 }
4424 
4425 PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(Mat A)
4426 {
4427   PetscErrorCode      ierr;
4428   Mat_Merge_SeqsToMPI *merge;
4429   PetscContainer      container;
4430 
4431   PetscFunctionBegin;
4432   ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr);
4433   if (container) {
4434     ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr);
4435     ierr = PetscFree(merge->id_r);CHKERRQ(ierr);
4436     ierr = PetscFree(merge->len_s);CHKERRQ(ierr);
4437     ierr = PetscFree(merge->len_r);CHKERRQ(ierr);
4438     ierr = PetscFree(merge->bi);CHKERRQ(ierr);
4439     ierr = PetscFree(merge->bj);CHKERRQ(ierr);
4440     ierr = PetscFree(merge->buf_ri[0]);CHKERRQ(ierr);
4441     ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr);
4442     ierr = PetscFree(merge->buf_rj[0]);CHKERRQ(ierr);
4443     ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr);
4444     ierr = PetscFree(merge->coi);CHKERRQ(ierr);
4445     ierr = PetscFree(merge->coj);CHKERRQ(ierr);
4446     ierr = PetscFree(merge->owners_co);CHKERRQ(ierr);
4447     ierr = PetscLayoutDestroy(&merge->rowmap);CHKERRQ(ierr);
4448     ierr = PetscFree(merge);CHKERRQ(ierr);
4449     ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr);
4450   }
4451   ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr);
4452   PetscFunctionReturn(0);
4453 }
4454 
4455 #include <../src/mat/utils/freespace.h>
4456 #include <petscbt.h>
4457 
4458 PetscErrorCode MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat,Mat mpimat)
4459 {
4460   PetscErrorCode      ierr;
4461   MPI_Comm            comm;
4462   Mat_SeqAIJ          *a  =(Mat_SeqAIJ*)seqmat->data;
4463   PetscMPIInt         size,rank,taga,*len_s;
4464   PetscInt            N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj;
4465   PetscInt            proc,m;
4466   PetscInt            **buf_ri,**buf_rj;
4467   PetscInt            k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj;
4468   PetscInt            nrows,**buf_ri_k,**nextrow,**nextai;
4469   MPI_Request         *s_waits,*r_waits;
4470   MPI_Status          *status;
4471   MatScalar           *aa=a->a;
4472   MatScalar           **abuf_r,*ba_i;
4473   Mat_Merge_SeqsToMPI *merge;
4474   PetscContainer      container;
4475 
4476   PetscFunctionBegin;
4477   ierr = PetscObjectGetComm((PetscObject)mpimat,&comm);CHKERRQ(ierr);
4478   ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
4479 
4480   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4481   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4482 
4483   ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr);
4484   ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr);
4485 
4486   bi     = merge->bi;
4487   bj     = merge->bj;
4488   buf_ri = merge->buf_ri;
4489   buf_rj = merge->buf_rj;
4490 
4491   ierr   = PetscMalloc1(size,&status);CHKERRQ(ierr);
4492   owners = merge->rowmap->range;
4493   len_s  = merge->len_s;
4494 
4495   /* send and recv matrix values */
4496   /*-----------------------------*/
4497   ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr);
4498   ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr);
4499 
4500   ierr = PetscMalloc1(merge->nsend+1,&s_waits);CHKERRQ(ierr);
4501   for (proc=0,k=0; proc<size; proc++) {
4502     if (!len_s[proc]) continue;
4503     i    = owners[proc];
4504     ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr);
4505     k++;
4506   }
4507 
4508   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);}
4509   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);}
4510   ierr = PetscFree(status);CHKERRQ(ierr);
4511 
4512   ierr = PetscFree(s_waits);CHKERRQ(ierr);
4513   ierr = PetscFree(r_waits);CHKERRQ(ierr);
4514 
4515   /* insert mat values of mpimat */
4516   /*----------------------------*/
4517   ierr = PetscMalloc1(N,&ba_i);CHKERRQ(ierr);
4518   ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr);
4519 
4520   for (k=0; k<merge->nrecv; k++) {
4521     buf_ri_k[k] = buf_ri[k]; /* beginning of the k-th received i-structure */
4522     nrows       = *(buf_ri_k[k]);
4523     nextrow[k]  = buf_ri_k[k]+1;  /* next row number in the k-th received i-structure */
4524     nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure entry of the k-th received i-structure */
4525   }
4526 
4527   /* set values of ba */
4528   m = merge->rowmap->n;
4529   for (i=0; i<m; i++) {
4530     arow = owners[rank] + i;
4531     bj_i = bj+bi[i];  /* col indices of the i-th row of mpimat */
4532     bnzi = bi[i+1] - bi[i];
4533     ierr = PetscMemzero(ba_i,bnzi*sizeof(PetscScalar));CHKERRQ(ierr);
4534 
4535     /* add local non-zero vals of this proc's seqmat into ba */
4536     anzi   = ai[arow+1] - ai[arow];
4537     aj     = a->j + ai[arow];
4538     aa     = a->a + ai[arow];
4539     nextaj = 0;
4540     for (j=0; nextaj<anzi; j++) {
4541       if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
4542         ba_i[j] += aa[nextaj++];
4543       }
4544     }
4545 
4546     /* add received vals into ba */
4547     for (k=0; k<merge->nrecv; k++) { /* k-th received message */
4548       /* i-th row */
4549       if (i == *nextrow[k]) {
4550         anzi   = *(nextai[k]+1) - *nextai[k];
4551         aj     = buf_rj[k] + *(nextai[k]);
4552         aa     = abuf_r[k] + *(nextai[k]);
4553         nextaj = 0;
4554         for (j=0; nextaj<anzi; j++) {
4555           if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
4556             ba_i[j] += aa[nextaj++];
4557           }
4558         }
4559         nextrow[k]++; nextai[k]++;
4560       }
4561     }
4562     ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr);
4563   }
4564   ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4565   ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4566 
4567   ierr = PetscFree(abuf_r[0]);CHKERRQ(ierr);
4568   ierr = PetscFree(abuf_r);CHKERRQ(ierr);
4569   ierr = PetscFree(ba_i);CHKERRQ(ierr);
4570   ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
4571   ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
4572   PetscFunctionReturn(0);
4573 }
4574 
4575 PetscErrorCode  MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat)
4576 {
4577   PetscErrorCode      ierr;
4578   Mat                 B_mpi;
4579   Mat_SeqAIJ          *a=(Mat_SeqAIJ*)seqmat->data;
4580   PetscMPIInt         size,rank,tagi,tagj,*len_s,*len_si,*len_ri;
4581   PetscInt            **buf_rj,**buf_ri,**buf_ri_k;
4582   PetscInt            M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j;
4583   PetscInt            len,proc,*dnz,*onz,bs,cbs;
4584   PetscInt            k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0;
4585   PetscInt            nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai;
4586   MPI_Request         *si_waits,*sj_waits,*ri_waits,*rj_waits;
4587   MPI_Status          *status;
4588   PetscFreeSpaceList  free_space=NULL,current_space=NULL;
4589   PetscBT             lnkbt;
4590   Mat_Merge_SeqsToMPI *merge;
4591   PetscContainer      container;
4592 
4593   PetscFunctionBegin;
4594   ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4595 
4596   /* make sure it is a PETSc comm */
4597   ierr = PetscCommDuplicate(comm,&comm,NULL);CHKERRQ(ierr);
4598   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4599   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4600 
4601   ierr = PetscNew(&merge);CHKERRQ(ierr);
4602   ierr = PetscMalloc1(size,&status);CHKERRQ(ierr);
4603 
4604   /* determine row ownership */
4605   /*---------------------------------------------------------*/
4606   ierr = PetscLayoutCreate(comm,&merge->rowmap);CHKERRQ(ierr);
4607   ierr = PetscLayoutSetLocalSize(merge->rowmap,m);CHKERRQ(ierr);
4608   ierr = PetscLayoutSetSize(merge->rowmap,M);CHKERRQ(ierr);
4609   ierr = PetscLayoutSetBlockSize(merge->rowmap,1);CHKERRQ(ierr);
4610   ierr = PetscLayoutSetUp(merge->rowmap);CHKERRQ(ierr);
4611   ierr = PetscMalloc1(size,&len_si);CHKERRQ(ierr);
4612   ierr = PetscMalloc1(size,&merge->len_s);CHKERRQ(ierr);
4613 
4614   m      = merge->rowmap->n;
4615   owners = merge->rowmap->range;
4616 
4617   /* determine the number of messages to send, their lengths */
4618   /*---------------------------------------------------------*/
4619   len_s = merge->len_s;
4620 
4621   len          = 0; /* length of buf_si[] */
4622   merge->nsend = 0;
4623   for (proc=0; proc<size; proc++) {
4624     len_si[proc] = 0;
4625     if (proc == rank) {
4626       len_s[proc] = 0;
4627     } else {
4628       len_si[proc] = owners[proc+1] - owners[proc] + 1;
4629       len_s[proc]  = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */
4630     }
4631     if (len_s[proc]) {
4632       merge->nsend++;
4633       nrows = 0;
4634       for (i=owners[proc]; i<owners[proc+1]; i++) {
4635         if (ai[i+1] > ai[i]) nrows++;
4636       }
4637       len_si[proc] = 2*(nrows+1);
4638       len         += len_si[proc];
4639     }
4640   }
4641 
4642   /* determine the number and length of messages to receive for ij-structure */
4643   /*-------------------------------------------------------------------------*/
4644   ierr = PetscGatherNumberOfMessages(comm,NULL,len_s,&merge->nrecv);CHKERRQ(ierr);
4645   ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr);
4646 
4647   /* post the Irecv of j-structure */
4648   /*-------------------------------*/
4649   ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr);
4650   ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr);
4651 
4652   /* post the Isend of j-structure */
4653   /*--------------------------------*/
4654   ierr = PetscMalloc2(merge->nsend,&si_waits,merge->nsend,&sj_waits);CHKERRQ(ierr);
4655 
4656   for (proc=0, k=0; proc<size; proc++) {
4657     if (!len_s[proc]) continue;
4658     i    = owners[proc];
4659     ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr);
4660     k++;
4661   }
4662 
4663   /* receives and sends of j-structure are complete */
4664   /*------------------------------------------------*/
4665   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);}
4666   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);}
4667 
4668   /* send and recv i-structure */
4669   /*---------------------------*/
4670   ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr);
4671   ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr);
4672 
4673   ierr   = PetscMalloc1(len+1,&buf_s);CHKERRQ(ierr);
4674   buf_si = buf_s;  /* points to the beginning of k-th msg to be sent */
4675   for (proc=0,k=0; proc<size; proc++) {
4676     if (!len_s[proc]) continue;
4677     /* form outgoing message for i-structure:
4678          buf_si[0]:                 nrows to be sent
4679                [1:nrows]:           row index (global)
4680                [nrows+1:2*nrows+1]: i-structure index
4681     */
4682     /*-------------------------------------------*/
4683     nrows       = len_si[proc]/2 - 1;
4684     buf_si_i    = buf_si + nrows+1;
4685     buf_si[0]   = nrows;
4686     buf_si_i[0] = 0;
4687     nrows       = 0;
4688     for (i=owners[proc]; i<owners[proc+1]; i++) {
4689       anzi = ai[i+1] - ai[i];
4690       if (anzi) {
4691         buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */
4692         buf_si[nrows+1]   = i-owners[proc]; /* local row index */
4693         nrows++;
4694       }
4695     }
4696     ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr);
4697     k++;
4698     buf_si += len_si[proc];
4699   }
4700 
4701   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);}
4702   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);}
4703 
4704   ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr);
4705   for (i=0; i<merge->nrecv; i++) {
4706     ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr);
4707   }
4708 
4709   ierr = PetscFree(len_si);CHKERRQ(ierr);
4710   ierr = PetscFree(len_ri);CHKERRQ(ierr);
4711   ierr = PetscFree(rj_waits);CHKERRQ(ierr);
4712   ierr = PetscFree2(si_waits,sj_waits);CHKERRQ(ierr);
4713   ierr = PetscFree(ri_waits);CHKERRQ(ierr);
4714   ierr = PetscFree(buf_s);CHKERRQ(ierr);
4715   ierr = PetscFree(status);CHKERRQ(ierr);
4716 
4717   /* compute a local seq matrix in each processor */
4718   /*----------------------------------------------*/
4719   /* allocate bi array and free space for accumulating nonzero column info */
4720   ierr  = PetscMalloc1(m+1,&bi);CHKERRQ(ierr);
4721   bi[0] = 0;
4722 
4723   /* create and initialize a linked list */
4724   nlnk = N+1;
4725   ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4726 
4727   /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */
4728   len  = ai[owners[rank+1]] - ai[owners[rank]];
4729   ierr = PetscFreeSpaceGet(PetscIntMultTruncate(2,len)+1,&free_space);CHKERRQ(ierr);
4730 
4731   current_space = free_space;
4732 
4733   /* determine symbolic info for each local row */
4734   ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr);
4735 
4736   for (k=0; k<merge->nrecv; k++) {
4737     buf_ri_k[k] = buf_ri[k]; /* beginning of the k-th received i-structure */
4738     nrows       = *buf_ri_k[k];
4739     nextrow[k]  = buf_ri_k[k] + 1;  /* next row number in the k-th received i-structure */
4740     nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure entry of the k-th received i-structure */
4741   }
4742 
4743   ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
4744   len  = 0;
4745   for (i=0; i<m; i++) {
4746     bnzi = 0;
4747     /* add local non-zero cols of this proc's seqmat into lnk */
4748     arow  = owners[rank] + i;
4749     anzi  = ai[arow+1] - ai[arow];
4750     aj    = a->j + ai[arow];
4751     ierr  = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4752     bnzi += nlnk;
4753     /* add received col data into lnk */
4754     for (k=0; k<merge->nrecv; k++) { /* k-th received message */
4755       if (i == *nextrow[k]) { /* i-th row */
4756         anzi  = *(nextai[k]+1) - *nextai[k];
4757         aj    = buf_rj[k] + *nextai[k];
4758         ierr  = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4759         bnzi += nlnk;
4760         nextrow[k]++; nextai[k]++;
4761       }
4762     }
4763     if (len < bnzi) len = bnzi;  /* =max(bnzi) */
4764 
4765     /* if free space is not available, make more free space */
4766     if (current_space->local_remaining<bnzi) {
4767       ierr = PetscFreeSpaceGet(PetscIntSumTruncate(bnzi,current_space->total_array_size),&current_space);CHKERRQ(ierr);
4768       nspacedouble++;
4769     }
4770     /* copy data into free space, then initialize lnk */
4771     ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
4772     ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr);
4773 
4774     current_space->array           += bnzi;
4775     current_space->local_used      += bnzi;
4776     current_space->local_remaining -= bnzi;
4777 
4778     bi[i+1] = bi[i] + bnzi;
4779   }
4780 
4781   ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
4782 
4783   ierr = PetscMalloc1(bi[m]+1,&bj);CHKERRQ(ierr);
4784   ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr);
4785   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
4786 
4787   /* create symbolic parallel matrix B_mpi */
4788   /*---------------------------------------*/
4789   ierr = MatGetBlockSizes(seqmat,&bs,&cbs);CHKERRQ(ierr);
4790   ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr);
4791   if (n==PETSC_DECIDE) {
4792     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr);
4793   } else {
4794     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
4795   }
4796   ierr = MatSetBlockSizes(B_mpi,bs,cbs);CHKERRQ(ierr);
4797   ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr);
4798   ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr);
4799   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
4800   ierr = MatSetOption(B_mpi,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
4801 
4802   /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */
4803   B_mpi->assembled    = PETSC_FALSE;
4804   B_mpi->ops->destroy = MatDestroy_MPIAIJ_SeqsToMPI;
4805   merge->bi           = bi;
4806   merge->bj           = bj;
4807   merge->buf_ri       = buf_ri;
4808   merge->buf_rj       = buf_rj;
4809   merge->coi          = NULL;
4810   merge->coj          = NULL;
4811   merge->owners_co    = NULL;
4812 
4813   ierr = PetscCommDestroy(&comm);CHKERRQ(ierr);
4814 
4815   /* attach the supporting struct to B_mpi for reuse */
4816   ierr    = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
4817   ierr    = PetscContainerSetPointer(container,merge);CHKERRQ(ierr);
4818   ierr    = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr);
4819   ierr    = PetscContainerDestroy(&container);CHKERRQ(ierr);
4820   *mpimat = B_mpi;
4821 
4822   ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4823   PetscFunctionReturn(0);
4824 }
4825 
4826 /*@C
4827       MatCreateMPIAIJSumSeqAIJ - Creates a MATMPIAIJ matrix by adding sequential
4828                  matrices from each processor
4829 
4830     Collective on MPI_Comm
4831 
4832    Input Parameters:
4833 +    comm - the communicator the parallel matrix will live on
4834 .    seqmat - the input sequential matrix
4835 .    m - number of local rows (or PETSC_DECIDE)
4836 .    n - number of local columns (or PETSC_DECIDE)
4837 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4838 
4839    Output Parameter:
4840 .    mpimat - the parallel matrix generated
4841 
4842     Level: advanced
4843 
4844    Notes:
4845      The dimensions of the sequential matrix in each processor MUST be the same.
4846      The input seqmat is included into the container "Mat_Merge_SeqsToMPI", and will be
4847      destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat.
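     A typical use is sketched below: each process assembles a sequential matrix of the full
     global size M x N holding only its own contribution (inserted with MatSetValues()), and the
     contributions are then summed into one parallel matrix:

.vb
     Mat seqmat,mpimat;

     MatCreateSeqAIJ(PETSC_COMM_SELF,M,N,0,NULL,&seqmat);
     ....  each process calls MatSetValues(seqmat,...) for its contribution ....
     MatAssemblyBegin(seqmat,MAT_FINAL_ASSEMBLY);
     MatAssemblyEnd(seqmat,MAT_FINAL_ASSEMBLY);
     MatCreateMPIAIJSumSeqAIJ(comm,seqmat,PETSC_DECIDE,PETSC_DECIDE,MAT_INITIAL_MATRIX,&mpimat);
.ve
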
4848 @*/
4849 PetscErrorCode MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat)
4850 {
4851   PetscErrorCode ierr;
4852   PetscMPIInt    size;
4853 
4854   PetscFunctionBegin;
4855   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4856   if (size == 1) {
4857     ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4858     if (scall == MAT_INITIAL_MATRIX) {
4859       ierr = MatDuplicate(seqmat,MAT_COPY_VALUES,mpimat);CHKERRQ(ierr);
4860     } else {
4861       ierr = MatCopy(seqmat,*mpimat,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
4862     }
4863     ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4864     PetscFunctionReturn(0);
4865   }
4866   ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4867   if (scall == MAT_INITIAL_MATRIX) {
4868     ierr = MatCreateMPIAIJSumSeqAIJSymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr);
4869   }
4870   ierr = MatCreateMPIAIJSumSeqAIJNumeric(seqmat,*mpimat);CHKERRQ(ierr);
4871   ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4872   PetscFunctionReturn(0);
4873 }
4874 
4875 /*@
4876      MatMPIAIJGetLocalMat - Creates a SeqAIJ matrix from a MATMPIAIJ matrix by taking all its local rows and putting them into a sequential matrix with
4877           mlocal rows and n columns, where mlocal is the local row count obtained with MatGetLocalSize() and n is the global column count obtained
4878           with MatGetSize().
4879 
4880     Not Collective
4881 
4882    Input Parameters:
4883 +    A - the matrix
4884 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4885 
4886    Output Parameter:
4887 .    A_loc - the local sequential matrix generated
4888 
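   A typical call sequence (sketch; the initial call creates A_loc, the reuse call refreshes its
   values after A has changed):

.vb
   Mat A_loc;

   MatMPIAIJGetLocalMat(A,MAT_INITIAL_MATRIX,&A_loc);
   MatMPIAIJGetLocalMat(A,MAT_REUSE_MATRIX,&A_loc);
   MatDestroy(&A_loc);
.ve
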
4889     Level: developer
4890 
4891 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMatCondensed()
4892 
4893 @*/
4894 PetscErrorCode MatMPIAIJGetLocalMat(Mat A,MatReuse scall,Mat *A_loc)
4895 {
4896   PetscErrorCode ierr;
4897   Mat_MPIAIJ     *mpimat=(Mat_MPIAIJ*)A->data;
4898   Mat_SeqAIJ     *mat,*a,*b;
4899   PetscInt       *ai,*aj,*bi,*bj,*cmap=mpimat->garray;
4900   MatScalar      *aa,*ba,*cam;
4901   PetscScalar    *ca;
4902   PetscInt       am=A->rmap->n,i,j,k,cstart=A->cmap->rstart;
4903   PetscInt       *ci,*cj,col,ncols_d,ncols_o,jo;
4904   PetscBool      match;
4905   MPI_Comm       comm;
4906   PetscMPIInt    size;
4907 
4908   PetscFunctionBegin;
4909   ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr);
4910   if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MATMPIAIJ matrix as input");
4911   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
4912   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4913   if (size == 1 && scall == MAT_REUSE_MATRIX) PetscFunctionReturn(0);
4914 
4915   ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
4916   a = (Mat_SeqAIJ*)(mpimat->A)->data;
4917   b = (Mat_SeqAIJ*)(mpimat->B)->data;
4918   ai = a->i; aj = a->j; bi = b->i; bj = b->j;
4919   aa = a->a; ba = b->a;
4920   if (scall == MAT_INITIAL_MATRIX) {
4921     if (size == 1) {
4922       ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ai,aj,aa,A_loc);CHKERRQ(ierr);
4923       PetscFunctionReturn(0);
4924     }
4925 
4926     ierr  = PetscMalloc1(1+am,&ci);CHKERRQ(ierr);
4927     ci[0] = 0;
4928     for (i=0; i<am; i++) {
4929       ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]);
4930     }
4931     ierr = PetscMalloc1(1+ci[am],&cj);CHKERRQ(ierr);
4932     ierr = PetscMalloc1(1+ci[am],&ca);CHKERRQ(ierr);
4933     k    = 0;
4934     for (i=0; i<am; i++) {
4935       ncols_o = bi[i+1] - bi[i];
4936       ncols_d = ai[i+1] - ai[i];
4937       /* off-diagonal portion of A */
4938       for (jo=0; jo<ncols_o; jo++) {
4939         col = cmap[*bj];
4940         if (col >= cstart) break;
4941         cj[k]   = col; bj++;
4942         ca[k++] = *ba++;
4943       }
4944       /* diagonal portion of A */
4945       for (j=0; j<ncols_d; j++) {
4946         cj[k]   = cstart + *aj++;
4947         ca[k++] = *aa++;
4948       }
4949       /* off-diagonal portion of A */
4950       for (j=jo; j<ncols_o; j++) {
4951         cj[k]   = cmap[*bj++];
4952         ca[k++] = *ba++;
4953       }
4954     }
4955     /* put together the new matrix */
4956     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr);
4957     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
4958     /* Since these are PETSc arrays, change flags to free them as necessary. */
4959     mat          = (Mat_SeqAIJ*)(*A_loc)->data;
4960     mat->free_a  = PETSC_TRUE;
4961     mat->free_ij = PETSC_TRUE;
4962     mat->nonew   = 0;
4963   } else if (scall == MAT_REUSE_MATRIX) {
4964     mat=(Mat_SeqAIJ*)(*A_loc)->data;
4965     ci = mat->i; cj = mat->j; cam = mat->a;
4966     for (i=0; i<am; i++) {
4967       /* off-diagonal portion of A */
4968       ncols_o = bi[i+1] - bi[i];
4969       for (jo=0; jo<ncols_o; jo++) {
4970         col = cmap[*bj];
4971         if (col >= cstart) break;
4972         *cam++ = *ba++; bj++;
4973       }
4974       /* diagonal portion of A */
4975       ncols_d = ai[i+1] - ai[i];
4976       for (j=0; j<ncols_d; j++) *cam++ = *aa++;
4977       /* off-diagonal portion of A */
4978       for (j=jo; j<ncols_o; j++) {
4979         *cam++ = *ba++; bj++;
4980       }
4981     }
4982   } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
4983   ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
4984   PetscFunctionReturn(0);
4985 }
4986 
4987 /*@C
4988      MatMPIAIJGetLocalMatCondensed - Creates a SeqAIJ matrix from a MATMPIAIJ matrix by taking all its local rows and NON-ZERO columns
4989 
4990     Not Collective
4991 
4992    Input Parameters:
4993 +    A - the matrix
4994 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4995 -    row, col - index sets of rows and columns to extract (or NULL)
4996 
4997    Output Parameter:
4998 .    A_loc - the local sequential matrix generated
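   For example, a minimal sketch that takes all local rows and all nonzero columns:

.vb
   Mat A_loc;

   MatMPIAIJGetLocalMatCondensed(A,MAT_INITIAL_MATRIX,NULL,NULL,&A_loc);
.ve
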
4999 
5000     Level: developer
5001 
5002 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMat()
5003 
5004 @*/
5005 PetscErrorCode MatMPIAIJGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc)
5006 {
5007   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
5008   PetscErrorCode ierr;
5009   PetscInt       i,start,end,ncols,nzA,nzB,*cmap,imark,*idx;
5010   IS             isrowa,iscola;
5011   Mat            *aloc;
5012   PetscBool      match;
5013 
5014   PetscFunctionBegin;
5015   ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr);
5016   if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MATMPIAIJ matrix as input");
5017   ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
5018   if (!row) {
5019     start = A->rmap->rstart; end = A->rmap->rend;
5020     ierr  = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr);
5021   } else {
5022     isrowa = *row;
5023   }
5024   if (!col) {
5025     start = A->cmap->rstart;
5026     cmap  = a->garray;
5027     nzA   = a->A->cmap->n;
5028     nzB   = a->B->cmap->n;
5029     ierr  = PetscMalloc1(nzA+nzB, &idx);CHKERRQ(ierr);
5030     ncols = 0;
5031     for (i=0; i<nzB; i++) {
5032       if (cmap[i] < start) idx[ncols++] = cmap[i];
5033       else break;
5034     }
5035     imark = i;
5036     for (i=0; i<nzA; i++) idx[ncols++] = start + i;
5037     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i];
5038     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&iscola);CHKERRQ(ierr);
5039   } else {
5040     iscola = *col;
5041   }
5042   if (scall != MAT_INITIAL_MATRIX) {
5043     ierr    = PetscMalloc1(1,&aloc);CHKERRQ(ierr);
5044     aloc[0] = *A_loc;
5045   }
5046   ierr   = MatCreateSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr);
5047   *A_loc = aloc[0];
5048   ierr   = PetscFree(aloc);CHKERRQ(ierr);
5049   if (!row) {
5050     ierr = ISDestroy(&isrowa);CHKERRQ(ierr);
5051   }
5052   if (!col) {
5053     ierr = ISDestroy(&iscola);CHKERRQ(ierr);
5054   }
5055   ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
5056   PetscFunctionReturn(0);
5057 }
5058 
5059 /*@C
5060     MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns of the local part of A
5061 
5062     Collective on Mat
5063 
5064    Input Parameters:
5065 +    A,B - the matrices in mpiaij format
5066 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5067 -    rowb, colb - index sets of rows and columns of B to extract (or NULL)
5068 
5069    Output Parameter:
5070 +    rowb, colb - index sets of rows and columns of B to extract
5071 -    B_seq - the sequential matrix generated
5072 
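   A sketch of the intended reuse pattern; the index sets created by the initial call are passed
   back in on the reuse call:

.vb
   IS  rowb = NULL,colb = NULL;
   Mat B_seq;

   MatGetBrowsOfAcols(A,B,MAT_INITIAL_MATRIX,&rowb,&colb,&B_seq);
   MatGetBrowsOfAcols(A,B,MAT_REUSE_MATRIX,&rowb,&colb,&B_seq);
.ve
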
5073     Level: developer
5074 
5075 @*/
5076 PetscErrorCode MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,Mat *B_seq)
5077 {
5078   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
5079   PetscErrorCode ierr;
5080   PetscInt       *idx,i,start,ncols,nzA,nzB,*cmap,imark;
5081   IS             isrowb,iscolb;
5082   Mat            *bseq=NULL;
5083 
5084   PetscFunctionBegin;
5085   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
5086     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
5087   }
5088   ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
5089 
5090   if (scall == MAT_INITIAL_MATRIX) {
5091     start = A->cmap->rstart;
5092     cmap  = a->garray;
5093     nzA   = a->A->cmap->n;
5094     nzB   = a->B->cmap->n;
5095     ierr  = PetscMalloc1(nzA+nzB, &idx);CHKERRQ(ierr);
5096     ncols = 0;
5097     for (i=0; i<nzB; i++) {  /* row < local row index */
5098       if (cmap[i] < start) idx[ncols++] = cmap[i];
5099       else break;
5100     }
5101     imark = i;
5102     for (i=0; i<nzA; i++) idx[ncols++] = start + i;  /* local rows */
5103     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */
5104     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&isrowb);CHKERRQ(ierr);
5105     ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr);
5106   } else {
5107     if (!rowb || !colb) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX");
5108     isrowb  = *rowb; iscolb = *colb;
5109     ierr    = PetscMalloc1(1,&bseq);CHKERRQ(ierr);
5110     bseq[0] = *B_seq;
5111   }
5112   ierr   = MatCreateSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr);
5113   *B_seq = bseq[0];
5114   ierr   = PetscFree(bseq);CHKERRQ(ierr);
5115   if (!rowb) {
5116     ierr = ISDestroy(&isrowb);CHKERRQ(ierr);
5117   } else {
5118     *rowb = isrowb;
5119   }
5120   if (!colb) {
5121     ierr = ISDestroy(&iscolb);CHKERRQ(ierr);
5122   } else {
5123     *colb = iscolb;
5124   }
5125   ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
5126   PetscFunctionReturn(0);
5127 }
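
/*
   Example usage of MatGetBrowsOfAcols() (a hedged sketch, not part of the original source; A and B
   are assumed to be assembled MATMPIAIJ matrices with the column layout of A matching the row
   layout of B, as required above):

       IS  rowb = NULL, colb = NULL;
       Mat B_seq;

       ierr = MatGetBrowsOfAcols(A,B,MAT_INITIAL_MATRIX,&rowb,&colb,&B_seq);CHKERRQ(ierr);
       ... use B_seq; after changing the numerical values of B, refresh it ...
       ierr = MatGetBrowsOfAcols(A,B,MAT_REUSE_MATRIX,&rowb,&colb,&B_seq);CHKERRQ(ierr);

       ierr = ISDestroy(&rowb);CHKERRQ(ierr);
       ierr = ISDestroy(&colb);CHKERRQ(ierr);
       ierr = MatDestroy(&B_seq);CHKERRQ(ierr);

   The index sets returned in rowb and colb must be kept and passed back for the MAT_REUSE_MATRIX call.
*/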
5128 
5129 /*
5130     MatGetBrowsOfAoCols_MPIAIJ - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns
5131     of the OFF-DIAGONAL portion of the local part of A
5132 
5133     Collective on Mat
5134 
5135    Input Parameters:
5136 +    A,B - the matrices in mpiaij format
5137 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5138 
5139    Output Parameters:
5140 +    startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or NULL)
5141 .    startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or NULL)
5142 .    bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or NULL)
5143 -    B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N
5144 
5145     Level: developer
5146 
5147 */
5148 PetscErrorCode MatGetBrowsOfAoCols_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscInt **startsj_s,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth)
5149 {
5150   VecScatter_MPI_General *gen_to,*gen_from;
5151   PetscErrorCode         ierr;
5152   Mat_MPIAIJ             *a=(Mat_MPIAIJ*)A->data;
5153   Mat_SeqAIJ             *b_oth;
5154   VecScatter             ctx;
5155   MPI_Comm               comm;
5156   PetscMPIInt            *rprocs,*sprocs,tag,rank;
5157   PetscInt               *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap->n,row,*b_othi,*b_othj;
5158   PetscInt               *rvalues,*svalues,*cols,sbs,rbs;
5159   PetscScalar            *b_otha,*bufa,*bufA,*vals;
5160   PetscInt               i,j,k,l,ll,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len;
5161   MPI_Request            *rwaits = NULL,*swaits = NULL;
5162   MPI_Status             *sstatus,rstatus;
5163   PetscMPIInt            jj,size;
5164   VecScatterType         type;
5165   PetscBool              mpi1;
5166 
5167   PetscFunctionBegin;
5168   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
5169   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
5170 
5171   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
5172     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
5173   }
5174   ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
5175   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
5176 
5177   if (size == 1) {
5178     startsj_s = NULL;
5179     bufa_ptr  = NULL;
5180     *B_oth    = NULL;
5181     PetscFunctionReturn(0);
5182   }
5183 
5184   ctx = a->Mvctx;
5185   ierr = VecScatterGetType(ctx,&type);CHKERRQ(ierr);
5186   ierr = PetscStrcmp(type,"mpi1",&mpi1);CHKERRQ(ierr);
5187   if (!mpi1) {
5188     /* a->Mvctx is not of type MPI1; only MPI1 scatters are supported by the Mat-Mat ops,
5189      thus create a->Mvctx_mpi1 */
5190     if (!a->Mvctx_mpi1) {
5191       a->Mvctx_mpi1_flg = PETSC_TRUE;
5192       ierr = MatSetUpMultiply_MPIAIJ(A);CHKERRQ(ierr);
5193     }
5194     ctx = a->Mvctx_mpi1;
5195   }
5196   tag = ((PetscObject)ctx)->tag;
5197 
5198   gen_to   = (VecScatter_MPI_General*)ctx->todata;
5199   gen_from = (VecScatter_MPI_General*)ctx->fromdata;
5200   nrecvs   = gen_from->n;
5201   nsends   = gen_to->n;
5202 
5203   ierr    = PetscMalloc2(nrecvs,&rwaits,nsends,&swaits);CHKERRQ(ierr);
5204   srow    = gen_to->indices;    /* local row index to be sent */
5205   sstarts = gen_to->starts;
5206   sprocs  = gen_to->procs;
5207   sstatus = gen_to->sstatus;
5208   sbs     = gen_to->bs;
5209   rstarts = gen_from->starts;
5210   rprocs  = gen_from->procs;
5211   rbs     = gen_from->bs;
5212 
5213   if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX;
5214   if (scall == MAT_INITIAL_MATRIX) {
5215     /* i-array */
5216     /*---------*/
5217     /*  post receives */
5218     ierr = PetscMalloc1(rbs*(rstarts[nrecvs] - rstarts[0]),&rvalues);CHKERRQ(ierr);
5219     for (i=0; i<nrecvs; i++) {
5220       rowlen = rvalues + rstarts[i]*rbs;
5221       nrows  = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */
5222       ierr   = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5223     }
5224 
5225     /* pack the outgoing message */
5226     ierr = PetscMalloc2(nsends+1,&sstartsj,nrecvs+1,&rstartsj);CHKERRQ(ierr);
5227 
5228     sstartsj[0] = 0;
5229     rstartsj[0] = 0;
5230     len         = 0; /* total length of j or a array to be sent */
5231     k           = 0;
5232     ierr = PetscMalloc1(sbs*(sstarts[nsends] - sstarts[0]),&svalues);CHKERRQ(ierr);
5233     for (i=0; i<nsends; i++) {
5234       rowlen = svalues + sstarts[i]*sbs;
5235       nrows  = sstarts[i+1]-sstarts[i]; /* num of block rows */
5236       for (j=0; j<nrows; j++) {
5237         row = srow[k] + B->rmap->range[rank]; /* global row idx */
5238         for (l=0; l<sbs; l++) {
5239           ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr); /* rowlength */
5240 
5241           rowlen[j*sbs+l] = ncols;
5242 
5243           len += ncols;
5244           ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr);
5245         }
5246         k++;
5247       }
5248       ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5249 
5250       sstartsj[i+1] = len;  /* starting point of (i+1)-th outgoing msg in bufj and bufa */
5251     }
5252     /* recvs and sends of i-array are completed */
5253     i = nrecvs;
5254     while (i--) {
5255       ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5256     }
5257     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
5258     ierr = PetscFree(svalues);CHKERRQ(ierr);
5259 
5260     /* allocate buffers for sending j and a arrays */
5261     ierr = PetscMalloc1(len+1,&bufj);CHKERRQ(ierr);
5262     ierr = PetscMalloc1(len+1,&bufa);CHKERRQ(ierr);
5263 
5264     /* create i-array of B_oth */
5265     ierr = PetscMalloc1(aBn+2,&b_othi);CHKERRQ(ierr);
5266 
5267     b_othi[0] = 0;
5268     len       = 0; /* total length of j or a array to be received */
5269     k         = 0;
5270     for (i=0; i<nrecvs; i++) {
5271       rowlen = rvalues + rstarts[i]*rbs;
5272       nrows  = rbs*(rstarts[i+1]-rstarts[i]); /* num of rows to be received */
5273       for (j=0; j<nrows; j++) {
5274         b_othi[k+1] = b_othi[k] + rowlen[j];
5275         ierr = PetscIntSumError(rowlen[j],len,&len);CHKERRQ(ierr);
5276         k++;
5277       }
5278       rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */
5279     }
5280     ierr = PetscFree(rvalues);CHKERRQ(ierr);
5281 
5282     /* allocate space for j and a arrays of B_oth */
5283     ierr = PetscMalloc1(b_othi[aBn]+1,&b_othj);CHKERRQ(ierr);
5284     ierr = PetscMalloc1(b_othi[aBn]+1,&b_otha);CHKERRQ(ierr);
5285 
5286     /* j-array */
5287     /*---------*/
5288     /*  post receives of j-array */
5289     for (i=0; i<nrecvs; i++) {
5290       nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
5291       ierr  = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5292     }
5293 
5294     /* pack the outgoing message j-array */
5295     k = 0;
5296     for (i=0; i<nsends; i++) {
5297       nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5298       bufJ  = bufj+sstartsj[i];
5299       for (j=0; j<nrows; j++) {
5300         row = srow[k++] + B->rmap->range[rank];  /* global row idx */
5301         for (ll=0; ll<sbs; ll++) {
5302           ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr);
5303           for (l=0; l<ncols; l++) {
5304             *bufJ++ = cols[l];
5305           }
5306           ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr);
5307         }
5308       }
5309       ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5310     }
5311 
5312     /* recvs and sends of j-array are completed */
5313     i = nrecvs;
5314     while (i--) {
5315       ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5316     }
5317     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
5318   } else if (scall == MAT_REUSE_MATRIX) {
5319     sstartsj = *startsj_s;
5320     rstartsj = *startsj_r;
5321     bufa     = *bufa_ptr;
5322     b_oth    = (Mat_SeqAIJ*)(*B_oth)->data;
5323     b_otha   = b_oth->a;
5324   } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"scall must be either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX");
5325 
5326   /* a-array */
5327   /*---------*/
5328   /*  post receives of a-array */
5329   for (i=0; i<nrecvs; i++) {
5330     nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
5331     ierr  = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5332   }
5333 
5334   /* pack the outgoing message a-array */
5335   k = 0;
5336   for (i=0; i<nsends; i++) {
5337     nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5338     bufA  = bufa+sstartsj[i];
5339     for (j=0; j<nrows; j++) {
5340       row = srow[k++] + B->rmap->range[rank];  /* global row idx */
5341       for (ll=0; ll<sbs; ll++) {
5342         ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr);
5343         for (l=0; l<ncols; l++) {
5344           *bufA++ = vals[l];
5345         }
5346         ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr);
5347       }
5348     }
5349     ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5350   }
5351   /* recvs and sends of a-array are completed */
5352   i = nrecvs;
5353   while (i--) {
5354     ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5355   }
5356   if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
5357   ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr);
5358 
5359   if (scall == MAT_INITIAL_MATRIX) {
5360     /* put together the new matrix */
5361     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr);
5362 
5363     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
5364     /* Since these are PETSc arrays, change flags to free them as necessary. */
5365     b_oth          = (Mat_SeqAIJ*)(*B_oth)->data;
5366     b_oth->free_a  = PETSC_TRUE;
5367     b_oth->free_ij = PETSC_TRUE;
5368     b_oth->nonew   = 0;
5369 
5370     ierr = PetscFree(bufj);CHKERRQ(ierr);
5371     if (!startsj_s || !bufa_ptr) {
5372       ierr = PetscFree2(sstartsj,rstartsj);CHKERRQ(ierr);
5373       ierr = PetscFree(bufa);CHKERRQ(ierr);
5374     } else {
5375       *startsj_s = sstartsj;
5376       *startsj_r = rstartsj;
5377       *bufa_ptr  = bufa;
5378     }
5379   }
5380   ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
5381   PetscFunctionReturn(0);
5382 }
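
/*
   Example usage of the developer routine above (a sketch for illustration only; A and B are
   assumed to be assembled MATMPIAIJ matrices with compatible layouts):

       PetscInt  *startsj_s = NULL,*startsj_r = NULL;
       MatScalar *bufa = NULL;
       Mat       B_oth;

       ierr = MatGetBrowsOfAoCols_MPIAIJ(A,B,MAT_INITIAL_MATRIX,&startsj_s,&startsj_r,&bufa,&B_oth);CHKERRQ(ierr);
       ... use B_oth; after changing the numerical values of B, update only the values ...
       ierr = MatGetBrowsOfAoCols_MPIAIJ(A,B,MAT_REUSE_MATRIX,&startsj_s,&startsj_r,&bufa,&B_oth);CHKERRQ(ierr);

       ierr = PetscFree2(startsj_s,startsj_r);CHKERRQ(ierr);
       ierr = PetscFree(bufa);CHKERRQ(ierr);
       ierr = MatDestroy(&B_oth);CHKERRQ(ierr);

   Passing NULL for startsj_s, startsj_r or bufa_ptr forces MAT_INITIAL_MATRIX behavior and discards the buffers.
*/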
5383 
5384 /*@C
5385   MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication.
5386 
5387   Not Collective
5388 
5389   Input Parameters:
5390 . A - The matrix in mpiaij format
5391 
5392   Output Parameters:
5393 + lvec - The local vector holding off-process values from the argument to a matrix-vector product
5394 . colmap - A map from global column index to local index into lvec
5395 - multScatter - A scatter from the argument of a matrix-vector product to lvec
5396 
5397   Level: developer
5398 
5399 @*/
5400 #if defined(PETSC_USE_CTABLE)
5401 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter)
5402 #else
5403 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter)
5404 #endif
5405 {
5406   Mat_MPIAIJ *a;
5407 
5408   PetscFunctionBegin;
5409   PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
5410   PetscValidPointer(lvec, 2);
5411   PetscValidPointer(colmap, 3);
5412   PetscValidPointer(multScatter, 4);
5413   a = (Mat_MPIAIJ*) A->data;
5414   if (lvec) *lvec = a->lvec;
5415   if (colmap) *colmap = a->colmap;
5416   if (multScatter) *multScatter = a->Mvctx;
5417   PetscFunctionReturn(0);
5418 }
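
/*
   Example usage of MatGetCommunicationStructs() (a minimal sketch, not part of the original source;
   A is assumed to be an assembled MATMPIAIJ matrix):

       Vec        lvec;
       VecScatter mvctx;
   #if defined(PETSC_USE_CTABLE)
       PetscTable colmap;
   #else
       PetscInt   *colmap;
   #endif

       ierr = MatGetCommunicationStructs(A,&lvec,&colmap,&mvctx);CHKERRQ(ierr);

   The returned objects are borrowed references owned by A and must not be destroyed by the caller.
*/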
5419 
5420 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat,MatType,MatReuse,Mat*);
5421 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJPERM(Mat,MatType,MatReuse,Mat*);
5422 #if defined(PETSC_HAVE_MKL_SPARSE)
5423 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJMKL(Mat,MatType,MatReuse,Mat*);
5424 #endif
5425 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat,MatType,MatReuse,Mat*);
5426 #if defined(PETSC_HAVE_ELEMENTAL)
5427 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_Elemental(Mat,MatType,MatReuse,Mat*);
5428 #endif
5429 #if defined(PETSC_HAVE_HYPRE)
5430 PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat,MatType,MatReuse,Mat*);
5431 PETSC_INTERN PetscErrorCode MatMatMatMult_Transpose_AIJ_AIJ(Mat,Mat,Mat,MatReuse,PetscReal,Mat*);
5432 #endif
5433 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_IS(Mat,MatType,MatReuse,Mat*);
5434 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISELL(Mat,MatType,MatReuse,Mat*);
5435 
5436 /*
5437     Computes C = A*B as (B'*A')' since computing A*B directly is untenable
5438 
5439                n                       p                          p
5440         (              )       (              )         (                  )
5441       m (      A       )  *  n (       B      )   =   m (         C        )
5442         (              )       (              )         (                  )
5443 
5444 */
5445 PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C)
5446 {
5447   PetscErrorCode ierr;
5448   Mat            At,Bt,Ct;
5449 
5450   PetscFunctionBegin;
5451   ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr);
5452   ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr);
5453   ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,1.0,&Ct);CHKERRQ(ierr);
5454   ierr = MatDestroy(&At);CHKERRQ(ierr);
5455   ierr = MatDestroy(&Bt);CHKERRQ(ierr);
5456   ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr);
5457   ierr = MatDestroy(&Ct);CHKERRQ(ierr);
5458   PetscFunctionReturn(0);
5459 }
5460 
5461 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat *C)
5462 {
5463   PetscErrorCode ierr;
5464   PetscInt       m=A->rmap->n,n=B->cmap->n;
5465   Mat            Cmat;
5466 
5467   PetscFunctionBegin;
5468   if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %D != B->rmap->n %D",A->cmap->n,B->rmap->n);
5469   ierr = MatCreate(PetscObjectComm((PetscObject)A),&Cmat);CHKERRQ(ierr);
5470   ierr = MatSetSizes(Cmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
5471   ierr = MatSetBlockSizesFromMats(Cmat,A,B);CHKERRQ(ierr);
5472   ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr);
5473   ierr = MatMPIDenseSetPreallocation(Cmat,NULL);CHKERRQ(ierr);
5474   ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5475   ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5476 
5477   Cmat->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIAIJ;
5478 
5479   *C = Cmat;
5480   PetscFunctionReturn(0);
5481 }
5482 
5483 /* ----------------------------------------------------------------*/
5484 PETSC_INTERN PetscErrorCode MatMatMult_MPIDense_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C)
5485 {
5486   PetscErrorCode ierr;
5487 
5488   PetscFunctionBegin;
5489   if (scall == MAT_INITIAL_MATRIX) {
5490     ierr = PetscLogEventBegin(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr);
5491     ierr = MatMatMultSymbolic_MPIDense_MPIAIJ(A,B,fill,C);CHKERRQ(ierr);
5492     ierr = PetscLogEventEnd(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr);
5493   }
5494   ierr = PetscLogEventBegin(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr);
5495   ierr = MatMatMultNumeric_MPIDense_MPIAIJ(A,B,*C);CHKERRQ(ierr);
5496   ierr = PetscLogEventEnd(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr);
5497   PetscFunctionReturn(0);
5498 }
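
/*
   Example of reaching the dense * AIJ product above through the public interface (a hedged sketch,
   not part of the original source; A_dense is assumed to be a MATMPIDENSE matrix and B_aij a
   MATMPIAIJ matrix whose row layout matches the column layout of A_dense):

       Mat C;

       ierr = MatMatMult(A_dense,B_aij,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&C);CHKERRQ(ierr);
       ... use C ...
       ierr = MatDestroy(&C);CHKERRQ(ierr);

   MatMatMult() reaches MatMatMult_MPIDense_MPIAIJ() through the "MatMatMult_mpidense_mpiaij_C"
   function composed on the MPIAIJ matrix in MatCreate_MPIAIJ() below.
*/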
5499 
5500 /*MC
5501    MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.
5502 
5503    Options Database Keys:
5504 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions()
5505 
5506   Level: beginner
5507 
5508 .seealso: MatCreateAIJ()
5509 M*/
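
/*
   Example of creating a MATMPIAIJ matrix explicitly (a minimal sketch for illustration; M and N are
   hypothetical global dimensions and the preallocation numbers are placeholders):

       Mat A;

       ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
       ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,M,N);CHKERRQ(ierr);
       ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
       ierr = MatMPIAIJSetPreallocation(A,5,NULL,2,NULL);CHKERRQ(ierr);
       ... MatSetValues(), MatAssemblyBegin()/MatAssemblyEnd() ...
       ierr = MatDestroy(&A);CHKERRQ(ierr);

   Alternatively, MatSetType() can be replaced by MatSetFromOptions() together with -mat_type mpiaij.
*/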
5510 
5511 PETSC_EXTERN PetscErrorCode MatCreate_MPIAIJ(Mat B)
5512 {
5513   Mat_MPIAIJ     *b;
5514   PetscErrorCode ierr;
5515   PetscMPIInt    size;
5516 
5517   PetscFunctionBegin;
5518   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRQ(ierr);
5519 
5520   ierr          = PetscNewLog(B,&b);CHKERRQ(ierr);
5521   B->data       = (void*)b;
5522   ierr          = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
5523   B->assembled  = PETSC_FALSE;
5524   B->insertmode = NOT_SET_VALUES;
5525   b->size       = size;
5526 
5527   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank);CHKERRQ(ierr);
5528 
5529   /* build cache for off array entries formed */
5530   ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash);CHKERRQ(ierr);
5531 
5532   b->donotstash  = PETSC_FALSE;
5533   b->colmap      = 0;
5534   b->garray      = 0;
5535   b->roworiented = PETSC_TRUE;
5536 
5537   /* stuff used for matrix vector multiply */
5538   b->lvec  = NULL;
5539   b->Mvctx = NULL;
5540 
5541   /* stuff for MatGetRow() */
5542   b->rowindices   = 0;
5543   b->rowvalues    = 0;
5544   b->getrowactive = PETSC_FALSE;
5545 
5546   /* flexible pointer used in CUSP/CUSPARSE classes */
5547   b->spptr = NULL;
5548 
5549   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetUseScalableIncreaseOverlap_C",MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ);CHKERRQ(ierr);
5550   ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPIAIJ);CHKERRQ(ierr);
5551   ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPIAIJ);CHKERRQ(ierr);
5552   ierr = PetscObjectComposeFunction((PetscObject)B,"MatIsTranspose_C",MatIsTranspose_MPIAIJ);CHKERRQ(ierr);
5553   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr);
5554   ierr = PetscObjectComposeFunction((PetscObject)B,"MatResetPreallocation_C",MatResetPreallocation_MPIAIJ);CHKERRQ(ierr);
5555   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr);
5556   ierr = PetscObjectComposeFunction((PetscObject)B,"MatDiagonalScaleLocal_C",MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr);
5557   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijperm_C",MatConvert_MPIAIJ_MPIAIJPERM);CHKERRQ(ierr);
5558 #if defined(PETSC_HAVE_MKL_SPARSE)
5559   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijmkl_C",MatConvert_MPIAIJ_MPIAIJMKL);CHKERRQ(ierr);
5560 #endif
5561   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijcrl_C",MatConvert_MPIAIJ_MPIAIJCRL);CHKERRQ(ierr);
5562   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C",MatConvert_MPIAIJ_MPISBAIJ);CHKERRQ(ierr);
5563 #if defined(PETSC_HAVE_ELEMENTAL)
5564   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_elemental_C",MatConvert_MPIAIJ_Elemental);CHKERRQ(ierr);
5565 #endif
5566 #if defined(PETSC_HAVE_HYPRE)
5567   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_hypre_C",MatConvert_AIJ_HYPRE);CHKERRQ(ierr);
5568 #endif
5569   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_is_C",MatConvert_MPIAIJ_IS);CHKERRQ(ierr);
5570   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisell_C",MatConvert_MPIAIJ_MPISELL);CHKERRQ(ierr);
5571   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMult_mpidense_mpiaij_C",MatMatMult_MPIDense_MPIAIJ);CHKERRQ(ierr);
5572   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultSymbolic_mpidense_mpiaij_C",MatMatMultSymbolic_MPIDense_MPIAIJ);CHKERRQ(ierr);
5573   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultNumeric_mpidense_mpiaij_C",MatMatMultNumeric_MPIDense_MPIAIJ);CHKERRQ(ierr);
5574 #if defined(PETSC_HAVE_HYPRE)
5575   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMatMult_transpose_mpiaij_mpiaij_C",MatMatMatMult_Transpose_AIJ_AIJ);CHKERRQ(ierr);
5576 #endif
5577   ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr);
5578   PetscFunctionReturn(0);
5579 }
5580 
5581 /*@C
5582      MatCreateMPIAIJWithSplitArrays - creates an MPI AIJ matrix using arrays that contain the "diagonal"
5583          and "off-diagonal" part of the matrix in CSR format.
5584 
5585    Collective on MPI_Comm
5586 
5587    Input Parameters:
5588 +  comm - MPI communicator
5589 .  m - number of local rows (Cannot be PETSC_DECIDE)
5590 .  n - This value should be the same as the local size used in creating the
5591        x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have it
5592        calculated if N is given). For square matrices n is almost always m.
5593 .  M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given)
5594 .  N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given)
5595 .   i - row indices for "diagonal" portion of matrix
5596 .   j - column indices
5597 .   a - matrix values
5598 .   oi - row indices for "off-diagonal" portion of matrix
5599 .   oj - column indices
5600 -   oa - matrix values
5601 
5602    Output Parameter:
5603 .   mat - the matrix
5604 
5605    Level: advanced
5606 
5607    Notes:
5608        The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc. The user
5609        must free the arrays once the matrix has been destroyed and not before.
5610 
5611        The i and j indices are 0 based
5612 
5613        See MatCreateAIJ() for the definition of the "diagonal" and "off-diagonal" portions of the matrix
5614 
5615        This sets local rows and cannot be used to set off-processor values.
5616 
5617        Use of this routine is discouraged because it is inflexible and cumbersome to use. It is extremely rare that a
5618        legacy application natively assembles into exactly this split format. The code to do so is nontrivial and does
5619        not easily support in-place reassembly. It is recommended to use MatSetValues() (or a variant thereof) because
5620        the resulting assembly is easier to implement, will work with any matrix format, and the user does not have to
5621        keep track of the underlying array. Use MatSetOption(A,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) to disable all
5622        communication if it is known that only local entries will be set.
5623 
5624 .keywords: matrix, aij, compressed row, sparse, parallel
5625 
5626 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
5627           MATMPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithArrays()
5628 @*/
5629 PetscErrorCode MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat)
5630 {
5631   PetscErrorCode ierr;
5632   Mat_MPIAIJ     *maij;
5633 
5634   PetscFunctionBegin;
5635   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
5636   if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
5637   if (oi[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0");
5638   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
5639   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
5640   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
5641   maij = (Mat_MPIAIJ*) (*mat)->data;
5642 
5643   (*mat)->preallocated = PETSC_TRUE;
5644 
5645   ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr);
5646   ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr);
5647 
5648   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr);
5649   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr);
5650 
5651   ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5652   ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5653   ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5654   ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5655 
5656   ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr);
5657   ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5658   ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5659   ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_FALSE);CHKERRQ(ierr);
5660   ierr = MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
5661   PetscFunctionReturn(0);
5662 }
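
/*
   Example usage of MatCreateMPIAIJWithSplitArrays() (a sketch assuming exactly two MPI processes,
   each owning one row of the 2x2 tridiagonal matrix [2 -1; -1 2]; the arrays stay in scope until
   the matrix is destroyed, as required by the Notes above):

       PetscInt    i[]  = {0,1}, j[]  = {0};
       PetscInt    oi[] = {0,1}, oj[1];
       PetscScalar a[]  = {2.0}, oa[] = {-1.0};
       PetscMPIInt rank;
       Mat         A;

       ierr  = MPI_Comm_rank(PETSC_COMM_WORLD,&rank);CHKERRQ(ierr);
       oj[0] = (rank == 0) ? 1 : 0;
       ierr  = MatCreateMPIAIJWithSplitArrays(PETSC_COMM_WORLD,1,1,2,2,i,j,a,oi,oj,oa,&A);CHKERRQ(ierr);
       ... use A ...
       ierr  = MatDestroy(&A);CHKERRQ(ierr);

   Here i/j/a describe the 1x1 "diagonal" block with local column indices, while oi/oj/oa describe
   the "off-diagonal" block with global column indices (the neighboring process's column).
*/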
5663 
5664 /*
5665     Special version for direct calls from Fortran
5666 */
5667 #include <petsc/private/fortranimpl.h>
5668 
5669 /* Change these macros so they can be used in a void function */
5670 #undef CHKERRQ
5671 #define CHKERRQ(ierr) CHKERRABORT(PETSC_COMM_WORLD,ierr)
5672 #undef SETERRQ2
5673 #define SETERRQ2(comm,ierr,b,c,d) CHKERRABORT(comm,ierr)
5674 #undef SETERRQ3
5675 #define SETERRQ3(comm,ierr,b,c,d,e) CHKERRABORT(comm,ierr)
5676 #undef SETERRQ
5677 #define SETERRQ(c,ierr,b) CHKERRABORT(c,ierr)
5678 
5679 #if defined(PETSC_HAVE_FORTRAN_CAPS)
5680 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ
5681 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
5682 #define matsetvaluesmpiaij_ matsetvaluesmpiaij
5683 #else
5684 #endif
5685 PETSC_EXTERN void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr)
5686 {
5687   Mat            mat  = *mmat;
5688   PetscInt       m    = *mm, n = *mn;
5689   InsertMode     addv = *maddv;
5690   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
5691   PetscScalar    value;
5692   PetscErrorCode ierr;
5693 
5694   MatCheckPreallocated(mat,1);
5695   if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv;
5696 
5697 #if defined(PETSC_USE_DEBUG)
5698   else if (mat->insertmode != addv) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
5699 #endif
5700   {
5701     PetscInt  i,j,rstart  = mat->rmap->rstart,rend = mat->rmap->rend;
5702     PetscInt  cstart      = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
5703     PetscBool roworiented = aij->roworiented;
5704 
5705     /* Some Variables required in the macro */
5706     Mat        A                 = aij->A;
5707     Mat_SeqAIJ *a                = (Mat_SeqAIJ*)A->data;
5708     PetscInt   *aimax            = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
5709     MatScalar  *aa               = a->a;
5710     PetscBool  ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES)) ? PETSC_TRUE : PETSC_FALSE);
5711     Mat        B                 = aij->B;
5712     Mat_SeqAIJ *b                = (Mat_SeqAIJ*)B->data;
5713     PetscInt   *bimax            = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
5714     MatScalar  *ba               = b->a;
5715 
5716     PetscInt  *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
5717     PetscInt  nonew = a->nonew;
5718     MatScalar *ap1,*ap2;
5719 
5720     PetscFunctionBegin;
5721     for (i=0; i<m; i++) {
5722       if (im[i] < 0) continue;
5723 #if defined(PETSC_USE_DEBUG)
5724       if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
5725 #endif
5726       if (im[i] >= rstart && im[i] < rend) {
5727         row      = im[i] - rstart;
5728         lastcol1 = -1;
5729         rp1      = aj + ai[row];
5730         ap1      = aa + ai[row];
5731         rmax1    = aimax[row];
5732         nrow1    = ailen[row];
5733         low1     = 0;
5734         high1    = nrow1;
5735         lastcol2 = -1;
5736         rp2      = bj + bi[row];
5737         ap2      = ba + bi[row];
5738         rmax2    = bimax[row];
5739         nrow2    = bilen[row];
5740         low2     = 0;
5741         high2    = nrow2;
5742 
5743         for (j=0; j<n; j++) {
5744           if (roworiented) value = v[i*n+j];
5745           else value = v[i+j*m];
5746           if (in[j] >= cstart && in[j] < cend) {
5747             col = in[j] - cstart;
5748             if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && row != col) continue;
5749             MatSetValues_SeqAIJ_A_Private(row,col,value,addv,im[i],in[j]);
5750           } else if (in[j] < 0) continue;
5751 #if defined(PETSC_USE_DEBUG)
5752           /* extra brace on SETERRQ2() is required for --with-errorchecking=0 - due to the next 'else' clause */
5753           else if (in[j] >= mat->cmap->N) {SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);}
5754 #endif
5755           else {
5756             if (mat->was_assembled) {
5757               if (!aij->colmap) {
5758                 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
5759               }
5760 #if defined(PETSC_USE_CTABLE)
5761               ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
5762               col--;
5763 #else
5764               col = aij->colmap[in[j]] - 1;
5765 #endif
5766               if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && row != col) continue;
5767               if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
5768                 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
5769                 col  =  in[j];
5770                 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
5771                 B     = aij->B;
5772                 b     = (Mat_SeqAIJ*)B->data;
5773                 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
5774                 rp2   = bj + bi[row];
5775                 ap2   = ba + bi[row];
5776                 rmax2 = bimax[row];
5777                 nrow2 = bilen[row];
5778                 low2  = 0;
5779                 high2 = nrow2;
5780                 bm    = aij->B->rmap->n;
5781                 ba    = b->a;
5782               }
5783             } else col = in[j];
5784             MatSetValues_SeqAIJ_B_Private(row,col,value,addv,im[i],in[j]);
5785           }
5786         }
5787       } else if (!aij->donotstash) {
5788         if (roworiented) {
5789           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
5790         } else {
5791           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
5792         }
5793       }
5794     }
5795   }
5796   PetscFunctionReturnVoid();
5797 }
5798 
5799