xref: /petsc/src/mat/impls/aij/mpi/mpiaij.c (revision 9e436492a5f3dfdbfdb4be14586e1a8ebf1e4e34)
1 
2 
3 #include <../src/mat/impls/aij/mpi/mpiaij.h>   /*I "petscmat.h" I*/
4 #include <petsc/private/vecimpl.h>
5 #include <petsc/private/isimpl.h>
6 #include <petscblaslapack.h>
7 #include <petscsf.h>
8 
9 /*MC
10    MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices.
11 
12    This matrix type is identical to MATSEQAIJ when constructed with a single process communicator,
13    and MATMPIAIJ otherwise.  As a result, for single process communicators,
14   MatSeqAIJSetPreallocation is supported, and similarly MatMPIAIJSetPreallocation is supported
15   for communicators controlling multiple processes.  It is recommended that you call both of
16   the above preallocation routines for simplicity.
17 
18    Options Database Keys:
19 . -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions()
20 
21   Developer Notes: Subclasses include MATAIJCUSP, MATAIJCUSPARSE, MATAIJPERM, MATAIJMKL, MATAIJCRL, and also automatically switches over to use inodes when
22    enough exist.
23 
24   Level: beginner
25 
26 .seealso: MatCreateAIJ(), MatCreateSeqAIJ(), MATSEQAIJ, MATMPIAIJ
27 M*/
28 
29 /*MC
30    MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices.
31 
32    This matrix type is identical to MATSEQAIJCRL when constructed with a single process communicator,
33    and MATMPIAIJCRL otherwise.  As a result, for single process communicators,
34    MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
35   for communicators controlling multiple processes.  It is recommended that you call both of
36   the above preallocation routines for simplicity.
37 
38    Options Database Keys:
39 . -mat_type aijcrl - sets the matrix type to "aijcrl" during a call to MatSetFromOptions()
40 
41   Level: beginner
42 
43 .seealso: MatCreateMPIAIJCRL,MATSEQAIJCRL,MATMPIAIJCRL, MATSEQAIJCRL, MATMPIAIJCRL
44 M*/
45 
46 PetscErrorCode MatSetBlockSizes_MPIAIJ(Mat M, PetscInt rbs, PetscInt cbs)
47 {
48   PetscErrorCode ierr;
49   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)M->data;
50 
51   PetscFunctionBegin;
52   if (mat->A) {
53     ierr = MatSetBlockSizes(mat->A,rbs,cbs);CHKERRQ(ierr);
54     ierr = MatSetBlockSizes(mat->B,rbs,1);CHKERRQ(ierr);
55   }
56   PetscFunctionReturn(0);
57 }
58 
59 PetscErrorCode MatFindNonzeroRows_MPIAIJ(Mat M,IS *keptrows)
60 {
61   PetscErrorCode  ierr;
62   Mat_MPIAIJ      *mat = (Mat_MPIAIJ*)M->data;
63   Mat_SeqAIJ      *a   = (Mat_SeqAIJ*)mat->A->data;
64   Mat_SeqAIJ      *b   = (Mat_SeqAIJ*)mat->B->data;
65   const PetscInt  *ia,*ib;
66   const MatScalar *aa,*bb;
67   PetscInt        na,nb,i,j,*rows,cnt=0,n0rows;
68   PetscInt        m = M->rmap->n,rstart = M->rmap->rstart;
69 
70   PetscFunctionBegin;
71   *keptrows = 0;
72   ia        = a->i;
73   ib        = b->i;
74   for (i=0; i<m; i++) {
75     na = ia[i+1] - ia[i];
76     nb = ib[i+1] - ib[i];
77     if (!na && !nb) {
78       cnt++;
79       goto ok1;
80     }
81     aa = a->a + ia[i];
82     for (j=0; j<na; j++) {
83       if (aa[j] != 0.0) goto ok1;
84     }
85     bb = b->a + ib[i];
86     for (j=0; j <nb; j++) {
87       if (bb[j] != 0.0) goto ok1;
88     }
89     cnt++;
90 ok1:;
91   }
92   ierr = MPIU_Allreduce(&cnt,&n0rows,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)M));CHKERRQ(ierr);
93   if (!n0rows) PetscFunctionReturn(0);
94   ierr = PetscMalloc1(M->rmap->n-cnt,&rows);CHKERRQ(ierr);
95   cnt  = 0;
96   for (i=0; i<m; i++) {
97     na = ia[i+1] - ia[i];
98     nb = ib[i+1] - ib[i];
99     if (!na && !nb) continue;
100     aa = a->a + ia[i];
101     for (j=0; j<na;j++) {
102       if (aa[j] != 0.0) {
103         rows[cnt++] = rstart + i;
104         goto ok2;
105       }
106     }
107     bb = b->a + ib[i];
108     for (j=0; j<nb; j++) {
109       if (bb[j] != 0.0) {
110         rows[cnt++] = rstart + i;
111         goto ok2;
112       }
113     }
114 ok2:;
115   }
116   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),cnt,rows,PETSC_OWN_POINTER,keptrows);CHKERRQ(ierr);
117   PetscFunctionReturn(0);
118 }
119 
120 PetscErrorCode  MatDiagonalSet_MPIAIJ(Mat Y,Vec D,InsertMode is)
121 {
122   PetscErrorCode    ierr;
123   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*) Y->data;
124 
125   PetscFunctionBegin;
126   if (Y->assembled && Y->rmap->rstart == Y->cmap->rstart && Y->rmap->rend == Y->cmap->rend) {
127     ierr = MatDiagonalSet(aij->A,D,is);CHKERRQ(ierr);
128   } else {
129     ierr = MatDiagonalSet_Default(Y,D,is);CHKERRQ(ierr);
130   }
131   PetscFunctionReturn(0);
132 }
133 
134 PetscErrorCode MatFindZeroDiagonals_MPIAIJ(Mat M,IS *zrows)
135 {
136   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)M->data;
137   PetscErrorCode ierr;
138   PetscInt       i,rstart,nrows,*rows;
139 
140   PetscFunctionBegin;
141   *zrows = NULL;
142   ierr   = MatFindZeroDiagonals_SeqAIJ_Private(aij->A,&nrows,&rows);CHKERRQ(ierr);
143   ierr   = MatGetOwnershipRange(M,&rstart,NULL);CHKERRQ(ierr);
144   for (i=0; i<nrows; i++) rows[i] += rstart;
145   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),nrows,rows,PETSC_OWN_POINTER,zrows);CHKERRQ(ierr);
146   PetscFunctionReturn(0);
147 }
148 
149 PetscErrorCode MatGetColumnNorms_MPIAIJ(Mat A,NormType type,PetscReal *norms)
150 {
151   PetscErrorCode ierr;
152   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)A->data;
153   PetscInt       i,n,*garray = aij->garray;
154   Mat_SeqAIJ     *a_aij = (Mat_SeqAIJ*) aij->A->data;
155   Mat_SeqAIJ     *b_aij = (Mat_SeqAIJ*) aij->B->data;
156   PetscReal      *work;
157 
158   PetscFunctionBegin;
159   ierr = MatGetSize(A,NULL,&n);CHKERRQ(ierr);
160   ierr = PetscCalloc1(n,&work);CHKERRQ(ierr);
161   if (type == NORM_2) {
162     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
163       work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]*a_aij->a[i]);
164     }
165     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
166       work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]*b_aij->a[i]);
167     }
168   } else if (type == NORM_1) {
169     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
170       work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]);
171     }
172     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
173       work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]);
174     }
175   } else if (type == NORM_INFINITY) {
176     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
177       work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]);
178     }
179     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
180       work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]),work[garray[b_aij->j[i]]]);
181     }
182 
183   } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Unknown NormType");
184   if (type == NORM_INFINITY) {
185     ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
186   } else {
187     ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
188   }
189   ierr = PetscFree(work);CHKERRQ(ierr);
190   if (type == NORM_2) {
191     for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]);
192   }
193   PetscFunctionReturn(0);
194 }
195 
196 PetscErrorCode MatFindOffBlockDiagonalEntries_MPIAIJ(Mat A,IS *is)
197 {
198   Mat_MPIAIJ      *a  = (Mat_MPIAIJ*)A->data;
199   IS              sis,gis;
200   PetscErrorCode  ierr;
201   const PetscInt  *isis,*igis;
202   PetscInt        n,*iis,nsis,ngis,rstart,i;
203 
204   PetscFunctionBegin;
205   ierr = MatFindOffBlockDiagonalEntries(a->A,&sis);CHKERRQ(ierr);
206   ierr = MatFindNonzeroRows(a->B,&gis);CHKERRQ(ierr);
207   ierr = ISGetSize(gis,&ngis);CHKERRQ(ierr);
208   ierr = ISGetSize(sis,&nsis);CHKERRQ(ierr);
209   ierr = ISGetIndices(sis,&isis);CHKERRQ(ierr);
210   ierr = ISGetIndices(gis,&igis);CHKERRQ(ierr);
211 
212   ierr = PetscMalloc1(ngis+nsis,&iis);CHKERRQ(ierr);
213   ierr = PetscMemcpy(iis,igis,ngis*sizeof(PetscInt));CHKERRQ(ierr);
214   ierr = PetscMemcpy(iis+ngis,isis,nsis*sizeof(PetscInt));CHKERRQ(ierr);
215   n    = ngis + nsis;
216   ierr = PetscSortRemoveDupsInt(&n,iis);CHKERRQ(ierr);
217   ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr);
218   for (i=0; i<n; i++) iis[i] += rstart;
219   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)A),n,iis,PETSC_OWN_POINTER,is);CHKERRQ(ierr);
220 
221   ierr = ISRestoreIndices(sis,&isis);CHKERRQ(ierr);
222   ierr = ISRestoreIndices(gis,&igis);CHKERRQ(ierr);
223   ierr = ISDestroy(&sis);CHKERRQ(ierr);
224   ierr = ISDestroy(&gis);CHKERRQ(ierr);
225   PetscFunctionReturn(0);
226 }
227 
228 /*
229     Distributes a SeqAIJ matrix across a set of processes. Code stolen from
230     MatLoad_MPIAIJ(). Horrible lack of reuse. Should be a routine for each matrix type.
231 
232     Only for square matrices
233 
234     Used by a preconditioner, hence PETSC_EXTERN
235 */
236 PETSC_EXTERN PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat)
237 {
238   PetscMPIInt    rank,size;
239   PetscInt       *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz = 0,*gmataj,cnt,row,*ld,bses[2];
240   PetscErrorCode ierr;
241   Mat            mat;
242   Mat_SeqAIJ     *gmata;
243   PetscMPIInt    tag;
244   MPI_Status     status;
245   PetscBool      aij;
246   MatScalar      *gmataa,*ao,*ad,*gmataarestore=0;
247 
248   PetscFunctionBegin;
249   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
250   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
251   if (!rank) {
252     ierr = PetscObjectTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);CHKERRQ(ierr);
253     if (!aij) SETERRQ1(PetscObjectComm((PetscObject)gmat),PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name);
254   }
255   if (reuse == MAT_INITIAL_MATRIX) {
256     ierr = MatCreate(comm,&mat);CHKERRQ(ierr);
257     ierr = MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
258     ierr = MatGetBlockSizes(gmat,&bses[0],&bses[1]);CHKERRQ(ierr);
259     ierr = MPI_Bcast(bses,2,MPIU_INT,0,comm);CHKERRQ(ierr);
260     ierr = MatSetBlockSizes(mat,bses[0],bses[1]);CHKERRQ(ierr);
261     ierr = MatSetType(mat,MATAIJ);CHKERRQ(ierr);
262     ierr = PetscMalloc1(size+1,&rowners);CHKERRQ(ierr);
263     ierr = PetscMalloc2(m,&dlens,m,&olens);CHKERRQ(ierr);
264     ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
265 
266     rowners[0] = 0;
267     for (i=2; i<=size; i++) rowners[i] += rowners[i-1];
268     rstart = rowners[rank];
269     rend   = rowners[rank+1];
270     ierr   = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
271     if (!rank) {
272       gmata = (Mat_SeqAIJ*) gmat->data;
273       /* send row lengths to all processors */
274       for (i=0; i<m; i++) dlens[i] = gmata->ilen[i];
275       for (i=1; i<size; i++) {
276         ierr = MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
277       }
278       /* determine number diagonal and off-diagonal counts */
279       ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
280       ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr);
281       jj   = 0;
282       for (i=0; i<m; i++) {
283         for (j=0; j<dlens[i]; j++) {
284           if (gmata->j[jj] < rstart) ld[i]++;
285           if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++;
286           jj++;
287         }
288       }
289       /* send column indices to other processes */
290       for (i=1; i<size; i++) {
291         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
292         ierr = MPI_Send(&nz,1,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
293         ierr = MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
294       }
295 
296       /* send numerical values to other processes */
297       for (i=1; i<size; i++) {
298         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
299         ierr = MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
300       }
301       gmataa = gmata->a;
302       gmataj = gmata->j;
303 
304     } else {
305       /* receive row lengths */
306       ierr = MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
307       /* receive column indices */
308       ierr = MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
309       ierr = PetscMalloc2(nz,&gmataa,nz,&gmataj);CHKERRQ(ierr);
310       ierr = MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
311       /* determine number diagonal and off-diagonal counts */
312       ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
313       ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr);
314       jj   = 0;
315       for (i=0; i<m; i++) {
316         for (j=0; j<dlens[i]; j++) {
317           if (gmataj[jj] < rstart) ld[i]++;
318           if (gmataj[jj] < rstart || gmataj[jj] >= rend) olens[i]++;
319           jj++;
320         }
321       }
322       /* receive numerical values */
323       ierr = PetscMemzero(gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr);
324       ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
325     }
326     /* set preallocation */
327     for (i=0; i<m; i++) {
328       dlens[i] -= olens[i];
329     }
330     ierr = MatSeqAIJSetPreallocation(mat,0,dlens);CHKERRQ(ierr);
331     ierr = MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);CHKERRQ(ierr);
332 
333     for (i=0; i<m; i++) {
334       dlens[i] += olens[i];
335     }
336     cnt = 0;
337     for (i=0; i<m; i++) {
338       row  = rstart + i;
339       ierr = MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);CHKERRQ(ierr);
340       cnt += dlens[i];
341     }
342     if (rank) {
343       ierr = PetscFree2(gmataa,gmataj);CHKERRQ(ierr);
344     }
345     ierr = PetscFree2(dlens,olens);CHKERRQ(ierr);
346     ierr = PetscFree(rowners);CHKERRQ(ierr);
347 
348     ((Mat_MPIAIJ*)(mat->data))->ld = ld;
349 
350     *inmat = mat;
351   } else {   /* column indices are already set; only need to move over numerical values from process 0 */
352     Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data;
353     Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data;
354     mat  = *inmat;
355     ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
356     if (!rank) {
357       /* send numerical values to other processes */
358       gmata  = (Mat_SeqAIJ*) gmat->data;
359       ierr   = MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);CHKERRQ(ierr);
360       gmataa = gmata->a;
361       for (i=1; i<size; i++) {
362         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
363         ierr = MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
364       }
365       nz = gmata->i[rowners[1]]-gmata->i[rowners[0]];
366     } else {
367       /* receive numerical values from process 0*/
368       nz   = Ad->nz + Ao->nz;
369       ierr = PetscMalloc1(nz,&gmataa);CHKERRQ(ierr); gmataarestore = gmataa;
370       ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
371     }
372     /* transfer numerical values into the diagonal A and off diagonal B parts of mat */
373     ld = ((Mat_MPIAIJ*)(mat->data))->ld;
374     ad = Ad->a;
375     ao = Ao->a;
376     if (mat->rmap->n) {
377       i  = 0;
378       nz = ld[i];                                   ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
379       nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
380     }
381     for (i=1; i<mat->rmap->n; i++) {
382       nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
383       nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
384     }
385     i--;
386     if (mat->rmap->n) {
387       nz = Ao->i[i+1] - Ao->i[i] - ld[i];           ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr);
388     }
389     if (rank) {
390       ierr = PetscFree(gmataarestore);CHKERRQ(ierr);
391     }
392   }
393   ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
394   ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
395   PetscFunctionReturn(0);
396 }
397 
398 /*
399   Local utility routine that creates a mapping from the global column
400 number to the local number in the off-diagonal part of the local
401 storage of the matrix.  When PETSC_USE_CTABLE is used this is scalable at
402 a slightly higher hash table cost; without it it is not scalable (each processor
403 has an order N integer array but is fast to acess.
404 */
405 PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat)
406 {
407   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
408   PetscErrorCode ierr;
409   PetscInt       n = aij->B->cmap->n,i;
410 
411   PetscFunctionBegin;
412   if (!aij->garray) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"MPIAIJ Matrix was assembled but is missing garray");
413 #if defined(PETSC_USE_CTABLE)
414   ierr = PetscTableCreate(n,mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr);
415   for (i=0; i<n; i++) {
416     ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1,INSERT_VALUES);CHKERRQ(ierr);
417   }
418 #else
419   ierr = PetscCalloc1(mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr);
420   ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N+1)*sizeof(PetscInt));CHKERRQ(ierr);
421   for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
422 #endif
423   PetscFunctionReturn(0);
424 }
425 
426 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv,orow,ocol)     \
427 { \
428     if (col <= lastcol1)  low1 = 0;     \
429     else                 high1 = nrow1; \
430     lastcol1 = col;\
431     while (high1-low1 > 5) { \
432       t = (low1+high1)/2; \
433       if (rp1[t] > col) high1 = t; \
434       else              low1  = t; \
435     } \
436       for (_i=low1; _i<high1; _i++) { \
437         if (rp1[_i] > col) break; \
438         if (rp1[_i] == col) { \
439           if (addv == ADD_VALUES) ap1[_i] += value;   \
440           else                    ap1[_i] = value; \
441           goto a_noinsert; \
442         } \
443       }  \
444       if (value == 0.0 && ignorezeroentries && row != col) {low1 = 0; high1 = nrow1;goto a_noinsert;} \
445       if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;}                \
446       if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \
447       MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \
448       N = nrow1++ - 1; a->nz++; high1++; \
449       /* shift up all the later entries in this row */ \
450       for (ii=N; ii>=_i; ii--) { \
451         rp1[ii+1] = rp1[ii]; \
452         ap1[ii+1] = ap1[ii]; \
453       } \
454       rp1[_i] = col;  \
455       ap1[_i] = value;  \
456       A->nonzerostate++;\
457       a_noinsert: ; \
458       ailen[row] = nrow1; \
459 }
460 
461 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv,orow,ocol) \
462   { \
463     if (col <= lastcol2) low2 = 0;                        \
464     else high2 = nrow2;                                   \
465     lastcol2 = col;                                       \
466     while (high2-low2 > 5) {                              \
467       t = (low2+high2)/2;                                 \
468       if (rp2[t] > col) high2 = t;                        \
469       else             low2  = t;                         \
470     }                                                     \
471     for (_i=low2; _i<high2; _i++) {                       \
472       if (rp2[_i] > col) break;                           \
473       if (rp2[_i] == col) {                               \
474         if (addv == ADD_VALUES) ap2[_i] += value;         \
475         else                    ap2[_i] = value;          \
476         goto b_noinsert;                                  \
477       }                                                   \
478     }                                                     \
479     if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
480     if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;}                        \
481     if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \
482     MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \
483     N = nrow2++ - 1; b->nz++; high2++;                    \
484     /* shift up all the later entries in this row */      \
485     for (ii=N; ii>=_i; ii--) {                            \
486       rp2[ii+1] = rp2[ii];                                \
487       ap2[ii+1] = ap2[ii];                                \
488     }                                                     \
489     rp2[_i] = col;                                        \
490     ap2[_i] = value;                                      \
491     B->nonzerostate++;                                    \
492     b_noinsert: ;                                         \
493     bilen[row] = nrow2;                                   \
494   }
495 
496 PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[])
497 {
498   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)A->data;
499   Mat_SeqAIJ     *a   = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data;
500   PetscErrorCode ierr;
501   PetscInt       l,*garray = mat->garray,diag;
502 
503   PetscFunctionBegin;
504   /* code only works for square matrices A */
505 
506   /* find size of row to the left of the diagonal part */
507   ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr);
508   row  = row - diag;
509   for (l=0; l<b->i[row+1]-b->i[row]; l++) {
510     if (garray[b->j[b->i[row]+l]] > diag) break;
511   }
512   ierr = PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));CHKERRQ(ierr);
513 
514   /* diagonal part */
515   ierr = PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));CHKERRQ(ierr);
516 
517   /* right of diagonal part */
518   ierr = PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));CHKERRQ(ierr);
519   PetscFunctionReturn(0);
520 }
521 
522 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
523 {
524   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
525   PetscScalar    value;
526   PetscErrorCode ierr;
527   PetscInt       i,j,rstart  = mat->rmap->rstart,rend = mat->rmap->rend;
528   PetscInt       cstart      = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
529   PetscBool      roworiented = aij->roworiented;
530 
531   /* Some Variables required in the macro */
532   Mat        A                 = aij->A;
533   Mat_SeqAIJ *a                = (Mat_SeqAIJ*)A->data;
534   PetscInt   *aimax            = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
535   MatScalar  *aa               = a->a;
536   PetscBool  ignorezeroentries = a->ignorezeroentries;
537   Mat        B                 = aij->B;
538   Mat_SeqAIJ *b                = (Mat_SeqAIJ*)B->data;
539   PetscInt   *bimax            = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
540   MatScalar  *ba               = b->a;
541 
542   PetscInt  *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
543   PetscInt  nonew;
544   MatScalar *ap1,*ap2;
545 
546   PetscFunctionBegin;
547   for (i=0; i<m; i++) {
548     if (im[i] < 0) continue;
549 #if defined(PETSC_USE_DEBUG)
550     if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
551 #endif
552     if (im[i] >= rstart && im[i] < rend) {
553       row      = im[i] - rstart;
554       lastcol1 = -1;
555       rp1      = aj + ai[row];
556       ap1      = aa + ai[row];
557       rmax1    = aimax[row];
558       nrow1    = ailen[row];
559       low1     = 0;
560       high1    = nrow1;
561       lastcol2 = -1;
562       rp2      = bj + bi[row];
563       ap2      = ba + bi[row];
564       rmax2    = bimax[row];
565       nrow2    = bilen[row];
566       low2     = 0;
567       high2    = nrow2;
568 
569       for (j=0; j<n; j++) {
570         if (roworiented) value = v[i*n+j];
571         else             value = v[i+j*m];
572         if (in[j] >= cstart && in[j] < cend) {
573           col   = in[j] - cstart;
574           nonew = a->nonew;
575           if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && row != col) continue;
576           MatSetValues_SeqAIJ_A_Private(row,col,value,addv,im[i],in[j]);
577         } else if (in[j] < 0) continue;
578 #if defined(PETSC_USE_DEBUG)
579         else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
580 #endif
581         else {
582           if (mat->was_assembled) {
583             if (!aij->colmap) {
584               ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
585             }
586 #if defined(PETSC_USE_CTABLE)
587             ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
588             col--;
589 #else
590             col = aij->colmap[in[j]] - 1;
591 #endif
592             if (col < 0 && !((Mat_SeqAIJ*)(aij->B->data))->nonew) {
593               ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
594               col  =  in[j];
595               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
596               B     = aij->B;
597               b     = (Mat_SeqAIJ*)B->data;
598               bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a;
599               rp2   = bj + bi[row];
600               ap2   = ba + bi[row];
601               rmax2 = bimax[row];
602               nrow2 = bilen[row];
603               low2  = 0;
604               high2 = nrow2;
605               bm    = aij->B->rmap->n;
606               ba    = b->a;
607             } else if (col < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", im[i], in[j]);
608           } else col = in[j];
609           nonew = b->nonew;
610           MatSetValues_SeqAIJ_B_Private(row,col,value,addv,im[i],in[j]);
611         }
612       }
613     } else {
614       if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
615       if (!aij->donotstash) {
616         mat->assembled = PETSC_FALSE;
617         if (roworiented) {
618           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
619         } else {
620           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
621         }
622       }
623     }
624   }
625   PetscFunctionReturn(0);
626 }
627 
628 PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
629 {
630   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
631   PetscErrorCode ierr;
632   PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
633   PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
634 
635   PetscFunctionBegin;
636   for (i=0; i<m; i++) {
637     if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/
638     if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
639     if (idxm[i] >= rstart && idxm[i] < rend) {
640       row = idxm[i] - rstart;
641       for (j=0; j<n; j++) {
642         if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */
643         if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
644         if (idxn[j] >= cstart && idxn[j] < cend) {
645           col  = idxn[j] - cstart;
646           ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
647         } else {
648           if (!aij->colmap) {
649             ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
650           }
651 #if defined(PETSC_USE_CTABLE)
652           ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
653           col--;
654 #else
655           col = aij->colmap[idxn[j]] - 1;
656 #endif
657           if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
658           else {
659             ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
660           }
661         }
662       }
663     } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
664   }
665   PetscFunctionReturn(0);
666 }
667 
668 extern PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat,Vec,Vec);
669 
670 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
671 {
672   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
673   PetscErrorCode ierr;
674   PetscInt       nstash,reallocs;
675 
676   PetscFunctionBegin;
677   if (aij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0);
678 
679   ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
680   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
681   ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
682   PetscFunctionReturn(0);
683 }
684 
685 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
686 {
687   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
688   Mat_SeqAIJ     *a   = (Mat_SeqAIJ*)aij->A->data;
689   PetscErrorCode ierr;
690   PetscMPIInt    n;
691   PetscInt       i,j,rstart,ncols,flg;
692   PetscInt       *row,*col;
693   PetscBool      other_disassembled;
694   PetscScalar    *val;
695 
696   /* do not use 'b = (Mat_SeqAIJ*)aij->B->data' as B can be reset in disassembly */
697 
698   PetscFunctionBegin;
699   if (!aij->donotstash && !mat->nooffprocentries) {
700     while (1) {
701       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
702       if (!flg) break;
703 
704       for (i=0; i<n; ) {
705         /* Now identify the consecutive vals belonging to the same row */
706         for (j=i,rstart=row[j]; j<n; j++) {
707           if (row[j] != rstart) break;
708         }
709         if (j < n) ncols = j-i;
710         else       ncols = n-i;
711         /* Now assemble all these values with a single function call */
712         ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,mat->insertmode);CHKERRQ(ierr);
713 
714         i = j;
715       }
716     }
717     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
718   }
719   ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
720   ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);
721 
722   /* determine if any processor has disassembled, if so we must
723      also disassemble ourselfs, in order that we may reassemble. */
724   /*
725      if nonzero structure of submatrix B cannot change then we know that
726      no processor disassembled thus we can skip this stuff
727   */
728   if (!((Mat_SeqAIJ*)aij->B->data)->nonew) {
729     ierr = MPIU_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
730     if (mat->was_assembled && !other_disassembled) {
731       ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
732     }
733   }
734   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
735     ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
736   }
737   ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr);
738   ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
739   ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);
740 
741   ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
742 
743   aij->rowvalues = 0;
744 
745   ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
746   if (a->inode.size) mat->ops->multdiagonalblock = MatMultDiagonalBlock_MPIAIJ;
747 
748   /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */
749   if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
750     PetscObjectState state = aij->A->nonzerostate + aij->B->nonzerostate;
751     ierr = MPIU_Allreduce(&state,&mat->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
752   }
753   PetscFunctionReturn(0);
754 }
755 
756 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
757 {
758   Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
759   PetscErrorCode ierr;
760 
761   PetscFunctionBegin;
762   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
763   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
764   PetscFunctionReturn(0);
765 }
766 
767 PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
768 {
769   Mat_MPIAIJ    *mat    = (Mat_MPIAIJ *) A->data;
770   PetscInt      *lrows;
771   PetscInt       r, len;
772   PetscErrorCode ierr;
773 
774   PetscFunctionBegin;
775   /* get locally owned rows */
776   ierr = MatZeroRowsMapLocal_Private(A,N,rows,&len,&lrows);CHKERRQ(ierr);
777   /* fix right hand side if needed */
778   if (x && b) {
779     const PetscScalar *xx;
780     PetscScalar       *bb;
781 
782     ierr = VecGetArrayRead(x, &xx);CHKERRQ(ierr);
783     ierr = VecGetArray(b, &bb);CHKERRQ(ierr);
784     for (r = 0; r < len; ++r) bb[lrows[r]] = diag*xx[lrows[r]];
785     ierr = VecRestoreArrayRead(x, &xx);CHKERRQ(ierr);
786     ierr = VecRestoreArray(b, &bb);CHKERRQ(ierr);
787   }
788   /* Must zero l->B before l->A because the (diag) case below may put values into l->B*/
789   ierr = MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
790   if (A->congruentlayouts == -1) { /* first time we compare rows and cols layouts */
791     PetscBool cong;
792     ierr = PetscLayoutCompare(A->rmap,A->cmap,&cong);CHKERRQ(ierr);
793     if (cong) A->congruentlayouts = 1;
794     else      A->congruentlayouts = 0;
795   }
796   if ((diag != 0.0) && A->congruentlayouts) {
797     ierr = MatZeroRows(mat->A, len, lrows, diag, NULL, NULL);CHKERRQ(ierr);
798   } else if (diag != 0.0) {
799     ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
800     if (((Mat_SeqAIJ *) mat->A->data)->nonew) SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "MatZeroRows() on rectangular matrices cannot be used with the Mat options\nMAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
801     for (r = 0; r < len; ++r) {
802       const PetscInt row = lrows[r] + A->rmap->rstart;
803       ierr = MatSetValues(A, 1, &row, 1, &row, &diag, INSERT_VALUES);CHKERRQ(ierr);
804     }
805     ierr = MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
806     ierr = MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
807   } else {
808     ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
809   }
810   ierr = PetscFree(lrows);CHKERRQ(ierr);
811 
812   /* only change matrix nonzero state if pattern was allowed to be changed */
813   if (!((Mat_SeqAIJ*)(mat->A->data))->keepnonzeropattern) {
814     PetscObjectState state = mat->A->nonzerostate + mat->B->nonzerostate;
815     ierr = MPIU_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
816   }
817   PetscFunctionReturn(0);
818 }
819 
820 PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
821 {
822   Mat_MPIAIJ        *l = (Mat_MPIAIJ*)A->data;
823   PetscErrorCode    ierr;
824   PetscMPIInt       n = A->rmap->n;
825   PetscInt          i,j,r,m,p = 0,len = 0;
826   PetscInt          *lrows,*owners = A->rmap->range;
827   PetscSFNode       *rrows;
828   PetscSF           sf;
829   const PetscScalar *xx;
830   PetscScalar       *bb,*mask;
831   Vec               xmask,lmask;
832   Mat_SeqAIJ        *aij = (Mat_SeqAIJ*)l->B->data;
833   const PetscInt    *aj, *ii,*ridx;
834   PetscScalar       *aa;
835 
836   PetscFunctionBegin;
837   /* Create SF where leaves are input rows and roots are owned rows */
838   ierr = PetscMalloc1(n, &lrows);CHKERRQ(ierr);
839   for (r = 0; r < n; ++r) lrows[r] = -1;
840   ierr = PetscMalloc1(N, &rrows);CHKERRQ(ierr);
841   for (r = 0; r < N; ++r) {
842     const PetscInt idx   = rows[r];
843     if (idx < 0 || A->rmap->N <= idx) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range [0,%D)",idx,A->rmap->N);
844     if (idx < owners[p] || owners[p+1] <= idx) { /* short-circuit the search if the last p owns this row too */
845       ierr = PetscLayoutFindOwner(A->rmap,idx,&p);CHKERRQ(ierr);
846     }
847     rrows[r].rank  = p;
848     rrows[r].index = rows[r] - owners[p];
849   }
850   ierr = PetscSFCreate(PetscObjectComm((PetscObject) A), &sf);CHKERRQ(ierr);
851   ierr = PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER);CHKERRQ(ierr);
852   /* Collect flags for rows to be zeroed */
853   ierr = PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
854   ierr = PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
855   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
856   /* Compress and put in row numbers */
857   for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r;
858   /* zero diagonal part of matrix */
859   ierr = MatZeroRowsColumns(l->A,len,lrows,diag,x,b);CHKERRQ(ierr);
860   /* handle off diagonal part of matrix */
861   ierr = MatCreateVecs(A,&xmask,NULL);CHKERRQ(ierr);
862   ierr = VecDuplicate(l->lvec,&lmask);CHKERRQ(ierr);
863   ierr = VecGetArray(xmask,&bb);CHKERRQ(ierr);
864   for (i=0; i<len; i++) bb[lrows[i]] = 1;
865   ierr = VecRestoreArray(xmask,&bb);CHKERRQ(ierr);
866   ierr = VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
867   ierr = VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
868   ierr = VecDestroy(&xmask);CHKERRQ(ierr);
869   if (x) {
870     ierr = VecScatterBegin(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
871     ierr = VecScatterEnd(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
872     ierr = VecGetArrayRead(l->lvec,&xx);CHKERRQ(ierr);
873     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
874   }
875   ierr = VecGetArray(lmask,&mask);CHKERRQ(ierr);
876   /* remove zeroed rows of off diagonal matrix */
877   ii = aij->i;
878   for (i=0; i<len; i++) {
879     ierr = PetscMemzero(aij->a + ii[lrows[i]],(ii[lrows[i]+1] - ii[lrows[i]])*sizeof(PetscScalar));CHKERRQ(ierr);
880   }
881   /* loop over all elements of off process part of matrix zeroing removed columns*/
882   if (aij->compressedrow.use) {
883     m    = aij->compressedrow.nrows;
884     ii   = aij->compressedrow.i;
885     ridx = aij->compressedrow.rindex;
886     for (i=0; i<m; i++) {
887       n  = ii[i+1] - ii[i];
888       aj = aij->j + ii[i];
889       aa = aij->a + ii[i];
890 
891       for (j=0; j<n; j++) {
892         if (PetscAbsScalar(mask[*aj])) {
893           if (b) bb[*ridx] -= *aa*xx[*aj];
894           *aa = 0.0;
895         }
896         aa++;
897         aj++;
898       }
899       ridx++;
900     }
901   } else { /* do not use compressed row format */
902     m = l->B->rmap->n;
903     for (i=0; i<m; i++) {
904       n  = ii[i+1] - ii[i];
905       aj = aij->j + ii[i];
906       aa = aij->a + ii[i];
907       for (j=0; j<n; j++) {
908         if (PetscAbsScalar(mask[*aj])) {
909           if (b) bb[i] -= *aa*xx[*aj];
910           *aa = 0.0;
911         }
912         aa++;
913         aj++;
914       }
915     }
916   }
917   if (x) {
918     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
919     ierr = VecRestoreArrayRead(l->lvec,&xx);CHKERRQ(ierr);
920   }
921   ierr = VecRestoreArray(lmask,&mask);CHKERRQ(ierr);
922   ierr = VecDestroy(&lmask);CHKERRQ(ierr);
923   ierr = PetscFree(lrows);CHKERRQ(ierr);
924 
925   /* only change matrix nonzero state if pattern was allowed to be changed */
926   if (!((Mat_SeqAIJ*)(l->A->data))->keepnonzeropattern) {
927     PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate;
928     ierr = MPIU_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
929   }
930   PetscFunctionReturn(0);
931 }
932 
933 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
934 {
935   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
936   PetscErrorCode ierr;
937   PetscInt       nt;
938   VecScatter     Mvctx = a->Mvctx;
939 
940   PetscFunctionBegin;
941   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
942   if (nt != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt);
943 
944   ierr = VecScatterBegin(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
945   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
946   ierr = VecScatterEnd(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
947   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
948   PetscFunctionReturn(0);
949 }
950 
951 PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx)
952 {
953   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
954   PetscErrorCode ierr;
955 
956   PetscFunctionBegin;
957   ierr = MatMultDiagonalBlock(a->A,bb,xx);CHKERRQ(ierr);
958   PetscFunctionReturn(0);
959 }
960 
961 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
962 {
963   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
964   PetscErrorCode ierr;
965   VecScatter     Mvctx = a->Mvctx;
966 
967   PetscFunctionBegin;
968   if (a->Mvctx_mpi1_flg) Mvctx = a->Mvctx_mpi1;
969   ierr = VecScatterBegin(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
970   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
971   ierr = VecScatterEnd(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
972   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
973   PetscFunctionReturn(0);
974 }
975 
976 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
977 {
978   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
979   PetscErrorCode ierr;
980   PetscBool      merged;
981 
982   PetscFunctionBegin;
983   ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr);
984   /* do nondiagonal part */
985   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
986   if (!merged) {
987     /* send it on its way */
988     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
989     /* do local part */
990     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
991     /* receive remote parts: note this assumes the values are not actually */
992     /* added in yy until the next line, */
993     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
994   } else {
995     /* do local part */
996     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
997     /* send it on its way */
998     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
999     /* values actually were received in the Begin() but we need to call this nop */
1000     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1001   }
1002   PetscFunctionReturn(0);
1003 }
1004 
1005 PetscErrorCode MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscBool  *f)
1006 {
1007   MPI_Comm       comm;
1008   Mat_MPIAIJ     *Aij = (Mat_MPIAIJ*) Amat->data, *Bij;
1009   Mat            Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs;
1010   IS             Me,Notme;
1011   PetscErrorCode ierr;
1012   PetscInt       M,N,first,last,*notme,i;
1013   PetscMPIInt    size;
1014 
1015   PetscFunctionBegin;
1016   /* Easy test: symmetric diagonal block */
1017   Bij  = (Mat_MPIAIJ*) Bmat->data; Bdia = Bij->A;
1018   ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr);
1019   if (!*f) PetscFunctionReturn(0);
1020   ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr);
1021   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1022   if (size == 1) PetscFunctionReturn(0);
1023 
1024   /* Hard test: off-diagonal block. This takes a MatCreateSubMatrix. */
1025   ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr);
1026   ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr);
1027   ierr = PetscMalloc1(N-last+first,&notme);CHKERRQ(ierr);
1028   for (i=0; i<first; i++) notme[i] = i;
1029   for (i=last; i<M; i++) notme[i-last+first] = i;
1030   ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,PETSC_COPY_VALUES,&Notme);CHKERRQ(ierr);
1031   ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr);
1032   ierr = MatCreateSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr);
1033   Aoff = Aoffs[0];
1034   ierr = MatCreateSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr);
1035   Boff = Boffs[0];
1036   ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr);
1037   ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr);
1038   ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr);
1039   ierr = ISDestroy(&Me);CHKERRQ(ierr);
1040   ierr = ISDestroy(&Notme);CHKERRQ(ierr);
1041   ierr = PetscFree(notme);CHKERRQ(ierr);
1042   PetscFunctionReturn(0);
1043 }
1044 
1045 PetscErrorCode MatIsSymmetric_MPIAIJ(Mat A,PetscReal tol,PetscBool  *f)
1046 {
1047   PetscErrorCode ierr;
1048 
1049   PetscFunctionBegin;
1050   ierr = MatIsTranspose_MPIAIJ(A,A,tol,f);CHKERRQ(ierr);
1051   PetscFunctionReturn(0);
1052 }
1053 
1054 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1055 {
1056   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1057   PetscErrorCode ierr;
1058 
1059   PetscFunctionBegin;
1060   /* do nondiagonal part */
1061   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1062   /* send it on its way */
1063   ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1064   /* do local part */
1065   ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1066   /* receive remote parts */
1067   ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1068   PetscFunctionReturn(0);
1069 }
1070 
1071 /*
1072   This only works correctly for square matrices where the subblock A->A is the
1073    diagonal block
1074 */
1075 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v)
1076 {
1077   PetscErrorCode ierr;
1078   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1079 
1080   PetscFunctionBegin;
1081   if (A->rmap->N != A->cmap->N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
1082   if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
1083   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
1084   PetscFunctionReturn(0);
1085 }
1086 
1087 PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa)
1088 {
1089   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1090   PetscErrorCode ierr;
1091 
1092   PetscFunctionBegin;
1093   ierr = MatScale(a->A,aa);CHKERRQ(ierr);
1094   ierr = MatScale(a->B,aa);CHKERRQ(ierr);
1095   PetscFunctionReturn(0);
1096 }
1097 
1098 PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
1099 {
1100   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1101   PetscErrorCode ierr;
1102 
1103   PetscFunctionBegin;
1104 #if defined(PETSC_USE_LOG)
1105   PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N);
1106 #endif
1107   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
1108   ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
1109   ierr = MatDestroy(&aij->A);CHKERRQ(ierr);
1110   ierr = MatDestroy(&aij->B);CHKERRQ(ierr);
1111 #if defined(PETSC_USE_CTABLE)
1112   ierr = PetscTableDestroy(&aij->colmap);CHKERRQ(ierr);
1113 #else
1114   ierr = PetscFree(aij->colmap);CHKERRQ(ierr);
1115 #endif
1116   ierr = PetscFree(aij->garray);CHKERRQ(ierr);
1117   ierr = VecDestroy(&aij->lvec);CHKERRQ(ierr);
1118   ierr = VecScatterDestroy(&aij->Mvctx);CHKERRQ(ierr);
1119   if (aij->Mvctx_mpi1) {ierr = VecScatterDestroy(&aij->Mvctx_mpi1);CHKERRQ(ierr);}
1120   ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
1121   ierr = PetscFree(aij->ld);CHKERRQ(ierr);
1122   ierr = PetscFree(mat->data);CHKERRQ(ierr);
1123 
1124   ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
1125   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL);CHKERRQ(ierr);
1126   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL);CHKERRQ(ierr);
1127   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C",NULL);CHKERRQ(ierr);
1128   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C",NULL);CHKERRQ(ierr);
1129   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatResetPreallocation_C",NULL);CHKERRQ(ierr);
1130   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C",NULL);CHKERRQ(ierr);
1131   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C",NULL);CHKERRQ(ierr);
1132   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C",NULL);CHKERRQ(ierr);
1133 #if defined(PETSC_HAVE_ELEMENTAL)
1134   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_elemental_C",NULL);CHKERRQ(ierr);
1135 #endif
1136 #if defined(PETSC_HAVE_HYPRE)
1137   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_hypre_C",NULL);CHKERRQ(ierr);
1138   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMatMult_transpose_mpiaij_mpiaij_C",NULL);CHKERRQ(ierr);
1139 #endif
1140   PetscFunctionReturn(0);
1141 }
1142 
1143 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
1144 {
1145   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1146   Mat_SeqAIJ     *A   = (Mat_SeqAIJ*)aij->A->data;
1147   Mat_SeqAIJ     *B   = (Mat_SeqAIJ*)aij->B->data;
1148   PetscErrorCode ierr;
1149   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
1150   int            fd;
1151   PetscInt       nz,header[4],*row_lengths,*range=0,rlen,i;
1152   PetscInt       nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap->rstart,rnz = 0;
1153   PetscScalar    *column_values;
1154   PetscInt       message_count,flowcontrolcount;
1155   FILE           *file;
1156 
1157   PetscFunctionBegin;
1158   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
1159   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);CHKERRQ(ierr);
1160   nz   = A->nz + B->nz;
1161   ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1162   if (!rank) {
1163     header[0] = MAT_FILE_CLASSID;
1164     header[1] = mat->rmap->N;
1165     header[2] = mat->cmap->N;
1166 
1167     ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1168     ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1169     /* get largest number of rows any processor has */
1170     rlen  = mat->rmap->n;
1171     range = mat->rmap->range;
1172     for (i=1; i<size; i++) rlen = PetscMax(rlen,range[i+1] - range[i]);
1173   } else {
1174     ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1175     rlen = mat->rmap->n;
1176   }
1177 
1178   /* load up the local row counts */
1179   ierr = PetscMalloc1(rlen+1,&row_lengths);CHKERRQ(ierr);
1180   for (i=0; i<mat->rmap->n; i++) row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
1181 
1182   /* store the row lengths to the file */
1183   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1184   if (!rank) {
1185     ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap->n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1186     for (i=1; i<size; i++) {
1187       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1188       rlen = range[i+1] - range[i];
1189       ierr = MPIULong_Recv(row_lengths,rlen,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1190       ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1191     }
1192     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1193   } else {
1194     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1195     ierr = MPIULong_Send(row_lengths,mat->rmap->n,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1196     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1197   }
1198   ierr = PetscFree(row_lengths);CHKERRQ(ierr);
1199 
1200   /* load up the local column indices */
1201   nzmax = nz; /* th processor needs space a largest processor needs */
1202   ierr  = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1203   ierr  = PetscMalloc1(nzmax+1,&column_indices);CHKERRQ(ierr);
1204   cnt   = 0;
1205   for (i=0; i<mat->rmap->n; i++) {
1206     for (j=B->i[i]; j<B->i[i+1]; j++) {
1207       if ((col = garray[B->j[j]]) > cstart) break;
1208       column_indices[cnt++] = col;
1209     }
1210     for (k=A->i[i]; k<A->i[i+1]; k++) column_indices[cnt++] = A->j[k] + cstart;
1211     for (; j<B->i[i+1]; j++) column_indices[cnt++] = garray[B->j[j]];
1212   }
1213   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
1214 
1215   /* store the column indices to the file */
1216   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1217   if (!rank) {
1218     MPI_Status status;
1219     ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1220     for (i=1; i<size; i++) {
1221       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1222       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
1223       if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax);
1224       ierr = MPIULong_Recv(column_indices,rnz,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1225       ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1226     }
1227     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1228   } else {
1229     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1230     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1231     ierr = MPIULong_Send(column_indices,nz,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1232     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1233   }
1234   ierr = PetscFree(column_indices);CHKERRQ(ierr);
1235 
1236   /* load up the local column values */
1237   ierr = PetscMalloc1(nzmax+1,&column_values);CHKERRQ(ierr);
1238   cnt  = 0;
1239   for (i=0; i<mat->rmap->n; i++) {
1240     for (j=B->i[i]; j<B->i[i+1]; j++) {
1241       if (garray[B->j[j]] > cstart) break;
1242       column_values[cnt++] = B->a[j];
1243     }
1244     for (k=A->i[i]; k<A->i[i+1]; k++) column_values[cnt++] = A->a[k];
1245     for (; j<B->i[i+1]; j++) column_values[cnt++] = B->a[j];
1246   }
1247   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
1248 
1249   /* store the column values to the file */
1250   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1251   if (!rank) {
1252     MPI_Status status;
1253     ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1254     for (i=1; i<size; i++) {
1255       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1256       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
1257       if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax);
1258       ierr = MPIULong_Recv(column_values,rnz,MPIU_SCALAR,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1259       ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1260     }
1261     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1262   } else {
1263     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1264     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1265     ierr = MPIULong_Send(column_values,nz,MPIU_SCALAR,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1266     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1267   }
1268   ierr = PetscFree(column_values);CHKERRQ(ierr);
1269 
1270   ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr);
1271   if (file) fprintf(file,"-matload_block_size %d\n",(int)PetscAbs(mat->rmap->bs));
1272   PetscFunctionReturn(0);
1273 }
1274 
1275 #include <petscdraw.h>
1276 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
1277 {
1278   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
1279   PetscErrorCode    ierr;
1280   PetscMPIInt       rank = aij->rank,size = aij->size;
1281   PetscBool         isdraw,iascii,isbinary;
1282   PetscViewer       sviewer;
1283   PetscViewerFormat format;
1284 
1285   PetscFunctionBegin;
1286   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1287   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1288   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1289   if (iascii) {
1290     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1291     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1292       MatInfo   info;
1293       PetscBool inodes;
1294 
1295       ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
1296       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
1297       ierr = MatInodeGetInodeSizes(aij->A,NULL,(PetscInt**)&inodes,NULL);CHKERRQ(ierr);
1298       ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr);
1299       if (!inodes) {
1300         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %g, not using I-node routines\n",
1301                                                   rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(double)info.memory);CHKERRQ(ierr);
1302       } else {
1303         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %g, using I-node routines\n",
1304                                                   rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(double)info.memory);CHKERRQ(ierr);
1305       }
1306       ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
1307       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1308       ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
1309       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1310       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1311       ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr);
1312       ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
1313       ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr);
1314       PetscFunctionReturn(0);
1315     } else if (format == PETSC_VIEWER_ASCII_INFO) {
1316       PetscInt inodecount,inodelimit,*inodes;
1317       ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr);
1318       if (inodes) {
1319         ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr);
1320       } else {
1321         ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr);
1322       }
1323       PetscFunctionReturn(0);
1324     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1325       PetscFunctionReturn(0);
1326     }
1327   } else if (isbinary) {
1328     if (size == 1) {
1329       ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1330       ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
1331     } else {
1332       ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr);
1333     }
1334     PetscFunctionReturn(0);
1335   } else if (isdraw) {
1336     PetscDraw draw;
1337     PetscBool isnull;
1338     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
1339     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr);
1340     if (isnull) PetscFunctionReturn(0);
1341   }
1342 
1343   {
1344     /* assemble the entire matrix onto first processor. */
1345     Mat        A;
1346     Mat_SeqAIJ *Aloc;
1347     PetscInt   M = mat->rmap->N,N = mat->cmap->N,m,*ai,*aj,row,*cols,i,*ct;
1348     MatScalar  *a;
1349 
1350     ierr = MatCreate(PetscObjectComm((PetscObject)mat),&A);CHKERRQ(ierr);
1351     if (!rank) {
1352       ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
1353     } else {
1354       ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
1355     }
1356     /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */
1357     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
1358     ierr = MatMPIAIJSetPreallocation(A,0,NULL,0,NULL);CHKERRQ(ierr);
1359     ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
1360     ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)A);CHKERRQ(ierr);
1361 
1362     /* copy over the A part */
1363     Aloc = (Mat_SeqAIJ*)aij->A->data;
1364     m    = aij->A->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1365     row  = mat->rmap->rstart;
1366     for (i=0; i<ai[m]; i++) aj[i] += mat->cmap->rstart;
1367     for (i=0; i<m; i++) {
1368       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
1369       row++;
1370       a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
1371     }
1372     aj = Aloc->j;
1373     for (i=0; i<ai[m]; i++) aj[i] -= mat->cmap->rstart;
1374 
1375     /* copy over the B part */
1376     Aloc = (Mat_SeqAIJ*)aij->B->data;
1377     m    = aij->B->rmap->n;  ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1378     row  = mat->rmap->rstart;
1379     ierr = PetscMalloc1(ai[m]+1,&cols);CHKERRQ(ierr);
1380     ct   = cols;
1381     for (i=0; i<ai[m]; i++) cols[i] = aij->garray[aj[i]];
1382     for (i=0; i<m; i++) {
1383       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
1384       row++;
1385       a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
1386     }
1387     ierr = PetscFree(ct);CHKERRQ(ierr);
1388     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1389     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1390     /*
1391        Everyone has to call to draw the matrix since the graphics waits are
1392        synchronized across all processors that share the PetscDraw object
1393     */
1394     ierr = PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
1395     if (!rank) {
1396       ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1397       ierr = MatView_SeqAIJ(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
1398     }
1399     ierr = PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
1400     ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1401     ierr = MatDestroy(&A);CHKERRQ(ierr);
1402   }
1403   PetscFunctionReturn(0);
1404 }
1405 
1406 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer)
1407 {
1408   PetscErrorCode ierr;
1409   PetscBool      iascii,isdraw,issocket,isbinary;
1410 
1411   PetscFunctionBegin;
1412   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1413   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1414   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1415   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
1416   if (iascii || isdraw || isbinary || issocket) {
1417     ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
1418   }
1419   PetscFunctionReturn(0);
1420 }
1421 
1422 PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
1423 {
1424   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1425   PetscErrorCode ierr;
1426   Vec            bb1 = 0;
1427   PetscBool      hasop;
1428 
1429   PetscFunctionBegin;
1430   if (flag == SOR_APPLY_UPPER) {
1431     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1432     PetscFunctionReturn(0);
1433   }
1434 
1435   if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) {
1436     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
1437   }
1438 
1439   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
1440     if (flag & SOR_ZERO_INITIAL_GUESS) {
1441       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1442       its--;
1443     }
1444 
1445     while (its--) {
1446       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1447       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1448 
1449       /* update rhs: bb1 = bb - B*x */
1450       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1451       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1452 
1453       /* local sweep */
1454       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1455     }
1456   } else if (flag & SOR_LOCAL_FORWARD_SWEEP) {
1457     if (flag & SOR_ZERO_INITIAL_GUESS) {
1458       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1459       its--;
1460     }
1461     while (its--) {
1462       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1463       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1464 
1465       /* update rhs: bb1 = bb - B*x */
1466       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1467       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1468 
1469       /* local sweep */
1470       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1471     }
1472   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) {
1473     if (flag & SOR_ZERO_INITIAL_GUESS) {
1474       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1475       its--;
1476     }
1477     while (its--) {
1478       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1479       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1480 
1481       /* update rhs: bb1 = bb - B*x */
1482       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1483       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1484 
1485       /* local sweep */
1486       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1487     }
1488   } else if (flag & SOR_EISENSTAT) {
1489     Vec xx1;
1490 
1491     ierr = VecDuplicate(bb,&xx1);CHKERRQ(ierr);
1492     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr);
1493 
1494     ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1495     ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1496     if (!mat->diag) {
1497       ierr = MatCreateVecs(matin,&mat->diag,NULL);CHKERRQ(ierr);
1498       ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr);
1499     }
1500     ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr);
1501     if (hasop) {
1502       ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr);
1503     } else {
1504       ierr = VecPointwiseMult(bb1,mat->diag,xx);CHKERRQ(ierr);
1505     }
1506     ierr = VecAYPX(bb1,(omega-2.0)/omega,bb);CHKERRQ(ierr);
1507 
1508     ierr = MatMultAdd(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr);
1509 
1510     /* local sweep */
1511     ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr);
1512     ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr);
1513     ierr = VecDestroy(&xx1);CHKERRQ(ierr);
1514   } else SETERRQ(PetscObjectComm((PetscObject)matin),PETSC_ERR_SUP,"Parallel SOR not supported");
1515 
1516   ierr = VecDestroy(&bb1);CHKERRQ(ierr);
1517 
1518   matin->factorerrortype = mat->A->factorerrortype;
1519   PetscFunctionReturn(0);
1520 }
1521 
1522 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B)
1523 {
1524   Mat            aA,aB,Aperm;
1525   const PetscInt *rwant,*cwant,*gcols,*ai,*bi,*aj,*bj;
1526   PetscScalar    *aa,*ba;
1527   PetscInt       i,j,m,n,ng,anz,bnz,*dnnz,*onnz,*tdnnz,*tonnz,*rdest,*cdest,*work,*gcdest;
1528   PetscSF        rowsf,sf;
1529   IS             parcolp = NULL;
1530   PetscBool      done;
1531   PetscErrorCode ierr;
1532 
1533   PetscFunctionBegin;
1534   ierr = MatGetLocalSize(A,&m,&n);CHKERRQ(ierr);
1535   ierr = ISGetIndices(rowp,&rwant);CHKERRQ(ierr);
1536   ierr = ISGetIndices(colp,&cwant);CHKERRQ(ierr);
1537   ierr = PetscMalloc3(PetscMax(m,n),&work,m,&rdest,n,&cdest);CHKERRQ(ierr);
1538 
1539   /* Invert row permutation to find out where my rows should go */
1540   ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&rowsf);CHKERRQ(ierr);
1541   ierr = PetscSFSetGraphLayout(rowsf,A->rmap,A->rmap->n,NULL,PETSC_OWN_POINTER,rwant);CHKERRQ(ierr);
1542   ierr = PetscSFSetFromOptions(rowsf);CHKERRQ(ierr);
1543   for (i=0; i<m; i++) work[i] = A->rmap->rstart + i;
1544   ierr = PetscSFReduceBegin(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr);
1545   ierr = PetscSFReduceEnd(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr);
1546 
1547   /* Invert column permutation to find out where my columns should go */
1548   ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
1549   ierr = PetscSFSetGraphLayout(sf,A->cmap,A->cmap->n,NULL,PETSC_OWN_POINTER,cwant);CHKERRQ(ierr);
1550   ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
1551   for (i=0; i<n; i++) work[i] = A->cmap->rstart + i;
1552   ierr = PetscSFReduceBegin(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr);
1553   ierr = PetscSFReduceEnd(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr);
1554   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
1555 
1556   ierr = ISRestoreIndices(rowp,&rwant);CHKERRQ(ierr);
1557   ierr = ISRestoreIndices(colp,&cwant);CHKERRQ(ierr);
1558   ierr = MatMPIAIJGetSeqAIJ(A,&aA,&aB,&gcols);CHKERRQ(ierr);
1559 
1560   /* Find out where my gcols should go */
1561   ierr = MatGetSize(aB,NULL,&ng);CHKERRQ(ierr);
1562   ierr = PetscMalloc1(ng,&gcdest);CHKERRQ(ierr);
1563   ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
1564   ierr = PetscSFSetGraphLayout(sf,A->cmap,ng,NULL,PETSC_OWN_POINTER,gcols);CHKERRQ(ierr);
1565   ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
1566   ierr = PetscSFBcastBegin(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr);
1567   ierr = PetscSFBcastEnd(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr);
1568   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
1569 
1570   ierr = PetscCalloc4(m,&dnnz,m,&onnz,m,&tdnnz,m,&tonnz);CHKERRQ(ierr);
1571   ierr = MatGetRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr);
1572   ierr = MatGetRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr);
1573   for (i=0; i<m; i++) {
1574     PetscInt row = rdest[i],rowner;
1575     ierr = PetscLayoutFindOwner(A->rmap,row,&rowner);CHKERRQ(ierr);
1576     for (j=ai[i]; j<ai[i+1]; j++) {
1577       PetscInt cowner,col = cdest[aj[j]];
1578       ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr); /* Could build an index for the columns to eliminate this search */
1579       if (rowner == cowner) dnnz[i]++;
1580       else onnz[i]++;
1581     }
1582     for (j=bi[i]; j<bi[i+1]; j++) {
1583       PetscInt cowner,col = gcdest[bj[j]];
1584       ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr);
1585       if (rowner == cowner) dnnz[i]++;
1586       else onnz[i]++;
1587     }
1588   }
1589   ierr = PetscSFBcastBegin(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr);
1590   ierr = PetscSFBcastEnd(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr);
1591   ierr = PetscSFBcastBegin(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr);
1592   ierr = PetscSFBcastEnd(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr);
1593   ierr = PetscSFDestroy(&rowsf);CHKERRQ(ierr);
1594 
1595   ierr = MatCreateAIJ(PetscObjectComm((PetscObject)A),A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N,0,tdnnz,0,tonnz,&Aperm);CHKERRQ(ierr);
1596   ierr = MatSeqAIJGetArray(aA,&aa);CHKERRQ(ierr);
1597   ierr = MatSeqAIJGetArray(aB,&ba);CHKERRQ(ierr);
1598   for (i=0; i<m; i++) {
1599     PetscInt *acols = dnnz,*bcols = onnz; /* Repurpose now-unneeded arrays */
1600     PetscInt j0,rowlen;
1601     rowlen = ai[i+1] - ai[i];
1602     for (j0=j=0; j<rowlen; j0=j) { /* rowlen could be larger than number of rows m, so sum in batches */
1603       for ( ; j<PetscMin(rowlen,j0+m); j++) acols[j-j0] = cdest[aj[ai[i]+j]];
1604       ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,acols,aa+ai[i]+j0,INSERT_VALUES);CHKERRQ(ierr);
1605     }
1606     rowlen = bi[i+1] - bi[i];
1607     for (j0=j=0; j<rowlen; j0=j) {
1608       for ( ; j<PetscMin(rowlen,j0+m); j++) bcols[j-j0] = gcdest[bj[bi[i]+j]];
1609       ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,bcols,ba+bi[i]+j0,INSERT_VALUES);CHKERRQ(ierr);
1610     }
1611   }
1612   ierr = MatAssemblyBegin(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1613   ierr = MatAssemblyEnd(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1614   ierr = MatRestoreRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr);
1615   ierr = MatRestoreRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr);
1616   ierr = MatSeqAIJRestoreArray(aA,&aa);CHKERRQ(ierr);
1617   ierr = MatSeqAIJRestoreArray(aB,&ba);CHKERRQ(ierr);
1618   ierr = PetscFree4(dnnz,onnz,tdnnz,tonnz);CHKERRQ(ierr);
1619   ierr = PetscFree3(work,rdest,cdest);CHKERRQ(ierr);
1620   ierr = PetscFree(gcdest);CHKERRQ(ierr);
1621   if (parcolp) {ierr = ISDestroy(&colp);CHKERRQ(ierr);}
1622   *B = Aperm;
1623   PetscFunctionReturn(0);
1624 }
1625 
1626 PetscErrorCode  MatGetGhosts_MPIAIJ(Mat mat,PetscInt *nghosts,const PetscInt *ghosts[])
1627 {
1628   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1629   PetscErrorCode ierr;
1630 
1631   PetscFunctionBegin;
1632   ierr = MatGetSize(aij->B,NULL,nghosts);CHKERRQ(ierr);
1633   if (ghosts) *ghosts = aij->garray;
1634   PetscFunctionReturn(0);
1635 }
1636 
1637 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1638 {
1639   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1640   Mat            A    = mat->A,B = mat->B;
1641   PetscErrorCode ierr;
1642   PetscReal      isend[5],irecv[5];
1643 
1644   PetscFunctionBegin;
1645   info->block_size = 1.0;
1646   ierr             = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1647 
1648   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1649   isend[3] = info->memory;  isend[4] = info->mallocs;
1650 
1651   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1652 
1653   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1654   isend[3] += info->memory;  isend[4] += info->mallocs;
1655   if (flag == MAT_LOCAL) {
1656     info->nz_used      = isend[0];
1657     info->nz_allocated = isend[1];
1658     info->nz_unneeded  = isend[2];
1659     info->memory       = isend[3];
1660     info->mallocs      = isend[4];
1661   } else if (flag == MAT_GLOBAL_MAX) {
1662     ierr = MPIU_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);
1663 
1664     info->nz_used      = irecv[0];
1665     info->nz_allocated = irecv[1];
1666     info->nz_unneeded  = irecv[2];
1667     info->memory       = irecv[3];
1668     info->mallocs      = irecv[4];
1669   } else if (flag == MAT_GLOBAL_SUM) {
1670     ierr = MPIU_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);
1671 
1672     info->nz_used      = irecv[0];
1673     info->nz_allocated = irecv[1];
1674     info->nz_unneeded  = irecv[2];
1675     info->memory       = irecv[3];
1676     info->mallocs      = irecv[4];
1677   }
1678   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1679   info->fill_ratio_needed = 0;
1680   info->factor_mallocs    = 0;
1681   PetscFunctionReturn(0);
1682 }
1683 
1684 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscBool flg)
1685 {
1686   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1687   PetscErrorCode ierr;
1688 
1689   PetscFunctionBegin;
1690   switch (op) {
1691   case MAT_NEW_NONZERO_LOCATIONS:
1692   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1693   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1694   case MAT_KEEP_NONZERO_PATTERN:
1695   case MAT_NEW_NONZERO_LOCATION_ERR:
1696   case MAT_USE_INODES:
1697   case MAT_IGNORE_ZERO_ENTRIES:
1698     MatCheckPreallocated(A,1);
1699     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1700     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1701     break;
1702   case MAT_ROW_ORIENTED:
1703     MatCheckPreallocated(A,1);
1704     a->roworiented = flg;
1705 
1706     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1707     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1708     break;
1709   case MAT_NEW_DIAGONALS:
1710     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1711     break;
1712   case MAT_IGNORE_OFF_PROC_ENTRIES:
1713     a->donotstash = flg;
1714     break;
1715   case MAT_SPD:
1716     A->spd_set = PETSC_TRUE;
1717     A->spd     = flg;
1718     if (flg) {
1719       A->symmetric                  = PETSC_TRUE;
1720       A->structurally_symmetric     = PETSC_TRUE;
1721       A->symmetric_set              = PETSC_TRUE;
1722       A->structurally_symmetric_set = PETSC_TRUE;
1723     }
1724     break;
1725   case MAT_SYMMETRIC:
1726     MatCheckPreallocated(A,1);
1727     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1728     break;
1729   case MAT_STRUCTURALLY_SYMMETRIC:
1730     MatCheckPreallocated(A,1);
1731     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1732     break;
1733   case MAT_HERMITIAN:
1734     MatCheckPreallocated(A,1);
1735     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1736     break;
1737   case MAT_SYMMETRY_ETERNAL:
1738     MatCheckPreallocated(A,1);
1739     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1740     break;
1741   case MAT_SUBMAT_SINGLEIS:
1742     A->submat_singleis = flg;
1743     break;
1744   case MAT_STRUCTURE_ONLY:
1745     /* The option is handled directly by MatSetOption() */
1746     break;
1747   default:
1748     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op);
1749   }
1750   PetscFunctionReturn(0);
1751 }
1752 
1753 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1754 {
1755   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1756   PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
1757   PetscErrorCode ierr;
1758   PetscInt       i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart;
1759   PetscInt       nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend;
1760   PetscInt       *cmap,*idx_p;
1761 
1762   PetscFunctionBegin;
1763   if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
1764   mat->getrowactive = PETSC_TRUE;
1765 
1766   if (!mat->rowvalues && (idx || v)) {
1767     /*
1768         allocate enough space to hold information from the longest row.
1769     */
1770     Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data;
1771     PetscInt   max = 1,tmp;
1772     for (i=0; i<matin->rmap->n; i++) {
1773       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1774       if (max < tmp) max = tmp;
1775     }
1776     ierr = PetscMalloc2(max,&mat->rowvalues,max,&mat->rowindices);CHKERRQ(ierr);
1777   }
1778 
1779   if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows");
1780   lrow = row - rstart;
1781 
1782   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1783   if (!v)   {pvA = 0; pvB = 0;}
1784   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1785   ierr  = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1786   ierr  = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1787   nztot = nzA + nzB;
1788 
1789   cmap = mat->garray;
1790   if (v  || idx) {
1791     if (nztot) {
1792       /* Sort by increasing column numbers, assuming A and B already sorted */
1793       PetscInt imark = -1;
1794       if (v) {
1795         *v = v_p = mat->rowvalues;
1796         for (i=0; i<nzB; i++) {
1797           if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i];
1798           else break;
1799         }
1800         imark = i;
1801         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1802         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1803       }
1804       if (idx) {
1805         *idx = idx_p = mat->rowindices;
1806         if (imark > -1) {
1807           for (i=0; i<imark; i++) {
1808             idx_p[i] = cmap[cworkB[i]];
1809           }
1810         } else {
1811           for (i=0; i<nzB; i++) {
1812             if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]];
1813             else break;
1814           }
1815           imark = i;
1816         }
1817         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart + cworkA[i];
1818         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]];
1819       }
1820     } else {
1821       if (idx) *idx = 0;
1822       if (v)   *v   = 0;
1823     }
1824   }
1825   *nz  = nztot;
1826   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1827   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1828   PetscFunctionReturn(0);
1829 }
1830 
1831 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1832 {
1833   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1834 
1835   PetscFunctionBegin;
1836   if (!aij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
1837   aij->getrowactive = PETSC_FALSE;
1838   PetscFunctionReturn(0);
1839 }
1840 
1841 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm)
1842 {
1843   Mat_MPIAIJ     *aij  = (Mat_MPIAIJ*)mat->data;
1844   Mat_SeqAIJ     *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data;
1845   PetscErrorCode ierr;
1846   PetscInt       i,j,cstart = mat->cmap->rstart;
1847   PetscReal      sum = 0.0;
1848   MatScalar      *v;
1849 
1850   PetscFunctionBegin;
1851   if (aij->size == 1) {
1852     ierr =  MatNorm(aij->A,type,norm);CHKERRQ(ierr);
1853   } else {
1854     if (type == NORM_FROBENIUS) {
1855       v = amat->a;
1856       for (i=0; i<amat->nz; i++) {
1857         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1858       }
1859       v = bmat->a;
1860       for (i=0; i<bmat->nz; i++) {
1861         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1862       }
1863       ierr  = MPIU_Allreduce(&sum,norm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1864       *norm = PetscSqrtReal(*norm);
1865       ierr = PetscLogFlops(2*amat->nz+2*bmat->nz);CHKERRQ(ierr);
1866     } else if (type == NORM_1) { /* max column norm */
1867       PetscReal *tmp,*tmp2;
1868       PetscInt  *jj,*garray = aij->garray;
1869       ierr  = PetscCalloc1(mat->cmap->N+1,&tmp);CHKERRQ(ierr);
1870       ierr  = PetscMalloc1(mat->cmap->N+1,&tmp2);CHKERRQ(ierr);
1871       *norm = 0.0;
1872       v     = amat->a; jj = amat->j;
1873       for (j=0; j<amat->nz; j++) {
1874         tmp[cstart + *jj++] += PetscAbsScalar(*v);  v++;
1875       }
1876       v = bmat->a; jj = bmat->j;
1877       for (j=0; j<bmat->nz; j++) {
1878         tmp[garray[*jj++]] += PetscAbsScalar(*v); v++;
1879       }
1880       ierr = MPIU_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1881       for (j=0; j<mat->cmap->N; j++) {
1882         if (tmp2[j] > *norm) *norm = tmp2[j];
1883       }
1884       ierr = PetscFree(tmp);CHKERRQ(ierr);
1885       ierr = PetscFree(tmp2);CHKERRQ(ierr);
1886       ierr = PetscLogFlops(PetscMax(amat->nz+bmat->nz-1,0));CHKERRQ(ierr);
1887     } else if (type == NORM_INFINITY) { /* max row norm */
1888       PetscReal ntemp = 0.0;
1889       for (j=0; j<aij->A->rmap->n; j++) {
1890         v   = amat->a + amat->i[j];
1891         sum = 0.0;
1892         for (i=0; i<amat->i[j+1]-amat->i[j]; i++) {
1893           sum += PetscAbsScalar(*v); v++;
1894         }
1895         v = bmat->a + bmat->i[j];
1896         for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) {
1897           sum += PetscAbsScalar(*v); v++;
1898         }
1899         if (sum > ntemp) ntemp = sum;
1900       }
1901       ierr = MPIU_Allreduce(&ntemp,norm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1902       ierr = PetscLogFlops(PetscMax(amat->nz+bmat->nz-1,0));CHKERRQ(ierr);
1903     } else SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"No support for two norm");
1904   }
1905   PetscFunctionReturn(0);
1906 }
1907 
1908 PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout)
1909 {
1910   Mat_MPIAIJ     *a   = (Mat_MPIAIJ*)A->data;
1911   Mat_SeqAIJ     *Aloc=(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data;
1912   PetscErrorCode ierr;
1913   PetscInt       M      = A->rmap->N,N = A->cmap->N,ma,na,mb,nb,*ai,*aj,*bi,*bj,row,*cols,*cols_tmp,i;
1914   PetscInt       cstart = A->cmap->rstart,ncol;
1915   Mat            B;
1916   MatScalar      *array;
1917 
1918   PetscFunctionBegin;
1919   if (reuse == MAT_INPLACE_MATRIX && M != N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
1920 
1921   ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n; nb = a->B->cmap->n;
1922   ai = Aloc->i; aj = Aloc->j;
1923   bi = Bloc->i; bj = Bloc->j;
1924   if (reuse == MAT_INITIAL_MATRIX || *matout == A) {
1925     PetscInt             *d_nnz,*g_nnz,*o_nnz;
1926     PetscSFNode          *oloc;
1927     PETSC_UNUSED PetscSF sf;
1928 
1929     ierr = PetscMalloc4(na,&d_nnz,na,&o_nnz,nb,&g_nnz,nb,&oloc);CHKERRQ(ierr);
1930     /* compute d_nnz for preallocation */
1931     ierr = PetscMemzero(d_nnz,na*sizeof(PetscInt));CHKERRQ(ierr);
1932     for (i=0; i<ai[ma]; i++) {
1933       d_nnz[aj[i]]++;
1934       aj[i] += cstart; /* global col index to be used by MatSetValues() */
1935     }
1936     /* compute local off-diagonal contributions */
1937     ierr = PetscMemzero(g_nnz,nb*sizeof(PetscInt));CHKERRQ(ierr);
1938     for (i=0; i<bi[ma]; i++) g_nnz[bj[i]]++;
1939     /* map those to global */
1940     ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
1941     ierr = PetscSFSetGraphLayout(sf,A->cmap,nb,NULL,PETSC_USE_POINTER,a->garray);CHKERRQ(ierr);
1942     ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
1943     ierr = PetscMemzero(o_nnz,na*sizeof(PetscInt));CHKERRQ(ierr);
1944     ierr = PetscSFReduceBegin(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr);
1945     ierr = PetscSFReduceEnd(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr);
1946     ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
1947 
1948     ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
1949     ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
1950     ierr = MatSetBlockSizes(B,PetscAbs(A->cmap->bs),PetscAbs(A->rmap->bs));CHKERRQ(ierr);
1951     ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
1952     ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
1953     ierr = PetscFree4(d_nnz,o_nnz,g_nnz,oloc);CHKERRQ(ierr);
1954   } else {
1955     B    = *matout;
1956     ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
1957     for (i=0; i<ai[ma]; i++) aj[i] += cstart; /* global col index to be used by MatSetValues() */
1958   }
1959 
1960   /* copy over the A part */
1961   array = Aloc->a;
1962   row   = A->rmap->rstart;
1963   for (i=0; i<ma; i++) {
1964     ncol = ai[i+1]-ai[i];
1965     ierr = MatSetValues(B,ncol,aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1966     row++;
1967     array += ncol; aj += ncol;
1968   }
1969   aj = Aloc->j;
1970   for (i=0; i<ai[ma]; i++) aj[i] -= cstart; /* resume local col index */
1971 
1972   /* copy over the B part */
1973   ierr  = PetscCalloc1(bi[mb],&cols);CHKERRQ(ierr);
1974   array = Bloc->a;
1975   row   = A->rmap->rstart;
1976   for (i=0; i<bi[mb]; i++) cols[i] = a->garray[bj[i]];
1977   cols_tmp = cols;
1978   for (i=0; i<mb; i++) {
1979     ncol = bi[i+1]-bi[i];
1980     ierr = MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1981     row++;
1982     array += ncol; cols_tmp += ncol;
1983   }
1984   ierr = PetscFree(cols);CHKERRQ(ierr);
1985 
1986   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1987   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1988   if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) {
1989     *matout = B;
1990   } else {
1991     ierr = MatHeaderMerge(A,&B);CHKERRQ(ierr);
1992   }
1993   PetscFunctionReturn(0);
1994 }
1995 
1996 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
1997 {
1998   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1999   Mat            a    = aij->A,b = aij->B;
2000   PetscErrorCode ierr;
2001   PetscInt       s1,s2,s3;
2002 
2003   PetscFunctionBegin;
2004   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
2005   if (rr) {
2006     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
2007     if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
2008     /* Overlap communication with computation. */
2009     ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2010   }
2011   if (ll) {
2012     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
2013     if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
2014     ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr);
2015   }
2016   /* scale  the diagonal block */
2017   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
2018 
2019   if (rr) {
2020     /* Do a scatter end and then right scale the off-diagonal block */
2021     ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2022     ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr);
2023   }
2024   PetscFunctionReturn(0);
2025 }
2026 
2027 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A)
2028 {
2029   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2030   PetscErrorCode ierr;
2031 
2032   PetscFunctionBegin;
2033   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
2034   PetscFunctionReturn(0);
2035 }
2036 
2037 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscBool  *flag)
2038 {
2039   Mat_MPIAIJ     *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data;
2040   Mat            a,b,c,d;
2041   PetscBool      flg;
2042   PetscErrorCode ierr;
2043 
2044   PetscFunctionBegin;
2045   a = matA->A; b = matA->B;
2046   c = matB->A; d = matB->B;
2047 
2048   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
2049   if (flg) {
2050     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
2051   }
2052   ierr = MPIU_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
2053   PetscFunctionReturn(0);
2054 }
2055 
2056 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
2057 {
2058   PetscErrorCode ierr;
2059   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2060   Mat_MPIAIJ     *b = (Mat_MPIAIJ*)B->data;
2061 
2062   PetscFunctionBegin;
2063   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
2064   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
2065     /* because of the column compression in the off-processor part of the matrix a->B,
2066        the number of columns in a->B and b->B may be different, hence we cannot call
2067        the MatCopy() directly on the two parts. If need be, we can provide a more
2068        efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices
2069        then copying the submatrices */
2070     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
2071   } else {
2072     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
2073     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
2074   }
2075   ierr = PetscObjectStateIncrease((PetscObject)B);CHKERRQ(ierr);
2076   PetscFunctionReturn(0);
2077 }
2078 
2079 PetscErrorCode MatSetUp_MPIAIJ(Mat A)
2080 {
2081   PetscErrorCode ierr;
2082 
2083   PetscFunctionBegin;
2084   ierr =  MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
2085   PetscFunctionReturn(0);
2086 }
2087 
2088 /*
2089    Computes the number of nonzeros per row needed for preallocation when X and Y
2090    have different nonzero structure.
2091 */
2092 PetscErrorCode MatAXPYGetPreallocation_MPIX_private(PetscInt m,const PetscInt *xi,const PetscInt *xj,const PetscInt *xltog,const PetscInt *yi,const PetscInt *yj,const PetscInt *yltog,PetscInt *nnz)
2093 {
2094   PetscInt       i,j,k,nzx,nzy;
2095 
2096   PetscFunctionBegin;
2097   /* Set the number of nonzeros in the new matrix */
2098   for (i=0; i<m; i++) {
2099     const PetscInt *xjj = xj+xi[i],*yjj = yj+yi[i];
2100     nzx = xi[i+1] - xi[i];
2101     nzy = yi[i+1] - yi[i];
2102     nnz[i] = 0;
2103     for (j=0,k=0; j<nzx; j++) {                   /* Point in X */
2104       for (; k<nzy && yltog[yjj[k]]<xltog[xjj[j]]; k++) nnz[i]++; /* Catch up to X */
2105       if (k<nzy && yltog[yjj[k]]==xltog[xjj[j]]) k++;             /* Skip duplicate */
2106       nnz[i]++;
2107     }
2108     for (; k<nzy; k++) nnz[i]++;
2109   }
2110   PetscFunctionReturn(0);
2111 }
2112 
2113 /* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */
2114 static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y,const PetscInt *yltog,Mat X,const PetscInt *xltog,PetscInt *nnz)
2115 {
2116   PetscErrorCode ierr;
2117   PetscInt       m = Y->rmap->N;
2118   Mat_SeqAIJ     *x = (Mat_SeqAIJ*)X->data;
2119   Mat_SeqAIJ     *y = (Mat_SeqAIJ*)Y->data;
2120 
2121   PetscFunctionBegin;
2122   ierr = MatAXPYGetPreallocation_MPIX_private(m,x->i,x->j,xltog,y->i,y->j,yltog,nnz);CHKERRQ(ierr);
2123   PetscFunctionReturn(0);
2124 }
2125 
2126 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
2127 {
2128   PetscErrorCode ierr;
2129   Mat_MPIAIJ     *xx = (Mat_MPIAIJ*)X->data,*yy = (Mat_MPIAIJ*)Y->data;
2130   PetscBLASInt   bnz,one=1;
2131   Mat_SeqAIJ     *x,*y;
2132 
2133   PetscFunctionBegin;
2134   if (str == SAME_NONZERO_PATTERN) {
2135     PetscScalar alpha = a;
2136     x    = (Mat_SeqAIJ*)xx->A->data;
2137     ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr);
2138     y    = (Mat_SeqAIJ*)yy->A->data;
2139     PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
2140     x    = (Mat_SeqAIJ*)xx->B->data;
2141     y    = (Mat_SeqAIJ*)yy->B->data;
2142     ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr);
2143     PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
2144     ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr);
2145   } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
2146     ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
2147   } else {
2148     Mat      B;
2149     PetscInt *nnz_d,*nnz_o;
2150     ierr = PetscMalloc1(yy->A->rmap->N,&nnz_d);CHKERRQ(ierr);
2151     ierr = PetscMalloc1(yy->B->rmap->N,&nnz_o);CHKERRQ(ierr);
2152     ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr);
2153     ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr);
2154     ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr);
2155     ierr = MatSetBlockSizesFromMats(B,Y,Y);CHKERRQ(ierr);
2156     ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr);
2157     ierr = MatAXPYGetPreallocation_SeqAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr);
2158     ierr = MatAXPYGetPreallocation_MPIAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr);
2159     ierr = MatMPIAIJSetPreallocation(B,0,nnz_d,0,nnz_o);CHKERRQ(ierr);
2160     ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr);
2161     ierr = MatHeaderReplace(Y,&B);CHKERRQ(ierr);
2162     ierr = PetscFree(nnz_d);CHKERRQ(ierr);
2163     ierr = PetscFree(nnz_o);CHKERRQ(ierr);
2164   }
2165   PetscFunctionReturn(0);
2166 }
2167 
2168 extern PetscErrorCode  MatConjugate_SeqAIJ(Mat);
2169 
2170 PetscErrorCode  MatConjugate_MPIAIJ(Mat mat)
2171 {
2172 #if defined(PETSC_USE_COMPLEX)
2173   PetscErrorCode ierr;
2174   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2175 
2176   PetscFunctionBegin;
2177   ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr);
2178   ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr);
2179 #else
2180   PetscFunctionBegin;
2181 #endif
2182   PetscFunctionReturn(0);
2183 }
2184 
2185 PetscErrorCode MatRealPart_MPIAIJ(Mat A)
2186 {
2187   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2188   PetscErrorCode ierr;
2189 
2190   PetscFunctionBegin;
2191   ierr = MatRealPart(a->A);CHKERRQ(ierr);
2192   ierr = MatRealPart(a->B);CHKERRQ(ierr);
2193   PetscFunctionReturn(0);
2194 }
2195 
2196 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A)
2197 {
2198   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2199   PetscErrorCode ierr;
2200 
2201   PetscFunctionBegin;
2202   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
2203   ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
2204   PetscFunctionReturn(0);
2205 }
2206 
2207 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2208 {
2209   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2210   PetscErrorCode ierr;
2211   PetscInt       i,*idxb = 0;
2212   PetscScalar    *va,*vb;
2213   Vec            vtmp;
2214 
2215   PetscFunctionBegin;
2216   ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr);
2217   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2218   if (idx) {
2219     for (i=0; i<A->rmap->n; i++) {
2220       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2221     }
2222   }
2223 
2224   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
2225   if (idx) {
2226     ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr);
2227   }
2228   ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
2229   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
2230 
2231   for (i=0; i<A->rmap->n; i++) {
2232     if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
2233       va[i] = vb[i];
2234       if (idx) idx[i] = a->garray[idxb[i]];
2235     }
2236   }
2237 
2238   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2239   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
2240   ierr = PetscFree(idxb);CHKERRQ(ierr);
2241   ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
2242   PetscFunctionReturn(0);
2243 }
2244 
2245 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2246 {
2247   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2248   PetscErrorCode ierr;
2249   PetscInt       i,*idxb = 0;
2250   PetscScalar    *va,*vb;
2251   Vec            vtmp;
2252 
2253   PetscFunctionBegin;
2254   ierr = MatGetRowMinAbs(a->A,v,idx);CHKERRQ(ierr);
2255   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2256   if (idx) {
2257     for (i=0; i<A->cmap->n; i++) {
2258       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2259     }
2260   }
2261 
2262   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
2263   if (idx) {
2264     ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr);
2265   }
2266   ierr = MatGetRowMinAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
2267   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
2268 
2269   for (i=0; i<A->rmap->n; i++) {
2270     if (PetscAbsScalar(va[i]) > PetscAbsScalar(vb[i])) {
2271       va[i] = vb[i];
2272       if (idx) idx[i] = a->garray[idxb[i]];
2273     }
2274   }
2275 
2276   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2277   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
2278   ierr = PetscFree(idxb);CHKERRQ(ierr);
2279   ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
2280   PetscFunctionReturn(0);
2281 }
2282 
2283 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2284 {
2285   Mat_MPIAIJ     *mat   = (Mat_MPIAIJ*) A->data;
2286   PetscInt       n      = A->rmap->n;
2287   PetscInt       cstart = A->cmap->rstart;
2288   PetscInt       *cmap  = mat->garray;
2289   PetscInt       *diagIdx, *offdiagIdx;
2290   Vec            diagV, offdiagV;
2291   PetscScalar    *a, *diagA, *offdiagA;
2292   PetscInt       r;
2293   PetscErrorCode ierr;
2294 
2295   PetscFunctionBegin;
2296   ierr = PetscMalloc2(n,&diagIdx,n,&offdiagIdx);CHKERRQ(ierr);
2297   ierr = VecCreateSeq(PetscObjectComm((PetscObject)A), n, &diagV);CHKERRQ(ierr);
2298   ierr = VecCreateSeq(PetscObjectComm((PetscObject)A), n, &offdiagV);CHKERRQ(ierr);
2299   ierr = MatGetRowMin(mat->A, diagV,    diagIdx);CHKERRQ(ierr);
2300   ierr = MatGetRowMin(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
2301   ierr = VecGetArray(v,        &a);CHKERRQ(ierr);
2302   ierr = VecGetArray(diagV,    &diagA);CHKERRQ(ierr);
2303   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2304   for (r = 0; r < n; ++r) {
2305     if (PetscAbsScalar(diagA[r]) <= PetscAbsScalar(offdiagA[r])) {
2306       a[r]   = diagA[r];
2307       idx[r] = cstart + diagIdx[r];
2308     } else {
2309       a[r]   = offdiagA[r];
2310       idx[r] = cmap[offdiagIdx[r]];
2311     }
2312   }
2313   ierr = VecRestoreArray(v,        &a);CHKERRQ(ierr);
2314   ierr = VecRestoreArray(diagV,    &diagA);CHKERRQ(ierr);
2315   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2316   ierr = VecDestroy(&diagV);CHKERRQ(ierr);
2317   ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
2318   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
2319   PetscFunctionReturn(0);
2320 }
2321 
2322 PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2323 {
2324   Mat_MPIAIJ     *mat   = (Mat_MPIAIJ*) A->data;
2325   PetscInt       n      = A->rmap->n;
2326   PetscInt       cstart = A->cmap->rstart;
2327   PetscInt       *cmap  = mat->garray;
2328   PetscInt       *diagIdx, *offdiagIdx;
2329   Vec            diagV, offdiagV;
2330   PetscScalar    *a, *diagA, *offdiagA;
2331   PetscInt       r;
2332   PetscErrorCode ierr;
2333 
2334   PetscFunctionBegin;
2335   ierr = PetscMalloc2(n,&diagIdx,n,&offdiagIdx);CHKERRQ(ierr);
2336   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr);
2337   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr);
2338   ierr = MatGetRowMax(mat->A, diagV,    diagIdx);CHKERRQ(ierr);
2339   ierr = MatGetRowMax(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
2340   ierr = VecGetArray(v,        &a);CHKERRQ(ierr);
2341   ierr = VecGetArray(diagV,    &diagA);CHKERRQ(ierr);
2342   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2343   for (r = 0; r < n; ++r) {
2344     if (PetscAbsScalar(diagA[r]) >= PetscAbsScalar(offdiagA[r])) {
2345       a[r]   = diagA[r];
2346       idx[r] = cstart + diagIdx[r];
2347     } else {
2348       a[r]   = offdiagA[r];
2349       idx[r] = cmap[offdiagIdx[r]];
2350     }
2351   }
2352   ierr = VecRestoreArray(v,        &a);CHKERRQ(ierr);
2353   ierr = VecRestoreArray(diagV,    &diagA);CHKERRQ(ierr);
2354   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2355   ierr = VecDestroy(&diagV);CHKERRQ(ierr);
2356   ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
2357   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
2358   PetscFunctionReturn(0);
2359 }
2360 
2361 PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat,Mat *newmat)
2362 {
2363   PetscErrorCode ierr;
2364   Mat            *dummy;
2365 
2366   PetscFunctionBegin;
2367   ierr    = MatCreateSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy);CHKERRQ(ierr);
2368   *newmat = *dummy;
2369   ierr    = PetscFree(dummy);CHKERRQ(ierr);
2370   PetscFunctionReturn(0);
2371 }
2372 
2373 PetscErrorCode  MatInvertBlockDiagonal_MPIAIJ(Mat A,const PetscScalar **values)
2374 {
2375   Mat_MPIAIJ     *a = (Mat_MPIAIJ*) A->data;
2376   PetscErrorCode ierr;
2377 
2378   PetscFunctionBegin;
2379   ierr = MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr);
2380   A->factorerrortype = a->A->factorerrortype;
2381   PetscFunctionReturn(0);
2382 }
2383 
2384 static PetscErrorCode  MatSetRandom_MPIAIJ(Mat x,PetscRandom rctx)
2385 {
2386   PetscErrorCode ierr;
2387   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)x->data;
2388 
2389   PetscFunctionBegin;
2390   ierr = MatSetRandom(aij->A,rctx);CHKERRQ(ierr);
2391   ierr = MatSetRandom(aij->B,rctx);CHKERRQ(ierr);
2392   ierr = MatAssemblyBegin(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2393   ierr = MatAssemblyEnd(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2394   PetscFunctionReturn(0);
2395 }
2396 
2397 PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ(Mat A,PetscBool sc)
2398 {
2399   PetscFunctionBegin;
2400   if (sc) A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ_Scalable;
2401   else A->ops->increaseoverlap    = MatIncreaseOverlap_MPIAIJ;
2402   PetscFunctionReturn(0);
2403 }
2404 
2405 /*@
2406    MatMPIAIJSetUseScalableIncreaseOverlap - Determine if the matrix uses a scalable algorithm to compute the overlap
2407 
2408    Collective on Mat
2409 
2410    Input Parameters:
2411 +    A - the matrix
2412 -    sc - PETSC_TRUE indicates use the scalable algorithm (default is not to use the scalable algorithm)
2413 
2414  Level: advanced
2415 
2416 @*/
2417 PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap(Mat A,PetscBool sc)
2418 {
2419   PetscErrorCode       ierr;
2420 
2421   PetscFunctionBegin;
2422   ierr = PetscTryMethod(A,"MatMPIAIJSetUseScalableIncreaseOverlap_C",(Mat,PetscBool),(A,sc));CHKERRQ(ierr);
2423   PetscFunctionReturn(0);
2424 }
2425 
2426 PetscErrorCode MatSetFromOptions_MPIAIJ(PetscOptionItems *PetscOptionsObject,Mat A)
2427 {
2428   PetscErrorCode       ierr;
2429   PetscBool            sc = PETSC_FALSE,flg;
2430 
2431   PetscFunctionBegin;
2432   ierr = PetscOptionsHead(PetscOptionsObject,"MPIAIJ options");CHKERRQ(ierr);
2433   ierr = PetscObjectOptionsBegin((PetscObject)A);
2434     if (A->ops->increaseoverlap == MatIncreaseOverlap_MPIAIJ_Scalable) sc = PETSC_TRUE;
2435     ierr = PetscOptionsBool("-mat_increase_overlap_scalable","Use a scalable algorithm to compute the overlap","MatIncreaseOverlap",sc,&sc,&flg);CHKERRQ(ierr);
2436     if (flg) {
2437       ierr = MatMPIAIJSetUseScalableIncreaseOverlap(A,sc);CHKERRQ(ierr);
2438     }
2439   ierr = PetscOptionsEnd();CHKERRQ(ierr);
2440   PetscFunctionReturn(0);
2441 }
2442 
2443 PetscErrorCode MatShift_MPIAIJ(Mat Y,PetscScalar a)
2444 {
2445   PetscErrorCode ierr;
2446   Mat_MPIAIJ     *maij = (Mat_MPIAIJ*)Y->data;
2447   Mat_SeqAIJ     *aij = (Mat_SeqAIJ*)maij->A->data;
2448 
2449   PetscFunctionBegin;
2450   if (!Y->preallocated) {
2451     ierr = MatMPIAIJSetPreallocation(Y,1,NULL,0,NULL);CHKERRQ(ierr);
2452   } else if (!aij->nz) {
2453     PetscInt nonew = aij->nonew;
2454     ierr = MatSeqAIJSetPreallocation(maij->A,1,NULL);CHKERRQ(ierr);
2455     aij->nonew = nonew;
2456   }
2457   ierr = MatShift_Basic(Y,a);CHKERRQ(ierr);
2458   PetscFunctionReturn(0);
2459 }
2460 
2461 PetscErrorCode MatMissingDiagonal_MPIAIJ(Mat A,PetscBool  *missing,PetscInt *d)
2462 {
2463   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2464   PetscErrorCode ierr;
2465 
2466   PetscFunctionBegin;
2467   if (A->rmap->n != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only works for square matrices");
2468   ierr = MatMissingDiagonal(a->A,missing,d);CHKERRQ(ierr);
2469   if (d) {
2470     PetscInt rstart;
2471     ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr);
2472     *d += rstart;
2473 
2474   }
2475   PetscFunctionReturn(0);
2476 }
2477 
2478 
2479 /* -------------------------------------------------------------------*/
2480 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
2481                                        MatGetRow_MPIAIJ,
2482                                        MatRestoreRow_MPIAIJ,
2483                                        MatMult_MPIAIJ,
2484                                 /* 4*/ MatMultAdd_MPIAIJ,
2485                                        MatMultTranspose_MPIAIJ,
2486                                        MatMultTransposeAdd_MPIAIJ,
2487                                        0,
2488                                        0,
2489                                        0,
2490                                 /*10*/ 0,
2491                                        0,
2492                                        0,
2493                                        MatSOR_MPIAIJ,
2494                                        MatTranspose_MPIAIJ,
2495                                 /*15*/ MatGetInfo_MPIAIJ,
2496                                        MatEqual_MPIAIJ,
2497                                        MatGetDiagonal_MPIAIJ,
2498                                        MatDiagonalScale_MPIAIJ,
2499                                        MatNorm_MPIAIJ,
2500                                 /*20*/ MatAssemblyBegin_MPIAIJ,
2501                                        MatAssemblyEnd_MPIAIJ,
2502                                        MatSetOption_MPIAIJ,
2503                                        MatZeroEntries_MPIAIJ,
2504                                 /*24*/ MatZeroRows_MPIAIJ,
2505                                        0,
2506                                        0,
2507                                        0,
2508                                        0,
2509                                 /*29*/ MatSetUp_MPIAIJ,
2510                                        0,
2511                                        0,
2512                                        MatGetDiagonalBlock_MPIAIJ,
2513                                        0,
2514                                 /*34*/ MatDuplicate_MPIAIJ,
2515                                        0,
2516                                        0,
2517                                        0,
2518                                        0,
2519                                 /*39*/ MatAXPY_MPIAIJ,
2520                                        MatCreateSubMatrices_MPIAIJ,
2521                                        MatIncreaseOverlap_MPIAIJ,
2522                                        MatGetValues_MPIAIJ,
2523                                        MatCopy_MPIAIJ,
2524                                 /*44*/ MatGetRowMax_MPIAIJ,
2525                                        MatScale_MPIAIJ,
2526                                        MatShift_MPIAIJ,
2527                                        MatDiagonalSet_MPIAIJ,
2528                                        MatZeroRowsColumns_MPIAIJ,
2529                                 /*49*/ MatSetRandom_MPIAIJ,
2530                                        0,
2531                                        0,
2532                                        0,
2533                                        0,
2534                                 /*54*/ MatFDColoringCreate_MPIXAIJ,
2535                                        0,
2536                                        MatSetUnfactored_MPIAIJ,
2537                                        MatPermute_MPIAIJ,
2538                                        0,
2539                                 /*59*/ MatCreateSubMatrix_MPIAIJ,
2540                                        MatDestroy_MPIAIJ,
2541                                        MatView_MPIAIJ,
2542                                        0,
2543                                        MatMatMatMult_MPIAIJ_MPIAIJ_MPIAIJ,
2544                                 /*64*/ MatMatMatMultSymbolic_MPIAIJ_MPIAIJ_MPIAIJ,
2545                                        MatMatMatMultNumeric_MPIAIJ_MPIAIJ_MPIAIJ,
2546                                        0,
2547                                        0,
2548                                        0,
2549                                 /*69*/ MatGetRowMaxAbs_MPIAIJ,
2550                                        MatGetRowMinAbs_MPIAIJ,
2551                                        0,
2552                                        0,
2553                                        0,
2554                                        0,
2555                                 /*75*/ MatFDColoringApply_AIJ,
2556                                        MatSetFromOptions_MPIAIJ,
2557                                        0,
2558                                        0,
2559                                        MatFindZeroDiagonals_MPIAIJ,
2560                                 /*80*/ 0,
2561                                        0,
2562                                        0,
2563                                 /*83*/ MatLoad_MPIAIJ,
2564                                        MatIsSymmetric_MPIAIJ,
2565                                        0,
2566                                        0,
2567                                        0,
2568                                        0,
2569                                 /*89*/ MatMatMult_MPIAIJ_MPIAIJ,
2570                                        MatMatMultSymbolic_MPIAIJ_MPIAIJ,
2571                                        MatMatMultNumeric_MPIAIJ_MPIAIJ,
2572                                        MatPtAP_MPIAIJ_MPIAIJ,
2573                                        MatPtAPSymbolic_MPIAIJ_MPIAIJ,
2574                                 /*94*/ MatPtAPNumeric_MPIAIJ_MPIAIJ,
2575                                        0,
2576                                        0,
2577                                        0,
2578                                        0,
2579                                 /*99*/ 0,
2580                                        0,
2581                                        0,
2582                                        MatConjugate_MPIAIJ,
2583                                        0,
2584                                 /*104*/MatSetValuesRow_MPIAIJ,
2585                                        MatRealPart_MPIAIJ,
2586                                        MatImaginaryPart_MPIAIJ,
2587                                        0,
2588                                        0,
2589                                 /*109*/0,
2590                                        0,
2591                                        MatGetRowMin_MPIAIJ,
2592                                        0,
2593                                        MatMissingDiagonal_MPIAIJ,
2594                                 /*114*/MatGetSeqNonzeroStructure_MPIAIJ,
2595                                        0,
2596                                        MatGetGhosts_MPIAIJ,
2597                                        0,
2598                                        0,
2599                                 /*119*/0,
2600                                        0,
2601                                        0,
2602                                        0,
2603                                        MatGetMultiProcBlock_MPIAIJ,
2604                                 /*124*/MatFindNonzeroRows_MPIAIJ,
2605                                        MatGetColumnNorms_MPIAIJ,
2606                                        MatInvertBlockDiagonal_MPIAIJ,
2607                                        0,
2608                                        MatCreateSubMatricesMPI_MPIAIJ,
2609                                 /*129*/0,
2610                                        MatTransposeMatMult_MPIAIJ_MPIAIJ,
2611                                        MatTransposeMatMultSymbolic_MPIAIJ_MPIAIJ,
2612                                        MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ,
2613                                        0,
2614                                 /*134*/0,
2615                                        0,
2616                                        MatRARt_MPIAIJ_MPIAIJ,
2617                                        0,
2618                                        0,
2619                                 /*139*/MatSetBlockSizes_MPIAIJ,
2620                                        0,
2621                                        0,
2622                                        MatFDColoringSetUp_MPIXAIJ,
2623                                        MatFindOffBlockDiagonalEntries_MPIAIJ,
2624                                 /*144*/MatCreateMPIMatConcatenateSeqMat_MPIAIJ
2625 };
2626 
2627 /* ----------------------------------------------------------------------------------------*/
2628 
2629 PetscErrorCode  MatStoreValues_MPIAIJ(Mat mat)
2630 {
2631   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2632   PetscErrorCode ierr;
2633 
2634   PetscFunctionBegin;
2635   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
2636   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
2637   PetscFunctionReturn(0);
2638 }
2639 
2640 PetscErrorCode  MatRetrieveValues_MPIAIJ(Mat mat)
2641 {
2642   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2643   PetscErrorCode ierr;
2644 
2645   PetscFunctionBegin;
2646   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
2647   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
2648   PetscFunctionReturn(0);
2649 }
2650 
2651 PetscErrorCode  MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
2652 {
2653   Mat_MPIAIJ     *b;
2654   PetscErrorCode ierr;
2655 
2656   PetscFunctionBegin;
2657   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
2658   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
2659   b = (Mat_MPIAIJ*)B->data;
2660 
2661 #if defined(PETSC_USE_CTABLE)
2662   ierr = PetscTableDestroy(&b->colmap);CHKERRQ(ierr);
2663 #else
2664   ierr = PetscFree(b->colmap);CHKERRQ(ierr);
2665 #endif
2666   ierr = PetscFree(b->garray);CHKERRQ(ierr);
2667   ierr = VecDestroy(&b->lvec);CHKERRQ(ierr);
2668   ierr = VecScatterDestroy(&b->Mvctx);CHKERRQ(ierr);
2669 
2670   /* Because the B will have been resized we simply destroy it and create a new one each time */
2671   ierr = MatDestroy(&b->B);CHKERRQ(ierr);
2672   ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
2673   ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr);
2674   ierr = MatSetBlockSizesFromMats(b->B,B,B);CHKERRQ(ierr);
2675   ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr);
2676   ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->B);CHKERRQ(ierr);
2677 
2678   if (!B->preallocated) {
2679     ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
2680     ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr);
2681     ierr = MatSetBlockSizesFromMats(b->A,B,B);CHKERRQ(ierr);
2682     ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr);
2683     ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->A);CHKERRQ(ierr);
2684   }
2685 
2686   ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr);
2687   ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr);
2688   B->preallocated  = PETSC_TRUE;
2689   B->was_assembled = PETSC_FALSE;
2690   B->assembled     = PETSC_FALSE;;
2691   PetscFunctionReturn(0);
2692 }
2693 
2694 PetscErrorCode MatResetPreallocation_MPIAIJ(Mat B)
2695 {
2696   Mat_MPIAIJ     *b;
2697   PetscErrorCode ierr;
2698 
2699   PetscFunctionBegin;
2700   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
2701   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
2702   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
2703   b = (Mat_MPIAIJ*)B->data;
2704 
2705 #if defined(PETSC_USE_CTABLE)
2706   ierr = PetscTableDestroy(&b->colmap);CHKERRQ(ierr);
2707 #else
2708   ierr = PetscFree(b->colmap);CHKERRQ(ierr);
2709 #endif
2710   ierr = PetscFree(b->garray);CHKERRQ(ierr);
2711   ierr = VecDestroy(&b->lvec);CHKERRQ(ierr);
2712   ierr = VecScatterDestroy(&b->Mvctx);CHKERRQ(ierr);
2713 
2714   ierr = MatResetPreallocation(b->A);CHKERRQ(ierr);
2715   ierr = MatResetPreallocation(b->B);CHKERRQ(ierr);
2716   B->preallocated  = PETSC_TRUE;
2717   B->was_assembled = PETSC_FALSE;
2718   B->assembled = PETSC_FALSE;
2719   PetscFunctionReturn(0);
2720 }
2721 
2722 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
2723 {
2724   Mat            mat;
2725   Mat_MPIAIJ     *a,*oldmat = (Mat_MPIAIJ*)matin->data;
2726   PetscErrorCode ierr;
2727 
2728   PetscFunctionBegin;
2729   *newmat = 0;
2730   ierr    = MatCreate(PetscObjectComm((PetscObject)matin),&mat);CHKERRQ(ierr);
2731   ierr    = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr);
2732   ierr    = MatSetBlockSizesFromMats(mat,matin,matin);CHKERRQ(ierr);
2733   ierr    = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr);
2734   ierr    = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
2735   a       = (Mat_MPIAIJ*)mat->data;
2736 
2737   mat->factortype   = matin->factortype;
2738   mat->assembled    = PETSC_TRUE;
2739   mat->insertmode   = NOT_SET_VALUES;
2740   mat->preallocated = PETSC_TRUE;
2741 
2742   a->size         = oldmat->size;
2743   a->rank         = oldmat->rank;
2744   a->donotstash   = oldmat->donotstash;
2745   a->roworiented  = oldmat->roworiented;
2746   a->rowindices   = 0;
2747   a->rowvalues    = 0;
2748   a->getrowactive = PETSC_FALSE;
2749 
2750   ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr);
2751   ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr);
2752 
2753   if (oldmat->colmap) {
2754 #if defined(PETSC_USE_CTABLE)
2755     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
2756 #else
2757     ierr = PetscMalloc1(mat->cmap->N,&a->colmap);CHKERRQ(ierr);
2758     ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
2759     ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
2760 #endif
2761   } else a->colmap = 0;
2762   if (oldmat->garray) {
2763     PetscInt len;
2764     len  = oldmat->B->cmap->n;
2765     ierr = PetscMalloc1(len+1,&a->garray);CHKERRQ(ierr);
2766     ierr = PetscLogObjectMemory((PetscObject)mat,len*sizeof(PetscInt));CHKERRQ(ierr);
2767     if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); }
2768   } else a->garray = 0;
2769 
2770   ierr    = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
2771   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->lvec);CHKERRQ(ierr);
2772   ierr    = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
2773   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx);CHKERRQ(ierr);
2774 
2775   if (oldmat->Mvctx_mpi1) {
2776     ierr    = VecScatterCopy(oldmat->Mvctx_mpi1,&a->Mvctx_mpi1);CHKERRQ(ierr);
2777     ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx_mpi1);CHKERRQ(ierr);
2778   }
2779 
2780   ierr    = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
2781   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr);
2782   ierr    = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
2783   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->B);CHKERRQ(ierr);
2784   ierr    = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr);
2785   *newmat = mat;
2786   PetscFunctionReturn(0);
2787 }
2788 
2789 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer)
2790 {
2791   PetscScalar    *vals,*svals;
2792   MPI_Comm       comm;
2793   PetscErrorCode ierr;
2794   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
2795   PetscInt       i,nz,j,rstart,rend,mmax,maxnz = 0;
2796   PetscInt       header[4],*rowlengths = 0,M,N,m,*cols;
2797   PetscInt       *ourlens = NULL,*procsnz = NULL,*offlens = NULL,jj,*mycols,*smycols;
2798   PetscInt       cend,cstart,n,*rowners;
2799   int            fd;
2800   PetscInt       bs = newMat->rmap->bs;
2801 
2802   PetscFunctionBegin;
2803   /* force binary viewer to load .info file if it has not yet done so */
2804   ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr);
2805   ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr);
2806   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2807   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2808   ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
2809   if (!rank) {
2810     ierr = PetscBinaryRead(fd,(char*)header,4,PETSC_INT);CHKERRQ(ierr);
2811     if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
2812     if (header[3] < 0) SETERRQ(PetscObjectComm((PetscObject)newMat),PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format on disk,cannot load as MATMPIAIJ");
2813   }
2814 
2815   ierr = PetscOptionsBegin(comm,NULL,"Options for loading MATMPIAIJ matrix","Mat");CHKERRQ(ierr);
2816   ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,NULL);CHKERRQ(ierr);
2817   ierr = PetscOptionsEnd();CHKERRQ(ierr);
2818   if (bs < 0) bs = 1;
2819 
2820   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
2821   M    = header[1]; N = header[2];
2822 
2823   /* If global sizes are set, check if they are consistent with that given in the file */
2824   if (newMat->rmap->N >= 0 && newMat->rmap->N != M) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Inconsistent # of rows:Matrix in file has (%D) and input matrix has (%D)",newMat->rmap->N,M);
2825   if (newMat->cmap->N >=0 && newMat->cmap->N != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Inconsistent # of cols:Matrix in file has (%D) and input matrix has (%D)",newMat->cmap->N,N);
2826 
2827   /* determine ownership of all (block) rows */
2828   if (M%bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows (%d) and block size (%d)",M,bs);
2829   if (newMat->rmap->n < 0) m = bs*((M/bs)/size + (((M/bs) % size) > rank));    /* PETSC_DECIDE */
2830   else m = newMat->rmap->n; /* Set by user */
2831 
2832   ierr = PetscMalloc1(size+1,&rowners);CHKERRQ(ierr);
2833   ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
2834 
2835   /* First process needs enough room for process with most rows */
2836   if (!rank) {
2837     mmax = rowners[1];
2838     for (i=2; i<=size; i++) {
2839       mmax = PetscMax(mmax, rowners[i]);
2840     }
2841   } else mmax = -1;             /* unused, but compilers complain */
2842 
2843   rowners[0] = 0;
2844   for (i=2; i<=size; i++) {
2845     rowners[i] += rowners[i-1];
2846   }
2847   rstart = rowners[rank];
2848   rend   = rowners[rank+1];
2849 
2850   /* distribute row lengths to all processors */
2851   ierr = PetscMalloc2(m,&ourlens,m,&offlens);CHKERRQ(ierr);
2852   if (!rank) {
2853     ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr);
2854     ierr = PetscMalloc1(mmax,&rowlengths);CHKERRQ(ierr);
2855     ierr = PetscCalloc1(size,&procsnz);CHKERRQ(ierr);
2856     for (j=0; j<m; j++) {
2857       procsnz[0] += ourlens[j];
2858     }
2859     for (i=1; i<size; i++) {
2860       ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr);
2861       /* calculate the number of nonzeros on each processor */
2862       for (j=0; j<rowners[i+1]-rowners[i]; j++) {
2863         procsnz[i] += rowlengths[j];
2864       }
2865       ierr = MPIULong_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
2866     }
2867     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
2868   } else {
2869     ierr = MPIULong_Recv(ourlens,m,MPIU_INT,0,tag,comm);CHKERRQ(ierr);
2870   }
2871 
2872   if (!rank) {
2873     /* determine max buffer needed and allocate it */
2874     maxnz = 0;
2875     for (i=0; i<size; i++) {
2876       maxnz = PetscMax(maxnz,procsnz[i]);
2877     }
2878     ierr = PetscMalloc1(maxnz,&cols);CHKERRQ(ierr);
2879 
2880     /* read in my part of the matrix column indices  */
2881     nz   = procsnz[0];
2882     ierr = PetscMalloc1(nz,&mycols);CHKERRQ(ierr);
2883     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
2884 
2885     /* read in every one elses and ship off */
2886     for (i=1; i<size; i++) {
2887       nz   = procsnz[i];
2888       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
2889       ierr = MPIULong_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
2890     }
2891     ierr = PetscFree(cols);CHKERRQ(ierr);
2892   } else {
2893     /* determine buffer space needed for message */
2894     nz = 0;
2895     for (i=0; i<m; i++) {
2896       nz += ourlens[i];
2897     }
2898     ierr = PetscMalloc1(nz,&mycols);CHKERRQ(ierr);
2899 
2900     /* receive message of column indices*/
2901     ierr = MPIULong_Recv(mycols,nz,MPIU_INT,0,tag,comm);CHKERRQ(ierr);
2902   }
2903 
2904   /* determine column ownership if matrix is not square */
2905   if (N != M) {
2906     if (newMat->cmap->n < 0) n = N/size + ((N % size) > rank);
2907     else n = newMat->cmap->n;
2908     ierr   = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
2909     cstart = cend - n;
2910   } else {
2911     cstart = rstart;
2912     cend   = rend;
2913     n      = cend - cstart;
2914   }
2915 
2916   /* loop over local rows, determining number of off diagonal entries */
2917   ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr);
2918   jj   = 0;
2919   for (i=0; i<m; i++) {
2920     for (j=0; j<ourlens[i]; j++) {
2921       if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++;
2922       jj++;
2923     }
2924   }
2925 
2926   for (i=0; i<m; i++) {
2927     ourlens[i] -= offlens[i];
2928   }
2929   ierr = MatSetSizes(newMat,m,n,M,N);CHKERRQ(ierr);
2930 
2931   if (bs > 1) {ierr = MatSetBlockSize(newMat,bs);CHKERRQ(ierr);}
2932 
2933   ierr = MatMPIAIJSetPreallocation(newMat,0,ourlens,0,offlens);CHKERRQ(ierr);
2934 
2935   for (i=0; i<m; i++) {
2936     ourlens[i] += offlens[i];
2937   }
2938 
2939   if (!rank) {
2940     ierr = PetscMalloc1(maxnz+1,&vals);CHKERRQ(ierr);
2941 
2942     /* read in my part of the matrix numerical values  */
2943     nz   = procsnz[0];
2944     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2945 
2946     /* insert into matrix */
2947     jj      = rstart;
2948     smycols = mycols;
2949     svals   = vals;
2950     for (i=0; i<m; i++) {
2951       ierr     = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
2952       smycols += ourlens[i];
2953       svals   += ourlens[i];
2954       jj++;
2955     }
2956 
2957     /* read in other processors and ship out */
2958     for (i=1; i<size; i++) {
2959       nz   = procsnz[i];
2960       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2961       ierr = MPIULong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr);
2962     }
2963     ierr = PetscFree(procsnz);CHKERRQ(ierr);
2964   } else {
2965     /* receive numeric values */
2966     ierr = PetscMalloc1(nz+1,&vals);CHKERRQ(ierr);
2967 
2968     /* receive message of values*/
2969     ierr = MPIULong_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr);
2970 
2971     /* insert into matrix */
2972     jj      = rstart;
2973     smycols = mycols;
2974     svals   = vals;
2975     for (i=0; i<m; i++) {
2976       ierr     = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
2977       smycols += ourlens[i];
2978       svals   += ourlens[i];
2979       jj++;
2980     }
2981   }
2982   ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr);
2983   ierr = PetscFree(vals);CHKERRQ(ierr);
2984   ierr = PetscFree(mycols);CHKERRQ(ierr);
2985   ierr = PetscFree(rowners);CHKERRQ(ierr);
2986   ierr = MatAssemblyBegin(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2987   ierr = MatAssemblyEnd(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2988   PetscFunctionReturn(0);
2989 }
2990 
2991 /* Not scalable because of ISAllGather() unless getting all columns. */
2992 PetscErrorCode ISGetSeqIS_Private(Mat mat,IS iscol,IS *isseq)
2993 {
2994   PetscErrorCode ierr;
2995   IS             iscol_local;
2996   PetscBool      isstride;
2997   PetscMPIInt    lisstride=0,gisstride;
2998 
2999   PetscFunctionBegin;
3000   /* check if we are grabbing all columns*/
3001   ierr = PetscObjectTypeCompare((PetscObject)iscol,ISSTRIDE,&isstride);CHKERRQ(ierr);
3002 
3003   if (isstride) {
3004     PetscInt  start,len,mstart,mlen;
3005     ierr = ISStrideGetInfo(iscol,&start,NULL);CHKERRQ(ierr);
3006     ierr = ISGetLocalSize(iscol,&len);CHKERRQ(ierr);
3007     ierr = MatGetOwnershipRangeColumn(mat,&mstart,&mlen);CHKERRQ(ierr);
3008     if (mstart == start && mlen-mstart == len) lisstride = 1;
3009   }
3010 
3011   ierr = MPIU_Allreduce(&lisstride,&gisstride,1,MPI_INT,MPI_MIN,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
3012   if (gisstride) {
3013     PetscInt N;
3014     ierr = MatGetSize(mat,NULL,&N);CHKERRQ(ierr);
3015     ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),N,0,1,&iscol_local);CHKERRQ(ierr);
3016     ierr = ISSetIdentity(iscol_local);CHKERRQ(ierr);
3017     ierr = PetscInfo(mat,"Optimizing for obtaining all columns of the matrix; skipping ISAllGather()\n");CHKERRQ(ierr);
3018   } else {
3019     PetscInt cbs;
3020     ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr);
3021     ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
3022     ierr = ISSetBlockSize(iscol_local,cbs);CHKERRQ(ierr);
3023   }
3024 
3025   *isseq = iscol_local;
3026   PetscFunctionReturn(0);
3027 }
3028 
3029 /*
3030  Used by MatCreateSubMatrix_MPIAIJ_SameRowColDist() to avoid ISAllGather() and global size of iscol_local
3031  (see MatCreateSubMatrix_MPIAIJ_nonscalable)
3032 
3033  Input Parameters:
3034    mat - matrix
3035    isrow - parallel row index set; its local indices are a subset of local columns of mat,
3036            i.e., mat->rstart <= isrow[i] < mat->rend
3037    iscol - parallel column index set; its local indices are a subset of local columns of mat,
3038            i.e., mat->cstart <= iscol[i] < mat->cend
3039  Output Parameter:
3040    isrow_d,iscol_d - sequential row and column index sets for retrieving mat->A
3041    iscol_o - sequential column index set for retrieving mat->B
3042    garray - column map; garray[i] indicates global location of iscol_o[i] in iscol
3043  */
3044 PetscErrorCode ISGetSeqIS_SameColDist_Private(Mat mat,IS isrow,IS iscol,IS *isrow_d,IS *iscol_d,IS *iscol_o,const PetscInt *garray[])
3045 {
3046   PetscErrorCode ierr;
3047   Vec            x,cmap;
3048   const PetscInt *is_idx;
3049   PetscScalar    *xarray,*cmaparray;
3050   PetscInt       ncols,isstart,*idx,m,rstart,*cmap1,count;
3051   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)mat->data;
3052   Mat            B=a->B;
3053   Vec            lvec=a->lvec,lcmap;
3054   PetscInt       i,cstart,cend,Bn=B->cmap->N;
3055   MPI_Comm       comm;
3056   VecScatter     Mvctx=a->Mvctx;
3057 
3058   PetscFunctionBegin;
3059   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3060   ierr = ISGetLocalSize(iscol,&ncols);CHKERRQ(ierr);
3061 
3062   /* (1) iscol is a sub-column vector of mat, pad it with '-1.' to form a full vector x */
3063   ierr = MatCreateVecs(mat,&x,NULL);CHKERRQ(ierr);
3064   ierr = VecSet(x,-1.0);CHKERRQ(ierr);
3065   ierr = VecDuplicate(x,&cmap);CHKERRQ(ierr);
3066   ierr = VecSet(cmap,-1.0);CHKERRQ(ierr);
3067 
3068   /* Get start indices */
3069   ierr = MPI_Scan(&ncols,&isstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3070   isstart -= ncols;
3071   ierr = MatGetOwnershipRangeColumn(mat,&cstart,&cend);CHKERRQ(ierr);
3072 
3073   ierr = ISGetIndices(iscol,&is_idx);CHKERRQ(ierr);
3074   ierr = VecGetArray(x,&xarray);CHKERRQ(ierr);
3075   ierr = VecGetArray(cmap,&cmaparray);CHKERRQ(ierr);
3076   ierr = PetscMalloc1(ncols,&idx);CHKERRQ(ierr);
3077   for (i=0; i<ncols; i++) {
3078     xarray[is_idx[i]-cstart]    = (PetscScalar)is_idx[i];
3079     cmaparray[is_idx[i]-cstart] = i + isstart;      /* global index of iscol[i] */
3080     idx[i]                      = is_idx[i]-cstart; /* local index of iscol[i]  */
3081   }
3082   ierr = VecRestoreArray(x,&xarray);CHKERRQ(ierr);
3083   ierr = VecRestoreArray(cmap,&cmaparray);CHKERRQ(ierr);
3084   ierr = ISRestoreIndices(iscol,&is_idx);CHKERRQ(ierr);
3085 
3086   /* Get iscol_d */
3087   ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,iscol_d);CHKERRQ(ierr);
3088   ierr = ISGetBlockSize(iscol,&i);CHKERRQ(ierr);
3089   ierr = ISSetBlockSize(*iscol_d,i);CHKERRQ(ierr);
3090 
3091   /* Get isrow_d */
3092   ierr = ISGetLocalSize(isrow,&m);CHKERRQ(ierr);
3093   rstart = mat->rmap->rstart;
3094   ierr = PetscMalloc1(m,&idx);CHKERRQ(ierr);
3095   ierr = ISGetIndices(isrow,&is_idx);CHKERRQ(ierr);
3096   for (i=0; i<m; i++) idx[i] = is_idx[i]-rstart;
3097   ierr = ISRestoreIndices(isrow,&is_idx);CHKERRQ(ierr);
3098 
3099   ierr = ISCreateGeneral(PETSC_COMM_SELF,m,idx,PETSC_OWN_POINTER,isrow_d);CHKERRQ(ierr);
3100   ierr = ISGetBlockSize(isrow,&i);CHKERRQ(ierr);
3101   ierr = ISSetBlockSize(*isrow_d,i);CHKERRQ(ierr);
3102 
3103   /* (2) Scatter x and cmap using aij->Mvctx to get their off-process portions (see MatMult_MPIAIJ) */
3104   ierr = VecScatterBegin(Mvctx,x,lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3105   ierr = VecScatterEnd(Mvctx,x,lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3106 
3107   ierr = VecDuplicate(lvec,&lcmap);CHKERRQ(ierr);
3108 
3109   ierr = VecScatterBegin(Mvctx,cmap,lcmap,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3110   ierr = VecScatterEnd(Mvctx,cmap,lcmap,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3111 
3112   /* (3) create sequential iscol_o (a subset of iscol) and isgarray */
3113   /* off-process column indices */
3114   count = 0;
3115   ierr = PetscMalloc1(Bn,&idx);CHKERRQ(ierr);
3116   ierr = PetscMalloc1(Bn,&cmap1);CHKERRQ(ierr);
3117 
3118   ierr = VecGetArray(lvec,&xarray);CHKERRQ(ierr);
3119   ierr = VecGetArray(lcmap,&cmaparray);CHKERRQ(ierr);
3120   for (i=0; i<Bn; i++) {
3121     if (PetscRealPart(xarray[i]) > -1.0) {
3122       idx[count]     = i;                   /* local column index in off-diagonal part B */
3123       cmap1[count] = (PetscInt)PetscRealPart(cmaparray[i]);  /* column index in submat */
3124       count++;
3125     }
3126   }
3127   ierr = VecRestoreArray(lvec,&xarray);CHKERRQ(ierr);
3128   ierr = VecRestoreArray(lcmap,&cmaparray);CHKERRQ(ierr);
3129 
3130   ierr = ISCreateGeneral(PETSC_COMM_SELF,count,idx,PETSC_COPY_VALUES,iscol_o);CHKERRQ(ierr);
3131   /* cannot ensure iscol_o has same blocksize as iscol! */
3132 
3133   ierr = PetscFree(idx);CHKERRQ(ierr);
3134   *garray = cmap1;
3135 
3136   ierr = VecDestroy(&x);CHKERRQ(ierr);
3137   ierr = VecDestroy(&cmap);CHKERRQ(ierr);
3138   ierr = VecDestroy(&lcmap);CHKERRQ(ierr);
3139   PetscFunctionReturn(0);
3140 }
3141 
3142 /* isrow and iscol have same processor distribution as mat, output *submat is a submatrix of local mat */
3143 PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowColDist(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *submat)
3144 {
3145   PetscErrorCode ierr;
3146   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)mat->data,*asub;
3147   Mat            M = NULL;
3148   MPI_Comm       comm;
3149   IS             iscol_d,isrow_d,iscol_o;
3150   Mat            Asub = NULL,Bsub = NULL;
3151   PetscInt       n;
3152 
3153   PetscFunctionBegin;
3154   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3155 
3156   if (call == MAT_REUSE_MATRIX) {
3157     /* Retrieve isrow_d, iscol_d and iscol_o from submat */
3158     ierr = PetscObjectQuery((PetscObject)*submat,"isrow_d",(PetscObject*)&isrow_d);CHKERRQ(ierr);
3159     if (!isrow_d) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"isrow_d passed in was not used before, cannot reuse");
3160 
3161     ierr = PetscObjectQuery((PetscObject)*submat,"iscol_d",(PetscObject*)&iscol_d);CHKERRQ(ierr);
3162     if (!iscol_d) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"iscol_d passed in was not used before, cannot reuse");
3163 
3164     ierr = PetscObjectQuery((PetscObject)*submat,"iscol_o",(PetscObject*)&iscol_o);CHKERRQ(ierr);
3165     if (!iscol_o) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"iscol_o passed in was not used before, cannot reuse");
3166 
3167     /* Update diagonal and off-diagonal portions of submat */
3168     asub = (Mat_MPIAIJ*)(*submat)->data;
3169     ierr = MatCreateSubMatrix_SeqAIJ(a->A,isrow_d,iscol_d,PETSC_DECIDE,MAT_REUSE_MATRIX,&asub->A);CHKERRQ(ierr);
3170     ierr = ISGetLocalSize(iscol_o,&n);CHKERRQ(ierr);
3171     if (n) {
3172       ierr = MatCreateSubMatrix_SeqAIJ(a->B,isrow_d,iscol_o,PETSC_DECIDE,MAT_REUSE_MATRIX,&asub->B);CHKERRQ(ierr);
3173     }
3174     ierr = MatAssemblyBegin(*submat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3175     ierr = MatAssemblyEnd(*submat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3176 
3177   } else { /* call == MAT_INITIAL_MATRIX) */
3178     const PetscInt *garray;
3179     PetscInt        BsubN;
3180 
3181     /* Create isrow_d, iscol_d, iscol_o and isgarray (replace isgarray with array?) */
3182     ierr = ISGetSeqIS_SameColDist_Private(mat,isrow,iscol,&isrow_d,&iscol_d,&iscol_o,&garray);CHKERRQ(ierr);
3183 
3184     /* Create local submatrices Asub and Bsub */
3185     ierr = MatCreateSubMatrix_SeqAIJ(a->A,isrow_d,iscol_d,PETSC_DECIDE,MAT_INITIAL_MATRIX,&Asub);CHKERRQ(ierr);
3186     ierr = MatCreateSubMatrix_SeqAIJ(a->B,isrow_d,iscol_o,PETSC_DECIDE,MAT_INITIAL_MATRIX,&Bsub);CHKERRQ(ierr);
3187 
3188     /* Create submatrix M */
3189     ierr = MatCreateMPIAIJWithSeqAIJ(comm,Asub,Bsub,garray,&M);CHKERRQ(ierr);
3190 
3191     /* If Bsub has empty columns, compress iscol_o such that it will retrieve condensed Bsub from a->B during reuse */
3192     asub = (Mat_MPIAIJ*)M->data;
3193 
3194     ierr = ISGetLocalSize(iscol_o,&BsubN);CHKERRQ(ierr);
3195     n = asub->B->cmap->N;
3196     if (BsubN > n) {
3197       /* This case can be tested using ~petsc/src/tao/bound/examples/tutorials/runplate2_3 */
3198       const PetscInt *idx;
3199       PetscInt       i,j,*idx_new,*subgarray = asub->garray;
3200       ierr = PetscInfo2(M,"submatrix Bn %D != BsubN %D, update iscol_o\n",n,BsubN);CHKERRQ(ierr);
3201 
3202       ierr = PetscMalloc1(n,&idx_new);CHKERRQ(ierr);
3203       j = 0;
3204       ierr = ISGetIndices(iscol_o,&idx);CHKERRQ(ierr);
3205       for (i=0; i<n; i++) {
3206         if (j >= BsubN) break;
3207         while (subgarray[i] > garray[j]) j++;
3208 
3209         if (subgarray[i] == garray[j]) {
3210           idx_new[i] = idx[j++];
3211         } else SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"subgarray[%D]=%D cannot < garray[%D]=%D",i,subgarray[i],j,garray[j]);
3212       }
3213       ierr = ISRestoreIndices(iscol_o,&idx);CHKERRQ(ierr);
3214 
3215       ierr = ISDestroy(&iscol_o);CHKERRQ(ierr);
3216       ierr = ISCreateGeneral(PETSC_COMM_SELF,n,idx_new,PETSC_OWN_POINTER,&iscol_o);CHKERRQ(ierr);
3217 
3218     } else if (BsubN < n) {
3219       SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Columns of Bsub cannot be smaller than B's",BsubN,asub->B->cmap->N);
3220     }
3221 
3222     ierr = PetscFree(garray);CHKERRQ(ierr);
3223     *submat = M;
3224 
3225     /* Save isrow_d, iscol_d and iscol_o used in processor for next request */
3226     ierr = PetscObjectCompose((PetscObject)M,"isrow_d",(PetscObject)isrow_d);CHKERRQ(ierr);
3227     ierr = ISDestroy(&isrow_d);CHKERRQ(ierr);
3228 
3229     ierr = PetscObjectCompose((PetscObject)M,"iscol_d",(PetscObject)iscol_d);CHKERRQ(ierr);
3230     ierr = ISDestroy(&iscol_d);CHKERRQ(ierr);
3231 
3232     ierr = PetscObjectCompose((PetscObject)M,"iscol_o",(PetscObject)iscol_o);CHKERRQ(ierr);
3233     ierr = ISDestroy(&iscol_o);CHKERRQ(ierr);
3234   }
3235   PetscFunctionReturn(0);
3236 }
3237 
3238 PetscErrorCode MatCreateSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat)
3239 {
3240   PetscErrorCode ierr;
3241   IS             iscol_local=NULL,isrow_d;
3242   PetscInt       csize;
3243   PetscInt       n,i,j,start,end;
3244   PetscBool      sameRowDist=PETSC_FALSE,sameDist[2],tsameDist[2];
3245   MPI_Comm       comm;
3246 
3247   PetscFunctionBegin;
3248   /* If isrow has same processor distribution as mat,
3249      call MatCreateSubMatrix_MPIAIJ_SameRowDist() to avoid using a hash table with global size of iscol */
3250   if (call == MAT_REUSE_MATRIX) {
3251     ierr = PetscObjectQuery((PetscObject)*newmat,"isrow_d",(PetscObject*)&isrow_d);CHKERRQ(ierr);
3252     if (isrow_d) {
3253       sameRowDist  = PETSC_TRUE;
3254       tsameDist[1] = PETSC_TRUE; /* sameColDist */
3255     } else {
3256       ierr = PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_local);CHKERRQ(ierr);
3257       if (iscol_local) {
3258         sameRowDist  = PETSC_TRUE;
3259         tsameDist[1] = PETSC_FALSE; /* !sameColDist */
3260       }
3261     }
3262   } else {
3263     /* Check if isrow has same processor distribution as mat */
3264     sameDist[0] = PETSC_FALSE;
3265     ierr = ISGetLocalSize(isrow,&n);CHKERRQ(ierr);
3266     if (!n) {
3267       sameDist[0] = PETSC_TRUE;
3268     } else {
3269       ierr = ISGetMinMax(isrow,&i,&j);CHKERRQ(ierr);
3270       ierr = MatGetOwnershipRange(mat,&start,&end);CHKERRQ(ierr);
3271       if (i >= start && j < end) {
3272         sameDist[0] = PETSC_TRUE;
3273       }
3274     }
3275 
3276     /* Check if iscol has same processor distribution as mat */
3277     sameDist[1] = PETSC_FALSE;
3278     ierr = ISGetLocalSize(iscol,&n);CHKERRQ(ierr);
3279     if (!n) {
3280       sameDist[1] = PETSC_TRUE;
3281     } else {
3282       ierr = ISGetMinMax(iscol,&i,&j);CHKERRQ(ierr);
3283       ierr = MatGetOwnershipRangeColumn(mat,&start,&end);CHKERRQ(ierr);
3284       if (i >= start && j < end) sameDist[1] = PETSC_TRUE;
3285     }
3286 
3287     ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3288     ierr = MPIU_Allreduce(&sameDist,&tsameDist,2,MPIU_BOOL,MPI_LAND,comm);CHKERRQ(ierr);
3289     sameRowDist = tsameDist[0];
3290   }
3291 
3292   if (sameRowDist) {
3293     if (tsameDist[1]) { /* sameRowDist & sameColDist */
3294       /* isrow and iscol have same processor distribution as mat */
3295       ierr = MatCreateSubMatrix_MPIAIJ_SameRowColDist(mat,isrow,iscol,call,newmat);CHKERRQ(ierr);
3296       PetscFunctionReturn(0);
3297     } else { /* sameRowDist */
3298       /* isrow has same processor distribution as mat */
3299       if (call == MAT_INITIAL_MATRIX) {
3300         PetscBool sorted;
3301         ierr = ISGetSeqIS_Private(mat,iscol,&iscol_local);CHKERRQ(ierr);
3302         ierr = ISGetLocalSize(iscol_local,&n);CHKERRQ(ierr); /* local size of iscol_local = global columns of newmat */
3303         ierr = ISGetSize(iscol,&i);CHKERRQ(ierr);
3304         if (n != i) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"n %d != size of iscol %d",n,i);
3305 
3306         ierr = ISSorted(iscol_local,&sorted);CHKERRQ(ierr);
3307         if (sorted) {
3308           /* MatCreateSubMatrix_MPIAIJ_SameRowDist() requires iscol_local be sorted; it can have duplicate indices */
3309           ierr = MatCreateSubMatrix_MPIAIJ_SameRowDist(mat,isrow,iscol,iscol_local,MAT_INITIAL_MATRIX,newmat);CHKERRQ(ierr);
3310           PetscFunctionReturn(0);
3311         }
3312       } else { /* call == MAT_REUSE_MATRIX */
3313         IS    iscol_sub;
3314         ierr = PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_sub);CHKERRQ(ierr);
3315         if (iscol_sub) {
3316           ierr = MatCreateSubMatrix_MPIAIJ_SameRowDist(mat,isrow,iscol,NULL,call,newmat);CHKERRQ(ierr);
3317           PetscFunctionReturn(0);
3318         }
3319       }
3320     }
3321   }
3322 
3323   /* General case: iscol -> iscol_local which has global size of iscol */
3324   if (call == MAT_REUSE_MATRIX) {
3325     ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr);
3326     if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3327   } else {
3328     if (!iscol_local) {
3329       ierr = ISGetSeqIS_Private(mat,iscol,&iscol_local);CHKERRQ(ierr);
3330     }
3331   }
3332 
3333   ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
3334   ierr = MatCreateSubMatrix_MPIAIJ_nonscalable(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr);
3335 
3336   if (call == MAT_INITIAL_MATRIX) {
3337     ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
3338     ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
3339   }
3340   PetscFunctionReturn(0);
3341 }
3342 
3343 /*@C
3344      MatCreateMPIAIJWithSeqAIJ - creates a MPIAIJ matrix using SeqAIJ matrices that contain the "diagonal"
3345          and "off-diagonal" part of the matrix in CSR format.
3346 
3347    Collective on MPI_Comm
3348 
3349    Input Parameters:
3350 +  comm - MPI communicator
3351 .  A - "diagonal" portion of matrix
3352 .  B - "off-diagonal" portion of matrix, may have empty columns, will be destroyed by this routine
3353 -  garray - global index of B columns
3354 
3355    Output Parameter:
3356 .   mat - the matrix, with input A as its local diagonal matrix
3357    Level: advanced
3358 
3359    Notes:
3360        See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix.
3361        A becomes part of output mat, B is destroyed by this routine. The user cannot use A and B anymore.
3362 
3363 .seealso: MatCreateMPIAIJWithSplitArrays()
3364 @*/
3365 PetscErrorCode MatCreateMPIAIJWithSeqAIJ(MPI_Comm comm,Mat A,Mat B,const PetscInt garray[],Mat *mat)
3366 {
3367   PetscErrorCode ierr;
3368   Mat_MPIAIJ     *maij;
3369   Mat_SeqAIJ     *b=(Mat_SeqAIJ*)B->data,*bnew;
3370   PetscInt       *oi=b->i,*oj=b->j,i,nz,col;
3371   PetscScalar    *oa=b->a;
3372   Mat            Bnew;
3373   PetscInt       m,n,N;
3374 
3375   PetscFunctionBegin;
3376   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
3377   ierr = MatGetSize(A,&m,&n);CHKERRQ(ierr);
3378   if (m != B->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Am %D != Bm %D",m,B->rmap->N);
3379   if (A->rmap->bs != B->rmap->bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"A row bs %D != B row bs %D",A->rmap->bs,B->rmap->bs);
3380   /* remove check below; When B is created using iscol_o from ISGetSeqIS_SameColDist_Private(), its bs may not be same as A */
3381   /* if (A->cmap->bs != B->cmap->bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"A column bs %D != B column bs %D",A->cmap->bs,B->cmap->bs); */
3382 
3383   /* Get global columns of mat */
3384   ierr = MPIU_Allreduce(&n,&N,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3385 
3386   ierr = MatSetSizes(*mat,m,n,PETSC_DECIDE,N);CHKERRQ(ierr);
3387   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
3388   ierr = MatSetBlockSizes(*mat,A->rmap->bs,A->cmap->bs);CHKERRQ(ierr);
3389   maij = (Mat_MPIAIJ*)(*mat)->data;
3390 
3391   (*mat)->preallocated = PETSC_TRUE;
3392 
3393   ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr);
3394   ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr);
3395 
3396   /* Set A as diagonal portion of *mat */
3397   maij->A = A;
3398 
3399   nz = oi[m];
3400   for (i=0; i<nz; i++) {
3401     col   = oj[i];
3402     oj[i] = garray[col];
3403   }
3404 
3405    /* Set Bnew as off-diagonal portion of *mat */
3406   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,N,oi,oj,oa,&Bnew);CHKERRQ(ierr);
3407   bnew        = (Mat_SeqAIJ*)Bnew->data;
3408   bnew->maxnz = b->maxnz; /* allocated nonzeros of B */
3409   maij->B     = Bnew;
3410 
3411   if (B->rmap->N != Bnew->rmap->N) SETERRQ2(PETSC_COMM_SELF,0,"BN %d != BnewN %d",B->rmap->N,Bnew->rmap->N);
3412 
3413   b->singlemalloc = PETSC_FALSE; /* B arrays are shared by Bnew */
3414   b->free_a       = PETSC_FALSE;
3415   b->free_ij      = PETSC_FALSE;
3416   ierr = MatDestroy(&B);CHKERRQ(ierr);
3417 
3418   bnew->singlemalloc = PETSC_TRUE; /* arrays will be freed by MatDestroy(&Bnew) */
3419   bnew->free_a       = PETSC_TRUE;
3420   bnew->free_ij      = PETSC_TRUE;
3421 
3422   /* condense columns of maij->B */
3423   ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr);
3424   ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3425   ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3426   ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_FALSE);CHKERRQ(ierr);
3427   ierr = MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3428   PetscFunctionReturn(0);
3429 }
3430 
3431 extern PetscErrorCode MatCreateSubMatrices_MPIAIJ_SingleIS_Local(Mat,PetscInt,const IS[],const IS[],MatReuse,PetscBool,Mat*);
3432 
3433 PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowDist(Mat mat,IS isrow,IS iscol,IS iscol_local,MatReuse call,Mat *newmat)
3434 {
3435   PetscErrorCode ierr;
3436   PetscInt       i,m,n,rstart,row,rend,nz,j,bs,cbs;
3437   PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal;
3438   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)mat->data;
3439   Mat            M,Msub,B=a->B;
3440   MatScalar      *aa;
3441   Mat_SeqAIJ     *aij;
3442   PetscInt       *garray = a->garray,*colsub,Ncols;
3443   PetscInt       count,Bn=B->cmap->N,cstart=mat->cmap->rstart,cend=mat->cmap->rend;
3444   IS             iscol_sub,iscmap;
3445   const PetscInt *is_idx,*cmap;
3446   PetscBool      allcolumns=PETSC_FALSE;
3447   MPI_Comm       comm;
3448 
3449   PetscFunctionBegin;
3450   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3451 
3452   if (call == MAT_REUSE_MATRIX) {
3453     ierr = PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_sub);CHKERRQ(ierr);
3454     if (!iscol_sub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"SubIScol passed in was not used before, cannot reuse");
3455     ierr = ISGetLocalSize(iscol_sub,&count);CHKERRQ(ierr);
3456 
3457     ierr = PetscObjectQuery((PetscObject)*newmat,"Subcmap",(PetscObject*)&iscmap);CHKERRQ(ierr);
3458     if (!iscmap) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Subcmap passed in was not used before, cannot reuse");
3459 
3460     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Msub);CHKERRQ(ierr);
3461     if (!Msub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3462 
3463     ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol_sub,MAT_REUSE_MATRIX,PETSC_FALSE,&Msub);CHKERRQ(ierr);
3464 
3465   } else { /* call == MAT_INITIAL_MATRIX) */
3466     PetscBool flg;
3467 
3468     ierr = ISGetLocalSize(iscol,&n);CHKERRQ(ierr);
3469     ierr = ISGetSize(iscol,&Ncols);CHKERRQ(ierr);
3470 
3471     /* (1) iscol -> nonscalable iscol_local */
3472     /* Check for special case: each processor gets entire matrix columns */
3473     ierr = ISIdentity(iscol_local,&flg);CHKERRQ(ierr);
3474     if (flg && n == mat->cmap->N) allcolumns = PETSC_TRUE;
3475     if (allcolumns) {
3476       iscol_sub = iscol_local;
3477       ierr = PetscObjectReference((PetscObject)iscol_local);CHKERRQ(ierr);
3478       ierr = ISCreateStride(PETSC_COMM_SELF,n,0,1,&iscmap);CHKERRQ(ierr);
3479 
3480     } else {
3481       /* (2) iscol_local -> iscol_sub and iscmap. Implementation below requires iscol_local be sorted, it can have duplicate indices */
3482       PetscInt *idx,*cmap1,k;
3483       ierr = PetscMalloc1(Ncols,&idx);CHKERRQ(ierr);
3484       ierr = PetscMalloc1(Ncols,&cmap1);CHKERRQ(ierr);
3485       ierr = ISGetIndices(iscol_local,&is_idx);CHKERRQ(ierr);
3486       count = 0;
3487       k     = 0;
3488       for (i=0; i<Ncols; i++) {
3489         j = is_idx[i];
3490         if (j >= cstart && j < cend) {
3491           /* diagonal part of mat */
3492           idx[count]     = j;
3493           cmap1[count++] = i; /* column index in submat */
3494         } else if (Bn) {
3495           /* off-diagonal part of mat */
3496           if (j == garray[k]) {
3497             idx[count]     = j;
3498             cmap1[count++] = i;  /* column index in submat */
3499           } else if (j > garray[k]) {
3500             while (j > garray[k] && k < Bn-1) k++;
3501             if (j == garray[k]) {
3502               idx[count]     = j;
3503               cmap1[count++] = i; /* column index in submat */
3504             }
3505           }
3506         }
3507       }
3508       ierr = ISRestoreIndices(iscol_local,&is_idx);CHKERRQ(ierr);
3509 
3510       ierr = ISCreateGeneral(PETSC_COMM_SELF,count,idx,PETSC_OWN_POINTER,&iscol_sub);CHKERRQ(ierr);
3511       ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr);
3512       ierr = ISSetBlockSize(iscol_sub,cbs);CHKERRQ(ierr);
3513 
3514       ierr = ISCreateGeneral(PetscObjectComm((PetscObject)iscol_local),count,cmap1,PETSC_OWN_POINTER,&iscmap);CHKERRQ(ierr);
3515     }
3516 
3517     /* (3) Create sequential Msub */
3518     ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol_sub,MAT_INITIAL_MATRIX,allcolumns,&Msub);CHKERRQ(ierr);
3519   }
3520 
3521   ierr = ISGetLocalSize(iscol_sub,&count);CHKERRQ(ierr);
3522   aij  = (Mat_SeqAIJ*)(Msub)->data;
3523   ii   = aij->i;
3524   ierr = ISGetIndices(iscmap,&cmap);CHKERRQ(ierr);
3525 
3526   /*
3527       m - number of local rows
3528       Ncols - number of columns (same on all processors)
3529       rstart - first row in new global matrix generated
3530   */
3531   ierr = MatGetSize(Msub,&m,NULL);CHKERRQ(ierr);
3532 
3533   if (call == MAT_INITIAL_MATRIX) {
3534     /* (4) Create parallel newmat */
3535     PetscMPIInt    rank,size;
3536     PetscInt       csize;
3537 
3538     ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3539     ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3540 
3541     /*
3542         Determine the number of non-zeros in the diagonal and off-diagonal
3543         portions of the matrix in order to do correct preallocation
3544     */
3545 
3546     /* first get start and end of "diagonal" columns */
3547     ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
3548     if (csize == PETSC_DECIDE) {
3549       ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
3550       if (mglobal == Ncols) { /* square matrix */
3551         nlocal = m;
3552       } else {
3553         nlocal = Ncols/size + ((Ncols % size) > rank);
3554       }
3555     } else {
3556       nlocal = csize;
3557     }
3558     ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3559     rstart = rend - nlocal;
3560     if (rank == size - 1 && rend != Ncols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,Ncols);
3561 
3562     /* next, compute all the lengths */
3563     jj    = aij->j;
3564     ierr  = PetscMalloc1(2*m+1,&dlens);CHKERRQ(ierr);
3565     olens = dlens + m;
3566     for (i=0; i<m; i++) {
3567       jend = ii[i+1] - ii[i];
3568       olen = 0;
3569       dlen = 0;
3570       for (j=0; j<jend; j++) {
3571         if (cmap[*jj] < rstart || cmap[*jj] >= rend) olen++;
3572         else dlen++;
3573         jj++;
3574       }
3575       olens[i] = olen;
3576       dlens[i] = dlen;
3577     }
3578 
3579     ierr = ISGetBlockSize(isrow,&bs);CHKERRQ(ierr);
3580     ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr);
3581 
3582     ierr = MatCreate(comm,&M);CHKERRQ(ierr);
3583     ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,Ncols);CHKERRQ(ierr);
3584     ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr);
3585     ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr);
3586     ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr);
3587     ierr = PetscFree(dlens);CHKERRQ(ierr);
3588 
3589   } else { /* call == MAT_REUSE_MATRIX */
3590     M    = *newmat;
3591     ierr = MatGetLocalSize(M,&i,NULL);CHKERRQ(ierr);
3592     if (i != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
3593     ierr = MatZeroEntries(M);CHKERRQ(ierr);
3594     /*
3595          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
3596        rather than the slower MatSetValues().
3597     */
3598     M->was_assembled = PETSC_TRUE;
3599     M->assembled     = PETSC_FALSE;
3600   }
3601 
3602   /* (5) Set values of Msub to *newmat */
3603   ierr = PetscMalloc1(count,&colsub);CHKERRQ(ierr);
3604   ierr = MatGetOwnershipRange(M,&rstart,NULL);CHKERRQ(ierr);
3605 
3606   jj   = aij->j;
3607   aa   = aij->a;
3608   for (i=0; i<m; i++) {
3609     row = rstart + i;
3610     nz  = ii[i+1] - ii[i];
3611     for (j=0; j<nz; j++) colsub[j] = cmap[jj[j]];
3612     ierr  = MatSetValues_MPIAIJ(M,1,&row,nz,colsub,aa,INSERT_VALUES);CHKERRQ(ierr);
3613     jj += nz; aa += nz;
3614   }
3615   ierr = ISRestoreIndices(iscmap,&cmap);CHKERRQ(ierr);
3616 
3617   ierr    = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3618   ierr    = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3619 
3620   ierr = PetscFree(colsub);CHKERRQ(ierr);
3621 
3622   /* save Msub, iscol_sub and iscmap used in processor for next request */
3623   if (call ==  MAT_INITIAL_MATRIX) {
3624     *newmat = M;
3625     ierr = PetscObjectCompose((PetscObject)(*newmat),"SubMatrix",(PetscObject)Msub);CHKERRQ(ierr);
3626     ierr = MatDestroy(&Msub);CHKERRQ(ierr);
3627 
3628     ierr = PetscObjectCompose((PetscObject)(*newmat),"SubIScol",(PetscObject)iscol_sub);CHKERRQ(ierr);
3629     ierr = ISDestroy(&iscol_sub);CHKERRQ(ierr);
3630 
3631     ierr = PetscObjectCompose((PetscObject)(*newmat),"Subcmap",(PetscObject)iscmap);CHKERRQ(ierr);
3632     ierr = ISDestroy(&iscmap);CHKERRQ(ierr);
3633 
3634     if (iscol_local) {
3635       ierr = PetscObjectCompose((PetscObject)(*newmat),"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
3636       ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
3637     }
3638   }
3639   PetscFunctionReturn(0);
3640 }
3641 
3642 /*
3643     Not great since it makes two copies of the submatrix, first an SeqAIJ
3644   in local and then by concatenating the local matrices the end result.
3645   Writing it directly would be much like MatCreateSubMatrices_MPIAIJ()
3646 
3647   Note: This requires a sequential iscol with all indices.
3648 */
3649 PetscErrorCode MatCreateSubMatrix_MPIAIJ_nonscalable(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat)
3650 {
3651   PetscErrorCode ierr;
3652   PetscMPIInt    rank,size;
3653   PetscInt       i,m,n,rstart,row,rend,nz,*cwork,j,bs,cbs;
3654   PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal;
3655   Mat            M,Mreuse;
3656   MatScalar      *aa,*vwork;
3657   MPI_Comm       comm;
3658   Mat_SeqAIJ     *aij;
3659   PetscBool      colflag,allcolumns=PETSC_FALSE;
3660 
3661   PetscFunctionBegin;
3662   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3663   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3664   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3665 
3666   /* Check for special case: each processor gets entire matrix columns */
3667   ierr = ISIdentity(iscol,&colflag);CHKERRQ(ierr);
3668   ierr = ISGetLocalSize(iscol,&n);CHKERRQ(ierr);
3669   if (colflag && n == mat->cmap->N) allcolumns = PETSC_TRUE;
3670 
3671   if (call ==  MAT_REUSE_MATRIX) {
3672     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Mreuse);CHKERRQ(ierr);
3673     if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3674     ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,allcolumns,&Mreuse);CHKERRQ(ierr);
3675   } else {
3676     ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,allcolumns,&Mreuse);CHKERRQ(ierr);
3677   }
3678 
3679   /*
3680       m - number of local rows
3681       n - number of columns (same on all processors)
3682       rstart - first row in new global matrix generated
3683   */
3684   ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
3685   ierr = MatGetBlockSizes(Mreuse,&bs,&cbs);CHKERRQ(ierr);
3686   if (call == MAT_INITIAL_MATRIX) {
3687     aij = (Mat_SeqAIJ*)(Mreuse)->data;
3688     ii  = aij->i;
3689     jj  = aij->j;
3690 
3691     /*
3692         Determine the number of non-zeros in the diagonal and off-diagonal
3693         portions of the matrix in order to do correct preallocation
3694     */
3695 
3696     /* first get start and end of "diagonal" columns */
3697     if (csize == PETSC_DECIDE) {
3698       ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
3699       if (mglobal == n) { /* square matrix */
3700         nlocal = m;
3701       } else {
3702         nlocal = n/size + ((n % size) > rank);
3703       }
3704     } else {
3705       nlocal = csize;
3706     }
3707     ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3708     rstart = rend - nlocal;
3709     if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n);
3710 
3711     /* next, compute all the lengths */
3712     ierr  = PetscMalloc1(2*m+1,&dlens);CHKERRQ(ierr);
3713     olens = dlens + m;
3714     for (i=0; i<m; i++) {
3715       jend = ii[i+1] - ii[i];
3716       olen = 0;
3717       dlen = 0;
3718       for (j=0; j<jend; j++) {
3719         if (*jj < rstart || *jj >= rend) olen++;
3720         else dlen++;
3721         jj++;
3722       }
3723       olens[i] = olen;
3724       dlens[i] = dlen;
3725     }
3726     ierr = MatCreate(comm,&M);CHKERRQ(ierr);
3727     ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr);
3728     ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr);
3729     ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr);
3730     ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr);
3731     ierr = PetscFree(dlens);CHKERRQ(ierr);
3732   } else {
3733     PetscInt ml,nl;
3734 
3735     M    = *newmat;
3736     ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
3737     if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
3738     ierr = MatZeroEntries(M);CHKERRQ(ierr);
3739     /*
3740          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
3741        rather than the slower MatSetValues().
3742     */
3743     M->was_assembled = PETSC_TRUE;
3744     M->assembled     = PETSC_FALSE;
3745   }
3746   ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
3747   aij  = (Mat_SeqAIJ*)(Mreuse)->data;
3748   ii   = aij->i;
3749   jj   = aij->j;
3750   aa   = aij->a;
3751   for (i=0; i<m; i++) {
3752     row   = rstart + i;
3753     nz    = ii[i+1] - ii[i];
3754     cwork = jj;     jj += nz;
3755     vwork = aa;     aa += nz;
3756     ierr  = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
3757   }
3758 
3759   ierr    = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3760   ierr    = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3761   *newmat = M;
3762 
3763   /* save submatrix used in processor for next request */
3764   if (call ==  MAT_INITIAL_MATRIX) {
3765     ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
3766     ierr = MatDestroy(&Mreuse);CHKERRQ(ierr);
3767   }
3768   PetscFunctionReturn(0);
3769 }
3770 
3771 PetscErrorCode MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[])
3772 {
3773   PetscInt       m,cstart, cend,j,nnz,i,d;
3774   PetscInt       *d_nnz,*o_nnz,nnz_max = 0,rstart,ii;
3775   const PetscInt *JJ;
3776   PetscScalar    *values;
3777   PetscErrorCode ierr;
3778   PetscBool      nooffprocentries;
3779 
3780   PetscFunctionBegin;
3781   if (Ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0 it is %D",Ii[0]);
3782 
3783   ierr   = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3784   ierr   = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3785   m      = B->rmap->n;
3786   cstart = B->cmap->rstart;
3787   cend   = B->cmap->rend;
3788   rstart = B->rmap->rstart;
3789 
3790   ierr = PetscMalloc2(m,&d_nnz,m,&o_nnz);CHKERRQ(ierr);
3791 
3792 #if defined(PETSC_USE_DEBUGGING)
3793   for (i=0; i<m; i++) {
3794     nnz = Ii[i+1]- Ii[i];
3795     JJ  = J + Ii[i];
3796     if (nnz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative %D number of columns",i,nnz);
3797     if (nnz && (JJ[0] < 0)) SETERRRQ1(PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index",i,j);
3798     if (nnz && (JJ[nnz-1] >= B->cmap->N) SETERRRQ3(PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N);
3799   }
3800 #endif
3801 
3802   for (i=0; i<m; i++) {
3803     nnz     = Ii[i+1]- Ii[i];
3804     JJ      = J + Ii[i];
3805     nnz_max = PetscMax(nnz_max,nnz);
3806     d       = 0;
3807     for (j=0; j<nnz; j++) {
3808       if (cstart <= JJ[j] && JJ[j] < cend) d++;
3809     }
3810     d_nnz[i] = d;
3811     o_nnz[i] = nnz - d;
3812   }
3813   ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
3814   ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);
3815 
3816   if (v) values = (PetscScalar*)v;
3817   else {
3818     ierr = PetscCalloc1(nnz_max+1,&values);CHKERRQ(ierr);
3819   }
3820 
3821   for (i=0; i<m; i++) {
3822     ii   = i + rstart;
3823     nnz  = Ii[i+1]- Ii[i];
3824     ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ? Ii[i] : 0),INSERT_VALUES);CHKERRQ(ierr);
3825   }
3826   nooffprocentries    = B->nooffprocentries;
3827   B->nooffprocentries = PETSC_TRUE;
3828   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3829   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3830   B->nooffprocentries = nooffprocentries;
3831 
3832   if (!v) {
3833     ierr = PetscFree(values);CHKERRQ(ierr);
3834   }
3835   ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3836   PetscFunctionReturn(0);
3837 }
3838 
3839 /*@
3840    MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format
3841    (the default parallel PETSc format).
3842 
3843    Collective on MPI_Comm
3844 
3845    Input Parameters:
3846 +  B - the matrix
3847 .  i - the indices into j for the start of each local row (starts with zero)
3848 .  j - the column indices for each local row (starts with zero)
3849 -  v - optional values in the matrix
3850 
3851    Level: developer
3852 
3853    Notes:
3854        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
3855      thus you CANNOT change the matrix entries by changing the values of a[] after you have
3856      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3857 
3858        The i and j indices are 0 based, and i indices are indices corresponding to the local j array.
3859 
3860        The format which is used for the sparse matrix input, is equivalent to a
3861     row-major ordering.. i.e for the following matrix, the input data expected is
3862     as shown
3863 
3864 $        1 0 0
3865 $        2 0 3     P0
3866 $       -------
3867 $        4 5 6     P1
3868 $
3869 $     Process0 [P0]: rows_owned=[0,1]
3870 $        i =  {0,1,3}  [size = nrow+1  = 2+1]
3871 $        j =  {0,0,2}  [size = 3]
3872 $        v =  {1,2,3}  [size = 3]
3873 $
3874 $     Process1 [P1]: rows_owned=[2]
3875 $        i =  {0,3}    [size = nrow+1  = 1+1]
3876 $        j =  {0,1,2}  [size = 3]
3877 $        v =  {4,5,6}  [size = 3]
3878 
3879 .keywords: matrix, aij, compressed row, sparse, parallel
3880 
3881 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateAIJ(), MATMPIAIJ,
3882           MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays()
3883 @*/
3884 PetscErrorCode  MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
3885 {
3886   PetscErrorCode ierr;
3887 
3888   PetscFunctionBegin;
3889   ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocationCSR_C",(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,i,j,v));CHKERRQ(ierr);
3890   PetscFunctionReturn(0);
3891 }
3892 
3893 /*@C
3894    MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format
3895    (the default parallel PETSc format).  For good matrix assembly performance
3896    the user should preallocate the matrix storage by setting the parameters
3897    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3898    performance can be increased by more than a factor of 50.
3899 
3900    Collective on MPI_Comm
3901 
3902    Input Parameters:
3903 +  B - the matrix
3904 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
3905            (same value is used for all local rows)
3906 .  d_nnz - array containing the number of nonzeros in the various rows of the
3907            DIAGONAL portion of the local submatrix (possibly different for each row)
3908            or NULL (PETSC_NULL_INTEGER in Fortran), if d_nz is used to specify the nonzero structure.
3909            The size of this array is equal to the number of local rows, i.e 'm'.
3910            For matrices that will be factored, you must leave room for (and set)
3911            the diagonal entry even if it is zero.
3912 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
3913            submatrix (same value is used for all local rows).
3914 -  o_nnz - array containing the number of nonzeros in the various rows of the
3915            OFF-DIAGONAL portion of the local submatrix (possibly different for
3916            each row) or NULL (PETSC_NULL_INTEGER in Fortran), if o_nz is used to specify the nonzero
3917            structure. The size of this array is equal to the number
3918            of local rows, i.e 'm'.
3919 
3920    If the *_nnz parameter is given then the *_nz parameter is ignored
3921 
3922    The AIJ format (also called the Yale sparse matrix format or
3923    compressed row storage (CSR)), is fully compatible with standard Fortran 77
3924    storage.  The stored row and column indices begin with zero.
3925    See Users-Manual: ch_mat for details.
3926 
3927    The parallel matrix is partitioned such that the first m0 rows belong to
3928    process 0, the next m1 rows belong to process 1, the next m2 rows belong
3929    to process 2 etc.. where m0,m1,m2... are the input parameter 'm'.
3930 
3931    The DIAGONAL portion of the local submatrix of a processor can be defined
3932    as the submatrix which is obtained by extraction the part corresponding to
3933    the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the
3934    first row that belongs to the processor, r2 is the last row belonging to
3935    the this processor, and c1-c2 is range of indices of the local part of a
3936    vector suitable for applying the matrix to.  This is an mxn matrix.  In the
3937    common case of a square matrix, the row and column ranges are the same and
3938    the DIAGONAL part is also square. The remaining portion of the local
3939    submatrix (mxN) constitute the OFF-DIAGONAL portion.
3940 
3941    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
3942 
3943    You can call MatGetInfo() to get information on how effective the preallocation was;
3944    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
3945    You can also run with the option -info and look for messages with the string
3946    malloc in them to see if additional memory allocation was needed.
3947 
3948    Example usage:
3949 
3950    Consider the following 8x8 matrix with 34 non-zero values, that is
3951    assembled across 3 processors. Lets assume that proc0 owns 3 rows,
3952    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
3953    as follows:
3954 
3955 .vb
3956             1  2  0  |  0  3  0  |  0  4
3957     Proc0   0  5  6  |  7  0  0  |  8  0
3958             9  0 10  | 11  0  0  | 12  0
3959     -------------------------------------
3960            13  0 14  | 15 16 17  |  0  0
3961     Proc1   0 18  0  | 19 20 21  |  0  0
3962             0  0  0  | 22 23  0  | 24  0
3963     -------------------------------------
3964     Proc2  25 26 27  |  0  0 28  | 29  0
3965            30  0  0  | 31 32 33  |  0 34
3966 .ve
3967 
3968    This can be represented as a collection of submatrices as:
3969 
3970 .vb
3971       A B C
3972       D E F
3973       G H I
3974 .ve
3975 
3976    Where the submatrices A,B,C are owned by proc0, D,E,F are
3977    owned by proc1, G,H,I are owned by proc2.
3978 
3979    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3980    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3981    The 'M','N' parameters are 8,8, and have the same values on all procs.
3982 
3983    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
3984    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
3985    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
3986    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
3987    part as SeqAIJ matrices. for eg: proc1 will store [E] as a SeqAIJ
3988    matrix, ans [DF] as another SeqAIJ matrix.
3989 
3990    When d_nz, o_nz parameters are specified, d_nz storage elements are
3991    allocated for every row of the local diagonal submatrix, and o_nz
3992    storage locations are allocated for every row of the OFF-DIAGONAL submat.
3993    One way to choose d_nz and o_nz is to use the max nonzerors per local
3994    rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
3995    In this case, the values of d_nz,o_nz are:
3996 .vb
3997      proc0 : dnz = 2, o_nz = 2
3998      proc1 : dnz = 3, o_nz = 2
3999      proc2 : dnz = 1, o_nz = 4
4000 .ve
4001    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
4002    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
4003    for proc3. i.e we are using 12+15+10=37 storage locations to store
4004    34 values.
4005 
4006    When d_nnz, o_nnz parameters are specified, the storage is specified
4007    for every row, coresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
4008    In the above case the values for d_nnz,o_nnz are:
4009 .vb
4010      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
4011      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
4012      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
4013 .ve
4014    Here the space allocated is sum of all the above values i.e 34, and
4015    hence pre-allocation is perfect.
4016 
4017    Level: intermediate
4018 
4019 .keywords: matrix, aij, compressed row, sparse, parallel
4020 
4021 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateAIJ(), MatMPIAIJSetPreallocationCSR(),
4022           MATMPIAIJ, MatGetInfo(), PetscSplitOwnership()
4023 @*/
4024 PetscErrorCode MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
4025 {
4026   PetscErrorCode ierr;
4027 
4028   PetscFunctionBegin;
4029   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
4030   PetscValidType(B,1);
4031   ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocation_C",(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr);
4032   PetscFunctionReturn(0);
4033 }
4034 
4035 /*@
4036      MatCreateMPIAIJWithArrays - creates a MPI AIJ matrix using arrays that contain in standard
4037          CSR format the local rows.
4038 
4039    Collective on MPI_Comm
4040 
4041    Input Parameters:
4042 +  comm - MPI communicator
4043 .  m - number of local rows (Cannot be PETSC_DECIDE)
4044 .  n - This value should be the same as the local size used in creating the
4045        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
4046        calculated if N is given) For square matrices n is almost always m.
4047 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
4048 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
4049 .   i - row indices
4050 .   j - column indices
4051 -   a - matrix values
4052 
4053    Output Parameter:
4054 .   mat - the matrix
4055 
4056    Level: intermediate
4057 
4058    Notes:
4059        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
4060      thus you CANNOT change the matrix entries by changing the values of a[] after you have
4061      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
4062 
4063        The i and j indices are 0 based, and i indices are indices corresponding to the local j array.
4064 
4065        The format which is used for the sparse matrix input, is equivalent to a
4066     row-major ordering.. i.e for the following matrix, the input data expected is
4067     as shown
4068 
4069 $        1 0 0
4070 $        2 0 3     P0
4071 $       -------
4072 $        4 5 6     P1
4073 $
4074 $     Process0 [P0]: rows_owned=[0,1]
4075 $        i =  {0,1,3}  [size = nrow+1  = 2+1]
4076 $        j =  {0,0,2}  [size = 3]
4077 $        v =  {1,2,3}  [size = 3]
4078 $
4079 $     Process1 [P1]: rows_owned=[2]
4080 $        i =  {0,3}    [size = nrow+1  = 1+1]
4081 $        j =  {0,1,2}  [size = 3]
4082 $        v =  {4,5,6}  [size = 3]
4083 
4084 .keywords: matrix, aij, compressed row, sparse, parallel
4085 
4086 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
4087           MATMPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays()
4088 @*/
4089 PetscErrorCode MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
4090 {
4091   PetscErrorCode ierr;
4092 
4093   PetscFunctionBegin;
4094   if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
4095   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
4096   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
4097   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
4098   /* ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); */
4099   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
4100   ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr);
4101   PetscFunctionReturn(0);
4102 }
4103 
4104 /*@C
4105    MatCreateAIJ - Creates a sparse parallel matrix in AIJ format
4106    (the default parallel PETSc format).  For good matrix assembly performance
4107    the user should preallocate the matrix storage by setting the parameters
4108    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
4109    performance can be increased by more than a factor of 50.
4110 
4111    Collective on MPI_Comm
4112 
4113    Input Parameters:
4114 +  comm - MPI communicator
4115 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
4116            This value should be the same as the local size used in creating the
4117            y vector for the matrix-vector product y = Ax.
4118 .  n - This value should be the same as the local size used in creating the
4119        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
4120        calculated if N is given) For square matrices n is almost always m.
4121 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
4122 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
4123 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
4124            (same value is used for all local rows)
4125 .  d_nnz - array containing the number of nonzeros in the various rows of the
4126            DIAGONAL portion of the local submatrix (possibly different for each row)
4127            or NULL, if d_nz is used to specify the nonzero structure.
4128            The size of this array is equal to the number of local rows, i.e 'm'.
4129 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
4130            submatrix (same value is used for all local rows).
4131 -  o_nnz - array containing the number of nonzeros in the various rows of the
4132            OFF-DIAGONAL portion of the local submatrix (possibly different for
4133            each row) or NULL, if o_nz is used to specify the nonzero
4134            structure. The size of this array is equal to the number
4135            of local rows, i.e 'm'.
4136 
4137    Output Parameter:
4138 .  A - the matrix
4139 
4140    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
4141    MatXXXXSetPreallocation() paradgm instead of this routine directly.
4142    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
4143 
4144    Notes:
4145    If the *_nnz parameter is given then the *_nz parameter is ignored
4146 
4147    m,n,M,N parameters specify the size of the matrix, and its partitioning across
4148    processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
4149    storage requirements for this matrix.
4150 
4151    If PETSC_DECIDE or  PETSC_DETERMINE is used for a particular argument on one
4152    processor than it must be used on all processors that share the object for
4153    that argument.
4154 
4155    The user MUST specify either the local or global matrix dimensions
4156    (possibly both).
4157 
4158    The parallel matrix is partitioned across processors such that the
4159    first m0 rows belong to process 0, the next m1 rows belong to
4160    process 1, the next m2 rows belong to process 2 etc.. where
4161    m0,m1,m2,.. are the input parameter 'm'. i.e each processor stores
4162    values corresponding to [m x N] submatrix.
4163 
4164    The columns are logically partitioned with the n0 columns belonging
4165    to 0th partition, the next n1 columns belonging to the next
4166    partition etc.. where n0,n1,n2... are the input parameter 'n'.
4167 
4168    The DIAGONAL portion of the local submatrix on any given processor
4169    is the submatrix corresponding to the rows and columns m,n
4170    corresponding to the given processor. i.e diagonal matrix on
4171    process 0 is [m0 x n0], diagonal matrix on process 1 is [m1 x n1]
4172    etc. The remaining portion of the local submatrix [m x (N-n)]
4173    constitute the OFF-DIAGONAL portion. The example below better
4174    illustrates this concept.
4175 
4176    For a square global matrix we define each processor's diagonal portion
4177    to be its local rows and the corresponding columns (a square submatrix);
4178    each processor's off-diagonal portion encompasses the remainder of the
4179    local matrix (a rectangular submatrix).
4180 
4181    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
4182 
4183    When calling this routine with a single process communicator, a matrix of
4184    type SEQAIJ is returned.  If a matrix of type MPIAIJ is desired for this
4185    type of communicator, use the construction mechanism
4186 .vb
4187      MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatSetSizes(A, m,n,M,N); MatMPIAIJSetPreallocation(A,...);
4188 .ve
4189 
4190 $     MatCreate(...,&A);
4191 $     MatSetType(A,MATMPIAIJ);
4192 $     MatSetSizes(A, m,n,M,N);
4193 $     MatMPIAIJSetPreallocation(A,...);
4194 
4195    By default, this format uses inodes (identical nodes) when possible.
4196    We search for consecutive rows with the same nonzero structure, thereby
4197    reusing matrix information to achieve increased efficiency.
4198 
4199    Options Database Keys:
4200 +  -mat_no_inode  - Do not use inodes
4201 .  -mat_inode_limit <limit> - Sets inode limit (max limit=5)
4202 -  -mat_aij_oneindex - Internally use indexing starting at 1
4203         rather than 0.  Note that when calling MatSetValues(),
4204         the user still MUST index entries starting at 0!
4205 
4206 
4207    Example usage:
4208 
4209    Consider the following 8x8 matrix with 34 non-zero values, that is
4210    assembled across 3 processors. Lets assume that proc0 owns 3 rows,
4211    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
4212    as follows
4213 
4214 .vb
4215             1  2  0  |  0  3  0  |  0  4
4216     Proc0   0  5  6  |  7  0  0  |  8  0
4217             9  0 10  | 11  0  0  | 12  0
4218     -------------------------------------
4219            13  0 14  | 15 16 17  |  0  0
4220     Proc1   0 18  0  | 19 20 21  |  0  0
4221             0  0  0  | 22 23  0  | 24  0
4222     -------------------------------------
4223     Proc2  25 26 27  |  0  0 28  | 29  0
4224            30  0  0  | 31 32 33  |  0 34
4225 .ve
4226 
4227    This can be represented as a collection of submatrices as
4228 
4229 .vb
4230       A B C
4231       D E F
4232       G H I
4233 .ve
4234 
4235    Where the submatrices A,B,C are owned by proc0, D,E,F are
4236    owned by proc1, G,H,I are owned by proc2.
4237 
4238    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4239    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4240    The 'M','N' parameters are 8,8, and have the same values on all procs.
4241 
4242    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
4243    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
4244    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
4245    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
4246    part as SeqAIJ matrices. for eg: proc1 will store [E] as a SeqAIJ
4247    matrix, ans [DF] as another SeqAIJ matrix.
4248 
4249    When d_nz, o_nz parameters are specified, d_nz storage elements are
4250    allocated for every row of the local diagonal submatrix, and o_nz
4251    storage locations are allocated for every row of the OFF-DIAGONAL submat.
4252    One way to choose d_nz and o_nz is to use the max nonzerors per local
4253    rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
4254    In this case, the values of d_nz,o_nz are
4255 .vb
4256      proc0 : dnz = 2, o_nz = 2
4257      proc1 : dnz = 3, o_nz = 2
4258      proc2 : dnz = 1, o_nz = 4
4259 .ve
4260    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
4261    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
4262    for proc3. i.e we are using 12+15+10=37 storage locations to store
4263    34 values.
4264 
4265    When d_nnz, o_nnz parameters are specified, the storage is specified
4266    for every row, coresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
4267    In the above case the values for d_nnz,o_nnz are
4268 .vb
4269      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
4270      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
4271      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
4272 .ve
4273    Here the space allocated is sum of all the above values i.e 34, and
4274    hence pre-allocation is perfect.
4275 
4276    Level: intermediate
4277 
4278 .keywords: matrix, aij, compressed row, sparse, parallel
4279 
4280 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
4281           MATMPIAIJ, MatCreateMPIAIJWithArrays()
4282 @*/
4283 PetscErrorCode  MatCreateAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
4284 {
4285   PetscErrorCode ierr;
4286   PetscMPIInt    size;
4287 
4288   PetscFunctionBegin;
4289   ierr = MatCreate(comm,A);CHKERRQ(ierr);
4290   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
4291   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4292   if (size > 1) {
4293     ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr);
4294     ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
4295   } else {
4296     ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr);
4297     ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr);
4298   }
4299   PetscFunctionReturn(0);
4300 }
4301 
4302 PetscErrorCode MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,const PetscInt *colmap[])
4303 {
4304   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
4305   PetscBool      flg;
4306   PetscErrorCode ierr;
4307 
4308   PetscFunctionBegin;
4309   ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&flg);CHKERRQ(ierr);
4310   if (!flg) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"This function requires a MATMPIAIJ matrix as input");
4311   if (Ad)     *Ad     = a->A;
4312   if (Ao)     *Ao     = a->B;
4313   if (colmap) *colmap = a->garray;
4314   PetscFunctionReturn(0);
4315 }
4316 
4317 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
4318 {
4319   PetscErrorCode ierr;
4320   PetscInt       m,N,i,rstart,nnz,Ii;
4321   PetscInt       *indx;
4322   PetscScalar    *values;
4323 
4324   PetscFunctionBegin;
4325   ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
4326   if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
4327     PetscInt       *dnz,*onz,sum,bs,cbs;
4328 
4329     if (n == PETSC_DECIDE) {
4330       ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr);
4331     }
4332     /* Check sum(n) = N */
4333     ierr = MPIU_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
4334     if (sum != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns %D != global columns %D",sum,N);
4335 
4336     ierr    = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
4337     rstart -= m;
4338 
4339     ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
4340     for (i=0; i<m; i++) {
4341       ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr);
4342       ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr);
4343       ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr);
4344     }
4345 
4346     ierr = MatCreate(comm,outmat);CHKERRQ(ierr);
4347     ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
4348     ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr);
4349     ierr = MatSetBlockSizes(*outmat,bs,cbs);CHKERRQ(ierr);
4350     ierr = MatSetType(*outmat,MATAIJ);CHKERRQ(ierr);
4351     ierr = MatSeqAIJSetPreallocation(*outmat,0,dnz);CHKERRQ(ierr);
4352     ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr);
4353     ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
4354   }
4355 
4356   /* numeric phase */
4357   ierr = MatGetOwnershipRange(*outmat,&rstart,NULL);CHKERRQ(ierr);
4358   for (i=0; i<m; i++) {
4359     ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
4360     Ii   = i + rstart;
4361     ierr = MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
4362     ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
4363   }
4364   ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4365   ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4366   PetscFunctionReturn(0);
4367 }
4368 
4369 PetscErrorCode MatFileSplit(Mat A,char *outfile)
4370 {
4371   PetscErrorCode    ierr;
4372   PetscMPIInt       rank;
4373   PetscInt          m,N,i,rstart,nnz;
4374   size_t            len;
4375   const PetscInt    *indx;
4376   PetscViewer       out;
4377   char              *name;
4378   Mat               B;
4379   const PetscScalar *values;
4380 
4381   PetscFunctionBegin;
4382   ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr);
4383   ierr = MatGetSize(A,0,&N);CHKERRQ(ierr);
4384   /* Should this be the type of the diagonal block of A? */
4385   ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr);
4386   ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr);
4387   ierr = MatSetBlockSizesFromMats(B,A,A);CHKERRQ(ierr);
4388   ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr);
4389   ierr = MatSeqAIJSetPreallocation(B,0,NULL);CHKERRQ(ierr);
4390   ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr);
4391   for (i=0; i<m; i++) {
4392     ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
4393     ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
4394     ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
4395   }
4396   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4397   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4398 
4399   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRQ(ierr);
4400   ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr);
4401   ierr = PetscMalloc1(len+5,&name);CHKERRQ(ierr);
4402   sprintf(name,"%s.%d",outfile,rank);
4403   ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr);
4404   ierr = PetscFree(name);CHKERRQ(ierr);
4405   ierr = MatView(B,out);CHKERRQ(ierr);
4406   ierr = PetscViewerDestroy(&out);CHKERRQ(ierr);
4407   ierr = MatDestroy(&B);CHKERRQ(ierr);
4408   PetscFunctionReturn(0);
4409 }
4410 
4411 PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(Mat A)
4412 {
4413   PetscErrorCode      ierr;
4414   Mat_Merge_SeqsToMPI *merge;
4415   PetscContainer      container;
4416 
4417   PetscFunctionBegin;
4418   ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr);
4419   if (container) {
4420     ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr);
4421     ierr = PetscFree(merge->id_r);CHKERRQ(ierr);
4422     ierr = PetscFree(merge->len_s);CHKERRQ(ierr);
4423     ierr = PetscFree(merge->len_r);CHKERRQ(ierr);
4424     ierr = PetscFree(merge->bi);CHKERRQ(ierr);
4425     ierr = PetscFree(merge->bj);CHKERRQ(ierr);
4426     ierr = PetscFree(merge->buf_ri[0]);CHKERRQ(ierr);
4427     ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr);
4428     ierr = PetscFree(merge->buf_rj[0]);CHKERRQ(ierr);
4429     ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr);
4430     ierr = PetscFree(merge->coi);CHKERRQ(ierr);
4431     ierr = PetscFree(merge->coj);CHKERRQ(ierr);
4432     ierr = PetscFree(merge->owners_co);CHKERRQ(ierr);
4433     ierr = PetscLayoutDestroy(&merge->rowmap);CHKERRQ(ierr);
4434     ierr = PetscFree(merge);CHKERRQ(ierr);
4435     ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr);
4436   }
4437   ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr);
4438   PetscFunctionReturn(0);
4439 }
4440 
4441 #include <../src/mat/utils/freespace.h>
4442 #include <petscbt.h>
4443 
4444 PetscErrorCode MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat,Mat mpimat)
4445 {
4446   PetscErrorCode      ierr;
4447   MPI_Comm            comm;
4448   Mat_SeqAIJ          *a  =(Mat_SeqAIJ*)seqmat->data;
4449   PetscMPIInt         size,rank,taga,*len_s;
4450   PetscInt            N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj;
4451   PetscInt            proc,m;
4452   PetscInt            **buf_ri,**buf_rj;
4453   PetscInt            k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj;
4454   PetscInt            nrows,**buf_ri_k,**nextrow,**nextai;
4455   MPI_Request         *s_waits,*r_waits;
4456   MPI_Status          *status;
4457   MatScalar           *aa=a->a;
4458   MatScalar           **abuf_r,*ba_i;
4459   Mat_Merge_SeqsToMPI *merge;
4460   PetscContainer      container;
4461 
4462   PetscFunctionBegin;
4463   ierr = PetscObjectGetComm((PetscObject)mpimat,&comm);CHKERRQ(ierr);
4464   ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
4465 
4466   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4467   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4468 
4469   ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr);
4470   ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr);
4471 
4472   bi     = merge->bi;
4473   bj     = merge->bj;
4474   buf_ri = merge->buf_ri;
4475   buf_rj = merge->buf_rj;
4476 
4477   ierr   = PetscMalloc1(size,&status);CHKERRQ(ierr);
4478   owners = merge->rowmap->range;
4479   len_s  = merge->len_s;
4480 
4481   /* send and recv matrix values */
4482   /*-----------------------------*/
4483   ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr);
4484   ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr);
4485 
4486   ierr = PetscMalloc1(merge->nsend+1,&s_waits);CHKERRQ(ierr);
4487   for (proc=0,k=0; proc<size; proc++) {
4488     if (!len_s[proc]) continue;
4489     i    = owners[proc];
4490     ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr);
4491     k++;
4492   }
4493 
4494   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);}
4495   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);}
4496   ierr = PetscFree(status);CHKERRQ(ierr);
4497 
4498   ierr = PetscFree(s_waits);CHKERRQ(ierr);
4499   ierr = PetscFree(r_waits);CHKERRQ(ierr);
4500 
4501   /* insert mat values of mpimat */
4502   /*----------------------------*/
4503   ierr = PetscMalloc1(N,&ba_i);CHKERRQ(ierr);
4504   ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr);
4505 
4506   for (k=0; k<merge->nrecv; k++) {
4507     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
4508     nrows       = *(buf_ri_k[k]);
4509     nextrow[k]  = buf_ri_k[k]+1;  /* next row number of k-th recved i-structure */
4510     nextai[k]   = buf_ri_k[k] + (nrows + 1); /* poins to the next i-structure of k-th recved i-structure  */
4511   }
4512 
4513   /* set values of ba */
4514   m = merge->rowmap->n;
4515   for (i=0; i<m; i++) {
4516     arow = owners[rank] + i;
4517     bj_i = bj+bi[i];  /* col indices of the i-th row of mpimat */
4518     bnzi = bi[i+1] - bi[i];
4519     ierr = PetscMemzero(ba_i,bnzi*sizeof(PetscScalar));CHKERRQ(ierr);
4520 
4521     /* add local non-zero vals of this proc's seqmat into ba */
4522     anzi   = ai[arow+1] - ai[arow];
4523     aj     = a->j + ai[arow];
4524     aa     = a->a + ai[arow];
4525     nextaj = 0;
4526     for (j=0; nextaj<anzi; j++) {
4527       if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
4528         ba_i[j] += aa[nextaj++];
4529       }
4530     }
4531 
4532     /* add received vals into ba */
4533     for (k=0; k<merge->nrecv; k++) { /* k-th received message */
4534       /* i-th row */
4535       if (i == *nextrow[k]) {
4536         anzi   = *(nextai[k]+1) - *nextai[k];
4537         aj     = buf_rj[k] + *(nextai[k]);
4538         aa     = abuf_r[k] + *(nextai[k]);
4539         nextaj = 0;
4540         for (j=0; nextaj<anzi; j++) {
4541           if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
4542             ba_i[j] += aa[nextaj++];
4543           }
4544         }
4545         nextrow[k]++; nextai[k]++;
4546       }
4547     }
4548     ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr);
4549   }
4550   ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4551   ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4552 
4553   ierr = PetscFree(abuf_r[0]);CHKERRQ(ierr);
4554   ierr = PetscFree(abuf_r);CHKERRQ(ierr);
4555   ierr = PetscFree(ba_i);CHKERRQ(ierr);
4556   ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
4557   ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
4558   PetscFunctionReturn(0);
4559 }
4560 
4561 PetscErrorCode  MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat)
4562 {
4563   PetscErrorCode      ierr;
4564   Mat                 B_mpi;
4565   Mat_SeqAIJ          *a=(Mat_SeqAIJ*)seqmat->data;
4566   PetscMPIInt         size,rank,tagi,tagj,*len_s,*len_si,*len_ri;
4567   PetscInt            **buf_rj,**buf_ri,**buf_ri_k;
4568   PetscInt            M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j;
4569   PetscInt            len,proc,*dnz,*onz,bs,cbs;
4570   PetscInt            k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0;
4571   PetscInt            nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai;
4572   MPI_Request         *si_waits,*sj_waits,*ri_waits,*rj_waits;
4573   MPI_Status          *status;
4574   PetscFreeSpaceList  free_space=NULL,current_space=NULL;
4575   PetscBT             lnkbt;
4576   Mat_Merge_SeqsToMPI *merge;
4577   PetscContainer      container;
4578 
4579   PetscFunctionBegin;
4580   ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4581 
4582   /* make sure it is a PETSc comm */
4583   ierr = PetscCommDuplicate(comm,&comm,NULL);CHKERRQ(ierr);
4584   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4585   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4586 
4587   ierr = PetscNew(&merge);CHKERRQ(ierr);
4588   ierr = PetscMalloc1(size,&status);CHKERRQ(ierr);
4589 
4590   /* determine row ownership */
4591   /*---------------------------------------------------------*/
4592   ierr = PetscLayoutCreate(comm,&merge->rowmap);CHKERRQ(ierr);
4593   ierr = PetscLayoutSetLocalSize(merge->rowmap,m);CHKERRQ(ierr);
4594   ierr = PetscLayoutSetSize(merge->rowmap,M);CHKERRQ(ierr);
4595   ierr = PetscLayoutSetBlockSize(merge->rowmap,1);CHKERRQ(ierr);
4596   ierr = PetscLayoutSetUp(merge->rowmap);CHKERRQ(ierr);
4597   ierr = PetscMalloc1(size,&len_si);CHKERRQ(ierr);
4598   ierr = PetscMalloc1(size,&merge->len_s);CHKERRQ(ierr);
4599 
4600   m      = merge->rowmap->n;
4601   owners = merge->rowmap->range;
4602 
4603   /* determine the number of messages to send, their lengths */
4604   /*---------------------------------------------------------*/
4605   len_s = merge->len_s;
4606 
4607   len          = 0; /* length of buf_si[] */
4608   merge->nsend = 0;
4609   for (proc=0; proc<size; proc++) {
4610     len_si[proc] = 0;
4611     if (proc == rank) {
4612       len_s[proc] = 0;
4613     } else {
4614       len_si[proc] = owners[proc+1] - owners[proc] + 1;
4615       len_s[proc]  = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */
4616     }
4617     if (len_s[proc]) {
4618       merge->nsend++;
4619       nrows = 0;
4620       for (i=owners[proc]; i<owners[proc+1]; i++) {
4621         if (ai[i+1] > ai[i]) nrows++;
4622       }
4623       len_si[proc] = 2*(nrows+1);
4624       len         += len_si[proc];
4625     }
4626   }
4627 
4628   /* determine the number and length of messages to receive for ij-structure */
4629   /*-------------------------------------------------------------------------*/
4630   ierr = PetscGatherNumberOfMessages(comm,NULL,len_s,&merge->nrecv);CHKERRQ(ierr);
4631   ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr);
4632 
4633   /* post the Irecv of j-structure */
4634   /*-------------------------------*/
4635   ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr);
4636   ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr);
4637 
4638   /* post the Isend of j-structure */
4639   /*--------------------------------*/
4640   ierr = PetscMalloc2(merge->nsend,&si_waits,merge->nsend,&sj_waits);CHKERRQ(ierr);
4641 
4642   for (proc=0, k=0; proc<size; proc++) {
4643     if (!len_s[proc]) continue;
4644     i    = owners[proc];
4645     ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr);
4646     k++;
4647   }
4648 
4649   /* receives and sends of j-structure are complete */
4650   /*------------------------------------------------*/
4651   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);}
4652   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);}
4653 
4654   /* send and recv i-structure */
4655   /*---------------------------*/
4656   ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr);
4657   ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr);
4658 
4659   ierr   = PetscMalloc1(len+1,&buf_s);CHKERRQ(ierr);
4660   buf_si = buf_s;  /* points to the beginning of k-th msg to be sent */
4661   for (proc=0,k=0; proc<size; proc++) {
4662     if (!len_s[proc]) continue;
4663     /* form outgoing message for i-structure:
4664          buf_si[0]:                 nrows to be sent
4665                [1:nrows]:           row index (global)
4666                [nrows+1:2*nrows+1]: i-structure index
4667     */
4668     /*-------------------------------------------*/
4669     nrows       = len_si[proc]/2 - 1;
4670     buf_si_i    = buf_si + nrows+1;
4671     buf_si[0]   = nrows;
4672     buf_si_i[0] = 0;
4673     nrows       = 0;
4674     for (i=owners[proc]; i<owners[proc+1]; i++) {
4675       anzi = ai[i+1] - ai[i];
4676       if (anzi) {
4677         buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */
4678         buf_si[nrows+1]   = i-owners[proc]; /* local row index */
4679         nrows++;
4680       }
4681     }
4682     ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr);
4683     k++;
4684     buf_si += len_si[proc];
4685   }
4686 
4687   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);}
4688   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);}
4689 
4690   ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr);
4691   for (i=0; i<merge->nrecv; i++) {
4692     ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr);
4693   }
4694 
4695   ierr = PetscFree(len_si);CHKERRQ(ierr);
4696   ierr = PetscFree(len_ri);CHKERRQ(ierr);
4697   ierr = PetscFree(rj_waits);CHKERRQ(ierr);
4698   ierr = PetscFree2(si_waits,sj_waits);CHKERRQ(ierr);
4699   ierr = PetscFree(ri_waits);CHKERRQ(ierr);
4700   ierr = PetscFree(buf_s);CHKERRQ(ierr);
4701   ierr = PetscFree(status);CHKERRQ(ierr);
4702 
4703   /* compute a local seq matrix in each processor */
4704   /*----------------------------------------------*/
4705   /* allocate bi array and free space for accumulating nonzero column info */
4706   ierr  = PetscMalloc1(m+1,&bi);CHKERRQ(ierr);
4707   bi[0] = 0;
4708 
4709   /* create and initialize a linked list */
4710   nlnk = N+1;
4711   ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4712 
4713   /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */
4714   len  = ai[owners[rank+1]] - ai[owners[rank]];
4715   ierr = PetscFreeSpaceGet(PetscIntMultTruncate(2,len)+1,&free_space);CHKERRQ(ierr);
4716 
4717   current_space = free_space;
4718 
4719   /* determine symbolic info for each local row */
4720   ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr);
4721 
4722   for (k=0; k<merge->nrecv; k++) {
4723     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
4724     nrows       = *buf_ri_k[k];
4725     nextrow[k]  = buf_ri_k[k] + 1;  /* next row number of k-th recved i-structure */
4726     nextai[k]   = buf_ri_k[k] + (nrows + 1); /* poins to the next i-structure of k-th recved i-structure  */
4727   }
4728 
4729   ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
4730   len  = 0;
4731   for (i=0; i<m; i++) {
4732     bnzi = 0;
4733     /* add local non-zero cols of this proc's seqmat into lnk */
4734     arow  = owners[rank] + i;
4735     anzi  = ai[arow+1] - ai[arow];
4736     aj    = a->j + ai[arow];
4737     ierr  = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4738     bnzi += nlnk;
4739     /* add received col data into lnk */
4740     for (k=0; k<merge->nrecv; k++) { /* k-th received message */
4741       if (i == *nextrow[k]) { /* i-th row */
4742         anzi  = *(nextai[k]+1) - *nextai[k];
4743         aj    = buf_rj[k] + *nextai[k];
4744         ierr  = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4745         bnzi += nlnk;
4746         nextrow[k]++; nextai[k]++;
4747       }
4748     }
4749     if (len < bnzi) len = bnzi;  /* =max(bnzi) */
4750 
4751     /* if free space is not available, make more free space */
4752     if (current_space->local_remaining<bnzi) {
4753       ierr = PetscFreeSpaceGet(PetscIntSumTruncate(bnzi,current_space->total_array_size),&current_space);CHKERRQ(ierr);
4754       nspacedouble++;
4755     }
4756     /* copy data into free space, then initialize lnk */
4757     ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
4758     ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr);
4759 
4760     current_space->array           += bnzi;
4761     current_space->local_used      += bnzi;
4762     current_space->local_remaining -= bnzi;
4763 
4764     bi[i+1] = bi[i] + bnzi;
4765   }
4766 
4767   ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
4768 
4769   ierr = PetscMalloc1(bi[m]+1,&bj);CHKERRQ(ierr);
4770   ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr);
4771   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
4772 
4773   /* create symbolic parallel matrix B_mpi */
4774   /*---------------------------------------*/
4775   ierr = MatGetBlockSizes(seqmat,&bs,&cbs);CHKERRQ(ierr);
4776   ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr);
4777   if (n==PETSC_DECIDE) {
4778     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr);
4779   } else {
4780     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
4781   }
4782   ierr = MatSetBlockSizes(B_mpi,bs,cbs);CHKERRQ(ierr);
4783   ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr);
4784   ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr);
4785   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
4786   ierr = MatSetOption(B_mpi,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
4787 
4788   /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */
4789   B_mpi->assembled    = PETSC_FALSE;
4790   B_mpi->ops->destroy = MatDestroy_MPIAIJ_SeqsToMPI;
4791   merge->bi           = bi;
4792   merge->bj           = bj;
4793   merge->buf_ri       = buf_ri;
4794   merge->buf_rj       = buf_rj;
4795   merge->coi          = NULL;
4796   merge->coj          = NULL;
4797   merge->owners_co    = NULL;
4798 
4799   ierr = PetscCommDestroy(&comm);CHKERRQ(ierr);
4800 
4801   /* attach the supporting struct to B_mpi for reuse */
4802   ierr    = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
4803   ierr    = PetscContainerSetPointer(container,merge);CHKERRQ(ierr);
4804   ierr    = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr);
4805   ierr    = PetscContainerDestroy(&container);CHKERRQ(ierr);
4806   *mpimat = B_mpi;
4807 
4808   ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4809   PetscFunctionReturn(0);
4810 }
4811 
4812 /*@C
4813       MatCreateMPIAIJSumSeqAIJ - Creates a MATMPIAIJ matrix by adding sequential
4814                  matrices from each processor
4815 
4816     Collective on MPI_Comm
4817 
4818    Input Parameters:
4819 +    comm - the communicators the parallel matrix will live on
4820 .    seqmat - the input sequential matrices
4821 .    m - number of local rows (or PETSC_DECIDE)
4822 .    n - number of local columns (or PETSC_DECIDE)
4823 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4824 
4825    Output Parameter:
4826 .    mpimat - the parallel matrix generated
4827 
4828     Level: advanced
4829 
4830    Notes:
4831      The dimensions of the sequential matrix in each processor MUST be the same.
4832      The input seqmat is included into the container "Mat_Merge_SeqsToMPI", and will be
4833      destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat.
4834 @*/
4835 PetscErrorCode MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat)
4836 {
4837   PetscErrorCode ierr;
4838   PetscMPIInt    size;
4839 
4840   PetscFunctionBegin;
4841   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4842   if (size == 1) {
4843     ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4844     if (scall == MAT_INITIAL_MATRIX) {
4845       ierr = MatDuplicate(seqmat,MAT_COPY_VALUES,mpimat);CHKERRQ(ierr);
4846     } else {
4847       ierr = MatCopy(seqmat,*mpimat,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
4848     }
4849     ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4850     PetscFunctionReturn(0);
4851   }
4852   ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4853   if (scall == MAT_INITIAL_MATRIX) {
4854     ierr = MatCreateMPIAIJSumSeqAIJSymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr);
4855   }
4856   ierr = MatCreateMPIAIJSumSeqAIJNumeric(seqmat,*mpimat);CHKERRQ(ierr);
4857   ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4858   PetscFunctionReturn(0);
4859 }
4860 
4861 /*@
4862      MatMPIAIJGetLocalMat - Creates a SeqAIJ from a MATMPIAIJ matrix by taking all its local rows and putting them into a sequential matrix with
4863           mlocal rows and n columns. Where mlocal is the row count obtained with MatGetLocalSize() and n is the global column count obtained
4864           with MatGetSize()
4865 
4866     Not Collective
4867 
4868    Input Parameters:
4869 +    A - the matrix
4870 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4871 
4872    Output Parameter:
4873 .    A_loc - the local sequential matrix generated
4874 
4875     Level: developer
4876 
4877 .seealso: MatGetOwnerShipRange(), MatMPIAIJGetLocalMatCondensed()
4878 
4879 @*/
4880 PetscErrorCode MatMPIAIJGetLocalMat(Mat A,MatReuse scall,Mat *A_loc)
4881 {
4882   PetscErrorCode ierr;
4883   Mat_MPIAIJ     *mpimat=(Mat_MPIAIJ*)A->data;
4884   Mat_SeqAIJ     *mat,*a,*b;
4885   PetscInt       *ai,*aj,*bi,*bj,*cmap=mpimat->garray;
4886   MatScalar      *aa,*ba,*cam;
4887   PetscScalar    *ca;
4888   PetscInt       am=A->rmap->n,i,j,k,cstart=A->cmap->rstart;
4889   PetscInt       *ci,*cj,col,ncols_d,ncols_o,jo;
4890   PetscBool      match;
4891   MPI_Comm       comm;
4892   PetscMPIInt    size;
4893 
4894   PetscFunctionBegin;
4895   ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr);
4896   if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MATMPIAIJ matrix as input");
4897   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
4898   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4899   if (size == 1 && scall == MAT_REUSE_MATRIX) PetscFunctionReturn(0);
4900 
4901   ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
4902   a = (Mat_SeqAIJ*)(mpimat->A)->data;
4903   b = (Mat_SeqAIJ*)(mpimat->B)->data;
4904   ai = a->i; aj = a->j; bi = b->i; bj = b->j;
4905   aa = a->a; ba = b->a;
4906   if (scall == MAT_INITIAL_MATRIX) {
4907     if (size == 1) {
4908       ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ai,aj,aa,A_loc);CHKERRQ(ierr);
4909       PetscFunctionReturn(0);
4910     }
4911 
4912     ierr  = PetscMalloc1(1+am,&ci);CHKERRQ(ierr);
4913     ci[0] = 0;
4914     for (i=0; i<am; i++) {
4915       ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]);
4916     }
4917     ierr = PetscMalloc1(1+ci[am],&cj);CHKERRQ(ierr);
4918     ierr = PetscMalloc1(1+ci[am],&ca);CHKERRQ(ierr);
4919     k    = 0;
4920     for (i=0; i<am; i++) {
4921       ncols_o = bi[i+1] - bi[i];
4922       ncols_d = ai[i+1] - ai[i];
4923       /* off-diagonal portion of A */
4924       for (jo=0; jo<ncols_o; jo++) {
4925         col = cmap[*bj];
4926         if (col >= cstart) break;
4927         cj[k]   = col; bj++;
4928         ca[k++] = *ba++;
4929       }
4930       /* diagonal portion of A */
4931       for (j=0; j<ncols_d; j++) {
4932         cj[k]   = cstart + *aj++;
4933         ca[k++] = *aa++;
4934       }
4935       /* off-diagonal portion of A */
4936       for (j=jo; j<ncols_o; j++) {
4937         cj[k]   = cmap[*bj++];
4938         ca[k++] = *ba++;
4939       }
4940     }
4941     /* put together the new matrix */
4942     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr);
4943     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
4944     /* Since these are PETSc arrays, change flags to free them as necessary. */
4945     mat          = (Mat_SeqAIJ*)(*A_loc)->data;
4946     mat->free_a  = PETSC_TRUE;
4947     mat->free_ij = PETSC_TRUE;
4948     mat->nonew   = 0;
4949   } else if (scall == MAT_REUSE_MATRIX) {
4950     mat=(Mat_SeqAIJ*)(*A_loc)->data;
4951     ci = mat->i; cj = mat->j; cam = mat->a;
4952     for (i=0; i<am; i++) {
4953       /* off-diagonal portion of A */
4954       ncols_o = bi[i+1] - bi[i];
4955       for (jo=0; jo<ncols_o; jo++) {
4956         col = cmap[*bj];
4957         if (col >= cstart) break;
4958         *cam++ = *ba++; bj++;
4959       }
4960       /* diagonal portion of A */
4961       ncols_d = ai[i+1] - ai[i];
4962       for (j=0; j<ncols_d; j++) *cam++ = *aa++;
4963       /* off-diagonal portion of A */
4964       for (j=jo; j<ncols_o; j++) {
4965         *cam++ = *ba++; bj++;
4966       }
4967     }
4968   } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
4969   ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
4970   PetscFunctionReturn(0);
4971 }
4972 
4973 /*@C
4974      MatMPIAIJGetLocalMatCondensed - Creates a SeqAIJ matrix from an MATMPIAIJ matrix by taking all its local rows and NON-ZERO columns
4975 
4976     Not Collective
4977 
4978    Input Parameters:
4979 +    A - the matrix
4980 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4981 -    row, col - index sets of rows and columns to extract (or NULL)
4982 
4983    Output Parameter:
4984 .    A_loc - the local sequential matrix generated
4985 
4986     Level: developer
4987 
4988 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMat()
4989 
4990 @*/
4991 PetscErrorCode MatMPIAIJGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc)
4992 {
4993   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
4994   PetscErrorCode ierr;
4995   PetscInt       i,start,end,ncols,nzA,nzB,*cmap,imark,*idx;
4996   IS             isrowa,iscola;
4997   Mat            *aloc;
4998   PetscBool      match;
4999 
5000   PetscFunctionBegin;
5001   ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr);
5002   if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MATMPIAIJ matrix as input");
5003   ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
5004   if (!row) {
5005     start = A->rmap->rstart; end = A->rmap->rend;
5006     ierr  = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr);
5007   } else {
5008     isrowa = *row;
5009   }
5010   if (!col) {
5011     start = A->cmap->rstart;
5012     cmap  = a->garray;
5013     nzA   = a->A->cmap->n;
5014     nzB   = a->B->cmap->n;
5015     ierr  = PetscMalloc1(nzA+nzB, &idx);CHKERRQ(ierr);
5016     ncols = 0;
5017     for (i=0; i<nzB; i++) {
5018       if (cmap[i] < start) idx[ncols++] = cmap[i];
5019       else break;
5020     }
5021     imark = i;
5022     for (i=0; i<nzA; i++) idx[ncols++] = start + i;
5023     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i];
5024     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&iscola);CHKERRQ(ierr);
5025   } else {
5026     iscola = *col;
5027   }
5028   if (scall != MAT_INITIAL_MATRIX) {
5029     ierr    = PetscMalloc1(1,&aloc);CHKERRQ(ierr);
5030     aloc[0] = *A_loc;
5031   }
5032   ierr   = MatCreateSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr);
5033   *A_loc = aloc[0];
5034   ierr   = PetscFree(aloc);CHKERRQ(ierr);
5035   if (!row) {
5036     ierr = ISDestroy(&isrowa);CHKERRQ(ierr);
5037   }
5038   if (!col) {
5039     ierr = ISDestroy(&iscola);CHKERRQ(ierr);
5040   }
5041   ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
5042   PetscFunctionReturn(0);
5043 }
5044 
5045 /*@C
5046     MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns of local A
5047 
5048     Collective on Mat
5049 
5050    Input Parameters:
5051 +    A,B - the matrices in mpiaij format
5052 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5053 -    rowb, colb - index sets of rows and columns of B to extract (or NULL)
5054 
5055    Output Parameter:
5056 +    rowb, colb - index sets of rows and columns of B to extract
5057 -    B_seq - the sequential matrix generated
5058 
5059     Level: developer
5060 
5061 @*/
5062 PetscErrorCode MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,Mat *B_seq)
5063 {
5064   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
5065   PetscErrorCode ierr;
5066   PetscInt       *idx,i,start,ncols,nzA,nzB,*cmap,imark;
5067   IS             isrowb,iscolb;
5068   Mat            *bseq=NULL;
5069 
5070   PetscFunctionBegin;
5071   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
5072     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
5073   }
5074   ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
5075 
5076   if (scall == MAT_INITIAL_MATRIX) {
5077     start = A->cmap->rstart;
5078     cmap  = a->garray;
5079     nzA   = a->A->cmap->n;
5080     nzB   = a->B->cmap->n;
5081     ierr  = PetscMalloc1(nzA+nzB, &idx);CHKERRQ(ierr);
5082     ncols = 0;
5083     for (i=0; i<nzB; i++) {  /* row < local row index */
5084       if (cmap[i] < start) idx[ncols++] = cmap[i];
5085       else break;
5086     }
5087     imark = i;
5088     for (i=0; i<nzA; i++) idx[ncols++] = start + i;  /* local rows */
5089     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */
5090     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&isrowb);CHKERRQ(ierr);
5091     ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr);
5092   } else {
5093     if (!rowb || !colb) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX");
5094     isrowb  = *rowb; iscolb = *colb;
5095     ierr    = PetscMalloc1(1,&bseq);CHKERRQ(ierr);
5096     bseq[0] = *B_seq;
5097   }
5098   ierr   = MatCreateSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr);
5099   *B_seq = bseq[0];
5100   ierr   = PetscFree(bseq);CHKERRQ(ierr);
5101   if (!rowb) {
5102     ierr = ISDestroy(&isrowb);CHKERRQ(ierr);
5103   } else {
5104     *rowb = isrowb;
5105   }
5106   if (!colb) {
5107     ierr = ISDestroy(&iscolb);CHKERRQ(ierr);
5108   } else {
5109     *colb = iscolb;
5110   }
5111   ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
5112   PetscFunctionReturn(0);
5113 }
5114 
5115 /*
5116     MatGetBrowsOfAoCols_MPIAIJ - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns
5117     of the OFF-DIAGONAL portion of local A
5118 
5119     Collective on Mat
5120 
5121    Input Parameters:
5122 +    A,B - the matrices in mpiaij format
5123 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5124 
5125    Output Parameter:
5126 +    startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or NULL)
5127 .    startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or NULL)
5128 .    bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or NULL)
5129 -    B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N
5130 
5131     Level: developer
5132 
5133 */
5134 PetscErrorCode MatGetBrowsOfAoCols_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscInt **startsj_s,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth)
5135 {
5136   VecScatter_MPI_General *gen_to,*gen_from;
5137   PetscErrorCode         ierr;
5138   Mat_MPIAIJ             *a=(Mat_MPIAIJ*)A->data;
5139   Mat_SeqAIJ             *b_oth;
5140   VecScatter             ctx;
5141   MPI_Comm               comm;
5142   PetscMPIInt            *rprocs,*sprocs,tag,rank;
5143   PetscInt               *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap->n,row,*b_othi,*b_othj;
5144   PetscInt               *rvalues,*svalues;
5145   MatScalar              *b_otha,*bufa,*bufA;
5146   PetscInt               i,j,k,l,ll,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len;
5147   MPI_Request            *rwaits = NULL,*swaits = NULL;
5148   MPI_Status             *sstatus,rstatus;
5149   PetscMPIInt            jj,size;
5150   PetscInt               *cols,sbs,rbs;
5151   PetscScalar            *vals;
5152 
5153   PetscFunctionBegin;
5154   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
5155   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
5156 
5157   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
5158     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
5159   }
5160   ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
5161   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
5162 
5163   if (size == 1) {
5164     startsj_s = NULL;
5165     bufa_ptr  = NULL;
5166     *B_oth    = NULL;
5167     PetscFunctionReturn(0);
5168   }
5169 
5170   ctx = a->Mvctx;
5171   if (a->Mvctx->mpi3 && !a->Mvctx_mpi1) {
5172     /* a->Mvctx is type of MPI3 which is not implemented for Mat-Mat ops,
5173      thus create a->Mvctx_mpi1 */
5174     a->Mvctx_mpi1_flg = PETSC_TRUE;
5175     ierr = MatSetUpMultiply_MPIAIJ(A);CHKERRQ(ierr);
5176     ctx = a->Mvctx_mpi1;
5177   }
5178   tag = ((PetscObject)ctx)->tag;
5179 
5180   gen_to   = (VecScatter_MPI_General*)ctx->todata;
5181   gen_from = (VecScatter_MPI_General*)ctx->fromdata;
5182   nrecvs   = gen_from->n;
5183   nsends   = gen_to->n;
5184 
5185   ierr    = PetscMalloc2(nrecvs,&rwaits,nsends,&swaits);CHKERRQ(ierr);
5186   srow    = gen_to->indices;    /* local row index to be sent */
5187   sstarts = gen_to->starts;
5188   sprocs  = gen_to->procs;
5189   sstatus = gen_to->sstatus;
5190   sbs     = gen_to->bs;
5191   rstarts = gen_from->starts;
5192   rprocs  = gen_from->procs;
5193   rbs     = gen_from->bs;
5194 
5195   if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX;
5196   if (scall == MAT_INITIAL_MATRIX) {
5197     /* i-array */
5198     /*---------*/
5199     /*  post receives */
5200     ierr = PetscMalloc1(rbs*(rstarts[nrecvs] - rstarts[0]),&rvalues);CHKERRQ(ierr);
5201     for (i=0; i<nrecvs; i++) {
5202       rowlen = rvalues + rstarts[i]*rbs;
5203       nrows  = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */
5204       ierr   = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5205     }
5206 
5207     /* pack the outgoing message */
5208     ierr = PetscMalloc2(nsends+1,&sstartsj,nrecvs+1,&rstartsj);CHKERRQ(ierr);
5209 
5210     sstartsj[0] = 0;
5211     rstartsj[0] = 0;
5212     len         = 0; /* total length of j or a array to be sent */
5213     k           = 0;
5214     ierr = PetscMalloc1(sbs*(sstarts[nsends] - sstarts[0]),&svalues);CHKERRQ(ierr);
5215     for (i=0; i<nsends; i++) {
5216       rowlen = svalues + sstarts[i]*sbs;
5217       nrows  = sstarts[i+1]-sstarts[i]; /* num of block rows */
5218       for (j=0; j<nrows; j++) {
5219         row = srow[k] + B->rmap->range[rank]; /* global row idx */
5220         for (l=0; l<sbs; l++) {
5221           ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr); /* rowlength */
5222 
5223           rowlen[j*sbs+l] = ncols;
5224 
5225           len += ncols;
5226           ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr);
5227         }
5228         k++;
5229       }
5230       ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5231 
5232       sstartsj[i+1] = len;  /* starting point of (i+1)-th outgoing msg in bufj and bufa */
5233     }
5234     /* recvs and sends of i-array are completed */
5235     i = nrecvs;
5236     while (i--) {
5237       ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5238     }
5239     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
5240     ierr = PetscFree(svalues);CHKERRQ(ierr);
5241 
5242     /* allocate buffers for sending j and a arrays */
5243     ierr = PetscMalloc1(len+1,&bufj);CHKERRQ(ierr);
5244     ierr = PetscMalloc1(len+1,&bufa);CHKERRQ(ierr);
5245 
5246     /* create i-array of B_oth */
5247     ierr = PetscMalloc1(aBn+2,&b_othi);CHKERRQ(ierr);
5248 
5249     b_othi[0] = 0;
5250     len       = 0; /* total length of j or a array to be received */
5251     k         = 0;
5252     for (i=0; i<nrecvs; i++) {
5253       rowlen = rvalues + rstarts[i]*rbs;
5254       nrows  = rbs*(rstarts[i+1]-rstarts[i]); /* num of rows to be received */
5255       for (j=0; j<nrows; j++) {
5256         b_othi[k+1] = b_othi[k] + rowlen[j];
5257         ierr = PetscIntSumError(rowlen[j],len,&len);CHKERRQ(ierr);
5258         k++;
5259       }
5260       rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */
5261     }
5262     ierr = PetscFree(rvalues);CHKERRQ(ierr);
5263 
5264     /* allocate space for j and a arrrays of B_oth */
5265     ierr = PetscMalloc1(b_othi[aBn]+1,&b_othj);CHKERRQ(ierr);
5266     ierr = PetscMalloc1(b_othi[aBn]+1,&b_otha);CHKERRQ(ierr);
5267 
5268     /* j-array */
5269     /*---------*/
5270     /*  post receives of j-array */
5271     for (i=0; i<nrecvs; i++) {
5272       nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
5273       ierr  = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5274     }
5275 
5276     /* pack the outgoing message j-array */
5277     k = 0;
5278     for (i=0; i<nsends; i++) {
5279       nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5280       bufJ  = bufj+sstartsj[i];
5281       for (j=0; j<nrows; j++) {
5282         row = srow[k++] + B->rmap->range[rank];  /* global row idx */
5283         for (ll=0; ll<sbs; ll++) {
5284           ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr);
5285           for (l=0; l<ncols; l++) {
5286             *bufJ++ = cols[l];
5287           }
5288           ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr);
5289         }
5290       }
5291       ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5292     }
5293 
5294     /* recvs and sends of j-array are completed */
5295     i = nrecvs;
5296     while (i--) {
5297       ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5298     }
5299     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
5300   } else if (scall == MAT_REUSE_MATRIX) {
5301     sstartsj = *startsj_s;
5302     rstartsj = *startsj_r;
5303     bufa     = *bufa_ptr;
5304     b_oth    = (Mat_SeqAIJ*)(*B_oth)->data;
5305     b_otha   = b_oth->a;
5306   } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not posses an object container");
5307 
5308   /* a-array */
5309   /*---------*/
5310   /*  post receives of a-array */
5311   for (i=0; i<nrecvs; i++) {
5312     nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
5313     ierr  = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5314   }
5315 
5316   /* pack the outgoing message a-array */
5317   k = 0;
5318   for (i=0; i<nsends; i++) {
5319     nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5320     bufA  = bufa+sstartsj[i];
5321     for (j=0; j<nrows; j++) {
5322       row = srow[k++] + B->rmap->range[rank];  /* global row idx */
5323       for (ll=0; ll<sbs; ll++) {
5324         ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr);
5325         for (l=0; l<ncols; l++) {
5326           *bufA++ = vals[l];
5327         }
5328         ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr);
5329       }
5330     }
5331     ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5332   }
5333   /* recvs and sends of a-array are completed */
5334   i = nrecvs;
5335   while (i--) {
5336     ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5337   }
5338   if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
5339   ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr);
5340 
5341   if (scall == MAT_INITIAL_MATRIX) {
5342     /* put together the new matrix */
5343     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr);
5344 
5345     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
5346     /* Since these are PETSc arrays, change flags to free them as necessary. */
5347     b_oth          = (Mat_SeqAIJ*)(*B_oth)->data;
5348     b_oth->free_a  = PETSC_TRUE;
5349     b_oth->free_ij = PETSC_TRUE;
5350     b_oth->nonew   = 0;
5351 
5352     ierr = PetscFree(bufj);CHKERRQ(ierr);
5353     if (!startsj_s || !bufa_ptr) {
5354       ierr = PetscFree2(sstartsj,rstartsj);CHKERRQ(ierr);
5355       ierr = PetscFree(bufa_ptr);CHKERRQ(ierr);
5356     } else {
5357       *startsj_s = sstartsj;
5358       *startsj_r = rstartsj;
5359       *bufa_ptr  = bufa;
5360     }
5361   }
5362   ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
5363   PetscFunctionReturn(0);
5364 }
5365 
5366 /*@C
5367   MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication.
5368 
5369   Not Collective
5370 
5371   Input Parameters:
5372 . A - The matrix in mpiaij format
5373 
5374   Output Parameter:
5375 + lvec - The local vector holding off-process values from the argument to a matrix-vector product
5376 . colmap - A map from global column index to local index into lvec
5377 - multScatter - A scatter from the argument of a matrix-vector product to lvec
5378 
5379   Level: developer
5380 
5381 @*/
5382 #if defined(PETSC_USE_CTABLE)
5383 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter)
5384 #else
5385 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter)
5386 #endif
5387 {
5388   Mat_MPIAIJ *a;
5389 
5390   PetscFunctionBegin;
5391   PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
5392   PetscValidPointer(lvec, 2);
5393   PetscValidPointer(colmap, 3);
5394   PetscValidPointer(multScatter, 4);
5395   a = (Mat_MPIAIJ*) A->data;
5396   if (lvec) *lvec = a->lvec;
5397   if (colmap) *colmap = a->colmap;
5398   if (multScatter) *multScatter = a->Mvctx;
5399   PetscFunctionReturn(0);
5400 }
5401 
5402 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat,MatType,MatReuse,Mat*);
5403 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJPERM(Mat,MatType,MatReuse,Mat*);
5404 #if defined(PETSC_HAVE_MKL_SPARSE)
5405 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJMKL(Mat,MatType,MatReuse,Mat*);
5406 #endif
5407 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat,MatType,MatReuse,Mat*);
5408 #if defined(PETSC_HAVE_ELEMENTAL)
5409 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_Elemental(Mat,MatType,MatReuse,Mat*);
5410 #endif
5411 #if defined(PETSC_HAVE_HYPRE)
5412 PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat,MatType,MatReuse,Mat*);
5413 PETSC_INTERN PetscErrorCode MatMatMatMult_Transpose_AIJ_AIJ(Mat,Mat,Mat,MatReuse,PetscReal,Mat*);
5414 #endif
5415 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_IS(Mat,MatType,MatReuse,Mat*);
5416 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISELL(Mat,MatType,MatReuse,Mat*);
5417 
5418 /*
5419     Computes (B'*A')' since computing B*A directly is untenable
5420 
5421                n                       p                          p
5422         (              )       (              )         (                  )
5423       m (      A       )  *  n (       B      )   =   m (         C        )
5424         (              )       (              )         (                  )
5425 
5426 */
5427 PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C)
5428 {
5429   PetscErrorCode ierr;
5430   Mat            At,Bt,Ct;
5431 
5432   PetscFunctionBegin;
5433   ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr);
5434   ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr);
5435   ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,1.0,&Ct);CHKERRQ(ierr);
5436   ierr = MatDestroy(&At);CHKERRQ(ierr);
5437   ierr = MatDestroy(&Bt);CHKERRQ(ierr);
5438   ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr);
5439   ierr = MatDestroy(&Ct);CHKERRQ(ierr);
5440   PetscFunctionReturn(0);
5441 }
5442 
5443 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat *C)
5444 {
5445   PetscErrorCode ierr;
5446   PetscInt       m=A->rmap->n,n=B->cmap->n;
5447   Mat            Cmat;
5448 
5449   PetscFunctionBegin;
5450   if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %d != B->rmap->n %d\n",A->cmap->n,B->rmap->n);
5451   ierr = MatCreate(PetscObjectComm((PetscObject)A),&Cmat);CHKERRQ(ierr);
5452   ierr = MatSetSizes(Cmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
5453   ierr = MatSetBlockSizesFromMats(Cmat,A,B);CHKERRQ(ierr);
5454   ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr);
5455   ierr = MatMPIDenseSetPreallocation(Cmat,NULL);CHKERRQ(ierr);
5456   ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5457   ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5458 
5459   Cmat->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIAIJ;
5460 
5461   *C = Cmat;
5462   PetscFunctionReturn(0);
5463 }
5464 
5465 /* ----------------------------------------------------------------*/
5466 PETSC_INTERN PetscErrorCode MatMatMult_MPIDense_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C)
5467 {
5468   PetscErrorCode ierr;
5469 
5470   PetscFunctionBegin;
5471   if (scall == MAT_INITIAL_MATRIX) {
5472     ierr = PetscLogEventBegin(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr);
5473     ierr = MatMatMultSymbolic_MPIDense_MPIAIJ(A,B,fill,C);CHKERRQ(ierr);
5474     ierr = PetscLogEventEnd(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr);
5475   }
5476   ierr = PetscLogEventBegin(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr);
5477   ierr = MatMatMultNumeric_MPIDense_MPIAIJ(A,B,*C);CHKERRQ(ierr);
5478   ierr = PetscLogEventEnd(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr);
5479   PetscFunctionReturn(0);
5480 }
5481 
5482 /*MC
5483    MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.
5484 
5485    Options Database Keys:
5486 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions()
5487 
5488   Level: beginner
5489 
5490 .seealso: MatCreateAIJ()
5491 M*/
5492 
5493 PETSC_EXTERN PetscErrorCode MatCreate_MPIAIJ(Mat B)
5494 {
5495   Mat_MPIAIJ     *b;
5496   PetscErrorCode ierr;
5497   PetscMPIInt    size;
5498 
5499   PetscFunctionBegin;
5500   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRQ(ierr);
5501 
5502   ierr          = PetscNewLog(B,&b);CHKERRQ(ierr);
5503   B->data       = (void*)b;
5504   ierr          = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
5505   B->assembled  = PETSC_FALSE;
5506   B->insertmode = NOT_SET_VALUES;
5507   b->size       = size;
5508 
5509   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank);CHKERRQ(ierr);
5510 
5511   /* build cache for off array entries formed */
5512   ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash);CHKERRQ(ierr);
5513 
5514   b->donotstash  = PETSC_FALSE;
5515   b->colmap      = 0;
5516   b->garray      = 0;
5517   b->roworiented = PETSC_TRUE;
5518 
5519   /* stuff used for matrix vector multiply */
5520   b->lvec  = NULL;
5521   b->Mvctx = NULL;
5522 
5523   /* stuff for MatGetRow() */
5524   b->rowindices   = 0;
5525   b->rowvalues    = 0;
5526   b->getrowactive = PETSC_FALSE;
5527 
5528   /* flexible pointer used in CUSP/CUSPARSE classes */
5529   b->spptr = NULL;
5530 
5531   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetUseScalableIncreaseOverlap_C",MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ);CHKERRQ(ierr);
5532   ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPIAIJ);CHKERRQ(ierr);
5533   ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPIAIJ);CHKERRQ(ierr);
5534   ierr = PetscObjectComposeFunction((PetscObject)B,"MatIsTranspose_C",MatIsTranspose_MPIAIJ);CHKERRQ(ierr);
5535   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr);
5536   ierr = PetscObjectComposeFunction((PetscObject)B,"MatResetPreallocation_C",MatResetPreallocation_MPIAIJ);CHKERRQ(ierr);
5537   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr);
5538   ierr = PetscObjectComposeFunction((PetscObject)B,"MatDiagonalScaleLocal_C",MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr);
5539   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijperm_C",MatConvert_MPIAIJ_MPIAIJPERM);CHKERRQ(ierr);
5540 #if defined(PETSC_HAVE_MKL_SPARSE)
5541   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijmkl_C",MatConvert_MPIAIJ_MPIAIJMKL);CHKERRQ(ierr);
5542 #endif
5543   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijcrl_C",MatConvert_MPIAIJ_MPIAIJCRL);CHKERRQ(ierr);
5544   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C",MatConvert_MPIAIJ_MPISBAIJ);CHKERRQ(ierr);
5545 #if defined(PETSC_HAVE_ELEMENTAL)
5546   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_elemental_C",MatConvert_MPIAIJ_Elemental);CHKERRQ(ierr);
5547 #endif
5548 #if defined(PETSC_HAVE_HYPRE)
5549   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_hypre_C",MatConvert_AIJ_HYPRE);CHKERRQ(ierr);
5550 #endif
5551   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_is_C",MatConvert_MPIAIJ_IS);CHKERRQ(ierr);
5552   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisell_C",MatConvert_MPIAIJ_MPISELL);CHKERRQ(ierr);
5553   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMult_mpidense_mpiaij_C",MatMatMult_MPIDense_MPIAIJ);CHKERRQ(ierr);
5554   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultSymbolic_mpidense_mpiaij_C",MatMatMultSymbolic_MPIDense_MPIAIJ);CHKERRQ(ierr);
5555   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultNumeric_mpidense_mpiaij_C",MatMatMultNumeric_MPIDense_MPIAIJ);CHKERRQ(ierr);
5556 #if defined(PETSC_HAVE_HYPRE)
5557   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMatMult_transpose_mpiaij_mpiaij_C",MatMatMatMult_Transpose_AIJ_AIJ);CHKERRQ(ierr);
5558 #endif
5559   ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr);
5560   PetscFunctionReturn(0);
5561 }
5562 
5563 /*@C
5564      MatCreateMPIAIJWithSplitArrays - creates a MPI AIJ matrix using arrays that contain the "diagonal"
5565          and "off-diagonal" part of the matrix in CSR format.
5566 
5567    Collective on MPI_Comm
5568 
5569    Input Parameters:
5570 +  comm - MPI communicator
5571 .  m - number of local rows (Cannot be PETSC_DECIDE)
5572 .  n - This value should be the same as the local size used in creating the
5573        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
5574        calculated if N is given) For square matrices n is almost always m.
5575 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
5576 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
5577 .   i - row indices for "diagonal" portion of matrix
5578 .   j - column indices
5579 .   a - matrix values
5580 .   oi - row indices for "off-diagonal" portion of matrix
5581 .   oj - column indices
5582 -   oa - matrix values
5583 
5584    Output Parameter:
5585 .   mat - the matrix
5586 
5587    Level: advanced
5588 
5589    Notes:
5590        The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc. The user
5591        must free the arrays once the matrix has been destroyed and not before.
5592 
5593        The i and j indices are 0 based
5594 
5595        See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix
5596 
5597        This sets local rows and cannot be used to set off-processor values.
5598 
5599        Use of this routine is discouraged because it is inflexible and cumbersome to use. It is extremely rare that a
5600        legacy application natively assembles into exactly this split format. The code to do so is nontrivial and does
5601        not easily support in-place reassembly. It is recommended to use MatSetValues() (or a variant thereof) because
5602        the resulting assembly is easier to implement, will work with any matrix format, and the user does not have to
5603        keep track of the underlying array. Use MatSetOption(A,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) to disable all
5604        communication if it is known that only local entries will be set.
5605 
5606 .keywords: matrix, aij, compressed row, sparse, parallel
5607 
5608 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
5609           MATMPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithArrays()
5610 @*/
5611 PetscErrorCode MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat)
5612 {
5613   PetscErrorCode ierr;
5614   Mat_MPIAIJ     *maij;
5615 
5616   PetscFunctionBegin;
5617   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
5618   if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
5619   if (oi[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0");
5620   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
5621   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
5622   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
5623   maij = (Mat_MPIAIJ*) (*mat)->data;
5624 
5625   (*mat)->preallocated = PETSC_TRUE;
5626 
5627   ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr);
5628   ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr);
5629 
5630   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr);
5631   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr);
5632 
5633   ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5634   ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5635   ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5636   ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5637 
5638   ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr);
5639   ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5640   ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5641   ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_FALSE);CHKERRQ(ierr);
5642   ierr = MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
5643   PetscFunctionReturn(0);
5644 }
5645 
5646 /*
5647     Special version for direct calls from Fortran
5648 */
5649 #include <petsc/private/fortranimpl.h>
5650 
5651 /* Change these macros so can be used in void function */
5652 #undef CHKERRQ
5653 #define CHKERRQ(ierr) CHKERRABORT(PETSC_COMM_WORLD,ierr)
5654 #undef SETERRQ2
5655 #define SETERRQ2(comm,ierr,b,c,d) CHKERRABORT(comm,ierr)
5656 #undef SETERRQ3
5657 #define SETERRQ3(comm,ierr,b,c,d,e) CHKERRABORT(comm,ierr)
5658 #undef SETERRQ
5659 #define SETERRQ(c,ierr,b) CHKERRABORT(c,ierr)
5660 
5661 #if defined(PETSC_HAVE_FORTRAN_CAPS)
5662 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ
5663 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
5664 #define matsetvaluesmpiaij_ matsetvaluesmpiaij
5665 #else
5666 #endif
5667 PETSC_EXTERN void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr)
5668 {
5669   Mat            mat  = *mmat;
5670   PetscInt       m    = *mm, n = *mn;
5671   InsertMode     addv = *maddv;
5672   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
5673   PetscScalar    value;
5674   PetscErrorCode ierr;
5675 
5676   MatCheckPreallocated(mat,1);
5677   if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv;
5678 
5679 #if defined(PETSC_USE_DEBUG)
5680   else if (mat->insertmode != addv) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
5681 #endif
5682   {
5683     PetscInt  i,j,rstart  = mat->rmap->rstart,rend = mat->rmap->rend;
5684     PetscInt  cstart      = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
5685     PetscBool roworiented = aij->roworiented;
5686 
5687     /* Some Variables required in the macro */
5688     Mat        A                 = aij->A;
5689     Mat_SeqAIJ *a                = (Mat_SeqAIJ*)A->data;
5690     PetscInt   *aimax            = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
5691     MatScalar  *aa               = a->a;
5692     PetscBool  ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES)) ? PETSC_TRUE : PETSC_FALSE);
5693     Mat        B                 = aij->B;
5694     Mat_SeqAIJ *b                = (Mat_SeqAIJ*)B->data;
5695     PetscInt   *bimax            = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
5696     MatScalar  *ba               = b->a;
5697 
5698     PetscInt  *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
5699     PetscInt  nonew = a->nonew;
5700     MatScalar *ap1,*ap2;
5701 
5702     PetscFunctionBegin;
5703     for (i=0; i<m; i++) {
5704       if (im[i] < 0) continue;
5705 #if defined(PETSC_USE_DEBUG)
5706       if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
5707 #endif
5708       if (im[i] >= rstart && im[i] < rend) {
5709         row      = im[i] - rstart;
5710         lastcol1 = -1;
5711         rp1      = aj + ai[row];
5712         ap1      = aa + ai[row];
5713         rmax1    = aimax[row];
5714         nrow1    = ailen[row];
5715         low1     = 0;
5716         high1    = nrow1;
5717         lastcol2 = -1;
5718         rp2      = bj + bi[row];
5719         ap2      = ba + bi[row];
5720         rmax2    = bimax[row];
5721         nrow2    = bilen[row];
5722         low2     = 0;
5723         high2    = nrow2;
5724 
5725         for (j=0; j<n; j++) {
5726           if (roworiented) value = v[i*n+j];
5727           else value = v[i+j*m];
5728           if (in[j] >= cstart && in[j] < cend) {
5729             col = in[j] - cstart;
5730             if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && row != col) continue;
5731             MatSetValues_SeqAIJ_A_Private(row,col,value,addv,im[i],in[j]);
5732           } else if (in[j] < 0) continue;
5733 #if defined(PETSC_USE_DEBUG)
5734           else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
5735 #endif
5736           else {
5737             if (mat->was_assembled) {
5738               if (!aij->colmap) {
5739                 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
5740               }
5741 #if defined(PETSC_USE_CTABLE)
5742               ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
5743               col--;
5744 #else
5745               col = aij->colmap[in[j]] - 1;
5746 #endif
5747               if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && row != col) continue;
5748               if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
5749                 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
5750                 col  =  in[j];
5751                 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
5752                 B     = aij->B;
5753                 b     = (Mat_SeqAIJ*)B->data;
5754                 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
5755                 rp2   = bj + bi[row];
5756                 ap2   = ba + bi[row];
5757                 rmax2 = bimax[row];
5758                 nrow2 = bilen[row];
5759                 low2  = 0;
5760                 high2 = nrow2;
5761                 bm    = aij->B->rmap->n;
5762                 ba    = b->a;
5763               }
5764             } else col = in[j];
5765             MatSetValues_SeqAIJ_B_Private(row,col,value,addv,im[i],in[j]);
5766           }
5767         }
5768       } else if (!aij->donotstash) {
5769         if (roworiented) {
5770           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
5771         } else {
5772           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
5773         }
5774       }
5775     }
5776   }
5777   PetscFunctionReturnVoid();
5778 }
5779 
5780