/* xref: /petsc/src/mat/impls/aij/mpi/mpiaij.c (revision feff33ee0b5b037fa8f9f294dede656a2f85cc47) */


#include <../src/mat/impls/aij/mpi/mpiaij.h>   /*I "petscmat.h" I*/
#include <petsc/private/vecimpl.h>
#include <petsc/private/isimpl.h>
#include <petscblaslapack.h>
#include <petscsf.h>

/*MC
   MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices.

   This matrix type is identical to MATSEQAIJ when constructed with a single process communicator,
   and MATMPIAIJ otherwise.  As a result, for single process communicators,
   MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
   for communicators controlling multiple processes.  It is recommended that you call both of
   the above preallocation routines for simplicity.

   Options Database Keys:
. -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions()

  Developer Notes: Subclasses include MATAIJCUSP, MATAIJCUSPARSE, MATAIJPERM, MATAIJMKL, and MATAIJCRL; the type also
   automatically switches over to use inodes when enough exist.

  Level: beginner

.seealso: MatCreateAIJ(), MatCreateSeqAIJ(), MATSEQAIJ, MATMPIAIJ
M*/
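/*
   Example usage (a minimal sketch, not part of this file): create an AIJ matrix and
   call both preallocation routines as recommended above.  The estimates of 5 nonzeros
   per row in the diagonal block and 2 in the off-diagonal block are illustrative
   assumptions, not requirements.

     Mat            A;
     PetscErrorCode ierr;

     ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
     ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,100,100);CHKERRQ(ierr);
     ierr = MatSetType(A,MATAIJ);CHKERRQ(ierr);
     ierr = MatSeqAIJSetPreallocation(A,5,NULL);CHKERRQ(ierr);
     ierr = MatMPIAIJSetPreallocation(A,5,NULL,2,NULL);CHKERRQ(ierr);

   Only the preallocation call matching the communicator size takes effect; calling
   both keeps the code correct on one process and on many.
*/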

/*MC
   MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices.

   This matrix type is identical to MATSEQAIJCRL when constructed with a single process communicator,
   and MATMPIAIJCRL otherwise.  As a result, for single process communicators,
   MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
   for communicators controlling multiple processes.  It is recommended that you call both of
   the above preallocation routines for simplicity.

   Options Database Keys:
. -mat_type aijcrl - sets the matrix type to "aijcrl" during a call to MatSetFromOptions()

  Level: beginner

.seealso: MatCreateMPIAIJCRL(), MATSEQAIJCRL, MATMPIAIJCRL
M*/
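/*
   Example usage (a minimal sketch): let the matrix type be selected at run time via
   the options database, as described above.

     Mat            A;
     PetscErrorCode ierr;

     ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
     ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,100,100);CHKERRQ(ierr);
     ierr = MatSetFromOptions(A);CHKERRQ(ierr);

   and run the program with
     ./prog -mat_type aijcrl
*/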

PetscErrorCode MatSetBlockSizes_MPIAIJ(Mat M, PetscInt rbs, PetscInt cbs)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)M->data;

  PetscFunctionBegin;
  if (mat->A) {
    ierr = MatSetBlockSizes(mat->A,rbs,cbs);CHKERRQ(ierr);
    ierr = MatSetBlockSizes(mat->B,rbs,1);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatFindNonzeroRows_MPIAIJ(Mat M,IS *keptrows)
{
  PetscErrorCode  ierr;
  Mat_MPIAIJ      *mat = (Mat_MPIAIJ*)M->data;
  Mat_SeqAIJ      *a   = (Mat_SeqAIJ*)mat->A->data;
  Mat_SeqAIJ      *b   = (Mat_SeqAIJ*)mat->B->data;
  const PetscInt  *ia,*ib;
  const MatScalar *aa,*bb;
  PetscInt        na,nb,i,j,*rows,cnt=0,n0rows;
  PetscInt        m = M->rmap->n,rstart = M->rmap->rstart;

  PetscFunctionBegin;
  *keptrows = 0;
  ia        = a->i;
  ib        = b->i;
  /* first pass: count the locally empty (all-zero) rows */
  for (i=0; i<m; i++) {
    na = ia[i+1] - ia[i];
    nb = ib[i+1] - ib[i];
    if (!na && !nb) {
      cnt++;
      goto ok1;
    }
    aa = a->a + ia[i];
    for (j=0; j<na; j++) {
      if (aa[j] != 0.0) goto ok1;
    }
    bb = b->a + ib[i];
    for (j=0; j<nb; j++) {
      if (bb[j] != 0.0) goto ok1;
    }
    cnt++;
ok1:;
  }
  /* if no process found a zero row, every row is kept and *keptrows stays NULL */
  ierr = MPIU_Allreduce(&cnt,&n0rows,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)M));CHKERRQ(ierr);
  if (!n0rows) PetscFunctionReturn(0);
  /* second pass: record the global indices of the rows that contain a nonzero */
  ierr = PetscMalloc1(M->rmap->n-cnt,&rows);CHKERRQ(ierr);
  cnt  = 0;
  for (i=0; i<m; i++) {
    na = ia[i+1] - ia[i];
    nb = ib[i+1] - ib[i];
    if (!na && !nb) continue;
    aa = a->a + ia[i];
    for (j=0; j<na; j++) {
      if (aa[j] != 0.0) {
        rows[cnt++] = rstart + i;
        goto ok2;
      }
    }
    bb = b->a + ib[i];
    for (j=0; j<nb; j++) {
      if (bb[j] != 0.0) {
        rows[cnt++] = rstart + i;
        goto ok2;
      }
    }
ok2:;
  }
  ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),cnt,rows,PETSC_OWN_POINTER,keptrows);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
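/*
   Example usage (a minimal sketch): obtain the globally kept (nonzero) rows of a
   matrix M and release the index set afterwards.  Note that this implementation
   leaves keptrows NULL when every row already contains a nonzero, so the result
   must be tested before use.

     IS keptrows;
     ierr = MatFindNonzeroRows(M,&keptrows);CHKERRQ(ierr);
     if (keptrows) {
       ierr = ISView(keptrows,PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr);
       ierr = ISDestroy(&keptrows);CHKERRQ(ierr);
     }
*/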

PetscErrorCode MatDiagonalSet_MPIAIJ(Mat Y,Vec D,InsertMode is)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*) Y->data;

  PetscFunctionBegin;
  if (Y->assembled && Y->rmap->rstart == Y->cmap->rstart && Y->rmap->rend == Y->cmap->rend) {
    ierr = MatDiagonalSet(aij->A,D,is);CHKERRQ(ierr);
  } else {
    ierr = MatDiagonalSet_Default(Y,D,is);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatFindZeroDiagonals_MPIAIJ(Mat M,IS *zrows)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)M->data;
  PetscErrorCode ierr;
  PetscInt       i,rstart,nrows,*rows;

  PetscFunctionBegin;
  *zrows = NULL;
  ierr   = MatFindZeroDiagonals_SeqAIJ_Private(aij->A,&nrows,&rows);CHKERRQ(ierr);
  ierr   = MatGetOwnershipRange(M,&rstart,NULL);CHKERRQ(ierr);
  for (i=0; i<nrows; i++) rows[i] += rstart;
  ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),nrows,rows,PETSC_OWN_POINTER,zrows);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatGetColumnNorms_MPIAIJ(Mat A,NormType type,PetscReal *norms)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)A->data;
  PetscInt       i,n,*garray = aij->garray;
  Mat_SeqAIJ     *a_aij = (Mat_SeqAIJ*) aij->A->data;
  Mat_SeqAIJ     *b_aij = (Mat_SeqAIJ*) aij->B->data;
  PetscReal      *work;

  PetscFunctionBegin;
  /* note: every process accumulates into a work array whose length is the GLOBAL number of columns */
  ierr = MatGetSize(A,NULL,&n);CHKERRQ(ierr);
  ierr = PetscCalloc1(n,&work);CHKERRQ(ierr);
  if (type == NORM_2) {
    for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
      work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]*a_aij->a[i]);
    }
    for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
      work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]*b_aij->a[i]);
    }
  } else if (type == NORM_1) {
    for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
      work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]);
    }
    for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
      work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]);
    }
  } else if (type == NORM_INFINITY) {
    for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
      work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]);
    }
    for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
      work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]),work[garray[b_aij->j[i]]]);
    }
  } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Unknown NormType");
  if (type == NORM_INFINITY) {
    ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
  } else {
    ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
  }
  ierr = PetscFree(work);CHKERRQ(ierr);
  if (type == NORM_2) {
    for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]);
  }
  PetscFunctionReturn(0);
}
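/*
   Example usage (a minimal sketch): compute the 2-norm of every column.  The norms
   array must be as long as the GLOBAL number of columns on every process, mirroring
   the global-length work array used above.

     PetscInt  N;
     PetscReal *norms;
     ierr = MatGetSize(A,NULL,&N);CHKERRQ(ierr);
     ierr = PetscMalloc1(N,&norms);CHKERRQ(ierr);
     ierr = MatGetColumnNorms(A,NORM_2,norms);CHKERRQ(ierr);
     ierr = PetscFree(norms);CHKERRQ(ierr);
*/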

PetscErrorCode MatFindOffBlockDiagonalEntries_MPIAIJ(Mat A,IS *is)
{
  Mat_MPIAIJ      *a  = (Mat_MPIAIJ*)A->data;
  IS              sis,gis;
  PetscErrorCode  ierr;
  const PetscInt  *isis,*igis;
  PetscInt        n,*iis,nsis,ngis,rstart,i;

  PetscFunctionBegin;
  ierr = MatFindOffBlockDiagonalEntries(a->A,&sis);CHKERRQ(ierr);
  ierr = MatFindNonzeroRows(a->B,&gis);CHKERRQ(ierr);
  ierr = ISGetSize(gis,&ngis);CHKERRQ(ierr);
  ierr = ISGetSize(sis,&nsis);CHKERRQ(ierr);
  ierr = ISGetIndices(sis,&isis);CHKERRQ(ierr);
  ierr = ISGetIndices(gis,&igis);CHKERRQ(ierr);

  ierr = PetscMalloc1(ngis+nsis,&iis);CHKERRQ(ierr);
  ierr = PetscMemcpy(iis,igis,ngis*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = PetscMemcpy(iis+ngis,isis,nsis*sizeof(PetscInt));CHKERRQ(ierr);
  n    = ngis + nsis;
  ierr = PetscSortRemoveDupsInt(&n,iis);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr);
  for (i=0; i<n; i++) iis[i] += rstart;
  ierr = ISCreateGeneral(PetscObjectComm((PetscObject)A),n,iis,PETSC_OWN_POINTER,is);CHKERRQ(ierr);

  ierr = ISRestoreIndices(sis,&isis);CHKERRQ(ierr);
  ierr = ISRestoreIndices(gis,&igis);CHKERRQ(ierr);
  ierr = ISDestroy(&sis);CHKERRQ(ierr);
  ierr = ISDestroy(&gis);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
    Distributes a SeqAIJ matrix across a set of processes. Code stolen from
    MatLoad_MPIAIJ(). Horrible lack of reuse. Should be a routine for each matrix type.

    Only for square matrices.

    Used by a preconditioner, hence PETSC_EXTERN
*/
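/*
   Usage sketch (illustrative only; the constraints above apply).  Here gmat is a
   square SeqAIJ matrix whose contents matter only on rank 0 (it must still be a
   valid Mat on the other ranks, since its block sizes are queried everywhere), and
   m is the number of locally owned rows chosen by the caller so that the m's sum
   to the global size:

     Mat dmat;
     ierr = MatDistribute_MPIAIJ(PETSC_COMM_WORLD,gmat,m,MAT_INITIAL_MATRIX,&dmat);CHKERRQ(ierr);

   and later, to refresh just the numerical values from rank 0:

     ierr = MatDistribute_MPIAIJ(PETSC_COMM_WORLD,gmat,m,MAT_REUSE_MATRIX,&dmat);CHKERRQ(ierr);
*/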
PETSC_EXTERN PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat)
{
  PetscMPIInt    rank,size;
  PetscInt       *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz = 0,*gmataj,cnt,row,*ld,bses[2];
  PetscErrorCode ierr;
  Mat            mat;
  Mat_SeqAIJ     *gmata;
  PetscMPIInt    tag;
  MPI_Status     status;
  PetscBool      aij;
  MatScalar      *gmataa,*ao,*ad,*gmataarestore=0;

  PetscFunctionBegin;
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  if (!rank) {
    ierr = PetscObjectTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);CHKERRQ(ierr);
    if (!aij) SETERRQ1(PetscObjectComm((PetscObject)gmat),PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name);
  }
  if (reuse == MAT_INITIAL_MATRIX) {
    ierr = MatCreate(comm,&mat);CHKERRQ(ierr);
    ierr = MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
    ierr = MatGetBlockSizes(gmat,&bses[0],&bses[1]);CHKERRQ(ierr);
    ierr = MPI_Bcast(bses,2,MPIU_INT,0,comm);CHKERRQ(ierr);
    ierr = MatSetBlockSizes(mat,bses[0],bses[1]);CHKERRQ(ierr);
    ierr = MatSetType(mat,MATAIJ);CHKERRQ(ierr);
    ierr = PetscMalloc1(size+1,&rowners);CHKERRQ(ierr);
    ierr = PetscMalloc2(m,&dlens,m,&olens);CHKERRQ(ierr);
    ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);

    rowners[0] = 0;
    for (i=2; i<=size; i++) rowners[i] += rowners[i-1];
    rstart = rowners[rank];
    rend   = rowners[rank+1];
    ierr   = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
    if (!rank) {
      gmata = (Mat_SeqAIJ*) gmat->data;
      /* send row lengths to all processors */
      for (i=0; i<m; i++) dlens[i] = gmata->ilen[i];
      for (i=1; i<size; i++) {
        ierr = MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
      }
      /* determine the number of off-diagonal entries in each row (olens) and, in ld[], those to the left of the diagonal block */
      ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
      ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr);
      jj   = 0;
      for (i=0; i<m; i++) {
        for (j=0; j<dlens[i]; j++) {
          if (gmata->j[jj] < rstart) ld[i]++;
          if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++;
          jj++;
        }
      }
      /* send column indices to other processes */
      for (i=1; i<size; i++) {
        nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
        ierr = MPI_Send(&nz,1,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
        ierr = MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
      }

      /* send numerical values to other processes */
      for (i=1; i<size; i++) {
        nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
        ierr = MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
      }
      gmataa = gmata->a;
      gmataj = gmata->j;

    } else {
      /* receive row lengths */
      ierr = MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
      /* receive column indices */
      ierr = MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
      ierr = PetscMalloc2(nz,&gmataa,nz,&gmataj);CHKERRQ(ierr);
      ierr = MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
      /* determine the number of off-diagonal entries in each row (olens) and, in ld[], those to the left of the diagonal block */
      ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
      ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr);
      jj   = 0;
      for (i=0; i<m; i++) {
        for (j=0; j<dlens[i]; j++) {
          if (gmataj[jj] < rstart) ld[i]++;
          if (gmataj[jj] < rstart || gmataj[jj] >= rend) olens[i]++;
          jj++;
        }
      }
      /* receive numerical values */
      ierr = PetscMemzero(gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr);
      ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
    }
    /* set preallocation */
    for (i=0; i<m; i++) {
      dlens[i] -= olens[i];
    }
    ierr = MatSeqAIJSetPreallocation(mat,0,dlens);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);CHKERRQ(ierr);

    for (i=0; i<m; i++) {
      dlens[i] += olens[i];
    }
    cnt = 0;
    for (i=0; i<m; i++) {
      row  = rstart + i;
      ierr = MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);CHKERRQ(ierr);
      cnt += dlens[i];
    }
    if (rank) {
      ierr = PetscFree2(gmataa,gmataj);CHKERRQ(ierr);
    }
    ierr = PetscFree2(dlens,olens);CHKERRQ(ierr);
    ierr = PetscFree(rowners);CHKERRQ(ierr);

    ((Mat_MPIAIJ*)(mat->data))->ld = ld;

    *inmat = mat;
  } else {   /* column indices are already set; only need to move over numerical values from process 0 */
    Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data;
    Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data;
    mat  = *inmat;
    ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
    if (!rank) {
      /* send numerical values to other processes */
      gmata  = (Mat_SeqAIJ*) gmat->data;
      ierr   = MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);CHKERRQ(ierr);
      gmataa = gmata->a;
      for (i=1; i<size; i++) {
        nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
        ierr = MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
      }
      nz = gmata->i[rowners[1]]-gmata->i[rowners[0]];
    } else {
      /* receive numerical values from process 0 */
      nz   = Ad->nz + Ao->nz;
      ierr = PetscMalloc1(nz,&gmataa);CHKERRQ(ierr); gmataarestore = gmataa;
      ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
    }
    /* transfer numerical values into the diagonal A and off-diagonal B parts of mat */
    ld = ((Mat_MPIAIJ*)(mat->data))->ld;
    ad = Ad->a;
    ao = Ao->a;
    if (mat->rmap->n) {
      i  = 0;
      nz = ld[i];                                   ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
      nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
    }
    for (i=1; i<mat->rmap->n; i++) {
      nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
      nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
    }
    i--;
    if (mat->rmap->n) {
      nz = Ao->i[i+1] - Ao->i[i] - ld[i];           ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr);
    }
    if (rank) {
      ierr = PetscFree(gmataarestore);CHKERRQ(ierr);
    }
  }
  ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
  Local utility routine that creates a mapping from the global column
number to the local number in the off-diagonal part of the local
storage of the matrix.  When PETSC_USE_CTABLE is used this is scalable at
a slightly higher hash table cost; without it, it is not scalable (each process
has an order-N integer array) but it is fast to access.
*/
PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       n = aij->B->cmap->n,i;

  PetscFunctionBegin;
  if (!aij->garray) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"MPIAIJ Matrix was assembled but is missing garray");
#if defined(PETSC_USE_CTABLE)
  ierr = PetscTableCreate(n,mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr);
  for (i=0; i<n; i++) {
    ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1,INSERT_VALUES);CHKERRQ(ierr);
  }
#else
  ierr = PetscCalloc1(mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr);
  ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N+1)*sizeof(PetscInt));CHKERRQ(ierr);
  for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
#endif
  PetscFunctionReturn(0);
}
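/*
   Lookup sketch (illustrative): translating a global column number gcol into a local
   index into the off-diagonal block, mirroring how MatSetValues_MPIAIJ() below uses
   the colmap.  Keys and values are stored shifted by one so that 0 can mean "absent";
   a result of -1 therefore means the column is not present.

     PetscInt lcol;
   #if defined(PETSC_USE_CTABLE)
     ierr = PetscTableFind(aij->colmap,gcol+1,&lcol);CHKERRQ(ierr);
     lcol--;
   #else
     lcol = aij->colmap[gcol] - 1;
   #endif
*/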
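/*
   The two macros below insert a single value into the diagonal (A) or off-diagonal (B)
   block of the local storage.  Each first narrows the bisection window using the
   previously inserted column (lastcol), performs a bisection search on the sorted
   column indices of the row until the window is at most 5 entries wide, finishes with
   a linear scan, and then either updates an existing entry or shifts the tail of the
   row up by one slot to make room for the new nonzero, reallocating the row with
   MatSeqXAIJReallocateAIJ() if it is full.
*/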
#define MatSetValues_SeqAIJ_A_Private(row,col,value,addv,orow,ocol)     \
  { \
    if (col <= lastcol1) low1 = 0;      \
    else                high1 = nrow1;  \
    lastcol1 = col; \
    while (high1-low1 > 5) { \
      t = (low1+high1)/2; \
      if (rp1[t] > col) high1 = t; \
      else              low1  = t; \
    } \
    for (_i=low1; _i<high1; _i++) { \
      if (rp1[_i] > col) break; \
      if (rp1[_i] == col) { \
        if (addv == ADD_VALUES) ap1[_i] += value; \
        else                    ap1[_i] = value; \
        goto a_noinsert; \
      } \
    } \
    if (value == 0.0 && ignorezeroentries && row != col) {low1 = 0; high1 = nrow1; goto a_noinsert;} \
    if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;} \
    if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \
    MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \
    N = nrow1++ - 1; a->nz++; high1++; \
    /* shift up all the later entries in this row */ \
    for (ii=N; ii>=_i; ii--) { \
      rp1[ii+1] = rp1[ii]; \
      ap1[ii+1] = ap1[ii]; \
    } \
    rp1[_i] = col; \
    ap1[_i] = value; \
    A->nonzerostate++; \
    a_noinsert: ; \
    ailen[row] = nrow1; \
  }

#define MatSetValues_SeqAIJ_B_Private(row,col,value,addv,orow,ocol) \
  { \
    if (col <= lastcol2) low2 = 0;                        \
    else high2 = nrow2;                                   \
    lastcol2 = col;                                       \
    while (high2-low2 > 5) {                              \
      t = (low2+high2)/2;                                 \
      if (rp2[t] > col) high2 = t;                        \
      else             low2  = t;                         \
    }                                                     \
    for (_i=low2; _i<high2; _i++) {                       \
      if (rp2[_i] > col) break;                           \
      if (rp2[_i] == col) {                               \
        if (addv == ADD_VALUES) ap2[_i] += value;         \
        else                    ap2[_i] = value;          \
        goto b_noinsert;                                  \
      }                                                   \
    }                                                     \
    if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
    if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;}                        \
    if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \
    MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \
    N = nrow2++ - 1; b->nz++; high2++;                    \
    /* shift up all the later entries in this row */      \
    for (ii=N; ii>=_i; ii--) {                            \
      rp2[ii+1] = rp2[ii];                                \
      ap2[ii+1] = ap2[ii];                                \
    }                                                     \
    rp2[_i] = col;                                        \
    ap2[_i] = value;                                      \
    B->nonzerostate++;                                    \
    b_noinsert: ;                                         \
    bilen[row] = nrow2;                                   \
  }

PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[])
{
  Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)A->data;
  Mat_SeqAIJ     *a   = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data;
  PetscErrorCode ierr;
  PetscInt       l,*garray = mat->garray,diag;

  PetscFunctionBegin;
  /* code only works for square matrices A */

  /* find size of row to the left of the diagonal part; relies on the off-diagonal
     columns being stored in increasing global order */
  ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr);
  row  = row - diag;
  for (l=0; l<b->i[row+1]-b->i[row]; l++) {
    if (garray[b->j[b->i[row]+l]] > diag) break;
  }
  ierr = PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));CHKERRQ(ierr);

  /* diagonal part */
  ierr = PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));CHKERRQ(ierr);

  /* right of diagonal part */
  ierr = PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
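/*
   Illustration (an assumed example, not taken from the source): for a square matrix
   whose ownership range on this rank is [5,8), a row with global column indices

     1 4 | 5 6 7 | 8 12

   must be passed to MatSetValuesRow() with v holding the values in exactly that
   order: first the l off-diagonal entries to the left of the diagonal block, then
   the diagonal-block entries, then the remaining off-diagonal entries, matching
   the three PetscMemcpy() calls above.
*/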

PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscScalar    value;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart  = mat->rmap->rstart,rend = mat->rmap->rend;
  PetscInt       cstart      = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
  PetscBool      roworiented = aij->roworiented;

  /* Some variables required by the macros */
  Mat        A                 = aij->A;
  Mat_SeqAIJ *a                = (Mat_SeqAIJ*)A->data;
  PetscInt   *aimax            = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
  MatScalar  *aa               = a->a;
  PetscBool  ignorezeroentries = a->ignorezeroentries;
  Mat        B                 = aij->B;
  Mat_SeqAIJ *b                = (Mat_SeqAIJ*)B->data;
  PetscInt   *bimax            = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
  MatScalar  *ba               = b->a;

  PetscInt  *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
  PetscInt  nonew;
  MatScalar *ap1,*ap2;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (im[i] < 0) continue;
#if defined(PETSC_USE_DEBUG)
    if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
#endif
    if (im[i] >= rstart && im[i] < rend) {
      row      = im[i] - rstart;
      lastcol1 = -1;
      rp1      = aj + ai[row];
      ap1      = aa + ai[row];
      rmax1    = aimax[row];
      nrow1    = ailen[row];
      low1     = 0;
      high1    = nrow1;
      lastcol2 = -1;
      rp2      = bj + bi[row];
      ap2      = ba + bi[row];
      rmax2    = bimax[row];
      nrow2    = bilen[row];
      low2     = 0;
      high2    = nrow2;

      for (j=0; j<n; j++) {
        if (roworiented) value = v[i*n+j];
        else             value = v[i+j*m];
        if (in[j] >= cstart && in[j] < cend) {
          col   = in[j] - cstart;
          nonew = a->nonew;
          if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && row != col) continue;
          MatSetValues_SeqAIJ_A_Private(row,col,value,addv,im[i],in[j]);
        } else if (in[j] < 0) continue;
#if defined(PETSC_USE_DEBUG)
        else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
#endif
        else {
          if (mat->was_assembled) {
            if (!aij->colmap) {
              ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
            }
#if defined(PETSC_USE_CTABLE)
            ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
            col--;
#else
            col = aij->colmap[in[j]] - 1;
#endif
            if (col < 0 && !((Mat_SeqAIJ*)(aij->B->data))->nonew) {
              ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
              col  = in[j];
              /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
              B     = aij->B;
              b     = (Mat_SeqAIJ*)B->data;
              bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a;
              rp2   = bj + bi[row];
              ap2   = ba + bi[row];
              rmax2 = bimax[row];
              nrow2 = bilen[row];
              low2  = 0;
              high2 = nrow2;
              bm    = aij->B->rmap->n;
              ba    = b->a;
            } else if (col < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", im[i], in[j]);
          } else col = in[j];
          nonew = b->nonew;
          MatSetValues_SeqAIJ_B_Private(row,col,value,addv,im[i],in[j]);
        }
      }
    } else {
      if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
      if (!aij->donotstash) {
        mat->assembled = PETSC_FALSE;
        if (roworiented) {
          ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}
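/*
   Example usage (a minimal sketch): insert a 2x2 block of values by global index.
   Entries whose rows fall outside the local ownership range are stashed, as in the
   code above, and moved to the owning process during assembly.

     PetscInt    rows[2] = {0,1},cols[2] = {0,2};
     PetscScalar vals[4] = {1.0,2.0,3.0,4.0};
     ierr = MatSetValues(A,2,rows,2,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
*/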

PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
  PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]); */
    if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      for (j=0; j<n; j++) {
        if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */
        if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
        if (idxn[j] >= cstart && idxn[j] < cend) {
          col  = idxn[j] - cstart;
          ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
        } else {
          if (!aij->colmap) {
            ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
          }
#if defined(PETSC_USE_CTABLE)
          ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
          col--;
#else
          col = aij->colmap[idxn[j]] - 1;
#endif
          if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
          else {
            ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
          }
        }
      }
    } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
  }
  PetscFunctionReturn(0);
}

extern PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat,Vec,Vec);

PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       nstash,reallocs;

  PetscFunctionBegin;
  if (aij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0);

  ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
  ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  Mat_SeqAIJ     *a   = (Mat_SeqAIJ*)aij->A->data;
  PetscErrorCode ierr;
  PetscMPIInt    n;
  PetscInt       i,j,rstart,ncols,flg;
  PetscInt       *row,*col;
  PetscBool      other_disassembled;
  PetscScalar    *val;

  /* do not use 'b = (Mat_SeqAIJ*)aij->B->data' as B can be reset in disassembly */

  PetscFunctionBegin;
  if (!aij->donotstash && !mat->nooffprocentries) {
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n; ) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) {
          if (row[j] != rstart) break;
        }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        /* Now assemble all these values with a single function call */
        ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,mat->insertmode);CHKERRQ(ierr);

        i = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
  }
  ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);

  /* determine if any process has disassembled; if so we must
     also disassemble ourselves, in order that we may reassemble */
  /*
     if the nonzero structure of submatrix B cannot change then we know that
     no process disassembled, so we can skip this step
  */
  if (!((Mat_SeqAIJ*)aij->B->data)->nonew) {
    ierr = MPIU_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    if (mat->was_assembled && !other_disassembled) {
      ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
    }
  }
  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
  }
  ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);

  ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);

  aij->rowvalues = 0;

  ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
  if (a->inode.size) mat->ops->multdiagonalblock = MatMultDiagonalBlock_MPIAIJ;

  /* if no new nonzero locations are allowed in the matrix then only set the matrix state the first time through */
  if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
    PetscObjectState state = aij->A->nonzerostate + aij->B->nonzerostate;
    ierr = MPIU_Allreduce(&state,&mat->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
{
  Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
  ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
{
  Mat_MPIAIJ     *mat = (Mat_MPIAIJ *) A->data;
  PetscInt       *lrows;
  PetscInt       r, len;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* get locally owned rows */
  ierr = MatZeroRowsMapLocal_Private(A,N,rows,&len,&lrows);CHKERRQ(ierr);
  /* fix right-hand side if needed */
  if (x && b) {
    const PetscScalar *xx;
    PetscScalar       *bb;

    ierr = VecGetArrayRead(x, &xx);CHKERRQ(ierr);
    ierr = VecGetArray(b, &bb);CHKERRQ(ierr);
    for (r = 0; r < len; ++r) bb[lrows[r]] = diag*xx[lrows[r]];
    ierr = VecRestoreArrayRead(x, &xx);CHKERRQ(ierr);
    ierr = VecRestoreArray(b, &bb);CHKERRQ(ierr);
  }
  /* must zero mat->B before mat->A because the (diag) case below may put values into mat->B */
  ierr = MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
  if (A->congruentlayouts == -1) { /* first time we compare the row and column layouts */
    PetscBool cong;
    ierr = PetscLayoutCompare(A->rmap,A->cmap,&cong);CHKERRQ(ierr);
    if (cong) A->congruentlayouts = 1;
    else      A->congruentlayouts = 0;
  }
  if ((diag != 0.0) && A->congruentlayouts) {
    ierr = MatZeroRows(mat->A, len, lrows, diag, NULL, NULL);CHKERRQ(ierr);
  } else if (diag != 0.0) {
    ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
    if (((Mat_SeqAIJ *) mat->A->data)->nonew) SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "MatZeroRows() on rectangular matrices cannot be used with the Mat options\nMAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
    for (r = 0; r < len; ++r) {
      const PetscInt row = lrows[r] + A->rmap->rstart;
      ierr = MatSetValues(A, 1, &row, 1, &row, &diag, INSERT_VALUES);CHKERRQ(ierr);
    }
    ierr = MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  } else {
    ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
  }
  ierr = PetscFree(lrows);CHKERRQ(ierr);

  /* only change the matrix nonzero state if the pattern was allowed to change */
  if (!((Mat_SeqAIJ*)(mat->A->data))->keepnonzeropattern) {
    PetscObjectState state = mat->A->nonzerostate + mat->B->nonzerostate;
    ierr = MPIU_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
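/*
   Example usage (a minimal sketch): zero two global rows, put 1.0 on their diagonal,
   and fix the right-hand side b so that the solution keeps the values already stored
   in x for those rows, as in the (x && b) branch above.

     PetscInt rows[2] = {0,5};
     ierr = MatZeroRows(A,2,rows,1.0,x,b);CHKERRQ(ierr);
*/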

PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
{
  Mat_MPIAIJ        *l = (Mat_MPIAIJ*)A->data;
  PetscErrorCode    ierr;
  PetscMPIInt       n = A->rmap->n;
  PetscInt          i,j,r,m,p = 0,len = 0;
  PetscInt          *lrows,*owners = A->rmap->range;
  PetscSFNode       *rrows;
  PetscSF           sf;
  const PetscScalar *xx;
  PetscScalar       *bb,*mask;
  Vec               xmask,lmask;
  Mat_SeqAIJ        *aij = (Mat_SeqAIJ*)l->B->data;
  const PetscInt    *aj, *ii,*ridx;
  PetscScalar       *aa;

  PetscFunctionBegin;
  /* Create SF where leaves are input rows and roots are owned rows */
  ierr = PetscMalloc1(n, &lrows);CHKERRQ(ierr);
  for (r = 0; r < n; ++r) lrows[r] = -1;
  ierr = PetscMalloc1(N, &rrows);CHKERRQ(ierr);
  for (r = 0; r < N; ++r) {
    const PetscInt idx = rows[r];
    if (idx < 0 || A->rmap->N <= idx) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range [0,%D)",idx,A->rmap->N);
    if (idx < owners[p] || owners[p+1] <= idx) { /* short-circuit the search if the last p owns this row too */
      ierr = PetscLayoutFindOwner(A->rmap,idx,&p);CHKERRQ(ierr);
    }
    rrows[r].rank  = p;
    rrows[r].index = rows[r] - owners[p];
  }
  ierr = PetscSFCreate(PetscObjectComm((PetscObject) A), &sf);CHKERRQ(ierr);
  ierr = PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER);CHKERRQ(ierr);
  /* Collect flags for rows to be zeroed */
  ierr = PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
  ierr = PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
  ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
  /* Compress and put in row numbers */
  for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r;
  /* zero diagonal part of matrix */
  ierr = MatZeroRowsColumns(l->A,len,lrows,diag,x,b);CHKERRQ(ierr);
  /* handle off-diagonal part of matrix */
  ierr = MatCreateVecs(A,&xmask,NULL);CHKERRQ(ierr);
  ierr = VecDuplicate(l->lvec,&lmask);CHKERRQ(ierr);
  ierr = VecGetArray(xmask,&bb);CHKERRQ(ierr);
  for (i=0; i<len; i++) bb[lrows[i]] = 1;
  ierr = VecRestoreArray(xmask,&bb);CHKERRQ(ierr);
  ierr = VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecDestroy(&xmask);CHKERRQ(ierr);
  if (x) {
    ierr = VecScatterBegin(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = VecScatterEnd(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = VecGetArrayRead(l->lvec,&xx);CHKERRQ(ierr);
    ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
  }
  ierr = VecGetArray(lmask,&mask);CHKERRQ(ierr);
  /* remove zeroed rows of the off-diagonal matrix */
  ii = aij->i;
  for (i=0; i<len; i++) {
    ierr = PetscMemzero(aij->a + ii[lrows[i]],(ii[lrows[i]+1] - ii[lrows[i]])*sizeof(PetscScalar));CHKERRQ(ierr);
  }
  /* loop over all elements of the off-process part of the matrix, zeroing the removed columns */
  if (aij->compressedrow.use) {
    m    = aij->compressedrow.nrows;
    ii   = aij->compressedrow.i;
    ridx = aij->compressedrow.rindex;
    for (i=0; i<m; i++) {
      n  = ii[i+1] - ii[i];
      aj = aij->j + ii[i];
      aa = aij->a + ii[i];

      for (j=0; j<n; j++) {
        if (PetscAbsScalar(mask[*aj])) {
          if (b) bb[*ridx] -= *aa*xx[*aj];
          *aa = 0.0;
        }
        aa++;
        aj++;
      }
      ridx++;
    }
  } else { /* do not use compressed row format */
    m = l->B->rmap->n;
    for (i=0; i<m; i++) {
      n  = ii[i+1] - ii[i];
      aj = aij->j + ii[i];
      aa = aij->a + ii[i];
      for (j=0; j<n; j++) {
        if (PetscAbsScalar(mask[*aj])) {
          if (b) bb[i] -= *aa*xx[*aj];
          *aa = 0.0;
        }
        aa++;
        aj++;
      }
    }
  }
  if (x) {
    ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
    ierr = VecRestoreArrayRead(l->lvec,&xx);CHKERRQ(ierr);
  }
  ierr = VecRestoreArray(lmask,&mask);CHKERRQ(ierr);
  ierr = VecDestroy(&lmask);CHKERRQ(ierr);
  ierr = PetscFree(lrows);CHKERRQ(ierr);

  /* only change the matrix nonzero state if the pattern was allowed to change */
  if (!((Mat_SeqAIJ*)(l->A->data))->keepnonzeropattern) {
    PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate;
    ierr = MPIU_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       nt;
  VecScatter     Mvctx = a->Mvctx;

  PetscFunctionBegin;
  ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
  if (nt != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt);

  /* overlap communication of the ghost values with the diagonal-block multiply */
  ierr = VecScatterBegin(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
  ierr = VecScatterEnd(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatMultDiagonalBlock(a->A,bb,xx);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  VecScatter     Mvctx = a->Mvctx;

  PetscFunctionBegin;
  if (a->Mvctx_mpi1_flg) Mvctx = a->Mvctx_mpi1;
  ierr = VecScatterBegin(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  ierr = VecScatterEnd(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscBool      merged;

  PetscFunctionBegin;
  ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr);
  /* do nondiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  if (!merged) {
    /* send it on its way */
    ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
    /* do local part */
    ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
    /* receive remote parts; this assumes the values are not actually added into yy until this VecScatterEnd() */
    ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  } else {
    /* do local part */
    ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
    /* send it on its way */
    ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
    /* the values were actually received in the Begin(), but we still need this (no-op) call */
    ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscBool *f)
{
  MPI_Comm       comm;
  Mat_MPIAIJ     *Aij = (Mat_MPIAIJ*) Amat->data, *Bij;
  Mat            Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs;
  IS             Me,Notme;
  PetscErrorCode ierr;
  PetscInt       M,N,first,last,*notme,i;
  PetscMPIInt    size;

  PetscFunctionBegin;
  /* Easy test: symmetric diagonal block */
  Bij  = (Mat_MPIAIJ*) Bmat->data; Bdia = Bij->A;
  ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr);
  if (!*f) PetscFunctionReturn(0);
  ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  if (size == 1) PetscFunctionReturn(0);

  /* Hard test: off-diagonal block. This takes a MatCreateSubMatrix. */
  ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr);
  ierr = PetscMalloc1(N-last+first,&notme);CHKERRQ(ierr);
  for (i=0; i<first; i++) notme[i] = i;
  for (i=last; i<M; i++) notme[i-last+first] = i;
  ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,PETSC_COPY_VALUES,&Notme);CHKERRQ(ierr);
  ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr);
  ierr = MatCreateSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr);
  Aoff = Aoffs[0];
  ierr = MatCreateSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr);
  Boff = Boffs[0];
  ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr);
  ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr);
  ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr);
  ierr = ISDestroy(&Me);CHKERRQ(ierr);
  ierr = ISDestroy(&Notme);CHKERRQ(ierr);
  ierr = PetscFree(notme);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatIsSymmetric_MPIAIJ(Mat A,PetscReal tol,PetscBool *f)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatIsTranspose_MPIAIJ(A,A,tol,f);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* do nondiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  /* send it on its way */
  ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  /* do local part */
  ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  /* receive remote parts */
  ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
  This only works correctly for square matrices where the subblock A->A is the
  diagonal block
*/
PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;

  PetscFunctionBegin;
  if (A->rmap->N != A->cmap->N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
  if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
  ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatScale(a->A,aa);CHKERRQ(ierr);
  ierr = MatScale(a->B,aa);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
#if defined(PETSC_USE_LOG)
  PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N);
#endif
  ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
  ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
  ierr = MatDestroy(&aij->A);CHKERRQ(ierr);
  ierr = MatDestroy(&aij->B);CHKERRQ(ierr);
#if defined(PETSC_USE_CTABLE)
  ierr = PetscTableDestroy(&aij->colmap);CHKERRQ(ierr);
#else
  ierr = PetscFree(aij->colmap);CHKERRQ(ierr);
#endif
  ierr = PetscFree(aij->garray);CHKERRQ(ierr);
  ierr = VecDestroy(&aij->lvec);CHKERRQ(ierr);
  ierr = VecScatterDestroy(&aij->Mvctx);CHKERRQ(ierr);
  if (aij->Mvctx_mpi1) {ierr = VecScatterDestroy(&aij->Mvctx_mpi1);CHKERRQ(ierr);}
  ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
  ierr = PetscFree(aij->ld);CHKERRQ(ierr);
  ierr = PetscFree(mat->data);CHKERRQ(ierr);

  ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatResetPreallocation_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C",NULL);CHKERRQ(ierr);
#if defined(PETSC_HAVE_ELEMENTAL)
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_elemental_C",NULL);CHKERRQ(ierr);
#endif
#if defined(PETSC_HAVE_HYPRE)
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_hypre_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMatMult_transpose_mpiaij_mpiaij_C",NULL);CHKERRQ(ierr);
#endif
  PetscFunctionReturn(0);
}
1142 
1143 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
1144 {
1145   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1146   Mat_SeqAIJ     *A   = (Mat_SeqAIJ*)aij->A->data;
1147   Mat_SeqAIJ     *B   = (Mat_SeqAIJ*)aij->B->data;
1148   PetscErrorCode ierr;
1149   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
1150   int            fd;
1151   PetscInt       nz,header[4],*row_lengths,*range=0,rlen,i;
1152   PetscInt       nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap->rstart,rnz = 0;
1153   PetscScalar    *column_values;
1154   PetscInt       message_count,flowcontrolcount;
1155   FILE           *file;
1156 
1157   PetscFunctionBegin;
1158   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
1159   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);CHKERRQ(ierr);
1160   nz   = A->nz + B->nz;
1161   ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1162   if (!rank) {
1163     header[0] = MAT_FILE_CLASSID;
1164     header[1] = mat->rmap->N;
1165     header[2] = mat->cmap->N;
1166 
1167     ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1168     ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1169     /* get largest number of rows any processor has */
1170     rlen  = mat->rmap->n;
1171     range = mat->rmap->range;
1172     for (i=1; i<size; i++) rlen = PetscMax(rlen,range[i+1] - range[i]);
1173   } else {
1174     ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1175     rlen = mat->rmap->n;
1176   }
1177 
1178   /* load up the local row counts */
1179   ierr = PetscMalloc1(rlen+1,&row_lengths);CHKERRQ(ierr);
1180   for (i=0; i<mat->rmap->n; i++) row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
1181 
1182   /* store the row lengths to the file */
1183   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1184   if (!rank) {
1185     ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap->n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1186     for (i=1; i<size; i++) {
1187       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1188       rlen = range[i+1] - range[i];
1189       ierr = MPIULong_Recv(row_lengths,rlen,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1190       ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1191     }
1192     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1193   } else {
1194     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1195     ierr = MPIULong_Send(row_lengths,mat->rmap->n,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1196     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1197   }
1198   ierr = PetscFree(row_lengths);CHKERRQ(ierr);
1199 
1200   /* load up the local column indices */
1201   nzmax = nz; /* th processor needs space a largest processor needs */
1202   ierr  = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1203   ierr  = PetscMalloc1(nzmax+1,&column_indices);CHKERRQ(ierr);
1204   cnt   = 0;
1205   for (i=0; i<mat->rmap->n; i++) {
1206     for (j=B->i[i]; j<B->i[i+1]; j++) {
1207       if ((col = garray[B->j[j]]) > cstart) break;
1208       column_indices[cnt++] = col;
1209     }
1210     for (k=A->i[i]; k<A->i[i+1]; k++) column_indices[cnt++] = A->j[k] + cstart;
1211     for (; j<B->i[i+1]; j++) column_indices[cnt++] = garray[B->j[j]];
1212   }
1213   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
1214 
1215   /* store the column indices to the file */
1216   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1217   if (!rank) {
1218     MPI_Status status;
1219     ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1220     for (i=1; i<size; i++) {
1221       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1222       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
1223       if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax);
1224       ierr = MPIULong_Recv(column_indices,rnz,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1225       ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1226     }
1227     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1228   } else {
1229     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1230     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1231     ierr = MPIULong_Send(column_indices,nz,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1232     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1233   }
1234   ierr = PetscFree(column_indices);CHKERRQ(ierr);
1235 
1236   /* load up the local column values */
1237   ierr = PetscMalloc1(nzmax+1,&column_values);CHKERRQ(ierr);
1238   cnt  = 0;
1239   for (i=0; i<mat->rmap->n; i++) {
1240     for (j=B->i[i]; j<B->i[i+1]; j++) {
1241       if (garray[B->j[j]] > cstart) break;
1242       column_values[cnt++] = B->a[j];
1243     }
1244     for (k=A->i[i]; k<A->i[i+1]; k++) column_values[cnt++] = A->a[k];
1245     for (; j<B->i[i+1]; j++) column_values[cnt++] = B->a[j];
1246   }
1247   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
1248 
1249   /* store the column values to the file */
1250   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1251   if (!rank) {
1252     MPI_Status status;
1253     ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1254     for (i=1; i<size; i++) {
1255       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1256       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
1257       if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: rnz = %D nzmax = %D",rnz,nzmax);
1258       ierr = MPIULong_Recv(column_values,rnz,MPIU_SCALAR,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1259       ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1260     }
1261     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1262   } else {
1263     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1264     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1265     ierr = MPIULong_Send(column_values,nz,MPIU_SCALAR,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1266     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1267   }
1268   ierr = PetscFree(column_values);CHKERRQ(ierr);
1269 
1270   ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr);
1271   if (file) fprintf(file,"-matload_block_size %d\n",(int)PetscAbs(mat->rmap->bs));
1272   PetscFunctionReturn(0);
1273 }
1274 
1275 #include <petscdraw.h>
1276 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
1277 {
1278   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
1279   PetscErrorCode    ierr;
1280   PetscMPIInt       rank = aij->rank,size = aij->size;
1281   PetscBool         isdraw,iascii,isbinary;
1282   PetscViewer       sviewer;
1283   PetscViewerFormat format;
1284 
1285   PetscFunctionBegin;
1286   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1287   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1288   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1289   if (iascii) {
1290     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1291     if (format == PETSC_VIEWER_LOAD_BALANCE) {
1292       PetscInt i,nmax = 0,nmin = PETSC_MAX_INT,navg = 0,*nz,nzlocal = ((Mat_SeqAIJ*) (aij->A->data))->nz + ((Mat_SeqAIJ*) (aij->B->data))->nz;
1293       ierr = PetscMalloc1(size,&nz);CHKERRQ(ierr);
1294       ierr = MPI_Allgather(&nzlocal,1,MPIU_INT,nz,1,MPIU_INT,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1295       for (i=0; i<(PetscInt)size; i++) {
1296         nmax = PetscMax(nmax,nz[i]);
1297         nmin = PetscMin(nmin,nz[i]);
1298         navg += nz[i];
1299       }
1300       ierr = PetscFree(nz);CHKERRQ(ierr);
1301       navg = navg/size;
1302       ierr = PetscViewerASCIIPrintf(viewer,"Load Balance - Nonzeros: Min %D  avg %D  max %D\n",nmin,navg,nmax);CHKERRQ(ierr);
1303       PetscFunctionReturn(0);
1304     }
1305     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1306     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1307       MatInfo   info;
1308       PetscBool inodes;
1309 
1310       ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
1311       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
1312       ierr = MatInodeGetInodeSizes(aij->A,NULL,(PetscInt**)&inodes,NULL);CHKERRQ(ierr);
1313       ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr);
1314       if (!inodes) {
1315         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %g, not using I-node routines\n",
1316                                                   rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(double)info.memory);CHKERRQ(ierr);
1317       } else {
1318         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %g, using I-node routines\n",
1319                                                   rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(double)info.memory);CHKERRQ(ierr);
1320       }
1321       ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
1322       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1323       ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
1324       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1325       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1326       ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr);
1327       ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
1328       ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr);
1329       PetscFunctionReturn(0);
1330     } else if (format == PETSC_VIEWER_ASCII_INFO) {
1331       PetscInt inodecount,inodelimit,*inodes;
1332       ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr);
1333       if (inodes) {
1334         ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr);
1335       } else {
1336         ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr);
1337       }
1338       PetscFunctionReturn(0);
1339     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1340       PetscFunctionReturn(0);
1341     }
1342   } else if (isbinary) {
1343     if (size == 1) {
1344       ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1345       ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
1346     } else {
1347       ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr);
1348     }
1349     PetscFunctionReturn(0);
1350   } else if (isdraw) {
1351     PetscDraw draw;
1352     PetscBool isnull;
1353     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
1354     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr);
1355     if (isnull) PetscFunctionReturn(0);
1356   }
1357 
1358   {
1359     /* assemble the entire matrix onto first processor. */
1360     Mat        A;
1361     Mat_SeqAIJ *Aloc;
1362     PetscInt   M = mat->rmap->N,N = mat->cmap->N,m,*ai,*aj,row,*cols,i,*ct;
1363     MatScalar  *a;
1364 
1365     ierr = MatCreate(PetscObjectComm((PetscObject)mat),&A);CHKERRQ(ierr);
1366     if (!rank) {
1367       ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
1368     } else {
1369       ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
1370     }
1371     /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */
1372     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
1373     ierr = MatMPIAIJSetPreallocation(A,0,NULL,0,NULL);CHKERRQ(ierr);
1374     ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
1375     ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)A);CHKERRQ(ierr);
1376 
1377     /* copy over the A part */
1378     Aloc = (Mat_SeqAIJ*)aij->A->data;
1379     m    = aij->A->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1380     row  = mat->rmap->rstart;
1381     for (i=0; i<ai[m]; i++) aj[i] += mat->cmap->rstart;
1382     for (i=0; i<m; i++) {
1383       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
1384       row++;
1385       a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
1386     }
1387     aj = Aloc->j;
1388     for (i=0; i<ai[m]; i++) aj[i] -= mat->cmap->rstart;
1389 
1390     /* copy over the B part */
1391     Aloc = (Mat_SeqAIJ*)aij->B->data;
1392     m    = aij->B->rmap->n;  ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1393     row  = mat->rmap->rstart;
1394     ierr = PetscMalloc1(ai[m]+1,&cols);CHKERRQ(ierr);
1395     ct   = cols;
1396     for (i=0; i<ai[m]; i++) cols[i] = aij->garray[aj[i]];
1397     for (i=0; i<m; i++) {
1398       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
1399       row++;
1400       a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
1401     }
1402     ierr = PetscFree(ct);CHKERRQ(ierr);
1403     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1404     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1405     /*
1406        Everyone has to participate in viewing the matrix since the graphics waits are
1407        synchronized across all processes that share the PetscDraw object
1408     */
1409     ierr = PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
1410     if (!rank) {
1411       ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1412       ierr = MatView_SeqAIJ(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
1413     }
1414     ierr = PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
1415     ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1416     ierr = MatDestroy(&A);CHKERRQ(ierr);
1417   }
1418   PetscFunctionReturn(0);
1419 }
1420 
1421 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer)
1422 {
1423   PetscErrorCode ierr;
1424   PetscBool      iascii,isdraw,issocket,isbinary;
1425 
1426   PetscFunctionBegin;
1427   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1428   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1429   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1430   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
1431   if (iascii || isdraw || isbinary || issocket) {
1432     ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
1433   }
1434   PetscFunctionReturn(0);
1435 }
1436 
1437 PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
1438 {
1439   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1440   PetscErrorCode ierr;
1441   Vec            bb1 = 0;
1442   PetscBool      hasop;
1443 
1444   PetscFunctionBegin;
1445   if (flag == SOR_APPLY_UPPER) {
1446     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1447     PetscFunctionReturn(0);
1448   }
1449 
1450   if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) {
1451     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
1452   }
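  /* bb1 receives the locally modified right-hand side bb - B*x(ghost) used by the local sweeps below */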
1453 
1454   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
1455     if (flag & SOR_ZERO_INITIAL_GUESS) {
1456       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1457       its--;
1458     }
1459 
1460     while (its--) {
1461       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1462       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1463 
1464       /* update rhs: bb1 = bb - B*x */
1465       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1466       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1467 
1468       /* local sweep */
1469       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1470     }
1471   } else if (flag & SOR_LOCAL_FORWARD_SWEEP) {
1472     if (flag & SOR_ZERO_INITIAL_GUESS) {
1473       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1474       its--;
1475     }
1476     while (its--) {
1477       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1478       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1479 
1480       /* update rhs: bb1 = bb - B*x */
1481       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1482       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1483 
1484       /* local sweep */
1485       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1486     }
1487   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) {
1488     if (flag & SOR_ZERO_INITIAL_GUESS) {
1489       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1490       its--;
1491     }
1492     while (its--) {
1493       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1494       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1495 
1496       /* update rhs: bb1 = bb - B*x */
1497       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1498       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1499 
1500       /* local sweep */
1501       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1502     }
1503   } else if (flag & SOR_EISENSTAT) {
1504     Vec xx1;
1505 
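    /*
       Eisenstat's trick: a local backward sweep produces an initial xx, the
       right-hand side is then modified to bb1 = ((omega-2)/omega)*D*xx + bb + B*x(ghost),
       and a local forward sweep on bb1 yields the correction xx1 that is added to xx.
    */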
1506     ierr = VecDuplicate(bb,&xx1);CHKERRQ(ierr);
1507     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr);
1508 
1509     ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1510     ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1511     if (!mat->diag) {
1512       ierr = MatCreateVecs(matin,&mat->diag,NULL);CHKERRQ(ierr);
1513       ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr);
1514     }
1515     ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr);
1516     if (hasop) {
1517       ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr);
1518     } else {
1519       ierr = VecPointwiseMult(bb1,mat->diag,xx);CHKERRQ(ierr);
1520     }
1521     ierr = VecAYPX(bb1,(omega-2.0)/omega,bb);CHKERRQ(ierr);
1522 
1523     ierr = MatMultAdd(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr);
1524 
1525     /* local sweep */
1526     ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr);
1527     ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr);
1528     ierr = VecDestroy(&xx1);CHKERRQ(ierr);
1529   } else SETERRQ(PetscObjectComm((PetscObject)matin),PETSC_ERR_SUP,"Parallel SOR not supported");
1530 
1531   ierr = VecDestroy(&bb1);CHKERRQ(ierr);
1532 
1533   matin->factorerrortype = mat->A->factorerrortype;
1534   PetscFunctionReturn(0);
1535 }
1536 
1537 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B)
1538 {
1539   Mat            aA,aB,Aperm;
1540   const PetscInt *rwant,*cwant,*gcols,*ai,*bi,*aj,*bj;
1541   PetscScalar    *aa,*ba;
1542   PetscInt       i,j,m,n,ng,anz,bnz,*dnnz,*onnz,*tdnnz,*tonnz,*rdest,*cdest,*work,*gcdest;
1543   PetscSF        rowsf,sf;
1544   IS             parcolp = NULL;
1545   PetscBool      done;
1546   PetscErrorCode ierr;
1547 
1548   PetscFunctionBegin;
1549   ierr = MatGetLocalSize(A,&m,&n);CHKERRQ(ierr);
1550   ierr = ISGetIndices(rowp,&rwant);CHKERRQ(ierr);
1551   ierr = ISGetIndices(colp,&cwant);CHKERRQ(ierr);
1552   ierr = PetscMalloc3(PetscMax(m,n),&work,m,&rdest,n,&cdest);CHKERRQ(ierr);
1553 
1554   /* Invert row permutation to find out where my rows should go */
1555   ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&rowsf);CHKERRQ(ierr);
1556   ierr = PetscSFSetGraphLayout(rowsf,A->rmap,A->rmap->n,NULL,PETSC_OWN_POINTER,rwant);CHKERRQ(ierr);
1557   ierr = PetscSFSetFromOptions(rowsf);CHKERRQ(ierr);
1558   for (i=0; i<m; i++) work[i] = A->rmap->rstart + i;
1559   ierr = PetscSFReduceBegin(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr);
1560   ierr = PetscSFReduceEnd(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr);
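  /* each leaf sends its own global row index to the root that owns rwant[i];
     after the reduction, rdest[i] is the global row of the permuted matrix
     that locally owned row i maps to */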
1561 
1562   /* Invert column permutation to find out where my columns should go */
1563   ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
1564   ierr = PetscSFSetGraphLayout(sf,A->cmap,A->cmap->n,NULL,PETSC_OWN_POINTER,cwant);CHKERRQ(ierr);
1565   ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
1566   for (i=0; i<n; i++) work[i] = A->cmap->rstart + i;
1567   ierr = PetscSFReduceBegin(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr);
1568   ierr = PetscSFReduceEnd(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr);
1569   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
1570 
1571   ierr = ISRestoreIndices(rowp,&rwant);CHKERRQ(ierr);
1572   ierr = ISRestoreIndices(colp,&cwant);CHKERRQ(ierr);
1573   ierr = MatMPIAIJGetSeqAIJ(A,&aA,&aB,&gcols);CHKERRQ(ierr);
1574 
1575   /* Find out where my gcols should go */
1576   ierr = MatGetSize(aB,NULL,&ng);CHKERRQ(ierr);
1577   ierr = PetscMalloc1(ng,&gcdest);CHKERRQ(ierr);
1578   ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
1579   ierr = PetscSFSetGraphLayout(sf,A->cmap,ng,NULL,PETSC_OWN_POINTER,gcols);CHKERRQ(ierr);
1580   ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
1581   ierr = PetscSFBcastBegin(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr);
1582   ierr = PetscSFBcastEnd(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr);
1583   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
1584 
1585   ierr = PetscCalloc4(m,&dnnz,m,&onnz,m,&tdnnz,m,&tonnz);CHKERRQ(ierr);
1586   ierr = MatGetRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr);
1587   ierr = MatGetRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr);
1588   for (i=0; i<m; i++) {
1589     PetscInt row = rdest[i],rowner;
1590     ierr = PetscLayoutFindOwner(A->rmap,row,&rowner);CHKERRQ(ierr);
1591     for (j=ai[i]; j<ai[i+1]; j++) {
1592       PetscInt cowner,col = cdest[aj[j]];
1593       ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr); /* Could build an index for the columns to eliminate this search */
1594       if (rowner == cowner) dnnz[i]++;
1595       else onnz[i]++;
1596     }
1597     for (j=bi[i]; j<bi[i+1]; j++) {
1598       PetscInt cowner,col = gcdest[bj[j]];
1599       ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr);
1600       if (rowner == cowner) dnnz[i]++;
1601       else onnz[i]++;
1602     }
1603   }
1604   ierr = PetscSFBcastBegin(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr);
1605   ierr = PetscSFBcastEnd(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr);
1606   ierr = PetscSFBcastBegin(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr);
1607   ierr = PetscSFBcastEnd(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr);
1608   ierr = PetscSFDestroy(&rowsf);CHKERRQ(ierr);
1609 
1610   ierr = MatCreateAIJ(PetscObjectComm((PetscObject)A),A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N,0,tdnnz,0,tonnz,&Aperm);CHKERRQ(ierr);
1611   ierr = MatSeqAIJGetArray(aA,&aa);CHKERRQ(ierr);
1612   ierr = MatSeqAIJGetArray(aB,&ba);CHKERRQ(ierr);
1613   for (i=0; i<m; i++) {
1614     PetscInt *acols = dnnz,*bcols = onnz; /* Repurpose now-unneeded arrays */
1615     PetscInt j0,rowlen;
1616     rowlen = ai[i+1] - ai[i];
1617     for (j0=j=0; j<rowlen; j0=j) { /* rowlen can exceed the scratch size m, so set the values in batches of at most m */
1618       for ( ; j<PetscMin(rowlen,j0+m); j++) acols[j-j0] = cdest[aj[ai[i]+j]];
1619       ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,acols,aa+ai[i]+j0,INSERT_VALUES);CHKERRQ(ierr);
1620     }
1621     rowlen = bi[i+1] - bi[i];
1622     for (j0=j=0; j<rowlen; j0=j) {
1623       for ( ; j<PetscMin(rowlen,j0+m); j++) bcols[j-j0] = gcdest[bj[bi[i]+j]];
1624       ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,bcols,ba+bi[i]+j0,INSERT_VALUES);CHKERRQ(ierr);
1625     }
1626   }
1627   ierr = MatAssemblyBegin(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1628   ierr = MatAssemblyEnd(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1629   ierr = MatRestoreRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr);
1630   ierr = MatRestoreRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr);
1631   ierr = MatSeqAIJRestoreArray(aA,&aa);CHKERRQ(ierr);
1632   ierr = MatSeqAIJRestoreArray(aB,&ba);CHKERRQ(ierr);
1633   ierr = PetscFree4(dnnz,onnz,tdnnz,tonnz);CHKERRQ(ierr);
1634   ierr = PetscFree3(work,rdest,cdest);CHKERRQ(ierr);
1635   ierr = PetscFree(gcdest);CHKERRQ(ierr);
1636   if (parcolp) {ierr = ISDestroy(&colp);CHKERRQ(ierr);}
1637   *B = Aperm;
1638   PetscFunctionReturn(0);
1639 }
1640 
1641 PetscErrorCode  MatGetGhosts_MPIAIJ(Mat mat,PetscInt *nghosts,const PetscInt *ghosts[])
1642 {
1643   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1644   PetscErrorCode ierr;
1645 
1646   PetscFunctionBegin;
1647   ierr = MatGetSize(aij->B,NULL,nghosts);CHKERRQ(ierr);
1648   if (ghosts) *ghosts = aij->garray;
1649   PetscFunctionReturn(0);
1650 }
1651 
1652 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1653 {
1654   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1655   Mat            A    = mat->A,B = mat->B;
1656   PetscErrorCode ierr;
1657   PetscReal      isend[5],irecv[5];
1658 
1659   PetscFunctionBegin;
1660   info->block_size = 1.0;
1661   ierr             = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1662 
1663   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1664   isend[3] = info->memory;  isend[4] = info->mallocs;
1665 
1666   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1667 
1668   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1669   isend[3] += info->memory;  isend[4] += info->mallocs;
1670   if (flag == MAT_LOCAL) {
1671     info->nz_used      = isend[0];
1672     info->nz_allocated = isend[1];
1673     info->nz_unneeded  = isend[2];
1674     info->memory       = isend[3];
1675     info->mallocs      = isend[4];
1676   } else if (flag == MAT_GLOBAL_MAX) {
1677     ierr = MPIU_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);
1678 
1679     info->nz_used      = irecv[0];
1680     info->nz_allocated = irecv[1];
1681     info->nz_unneeded  = irecv[2];
1682     info->memory       = irecv[3];
1683     info->mallocs      = irecv[4];
1684   } else if (flag == MAT_GLOBAL_SUM) {
1685     ierr = MPIU_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);
1686 
1687     info->nz_used      = irecv[0];
1688     info->nz_allocated = irecv[1];
1689     info->nz_unneeded  = irecv[2];
1690     info->memory       = irecv[3];
1691     info->mallocs      = irecv[4];
1692   }
1693   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1694   info->fill_ratio_needed = 0;
1695   info->factor_mallocs    = 0;
1696   PetscFunctionReturn(0);
1697 }
1698 
1699 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscBool flg)
1700 {
1701   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1702   PetscErrorCode ierr;
1703 
1704   PetscFunctionBegin;
1705   switch (op) {
1706   case MAT_NEW_NONZERO_LOCATIONS:
1707   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1708   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1709   case MAT_KEEP_NONZERO_PATTERN:
1710   case MAT_NEW_NONZERO_LOCATION_ERR:
1711   case MAT_USE_INODES:
1712   case MAT_IGNORE_ZERO_ENTRIES:
1713     MatCheckPreallocated(A,1);
1714     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1715     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1716     break;
1717   case MAT_ROW_ORIENTED:
1718     MatCheckPreallocated(A,1);
1719     a->roworiented = flg;
1720 
1721     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1722     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1723     break;
1724   case MAT_NEW_DIAGONALS:
1725     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1726     break;
1727   case MAT_IGNORE_OFF_PROC_ENTRIES:
1728     a->donotstash = flg;
1729     break;
1730   case MAT_SPD:
1731     A->spd_set = PETSC_TRUE;
1732     A->spd     = flg;
1733     if (flg) {
1734       A->symmetric                  = PETSC_TRUE;
1735       A->structurally_symmetric     = PETSC_TRUE;
1736       A->symmetric_set              = PETSC_TRUE;
1737       A->structurally_symmetric_set = PETSC_TRUE;
1738     }
1739     break;
1740   case MAT_SYMMETRIC:
1741     MatCheckPreallocated(A,1);
1742     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1743     break;
1744   case MAT_STRUCTURALLY_SYMMETRIC:
1745     MatCheckPreallocated(A,1);
1746     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1747     break;
1748   case MAT_HERMITIAN:
1749     MatCheckPreallocated(A,1);
1750     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1751     break;
1752   case MAT_SYMMETRY_ETERNAL:
1753     MatCheckPreallocated(A,1);
1754     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1755     break;
1756   case MAT_SUBMAT_SINGLEIS:
1757     A->submat_singleis = flg;
1758     break;
1759   case MAT_STRUCTURE_ONLY:
1760     /* The option is handled directly by MatSetOption() */
1761     break;
1762   default:
1763     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op);
1764   }
1765   PetscFunctionReturn(0);
1766 }
1767 
1768 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1769 {
1770   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1771   PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
1772   PetscErrorCode ierr;
1773   PetscInt       i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart;
1774   PetscInt       nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend;
1775   PetscInt       *cmap,*idx_p;
1776 
1777   PetscFunctionBegin;
1778   if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
1779   mat->getrowactive = PETSC_TRUE;
1780 
1781   if (!mat->rowvalues && (idx || v)) {
1782     /*
1783         allocate enough space to hold information from the longest row.
1784     */
1785     Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data;
1786     PetscInt   max = 1,tmp;
1787     for (i=0; i<matin->rmap->n; i++) {
1788       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1789       if (max < tmp) max = tmp;
1790     }
1791     ierr = PetscMalloc2(max,&mat->rowvalues,max,&mat->rowindices);CHKERRQ(ierr);
1792   }
1793 
1794   if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows");
1795   lrow = row - rstart;
1796 
1797   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1798   if (!v)   {pvA = 0; pvB = 0;}
1799   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1800   ierr  = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1801   ierr  = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1802   nztot = nzA + nzB;
1803 
1804   cmap = mat->garray;
1805   if (v  || idx) {
1806     if (nztot) {
1807       /* Sort by increasing column numbers, assuming A and B already sorted */
1808       PetscInt imark = -1;
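      /* imark will be the number of off-diagonal (B) entries whose global column precedes the diagonal block */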
1809       if (v) {
1810         *v = v_p = mat->rowvalues;
1811         for (i=0; i<nzB; i++) {
1812           if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i];
1813           else break;
1814         }
1815         imark = i;
1816         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1817         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1818       }
1819       if (idx) {
1820         *idx = idx_p = mat->rowindices;
1821         if (imark > -1) {
1822           for (i=0; i<imark; i++) {
1823             idx_p[i] = cmap[cworkB[i]];
1824           }
1825         } else {
1826           for (i=0; i<nzB; i++) {
1827             if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]];
1828             else break;
1829           }
1830           imark = i;
1831         }
1832         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart + cworkA[i];
1833         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]];
1834       }
1835     } else {
1836       if (idx) *idx = 0;
1837       if (v)   *v   = 0;
1838     }
1839   }
1840   *nz  = nztot;
1841   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1842   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1843   PetscFunctionReturn(0);
1844 }
1845 
1846 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1847 {
1848   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1849 
1850   PetscFunctionBegin;
1851   if (!aij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
1852   aij->getrowactive = PETSC_FALSE;
1853   PetscFunctionReturn(0);
1854 }
1855 
1856 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm)
1857 {
1858   Mat_MPIAIJ     *aij  = (Mat_MPIAIJ*)mat->data;
1859   Mat_SeqAIJ     *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data;
1860   PetscErrorCode ierr;
1861   PetscInt       i,j,cstart = mat->cmap->rstart;
1862   PetscReal      sum = 0.0;
1863   MatScalar      *v;
1864 
1865   PetscFunctionBegin;
1866   if (aij->size == 1) {
1867     ierr =  MatNorm(aij->A,type,norm);CHKERRQ(ierr);
1868   } else {
1869     if (type == NORM_FROBENIUS) {
1870       v = amat->a;
1871       for (i=0; i<amat->nz; i++) {
1872         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1873       }
1874       v = bmat->a;
1875       for (i=0; i<bmat->nz; i++) {
1876         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1877       }
1878       ierr  = MPIU_Allreduce(&sum,norm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1879       *norm = PetscSqrtReal(*norm);
1880       ierr = PetscLogFlops(2*amat->nz+2*bmat->nz);CHKERRQ(ierr);
1881     } else if (type == NORM_1) { /* max column norm */
1882       PetscReal *tmp,*tmp2;
1883       PetscInt  *jj,*garray = aij->garray;
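      /* accumulate |a_ij| into a dense array of all N global column sums,
         allreduce the sums, then take the maximum over the columns; this
         costs O(N) memory per process */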
1884       ierr  = PetscCalloc1(mat->cmap->N+1,&tmp);CHKERRQ(ierr);
1885       ierr  = PetscMalloc1(mat->cmap->N+1,&tmp2);CHKERRQ(ierr);
1886       *norm = 0.0;
1887       v     = amat->a; jj = amat->j;
1888       for (j=0; j<amat->nz; j++) {
1889         tmp[cstart + *jj++] += PetscAbsScalar(*v);  v++;
1890       }
1891       v = bmat->a; jj = bmat->j;
1892       for (j=0; j<bmat->nz; j++) {
1893         tmp[garray[*jj++]] += PetscAbsScalar(*v); v++;
1894       }
1895       ierr = MPIU_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1896       for (j=0; j<mat->cmap->N; j++) {
1897         if (tmp2[j] > *norm) *norm = tmp2[j];
1898       }
1899       ierr = PetscFree(tmp);CHKERRQ(ierr);
1900       ierr = PetscFree(tmp2);CHKERRQ(ierr);
1901       ierr = PetscLogFlops(PetscMax(amat->nz+bmat->nz-1,0));CHKERRQ(ierr);
1902     } else if (type == NORM_INFINITY) { /* max row norm */
1903       PetscReal ntemp = 0.0;
1904       for (j=0; j<aij->A->rmap->n; j++) {
1905         v   = amat->a + amat->i[j];
1906         sum = 0.0;
1907         for (i=0; i<amat->i[j+1]-amat->i[j]; i++) {
1908           sum += PetscAbsScalar(*v); v++;
1909         }
1910         v = bmat->a + bmat->i[j];
1911         for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) {
1912           sum += PetscAbsScalar(*v); v++;
1913         }
1914         if (sum > ntemp) ntemp = sum;
1915       }
1916       ierr = MPIU_Allreduce(&ntemp,norm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1917       ierr = PetscLogFlops(PetscMax(amat->nz+bmat->nz-1,0));CHKERRQ(ierr);
1918     } else SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"No support for two norm");
1919   }
1920   PetscFunctionReturn(0);
1921 }
1922 
1923 PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout)
1924 {
1925   Mat_MPIAIJ     *a   = (Mat_MPIAIJ*)A->data;
1926   Mat_SeqAIJ     *Aloc=(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data;
1927   PetscErrorCode ierr;
1928   PetscInt       M      = A->rmap->N,N = A->cmap->N,ma,na,mb,nb,*ai,*aj,*bi,*bj,row,*cols,*cols_tmp,i;
1929   PetscInt       cstart = A->cmap->rstart,ncol;
1930   Mat            B;
1931   MatScalar      *array;
1932 
1933   PetscFunctionBegin;
1934   if (reuse == MAT_INPLACE_MATRIX && M != N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
1935 
1936   ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n; nb = a->B->cmap->n;
1937   ai = Aloc->i; aj = Aloc->j;
1938   bi = Bloc->i; bj = Bloc->j;
1939   if (reuse == MAT_INITIAL_MATRIX || *matout == A) {
1940     PetscInt             *d_nnz,*g_nnz,*o_nnz;
1941     PetscSFNode          *oloc;
1942     PETSC_UNUSED PetscSF sf;
1943 
1944     ierr = PetscMalloc4(na,&d_nnz,na,&o_nnz,nb,&g_nnz,nb,&oloc);CHKERRQ(ierr);
1945     /* compute d_nnz for preallocation */
1946     ierr = PetscMemzero(d_nnz,na*sizeof(PetscInt));CHKERRQ(ierr);
1947     for (i=0; i<ai[ma]; i++) {
1948       d_nnz[aj[i]]++;
1949       aj[i] += cstart; /* global col index to be used by MatSetValues() */
1950     }
1951     /* compute local off-diagonal contributions */
1952     ierr = PetscMemzero(g_nnz,nb*sizeof(PetscInt));CHKERRQ(ierr);
1953     for (i=0; i<bi[ma]; i++) g_nnz[bj[i]]++;
1954     /* map those to global */
1955     ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
1956     ierr = PetscSFSetGraphLayout(sf,A->cmap,nb,NULL,PETSC_USE_POINTER,a->garray);CHKERRQ(ierr);
1957     ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
1958     ierr = PetscMemzero(o_nnz,na*sizeof(PetscInt));CHKERRQ(ierr);
1959     ierr = PetscSFReduceBegin(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr);
1960     ierr = PetscSFReduceEnd(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr);
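    /* after the reduction, o_nnz[j] holds the number of nonzeros that
       off-process rows of A contribute to local row j of the transpose */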
1961     ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
1962 
1963     ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
1964     ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
1965     ierr = MatSetBlockSizes(B,PetscAbs(A->cmap->bs),PetscAbs(A->rmap->bs));CHKERRQ(ierr);
1966     ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
1967     ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
1968     ierr = PetscFree4(d_nnz,o_nnz,g_nnz,oloc);CHKERRQ(ierr);
1969   } else {
1970     B    = *matout;
1971     ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
1972     for (i=0; i<ai[ma]; i++) aj[i] += cstart; /* global col index to be used by MatSetValues() */
1973   }
1974 
1975   /* copy over the A part */
1976   array = Aloc->a;
1977   row   = A->rmap->rstart;
1978   for (i=0; i<ma; i++) {
1979     ncol = ai[i+1]-ai[i];
1980     ierr = MatSetValues(B,ncol,aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1981     row++;
1982     array += ncol; aj += ncol;
1983   }
1984   aj = Aloc->j;
1985   for (i=0; i<ai[ma]; i++) aj[i] -= cstart; /* restore the local col indices */
1986 
1987   /* copy over the B part */
1988   ierr  = PetscCalloc1(bi[mb],&cols);CHKERRQ(ierr);
1989   array = Bloc->a;
1990   row   = A->rmap->rstart;
1991   for (i=0; i<bi[mb]; i++) cols[i] = a->garray[bj[i]];
1992   cols_tmp = cols;
1993   for (i=0; i<mb; i++) {
1994     ncol = bi[i+1]-bi[i];
1995     ierr = MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1996     row++;
1997     array += ncol; cols_tmp += ncol;
1998   }
1999   ierr = PetscFree(cols);CHKERRQ(ierr);
2000 
2001   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2002   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2003   if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) {
2004     *matout = B;
2005   } else {
2006     ierr = MatHeaderMerge(A,&B);CHKERRQ(ierr);
2007   }
2008   PetscFunctionReturn(0);
2009 }
2010 
2011 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
2012 {
2013   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2014   Mat            a    = aij->A,b = aij->B;
2015   PetscErrorCode ierr;
2016   PetscInt       s1,s2,s3;
2017 
2018   PetscFunctionBegin;
2019   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
2020   if (rr) {
2021     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
2022     if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
2023     /* Overlap communication with computation. */
2024     ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2025   }
2026   if (ll) {
2027     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
2028     if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
2029     ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr);
2030   }
2031   /* scale the diagonal block */
2032   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
2033 
2034   if (rr) {
2035     /* Do a scatter end and then right scale the off-diagonal block */
2036     ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2037     ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr);
2038   }
2039   PetscFunctionReturn(0);
2040 }
2041 
2042 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A)
2043 {
2044   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2045   PetscErrorCode ierr;
2046 
2047   PetscFunctionBegin;
2048   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
2049   PetscFunctionReturn(0);
2050 }
2051 
2052 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscBool  *flag)
2053 {
2054   Mat_MPIAIJ     *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data;
2055   Mat            a,b,c,d;
2056   PetscBool      flg;
2057   PetscErrorCode ierr;
2058 
2059   PetscFunctionBegin;
2060   a = matA->A; b = matA->B;
2061   c = matB->A; d = matB->B;
2062 
2063   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
2064   if (flg) {
2065     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
2066   }
2067   ierr = MPIU_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
2068   PetscFunctionReturn(0);
2069 }
2070 
2071 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
2072 {
2073   PetscErrorCode ierr;
2074   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2075   Mat_MPIAIJ     *b = (Mat_MPIAIJ*)B->data;
2076 
2077   PetscFunctionBegin;
2078   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
2079   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
2080     /* because of the column compression in the off-processor part of the matrix a->B,
2081        the number of columns in a->B and b->B may be different, hence we cannot call
2082        MatCopy() directly on the two parts. If need be, we can provide a more
2083        efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices
2084        then copying the submatrices */
2085     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
2086   } else {
2087     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
2088     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
2089   }
2090   ierr = PetscObjectStateIncrease((PetscObject)B);CHKERRQ(ierr);
2091   PetscFunctionReturn(0);
2092 }
2093 
2094 PetscErrorCode MatSetUp_MPIAIJ(Mat A)
2095 {
2096   PetscErrorCode ierr;
2097 
2098   PetscFunctionBegin;
2099   ierr =  MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
2100   PetscFunctionReturn(0);
2101 }
2102 
2103 /*
2104    Computes the number of nonzeros per row needed for preallocation when X and Y
2105    have different nonzero structure.
2106 */
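/*
   A worked illustration: if row i of X has global columns {0,3,5} and row i of
   Y has global columns {1,3}, the merged row has columns {0,1,3,5}; the shared
   column 3 is counted once, giving nnz[i] = 4.
*/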
2107 PetscErrorCode MatAXPYGetPreallocation_MPIX_private(PetscInt m,const PetscInt *xi,const PetscInt *xj,const PetscInt *xltog,const PetscInt *yi,const PetscInt *yj,const PetscInt *yltog,PetscInt *nnz)
2108 {
2109   PetscInt       i,j,k,nzx,nzy;
2110 
2111   PetscFunctionBegin;
2112   /* Set the number of nonzeros in the new matrix */
2113   for (i=0; i<m; i++) {
2114     const PetscInt *xjj = xj+xi[i],*yjj = yj+yi[i];
2115     nzx = xi[i+1] - xi[i];
2116     nzy = yi[i+1] - yi[i];
2117     nnz[i] = 0;
2118     for (j=0,k=0; j<nzx; j++) {                   /* Point in X */
2119       for (; k<nzy && yltog[yjj[k]]<xltog[xjj[j]]; k++) nnz[i]++; /* Catch up to X */
2120       if (k<nzy && yltog[yjj[k]]==xltog[xjj[j]]) k++;             /* Skip duplicate */
2121       nnz[i]++;
2122     }
2123     for (; k<nzy; k++) nnz[i]++;
2124   }
2125   PetscFunctionReturn(0);
2126 }
2127 
2128 /* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */
2129 static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y,const PetscInt *yltog,Mat X,const PetscInt *xltog,PetscInt *nnz)
2130 {
2131   PetscErrorCode ierr;
2132   PetscInt       m = Y->rmap->N;
2133   Mat_SeqAIJ     *x = (Mat_SeqAIJ*)X->data;
2134   Mat_SeqAIJ     *y = (Mat_SeqAIJ*)Y->data;
2135 
2136   PetscFunctionBegin;
2137   ierr = MatAXPYGetPreallocation_MPIX_private(m,x->i,x->j,xltog,y->i,y->j,yltog,nnz);CHKERRQ(ierr);
2138   PetscFunctionReturn(0);
2139 }
2140 
2141 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
2142 {
2143   PetscErrorCode ierr;
2144   Mat_MPIAIJ     *xx = (Mat_MPIAIJ*)X->data,*yy = (Mat_MPIAIJ*)Y->data;
2145   PetscBLASInt   bnz,one=1;
2146   Mat_SeqAIJ     *x,*y;
2147 
2148   PetscFunctionBegin;
2149   if (str == SAME_NONZERO_PATTERN) {
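    /* identical nonzero pattern: Y += a*X reduces to BLAS axpy on the raw value
       arrays of the diagonal and off-diagonal blocks */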
2150     PetscScalar alpha = a;
2151     x    = (Mat_SeqAIJ*)xx->A->data;
2152     ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr);
2153     y    = (Mat_SeqAIJ*)yy->A->data;
2154     PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
2155     x    = (Mat_SeqAIJ*)xx->B->data;
2156     y    = (Mat_SeqAIJ*)yy->B->data;
2157     ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr);
2158     PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
2159     ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr);
2160   } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
2161     ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
2162   } else {
2163     Mat      B;
2164     PetscInt *nnz_d,*nnz_o;
2165     ierr = PetscMalloc1(yy->A->rmap->N,&nnz_d);CHKERRQ(ierr);
2166     ierr = PetscMalloc1(yy->B->rmap->N,&nnz_o);CHKERRQ(ierr);
2167     ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr);
2168     ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr);
2169     ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr);
2170     ierr = MatSetBlockSizesFromMats(B,Y,Y);CHKERRQ(ierr);
2171     ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr);
2172     ierr = MatAXPYGetPreallocation_SeqAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr);
2173     ierr = MatAXPYGetPreallocation_MPIAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr);
2174     ierr = MatMPIAIJSetPreallocation(B,0,nnz_d,0,nnz_o);CHKERRQ(ierr);
2175     ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr);
2176     ierr = MatHeaderReplace(Y,&B);CHKERRQ(ierr);
2177     ierr = PetscFree(nnz_d);CHKERRQ(ierr);
2178     ierr = PetscFree(nnz_o);CHKERRQ(ierr);
2179   }
2180   PetscFunctionReturn(0);
2181 }
2182 
2183 extern PetscErrorCode  MatConjugate_SeqAIJ(Mat);
2184 
2185 PetscErrorCode  MatConjugate_MPIAIJ(Mat mat)
2186 {
2187 #if defined(PETSC_USE_COMPLEX)
2188   PetscErrorCode ierr;
2189   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2190 
2191   PetscFunctionBegin;
2192   ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr);
2193   ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr);
2194 #else
2195   PetscFunctionBegin;
2196 #endif
2197   PetscFunctionReturn(0);
2198 }
2199 
2200 PetscErrorCode MatRealPart_MPIAIJ(Mat A)
2201 {
2202   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2203   PetscErrorCode ierr;
2204 
2205   PetscFunctionBegin;
2206   ierr = MatRealPart(a->A);CHKERRQ(ierr);
2207   ierr = MatRealPart(a->B);CHKERRQ(ierr);
2208   PetscFunctionReturn(0);
2209 }
2210 
2211 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A)
2212 {
2213   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2214   PetscErrorCode ierr;
2215 
2216   PetscFunctionBegin;
2217   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
2218   ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
2219   PetscFunctionReturn(0);
2220 }
2221 
2222 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2223 {
2224   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2225   PetscErrorCode ierr;
2226   PetscInt       i,*idxb = 0;
2227   PetscScalar    *va,*vb;
2228   Vec            vtmp;
2229 
2230   PetscFunctionBegin;
2231   ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr);
2232   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
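  /* idx[] returned by the diagonal block is numbered within the block, so shift by the
     column start to get global columns; off-diagonal winners are mapped through garray below */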
2233   if (idx) {
2234     for (i=0; i<A->rmap->n; i++) {
2235       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2236     }
2237   }
2238 
2239   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
2240   if (idx) {
2241     ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr);
2242   }
2243   ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
2244   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
2245 
2246   for (i=0; i<A->rmap->n; i++) {
2247     if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
2248       va[i] = vb[i];
2249       if (idx) idx[i] = a->garray[idxb[i]];
2250     }
2251   }
2252 
2253   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2254   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
2255   ierr = PetscFree(idxb);CHKERRQ(ierr);
2256   ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
2257   PetscFunctionReturn(0);
2258 }
2259 
2260 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2261 {
2262   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2263   PetscErrorCode ierr;
2264   PetscInt       i,*idxb = 0;
2265   PetscScalar    *va,*vb;
2266   Vec            vtmp;
2267 
2268   PetscFunctionBegin;
2269   ierr = MatGetRowMinAbs(a->A,v,idx);CHKERRQ(ierr);
2270   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2271   if (idx) {
2272     for (i=0; i<A->rmap->n; i++) {
2273       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2274     }
2275   }
2276 
2277   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
2278   if (idx) {
2279     ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr);
2280   }
2281   ierr = MatGetRowMinAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
2282   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
2283 
2284   for (i=0; i<A->rmap->n; i++) {
2285     if (PetscAbsScalar(va[i]) > PetscAbsScalar(vb[i])) {
2286       va[i] = vb[i];
2287       if (idx) idx[i] = a->garray[idxb[i]];
2288     }
2289   }
2290 
2291   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2292   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
2293   ierr = PetscFree(idxb);CHKERRQ(ierr);
2294   ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
2295   PetscFunctionReturn(0);
2296 }
2297 
2298 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2299 {
2300   Mat_MPIAIJ     *mat   = (Mat_MPIAIJ*) A->data;
2301   PetscInt       n      = A->rmap->n;
2302   PetscInt       cstart = A->cmap->rstart;
2303   PetscInt       *cmap  = mat->garray;
2304   PetscInt       *diagIdx, *offdiagIdx;
2305   Vec            diagV, offdiagV;
2306   PetscScalar    *a, *diagA, *offdiagA;
2307   PetscInt       r;
2308   PetscErrorCode ierr;
2309 
2310   PetscFunctionBegin;
2311   ierr = PetscMalloc2(n,&diagIdx,n,&offdiagIdx);CHKERRQ(ierr);
2312   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr);
2313   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr);
2314   ierr = MatGetRowMin(mat->A, diagV,    diagIdx);CHKERRQ(ierr);
2315   ierr = MatGetRowMin(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
2316   ierr = VecGetArray(v,        &a);CHKERRQ(ierr);
2317   ierr = VecGetArray(diagV,    &diagA);CHKERRQ(ierr);
2318   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2319   for (r = 0; r < n; ++r) {
2320     if (PetscAbsScalar(diagA[r]) <= PetscAbsScalar(offdiagA[r])) {
2321       a[r]   = diagA[r];
2322       idx[r] = cstart + diagIdx[r];
2323     } else {
2324       a[r]   = offdiagA[r];
2325       idx[r] = cmap[offdiagIdx[r]];
2326     }
2327   }
2328   ierr = VecRestoreArray(v,        &a);CHKERRQ(ierr);
2329   ierr = VecRestoreArray(diagV,    &diagA);CHKERRQ(ierr);
2330   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2331   ierr = VecDestroy(&diagV);CHKERRQ(ierr);
2332   ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
2333   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
2334   PetscFunctionReturn(0);
2335 }
2336 
2337 PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2338 {
2339   Mat_MPIAIJ     *mat   = (Mat_MPIAIJ*) A->data;
2340   PetscInt       n      = A->rmap->n;
2341   PetscInt       cstart = A->cmap->rstart;
2342   PetscInt       *cmap  = mat->garray;
2343   PetscInt       *diagIdx, *offdiagIdx;
2344   Vec            diagV, offdiagV;
2345   PetscScalar    *a, *diagA, *offdiagA;
2346   PetscInt       r;
2347   PetscErrorCode ierr;
2348 
2349   PetscFunctionBegin;
2350   ierr = PetscMalloc2(n,&diagIdx,n,&offdiagIdx);CHKERRQ(ierr);
2351   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr);
2352   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr);
2353   ierr = MatGetRowMax(mat->A, diagV,    diagIdx);CHKERRQ(ierr);
2354   ierr = MatGetRowMax(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
2355   ierr = VecGetArray(v,        &a);CHKERRQ(ierr);
2356   ierr = VecGetArray(diagV,    &diagA);CHKERRQ(ierr);
2357   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2358   for (r = 0; r < n; ++r) {
2359     if (PetscAbsScalar(diagA[r]) >= PetscAbsScalar(offdiagA[r])) {
2360       a[r]   = diagA[r];
2361       idx[r] = cstart + diagIdx[r];
2362     } else {
2363       a[r]   = offdiagA[r];
2364       idx[r] = cmap[offdiagIdx[r]];
2365     }
2366   }
2367   ierr = VecRestoreArray(v,        &a);CHKERRQ(ierr);
2368   ierr = VecRestoreArray(diagV,    &diagA);CHKERRQ(ierr);
2369   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2370   ierr = VecDestroy(&diagV);CHKERRQ(ierr);
2371   ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
2372   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
2373   PetscFunctionReturn(0);
2374 }
2375 
2376 PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat,Mat *newmat)
2377 {
2378   PetscErrorCode ierr;
2379   Mat            *dummy;
2380 
2381   PetscFunctionBegin;
2382   ierr    = MatCreateSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy);CHKERRQ(ierr);
2383   *newmat = *dummy;
2384   ierr    = PetscFree(dummy);CHKERRQ(ierr);
2385   PetscFunctionReturn(0);
2386 }
2387 
2388 PetscErrorCode  MatInvertBlockDiagonal_MPIAIJ(Mat A,const PetscScalar **values)
2389 {
2390   Mat_MPIAIJ     *a = (Mat_MPIAIJ*) A->data;
2391   PetscErrorCode ierr;
2392 
2393   PetscFunctionBegin;
2394   ierr = MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr);
2395   A->factorerrortype = a->A->factorerrortype;
2396   PetscFunctionReturn(0);
2397 }
2398 
2399 static PetscErrorCode  MatSetRandom_MPIAIJ(Mat x,PetscRandom rctx)
2400 {
2401   PetscErrorCode ierr;
2402   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)x->data;
2403 
2404   PetscFunctionBegin;
2405   ierr = MatSetRandom(aij->A,rctx);CHKERRQ(ierr);
2406   ierr = MatSetRandom(aij->B,rctx);CHKERRQ(ierr);
2407   ierr = MatAssemblyBegin(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2408   ierr = MatAssemblyEnd(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2409   PetscFunctionReturn(0);
2410 }
2411 
2412 PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ(Mat A,PetscBool sc)
2413 {
2414   PetscFunctionBegin;
2415   if (sc) A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ_Scalable;
2416   else A->ops->increaseoverlap    = MatIncreaseOverlap_MPIAIJ;
2417   PetscFunctionReturn(0);
2418 }
2419 
2420 /*@
2421    MatMPIAIJSetUseScalableIncreaseOverlap - Set whether the matrix uses a scalable algorithm to compute the overlap in MatIncreaseOverlap()
2422 
2423    Collective on Mat
2424 
2425    Input Parameters:
2426 +    A - the matrix
2427 -    sc - PETSC_TRUE indicates use the scalable algorithm (default is not to use the scalable algorithm)
2428 
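   Options Database Keys:
. -mat_increase_overlap_scalable - use the scalable algorithm to compute the overlap

   Example Usage (a sketch; nis, is, and ov are assumed to be set up by the caller):
.vb
   ierr = MatMPIAIJSetUseScalableIncreaseOverlap(A,PETSC_TRUE);CHKERRQ(ierr);
   ierr = MatIncreaseOverlap(A,nis,is,ov);CHKERRQ(ierr);
.ve
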
2429   Level: advanced
2430 
2431 @*/
2432 PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap(Mat A,PetscBool sc)
2433 {
2434   PetscErrorCode       ierr;
2435 
2436   PetscFunctionBegin;
2437   ierr = PetscTryMethod(A,"MatMPIAIJSetUseScalableIncreaseOverlap_C",(Mat,PetscBool),(A,sc));CHKERRQ(ierr);
2438   PetscFunctionReturn(0);
2439 }
2440 
2441 PetscErrorCode MatSetFromOptions_MPIAIJ(PetscOptionItems *PetscOptionsObject,Mat A)
2442 {
2443   PetscErrorCode       ierr;
2444   PetscBool            sc = PETSC_FALSE,flg;
2445 
2446   PetscFunctionBegin;
2447   ierr = PetscOptionsHead(PetscOptionsObject,"MPIAIJ options");CHKERRQ(ierr);
2448   if (A->ops->increaseoverlap == MatIncreaseOverlap_MPIAIJ_Scalable) sc = PETSC_TRUE;
2449   ierr = PetscOptionsBool("-mat_increase_overlap_scalable","Use a scalable algorithm to compute the overlap","MatIncreaseOverlap",sc,&sc,&flg);CHKERRQ(ierr);
2450   if (flg) {
2451     ierr = MatMPIAIJSetUseScalableIncreaseOverlap(A,sc);CHKERRQ(ierr);
2452   }
2453   ierr = PetscOptionsTail();CHKERRQ(ierr);
2455   PetscFunctionReturn(0);
2456 }
2457 
2458 PetscErrorCode MatShift_MPIAIJ(Mat Y,PetscScalar a)
2459 {
2460   PetscErrorCode ierr;
2461   Mat_MPIAIJ     *maij = (Mat_MPIAIJ*)Y->data;
2462   Mat_SeqAIJ     *aij = (Mat_SeqAIJ*)maij->A->data;
2463 
2464   PetscFunctionBegin;
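  /* make sure the diagonal entries exist before shifting: preallocate one nonzero
     per row if the matrix is unassembled or its diagonal block is empty; the
     nonew flag is preserved across the reallocation */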
2465   if (!Y->preallocated) {
2466     ierr = MatMPIAIJSetPreallocation(Y,1,NULL,0,NULL);CHKERRQ(ierr);
2467   } else if (!aij->nz) {
2468     PetscInt nonew = aij->nonew;
2469     ierr = MatSeqAIJSetPreallocation(maij->A,1,NULL);CHKERRQ(ierr);
2470     aij->nonew = nonew;
2471   }
2472   ierr = MatShift_Basic(Y,a);CHKERRQ(ierr);
2473   PetscFunctionReturn(0);
2474 }
2475 
2476 PetscErrorCode MatMissingDiagonal_MPIAIJ(Mat A,PetscBool  *missing,PetscInt *d)
2477 {
2478   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2479   PetscErrorCode ierr;
2480 
2481   PetscFunctionBegin;
2482   if (A->rmap->n != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only works for square matrices");
2483   ierr = MatMissingDiagonal(a->A,missing,d);CHKERRQ(ierr);
2484   if (d) {
2485     PetscInt rstart;
2486     ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr);
2487     *d += rstart;
2489   }
2490   PetscFunctionReturn(0);
2491 }
2492 
2493 
2494 /* -------------------------------------------------------------------*/
2495 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
2496                                        MatGetRow_MPIAIJ,
2497                                        MatRestoreRow_MPIAIJ,
2498                                        MatMult_MPIAIJ,
2499                                 /* 4*/ MatMultAdd_MPIAIJ,
2500                                        MatMultTranspose_MPIAIJ,
2501                                        MatMultTransposeAdd_MPIAIJ,
2502                                        0,
2503                                        0,
2504                                        0,
2505                                 /*10*/ 0,
2506                                        0,
2507                                        0,
2508                                        MatSOR_MPIAIJ,
2509                                        MatTranspose_MPIAIJ,
2510                                 /*15*/ MatGetInfo_MPIAIJ,
2511                                        MatEqual_MPIAIJ,
2512                                        MatGetDiagonal_MPIAIJ,
2513                                        MatDiagonalScale_MPIAIJ,
2514                                        MatNorm_MPIAIJ,
2515                                 /*20*/ MatAssemblyBegin_MPIAIJ,
2516                                        MatAssemblyEnd_MPIAIJ,
2517                                        MatSetOption_MPIAIJ,
2518                                        MatZeroEntries_MPIAIJ,
2519                                 /*24*/ MatZeroRows_MPIAIJ,
2520                                        0,
2521                                        0,
2522                                        0,
2523                                        0,
2524                                 /*29*/ MatSetUp_MPIAIJ,
2525                                        0,
2526                                        0,
2527                                        MatGetDiagonalBlock_MPIAIJ,
2528                                        0,
2529                                 /*34*/ MatDuplicate_MPIAIJ,
2530                                        0,
2531                                        0,
2532                                        0,
2533                                        0,
2534                                 /*39*/ MatAXPY_MPIAIJ,
2535                                        MatCreateSubMatrices_MPIAIJ,
2536                                        MatIncreaseOverlap_MPIAIJ,
2537                                        MatGetValues_MPIAIJ,
2538                                        MatCopy_MPIAIJ,
2539                                 /*44*/ MatGetRowMax_MPIAIJ,
2540                                        MatScale_MPIAIJ,
2541                                        MatShift_MPIAIJ,
2542                                        MatDiagonalSet_MPIAIJ,
2543                                        MatZeroRowsColumns_MPIAIJ,
2544                                 /*49*/ MatSetRandom_MPIAIJ,
2545                                        0,
2546                                        0,
2547                                        0,
2548                                        0,
2549                                 /*54*/ MatFDColoringCreate_MPIXAIJ,
2550                                        0,
2551                                        MatSetUnfactored_MPIAIJ,
2552                                        MatPermute_MPIAIJ,
2553                                        0,
2554                                 /*59*/ MatCreateSubMatrix_MPIAIJ,
2555                                        MatDestroy_MPIAIJ,
2556                                        MatView_MPIAIJ,
2557                                        0,
2558                                        MatMatMatMult_MPIAIJ_MPIAIJ_MPIAIJ,
2559                                 /*64*/ MatMatMatMultSymbolic_MPIAIJ_MPIAIJ_MPIAIJ,
2560                                        MatMatMatMultNumeric_MPIAIJ_MPIAIJ_MPIAIJ,
2561                                        0,
2562                                        0,
2563                                        0,
2564                                 /*69*/ MatGetRowMaxAbs_MPIAIJ,
2565                                        MatGetRowMinAbs_MPIAIJ,
2566                                        0,
2567                                        0,
2568                                        0,
2569                                        0,
2570                                 /*75*/ MatFDColoringApply_AIJ,
2571                                        MatSetFromOptions_MPIAIJ,
2572                                        0,
2573                                        0,
2574                                        MatFindZeroDiagonals_MPIAIJ,
2575                                 /*80*/ 0,
2576                                        0,
2577                                        0,
2578                                 /*83*/ MatLoad_MPIAIJ,
2579                                        MatIsSymmetric_MPIAIJ,
2580                                        0,
2581                                        0,
2582                                        0,
2583                                        0,
2584                                 /*89*/ MatMatMult_MPIAIJ_MPIAIJ,
2585                                        MatMatMultSymbolic_MPIAIJ_MPIAIJ,
2586                                        MatMatMultNumeric_MPIAIJ_MPIAIJ,
2587                                        MatPtAP_MPIAIJ_MPIAIJ,
2588                                        MatPtAPSymbolic_MPIAIJ_MPIAIJ,
2589                                 /*94*/ MatPtAPNumeric_MPIAIJ_MPIAIJ,
2590                                        0,
2591                                        0,
2592                                        0,
2593                                        0,
2594                                 /*99*/ 0,
2595                                        0,
2596                                        0,
2597                                        MatConjugate_MPIAIJ,
2598                                        0,
2599                                 /*104*/MatSetValuesRow_MPIAIJ,
2600                                        MatRealPart_MPIAIJ,
2601                                        MatImaginaryPart_MPIAIJ,
2602                                        0,
2603                                        0,
2604                                 /*109*/0,
2605                                        0,
2606                                        MatGetRowMin_MPIAIJ,
2607                                        0,
2608                                        MatMissingDiagonal_MPIAIJ,
2609                                 /*114*/MatGetSeqNonzeroStructure_MPIAIJ,
2610                                        0,
2611                                        MatGetGhosts_MPIAIJ,
2612                                        0,
2613                                        0,
2614                                 /*119*/0,
2615                                        0,
2616                                        0,
2617                                        0,
2618                                        MatGetMultiProcBlock_MPIAIJ,
2619                                 /*124*/MatFindNonzeroRows_MPIAIJ,
2620                                        MatGetColumnNorms_MPIAIJ,
2621                                        MatInvertBlockDiagonal_MPIAIJ,
2622                                        0,
2623                                        MatCreateSubMatricesMPI_MPIAIJ,
2624                                 /*129*/0,
2625                                        MatTransposeMatMult_MPIAIJ_MPIAIJ,
2626                                        MatTransposeMatMultSymbolic_MPIAIJ_MPIAIJ,
2627                                        MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ,
2628                                        0,
2629                                 /*134*/0,
2630                                        0,
2631                                        MatRARt_MPIAIJ_MPIAIJ,
2632                                        0,
2633                                        0,
2634                                 /*139*/MatSetBlockSizes_MPIAIJ,
2635                                        0,
2636                                        0,
2637                                        MatFDColoringSetUp_MPIXAIJ,
2638                                        MatFindOffBlockDiagonalEntries_MPIAIJ,
2639                                 /*144*/MatCreateMPIMatConcatenateSeqMat_MPIAIJ
2640 };
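
/*
   The table above is the virtual function table for MATMPIAIJ; the type-independent
   Mat interface dispatches through it. A minimal sketch of the mechanism (simplified,
   not the actual MatMult() source):

     PetscErrorCode MatMult(Mat mat,Vec x,Vec y)
     {
       PetscErrorCode ierr;
       PetscFunctionBegin;
       ierr = (*mat->ops->mult)(mat,x,y);CHKERRQ(ierr);   <-- slot 3 above, MatMult_MPIAIJ
       PetscFunctionReturn(0);
     }

   A zero entry means the operation is not supported by this type, and the
   dispatcher raises an error instead.
*/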
2641 
2642 /* ----------------------------------------------------------------------------------------*/
2643 
2644 PetscErrorCode  MatStoreValues_MPIAIJ(Mat mat)
2645 {
2646   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2647   PetscErrorCode ierr;
2648 
2649   PetscFunctionBegin;
2650   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
2651   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
2652   PetscFunctionReturn(0);
2653 }
2654 
2655 PetscErrorCode  MatRetrieveValues_MPIAIJ(Mat mat)
2656 {
2657   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2658   PetscErrorCode ierr;
2659 
2660   PetscFunctionBegin;
2661   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
2662   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
2663   PetscFunctionReturn(0);
2664 }
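
/*
   The two routines above back MatStoreValues()/MatRetrieveValues(). A typical
   caller-side pattern (a sketch; assumes the nonzero pattern of A is fixed):

     ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);CHKERRQ(ierr);
     ierr = MatStoreValues(A);CHKERRQ(ierr);      stash the current numerical values
     ... change values with MatSetValues() and reassemble ...
     ierr = MatRetrieveValues(A);CHKERRQ(ierr);   restore the stashed values
*/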
2665 
2666 PetscErrorCode  MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
2667 {
2668   Mat_MPIAIJ     *b;
2669   PetscErrorCode ierr;
2670 
2671   PetscFunctionBegin;
2672   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
2673   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
2674   b = (Mat_MPIAIJ*)B->data;
2675 
2676 #if defined(PETSC_USE_CTABLE)
2677   ierr = PetscTableDestroy(&b->colmap);CHKERRQ(ierr);
2678 #else
2679   ierr = PetscFree(b->colmap);CHKERRQ(ierr);
2680 #endif
2681   ierr = PetscFree(b->garray);CHKERRQ(ierr);
2682   ierr = VecDestroy(&b->lvec);CHKERRQ(ierr);
2683   ierr = VecScatterDestroy(&b->Mvctx);CHKERRQ(ierr);
2684 
2685   /* Because B may have been resized, we simply destroy it and create a new one each time */
2686   ierr = MatDestroy(&b->B);CHKERRQ(ierr);
2687   ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
2688   ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr);
2689   ierr = MatSetBlockSizesFromMats(b->B,B,B);CHKERRQ(ierr);
2690   ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr);
2691   ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->B);CHKERRQ(ierr);
2692 
2693   if (!B->preallocated) {
2694     ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
2695     ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr);
2696     ierr = MatSetBlockSizesFromMats(b->A,B,B);CHKERRQ(ierr);
2697     ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr);
2698     ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->A);CHKERRQ(ierr);
2699   }
2700 
2701   ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr);
2702   ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr);
2703   B->preallocated  = PETSC_TRUE;
2704   B->was_assembled = PETSC_FALSE;
2705   B->assembled     = PETSC_FALSE;
2706   PetscFunctionReturn(0);
2707 }
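
/*
   Caller-side sketch of the preallocation interface implemented above
   (the sizes N, 5, and 2 are illustrative only):

     Mat      A;
     PetscInt N = 100;
     ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
     ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,N,N);CHKERRQ(ierr);
     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
     ierr = MatMPIAIJSetPreallocation(A,5,NULL,2,NULL);CHKERRQ(ierr);

   Here 5 is the number of nonzeros per row in the diagonal block and 2 the
   number in the off-diagonal block; exact per-row counts can be passed in the
   d_nnz/o_nnz arrays instead.
*/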
2708 
2709 PetscErrorCode MatResetPreallocation_MPIAIJ(Mat B)
2710 {
2711   Mat_MPIAIJ     *b;
2712   PetscErrorCode ierr;
2713 
2714   PetscFunctionBegin;
2715   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
2716   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
2717   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
2718   b = (Mat_MPIAIJ*)B->data;
2719 
2720 #if defined(PETSC_USE_CTABLE)
2721   ierr = PetscTableDestroy(&b->colmap);CHKERRQ(ierr);
2722 #else
2723   ierr = PetscFree(b->colmap);CHKERRQ(ierr);
2724 #endif
2725   ierr = PetscFree(b->garray);CHKERRQ(ierr);
2726   ierr = VecDestroy(&b->lvec);CHKERRQ(ierr);
2727   ierr = VecScatterDestroy(&b->Mvctx);CHKERRQ(ierr);
2728 
2729   ierr = MatResetPreallocation(b->A);CHKERRQ(ierr);
2730   ierr = MatResetPreallocation(b->B);CHKERRQ(ierr);
2731   B->preallocated  = PETSC_TRUE;
2732   B->was_assembled = PETSC_FALSE;
2733   B->assembled = PETSC_FALSE;
2734   PetscFunctionReturn(0);
2735 }
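
/*
   MatResetPreallocation() restores the original preallocation after a matrix
   has been assembled, so it can be refilled without new mallocs. A sketch:

     ierr = MatResetPreallocation(A);CHKERRQ(ierr);
     ... repeat the MatSetValues() loop ...
     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
*/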
2736 
2737 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
2738 {
2739   Mat            mat;
2740   Mat_MPIAIJ     *a,*oldmat = (Mat_MPIAIJ*)matin->data;
2741   PetscErrorCode ierr;
2742 
2743   PetscFunctionBegin;
2744   *newmat = 0;
2745   ierr    = MatCreate(PetscObjectComm((PetscObject)matin),&mat);CHKERRQ(ierr);
2746   ierr    = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr);
2747   ierr    = MatSetBlockSizesFromMats(mat,matin,matin);CHKERRQ(ierr);
2748   ierr    = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr);
2749   ierr    = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
2750   a       = (Mat_MPIAIJ*)mat->data;
2751 
2752   mat->factortype   = matin->factortype;
2753   mat->assembled    = PETSC_TRUE;
2754   mat->insertmode   = NOT_SET_VALUES;
2755   mat->preallocated = PETSC_TRUE;
2756 
2757   a->size         = oldmat->size;
2758   a->rank         = oldmat->rank;
2759   a->donotstash   = oldmat->donotstash;
2760   a->roworiented  = oldmat->roworiented;
2761   a->rowindices   = 0;
2762   a->rowvalues    = 0;
2763   a->getrowactive = PETSC_FALSE;
2764 
2765   ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr);
2766   ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr);
2767 
2768   if (oldmat->colmap) {
2769 #if defined(PETSC_USE_CTABLE)
2770     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
2771 #else
2772     ierr = PetscMalloc1(mat->cmap->N,&a->colmap);CHKERRQ(ierr);
2773     ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
2774     ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
2775 #endif
2776   } else a->colmap = 0;
2777   if (oldmat->garray) {
2778     PetscInt len;
2779     len  = oldmat->B->cmap->n;
2780     ierr = PetscMalloc1(len+1,&a->garray);CHKERRQ(ierr);
2781     ierr = PetscLogObjectMemory((PetscObject)mat,len*sizeof(PetscInt));CHKERRQ(ierr);
2782     if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); }
2783   } else a->garray = 0;
2784 
2785   ierr    = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
2786   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->lvec);CHKERRQ(ierr);
2787   ierr    = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
2788   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx);CHKERRQ(ierr);
2789 
2790   if (oldmat->Mvctx_mpi1) {
2791     ierr    = VecScatterCopy(oldmat->Mvctx_mpi1,&a->Mvctx_mpi1);CHKERRQ(ierr);
2792     ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx_mpi1);CHKERRQ(ierr);
2793   }
2794 
2795   ierr    = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
2796   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr);
2797   ierr    = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
2798   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->B);CHKERRQ(ierr);
2799   ierr    = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr);
2800   *newmat = mat;
2801   PetscFunctionReturn(0);
2802 }
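
/*
   Caller-side sketch of MatDuplicate(), which dispatches to the routine above
   for MATMPIAIJ:

     Mat B;
     ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr);

   copies both the nonzero pattern and the values; pass MAT_DO_NOT_COPY_VALUES
   to copy only the pattern.
*/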
2803 
2804 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer)
2805 {
2806   PetscScalar    *vals,*svals;
2807   MPI_Comm       comm;
2808   PetscErrorCode ierr;
2809   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
2810   PetscInt       i,nz,j,rstart,rend,mmax,maxnz = 0;
2811   PetscInt       header[4],*rowlengths = 0,M,N,m,*cols;
2812   PetscInt       *ourlens = NULL,*procsnz = NULL,*offlens = NULL,jj,*mycols,*smycols;
2813   PetscInt       cend,cstart,n,*rowners;
2814   int            fd;
2815   PetscInt       bs = newMat->rmap->bs;
2816 
2817   PetscFunctionBegin;
2818   /* force binary viewer to load .info file if it has not yet done so */
2819   ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr);
2820   ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr);
2821   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2822   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2823   ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
2824   if (!rank) {
2825     ierr = PetscBinaryRead(fd,(char*)header,4,PETSC_INT);CHKERRQ(ierr);
2826     if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Not a matrix object");
2827     if (header[3] < 0) SETERRQ(PetscObjectComm((PetscObject)newMat),PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format on disk, cannot load as MATMPIAIJ");
2828   }
2829 
2830   ierr = PetscOptionsBegin(comm,NULL,"Options for loading MATMPIAIJ matrix","Mat");CHKERRQ(ierr);
2831   ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,NULL);CHKERRQ(ierr);
2832   ierr = PetscOptionsEnd();CHKERRQ(ierr);
2833   if (bs < 0) bs = 1;
2834 
2835   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
2836   M    = header[1]; N = header[2];
2837 
2838   /* If global sizes are set, check if they are consistent with that given in the file */
2839   if (newMat->rmap->N >= 0 && newMat->rmap->N != M) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Inconsistent # of rows: Matrix in file has (%D) and input matrix has (%D)",newMat->rmap->N,M);
2840   if (newMat->cmap->N >= 0 && newMat->cmap->N != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Inconsistent # of cols: Matrix in file has (%D) and input matrix has (%D)",newMat->cmap->N,N);
2841 
2842   /* determine ownership of all (block) rows */
2843   if (M%bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Inconsistent # of rows (%D) and block size (%D)",M,bs);
2844   if (newMat->rmap->n < 0) m = bs*((M/bs)/size + (((M/bs) % size) > rank));    /* PETSC_DECIDE */
2845   else m = newMat->rmap->n; /* Set by user */
2846 
2847   ierr = PetscMalloc1(size+1,&rowners);CHKERRQ(ierr);
2848   ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
2849 
2850   /* First process needs enough room for process with most rows */
2851   if (!rank) {
2852     mmax = rowners[1];
2853     for (i=2; i<=size; i++) {
2854       mmax = PetscMax(mmax, rowners[i]);
2855     }
2856   } else mmax = -1;             /* unused, but compilers complain */
2857 
2858   rowners[0] = 0;
2859   for (i=2; i<=size; i++) {
2860     rowners[i] += rowners[i-1];
2861   }
2862   rstart = rowners[rank];
2863   rend   = rowners[rank+1];
2864 
2865   /* distribute row lengths to all processors */
2866   ierr = PetscMalloc2(m,&ourlens,m,&offlens);CHKERRQ(ierr);
2867   if (!rank) {
2868     ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr);
2869     ierr = PetscMalloc1(mmax,&rowlengths);CHKERRQ(ierr);
2870     ierr = PetscCalloc1(size,&procsnz);CHKERRQ(ierr);
2871     for (j=0; j<m; j++) {
2872       procsnz[0] += ourlens[j];
2873     }
2874     for (i=1; i<size; i++) {
2875       ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr);
2876       /* calculate the number of nonzeros on each processor */
2877       for (j=0; j<rowners[i+1]-rowners[i]; j++) {
2878         procsnz[i] += rowlengths[j];
2879       }
2880       ierr = MPIULong_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
2881     }
2882     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
2883   } else {
2884     ierr = MPIULong_Recv(ourlens,m,MPIU_INT,0,tag,comm);CHKERRQ(ierr);
2885   }
2886 
2887   if (!rank) {
2888     /* determine max buffer needed and allocate it */
2889     maxnz = 0;
2890     for (i=0; i<size; i++) {
2891       maxnz = PetscMax(maxnz,procsnz[i]);
2892     }
2893     ierr = PetscMalloc1(maxnz,&cols);CHKERRQ(ierr);
2894 
2895     /* read in my part of the matrix column indices  */
2896     nz   = procsnz[0];
2897     ierr = PetscMalloc1(nz,&mycols);CHKERRQ(ierr);
2898     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
2899 
2900     /* read in everyone else's parts and ship them off */
2901     for (i=1; i<size; i++) {
2902       nz   = procsnz[i];
2903       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
2904       ierr = MPIULong_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
2905     }
2906     ierr = PetscFree(cols);CHKERRQ(ierr);
2907   } else {
2908     /* determine buffer space needed for message */
2909     nz = 0;
2910     for (i=0; i<m; i++) {
2911       nz += ourlens[i];
2912     }
2913     ierr = PetscMalloc1(nz,&mycols);CHKERRQ(ierr);
2914 
2915     /* receive message of column indices */
2916     ierr = MPIULong_Recv(mycols,nz,MPIU_INT,0,tag,comm);CHKERRQ(ierr);
2917   }
2918 
2919   /* determine column ownership if matrix is not square */
2920   if (N != M) {
2921     if (newMat->cmap->n < 0) n = N/size + ((N % size) > rank);
2922     else n = newMat->cmap->n;
2923     ierr   = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
2924     cstart = cend - n;
2925   } else {
2926     cstart = rstart;
2927     cend   = rend;
2928     n      = cend - cstart;
2929   }
2930 
2931   /* loop over local rows, determining number of off-diagonal entries */
2932   ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr);
2933   jj   = 0;
2934   for (i=0; i<m; i++) {
2935     for (j=0; j<ourlens[i]; j++) {
2936       if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++;
2937       jj++;
2938     }
2939   }
2940 
2941   for (i=0; i<m; i++) {
2942     ourlens[i] -= offlens[i];
2943   }
2944   ierr = MatSetSizes(newMat,m,n,M,N);CHKERRQ(ierr);
2945 
2946   if (bs > 1) {ierr = MatSetBlockSize(newMat,bs);CHKERRQ(ierr);}
2947 
2948   ierr = MatMPIAIJSetPreallocation(newMat,0,ourlens,0,offlens);CHKERRQ(ierr);
2949 
2950   for (i=0; i<m; i++) {
2951     ourlens[i] += offlens[i];
2952   }
2953 
2954   if (!rank) {
2955     ierr = PetscMalloc1(maxnz+1,&vals);CHKERRQ(ierr);
2956 
2957     /* read in my part of the matrix numerical values  */
2958     nz   = procsnz[0];
2959     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2960 
2961     /* insert into matrix */
2962     jj      = rstart;
2963     smycols = mycols;
2964     svals   = vals;
2965     for (i=0; i<m; i++) {
2966       ierr     = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
2967       smycols += ourlens[i];
2968       svals   += ourlens[i];
2969       jj++;
2970     }
2971 
2972     /* read in other processors and ship out */
2973     for (i=1; i<size; i++) {
2974       nz   = procsnz[i];
2975       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2976       ierr = MPIULong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr);
2977     }
2978     ierr = PetscFree(procsnz);CHKERRQ(ierr);
2979   } else {
2980     /* receive numeric values */
2981     ierr = PetscMalloc1(nz+1,&vals);CHKERRQ(ierr);
2982 
2983     /* receive message of values */
2984     ierr = MPIULong_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr);
2985 
2986     /* insert into matrix */
2987     jj      = rstart;
2988     smycols = mycols;
2989     svals   = vals;
2990     for (i=0; i<m; i++) {
2991       ierr     = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
2992       smycols += ourlens[i];
2993       svals   += ourlens[i];
2994       jj++;
2995     }
2996   }
2997   ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr);
2998   ierr = PetscFree(vals);CHKERRQ(ierr);
2999   ierr = PetscFree(mycols);CHKERRQ(ierr);
3000   ierr = PetscFree(rowners);CHKERRQ(ierr);
3001   ierr = MatAssemblyBegin(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3002   ierr = MatAssemblyEnd(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3003   PetscFunctionReturn(0);
3004 }
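
/*
   Caller-side sketch of loading a MATMPIAIJ with the routine above; the file
   name "matrix.dat" is a placeholder:

     Mat         A;
     PetscViewer viewer;
     ierr = PetscViewerBinaryOpen(PETSC_COMM_WORLD,"matrix.dat",FILE_MODE_READ,&viewer);CHKERRQ(ierr);
     ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
     ierr = MatLoad(A,viewer);CHKERRQ(ierr);
     ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr);

   The block size used while loading can be overridden with -matload_block_size.
*/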
3005 
3006 /* Not scalable because of ISAllGather() unless getting all columns. */
3007 PetscErrorCode ISGetSeqIS_Private(Mat mat,IS iscol,IS *isseq)
3008 {
3009   PetscErrorCode ierr;
3010   IS             iscol_local;
3011   PetscBool      isstride;
3012   PetscMPIInt    lisstride=0,gisstride;
3013 
3014   PetscFunctionBegin;
3015   /* check if we are grabbing all columns */
3016   ierr = PetscObjectTypeCompare((PetscObject)iscol,ISSTRIDE,&isstride);CHKERRQ(ierr);
3017 
3018   if (isstride) {
3019     PetscInt  start,len,mstart,mlen;
3020     ierr = ISStrideGetInfo(iscol,&start,NULL);CHKERRQ(ierr);
3021     ierr = ISGetLocalSize(iscol,&len);CHKERRQ(ierr);
3022     ierr = MatGetOwnershipRangeColumn(mat,&mstart,&mlen);CHKERRQ(ierr);
3023     if (mstart == start && mlen-mstart == len) lisstride = 1;
3024   }
3025 
3026   ierr = MPIU_Allreduce(&lisstride,&gisstride,1,MPI_INT,MPI_MIN,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
3027   if (gisstride) {
3028     PetscInt N;
3029     ierr = MatGetSize(mat,NULL,&N);CHKERRQ(ierr);
3030     ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),N,0,1,&iscol_local);CHKERRQ(ierr);
3031     ierr = ISSetIdentity(iscol_local);CHKERRQ(ierr);
3032     ierr = PetscInfo(mat,"Optimizing for obtaining all columns of the matrix; skipping ISAllGather()\n");CHKERRQ(ierr);
3033   } else {
3034     PetscInt cbs;
3035     ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr);
3036     ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
3037     ierr = ISSetBlockSize(iscol_local,cbs);CHKERRQ(ierr);
3038   }
3039 
3040   *isseq = iscol_local;
3041   PetscFunctionReturn(0);
3042 }
3043 
3044 /*
3045  Used by MatCreateSubMatrix_MPIAIJ_SameRowColDist() to avoid ISAllGather() and global size of iscol_local
3046  (see MatCreateSubMatrix_MPIAIJ_nonscalable)
3047 
3048  Input Parameters:
3049    mat - matrix
3050    isrow - parallel row index set; its local indices are a subset of local rows of mat,
3051            i.e., mat->rstart <= isrow[i] < mat->rend
3052    iscol - parallel column index set; its local indices are a subset of local columns of mat,
3053            i.e., mat->cstart <= iscol[i] < mat->cend
3054  Output Parameter:
3055    isrow_d,iscol_d - sequential row and column index sets for retrieving mat->A
3056    iscol_o - sequential column index set for retrieving mat->B
3057    garray - column map; garray[i] indicates global location of iscol_o[i] in iscol
3058  */
3059 PetscErrorCode ISGetSeqIS_SameColDist_Private(Mat mat,IS isrow,IS iscol,IS *isrow_d,IS *iscol_d,IS *iscol_o,const PetscInt *garray[])
3060 {
3061   PetscErrorCode ierr;
3062   Vec            x,cmap;
3063   const PetscInt *is_idx;
3064   PetscScalar    *xarray,*cmaparray;
3065   PetscInt       ncols,isstart,*idx,m,rstart,*cmap1,count;
3066   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)mat->data;
3067   Mat            B=a->B;
3068   Vec            lvec=a->lvec,lcmap;
3069   PetscInt       i,cstart,cend,Bn=B->cmap->N;
3070   MPI_Comm       comm;
3071   VecScatter     Mvctx=a->Mvctx;
3072 
3073   PetscFunctionBegin;
3074   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3075   ierr = ISGetLocalSize(iscol,&ncols);CHKERRQ(ierr);
3076 
3077   /* (1) iscol is a sub-column vector of mat, pad it with '-1.' to form a full vector x */
3078   ierr = MatCreateVecs(mat,&x,NULL);CHKERRQ(ierr);
3079   ierr = VecSet(x,-1.0);CHKERRQ(ierr);
3080   ierr = VecDuplicate(x,&cmap);CHKERRQ(ierr);
3081   ierr = VecSet(cmap,-1.0);CHKERRQ(ierr);
3082 
3083   /* Get start indices */
3084   ierr = MPI_Scan(&ncols,&isstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3085   isstart -= ncols;
3086   ierr = MatGetOwnershipRangeColumn(mat,&cstart,&cend);CHKERRQ(ierr);
3087 
3088   ierr = ISGetIndices(iscol,&is_idx);CHKERRQ(ierr);
3089   ierr = VecGetArray(x,&xarray);CHKERRQ(ierr);
3090   ierr = VecGetArray(cmap,&cmaparray);CHKERRQ(ierr);
3091   ierr = PetscMalloc1(ncols,&idx);CHKERRQ(ierr);
3092   for (i=0; i<ncols; i++) {
3093     xarray[is_idx[i]-cstart]    = (PetscScalar)is_idx[i];
3094     cmaparray[is_idx[i]-cstart] = i + isstart;      /* global index of iscol[i] */
3095     idx[i]                      = is_idx[i]-cstart; /* local index of iscol[i]  */
3096   }
3097   ierr = VecRestoreArray(x,&xarray);CHKERRQ(ierr);
3098   ierr = VecRestoreArray(cmap,&cmaparray);CHKERRQ(ierr);
3099   ierr = ISRestoreIndices(iscol,&is_idx);CHKERRQ(ierr);
3100 
3101   /* Get iscol_d */
3102   ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,iscol_d);CHKERRQ(ierr);
3103   ierr = ISGetBlockSize(iscol,&i);CHKERRQ(ierr);
3104   ierr = ISSetBlockSize(*iscol_d,i);CHKERRQ(ierr);
3105 
3106   /* Get isrow_d */
3107   ierr = ISGetLocalSize(isrow,&m);CHKERRQ(ierr);
3108   rstart = mat->rmap->rstart;
3109   ierr = PetscMalloc1(m,&idx);CHKERRQ(ierr);
3110   ierr = ISGetIndices(isrow,&is_idx);CHKERRQ(ierr);
3111   for (i=0; i<m; i++) idx[i] = is_idx[i]-rstart;
3112   ierr = ISRestoreIndices(isrow,&is_idx);CHKERRQ(ierr);
3113 
3114   ierr = ISCreateGeneral(PETSC_COMM_SELF,m,idx,PETSC_OWN_POINTER,isrow_d);CHKERRQ(ierr);
3115   ierr = ISGetBlockSize(isrow,&i);CHKERRQ(ierr);
3116   ierr = ISSetBlockSize(*isrow_d,i);CHKERRQ(ierr);
3117 
3118   /* (2) Scatter x and cmap using aij->Mvctx to get their off-process portions (see MatMult_MPIAIJ) */
3119   ierr = VecScatterBegin(Mvctx,x,lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3120   ierr = VecScatterEnd(Mvctx,x,lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3121 
3122   ierr = VecDuplicate(lvec,&lcmap);CHKERRQ(ierr);
3123 
3124   ierr = VecScatterBegin(Mvctx,cmap,lcmap,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3125   ierr = VecScatterEnd(Mvctx,cmap,lcmap,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3126 
3127   /* (3) create sequential iscol_o (a subset of iscol) and isgarray */
3128   /* off-process column indices */
3129   count = 0;
3130   ierr = PetscMalloc1(Bn,&idx);CHKERRQ(ierr);
3131   ierr = PetscMalloc1(Bn,&cmap1);CHKERRQ(ierr);
3132 
3133   ierr = VecGetArray(lvec,&xarray);CHKERRQ(ierr);
3134   ierr = VecGetArray(lcmap,&cmaparray);CHKERRQ(ierr);
3135   for (i=0; i<Bn; i++) {
3136     if (PetscRealPart(xarray[i]) > -1.0) {
3137       idx[count]     = i;                   /* local column index in off-diagonal part B */
3138       cmap1[count] = (PetscInt)PetscRealPart(cmaparray[i]);  /* column index in submat */
3139       count++;
3140     }
3141   }
3142   ierr = VecRestoreArray(lvec,&xarray);CHKERRQ(ierr);
3143   ierr = VecRestoreArray(lcmap,&cmaparray);CHKERRQ(ierr);
3144 
3145   ierr = ISCreateGeneral(PETSC_COMM_SELF,count,idx,PETSC_COPY_VALUES,iscol_o);CHKERRQ(ierr);
3146   /* cannot ensure iscol_o has same blocksize as iscol! */
3147 
3148   ierr = PetscFree(idx);CHKERRQ(ierr);
3149   *garray = cmap1;
3150 
3151   ierr = VecDestroy(&x);CHKERRQ(ierr);
3152   ierr = VecDestroy(&cmap);CHKERRQ(ierr);
3153   ierr = VecDestroy(&lcmap);CHKERRQ(ierr);
3154   PetscFunctionReturn(0);
3155 }
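
/*
   The routine above relies on a communication trick: integer column indices are
   encoded as PetscScalar entries of a parallel Vec and moved with the matrix's
   existing Mvctx scatter, so no new communication pattern needs to be built.
   Stripped to its essentials (names x, lvec, Mvctx as in the routine above):

     ierr = VecSet(x,-1.0);CHKERRQ(ierr);   -1 marks "column not selected"
     ... write the selected global indices into the local entries of x ...
     ierr = VecScatterBegin(Mvctx,x,lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
     ierr = VecScatterEnd(Mvctx,x,lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
     ... entries of lvec with value > -1 identify the selected off-process columns ...
*/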
3156 
3157 /* isrow and iscol have same processor distribution as mat, output *submat is a submatrix of local mat */
3158 PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowColDist(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *submat)
3159 {
3160   PetscErrorCode ierr;
3161   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)mat->data,*asub;
3162   Mat            M = NULL;
3163   MPI_Comm       comm;
3164   IS             iscol_d,isrow_d,iscol_o;
3165   Mat            Asub = NULL,Bsub = NULL;
3166   PetscInt       n;
3167 
3168   PetscFunctionBegin;
3169   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3170 
3171   if (call == MAT_REUSE_MATRIX) {
3172     /* Retrieve isrow_d, iscol_d and iscol_o from submat */
3173     ierr = PetscObjectQuery((PetscObject)*submat,"isrow_d",(PetscObject*)&isrow_d);CHKERRQ(ierr);
3174     if (!isrow_d) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"isrow_d passed in was not used before, cannot reuse");
3175 
3176     ierr = PetscObjectQuery((PetscObject)*submat,"iscol_d",(PetscObject*)&iscol_d);CHKERRQ(ierr);
3177     if (!iscol_d) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"iscol_d passed in was not used before, cannot reuse");
3178 
3179     ierr = PetscObjectQuery((PetscObject)*submat,"iscol_o",(PetscObject*)&iscol_o);CHKERRQ(ierr);
3180     if (!iscol_o) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"iscol_o passed in was not used before, cannot reuse");
3181 
3182     /* Update diagonal and off-diagonal portions of submat */
3183     asub = (Mat_MPIAIJ*)(*submat)->data;
3184     ierr = MatCreateSubMatrix_SeqAIJ(a->A,isrow_d,iscol_d,PETSC_DECIDE,MAT_REUSE_MATRIX,&asub->A);CHKERRQ(ierr);
3185     ierr = ISGetLocalSize(iscol_o,&n);CHKERRQ(ierr);
3186     if (n) {
3187       ierr = MatCreateSubMatrix_SeqAIJ(a->B,isrow_d,iscol_o,PETSC_DECIDE,MAT_REUSE_MATRIX,&asub->B);CHKERRQ(ierr);
3188     }
3189     ierr = MatAssemblyBegin(*submat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3190     ierr = MatAssemblyEnd(*submat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3191 
3192   } else { /* call == MAT_INITIAL_MATRIX */
3193     const PetscInt *garray;
3194     PetscInt        BsubN;
3195 
3196     /* Create isrow_d, iscol_d, iscol_o and isgarray (replace isgarray with array?) */
3197     ierr = ISGetSeqIS_SameColDist_Private(mat,isrow,iscol,&isrow_d,&iscol_d,&iscol_o,&garray);CHKERRQ(ierr);
3198 
3199     /* Create local submatrices Asub and Bsub */
3200     ierr = MatCreateSubMatrix_SeqAIJ(a->A,isrow_d,iscol_d,PETSC_DECIDE,MAT_INITIAL_MATRIX,&Asub);CHKERRQ(ierr);
3201     ierr = MatCreateSubMatrix_SeqAIJ(a->B,isrow_d,iscol_o,PETSC_DECIDE,MAT_INITIAL_MATRIX,&Bsub);CHKERRQ(ierr);
3202 
3203     /* Create submatrix M */
3204     ierr = MatCreateMPIAIJWithSeqAIJ(comm,Asub,Bsub,garray,&M);CHKERRQ(ierr);
3205 
3206     /* If Bsub has empty columns, compress iscol_o such that it will retrieve condensed Bsub from a->B during reuse */
3207     asub = (Mat_MPIAIJ*)M->data;
3208 
3209     ierr = ISGetLocalSize(iscol_o,&BsubN);CHKERRQ(ierr);
3210     n = asub->B->cmap->N;
3211     if (BsubN > n) {
3212       /* This case can be tested using ~petsc/src/tao/bound/examples/tutorials/runplate2_3 */
3213       const PetscInt *idx;
3214       PetscInt       i,j,*idx_new,*subgarray = asub->garray;
3215       ierr = PetscInfo2(M,"submatrix Bn %D != BsubN %D, update iscol_o\n",n,BsubN);CHKERRQ(ierr);
3216 
3217       ierr = PetscMalloc1(n,&idx_new);CHKERRQ(ierr);
3218       j = 0;
3219       ierr = ISGetIndices(iscol_o,&idx);CHKERRQ(ierr);
3220       for (i=0; i<n; i++) {
3221         if (j >= BsubN) break;
3222         while (subgarray[i] > garray[j]) j++;
3223 
3224         if (subgarray[i] == garray[j]) {
3225           idx_new[i] = idx[j++];
3226         } else SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"subgarray[%D]=%D cannot be smaller than garray[%D]=%D",i,subgarray[i],j,garray[j]);
3227       }
3228       ierr = ISRestoreIndices(iscol_o,&idx);CHKERRQ(ierr);
3229 
3230       ierr = ISDestroy(&iscol_o);CHKERRQ(ierr);
3231       ierr = ISCreateGeneral(PETSC_COMM_SELF,n,idx_new,PETSC_OWN_POINTER,&iscol_o);CHKERRQ(ierr);
3232 
3233     } else if (BsubN < n) {
3234       SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Columns of Bsub (%D) cannot be smaller than B's (%D)",BsubN,asub->B->cmap->N);
3235     }
3236 
3237     ierr = PetscFree(garray);CHKERRQ(ierr);
3238     *submat = M;
3239 
3240     /* Save isrow_d, iscol_d and iscol_o used in processor for next request */
3241     ierr = PetscObjectCompose((PetscObject)M,"isrow_d",(PetscObject)isrow_d);CHKERRQ(ierr);
3242     ierr = ISDestroy(&isrow_d);CHKERRQ(ierr);
3243 
3244     ierr = PetscObjectCompose((PetscObject)M,"iscol_d",(PetscObject)iscol_d);CHKERRQ(ierr);
3245     ierr = ISDestroy(&iscol_d);CHKERRQ(ierr);
3246 
3247     ierr = PetscObjectCompose((PetscObject)M,"iscol_o",(PetscObject)iscol_o);CHKERRQ(ierr);
3248     ierr = ISDestroy(&iscol_o);CHKERRQ(ierr);
3249   }
3250   PetscFunctionReturn(0);
3251 }
3252 
3253 PetscErrorCode MatCreateSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat)
3254 {
3255   PetscErrorCode ierr;
3256   IS             iscol_local=NULL,isrow_d;
3257   PetscInt       csize;
3258   PetscInt       n,i,j,start,end;
3259   PetscBool      sameRowDist=PETSC_FALSE,sameDist[2],tsameDist[2];
3260   MPI_Comm       comm;
3261 
3262   PetscFunctionBegin;
3263   /* If isrow has same processor distribution as mat,
3264      call MatCreateSubMatrix_MPIAIJ_SameRowDist() to avoid using a hash table with global size of iscol */
3265   if (call == MAT_REUSE_MATRIX) {
3266     ierr = PetscObjectQuery((PetscObject)*newmat,"isrow_d",(PetscObject*)&isrow_d);CHKERRQ(ierr);
3267     if (isrow_d) {
3268       sameRowDist  = PETSC_TRUE;
3269       tsameDist[1] = PETSC_TRUE; /* sameColDist */
3270     } else {
3271       ierr = PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_local);CHKERRQ(ierr);
3272       if (iscol_local) {
3273         sameRowDist  = PETSC_TRUE;
3274         tsameDist[1] = PETSC_FALSE; /* !sameColDist */
3275       }
3276     }
3277   } else {
3278     /* Check if isrow has same processor distribution as mat */
3279     sameDist[0] = PETSC_FALSE;
3280     ierr = ISGetLocalSize(isrow,&n);CHKERRQ(ierr);
3281     if (!n) {
3282       sameDist[0] = PETSC_TRUE;
3283     } else {
3284       ierr = ISGetMinMax(isrow,&i,&j);CHKERRQ(ierr);
3285       ierr = MatGetOwnershipRange(mat,&start,&end);CHKERRQ(ierr);
3286       if (i >= start && j < end) {
3287         sameDist[0] = PETSC_TRUE;
3288       }
3289     }
3290 
3291     /* Check if iscol has same processor distribution as mat */
3292     sameDist[1] = PETSC_FALSE;
3293     ierr = ISGetLocalSize(iscol,&n);CHKERRQ(ierr);
3294     if (!n) {
3295       sameDist[1] = PETSC_TRUE;
3296     } else {
3297       ierr = ISGetMinMax(iscol,&i,&j);CHKERRQ(ierr);
3298       ierr = MatGetOwnershipRangeColumn(mat,&start,&end);CHKERRQ(ierr);
3299       if (i >= start && j < end) sameDist[1] = PETSC_TRUE;
3300     }
3301 
3302     ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3303     ierr = MPIU_Allreduce(&sameDist,&tsameDist,2,MPIU_BOOL,MPI_LAND,comm);CHKERRQ(ierr);
3304     sameRowDist = tsameDist[0];
3305   }
3306 
3307   if (sameRowDist) {
3308     if (tsameDist[1]) { /* sameRowDist & sameColDist */
3309       /* isrow and iscol have same processor distribution as mat */
3310       ierr = MatCreateSubMatrix_MPIAIJ_SameRowColDist(mat,isrow,iscol,call,newmat);CHKERRQ(ierr);
3311       PetscFunctionReturn(0);
3312     } else { /* sameRowDist */
3313       /* isrow has same processor distribution as mat */
3314       if (call == MAT_INITIAL_MATRIX) {
3315         PetscBool sorted;
3316         ierr = ISGetSeqIS_Private(mat,iscol,&iscol_local);CHKERRQ(ierr);
3317         ierr = ISGetLocalSize(iscol_local,&n);CHKERRQ(ierr); /* local size of iscol_local = global columns of newmat */
3318         ierr = ISGetSize(iscol,&i);CHKERRQ(ierr);
3319         if (n != i) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"n %D != size of iscol %D",n,i);
3320 
3321         ierr = ISSorted(iscol_local,&sorted);CHKERRQ(ierr);
3322         if (sorted) {
3323           /* MatCreateSubMatrix_MPIAIJ_SameRowDist() requires iscol_local be sorted; it can have duplicate indices */
3324           ierr = MatCreateSubMatrix_MPIAIJ_SameRowDist(mat,isrow,iscol,iscol_local,MAT_INITIAL_MATRIX,newmat);CHKERRQ(ierr);
3325           PetscFunctionReturn(0);
3326         }
3327       } else { /* call == MAT_REUSE_MATRIX */
3328         IS    iscol_sub;
3329         ierr = PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_sub);CHKERRQ(ierr);
3330         if (iscol_sub) {
3331           ierr = MatCreateSubMatrix_MPIAIJ_SameRowDist(mat,isrow,iscol,NULL,call,newmat);CHKERRQ(ierr);
3332           PetscFunctionReturn(0);
3333         }
3334       }
3335     }
3336   }
3337 
3338   /* General case: iscol -> iscol_local which has global size of iscol */
3339   if (call == MAT_REUSE_MATRIX) {
3340     ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr);
3341     if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3342   } else {
3343     if (!iscol_local) {
3344       ierr = ISGetSeqIS_Private(mat,iscol,&iscol_local);CHKERRQ(ierr);
3345     }
3346   }
3347 
3348   ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
3349   ierr = MatCreateSubMatrix_MPIAIJ_nonscalable(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr);
3350 
3351   if (call == MAT_INITIAL_MATRIX) {
3352     ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
3353     ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
3354   }
3355   PetscFunctionReturn(0);
3356 }
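
/*
   Caller-side sketch of the submatrix extraction dispatched above:

     Mat sub;
     ierr = MatCreateSubMatrix(A,isrow,iscol,MAT_INITIAL_MATRIX,&sub);CHKERRQ(ierr);
     ... A gets new numerical values, same nonzero pattern ...
     ierr = MatCreateSubMatrix(A,isrow,iscol,MAT_REUSE_MATRIX,&sub);CHKERRQ(ierr);

   With MAT_REUSE_MATRIX the index sets must match the original call; the
   routine above then reuses the work objects it composed on sub.
*/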
3357 
3358 /*@C
3359      MatCreateMPIAIJWithSeqAIJ - creates a MPIAIJ matrix using SeqAIJ matrices that contain the "diagonal"
3360          and "off-diagonal" part of the matrix in CSR format.
3361 
3362    Collective on MPI_Comm
3363 
3364    Input Parameters:
3365 +  comm - MPI communicator
3366 .  A - "diagonal" portion of matrix
3367 .  B - "off-diagonal" portion of matrix, may have empty columns, will be destroyed by this routine
3368 -  garray - global index of B columns
3369 
3370    Output Parameter:
3371 .   mat - the matrix, with input A as its local diagonal matrix

3372    Level: advanced
3373 
3374    Notes:
3375        See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix.
3376        A becomes part of output mat, B is destroyed by this routine. The user cannot use A and B anymore.
3377 
3378 .seealso: MatCreateMPIAIJWithSplitArrays()
3379 @*/
3380 PetscErrorCode MatCreateMPIAIJWithSeqAIJ(MPI_Comm comm,Mat A,Mat B,const PetscInt garray[],Mat *mat)
3381 {
3382   PetscErrorCode ierr;
3383   Mat_MPIAIJ     *maij;
3384   Mat_SeqAIJ     *b=(Mat_SeqAIJ*)B->data,*bnew;
3385   PetscInt       *oi=b->i,*oj=b->j,i,nz,col;
3386   PetscScalar    *oa=b->a;
3387   Mat            Bnew;
3388   PetscInt       m,n,N;
3389 
3390   PetscFunctionBegin;
3391   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
3392   ierr = MatGetSize(A,&m,&n);CHKERRQ(ierr);
3393   if (m != B->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Am %D != Bm %D",m,B->rmap->N);
3394   if (A->rmap->bs != B->rmap->bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"A row bs %D != B row bs %D",A->rmap->bs,B->rmap->bs);
3395   /* check below removed: when B is created using iscol_o from ISGetSeqIS_SameColDist_Private(), its bs may not be the same as A's */
3396   /* if (A->cmap->bs != B->cmap->bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"A column bs %D != B column bs %D",A->cmap->bs,B->cmap->bs); */
3397 
3398   /* Get global columns of mat */
3399   ierr = MPIU_Allreduce(&n,&N,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3400 
3401   ierr = MatSetSizes(*mat,m,n,PETSC_DECIDE,N);CHKERRQ(ierr);
3402   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
3403   ierr = MatSetBlockSizes(*mat,A->rmap->bs,A->cmap->bs);CHKERRQ(ierr);
3404   maij = (Mat_MPIAIJ*)(*mat)->data;
3405 
3406   (*mat)->preallocated = PETSC_TRUE;
3407 
3408   ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr);
3409   ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr);
3410 
3411   /* Set A as diagonal portion of *mat */
3412   maij->A = A;
3413 
3414   nz = oi[m];
3415   for (i=0; i<nz; i++) {
3416     col   = oj[i];
3417     oj[i] = garray[col];
3418   }
3419 
3420    /* Set Bnew as off-diagonal portion of *mat */
3421   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,N,oi,oj,oa,&Bnew);CHKERRQ(ierr);
3422   bnew        = (Mat_SeqAIJ*)Bnew->data;
3423   bnew->maxnz = b->maxnz; /* allocated nonzeros of B */
3424   maij->B     = Bnew;
3425 
3426   if (B->rmap->N != Bnew->rmap->N) SETERRQ2(PETSC_COMM_SELF,0,"BN %d != BnewN %d",B->rmap->N,Bnew->rmap->N);
3427 
3428   b->singlemalloc = PETSC_FALSE; /* B arrays are shared by Bnew */
3429   b->free_a       = PETSC_FALSE;
3430   b->free_ij      = PETSC_FALSE;
3431   ierr = MatDestroy(&B);CHKERRQ(ierr);
3432 
3433   bnew->singlemalloc = PETSC_TRUE; /* arrays will be freed by MatDestroy(&Bnew) */
3434   bnew->free_a       = PETSC_TRUE;
3435   bnew->free_ij      = PETSC_TRUE;
3436 
3437   /* condense columns of maij->B */
3438   ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr);
3439   ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3440   ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3441   ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_FALSE);CHKERRQ(ierr);
3442   ierr = MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3443   PetscFunctionReturn(0);
3444 }
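
/*
   Sketch of how a caller provides the pieces consumed above (illustrative;
   on each process A is the m x n "diagonal" SeqAIJ block and B an m x nB
   SeqAIJ whose local columns are mapped to global columns by garray[nB]):

     Mat      A,B,C;
     PetscInt *garray;
     ... build A, B and garray as described in the manual page above ...
     ierr = MatCreateMPIAIJWithSeqAIJ(PETSC_COMM_WORLD,A,B,garray,&C);CHKERRQ(ierr);

   After the call A is owned by C and B has been destroyed, so neither may be
   used by the caller again.
*/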
3445 
3446 extern PetscErrorCode MatCreateSubMatrices_MPIAIJ_SingleIS_Local(Mat,PetscInt,const IS[],const IS[],MatReuse,PetscBool,Mat*);
3447 
3448 PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowDist(Mat mat,IS isrow,IS iscol,IS iscol_local,MatReuse call,Mat *newmat)
3449 {
3450   PetscErrorCode ierr;
3451   PetscInt       i,m,n,rstart,row,rend,nz,j,bs,cbs;
3452   PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal;
3453   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)mat->data;
3454   Mat            M,Msub,B=a->B;
3455   MatScalar      *aa;
3456   Mat_SeqAIJ     *aij;
3457   PetscInt       *garray = a->garray,*colsub,Ncols;
3458   PetscInt       count,Bn=B->cmap->N,cstart=mat->cmap->rstart,cend=mat->cmap->rend;
3459   IS             iscol_sub,iscmap;
3460   const PetscInt *is_idx,*cmap;
3461   PetscBool      allcolumns=PETSC_FALSE;
3462   MPI_Comm       comm;
3463 
3464   PetscFunctionBegin;
3465   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3466 
3467   if (call == MAT_REUSE_MATRIX) {
3468     ierr = PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_sub);CHKERRQ(ierr);
3469     if (!iscol_sub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"SubIScol passed in was not used before, cannot reuse");
3470     ierr = ISGetLocalSize(iscol_sub,&count);CHKERRQ(ierr);
3471 
3472     ierr = PetscObjectQuery((PetscObject)*newmat,"Subcmap",(PetscObject*)&iscmap);CHKERRQ(ierr);
3473     if (!iscmap) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Subcmap passed in was not used before, cannot reuse");
3474 
3475     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Msub);CHKERRQ(ierr);
3476     if (!Msub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3477 
3478     ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol_sub,MAT_REUSE_MATRIX,PETSC_FALSE,&Msub);CHKERRQ(ierr);
3479 
3480   } else { /* call == MAT_INITIAL_MATRIX */
3481     PetscBool flg;
3482 
3483     ierr = ISGetLocalSize(iscol,&n);CHKERRQ(ierr);
3484     ierr = ISGetSize(iscol,&Ncols);CHKERRQ(ierr);
3485 
3486     /* (1) iscol -> nonscalable iscol_local */
3487     /* Check for special case: each processor gets entire matrix columns */
3488     ierr = ISIdentity(iscol_local,&flg);CHKERRQ(ierr);
3489     if (flg && n == mat->cmap->N) allcolumns = PETSC_TRUE;
3490     if (allcolumns) {
3491       iscol_sub = iscol_local;
3492       ierr = PetscObjectReference((PetscObject)iscol_local);CHKERRQ(ierr);
3493       ierr = ISCreateStride(PETSC_COMM_SELF,n,0,1,&iscmap);CHKERRQ(ierr);
3494 
3495     } else {
3496       /* (2) iscol_local -> iscol_sub and iscmap. Implementation below requires iscol_local be sorted; it may contain duplicate indices */
3497       PetscInt *idx,*cmap1,k;
3498       ierr = PetscMalloc1(Ncols,&idx);CHKERRQ(ierr);
3499       ierr = PetscMalloc1(Ncols,&cmap1);CHKERRQ(ierr);
3500       ierr = ISGetIndices(iscol_local,&is_idx);CHKERRQ(ierr);
3501       count = 0;
3502       k     = 0;
3503       for (i=0; i<Ncols; i++) {
3504         j = is_idx[i];
3505         if (j >= cstart && j < cend) {
3506           /* diagonal part of mat */
3507           idx[count]     = j;
3508           cmap1[count++] = i; /* column index in submat */
3509         } else if (Bn) {
3510           /* off-diagonal part of mat */
3511           if (j == garray[k]) {
3512             idx[count]     = j;
3513             cmap1[count++] = i;  /* column index in submat */
3514           } else if (j > garray[k]) {
3515             while (j > garray[k] && k < Bn-1) k++;
3516             if (j == garray[k]) {
3517               idx[count]     = j;
3518               cmap1[count++] = i; /* column index in submat */
3519             }
3520           }
3521         }
3522       }
3523       ierr = ISRestoreIndices(iscol_local,&is_idx);CHKERRQ(ierr);
3524 
3525       ierr = ISCreateGeneral(PETSC_COMM_SELF,count,idx,PETSC_OWN_POINTER,&iscol_sub);CHKERRQ(ierr);
3526       ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr);
3527       ierr = ISSetBlockSize(iscol_sub,cbs);CHKERRQ(ierr);
3528 
3529       ierr = ISCreateGeneral(PetscObjectComm((PetscObject)iscol_local),count,cmap1,PETSC_OWN_POINTER,&iscmap);CHKERRQ(ierr);
3530     }
3531 
3532     /* (3) Create sequential Msub */
3533     ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol_sub,MAT_INITIAL_MATRIX,allcolumns,&Msub);CHKERRQ(ierr);
3534   }
3535 
3536   ierr = ISGetLocalSize(iscol_sub,&count);CHKERRQ(ierr);
3537   aij  = (Mat_SeqAIJ*)(Msub)->data;
3538   ii   = aij->i;
3539   ierr = ISGetIndices(iscmap,&cmap);CHKERRQ(ierr);
3540 
3541   /*
3542       m - number of local rows
3543       Ncols - number of columns (same on all processors)
3544       rstart - first row in new global matrix generated
3545   */
3546   ierr = MatGetSize(Msub,&m,NULL);CHKERRQ(ierr);
3547 
3548   if (call == MAT_INITIAL_MATRIX) {
3549     /* (4) Create parallel newmat */
3550     PetscMPIInt    rank,size;
3551     PetscInt       csize;
3552 
3553     ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3554     ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3555 
3556     /*
3557         Determine the number of non-zeros in the diagonal and off-diagonal
3558         portions of the matrix in order to do correct preallocation
3559     */
3560 
3561     /* first get start and end of "diagonal" columns */
3562     ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
3563     if (csize == PETSC_DECIDE) {
3564       ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
3565       if (mglobal == Ncols) { /* square matrix */
3566         nlocal = m;
3567       } else {
3568         nlocal = Ncols/size + ((Ncols % size) > rank);
3569       }
3570     } else {
3571       nlocal = csize;
3572     }
3573     ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3574     rstart = rend - nlocal;
3575     if (rank == size - 1 && rend != Ncols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,Ncols);
3576 
3577     /* next, compute all the lengths */
3578     jj    = aij->j;
3579     ierr  = PetscMalloc1(2*m+1,&dlens);CHKERRQ(ierr);
3580     olens = dlens + m;
3581     for (i=0; i<m; i++) {
3582       jend = ii[i+1] - ii[i];
3583       olen = 0;
3584       dlen = 0;
3585       for (j=0; j<jend; j++) {
3586         if (cmap[*jj] < rstart || cmap[*jj] >= rend) olen++;
3587         else dlen++;
3588         jj++;
3589       }
3590       olens[i] = olen;
3591       dlens[i] = dlen;
3592     }
3593 
3594     ierr = ISGetBlockSize(isrow,&bs);CHKERRQ(ierr);
3595     ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr);
3596 
3597     ierr = MatCreate(comm,&M);CHKERRQ(ierr);
3598     ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,Ncols);CHKERRQ(ierr);
3599     ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr);
3600     ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr);
3601     ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr);
3602     ierr = PetscFree(dlens);CHKERRQ(ierr);
3603 
3604   } else { /* call == MAT_REUSE_MATRIX */
3605     M    = *newmat;
3606     ierr = MatGetLocalSize(M,&i,NULL);CHKERRQ(ierr);
3607     if (i != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
3608     ierr = MatZeroEntries(M);CHKERRQ(ierr);
3609     /*
3610          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
3611        rather than the slower MatSetValues().
3612     */
3613     M->was_assembled = PETSC_TRUE;
3614     M->assembled     = PETSC_FALSE;
3615   }
3616 
3617   /* (5) Set values of Msub to *newmat */
3618   ierr = PetscMalloc1(count,&colsub);CHKERRQ(ierr);
3619   ierr = MatGetOwnershipRange(M,&rstart,NULL);CHKERRQ(ierr);
3620 
3621   jj   = aij->j;
3622   aa   = aij->a;
3623   for (i=0; i<m; i++) {
3624     row = rstart + i;
3625     nz  = ii[i+1] - ii[i];
3626     for (j=0; j<nz; j++) colsub[j] = cmap[jj[j]];
3627     ierr  = MatSetValues_MPIAIJ(M,1,&row,nz,colsub,aa,INSERT_VALUES);CHKERRQ(ierr);
3628     jj += nz; aa += nz;
3629   }
3630   ierr = ISRestoreIndices(iscmap,&cmap);CHKERRQ(ierr);
3631 
3632   ierr    = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3633   ierr    = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3634 
3635   ierr = PetscFree(colsub);CHKERRQ(ierr);
3636 
3637   /* save Msub, iscol_sub and iscmap used in processor for next request */
3638   if (call ==  MAT_INITIAL_MATRIX) {
3639     *newmat = M;
3640     ierr = PetscObjectCompose((PetscObject)(*newmat),"SubMatrix",(PetscObject)Msub);CHKERRQ(ierr);
3641     ierr = MatDestroy(&Msub);CHKERRQ(ierr);
3642 
3643     ierr = PetscObjectCompose((PetscObject)(*newmat),"SubIScol",(PetscObject)iscol_sub);CHKERRQ(ierr);
3644     ierr = ISDestroy(&iscol_sub);CHKERRQ(ierr);
3645 
3646     ierr = PetscObjectCompose((PetscObject)(*newmat),"Subcmap",(PetscObject)iscmap);CHKERRQ(ierr);
3647     ierr = ISDestroy(&iscmap);CHKERRQ(ierr);
3648 
3649     if (iscol_local) {
3650       ierr = PetscObjectCompose((PetscObject)(*newmat),"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
3651       ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
3652     }
3653   }
3654   PetscFunctionReturn(0);
3655 }
3656 
3657 /*
3658     Not great since it makes two copies of the submatrix: first a SeqAIJ
3659   on each process, and then the end result obtained by concatenating the local matrices.
3660   Writing it directly would be much like MatCreateSubMatrices_MPIAIJ().
3661 
3662   Note: This requires a sequential iscol with all indices.
3663 */
3664 PetscErrorCode MatCreateSubMatrix_MPIAIJ_nonscalable(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat)
3665 {
3666   PetscErrorCode ierr;
3667   PetscMPIInt    rank,size;
3668   PetscInt       i,m,n,rstart,row,rend,nz,*cwork,j,bs,cbs;
3669   PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal;
3670   Mat            M,Mreuse;
3671   MatScalar      *aa,*vwork;
3672   MPI_Comm       comm;
3673   Mat_SeqAIJ     *aij;
3674   PetscBool      colflag,allcolumns=PETSC_FALSE;
3675 
3676   PetscFunctionBegin;
3677   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3678   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3679   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3680 
3681   /* Check for special case: each processor gets entire matrix columns */
3682   ierr = ISIdentity(iscol,&colflag);CHKERRQ(ierr);
3683   ierr = ISGetLocalSize(iscol,&n);CHKERRQ(ierr);
3684   if (colflag && n == mat->cmap->N) allcolumns = PETSC_TRUE;
3685 
3686   if (call ==  MAT_REUSE_MATRIX) {
3687     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Mreuse);CHKERRQ(ierr);
3688     if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3689     ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,allcolumns,&Mreuse);CHKERRQ(ierr);
3690   } else {
3691     ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,allcolumns,&Mreuse);CHKERRQ(ierr);
3692   }
3693 
3694   /*
3695       m - number of local rows
3696       n - number of columns (same on all processors)
3697       rstart - first row in new global matrix generated
3698   */
3699   ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
3700   ierr = MatGetBlockSizes(Mreuse,&bs,&cbs);CHKERRQ(ierr);
3701   if (call == MAT_INITIAL_MATRIX) {
3702     aij = (Mat_SeqAIJ*)(Mreuse)->data;
3703     ii  = aij->i;
3704     jj  = aij->j;
3705 
3706     /*
3707         Determine the number of non-zeros in the diagonal and off-diagonal
3708         portions of the matrix in order to do correct preallocation
3709     */
3710 
3711     /* first get start and end of "diagonal" columns */
3712     if (csize == PETSC_DECIDE) {
3713       ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
3714       if (mglobal == n) { /* square matrix */
3715         nlocal = m;
3716       } else {
3717         nlocal = n/size + ((n % size) > rank);
3718       }
3719     } else {
3720       nlocal = csize;
3721     }
3722     ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3723     rstart = rend - nlocal;
3724     if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n);
3725 
3726     /* next, compute all the lengths */
3727     ierr  = PetscMalloc1(2*m+1,&dlens);CHKERRQ(ierr);
3728     olens = dlens + m;
3729     for (i=0; i<m; i++) {
3730       jend = ii[i+1] - ii[i];
3731       olen = 0;
3732       dlen = 0;
3733       for (j=0; j<jend; j++) {
3734         if (*jj < rstart || *jj >= rend) olen++;
3735         else dlen++;
3736         jj++;
3737       }
3738       olens[i] = olen;
3739       dlens[i] = dlen;
3740     }
3741     ierr = MatCreate(comm,&M);CHKERRQ(ierr);
3742     ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr);
3743     ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr);
3744     ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr);
3745     ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr);
3746     ierr = PetscFree(dlens);CHKERRQ(ierr);
3747   } else {
3748     PetscInt ml,nl;
3749 
3750     M    = *newmat;
3751     ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
3752     if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
3753     ierr = MatZeroEntries(M);CHKERRQ(ierr);
3754     /*
3755          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
3756        rather than the slower MatSetValues().
3757     */
3758     M->was_assembled = PETSC_TRUE;
3759     M->assembled     = PETSC_FALSE;
3760   }
3761   ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
3762   aij  = (Mat_SeqAIJ*)(Mreuse)->data;
3763   ii   = aij->i;
3764   jj   = aij->j;
3765   aa   = aij->a;
3766   for (i=0; i<m; i++) {
3767     row   = rstart + i;
3768     nz    = ii[i+1] - ii[i];
3769     cwork = jj;     jj += nz;
3770     vwork = aa;     aa += nz;
3771     ierr  = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
3772   }
3773 
3774   ierr    = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3775   ierr    = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3776   *newmat = M;
3777 
3778   /* save submatrix used in processor for next request */
3779   if (call ==  MAT_INITIAL_MATRIX) {
3780     ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
3781     ierr = MatDestroy(&Mreuse);CHKERRQ(ierr);
3782   }
3783   PetscFunctionReturn(0);
3784 }
3785 
3786 PetscErrorCode MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[])
3787 {
3788   PetscInt       m,cstart, cend,j,nnz,i,d;
3789   PetscInt       *d_nnz,*o_nnz,nnz_max = 0,rstart,ii;
3790   const PetscInt *JJ;
3791   PetscScalar    *values;
3792   PetscErrorCode ierr;
3793   PetscBool      nooffprocentries;
3794 
3795   PetscFunctionBegin;
3796   if (Ii && Ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0, it is %D",Ii[0]);
3797 
3798   ierr   = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3799   ierr   = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3800   m      = B->rmap->n;
3801   cstart = B->cmap->rstart;
3802   cend   = B->cmap->rend;
3803   rstart = B->rmap->rstart;
3804 
3805   ierr = PetscMalloc2(m,&d_nnz,m,&o_nnz);CHKERRQ(ierr);
3806 
3807 #if defined(PETSC_USE_DEBUG)
3808   for (i=0; i<m; i++) {
3809     nnz = Ii[i+1]- Ii[i];
3810     JJ  = J + Ii[i];
3811     if (nnz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative number of columns %D",i,nnz);
3812     if (nnz && (JJ[0] < 0)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index %D",i,JJ[0]);
3813     if (nnz && (JJ[nnz-1] >= B->cmap->N)) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N);
3814   }
3815 #endif
3816 
3817   for (i=0; i<m; i++) {
3818     nnz     = Ii[i+1]- Ii[i];
3819     JJ      = J + Ii[i];
3820     nnz_max = PetscMax(nnz_max,nnz);
3821     d       = 0;
3822     for (j=0; j<nnz; j++) {
3823       if (cstart <= JJ[j] && JJ[j] < cend) d++;
3824     }
3825     d_nnz[i] = d;
3826     o_nnz[i] = nnz - d;
3827   }
3828   ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
3829   ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);
3830 
3831   if (v) values = (PetscScalar*)v;
3832   else {
3833     ierr = PetscCalloc1(nnz_max+1,&values);CHKERRQ(ierr);
3834   }
3835 
3836   for (i=0; i<m; i++) {
3837     ii   = i + rstart;
3838     nnz  = Ii[i+1]- Ii[i];
3839     ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ? Ii[i] : 0),INSERT_VALUES);CHKERRQ(ierr);
3840   }
3841   nooffprocentries    = B->nooffprocentries;
3842   B->nooffprocentries = PETSC_TRUE;
3843   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3844   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3845   B->nooffprocentries = nooffprocentries;
3846 
3847   if (!v) {
3848     ierr = PetscFree(values);CHKERRQ(ierr);
3849   }
3850   ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3851   PetscFunctionReturn(0);
3852 }
3853 
3854 /*@
3855    MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format
3856    (the default parallel PETSc format).
3857 
3858    Collective on MPI_Comm
3859 
3860    Input Parameters:
3861 +  B - the matrix
3862 .  i - the indices into j for the start of each local row (starts with zero)
3863 .  j - the column indices for each local row (starts with zero)
3864 -  v - optional values in the matrix
3865 
3866    Level: developer
3867 
3868    Notes:
3869        The i, j, and v arrays ARE copied by this routine into the internal format used by PETSc;
3870      thus you CANNOT change the matrix entries by changing the values of v[] after you have
3871      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3872 
3873        The i and j indices are 0-based, and the i indices are offsets into the local j array.
3874 
3875        The format used for the sparse matrix input is equivalent to a
3876     row-major ordering, i.e., for the following matrix, the expected input data is
3877     as shown
3878 
3879 $        1 0 0
3880 $        2 0 3     P0
3881 $       -------
3882 $        4 5 6     P1
3883 $
3884 $     Process0 [P0]: rows_owned=[0,1]
3885 $        i =  {0,1,3}  [size = nrow+1  = 2+1]
3886 $        j =  {0,0,2}  [size = 3]
3887 $        v =  {1,2,3}  [size = 3]
3888 $
3889 $     Process1 [P1]: rows_owned=[2]
3890 $        i =  {0,3}    [size = nrow+1  = 1+1]
3891 $        j =  {0,1,2}  [size = 3]
3892 $        v =  {4,5,6}  [size = 3]
3893 
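   As a minimal usage sketch (not part of the example above; comm is assumed to be
   the two-process communicator of the example, and error handling is abbreviated),
   process P0 could supply its rows with

.vb
     Mat         B;
     PetscInt    i[] = {0,1,3},j[] = {0,0,2};
     PetscScalar v[] = {1.0,2.0,3.0};

     ierr = MatCreate(comm,&B);CHKERRQ(ierr);
     ierr = MatSetSizes(B,2,PETSC_DECIDE,3,3);CHKERRQ(ierr);  /* P0 owns 2 local rows of the 3x3 example matrix */
     ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr);
     ierr = MatMPIAIJSetPreallocationCSR(B,i,j,v);CHKERRQ(ierr);
.ve
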
3894 .keywords: matrix, aij, compressed row, sparse, parallel
3895 
3896 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateAIJ(), MATMPIAIJ,
3897           MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays()
3898 @*/
3899 PetscErrorCode  MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
3900 {
3901   PetscErrorCode ierr;
3902 
3903   PetscFunctionBegin;
3904   ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocationCSR_C",(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,i,j,v));CHKERRQ(ierr);
3905   PetscFunctionReturn(0);
3906 }
3907 
3908 /*@C
3909    MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format
3910    (the default parallel PETSc format).  For good matrix assembly performance
3911    the user should preallocate the matrix storage by setting the parameters
3912    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3913    performance can be increased by more than a factor of 50.
3914 
3915    Collective on MPI_Comm
3916 
3917    Input Parameters:
3918 +  B - the matrix
3919 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
3920            (same value is used for all local rows)
3921 .  d_nnz - array containing the number of nonzeros in the various rows of the
3922            DIAGONAL portion of the local submatrix (possibly different for each row)
3923            or NULL (PETSC_NULL_INTEGER in Fortran), if d_nz is used to specify the nonzero structure.
3924            The size of this array is equal to the number of local rows, i.e 'm'.
3925            For matrices that will be factored, you must leave room for (and set)
3926            the diagonal entry even if it is zero.
3927 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
3928            submatrix (same value is used for all local rows).
3929 -  o_nnz - array containing the number of nonzeros in the various rows of the
3930            OFF-DIAGONAL portion of the local submatrix (possibly different for
3931            each row) or NULL (PETSC_NULL_INTEGER in Fortran), if o_nz is used to specify the nonzero
3932            structure. The size of this array is equal to the number
3933            of local rows, i.e 'm'.
3934 
3935    If the *_nnz parameter is given then the *_nz parameter is ignored
3936 
3937    The AIJ format (also called the Yale sparse matrix format or
3938    compressed row storage (CSR)) is fully compatible with standard Fortran 77
3939    storage.  The stored row and column indices begin with zero.
3940    See Users-Manual: ch_mat for details.
3941 
3942    The parallel matrix is partitioned such that the first m0 rows belong to
3943    process 0, the next m1 rows belong to process 1, the next m2 rows belong
3944    to process 2, etc., where m0,m1,m2,... are the input parameter 'm'.
3945 
3946    The DIAGONAL portion of the local submatrix of a processor can be defined
3947    as the submatrix which is obtained by extracting the part corresponding to
3948    the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the
3949    first row that belongs to the processor, r2 is the last row belonging to
3950    this processor, and c1-c2 is the range of indices of the local part of a
3951    vector suitable for applying the matrix to.  This is an m x n matrix.  In the
3952    common case of a square matrix, the row and column ranges are the same and
3953    the DIAGONAL part is also square. The remaining portion of the local
3954    submatrix (m x (N-n)) constitutes the OFF-DIAGONAL portion.
3955 
3956    If o_nnz and d_nnz are specified, then o_nz and d_nz are ignored.
3957 
3958    You can call MatGetInfo() to get information on how effective the preallocation was,
3959    for example the fields mallocs, nz_allocated, nz_used, and nz_unneeded.
3960    You can also run with the option -info and look for messages with the string
3961    malloc in them to see if additional memory allocation was needed.
3962 
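   For instance, a sketch of querying the local preallocation statistics (the
   MatInfo fields are PetscLogDouble values):

.vb
     MatInfo info;

     ierr = MatGetInfo(B,MAT_LOCAL,&info);CHKERRQ(ierr);
     ierr = PetscPrintf(PETSC_COMM_SELF,"mallocs %g, allocated %g, used %g, unneeded %g\n",
                        info.mallocs,info.nz_allocated,info.nz_used,info.nz_unneeded);CHKERRQ(ierr);
.ve
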
3963    Example usage:
3964 
3965    Consider the following 8x8 matrix with 34 non-zero values that is
3966    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
3967    proc1 owns 3 rows, and proc2 owns 2 rows. This division can be shown
3968    as follows:
3969 
3970 .vb
3971             1  2  0  |  0  3  0  |  0  4
3972     Proc0   0  5  6  |  7  0  0  |  8  0
3973             9  0 10  | 11  0  0  | 12  0
3974     -------------------------------------
3975            13  0 14  | 15 16 17  |  0  0
3976     Proc1   0 18  0  | 19 20 21  |  0  0
3977             0  0  0  | 22 23  0  | 24  0
3978     -------------------------------------
3979     Proc2  25 26 27  |  0  0 28  | 29  0
3980            30  0  0  | 31 32 33  |  0 34
3981 .ve
3982 
3983    This can be represented as a collection of submatrices as:
3984 
3985 .vb
3986       A B C
3987       D E F
3988       G H I
3989 .ve
3990 
3991    Here the submatrices A,B,C are owned by proc0, D,E,F are
3992    owned by proc1, and G,H,I are owned by proc2.
3993 
3994    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3995    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3996    The 'M','N' parameters are 8,8, and have the same values on all procs.
3997 
3998    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
3999    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
4000    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
4001    Internally, each processor stores the DIAGONAL part and the OFF-DIAGONAL
4002    part as SeqAIJ matrices. For example, proc1 stores [E] as one SeqAIJ
4003    matrix and [DF] as another SeqAIJ matrix.
4004 
4005    When the d_nz, o_nz parameters are specified, d_nz storage elements are
4006    allocated for every row of the local DIAGONAL submatrix, and o_nz
4007    storage locations are allocated for every row of the OFF-DIAGONAL submatrix.
4008    One way to choose d_nz and o_nz is to use the maximum number of nonzeros per
4009    local row in the local DIAGONAL and OFF-DIAGONAL submatrices, respectively.
4010    In this case, the values of d_nz,o_nz are:
4011 .vb
4012      proc0 : dnz = 2, o_nz = 2
4013      proc1 : dnz = 3, o_nz = 2
4014      proc2 : dnz = 1, o_nz = 4
4015 .ve
4016    We are allocating m*(d_nz+o_nz) storage locations on every process. This
4017    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, and 2*(1+4)=10
4018    for proc2, i.e., we use 12+15+10=37 storage locations to store
4019    34 values.
4020 
4021    When the d_nnz, o_nnz parameters are specified, the storage is specified
4022    for every row, corresponding to both the DIAGONAL and OFF-DIAGONAL submatrices.
4023    In the above case the values for d_nnz,o_nnz are:
4024 .vb
4025      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
4026      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
4027      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
4028 .ve
4029    Here the space allocated is the sum of all the above values, i.e., 34,
4030    and hence the preallocation is exact.
4031 
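   One common way to fill d_nnz/o_nnz is to count, for each local row, how many
   of its column indices fall inside the local column ownership range. A minimal
   sketch, assuming hypothetical arrays cols[i] (the column indices of local row i)
   and ncols[i] (their count), with d_nnz/o_nnz already allocated of length m:

.vb
     PetscInt cstart,cend,i,j,d;

     ierr = MatGetOwnershipRangeColumn(B,&cstart,&cend);CHKERRQ(ierr);
     for (i=0; i<m; i++) {
       d = 0;
       for (j=0; j<ncols[i]; j++) {
         if (cstart <= cols[i][j] && cols[i][j] < cend) d++; /* locally owned column: DIAGONAL block */
       }
       d_nnz[i] = d;
       o_nnz[i] = ncols[i] - d;                              /* the rest lands in the OFF-DIAGONAL block */
     }
     ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
.ve
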
4032    Level: intermediate
4033 
4034 .keywords: matrix, aij, compressed row, sparse, parallel
4035 
4036 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateAIJ(), MatMPIAIJSetPreallocationCSR(),
4037           MATMPIAIJ, MatGetInfo(), PetscSplitOwnership()
4038 @*/
4039 PetscErrorCode MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
4040 {
4041   PetscErrorCode ierr;
4042 
4043   PetscFunctionBegin;
4044   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
4045   PetscValidType(B,1);
4046   ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocation_C",(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr);
4047   PetscFunctionReturn(0);
4048 }
4049 
4050 /*@
4051      MatCreateMPIAIJWithArrays - creates a MPI AIJ matrix using arrays that contain in standard
4052          CSR format the local rows.
4053 
4054    Collective on MPI_Comm
4055 
4056    Input Parameters:
4057 +  comm - MPI communicator
4058 .  m - number of local rows (Cannot be PETSC_DECIDE)
4059 .  n - This value should be the same as the local size used in creating the
4060        x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have it
4061        calculated if N is given). For square matrices n is almost always m.
4062 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
4063 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
4064 .   i - row indices
4065 .   j - column indices
4066 -   a - matrix values
4067 
4068    Output Parameter:
4069 .   mat - the matrix
4070 
4071    Level: intermediate
4072 
4073    Notes:
4074        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
4075      thus you CANNOT change the matrix entries by changing the values of a[] after you have
4076      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
4077 
4078        The i and j indices are 0-based, and the i indices are offsets into the local j array.
4079 
4080        The format used for the sparse matrix input is equivalent to a
4081     row-major ordering, i.e., for the following matrix, the expected input data is
4082     as shown
4083 
4084 $        1 0 0
4085 $        2 0 3     P0
4086 $       -------
4087 $        4 5 6     P1
4088 $
4089 $     Process0 [P0]: rows_owned=[0,1]
4090 $        i =  {0,1,3}  [size = nrow+1  = 2+1]
4091 $        j =  {0,0,2}  [size = 3]
4092 $        a =  {1,2,3}  [size = 3]
4093 $
4094 $     Process1 [P1]: rows_owned=[2]
4095 $        i =  {0,3}    [size = nrow+1  = 1+1]
4096 $        j =  {0,1,2}  [size = 3]
4097 $        a =  {4,5,6}  [size = 3]
4098 
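   A minimal sketch for process P0 above (comm is assumed to be the two-process
   communicator of the example):

.vb
     Mat         A;
     PetscInt    i[] = {0,1,3},j[] = {0,0,2};
     PetscScalar a[] = {1.0,2.0,3.0};

     ierr = MatCreateMPIAIJWithArrays(comm,2,PETSC_DECIDE,PETSC_DETERMINE,3,i,j,a,&A);CHKERRQ(ierr);
.ve
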
4099 .keywords: matrix, aij, compressed row, sparse, parallel
4100 
4101 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
4102           MATMPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays()
4103 @*/
4104 PetscErrorCode MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
4105 {
4106   PetscErrorCode ierr;
4107 
4108   PetscFunctionBegin;
4109   if (i && i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
4110   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
4111   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
4112   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
4113   /* ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); */
4114   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
4115   ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr);
4116   PetscFunctionReturn(0);
4117 }
4118 
4119 /*@C
4120    MatCreateAIJ - Creates a sparse parallel matrix in AIJ format
4121    (the default parallel PETSc format).  For good matrix assembly performance
4122    the user should preallocate the matrix storage by setting the parameters
4123    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
4124    performance can be increased by more than a factor of 50.
4125 
4126    Collective on MPI_Comm
4127 
4128    Input Parameters:
4129 +  comm - MPI communicator
4130 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
4131            This value should be the same as the local size used in creating the
4132            y vector for the matrix-vector product y = Ax.
4133 .  n - This value should be the same as the local size used in creating the
4134        x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have it
4135        calculated if N is given). For square matrices n is almost always m.
4136 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
4137 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
4138 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
4139            (same value is used for all local rows)
4140 .  d_nnz - array containing the number of nonzeros in the various rows of the
4141            DIAGONAL portion of the local submatrix (possibly different for each row)
4142            or NULL, if d_nz is used to specify the nonzero structure.
4143            The size of this array is equal to the number of local rows, i.e 'm'.
4144 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
4145            submatrix (same value is used for all local rows).
4146 -  o_nnz - array containing the number of nonzeros in the various rows of the
4147            OFF-DIAGONAL portion of the local submatrix (possibly different for
4148            each row) or NULL, if o_nz is used to specify the nonzero
4149            structure. The size of this array is equal to the number
4150            of local rows, i.e 'm'.
4151 
4152    Output Parameter:
4153 .  A - the matrix
4154 
4155    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
4156    MatXXXXSetPreallocation() paradigm instead of this routine directly.
4157    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
4158 
4159    Notes:
4160    If the *_nnz parameter is given then the *_nz parameter is ignored
4161 
4162    m,n,M,N parameters specify the size of the matrix, and its partitioning across
4163    processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
4164    storage requirements for this matrix.
4165 
4166    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one
4167    processor then it must be used on all processors that share the object for
4168    that argument.
4169 
4170    The user MUST specify either the local or global matrix dimensions
4171    (possibly both).
4172 
4173    The parallel matrix is partitioned across processors such that the
4174    first m0 rows belong to process 0, the next m1 rows belong to
4175    process 1, the next m2 rows belong to process 2, etc., where
4176    m0,m1,m2,... are the input parameter 'm', i.e., each processor stores
4177    values corresponding to an [m x N] submatrix.
4178 
4179    The columns are logically partitioned with the n0 columns belonging
4180    to the 0th partition, the next n1 columns belonging to the next
4181    partition, etc., where n0,n1,n2,... are the input parameter 'n'.
4182 
4183    The DIAGONAL portion of the local submatrix on any given processor
4184    is the submatrix corresponding to the rows and columns owned by
4185    that processor, i.e., the diagonal submatrix on
4186    process 0 is [m0 x n0], the diagonal submatrix on process 1 is [m1 x n1],
4187    etc. The remaining portion of the local submatrix [m x (N-n)]
4188    constitutes the OFF-DIAGONAL portion. The example below better
4189    illustrates this concept.
4190 
4191    For a square global matrix we define each processor's diagonal portion
4192    to be its local rows and the corresponding columns (a square submatrix);
4193    each processor's off-diagonal portion encompasses the remainder of the
4194    local matrix (a rectangular submatrix).
4195 
4196    If o_nnz and d_nnz are specified, then o_nz and d_nz are ignored.
4197 
4198    When calling this routine with a single process communicator, a matrix of
4199    type SEQAIJ is returned.  If a matrix of type MPIAIJ is desired for this
4200    type of communicator, use the construction mechanism
4201 .vb
4202      MatCreate(...,&A);
4203      MatSetType(A,MATMPIAIJ);
4204      MatSetSizes(A,m,n,M,N);
4205      MatMPIAIJSetPreallocation(A,...);
4206 .ve
4209 
4210    By default, this format uses inodes (identical nodes) when possible.
4211    We search for consecutive rows with the same nonzero structure, thereby
4212    reusing matrix information to achieve increased efficiency.
4213 
4214    Options Database Keys:
4215 +  -mat_no_inode  - Do not use inodes
4216 -  -mat_inode_limit <limit> - Sets inode limit (max limit=5)
4217 
4220    Example usage:
4221 
4222    Consider the following 8x8 matrix with 34 non-zero values that is
4223    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
4224    proc1 owns 3 rows, and proc2 owns 2 rows. This division can be shown
4225    as follows
4226 
4227 .vb
4228             1  2  0  |  0  3  0  |  0  4
4229     Proc0   0  5  6  |  7  0  0  |  8  0
4230             9  0 10  | 11  0  0  | 12  0
4231     -------------------------------------
4232            13  0 14  | 15 16 17  |  0  0
4233     Proc1   0 18  0  | 19 20 21  |  0  0
4234             0  0  0  | 22 23  0  | 24  0
4235     -------------------------------------
4236     Proc2  25 26 27  |  0  0 28  | 29  0
4237            30  0  0  | 31 32 33  |  0 34
4238 .ve
4239 
4240    This can be represented as a collection of submatrices as
4241 
4242 .vb
4243       A B C
4244       D E F
4245       G H I
4246 .ve
4247 
4248    Here the submatrices A,B,C are owned by proc0, D,E,F are
4249    owned by proc1, and G,H,I are owned by proc2.
4250 
4251    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4252    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4253    The 'M','N' parameters are 8,8, and have the same values on all procs.
4254 
4255    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
4256    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
4257    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
4258    Internally, each processor stores the DIAGONAL part and the OFF-DIAGONAL
4259    part as SeqAIJ matrices. For example, proc1 stores [E] as one SeqAIJ
4260    matrix and [DF] as another SeqAIJ matrix.
4261 
4262    When the d_nz, o_nz parameters are specified, d_nz storage elements are
4263    allocated for every row of the local DIAGONAL submatrix, and o_nz
4264    storage locations are allocated for every row of the OFF-DIAGONAL submatrix.
4265    One way to choose d_nz and o_nz is to use the maximum number of nonzeros per
4266    local row in the local DIAGONAL and OFF-DIAGONAL submatrices, respectively.
4267    In this case, the values of d_nz,o_nz are
4268 .vb
4269      proc0 : dnz = 2, o_nz = 2
4270      proc1 : dnz = 3, o_nz = 2
4271      proc2 : dnz = 1, o_nz = 4
4272 .ve
4273    We are allocating m*(d_nz+o_nz) storage locations on every process. This
4274    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, and 2*(1+4)=10
4275    for proc2, i.e., we use 12+15+10=37 storage locations to store
4276    34 values.
4277 
4278    When the d_nnz, o_nnz parameters are specified, the storage is specified
4279    for every row, corresponding to both the DIAGONAL and OFF-DIAGONAL submatrices.
4280    In the above case the values for d_nnz,o_nnz are
4281 .vb
4282      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
4283      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
4284      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
4285 .ve
4286    Here the space allocated is the sum of all the above values, i.e., 34,
4287    and hence the preallocation is exact.
4288 
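   For reference, a sketch of the direct call on proc0 above (comm is assumed to be
   the three-process communicator of the example; the MatCreate()/MatSetType()
   construction shown earlier remains the recommended path):

.vb
     Mat      A;
     PetscInt d_nnz[] = {2,2,2},o_nnz[] = {2,2,2};

     ierr = MatCreateAIJ(comm,3,3,8,8,0,d_nnz,0,o_nnz,&A);CHKERRQ(ierr);
.ve
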
4289    Level: intermediate
4290 
4291 .keywords: matrix, aij, compressed row, sparse, parallel
4292 
4293 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
4294           MATMPIAIJ, MatCreateMPIAIJWithArrays()
4295 @*/
4296 PetscErrorCode  MatCreateAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
4297 {
4298   PetscErrorCode ierr;
4299   PetscMPIInt    size;
4300 
4301   PetscFunctionBegin;
4302   ierr = MatCreate(comm,A);CHKERRQ(ierr);
4303   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
4304   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4305   if (size > 1) {
4306     ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr);
4307     ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
4308   } else {
4309     ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr);
4310     ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr);
4311   }
4312   PetscFunctionReturn(0);
4313 }
4314 
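/* Return the diagonal (Ad) and off-diagonal (Ao) SeqAIJ blocks of a MATMPIAIJ matrix,
   along with the mapping (colmap) from the local columns of Ao to global column numbers;
   any output argument may be NULL if it is not needed */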
4315 PetscErrorCode MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,const PetscInt *colmap[])
4316 {
4317   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
4318   PetscBool      flg;
4319   PetscErrorCode ierr;
4320 
4321   PetscFunctionBegin;
4322   ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&flg);CHKERRQ(ierr);
4323   if (!flg) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"This function requires a MATMPIAIJ matrix as input");
4324   if (Ad)     *Ad     = a->A;
4325   if (Ao)     *Ao     = a->B;
4326   if (colmap) *colmap = a->garray;
4327   PetscFunctionReturn(0);
4328 }
4329 
4330 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
4331 {
4332   PetscErrorCode ierr;
4333   PetscInt       m,N,i,rstart,nnz,Ii;
4334   PetscInt       *indx;
4335   PetscScalar    *values;
4336 
4337   PetscFunctionBegin;
4338   ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
4339   if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
4340     PetscInt       *dnz,*onz,sum,bs,cbs;
4341 
4342     if (n == PETSC_DECIDE) {
4343       ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr);
4344     }
4345     /* Check sum(n) = N */
4346     ierr = MPIU_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
4347     if (sum != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns %D != global columns %D",sum,N);
4348 
4349     ierr    = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
4350     rstart -= m;
4351 
4352     ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
4353     for (i=0; i<m; i++) {
4354       ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr);
4355       ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr);
4356       ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr);
4357     }
4358 
4359     ierr = MatCreate(comm,outmat);CHKERRQ(ierr);
4360     ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
4361     ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr);
4362     ierr = MatSetBlockSizes(*outmat,bs,cbs);CHKERRQ(ierr);
4363     ierr = MatSetType(*outmat,MATAIJ);CHKERRQ(ierr);
4364     ierr = MatSeqAIJSetPreallocation(*outmat,0,dnz);CHKERRQ(ierr);
4365     ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr);
4366     ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
4367   }
4368 
4369   /* numeric phase */
4370   ierr = MatGetOwnershipRange(*outmat,&rstart,NULL);CHKERRQ(ierr);
4371   for (i=0; i<m; i++) {
4372     ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
4373     Ii   = i + rstart;
4374     ierr = MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
4375     ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
4376   }
4377   ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4378   ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4379   PetscFunctionReturn(0);
4380 }
4381 
4382 PetscErrorCode MatFileSplit(Mat A,char *outfile)
4383 {
4384   PetscErrorCode    ierr;
4385   PetscMPIInt       rank;
4386   PetscInt          m,N,i,rstart,nnz;
4387   size_t            len;
4388   const PetscInt    *indx;
4389   PetscViewer       out;
4390   char              *name;
4391   Mat               B;
4392   const PetscScalar *values;
4393 
4394   PetscFunctionBegin;
4395   ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr);
4396   ierr = MatGetSize(A,0,&N);CHKERRQ(ierr);
4397   /* Should this be the type of the diagonal block of A? */
4398   ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr);
4399   ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr);
4400   ierr = MatSetBlockSizesFromMats(B,A,A);CHKERRQ(ierr);
4401   ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr);
4402   ierr = MatSeqAIJSetPreallocation(B,0,NULL);CHKERRQ(ierr);
4403   ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr);
4404   for (i=0; i<m; i++) {
4405     ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
4406     ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
4407     ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
4408   }
4409   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4410   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4411 
4412   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRQ(ierr);
4413   ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr);
4414   ierr = PetscMalloc1(len+5,&name);CHKERRQ(ierr);
4415   sprintf(name,"%s.%d",outfile,rank);
4416   ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr);
4417   ierr = PetscFree(name);CHKERRQ(ierr);
4418   ierr = MatView(B,out);CHKERRQ(ierr);
4419   ierr = PetscViewerDestroy(&out);CHKERRQ(ierr);
4420   ierr = MatDestroy(&B);CHKERRQ(ierr);
4421   PetscFunctionReturn(0);
4422 }
4423 
4424 PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(Mat A)
4425 {
4426   PetscErrorCode      ierr;
4427   Mat_Merge_SeqsToMPI *merge;
4428   PetscContainer      container;
4429 
4430   PetscFunctionBegin;
4431   ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr);
4432   if (container) {
4433     ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr);
4434     ierr = PetscFree(merge->id_r);CHKERRQ(ierr);
4435     ierr = PetscFree(merge->len_s);CHKERRQ(ierr);
4436     ierr = PetscFree(merge->len_r);CHKERRQ(ierr);
4437     ierr = PetscFree(merge->bi);CHKERRQ(ierr);
4438     ierr = PetscFree(merge->bj);CHKERRQ(ierr);
4439     ierr = PetscFree(merge->buf_ri[0]);CHKERRQ(ierr);
4440     ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr);
4441     ierr = PetscFree(merge->buf_rj[0]);CHKERRQ(ierr);
4442     ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr);
4443     ierr = PetscFree(merge->coi);CHKERRQ(ierr);
4444     ierr = PetscFree(merge->coj);CHKERRQ(ierr);
4445     ierr = PetscFree(merge->owners_co);CHKERRQ(ierr);
4446     ierr = PetscLayoutDestroy(&merge->rowmap);CHKERRQ(ierr);
4447     ierr = PetscFree(merge);CHKERRQ(ierr);
4448     ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr);
4449   }
4450   ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr);
4451   PetscFunctionReturn(0);
4452 }
4453 
4454 #include <../src/mat/utils/freespace.h>
4455 #include <petscbt.h>
4456 
4457 PetscErrorCode MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat,Mat mpimat)
4458 {
4459   PetscErrorCode      ierr;
4460   MPI_Comm            comm;
4461   Mat_SeqAIJ          *a  =(Mat_SeqAIJ*)seqmat->data;
4462   PetscMPIInt         size,rank,taga,*len_s;
4463   PetscInt            N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj;
4464   PetscInt            proc,m;
4465   PetscInt            **buf_ri,**buf_rj;
4466   PetscInt            k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj;
4467   PetscInt            nrows,**buf_ri_k,**nextrow,**nextai;
4468   MPI_Request         *s_waits,*r_waits;
4469   MPI_Status          *status;
4470   MatScalar           *aa=a->a;
4471   MatScalar           **abuf_r,*ba_i;
4472   Mat_Merge_SeqsToMPI *merge;
4473   PetscContainer      container;
4474 
4475   PetscFunctionBegin;
4476   ierr = PetscObjectGetComm((PetscObject)mpimat,&comm);CHKERRQ(ierr);
4477   ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
4478 
4479   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4480   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4481 
4482   ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr);
4483   ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr);
4484 
4485   bi     = merge->bi;
4486   bj     = merge->bj;
4487   buf_ri = merge->buf_ri;
4488   buf_rj = merge->buf_rj;
4489 
4490   ierr   = PetscMalloc1(size,&status);CHKERRQ(ierr);
4491   owners = merge->rowmap->range;
4492   len_s  = merge->len_s;
4493 
4494   /* send and recv matrix values */
4495   /*-----------------------------*/
4496   ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr);
4497   ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr);
4498 
4499   ierr = PetscMalloc1(merge->nsend+1,&s_waits);CHKERRQ(ierr);
4500   for (proc=0,k=0; proc<size; proc++) {
4501     if (!len_s[proc]) continue;
4502     i    = owners[proc];
4503     ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr);
4504     k++;
4505   }
4506 
4507   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);}
4508   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);}
4509   ierr = PetscFree(status);CHKERRQ(ierr);
4510 
4511   ierr = PetscFree(s_waits);CHKERRQ(ierr);
4512   ierr = PetscFree(r_waits);CHKERRQ(ierr);
4513 
4514   /* insert mat values of mpimat */
4515   /*----------------------------*/
4516   ierr = PetscMalloc1(N,&ba_i);CHKERRQ(ierr);
4517   ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr);
4518 
4519   for (k=0; k<merge->nrecv; k++) {
4520     buf_ri_k[k] = buf_ri[k]; /* beginning of the k-th received i-structure */
4521     nrows       = *(buf_ri_k[k]);
4522     nextrow[k]  = buf_ri_k[k]+1;  /* next row number of the k-th received i-structure */
4523     nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of the k-th received i-structure */
4524   }
4525 
4526   /* set values of ba */
4527   m = merge->rowmap->n;
4528   for (i=0; i<m; i++) {
4529     arow = owners[rank] + i;
4530     bj_i = bj+bi[i];  /* col indices of the i-th row of mpimat */
4531     bnzi = bi[i+1] - bi[i];
4532     ierr = PetscMemzero(ba_i,bnzi*sizeof(PetscScalar));CHKERRQ(ierr);
4533 
4534     /* add local non-zero vals of this proc's seqmat into ba */
4535     anzi   = ai[arow+1] - ai[arow];
4536     aj     = a->j + ai[arow];
4537     aa     = a->a + ai[arow];
4538     nextaj = 0;
4539     for (j=0; nextaj<anzi; j++) {
4540       if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
4541         ba_i[j] += aa[nextaj++];
4542       }
4543     }
4544 
4545     /* add received vals into ba */
4546     for (k=0; k<merge->nrecv; k++) { /* k-th received message */
4547       /* i-th row */
4548       if (i == *nextrow[k]) {
4549         anzi   = *(nextai[k]+1) - *nextai[k];
4550         aj     = buf_rj[k] + *(nextai[k]);
4551         aa     = abuf_r[k] + *(nextai[k]);
4552         nextaj = 0;
4553         for (j=0; nextaj<anzi; j++) {
4554           if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
4555             ba_i[j] += aa[nextaj++];
4556           }
4557         }
4558         nextrow[k]++; nextai[k]++;
4559       }
4560     }
4561     ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr);
4562   }
4563   ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4564   ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4565 
4566   ierr = PetscFree(abuf_r[0]);CHKERRQ(ierr);
4567   ierr = PetscFree(abuf_r);CHKERRQ(ierr);
4568   ierr = PetscFree(ba_i);CHKERRQ(ierr);
4569   ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
4570   ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
4571   PetscFunctionReturn(0);
4572 }
4573 
4574 PetscErrorCode  MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat)
4575 {
4576   PetscErrorCode      ierr;
4577   Mat                 B_mpi;
4578   Mat_SeqAIJ          *a=(Mat_SeqAIJ*)seqmat->data;
4579   PetscMPIInt         size,rank,tagi,tagj,*len_s,*len_si,*len_ri;
4580   PetscInt            **buf_rj,**buf_ri,**buf_ri_k;
4581   PetscInt            M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j;
4582   PetscInt            len,proc,*dnz,*onz,bs,cbs;
4583   PetscInt            k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0;
4584   PetscInt            nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai;
4585   MPI_Request         *si_waits,*sj_waits,*ri_waits,*rj_waits;
4586   MPI_Status          *status;
4587   PetscFreeSpaceList  free_space=NULL,current_space=NULL;
4588   PetscBT             lnkbt;
4589   Mat_Merge_SeqsToMPI *merge;
4590   PetscContainer      container;
4591 
4592   PetscFunctionBegin;
4593   ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4594 
4595   /* make sure it is a PETSc comm */
4596   ierr = PetscCommDuplicate(comm,&comm,NULL);CHKERRQ(ierr);
4597   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4598   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4599 
4600   ierr = PetscNew(&merge);CHKERRQ(ierr);
4601   ierr = PetscMalloc1(size,&status);CHKERRQ(ierr);
4602 
4603   /* determine row ownership */
4604   /*---------------------------------------------------------*/
4605   ierr = PetscLayoutCreate(comm,&merge->rowmap);CHKERRQ(ierr);
4606   ierr = PetscLayoutSetLocalSize(merge->rowmap,m);CHKERRQ(ierr);
4607   ierr = PetscLayoutSetSize(merge->rowmap,M);CHKERRQ(ierr);
4608   ierr = PetscLayoutSetBlockSize(merge->rowmap,1);CHKERRQ(ierr);
4609   ierr = PetscLayoutSetUp(merge->rowmap);CHKERRQ(ierr);
4610   ierr = PetscMalloc1(size,&len_si);CHKERRQ(ierr);
4611   ierr = PetscMalloc1(size,&merge->len_s);CHKERRQ(ierr);
4612 
4613   m      = merge->rowmap->n;
4614   owners = merge->rowmap->range;
4615 
4616   /* determine the number of messages to send, their lengths */
4617   /*---------------------------------------------------------*/
4618   len_s = merge->len_s;
4619 
4620   len          = 0; /* length of buf_si[] */
4621   merge->nsend = 0;
4622   for (proc=0; proc<size; proc++) {
4623     len_si[proc] = 0;
4624     if (proc == rank) {
4625       len_s[proc] = 0;
4626     } else {
4627       len_si[proc] = owners[proc+1] - owners[proc] + 1;
4628       len_s[proc]  = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */
4629     }
4630     if (len_s[proc]) {
4631       merge->nsend++;
4632       nrows = 0;
4633       for (i=owners[proc]; i<owners[proc+1]; i++) {
4634         if (ai[i+1] > ai[i]) nrows++;
4635       }
4636       len_si[proc] = 2*(nrows+1);
4637       len         += len_si[proc];
4638     }
4639   }
4640 
4641   /* determine the number and length of messages to receive for ij-structure */
4642   /*-------------------------------------------------------------------------*/
4643   ierr = PetscGatherNumberOfMessages(comm,NULL,len_s,&merge->nrecv);CHKERRQ(ierr);
4644   ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr);
4645 
4646   /* post the Irecv of j-structure */
4647   /*-------------------------------*/
4648   ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr);
4649   ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr);
4650 
4651   /* post the Isend of j-structure */
4652   /*--------------------------------*/
4653   ierr = PetscMalloc2(merge->nsend,&si_waits,merge->nsend,&sj_waits);CHKERRQ(ierr);
4654 
4655   for (proc=0, k=0; proc<size; proc++) {
4656     if (!len_s[proc]) continue;
4657     i    = owners[proc];
4658     ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr);
4659     k++;
4660   }
4661 
4662   /* receives and sends of j-structure are complete */
4663   /*------------------------------------------------*/
4664   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);}
4665   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);}
4666 
4667   /* send and recv i-structure */
4668   /*---------------------------*/
4669   ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr);
4670   ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr);
4671 
4672   ierr   = PetscMalloc1(len+1,&buf_s);CHKERRQ(ierr);
4673   buf_si = buf_s;  /* points to the beginning of k-th msg to be sent */
4674   for (proc=0,k=0; proc<size; proc++) {
4675     if (!len_s[proc]) continue;
4676     /* form outgoing message for i-structure:
4677          buf_si[0]:                 nrows to be sent
4678                [1:nrows]:           row index (global)
4679                [nrows+1:2*nrows+1]: i-structure index
4680     */
4681     /*-------------------------------------------*/
4682     nrows       = len_si[proc]/2 - 1;
4683     buf_si_i    = buf_si + nrows+1;
4684     buf_si[0]   = nrows;
4685     buf_si_i[0] = 0;
4686     nrows       = 0;
4687     for (i=owners[proc]; i<owners[proc+1]; i++) {
4688       anzi = ai[i+1] - ai[i];
4689       if (anzi) {
4690         buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */
4691         buf_si[nrows+1]   = i-owners[proc]; /* local row index */
4692         nrows++;
4693       }
4694     }
4695     ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr);
4696     k++;
4697     buf_si += len_si[proc];
4698   }
4699 
4700   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);}
4701   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);}
4702 
4703   ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr);
4704   for (i=0; i<merge->nrecv; i++) {
4705     ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr);
4706   }
4707 
4708   ierr = PetscFree(len_si);CHKERRQ(ierr);
4709   ierr = PetscFree(len_ri);CHKERRQ(ierr);
4710   ierr = PetscFree(rj_waits);CHKERRQ(ierr);
4711   ierr = PetscFree2(si_waits,sj_waits);CHKERRQ(ierr);
4712   ierr = PetscFree(ri_waits);CHKERRQ(ierr);
4713   ierr = PetscFree(buf_s);CHKERRQ(ierr);
4714   ierr = PetscFree(status);CHKERRQ(ierr);
4715 
4716   /* compute a local seq matrix in each processor */
4717   /*----------------------------------------------*/
4718   /* allocate bi array and free space for accumulating nonzero column info */
4719   ierr  = PetscMalloc1(m+1,&bi);CHKERRQ(ierr);
4720   bi[0] = 0;
4721 
4722   /* create and initialize a linked list */
4723   nlnk = N+1;
4724   ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4725 
4726   /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */
4727   len  = ai[owners[rank+1]] - ai[owners[rank]];
4728   ierr = PetscFreeSpaceGet(PetscIntMultTruncate(2,len)+1,&free_space);CHKERRQ(ierr);
4729 
4730   current_space = free_space;
4731 
4732   /* determine symbolic info for each local row */
4733   ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr);
4734 
4735   for (k=0; k<merge->nrecv; k++) {
4736     buf_ri_k[k] = buf_ri[k]; /* beginning of the k-th received i-structure */
4737     nrows       = *buf_ri_k[k];
4738     nextrow[k]  = buf_ri_k[k] + 1;  /* next row number of the k-th received i-structure */
4739     nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of the k-th received i-structure */
4740   }
4741 
4742   ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
4743   len  = 0;
4744   for (i=0; i<m; i++) {
4745     bnzi = 0;
4746     /* add local non-zero cols of this proc's seqmat into lnk */
4747     arow  = owners[rank] + i;
4748     anzi  = ai[arow+1] - ai[arow];
4749     aj    = a->j + ai[arow];
4750     ierr  = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4751     bnzi += nlnk;
4752     /* add received col data into lnk */
4753     for (k=0; k<merge->nrecv; k++) { /* k-th received message */
4754       if (i == *nextrow[k]) { /* i-th row */
4755         anzi  = *(nextai[k]+1) - *nextai[k];
4756         aj    = buf_rj[k] + *nextai[k];
4757         ierr  = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4758         bnzi += nlnk;
4759         nextrow[k]++; nextai[k]++;
4760       }
4761     }
4762     if (len < bnzi) len = bnzi;  /* =max(bnzi) */
4763 
4764     /* if free space is not available, make more free space */
4765     if (current_space->local_remaining<bnzi) {
4766       ierr = PetscFreeSpaceGet(PetscIntSumTruncate(bnzi,current_space->total_array_size),&current_space);CHKERRQ(ierr);
4767       nspacedouble++;
4768     }
4769     /* copy data into free space, then initialize lnk */
4770     ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
4771     ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr);
4772 
4773     current_space->array           += bnzi;
4774     current_space->local_used      += bnzi;
4775     current_space->local_remaining -= bnzi;
4776 
4777     bi[i+1] = bi[i] + bnzi;
4778   }
4779 
4780   ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
4781 
4782   ierr = PetscMalloc1(bi[m]+1,&bj);CHKERRQ(ierr);
4783   ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr);
4784   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
4785 
4786   /* create symbolic parallel matrix B_mpi */
4787   /*---------------------------------------*/
4788   ierr = MatGetBlockSizes(seqmat,&bs,&cbs);CHKERRQ(ierr);
4789   ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr);
4790   if (n==PETSC_DECIDE) {
4791     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr);
4792   } else {
4793     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
4794   }
4795   ierr = MatSetBlockSizes(B_mpi,bs,cbs);CHKERRQ(ierr);
4796   ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr);
4797   ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr);
4798   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
4799   ierr = MatSetOption(B_mpi,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
4800 
4801   /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */
4802   B_mpi->assembled    = PETSC_FALSE;
4803   B_mpi->ops->destroy = MatDestroy_MPIAIJ_SeqsToMPI;
4804   merge->bi           = bi;
4805   merge->bj           = bj;
4806   merge->buf_ri       = buf_ri;
4807   merge->buf_rj       = buf_rj;
4808   merge->coi          = NULL;
4809   merge->coj          = NULL;
4810   merge->owners_co    = NULL;
4811 
4812   ierr = PetscCommDestroy(&comm);CHKERRQ(ierr);
4813 
4814   /* attach the supporting struct to B_mpi for reuse */
4815   ierr    = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
4816   ierr    = PetscContainerSetPointer(container,merge);CHKERRQ(ierr);
4817   ierr    = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr);
4818   ierr    = PetscContainerDestroy(&container);CHKERRQ(ierr);
4819   *mpimat = B_mpi;
4820 
4821   ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4822   PetscFunctionReturn(0);
4823 }
4824 
4825 /*@C
4826       MatCreateMPIAIJSumSeqAIJ - Creates a MATMPIAIJ matrix by adding sequential
4827                  matrices from each processor
4828 
4829     Collective on MPI_Comm
4830 
4831    Input Parameters:
4832 +    comm - the communicator the parallel matrix will live on
4833 .    seqmat - the input sequential matrix on each process
4834 .    m - number of local rows (or PETSC_DECIDE)
4835 .    n - number of local columns (or PETSC_DECIDE)
4836 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4837 
4838    Output Parameter:
4839 .    mpimat - the parallel matrix generated
4840 
4841     Level: advanced
4842 
4843    Notes:
4844      The dimensions of the sequential matrix in each processor MUST be the same.
4845      The input seqmat is included in the container "Mat_Merge_SeqsToMPI", and will be
4846      destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat.
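
     A minimal sketch of the intended reuse cycle (mpimat is created once, then
     refilled after the numerical values of seqmat change while its nonzero pattern
     stays fixed):

.vb
     ierr = MatCreateMPIAIJSumSeqAIJ(comm,seqmat,PETSC_DECIDE,PETSC_DECIDE,MAT_INITIAL_MATRIX,&mpimat);CHKERRQ(ierr);
     /* ... update the numerical values of seqmat ... */
     ierr = MatCreateMPIAIJSumSeqAIJ(comm,seqmat,PETSC_DECIDE,PETSC_DECIDE,MAT_REUSE_MATRIX,&mpimat);CHKERRQ(ierr);
.ve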
4847 @*/
4848 PetscErrorCode MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat)
4849 {
4850   PetscErrorCode ierr;
4851   PetscMPIInt    size;
4852 
4853   PetscFunctionBegin;
4854   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4855   if (size == 1) {
4856     ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4857     if (scall == MAT_INITIAL_MATRIX) {
4858       ierr = MatDuplicate(seqmat,MAT_COPY_VALUES,mpimat);CHKERRQ(ierr);
4859     } else {
4860       ierr = MatCopy(seqmat,*mpimat,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
4861     }
4862     ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4863     PetscFunctionReturn(0);
4864   }
4865   ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4866   if (scall == MAT_INITIAL_MATRIX) {
4867     ierr = MatCreateMPIAIJSumSeqAIJSymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr);
4868   }
4869   ierr = MatCreateMPIAIJSumSeqAIJNumeric(seqmat,*mpimat);CHKERRQ(ierr);
4870   ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4871   PetscFunctionReturn(0);
4872 }
4873 
4874 /*@
4875      MatMPIAIJGetLocalMat - Creates a SeqAIJ matrix from a MATMPIAIJ matrix by taking all its local rows and putting them into a sequential matrix with
4876           mlocal rows and n columns, where mlocal is the row count obtained with MatGetLocalSize() and n is the global column count obtained
4877           with MatGetSize()
4878 
4879     Not Collective
4880 
4881    Input Parameters:
4882 +    A - the matrix
4883 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4884 
4885    Output Parameter:
4886 .    A_loc - the local sequential matrix generated
4887 
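   A minimal usage sketch (the first call creates A_loc; the later call with
   MAT_REUSE_MATRIX refreshes its numerical values):

.vb
     Mat A_loc;

     ierr = MatMPIAIJGetLocalMat(A,MAT_INITIAL_MATRIX,&A_loc);CHKERRQ(ierr);
     /* ... use A_loc ... */
     ierr = MatMPIAIJGetLocalMat(A,MAT_REUSE_MATRIX,&A_loc);CHKERRQ(ierr);
     ierr = MatDestroy(&A_loc);CHKERRQ(ierr);
.ve
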
4888     Level: developer
4889 
4890 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMatCondensed()
4891 
4892 @*/
4893 PetscErrorCode MatMPIAIJGetLocalMat(Mat A,MatReuse scall,Mat *A_loc)
4894 {
4895   PetscErrorCode ierr;
4896   Mat_MPIAIJ     *mpimat=(Mat_MPIAIJ*)A->data;
4897   Mat_SeqAIJ     *mat,*a,*b;
4898   PetscInt       *ai,*aj,*bi,*bj,*cmap=mpimat->garray;
4899   MatScalar      *aa,*ba,*cam;
4900   PetscScalar    *ca;
4901   PetscInt       am=A->rmap->n,i,j,k,cstart=A->cmap->rstart;
4902   PetscInt       *ci,*cj,col,ncols_d,ncols_o,jo;
4903   PetscBool      match;
4904   MPI_Comm       comm;
4905   PetscMPIInt    size;
4906 
4907   PetscFunctionBegin;
4908   ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr);
4909   if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MATMPIAIJ matrix as input");
4910   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
4911   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4912   if (size == 1 && scall == MAT_REUSE_MATRIX) PetscFunctionReturn(0);
4913 
4914   ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
4915   a = (Mat_SeqAIJ*)(mpimat->A)->data;
4916   b = (Mat_SeqAIJ*)(mpimat->B)->data;
4917   ai = a->i; aj = a->j; bi = b->i; bj = b->j;
4918   aa = a->a; ba = b->a;
4919   if (scall == MAT_INITIAL_MATRIX) {
4920     if (size == 1) {
4921       ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ai,aj,aa,A_loc);CHKERRQ(ierr);
4922       PetscFunctionReturn(0);
4923     }
4924 
4925     ierr  = PetscMalloc1(1+am,&ci);CHKERRQ(ierr);
4926     ci[0] = 0;
4927     for (i=0; i<am; i++) {
4928       ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]);
4929     }
4930     ierr = PetscMalloc1(1+ci[am],&cj);CHKERRQ(ierr);
4931     ierr = PetscMalloc1(1+ci[am],&ca);CHKERRQ(ierr);
4932     k    = 0;
4933     for (i=0; i<am; i++) {
4934       ncols_o = bi[i+1] - bi[i];
4935       ncols_d = ai[i+1] - ai[i];
4936       /* off-diagonal portion of A */
4937       for (jo=0; jo<ncols_o; jo++) {
4938         col = cmap[*bj];
4939         if (col >= cstart) break;
4940         cj[k]   = col; bj++;
4941         ca[k++] = *ba++;
4942       }
4943       /* diagonal portion of A */
4944       for (j=0; j<ncols_d; j++) {
4945         cj[k]   = cstart + *aj++;
4946         ca[k++] = *aa++;
4947       }
4948       /* off-diagonal portion of A */
4949       for (j=jo; j<ncols_o; j++) {
4950         cj[k]   = cmap[*bj++];
4951         ca[k++] = *ba++;
4952       }
4953     }
4954     /* put together the new matrix */
4955     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr);
4956     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
4957     /* Since these are PETSc arrays, change flags to free them as necessary. */
4958     mat          = (Mat_SeqAIJ*)(*A_loc)->data;
4959     mat->free_a  = PETSC_TRUE;
4960     mat->free_ij = PETSC_TRUE;
4961     mat->nonew   = 0;
4962   } else if (scall == MAT_REUSE_MATRIX) {
4963     mat=(Mat_SeqAIJ*)(*A_loc)->data;
4964     ci = mat->i; cj = mat->j; cam = mat->a;
4965     for (i=0; i<am; i++) {
4966       /* off-diagonal portion of A */
4967       ncols_o = bi[i+1] - bi[i];
4968       for (jo=0; jo<ncols_o; jo++) {
4969         col = cmap[*bj];
4970         if (col >= cstart) break;
4971         *cam++ = *ba++; bj++;
4972       }
4973       /* diagonal portion of A */
4974       ncols_d = ai[i+1] - ai[i];
4975       for (j=0; j<ncols_d; j++) *cam++ = *aa++;
4976       /* off-diagonal portion of A */
4977       for (j=jo; j<ncols_o; j++) {
4978         *cam++ = *ba++; bj++;
4979       }
4980     }
4981   } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
4982   ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
4983   PetscFunctionReturn(0);
4984 }
4985 
4986 /*@C
4987      MatMPIAIJGetLocalMatCondensed - Creates a SeqAIJ matrix from a MATMPIAIJ matrix by taking all its local rows and NON-ZERO columns
4988 
4989     Not Collective
4990 
4991    Input Parameters:
4992 +    A - the matrix
4993 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4994 -    row, col - index sets of rows and columns to extract (or NULL)
4995 
4996    Output Parameter:
4997 .    A_loc - the local sequential matrix generated
4998 
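   For example, to condense over all local rows and all nonzero columns (a minimal
   sketch; passing NULL selects the defaults described above):

.vb
     Mat A_loc;

     ierr = MatMPIAIJGetLocalMatCondensed(A,MAT_INITIAL_MATRIX,NULL,NULL,&A_loc);CHKERRQ(ierr);
.ve
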
4999     Level: developer
5000 
5001 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMat()
5002 
5003 @*/
5004 PetscErrorCode MatMPIAIJGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc)
5005 {
5006   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
5007   PetscErrorCode ierr;
5008   PetscInt       i,start,end,ncols,nzA,nzB,*cmap,imark,*idx;
5009   IS             isrowa,iscola;
5010   Mat            *aloc;
5011   PetscBool      match;
5012 
5013   PetscFunctionBegin;
5014   ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr);
5015   if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MATMPIAIJ matrix as input");
5016   ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
5017   if (!row) {
5018     start = A->rmap->rstart; end = A->rmap->rend;
5019     ierr  = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr);
5020   } else {
5021     isrowa = *row;
5022   }
5023   if (!col) {
5024     start = A->cmap->rstart;
5025     cmap  = a->garray;
5026     nzA   = a->A->cmap->n;
5027     nzB   = a->B->cmap->n;
5028     ierr  = PetscMalloc1(nzA+nzB, &idx);CHKERRQ(ierr);
5029     ncols = 0;
5030     for (i=0; i<nzB; i++) {
5031       if (cmap[i] < start) idx[ncols++] = cmap[i];
5032       else break;
5033     }
5034     imark = i;
5035     for (i=0; i<nzA; i++) idx[ncols++] = start + i;
5036     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i];
5037     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&iscola);CHKERRQ(ierr);
5038   } else {
5039     iscola = *col;
5040   }
5041   if (scall != MAT_INITIAL_MATRIX) {
5042     ierr    = PetscMalloc1(1,&aloc);CHKERRQ(ierr);
5043     aloc[0] = *A_loc;
5044   }
5045   ierr   = MatCreateSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr);
5046   *A_loc = aloc[0];
5047   ierr   = PetscFree(aloc);CHKERRQ(ierr);
5048   if (!row) {
5049     ierr = ISDestroy(&isrowa);CHKERRQ(ierr);
5050   }
5051   if (!col) {
5052     ierr = ISDestroy(&iscola);CHKERRQ(ierr);
5053   }
5054   ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
5055   PetscFunctionReturn(0);
5056 }
5057 
5058 /*@C
5059     MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns of local A
5060 
5061     Collective on Mat
5062 
5063    Input Parameters:
5064 +    A,B - the matrices in mpiaij format
5065 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5066 -    rowb, colb - index sets of rows and columns of B to extract (or NULL)
5067 
5068    Output Parameter:
5069 +    rowb, colb - index sets of rows and columns of B to extract
5070 -    B_seq - the sequential matrix generated
5071 
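   A sketch of the intended reuse cycle (the index sets returned by the first call
   are passed back unchanged when reusing):

.vb
     IS  rowb = NULL,colb = NULL;
     Mat B_seq;

     ierr = MatGetBrowsOfAcols(A,B,MAT_INITIAL_MATRIX,&rowb,&colb,&B_seq);CHKERRQ(ierr);
     /* ... new numerical values in B, same nonzero pattern ... */
     ierr = MatGetBrowsOfAcols(A,B,MAT_REUSE_MATRIX,&rowb,&colb,&B_seq);CHKERRQ(ierr);
     ierr = ISDestroy(&rowb);CHKERRQ(ierr);
     ierr = ISDestroy(&colb);CHKERRQ(ierr);
     ierr = MatDestroy(&B_seq);CHKERRQ(ierr);
.ve
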
5072     Level: developer
5073 
5074 @*/
5075 PetscErrorCode MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,Mat *B_seq)
5076 {
5077   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
5078   PetscErrorCode ierr;
5079   PetscInt       *idx,i,start,ncols,nzA,nzB,*cmap,imark;
5080   IS             isrowb,iscolb;
5081   Mat            *bseq=NULL;
5082 
5083   PetscFunctionBegin;
5084   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
5085     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
5086   }
5087   ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
5088 
5089   if (scall == MAT_INITIAL_MATRIX) {
5090     start = A->cmap->rstart;
5091     cmap  = a->garray;
5092     nzA   = a->A->cmap->n;
5093     nzB   = a->B->cmap->n;
5094     ierr  = PetscMalloc1(nzA+nzB, &idx);CHKERRQ(ierr);
5095     ncols = 0;
5096     for (i=0; i<nzB; i++) {  /* global rows of B below the locally owned row range */
5097       if (cmap[i] < start) idx[ncols++] = cmap[i];
5098       else break;
5099     }
5100     imark = i;
5101     for (i=0; i<nzA; i++) idx[ncols++] = start + i;   /* locally owned rows of B */
5102     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* global rows of B above the locally owned row range */
5103     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&isrowb);CHKERRQ(ierr);
5104     ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr);
5105   } else {
5106     if (!rowb || !colb) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX");
5107     isrowb  = *rowb; iscolb = *colb;
5108     ierr    = PetscMalloc1(1,&bseq);CHKERRQ(ierr);
5109     bseq[0] = *B_seq;
5110   }
5111   ierr   = MatCreateSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr);
5112   *B_seq = bseq[0];
5113   ierr   = PetscFree(bseq);CHKERRQ(ierr);
5114   if (!rowb) {
5115     ierr = ISDestroy(&isrowb);CHKERRQ(ierr);
5116   } else {
5117     *rowb = isrowb;
5118   }
5119   if (!colb) {
5120     ierr = ISDestroy(&iscolb);CHKERRQ(ierr);
5121   } else {
5122     *colb = iscolb;
5123   }
5124   ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
5125   PetscFunctionReturn(0);
5126 }
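
/*
   Example usage: a minimal sketch (the helper name FetchBrowsOfAcols is illustrative).
   With MAT_INITIAL_MATRIX the routine creates rowb and colb; they must be passed back
   unchanged for MAT_REUSE_MATRIX and destroyed by the caller afterwards.

     PetscErrorCode FetchBrowsOfAcols(Mat A,Mat B)
     {
       PetscErrorCode ierr;
       IS             rowb = NULL,colb = NULL;
       Mat            B_seq = NULL;

       PetscFunctionBegin;
       ierr = MatGetBrowsOfAcols(A,B,MAT_INITIAL_MATRIX,&rowb,&colb,&B_seq);CHKERRQ(ierr);
       ierr = MatGetBrowsOfAcols(A,B,MAT_REUSE_MATRIX,&rowb,&colb,&B_seq);CHKERRQ(ierr);
       ierr = ISDestroy(&rowb);CHKERRQ(ierr);
       ierr = ISDestroy(&colb);CHKERRQ(ierr);
       ierr = MatDestroy(&B_seq);CHKERRQ(ierr);
       PetscFunctionReturn(0);
     }
*/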
5127 
5128 /*
5129     MatGetBrowsOfAoCols_MPIAIJ - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns
5130     of the OFF-DIAGONAL portion of the local part of A
5131 
5132     Collective on Mat
5133 
5134    Input Parameters:
5135 +    A,B - the matrices in mpiaij format
5136 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5137 
5138    Output Parameters:
5139 +    startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or NULL)
5140 .    startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or NULL)
5141 .    bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or NULL)
5142 -    B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N
5143 
5144     Level: developer
5145 
5146 */
5147 PetscErrorCode MatGetBrowsOfAoCols_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscInt **startsj_s,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth)
5148 {
5149   VecScatter_MPI_General *gen_to,*gen_from;
5150   PetscErrorCode         ierr;
5151   Mat_MPIAIJ             *a=(Mat_MPIAIJ*)A->data;
5152   Mat_SeqAIJ             *b_oth;
5153   VecScatter             ctx;
5154   MPI_Comm               comm;
5155   PetscMPIInt            *rprocs,*sprocs,tag,rank;
5156   PetscInt               *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap->n,row,*b_othi,*b_othj;
5157   PetscInt               *rvalues,*svalues,*cols,sbs,rbs;
5158   PetscScalar            *b_otha,*bufa,*bufA,*vals;
5159   PetscInt               i,j,k,l,ll,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len;
5160   MPI_Request            *rwaits = NULL,*swaits = NULL;
5161   MPI_Status             *sstatus,rstatus;
5162   PetscMPIInt            jj,size;
5163   VecScatterType         type;
5164   PetscBool              mpi1;
5165 
5166   PetscFunctionBegin;
5167   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
5168   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
5169 
5170   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
5171     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
5172   }
5173   ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
5174   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
5175 
5176   if (size == 1) { /* no off-process columns: nothing to fetch, so no buffers are built and a NULL matrix is returned */
5177     startsj_s = NULL;
5178     bufa_ptr  = NULL;
5179     *B_oth    = NULL;
5180     PetscFunctionReturn(0);
5181   }
5182 
5183   ctx = a->Mvctx;
5184   ierr = VecScatterGetType(ctx,&type);CHKERRQ(ierr);
5185   ierr = PetscStrcmp(type,"mpi1",&mpi1);CHKERRQ(ierr);
5186   if (!mpi1) {
5187     /* a->Mvctx is not of type MPI1; the Mat-Mat operations below are implemented only for
5188        MPI1 scatters, so create a->Mvctx_mpi1 */
5189     if (!a->Mvctx_mpi1) {
5190       a->Mvctx_mpi1_flg = PETSC_TRUE;
5191       ierr = MatSetUpMultiply_MPIAIJ(A);CHKERRQ(ierr);
5192     }
5193     ctx = a->Mvctx_mpi1;
5194   }
5195   tag = ((PetscObject)ctx)->tag;
5196 
5197   gen_to   = (VecScatter_MPI_General*)ctx->todata;
5198   gen_from = (VecScatter_MPI_General*)ctx->fromdata;
5199   nrecvs   = gen_from->n;
5200   nsends   = gen_to->n;
5201 
5202   ierr    = PetscMalloc2(nrecvs,&rwaits,nsends,&swaits);CHKERRQ(ierr);
5203   srow    = gen_to->indices;    /* local row index to be sent */
5204   sstarts = gen_to->starts;
5205   sprocs  = gen_to->procs;
5206   sstatus = gen_to->sstatus;
5207   sbs     = gen_to->bs;
5208   rstarts = gen_from->starts;
5209   rprocs  = gen_from->procs;
5210   rbs     = gen_from->bs;
5211 
5212   if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX;
5213   if (scall == MAT_INITIAL_MATRIX) {
5214     /* i-array */
5215     /*---------*/
5216     /*  post receives */
5217     ierr = PetscMalloc1(rbs*(rstarts[nrecvs] - rstarts[0]),&rvalues);CHKERRQ(ierr);
5218     for (i=0; i<nrecvs; i++) {
5219       rowlen = rvalues + rstarts[i]*rbs;
5220       nrows  = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */
5221       ierr   = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5222     }
5223 
5224     /* pack the outgoing message */
5225     ierr = PetscMalloc2(nsends+1,&sstartsj,nrecvs+1,&rstartsj);CHKERRQ(ierr);
5226 
5227     sstartsj[0] = 0;
5228     rstartsj[0] = 0;
5229     len         = 0; /* total length of j or a array to be sent */
5230     k           = 0;
5231     ierr = PetscMalloc1(sbs*(sstarts[nsends] - sstarts[0]),&svalues);CHKERRQ(ierr);
5232     for (i=0; i<nsends; i++) {
5233       rowlen = svalues + sstarts[i]*sbs;
5234       nrows  = sstarts[i+1]-sstarts[i]; /* num of block rows */
5235       for (j=0; j<nrows; j++) {
5236         row = srow[k] + B->rmap->range[rank]; /* global row idx */
5237         for (l=0; l<sbs; l++) {
5238           ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr); /* rowlength */
5239 
5240           rowlen[j*sbs+l] = ncols;
5241 
5242           len += ncols;
5243           ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr);
5244         }
5245         k++;
5246       }
5247       ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5248 
5249       sstartsj[i+1] = len;  /* starting point of (i+1)-th outgoing msg in bufj and bufa */
5250     }
5251     /* recvs and sends of i-array are completed */
5252     i = nrecvs;
5253     while (i--) {
5254       ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5255     }
5256     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
5257     ierr = PetscFree(svalues);CHKERRQ(ierr);
5258 
5259     /* allocate buffers for sending j and a arrays */
5260     ierr = PetscMalloc1(len+1,&bufj);CHKERRQ(ierr);
5261     ierr = PetscMalloc1(len+1,&bufa);CHKERRQ(ierr);
5262 
5263     /* create i-array of B_oth */
5264     ierr = PetscMalloc1(aBn+2,&b_othi);CHKERRQ(ierr);
5265 
5266     b_othi[0] = 0;
5267     len       = 0; /* total length of j or a array to be received */
5268     k         = 0;
5269     for (i=0; i<nrecvs; i++) {
5270       rowlen = rvalues + rstarts[i]*rbs;
5271       nrows  = rbs*(rstarts[i+1]-rstarts[i]); /* num of rows to be received */
5272       for (j=0; j<nrows; j++) {
5273         b_othi[k+1] = b_othi[k] + rowlen[j];
5274         ierr = PetscIntSumError(rowlen[j],len,&len);CHKERRQ(ierr);
5275         k++;
5276       }
5277       rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */
5278     }
5279     ierr = PetscFree(rvalues);CHKERRQ(ierr);
5280 
5281     /* allocate space for j and a arrays of B_oth */
5282     ierr = PetscMalloc1(b_othi[aBn]+1,&b_othj);CHKERRQ(ierr);
5283     ierr = PetscMalloc1(b_othi[aBn]+1,&b_otha);CHKERRQ(ierr);
5284 
5285     /* j-array */
5286     /*---------*/
5287     /*  post receives of j-array */
5288     for (i=0; i<nrecvs; i++) {
5289       nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
5290       ierr  = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5291     }
5292 
5293     /* pack the outgoing message j-array */
5294     k = 0;
5295     for (i=0; i<nsends; i++) {
5296       nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5297       bufJ  = bufj+sstartsj[i];
5298       for (j=0; j<nrows; j++) {
5299         row = srow[k++] + B->rmap->range[rank];  /* global row idx */
5300         for (ll=0; ll<sbs; ll++) {
5301           ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr);
5302           for (l=0; l<ncols; l++) {
5303             *bufJ++ = cols[l];
5304           }
5305           ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr);
5306         }
5307       }
5308       ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5309     }
5310 
5311     /* recvs and sends of j-array are completed */
5312     i = nrecvs;
5313     while (i--) {
5314       ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5315     }
5316     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
5317   } else if (scall == MAT_REUSE_MATRIX) {
5318     sstartsj = *startsj_s;
5319     rstartsj = *startsj_r;
5320     bufa     = *bufa_ptr;
5321     b_oth    = (Mat_SeqAIJ*)(*B_oth)->data;
5322     b_otha   = b_oth->a;
5323   } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"scall must be MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX");
5324 
5325   /* a-array */
5326   /*---------*/
5327   /*  post receives of a-array */
5328   for (i=0; i<nrecvs; i++) {
5329     nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
5330     ierr  = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5331   }
5332 
5333   /* pack the outgoing message a-array */
5334   k = 0;
5335   for (i=0; i<nsends; i++) {
5336     nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5337     bufA  = bufa+sstartsj[i];
5338     for (j=0; j<nrows; j++) {
5339       row = srow[k++] + B->rmap->range[rank];  /* global row idx */
5340       for (ll=0; ll<sbs; ll++) {
5341         ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr);
5342         for (l=0; l<ncols; l++) {
5343           *bufA++ = vals[l];
5344         }
5345         ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr);
5346       }
5347     }
5348     ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5349   }
5350   /* recvs and sends of a-array are completed */
5351   i = nrecvs;
5352   while (i--) {
5353     ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5354   }
5355   if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
5356   ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr);
5357 
5358   if (scall == MAT_INITIAL_MATRIX) {
5359     /* put together the new matrix */
5360     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr);
5361 
5362     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
5363     /* Since these are PETSc arrays, change flags to free them as necessary. */
5364     b_oth          = (Mat_SeqAIJ*)(*B_oth)->data;
5365     b_oth->free_a  = PETSC_TRUE;
5366     b_oth->free_ij = PETSC_TRUE;
5367     b_oth->nonew   = 0;
5368 
5369     ierr = PetscFree(bufj);CHKERRQ(ierr);
5370     if (!startsj_s || !bufa_ptr) {
5371       ierr = PetscFree2(sstartsj,rstartsj);CHKERRQ(ierr);
5372       ierr = PetscFree(bufa);CHKERRQ(ierr); /* free the value buffer itself; PetscFree(bufa_ptr) would free the caller's pointer rather than the buffer */
5373     } else {
5374       *startsj_s = sstartsj;
5375       *startsj_r = rstartsj;
5376       *bufa_ptr  = bufa;
5377     }
5378   }
5379   ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
5380   PetscFunctionReturn(0);
5381 }
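
/*
   Example usage: a hedged sketch of the reuse pattern (the helper name FetchBrowsOfAoCols
   is illustrative). The communication layout built for MAT_INITIAL_MATRIX is kept in
   startsj_s/startsj_r/bufa and replayed for MAT_REUSE_MATRIX when only the numerical
   values of B have changed; the caller owns the buffers afterwards.

     PetscErrorCode FetchBrowsOfAoCols(Mat A,Mat B)
     {
       PetscErrorCode ierr;
       PetscInt       *startsj_s = NULL,*startsj_r = NULL;
       MatScalar      *bufa = NULL;
       Mat            B_oth = NULL;

       PetscFunctionBegin;
       ierr = MatGetBrowsOfAoCols_MPIAIJ(A,B,MAT_INITIAL_MATRIX,&startsj_s,&startsj_r,&bufa,&B_oth);CHKERRQ(ierr);
       ierr = MatGetBrowsOfAoCols_MPIAIJ(A,B,MAT_REUSE_MATRIX,&startsj_s,&startsj_r,&bufa,&B_oth);CHKERRQ(ierr);
       ierr = PetscFree2(startsj_s,startsj_r);CHKERRQ(ierr);
       ierr = PetscFree(bufa);CHKERRQ(ierr);
       ierr = MatDestroy(&B_oth);CHKERRQ(ierr);
       PetscFunctionReturn(0);
     }
*/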
5382 
5383 /*@C
5384   MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication.
5385 
5386   Not Collective
5387 
5388   Input Parameter:
5389 . A - The matrix in mpiaij format
5390 
5391   Output Parameters:
5392 + lvec - The local vector holding off-process values from the argument to a matrix-vector product
5393 . colmap - A map from global column index to local index into lvec
5394 - multScatter - A scatter from the argument of a matrix-vector product to lvec
5395 
5396   Level: developer
5397 
5398 @*/
5399 #if defined(PETSC_USE_CTABLE)
5400 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter)
5401 #else
5402 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter)
5403 #endif
5404 {
5405   Mat_MPIAIJ *a;
5406 
5407   PetscFunctionBegin;
5408   PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
5409   PetscValidPointer(lvec, 2);
5410   PetscValidPointer(colmap, 3);
5411   PetscValidPointer(multScatter, 4);
5412   a = (Mat_MPIAIJ*) A->data;
5413   if (lvec) *lvec = a->lvec;
5414   if (colmap) *colmap = a->colmap;
5415   if (multScatter) *multScatter = a->Mvctx;
5416   PetscFunctionReturn(0);
5417 }
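
/*
   Example usage: a minimal sketch (the helper name InspectCommunication is illustrative).
   The returned objects are references into the matrix and are owned by it, so the caller
   must not destroy them.

     PetscErrorCode InspectCommunication(Mat A)
     {
       PetscErrorCode ierr;
       Vec            lvec;
       VecScatter     Mvctx;
   #if defined(PETSC_USE_CTABLE)
       PetscTable     colmap;
   #else
       PetscInt       *colmap;
   #endif

       PetscFunctionBegin;
       ierr = MatGetCommunicationStructs(A,&lvec,&colmap,&Mvctx);CHKERRQ(ierr);
       ierr = VecScatterView(Mvctx,PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr);
       PetscFunctionReturn(0);
     }
*/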
5418 
5419 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat,MatType,MatReuse,Mat*);
5420 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJPERM(Mat,MatType,MatReuse,Mat*);
5421 #if defined(PETSC_HAVE_MKL_SPARSE)
5422 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJMKL(Mat,MatType,MatReuse,Mat*);
5423 #endif
5424 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat,MatType,MatReuse,Mat*);
5425 #if defined(PETSC_HAVE_ELEMENTAL)
5426 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_Elemental(Mat,MatType,MatReuse,Mat*);
5427 #endif
5428 #if defined(PETSC_HAVE_HYPRE)
5429 PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat,MatType,MatReuse,Mat*);
5430 PETSC_INTERN PetscErrorCode MatMatMatMult_Transpose_AIJ_AIJ(Mat,Mat,Mat,MatReuse,PetscReal,Mat*);
5431 #endif
5432 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_IS(Mat,MatType,MatReuse,Mat*);
5433 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISELL(Mat,MatType,MatReuse,Mat*);
5434 
5435 /*
5436     Computes C = A*B as (B'*A')', since computing the MPIDense-MPIAIJ product A*B directly is untenable
5437 
5438                n                       p                          p
5439         (              )       (              )         (                  )
5440       m (      A       )  *  n (       B      )   =   m (         C        )
5441         (              )       (              )         (                  )
5442 
5443 */
5444 PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C)
5445 {
5446   PetscErrorCode ierr;
5447   Mat            At,Bt,Ct;
5448 
5449   PetscFunctionBegin;
5450   ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr);
5451   ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr);
5452   ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,1.0,&Ct);CHKERRQ(ierr);
5453   ierr = MatDestroy(&At);CHKERRQ(ierr);
5454   ierr = MatDestroy(&Bt);CHKERRQ(ierr);
5455   ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr);
5456   ierr = MatDestroy(&Ct);CHKERRQ(ierr);
5457   PetscFunctionReturn(0);
5458 }
5459 
5460 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat *C)
5461 {
5462   PetscErrorCode ierr;
5463   PetscInt       m=A->rmap->n,n=B->cmap->n;
5464   Mat            Cmat;
5465 
5466   PetscFunctionBegin;
5467   if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %D != B->rmap->n %D",A->cmap->n,B->rmap->n);
5468   ierr = MatCreate(PetscObjectComm((PetscObject)A),&Cmat);CHKERRQ(ierr);
5469   ierr = MatSetSizes(Cmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
5470   ierr = MatSetBlockSizesFromMats(Cmat,A,B);CHKERRQ(ierr);
5471   ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr);
5472   ierr = MatMPIDenseSetPreallocation(Cmat,NULL);CHKERRQ(ierr);
5473   ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5474   ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5475 
5476   Cmat->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIAIJ;
5477 
5478   *C = Cmat;
5479   PetscFunctionReturn(0);
5480 }
5481 
5482 /* ----------------------------------------------------------------*/
5483 PETSC_INTERN PetscErrorCode MatMatMult_MPIDense_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C)
5484 {
5485   PetscErrorCode ierr;
5486 
5487   PetscFunctionBegin;
5488   if (scall == MAT_INITIAL_MATRIX) {
5489     ierr = PetscLogEventBegin(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr);
5490     ierr = MatMatMultSymbolic_MPIDense_MPIAIJ(A,B,fill,C);CHKERRQ(ierr);
5491     ierr = PetscLogEventEnd(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr);
5492   }
5493   ierr = PetscLogEventBegin(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr);
5494   ierr = MatMatMultNumeric_MPIDense_MPIAIJ(A,B,*C);CHKERRQ(ierr);
5495   ierr = PetscLogEventEnd(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr);
5496   PetscFunctionReturn(0);
5497 }
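
/*
   Example usage: a hedged sketch (the helper name DenseTimesAIJ is illustrative). This
   kernel is normally reached through the generic MatMatMult() interface when A is
   MATMPIDENSE and B is MATMPIAIJ; the second call reuses the symbolic product.

     PetscErrorCode DenseTimesAIJ(Mat A,Mat B)
     {
       PetscErrorCode ierr;
       Mat            C = NULL;

       PetscFunctionBegin;
       ierr = MatMatMult(A,B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&C);CHKERRQ(ierr);
       ierr = MatMatMult(A,B,MAT_REUSE_MATRIX,PETSC_DEFAULT,&C);CHKERRQ(ierr);
       ierr = MatDestroy(&C);CHKERRQ(ierr);
       PetscFunctionReturn(0);
     }
*/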
5498 
5499 /*MC
5500    MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.
5501 
5502    Options Database Keys:
5503 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions()
5504 
5505   Level: beginner
5506 
5507 .seealso: MatCreateAIJ()
5508 M*/
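
/*
   Example usage: a minimal sketch of creating a MATMPIAIJ matrix explicitly (the helper
   name BuildMPIAIJ and the preallocation counts 5 and 2 are illustrative placeholders).

     PetscErrorCode BuildMPIAIJ(MPI_Comm comm,PetscInt m,PetscInt n,Mat *A)
     {
       PetscErrorCode ierr;

       PetscFunctionBegin;
       ierr = MatCreate(comm,A);CHKERRQ(ierr);
       ierr = MatSetSizes(*A,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
       ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr);
       ierr = MatMPIAIJSetPreallocation(*A,5,NULL,2,NULL);CHKERRQ(ierr);
       PetscFunctionReturn(0);
     }
*/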
5509 
5510 PETSC_EXTERN PetscErrorCode MatCreate_MPIAIJ(Mat B)
5511 {
5512   Mat_MPIAIJ     *b;
5513   PetscErrorCode ierr;
5514   PetscMPIInt    size;
5515 
5516   PetscFunctionBegin;
5517   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRQ(ierr);
5518 
5519   ierr          = PetscNewLog(B,&b);CHKERRQ(ierr);
5520   B->data       = (void*)b;
5521   ierr          = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
5522   B->assembled  = PETSC_FALSE;
5523   B->insertmode = NOT_SET_VALUES;
5524   b->size       = size;
5525 
5526   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank);CHKERRQ(ierr);
5527 
5528   /* build cache for off-processor entries formed during assembly */
5529   ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash);CHKERRQ(ierr);
5530 
5531   b->donotstash  = PETSC_FALSE;
5532   b->colmap      = 0;
5533   b->garray      = 0;
5534   b->roworiented = PETSC_TRUE;
5535 
5536   /* stuff used for matrix vector multiply */
5537   b->lvec  = NULL;
5538   b->Mvctx = NULL;
5539 
5540   /* stuff for MatGetRow() */
5541   b->rowindices   = 0;
5542   b->rowvalues    = 0;
5543   b->getrowactive = PETSC_FALSE;
5544 
5545   /* flexible pointer used in CUSP/CUSPARSE classes */
5546   b->spptr = NULL;
5547 
5548   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetUseScalableIncreaseOverlap_C",MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ);CHKERRQ(ierr);
5549   ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPIAIJ);CHKERRQ(ierr);
5550   ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPIAIJ);CHKERRQ(ierr);
5551   ierr = PetscObjectComposeFunction((PetscObject)B,"MatIsTranspose_C",MatIsTranspose_MPIAIJ);CHKERRQ(ierr);
5552   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr);
5553   ierr = PetscObjectComposeFunction((PetscObject)B,"MatResetPreallocation_C",MatResetPreallocation_MPIAIJ);CHKERRQ(ierr);
5554   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr);
5555   ierr = PetscObjectComposeFunction((PetscObject)B,"MatDiagonalScaleLocal_C",MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr);
5556   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijperm_C",MatConvert_MPIAIJ_MPIAIJPERM);CHKERRQ(ierr);
5557 #if defined(PETSC_HAVE_MKL_SPARSE)
5558   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijmkl_C",MatConvert_MPIAIJ_MPIAIJMKL);CHKERRQ(ierr);
5559 #endif
5560   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijcrl_C",MatConvert_MPIAIJ_MPIAIJCRL);CHKERRQ(ierr);
5561   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C",MatConvert_MPIAIJ_MPISBAIJ);CHKERRQ(ierr);
5562 #if defined(PETSC_HAVE_ELEMENTAL)
5563   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_elemental_C",MatConvert_MPIAIJ_Elemental);CHKERRQ(ierr);
5564 #endif
5565 #if defined(PETSC_HAVE_HYPRE)
5566   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_hypre_C",MatConvert_AIJ_HYPRE);CHKERRQ(ierr);
5567 #endif
5568   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_is_C",MatConvert_MPIAIJ_IS);CHKERRQ(ierr);
5569   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisell_C",MatConvert_MPIAIJ_MPISELL);CHKERRQ(ierr);
5570   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMult_mpidense_mpiaij_C",MatMatMult_MPIDense_MPIAIJ);CHKERRQ(ierr);
5571   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultSymbolic_mpidense_mpiaij_C",MatMatMultSymbolic_MPIDense_MPIAIJ);CHKERRQ(ierr);
5572   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultNumeric_mpidense_mpiaij_C",MatMatMultNumeric_MPIDense_MPIAIJ);CHKERRQ(ierr);
5573 #if defined(PETSC_HAVE_HYPRE)
5574   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMatMult_transpose_mpiaij_mpiaij_C",MatMatMatMult_Transpose_AIJ_AIJ);CHKERRQ(ierr);
5575 #endif
5576   ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr);
5577   PetscFunctionReturn(0);
5578 }
5579 
5580 /*@C
5581      MatCreateMPIAIJWithSplitArrays - creates an MPI AIJ matrix using arrays that contain the "diagonal"
5582          and "off-diagonal" parts of the matrix in CSR format.
5583 
5584    Collective on MPI_Comm
5585 
5586    Input Parameters:
5587 +  comm - MPI communicator
5588 .  m - number of local rows (Cannot be PETSC_DECIDE)
5589 .  n - number of local columns; this should be the same as the local size used in creating the
5590        x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have it
5591        calculated if N is given). For square matrices n is almost always m.
5592 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
5593 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
5594 .   i - row offsets (of length m+1, with i[0] = 0) for "diagonal" portion of matrix
5595 .   j - column indices for "diagonal" portion
5596 .   a - matrix values for "diagonal" portion
5597 .   oi - row offsets (of length m+1, with oi[0] = 0) for "off-diagonal" portion of matrix
5598 .   oj - column indices for "off-diagonal" portion
5599 -   oa - matrix values for "off-diagonal" portion
5600 
5601    Output Parameter:
5602 .   mat - the matrix
5603 
5604    Level: advanced
5605 
5606    Notes:
5607        The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc. The user
5608        must free the arrays once the matrix has been destroyed and not before.
5609 
5610        The i and j indices are 0 based
5611 
5612        See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix
5613 
5614        This sets local rows and cannot be used to set off-processor values.
5615 
5616        Use of this routine is discouraged because it is inflexible and cumbersome to use. It is extremely rare that a
5617        legacy application natively assembles into exactly this split format. The code to do so is nontrivial and does
5618        not easily support in-place reassembly. It is recommended to use MatSetValues() (or a variant thereof) because
5619        the resulting assembly is easier to implement, will work with any matrix format, and the user does not have to
5620        keep track of the underlying arrays. Use MatSetOption(A,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) to disable all
5621        communication if it is known that only local entries will be set.
5622 
5623 .keywords: matrix, aij, compressed row, sparse, parallel
5624 
5625 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
5626           MATMPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithArrays()
5627 @*/
5628 PetscErrorCode MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat)
5629 {
5630   PetscErrorCode ierr;
5631   Mat_MPIAIJ     *maij;
5632 
5633   PetscFunctionBegin;
5634   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
5635   if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
5636   if (oi[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0");
5637   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
5638   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
5639   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
5640   maij = (Mat_MPIAIJ*) (*mat)->data;
5641 
5642   (*mat)->preallocated = PETSC_TRUE;
5643 
5644   ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr);
5645   ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr);
5646 
5647   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr);
5648   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr);
5649 
5650   ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5651   ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5652   ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5653   ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5654 
5655   ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr);
5656   ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5657   ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5658   ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_FALSE);CHKERRQ(ierr);
5659   ierr = MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
5660   PetscFunctionReturn(0);
5661 }
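
/*
   Example usage: a hedged sketch (the helper name BuildSplitIdentity is illustrative).
   Each process contributes a 2x2 identity as its "diagonal" block and an empty
   "off-diagonal" block; the arrays are static because the routine does not copy them
   and they must outlive the matrix.

     PetscErrorCode BuildSplitIdentity(MPI_Comm comm,Mat *A)
     {
       static PetscInt    i[]  = {0,1,2},j[]  = {0,1};
       static PetscScalar a[]  = {1.0,1.0};
       static PetscInt    oi[] = {0,0,0},oj[] = {0};
       static PetscScalar oa[] = {0.0};
       PetscErrorCode     ierr;

       PetscFunctionBegin;
       ierr = MatCreateMPIAIJWithSplitArrays(comm,2,2,PETSC_DETERMINE,PETSC_DETERMINE,i,j,a,oi,oj,oa,A);CHKERRQ(ierr);
       PetscFunctionReturn(0);
     }
*/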
5662 
5663 /*
5664     Special version for direct calls from Fortran
5665 */
5666 #include <petsc/private/fortranimpl.h>
5667 
5668 /* Change these macros so they can be used in a void function */
5669 #undef CHKERRQ
5670 #define CHKERRQ(ierr) CHKERRABORT(PETSC_COMM_WORLD,ierr)
5671 #undef SETERRQ2
5672 #define SETERRQ2(comm,ierr,b,c,d) CHKERRABORT(comm,ierr)
5673 #undef SETERRQ3
5674 #define SETERRQ3(comm,ierr,b,c,d,e) CHKERRABORT(comm,ierr)
5675 #undef SETERRQ
5676 #define SETERRQ(c,ierr,b) CHKERRABORT(c,ierr)
5677 
5678 #if defined(PETSC_HAVE_FORTRAN_CAPS)
5679 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ
5680 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
5681 #define matsetvaluesmpiaij_ matsetvaluesmpiaij
5682 #else
5683 #endif
5684 PETSC_EXTERN void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr)
5685 {
5686   Mat            mat  = *mmat;
5687   PetscInt       m    = *mm, n = *mn;
5688   InsertMode     addv = *maddv;
5689   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
5690   PetscScalar    value;
5691   PetscErrorCode ierr;
5692 
5693   MatCheckPreallocated(mat,1);
5694   if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv;
5695 
5696 #if defined(PETSC_USE_DEBUG)
5697   else if (mat->insertmode != addv) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
5698 #endif
5699   {
5700     PetscInt  i,j,rstart  = mat->rmap->rstart,rend = mat->rmap->rend;
5701     PetscInt  cstart      = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
5702     PetscBool roworiented = aij->roworiented;
5703 
5704     /* Some variables required by the MatSetValues_SeqAIJ_A/B_Private() macros */
5705     Mat        A                 = aij->A;
5706     Mat_SeqAIJ *a                = (Mat_SeqAIJ*)A->data;
5707     PetscInt   *aimax            = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
5708     MatScalar  *aa               = a->a;
5709     PetscBool  ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES)) ? PETSC_TRUE : PETSC_FALSE);
5710     Mat        B                 = aij->B;
5711     Mat_SeqAIJ *b                = (Mat_SeqAIJ*)B->data;
5712     PetscInt   *bimax            = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
5713     MatScalar  *ba               = b->a;
5714 
5715     PetscInt  *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
5716     PetscInt  nonew = a->nonew;
5717     MatScalar *ap1,*ap2;
5718 
5719     PetscFunctionBegin;
5720     for (i=0; i<m; i++) {
5721       if (im[i] < 0) continue;
5722 #if defined(PETSC_USE_DEBUG)
5723       if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
5724 #endif
5725       if (im[i] >= rstart && im[i] < rend) {
5726         row      = im[i] - rstart;
5727         lastcol1 = -1;
5728         rp1      = aj + ai[row];
5729         ap1      = aa + ai[row];
5730         rmax1    = aimax[row];
5731         nrow1    = ailen[row];
5732         low1     = 0;
5733         high1    = nrow1;
5734         lastcol2 = -1;
5735         rp2      = bj + bi[row];
5736         ap2      = ba + bi[row];
5737         rmax2    = bimax[row];
5738         nrow2    = bilen[row];
5739         low2     = 0;
5740         high2    = nrow2;
5741 
5742         for (j=0; j<n; j++) {
5743           if (roworiented) value = v[i*n+j];
5744           else value = v[i+j*m];
5745           if (in[j] >= cstart && in[j] < cend) {
5746             col = in[j] - cstart;
5747             if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && row != col) continue;
5748             MatSetValues_SeqAIJ_A_Private(row,col,value,addv,im[i],in[j]);
5749           } else if (in[j] < 0) continue;
5750 #if defined(PETSC_USE_DEBUG)
5751           /* the extra braces around SETERRQ2() are required for --with-errorchecking=0 because of the 'else' clause that follows */
5752           else if (in[j] >= mat->cmap->N) {SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);}
5753 #endif
5754           else {
5755             if (mat->was_assembled) {
5756               if (!aij->colmap) {
5757                 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
5758               }
5759 #if defined(PETSC_USE_CTABLE)
5760               ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
5761               col--;
5762 #else
5763               col = aij->colmap[in[j]] - 1;
5764 #endif
5765               if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && row != col) continue;
5766               if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
5767                 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
5768                 col  =  in[j];
5769                 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
5770                 B     = aij->B;
5771                 b     = (Mat_SeqAIJ*)B->data;
5772                 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
5773                 rp2   = bj + bi[row];
5774                 ap2   = ba + bi[row];
5775                 rmax2 = bimax[row];
5776                 nrow2 = bilen[row];
5777                 low2  = 0;
5778                 high2 = nrow2;
5779                 bm    = aij->B->rmap->n;
5780                 ba    = b->a;
5781               }
5782             } else col = in[j];
5783             MatSetValues_SeqAIJ_B_Private(row,col,value,addv,im[i],in[j]);
5784           }
5785         }
5786       } else if (!aij->donotstash) {
5787         if (roworiented) {
5788           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
5789         } else {
5790           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
5791         }
5792       }
5793     }
5794   }
5795   PetscFunctionReturnVoid();
5796 }
5797 
5798